#!/bin/bash
#
# AWK walkthrough, part 1: pulling single values out of the measurement files.
# Every command below is meant to be run from the directory that holds the
# measurement files (5000000.txt and its *.txt siblings).

# We have a bunch of these files
cat 5000000.txt
ls *.txt

# And we want graph-able output data like this:
# alg\n         10      100     1000..
# magyar-rand   0,005   0,03    0,3...
# magyar-worst  0,007   0,06    0,4...
# 4pasu-rand    0,017   0,11    0,7...
# 4pasu-worst   0,0237  0,42    1,3...

# One can get started thinking grep + sed as usual, but it can become tedious
# At least we can grab the number of elements from the file itself (ensures filename is not bad)
grep Sorting 5000000.txt | sed 's/.* \([0-9]*\) .*/\1/'

# But enter AWK!
awk '{print $1}' 5000000.txt
awk '{if(n =="") n = $2} END{print n}' 5000000.txt
awk 'BEGIN{getline; print $2}' 5000000.txt

# More interesting stuff
awk '{if(NR % 2 == 0) elems[NR/2] = $2} END{for(i = 1; i <= NR/2; ++i) print elems[i]}' 5000000.txt
awk '{getline; print $2}' 5000000.txt

# And to be fancy (also showing ls can use creation time nowadays, check without to see bad stuff)
# The $(ls ...) substitutions are left unquoted on purpose: the listing has to
# be word-split into one awk file operand per file.
awk 'BEGINFILE{n=""} {if(n =="") n = $2} ENDFILE{print n}' $(ls -t --time=birth *.txt)
awk 'BEGINFILE{n=""} {if(n =="") n = $2} ENDFILE{print n}' $(ls -tr --time=birth *.txt)
awk 'BEGINFILE{n=""} {if(n =="") n = $2} ENDFILE{print n}' $(ls -t *.txt)

######################################
# Column to row translation with AWK #
######################################

# To get every second field with awk it's really simple.
# OFS is output sep, FS is input sep
awk '{print $2}' 5000000.txt

# This is closer to what we want, because the last row starts elsewhere, but duplicates junk
awk '{print $2; last=$1} END {print last}' 5000000.txt

# This way we never print while processing line until END, just manually afterwards with loop - what we want
awk '{out[NR]=$2; first=$1} END {out[NR]=first; for(i=1;i<=NR;i++) print out[i]}' 5000000.txt

# This leaves out the first few lines but is the same
awk '{out[NR]=$2; first=$1} END {out[NR]=first; for(i=3;i<=NR;i++) print out[i]}' 5000000.txt

# This way, we write output as tab-separated
awk '{out[NR]=$2; first=$1} END {out[NR]=first; for(i=3;i<=NR;i++) print out[i]}' ORS='\t' 5000000.txt

# Alternative: can change ORS (output record separator) on the fly. You can do with OFS and FS too and RS too!
awk '{out[NR]=$2; first=$1} END {ORS="\t"; out[NR]=first; for(i=3;i<=NR;i++) print out[i]}' 5000000.txt

# I wanted to omit empty lines of the input
awk '{out[NR]=$2; first=$1} END {out[NR]=first; for(i=3;i<=NR;i++) if(out[i] != "") print out[i]}' ORS='\t' 5000000.txt

# A way to store the first line's second column (n) into the last position near alg name
awk '{if(n == "") n = $2; out[NR]=$2; first=$1} END {out[NR]=first; for(i=3;i<=NR;i++) if(out[i] != "") print out[i]; print n}' ORS='\t' 5000000.txt

# NOT what I want (but want to show)
awk '{if(n == "") n = $2; out[NR]=$2; first=$1} END {out[NR]=first; for(i=3;i<=NR;i++) if(out[i] != "") print out[i] n}' ORS='\t' 5000000.txt

# NOTE(review): the source is truncated at this point. Everything between
# "for(i=3;i" and "$outfile" is missing (it looks like text between a '<' and
# a '>' was stripped), which swallowed the end of the next one-liner and the
# beginning of header(). The surviving fragment of the one-liner is kept
# verbatim here; restore it from the original file:
#   awk '{if(n == "") n = $2; out[NR]=$2; first=$1} END {out[NR]=first; for(i=3;i

#######################################
# Write the header row of the result table: a row label followed by the
# first non-empty second field of every *.txt file in the current directory
# (the element count n), oldest file first, each value terminated by a tab.
# NOTE(review): the opening lines of this function were lost (see above).
#   The row label 'alg\n ' is inferred from the sample table at the top of
#   the file and the truncating '>' redirection is assumed - confirm both.
# Arguments: $1 - output file
# Outputs:   diagnostics on stderr
# Returns:   non-zero when no *.txt input file can be listed
#######################################
header() {
  local outfile=$1
  local -a inputs=()

  # ls is used (instead of a plain glob) because the columns must be ordered
  # by file creation time; mapfile keeps names containing spaces intact.
  mapfile -t inputs < <(ls -tr --time=birth -- *.txt)
  if (( ${#inputs[@]} == 0 )); then
    # Without this guard awk would get no file operands and read stdin.
    printf 'header: no *.txt input files found\n' >&2
    return 1
  fi

  printf '%s ' 'alg\n' > "$outfile"
  awk 'BEGINFILE{n=""} {if(n =="") n = $2} ENDFILE{print n}' ORS='\t' "${inputs[@]}" \
    | sed 's/,$//' >> "$outfile"
}
header data.csv

# Just look at this awesome... we do not even need to leave AWK to save which is the column index!!!
awk "/worst/{getline; last=1} last{for(i=1;i<=NF;++i) if (\$i == \"$alg\") col=i} END{print col}" 5000000.txt

# Let's really put this together too, to show how to fill the files
basefile=5000000.txt

#######################################
# Append one result row to the table: the label "<alg>-<input> ", then for
# every *.txt file in the current directory (oldest file first) the value of
# the algorithm's column on each line matching <input>, each followed by a
# tab, then a newline.
# Usage example: fill data.csv magyar rand
# Globals:   basefile (read) - file whose line containing "copy" lists the
#            algorithm names in column order
# Arguments: $1 - output file (appended to)
#            $2 - algorithm name exactly as written on the "copy" line
#            $3 - awk regex selecting the data lines (e.g. rand, worst)
# Outputs:   diagnostics on stderr
# Returns:   non-zero when basefile is unset, the algorithm is not listed or
#            no *.txt input file can be listed
#######################################
fill() {
  local outfile=$1 alg=$2 input=$3
  local col
  local -a inputs=()

  if [[ -z "${basefile:-}" ]]; then
    printf 'fill: basefile is not set\n' >&2
    return 1
  fi

  # Position of the algorithm among the names on the "copy" line. The shell
  # value is passed with -v instead of being pasted into the program text;
  # the "" concatenations force a string comparison even for numeric names.
  col=$(awk -v alg="$alg" '
    /copy/ {
      for (i = 1; i <= NF; i++)
        if (($i "") == (alg "")) { print i; exit }
    }' "$basefile")
  if [[ -z "$col" ]]; then
    # An empty index used to select field 1 (the row label) silently.
    printf 'fill: algorithm "%s" not found in %s\n' "$alg" "$basefile" >&2
    return 1
  fi

  # ls is used (instead of a plain glob) because the columns must be ordered
  # by file creation time; mapfile keeps names containing spaces intact.
  mapfile -t inputs < <(ls -tr --time=birth -- *.txt)
  if (( ${#inputs[@]} == 0 )); then
    # Without this guard awk would get no file operands and read stdin.
    printf 'fill: no *.txt input files found\n' >&2
    return 1
  fi

  {
    printf '%s-%s ' "$alg" "$input"
    # Data lines carry a row label in field 1, hence col + 1.
    awk -v pat="$input" -v col="$col" '$0 ~ pat {print $(col + 1)}' ORS='\t' "${inputs[@]}"
    printf '\n'
  } >> "$outfile"
}

# See: combinator.inc

######################
# Dividing data by n #
######################

# Look at this random other file
awk '{for(i=1; i<=NF; ++i) print $i}' a.num

# We can save every columns data / line if we want just simply - so save first line's data
# Rem.: The getline in begin removes that line from the later block! Yes...
awk 'BEGIN{getline; for(i=1; i<=NF; ++i) saved[i]=$i} {for(i=1; i<=NF; ++i) print saved[i]}' a.num

# Instead of printing real data (except for i==1) make it be divided by n (the first line / row and same col position)
awk 'BEGIN{getline; for(i=1; i<=NF; ++i){saved[i]=$i; print $i}} {for(i=1; i<=NF; ++i) if(i == 1) print $i; else print ($i/saved[i])}' a.num

# Try tab-separated (but well... turns out this becomes a single line now)
awk 'BEGIN{getline; for(i=1; i<=NF; ++i){saved[i]=$i; print $i}} {for(i=1; i<=NF; ++i) if(i == 1) print $i; else print ($i/saved[i])}' ORS='\t' a.num

# Use printf and manual ORS/OFS
# Nearly...
awk 'BEGIN{getline; for(i=1; i<=NF; ++i){saved[i]=$i; print $i}} {for(i=1; i<=NF; ++i) if(i == 1) printf "%s%s", $i, OFS; else printf "%s%s", ($i/saved[i]), (i==NF ? ORS : OFS)}' OFS='\t' a.num

# Working...
awk 'BEGIN{getline; for(i=1; i<=NF; ++i) saved[i]=$i; print $0} {for(i=1; i<=NF; ++i) if(i == 1) printf "%s%s", $i, OFS; else printf "%s%s", ($i/saved[i]), (i==NF ? ORS : OFS)}' OFS='\t' a.num

# But what about the 'Hungarian' floating point numbers with comma instead of dot?
awk 'BEGIN{getline; for(i=1; i<=NF; ++i) saved[i]=$i; print $0} {for(i=1; i<=NF; ++i) if(i == 1) printf "%s%s", $i, OFS; else printf "%s%s", ($i/saved[i]), (i==NF ? ORS : OFS)}' OFS='\t' b.num

# Solvable (enter AWK helper functions)
# hn() turns a decimal-comma string into a number by swapping ',' for '.'.
awk 'function hn(x) { gsub(",", ".", x); return x+0 } BEGIN{getline; for(i=1; i<=NF; ++i) saved[i]=$i; print $0} {for(i=1; i<=NF; ++i) if(i == 1) printf "%s%s", $i, OFS; else printf "%s%s", (hn($i)/hn(saved[i])), (i==NF ? ORS : OFS)}' OFS='\t' b.num

# But the above uses dot again - so convert back
# WARNING: in a string constant "\." is not an escaped dot. The backslash is
# dropped (gawk warns about it), the dynamic regex becomes "." and matches
# every character, so ihn() turns the whole number into commas. Kept on
# purpose to show the pitfall.
awk 'function hn(x) { gsub(",", ".", x); return x+0 } function ihn(x) { gsub("\.", ",", x); return x } BEGIN{getline; for(i=1; i<=NF; ++i) saved[i]=$i; print $0} {for(i=1; i<=NF; ++i) if(i == 1) printf "%s%s", $i, OFS; else printf "%s%s", ihn(hn($i)/hn(saved[i])), (i==NF ? ORS : OFS)}' OFS='\t' b.num

# GOOD: the doubled backslash survives string processing, so the regex is a literal dot
awk 'function hn(x) { gsub(",", ".", x); return x+0 } function ihn(x) { gsub("\\.", ",", x); return x } BEGIN{getline; for(i=1; i<=NF; ++i) saved[i]=$i; print $0} {for(i=1; i<=NF; ++i) if(i == 1) printf "%s%s", $i, OFS; else printf "%s%s", ihn(hn($i)/hn(saved[i])), (i==NF ? ORS : OFS)}' OFS='\t' b.num

# But at this complexity often you put things into external .awk file
awk -f num1.awk OFS='\t' b.num

# Can better organize at that point
awk -f num2.awk OFS='\t' b.num