diff --git a/a.num b/a.num
new file mode 100644
index 0000000..d7dea05
--- /dev/null
+++ b/a.num
@@ -0,0 +1,3 @@
+n 64 100 1024 4096
+k1 0.5 0.3 0.42 0.88
+k2 0.8 0.2 0.76 0.62
diff --git a/b.num b/b.num
new file mode 100644
index 0000000..b27d3ae
--- /dev/null
+++ b/b.num
@@ -0,0 +1,3 @@
+n 64 100 1024 4096
+k1 0,5 0,3 0,42 0,88
+k2 0,8 0,2 0,76 0,62
diff --git a/combinator.inc b/combinator.inc
new file mode 100644
index 0000000..10124ae
--- /dev/null
+++ b/combinator.inc
@@ -0,0 +1,68 @@
+# Just . combinator.inc or source combinator.inc
+# After that,
+echo "You can (manually):"
+echo "- header data.csv"
+echo "- fill data.csv magyar rand"
+echo "Or create a comparison for a specific data kind:"
+echo "- genfor rand data.csv"
+echo "Or just a big default mess:"
+echo "- generate data.csv"
+echo "To clean up the data for LibreOffice Calc (the Hungarian one, that is):"
+echo "- cleanup data.csv prepared.csv"
+echo ""
+echo "generate gives a 'default set'; you can add anything still missing with further 'fill' calls if needed"
+
+basefile=5000000.txt
+
+declare -a definputs=("worst" "smallrange" "rand" "constant")
+declare -a sortalgs=(`awk '/worst/{getline; last=1} last{for(x=1;x<=NF;++x) print $x}' ORS=' ' $basefile`)
+
+# header data.csv
+header() {
+    outfile="$1"
+
+    echo -n 'alg ' > "$outfile"
+    awk 'BEGINFILE{n=""} {if(n =="") n = $2} ENDFILE{print n}' ORS='\t' `ls -tr --time=birth *.txt` | sed 's/,$//' >> "$outfile"
+    echo "" >> "$outfile"
+}
+
+# fill data.csv magyar rand
+fill() {
+    outfile="$1"
+    alg="$2"
+    input="$3"
+
+    col=$(cat $basefile | grep copy | sed 's/^\s*//' | awk "{out[NR] = \$1} END {for(i=1;i<=NR;i++) if(out[i] == \"$alg\") print i;}" RS=' +')
+
+    echo -n "$alg-$input " >> "$outfile"
+    awk "/$input/{print \$($col+1)}" ORS='\t' `ls -tr --time=birth *.txt` >> "$outfile"
+    echo "" >> "$outfile"
+}
+
+# genfor "rand" data.csv
+genfor() {
+    inp="$1"
+    outfile="$2"
+    header "$outfile"
+    for alg in "${sortalgs[@]}"; do
+        echo -n "Adding $alg-"; echo "$inp"
+        fill "$outfile" "$alg" "$inp"
+    done
+}
+
+# generate data.csv
+# Rem.: header truncates the output file, so call it only once here and add the rows with fill directly
+generate() {
+    outfile="$1"
+    header "$outfile"
+
+    for inp in "${definputs[@]}"; do
+        for alg in "${sortalgs[@]}"; do
+            echo -n "Adding $alg-"; echo "$inp"
+            fill "$outfile" "$alg" "$inp"
+        done
+    done
+}
+
+# cleanup data.csv prepared.csv
+cleanup() {
+    in="$1"
+    out="$2"
+    sed "s/\([0-9][0-9]*\)\.\([0-9][0-9]*\)s*/\1,\2/g" "$in" > "$out"
+}
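+
+# Typical session (just a sketch - it assumes the timing *.txt files sit in the current directory):
+#   . combinator.inc
+#   generate data.csv                # header row plus one row per algorithm/input pair
+#   fill data.csv magyar smallrange  # add a single missing row afterwards if needed
+#   cleanup data.csv prepared.csv    # 0.005 -> 0,005 for the Hungarian LibreOffice Calc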
diff --git a/num1.awk b/num1.awk
new file mode 100644
index 0000000..c650361
--- /dev/null
+++ b/num1.awk
@@ -0,0 +1 @@
+function hn(x) { gsub(",", ".", x); return x+0 } function ihn(x) { gsub("\\.", ",", x); return x } BEGIN{getline; for(i=1; i<=NF; ++i) saved[i]=$i; print $0} {for(i=1; i<=NF; ++i) if(i == 1) printf "%s%s", $i, OFS; else printf "%s%s", ihn(hn($i)/hn(saved[i])), (i==NF ? ORS : OFS)}
diff --git a/num2.awk b/num2.awk
new file mode 100644
index 0000000..b9ca270
--- /dev/null
+++ b/num2.awk
@@ -0,0 +1,19 @@
+function hn(x) {
+    gsub(",", ".", x);
+    return x+0
+}
+
+function ihn(x) {
+    gsub("\\.", ",", x);
+    return x
+}
+
+BEGIN {
+    getline;
+    for(i=1; i<=NF; ++i) saved[i]=$i;
+    print $0
+}
+
+{
+    for(i=1; i<=NF; ++i) if(i == 1) printf "%s%s", $i, OFS; else printf "%s%s", ihn(hn($i)/hn(saved[i])), (i==NF ? ORS : OFS)
+}
diff --git a/parancs.awk b/parancs.awk
new file mode 100644
index 0000000..2d182d1
--- /dev/null
+++ b/parancs.awk
@@ -0,0 +1 @@
+{print $2}
diff --git a/pelda.asd b/pelda.asd
new file mode 100644
index 0000000..c54fa78
--- /dev/null
+++ b/pelda.asd
@@ -0,0 +1,8 @@
+Errors, at:
+1 2 3 4
+4pasu copy frewr gptbuck
+Warnings:
+14:22 turned off machine
+21:38 file is written with bad group
+23:22 turned off machine
+23:42 file is written with bad group
diff --git a/steps.sh b/steps.sh
new file mode 100644
index 0000000..bc6b239
--- /dev/null
+++ b/steps.sh
@@ -0,0 +1,189 @@
+#!/bin/bash
+
+# We have a bunch of these files
+cat 5000000.txt
+ls *.txt
+
+# And we want graph-able output data like this:
+# alg\n 10 100 1000..
+# magyar-rand 0,005 0,03 0,3...
+# magyar-worst 0,007 0,06 0,4...
+# 4pasu-rand 0,017 0,11 0,7...
+# 4pasu-worst 0,0237 0,42 1,3...
+
+# One can get started with the usual grep + sed thinking, but it becomes tedious fast
+# At least we can grab the number of elements from the file itself (so we do not have to trust the filename)
+cat 5000000.txt | grep Sorting | sed "s/.* \([0-9]*\) .*/\1/"
+
+# But enter AWK!
+awk '{print $1}' 5000000.txt
+awk '{if(n =="") n = $2} END{print n}' 5000000.txt
+awk 'BEGIN{getline; print $2}' 5000000.txt
+
+# More interesting stuff
+awk '{if(NR % 2 == 0) elemek[NR/2] = $2} END{for(i = 1; i <= NR/2; ++i) print elemek[i]}' 5000000.txt
+awk '{getline; print $2}' 5000000.txt
+
+# And to be fancy (also showing that ls can sort by creation time nowadays - check it without --time=birth to see the ordering go wrong)
+awk 'BEGINFILE{n=""} {if(n =="") n = $2} ENDFILE{print n}' `ls -t --time=birth *.txt`
+awk 'BEGINFILE{n=""} {if(n =="") n = $2} ENDFILE{print n}' `ls -tr --time=birth *.txt`
+awk 'BEGINFILE{n=""} {if(n =="") n = $2} ENDFILE{print n}' `ls -t *.txt`
+
+######################################
+# Column to row translation with AWK #
+######################################
+
+# Getting the second field of every line with awk is really simple. OFS is the output separator, FS is the input separator
+awk '{print $2}' 5000000.txt
+
+# This is closer to what we want, because the last row starts elsewhere, but it duplicates junk
+awk '{print $2; last=$1} END {print last}' 5000000.txt
+
+# This way we never print while processing the lines, only manually with a loop in END - which is what we want
+awk '{out[NR]=$2; first=$1} END {out[NR]=first; for(i=1;i<=NR;i++) print out[i]}' 5000000.txt
+
+# This leaves out the first few lines but is otherwise the same
+awk '{out[NR]=$2; first=$1} END {out[NR]=first; for(i=3;i<=NR;i++) print out[i]}' 5000000.txt
+
+# This way, we write output as tab-separated
+awk '{out[NR]=$2; first=$1} END {out[NR]=first; for(i=3;i<=NR;i++) print out[i]}' ORS='\t' 5000000.txt
+
+# Alternative: you can change ORS (the output record separator) on the fly. You can do the same with OFS, FS and RS too!
+awk '{out[NR]=$2; first=$1} END {ORS="\t"; out[NR]=first; for(i=3;i<=NR;i++) print out[i]}' 5000000.txt
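+
+# A small extra sketch for OFS (the output field separator): it only shows up once awk rebuilds the
+# record, so the usual trick is to touch a field first ($1=$1) before printing
+awk 'BEGIN{OFS=";"} {$1=$1; print}' 5000000.txt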
+
+# I wanted to omit empty lines of the input
+awk '{out[NR]=$2; first=$1} END {out[NR]=first; for(i=3;i<=NR;i++) if(out[i] != "") print out[i]}' ORS='\t' 5000000.txt
+
+# A way to store the first line's second column (n) into the last position, near the alg name
+awk '{if(n == "") n = $2; out[NR]=$2; first=$1} END {out[NR]=first; for(i=3;i<=NR;i++) if(out[i] != "") print out[i]; print n}' ORS='\t' 5000000.txt
+
+# NOT what I want (but want to show)
+awk '{if(n == "") n = $2; out[NR]=$2; first=$1} END {out[NR]=first; for(i=3;i<=NR;i++) if(out[i] != "") print out[i] n}' ORS='\t' 5000000.txt
+
+# Lets wrap the header line creation into a bash function
+header() {
+    outfile=$1
+
+    echo -n 'alg ' > $outfile
+    awk 'BEGINFILE{n=""} {if(n =="") n = $2} ENDFILE{print n}' ORS='\t' `ls -tr --time=birth *.txt` | sed 's/,$//' >> $outfile
+}
+header data.csv
+
+# Just look at this awesomeness... we do not even need to leave AWK to work out which column index we need!!!
+awk "/worst/{getline; last=1} last{for(i=1;i<=NF;++i) if (\$i == \"$alg\") col=i} END{print col}" 5000000.txt
+
+# Lets really put together the row-filling part too
+
+basefile=5000000.txt
+# fill data.csv magyar rand
+fill() {
+    outfile=$1
+    alg=$2
+    input=$3
+
+    col=$(cat $basefile | grep copy | sed 's/^\s*//' | awk "{out[NR] = \$1} END {for(i=1;i<=NR;i++) if(out[i] == \"$alg\") print i;}" RS=' +')
+
+    echo -n "$alg-$input " >> $outfile
+    awk "/$input/{print \$($col+1)}" ORS='\t' `ls -tr --time=birth *.txt` >> $outfile
+    echo "" >> $outfile
+}
+
+# See: combinator.inc
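+
+# A quick sanity check of the column lookup above (a sketch - "frewr" is just one of the example algorithm names from pelda.asd)
+grep copy pelda.asd | awk -v RS=' +' '{out[NR] = $1} END {for(i=1;i<=NR;i++) if(out[i] == "frewr") print i;}'
+# It prints 3, so fill would read field $(3+1) from the matching timing lines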
+
+######################
+# Dividing data by n #
+######################
+
+# Look at this random other file
+awk '{for(i=1; i<=NF; ++i) print $i}' a.num
+
+# We can simply save a line's data column by column if we want - so save the first line's data
+# Rem.: The getline in BEGIN removes that line from the later main block! Yes...
+awk 'BEGIN{getline; for(i=1; i<=NF; ++i) saved[i]=$i} {for(i=1; i<=NF; ++i) print saved[i]}' a.num
+
+# Instead of printing the real data (except for i==1), print it divided by n (the first line's value at the same column position)
+awk 'BEGIN{getline; for(i=1; i<=NF; ++i){saved[i]=$i; print $i}} {for(i=1; i<=NF; ++i) if(i == 1) print $i; else print ($i/saved[i])}' a.num
+
+# Try tab-separated (but well... turns out this becomes a single line now)
+awk 'BEGIN{getline; for(i=1; i<=NF; ++i){saved[i]=$i; print $i}} {for(i=1; i<=NF; ++i) if(i == 1) print $i; else print ($i/saved[i])}' ORS='\t' a.num
+
+# Use printf and manual ORS/OFS
+# Nearly...
+awk 'BEGIN{getline; for(i=1; i<=NF; ++i){saved[i]=$i; print $i}} {for(i=1; i<=NF; ++i) if(i == 1) printf "%s%s", $i, OFS; else printf "%s%s", ($i/saved[i]), (i==NF ? ORS : OFS)}' OFS='\t' a.num
+# Working...
+awk 'BEGIN{getline; for(i=1; i<=NF; ++i) saved[i]=$i; print $0} {for(i=1; i<=NF; ++i) if(i == 1) printf "%s%s", $i, OFS; else printf "%s%s", ($i/saved[i]), (i==NF ? ORS : OFS)}' OFS='\t' a.num
+
+# But what about the 'Hungarian' floating point numbers with comma instead of dot?
+awk 'BEGIN{getline; for(i=1; i<=NF; ++i) saved[i]=$i; print $0} {for(i=1; i<=NF; ++i) if(i == 1) printf "%s%s", $i, OFS; else printf "%s%s", ($i/saved[i]), (i==NF ? ORS : OFS)}' OFS='\t' b.num
+
+# Solvable (enter AWK helper functions)
+awk 'function hn(x) { gsub(",", ".", x); return x+0 } BEGIN{getline; for(i=1; i<=NF; ++i) saved[i]=$i; print $0} {for(i=1; i<=NF; ++i) if(i == 1) printf "%s%s", $i, OFS; else printf "%s%s", (hn($i)/hn(saved[i])), (i==NF ? ORS : OFS)}' OFS='\t' b.num
+
+# But the above uses dot again - so convert back
+# WARNING:
+awk 'function hn(x) { gsub(",", ".", x); return x+0 } function ihn(x) { gsub("\.", ",", x); return x } BEGIN{getline; for(i=1; i<=NF; ++i) saved[i]=$i; print $0} {for(i=1; i<=NF; ++i) if(i == 1) printf "%s%s", $i, OFS; else printf "%s%s", ihn(hn($i)/hn(saved[i])), (i==NF ? ORS : OFS)}' OFS='\t' b.num
+# GOOD:
+awk 'function hn(x) { gsub(",", ".", x); return x+0 } function ihn(x) { gsub("\\.", ",", x); return x } BEGIN{getline; for(i=1; i<=NF; ++i) saved[i]=$i; print $0} {for(i=1; i<=NF; ++i) if(i == 1) printf "%s%s", $i, OFS; else printf "%s%s", ihn(hn($i)/hn(saved[i])), (i==NF ? ORS : OFS)}' OFS='\t' b.num
+
+# But at this level of complexity you often put things into an external .awk file
+awk -f num1.awk OFS='\t' b.num
+
+# At that point you can also organize the code better
+awk -f num2.awk OFS='\t' b.num
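+
+# Putting the two halves together (a sketch - "normalized.csv" is just an illustrative name):
+# the first row of data.csv holds the n values, so dividing every timing row by it gives per-element costs
+# . combinator.inc && generate data.csv
+awk -f num2.awk OFS='\t' data.csv > normalized.csv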