From 05b6d9a40cd00985765feee99bcede496f3cebca Mon Sep 17 00:00:00 2001 From: Richard Thier Date: Sun, 12 Oct 2025 12:07:08 +0200 Subject: [PATCH] main content --- a.num | 3 + b.num | 3 + combinator.inc | 68 ++++++++++++++++++ num1.awk | 1 + num2.awk | 19 +++++ parancs.awk | 1 + pelda.asd | 8 +++ steps.sh | 189 +++++++++++++++++++++++++++++++++++++++++++++++++ 8 files changed, 292 insertions(+) create mode 100644 a.num create mode 100644 b.num create mode 100644 combinator.inc create mode 100644 num1.awk create mode 100644 num2.awk create mode 100644 parancs.awk create mode 100644 pelda.asd create mode 100644 steps.sh diff --git a/a.num b/a.num new file mode 100644 index 0000000..d7dea05 --- /dev/null +++ b/a.num @@ -0,0 +1,3 @@ +n 64 100 1024 4096 +k1 0.5 0.3 0.42 0.88 +k2 0.8 0.2 0.76 0.62 diff --git a/b.num b/b.num new file mode 100644 index 0000000..b27d3ae --- /dev/null +++ b/b.num @@ -0,0 +1,3 @@ +n 64 100 1024 4096 +k1 0,5 0,3 0,42 0,88 +k2 0,8 0,2 0,76 0,62 diff --git a/combinator.inc b/combinator.inc new file mode 100644 index 0000000..10124ae --- /dev/null +++ b/combinator.inc @@ -0,0 +1,68 @@ +# Just . 
combinator.inc or source combinator.inc
+# After that,
+echo "You can (manually):"
+echo "- header data.csv"
+echo "- fill data.csv magyar rand"
+echo "Or create comparison for a specific data kind:"
+echo "- genfor rand data.csv"
+echo "Or just a big default mess:"
+echo "- generate data.csv"
+echo "To cleanup data for libreoffice calc (hungarian one that is):"
+echo "- cleanup data.csv prepared.csv"
+echo ""
+echo "The generate gives a 'default set' that you can add your missing stuff with further 'fill' commands if needed"
+
+basefile=5000000.txt
+
+declare -a definputs=("worst" "smallrange" "rand" "constant")
+declare -a sortalgs=($(awk '/worst/{getline; last=1} last{for(x=1;x<=NF;++x) print $x}' ORS=' ' "$basefile"))
+
+# header OUTFILE - (re)create OUTFILE with the 'alg' label and one n per *.txt (oldest first), e.g. header data.csv
+header() {
+  local outfile="$1"
+
+  echo -n 'alg ' > "$outfile"
+  awk 'BEGINFILE{n=""} {if(n =="") n = $2} ENDFILE{print n}' ORS='\t' $(ls -tr --time=birth *.txt) | sed 's/,$//' >> "$outfile"
+  echo "" >> "$outfile"
+}
+
+# fill OUTFILE ALG INPUT - append the ALG-INPUT row; returns 1 if ALG has no column, e.g. fill data.csv magyar rand
+fill() {
+  local outfile="$1"
+  local alg="$2"
+  local input="$3" col
+
+  col=$(grep copy "$basefile" | sed 's/^\s*//' | awk -v alg="$alg" '{out[NR] = $1} END {for(i=1;i<=NR;i++) if(out[i] == (alg "")) print i;}' RS=' +')
+  [ -n "$col" ] || { echo "fill: no column for '$alg' in $basefile" >&2; return 1; }
+  echo -n "$alg-$input " >> "$outfile"
+  awk -v pat="$input" -v col="$col" '$0 ~ pat {print $(col+1)}' ORS='\t' $(ls -tr --time=birth *.txt) >> "$outfile"
+  echo "" >> "$outfile"
+}
+
+# genfor INPUT OUTFILE [noheader] - one row per algorithm; rewrites the header unless a 3rd arg is given, e.g. genfor "rand" data.csv
+genfor() {
+  local inp="$1" alg
+  local outfile="$2"
+  [ -n "${3:-}" ] || header "$outfile"
+  for alg in "${sortalgs[@]}"; do
+    echo -n "Adding $alg-"; echo "$inp"
+    fill "$outfile" "$alg" "$inp"
+  done
+}
+
+# generate OUTFILE - header once, then rows for every default input kind (header is truncating, so genfor must not redo it)
+generate() {
+  local outfile="$1" inp
+  header "$outfile"
+
+  for inp in "${definputs[@]}"; do
+    genfor "$inp" "$outfile" noheader
+  done
+}
+
+# cleanup IN OUT - dot decimals become comma decimals, 's' right after a number is dropped, e.g. cleanup data.csv prepared.csv
+cleanup() {
+  local in="$1"
+  local out="$2"
+  sed "s/\([0-9][0-9]*\)\.\([0-9][0-9]*\)s*/\1,\2/g" "$in" > "$out"
+}
diff --git a/num1.awk b/num1.awk
new file mode 100644
index 0000000..c650361
--- /dev/null
+++ b/num1.awk
@@ -0,0 +1 @@
+function hn(x) { gsub(",", ".", x); return x+0 } function ihn(x) { gsub("\\.", ",", x); return x } BEGIN{getline; for(i=1; i<=NF; ++i) saved[i]=$i; print $0} {for(i=1; i<=NF; ++i) if(i == 1) printf "%s%s", $i, OFS; else printf "%s%s", ihn(hn($i)/hn(saved[i])), (i==NF ? ORS : OFS)}
diff --git a/num2.awk b/num2.awk
new file mode 100644
index 0000000..b9ca270
--- /dev/null
+++ b/num2.awk
@@ -0,0 +1,19 @@
+function hn(x) {
+  gsub(",", ".", x);
+  return x+0
+}
+
+function ihn(x) {
+  gsub("\\.", ",", x);
+  return x
+}
+
+BEGIN {
+  getline;
+  for(i=1; i<=NF; ++i) saved[i]=$i;
+  print $0
+}
+
+{
+  for(i=1; i<=NF; ++i) if(i == 1) printf "%s%s", $i, OFS; else printf "%s%s", ihn(hn($i)/hn(saved[i])), (i==NF ? ORS : OFS)
+}
diff --git a/parancs.awk b/parancs.awk
new file mode 100644
index 0000000..2d182d1
--- /dev/null
+++ b/parancs.awk
@@ -0,0 +1 @@
+{print $2}
diff --git a/pelda.asd b/pelda.asd
new file mode 100644
index 0000000..c54fa78
--- /dev/null
+++ b/pelda.asd
@@ -0,0 +1,8 @@
+Errors, at:
+1 2 3 4
+4pasu copy frewr gptbuck
+Warnings:
+14:22 turned off machine
+21:38 file is written with bad group
+23:22 turned off machine
+23:42 file is written with bad group
diff --git a/steps.sh b/steps.sh
new file mode 100644
index 0000000..bc6b239
--- /dev/null
+++ b/steps.sh
@@ -0,0 +1,189 @@
+#!/bin/bash
+
+# We have a bunch of these files
+cat 5000000.txt
+ls *.txt
+
+# And we want graph-able output data like this:
+# alg\n 10 100 1000..
+# magyar-rand 0,005 0,03 0,3...
+# magyar-worst 0,007 0,06 0,4...
+# 4pasu-rand 0,017 0,11 0,7...
+# 4pasu-worst 0,0237 0,42 1,3...
+
+# One can get started thinking grep + sed as usual, but it can become tedious
+# At least we can grab the number of elements from the file itself (ensures filename is not bad)
+cat 5000000.txt | grep Sorting | sed "s/.* \([0-9]*\) .*/\1/"
+
+# But enter AWK!
+awk '{print $1}' 5000000.txt
+awk '{if(n =="") n = $2} END{print n}' 5000000.txt
+awk 'BEGIN{getline; print $2}' 5000000.txt
+
+# More interesting stuff
+awk '{if(NR % 2 == 0) elemek[NR/2] = $2} END{for(i = 1; i <= NR/2; ++i) print elemek[i]}' 5000000.txt
+awk '{getline; print $2}' 5000000.txt
+
+# And to be fancy (also showing ls can use creation time nowadays, check without to see bad stuff)
+awk 'BEGINFILE{n=""} {if(n =="") n = $2} ENDFILE{print n}' `ls -t --time=birth *.txt`
+awk 'BEGINFILE{n=""} {if(n =="") n = $2} ENDFILE{print n}' `ls -tr --time=birth *.txt`
+awk 'BEGINFILE{n=""} {if(n =="") n = $2} ENDFILE{print n}' `ls -t *.txt`
+
+######################################
+# Column to row translation with AWK #
+######################################
+
+# To get the second field of every line with awk, it's really simple. OFS is output sep, FS is input sep
+awk '{print $2}' 5000000.txt
+
+# This is closer to what we want, because the last row starts elsewhere, but duplicates junk
+awk '{print $2; last=$1} END {print last}' 5000000.txt
+
+# This way we never print while processing line until END, just manually afterwards with loop - what we want
+awk '{out[NR]=$2; first=$1} END {out[NR]=first; for(i=1;i<=NR;i++) print out[i]}' 5000000.txt
+
+# This leaves out the first few lines but is the same
+awk '{out[NR]=$2; first=$1} END {out[NR]=first; for(i=3;i<=NR;i++) print out[i]}' 5000000.txt
+
+# This way, we write output as tab-separated
+awk '{out[NR]=$2; first=$1} END {out[NR]=first; for(i=3;i<=NR;i++) print out[i]}' ORS='\t' 5000000.txt
+# Alternative: can change ORS (output record separator) on the fly. You can do the same with OFS, FS and RS too!
+awk '{out[NR]=$2; first=$1} END {ORS="\t"; out[NR]=first; for(i=3;i<=NR;i++) print out[i]}' 5000000.txt
+
+# I wanted to omit empty lines of the input
+awk '{out[NR]=$2; first=$1} END {out[NR]=first; for(i=3;i<=NR;i++) if(out[i] != "") print out[i]}' ORS='\t' 5000000.txt
+
+# A way to store the first line's second column (n) into the last position near alg name
+awk '{if(n == "") n = $2; out[NR]=$2; first=$1} END {out[NR]=first; for(i=3;i<=NR;i++) if(out[i] != "") print out[i]; print n}' ORS='\t' 5000000.txt
+
+# NOT what I want (but want to show)
+awk '{if(n == "") n = $2; out[NR]=$2; first=$1} END {out[NR]=first; for(i=3;i<=NR;i++) if(out[i] != "") print out[i] n}' ORS='\t' 5000000.txt
+awk '{if(n == "") n = $2; out[NR]=$2; first=$1} END {out[NR]=first; for(i=3;i $outfile
+  awk 'BEGINFILE{n=""} {if(n =="") n = $2} ENDFILE{print n}' ORS='\t' `ls -tr --time=birth *.txt` | sed 's/,$//' >> $outfile
+}
+header data.csv
+
+# Just look at this awesome... we do not even need to leave AWK to save which is the column index!!!
+awk "/worst/{getline; last=1} last{for(i=1;i<=NF;++i) if (\$i == \"$alg\") col=i} END{print col}" 5000000.txt
+
+# Let's really put this together too, to see how to fill files
+
+basefile=5000000.txt
+# fill data.csv magyar rand
+fill() {
+  outfile=$1
+  alg=$2
+  input=$3
+
+  col=$(cat "$basefile" | grep copy | sed 's/^\s*//' | awk "{out[NR] = \$1} END {for(i=1;i<=NR;i++) if(out[i] == \"$alg\") print i;}" RS=' +')
+
+  echo -n "$alg-$input " >> "$outfile"
+  awk "/$input/{print \$($col+1)}" ORS='\t' `ls -tr --time=birth *.txt` >> "$outfile"
+  echo "" >> "$outfile"
+}
+
+# See: combinator.inc
+
+######################
+# Dividing data by n #
+######################
+
+# Look at this random other file
+awk '{for(i=1; i<=NF; ++i) print $i}' a.num
+
+# We can save every column's data / line if we want just simply - so save first line's data
+# Rem.: The getline in begin removes that line from the later block! Yes...
+awk 'BEGIN{getline; for(i=1; i<=NF; ++i) saved[i]=$i} {for(i=1; i<=NF; ++i) print saved[i]}' a.num
+
+# Instead of printing real data (except for i==1) make it be divided by n (the first line / row and same col position)
+awk 'BEGIN{getline; for(i=1; i<=NF; ++i){saved[i]=$i; print $i}} {for(i=1; i<=NF; ++i) if(i == 1) print $i; else print ($i/saved[i])}' a.num
+
+# Try tab-separated (but well... turns out this becomes a single line now)
+awk 'BEGIN{getline; for(i=1; i<=NF; ++i){saved[i]=$i; print $i}} {for(i=1; i<=NF; ++i) if(i == 1) print $i; else print ($i/saved[i])}' ORS='\t' a.num
+
+# Use printf and manual ORS/OFS
+# Nearly... (the BEGIN block still prints the header fields one by one)
+awk 'BEGIN{getline; for(i=1; i<=NF; ++i){saved[i]=$i; print $i}} {for(i=1; i<=NF; ++i) if(i == 1) printf "%s%s", $i, OFS; else printf "%s%s", ($i/saved[i]), (i==NF ? ORS : OFS)}' OFS='\t' a.num
+# Working... (the header line is printed as a whole with print $0)
+awk 'BEGIN{getline; for(i=1; i<=NF; ++i) saved[i]=$i; print $0} {for(i=1; i<=NF; ++i) if(i == 1) printf "%s%s", $i, OFS; else printf "%s%s", ($i/saved[i]), (i==NF ? ORS : OFS)}' OFS='\t' a.num
+
+# But what about the 'Hungarian' floating point numbers with comma instead of dot?
+awk 'BEGIN{getline; for(i=1; i<=NF; ++i) saved[i]=$i; print $0} {for(i=1; i<=NF; ++i) if(i == 1) printf "%s%s", $i, OFS; else printf "%s%s", ($i/saved[i]), (i==NF ? ORS : OFS)}' OFS='\t' b.num
+
+# Solvable (enter AWK helper functions)
+awk 'function hn(x) { gsub(",", ".", x); return x+0 } BEGIN{getline; for(i=1; i<=NF; ++i) saved[i]=$i; print $0} {for(i=1; i<=NF; ++i) if(i == 1) printf "%s%s", $i, OFS; else printf "%s%s", (hn($i)/hn(saved[i])), (i==NF ? ORS : OFS)}' OFS='\t' b.num
+
+# But the above uses dot again - so convert back
+# WARNING: "\." is a string escape here, so the regex presumably ends up as a bare "." (any character) - compare with GOOD below
+awk 'function hn(x) { gsub(",", ".", x); return x+0 } function ihn(x) { gsub("\.", ",", x); return x } BEGIN{getline; for(i=1; i<=NF; ++i) saved[i]=$i; print $0} {for(i=1; i<=NF; ++i) if(i == 1) printf "%s%s", $i, OFS; else printf "%s%s", ihn(hn($i)/hn(saved[i])), (i==NF ? ORS : OFS)}' OFS='\t' b.num
+# GOOD: the doubled backslash keeps a literal dot in the regex
+awk 'function hn(x) { gsub(",", ".", x); return x+0 } function ihn(x) { gsub("\\.", ",", x); return x } BEGIN{getline; for(i=1; i<=NF; ++i) saved[i]=$i; print $0} {for(i=1; i<=NF; ++i) if(i == 1) printf "%s%s", $i, OFS; else printf "%s%s", ihn(hn($i)/hn(saved[i])), (i==NF ? ORS : OFS)}' OFS='\t' b.num
+
+# But at this complexity you often put things into an external .awk file
+awk -f num1.awk OFS='\t' b.num
+
+# Can better organize at that point
+awk -f num2.awk OFS='\t' b.num