This commit is contained in:
parent 2c670b99c7
commit 05b6d9a40c
a.num (Normal file)
@@ -0,0 +1,3 @@
n 64 100 1024 4096
k1 0.5 0.3 0.42 0.88
k2 0.8 0.2 0.76 0.62
b.num (Normal file)
@@ -0,0 +1,3 @@
n 64 100 1024 4096
k1 0,5 0,3 0,42 0,88
k2 0,8 0,2 0,76 0,62
combinator.inc (Normal file)
@@ -0,0 +1,68 @@
# Just . combinator.inc or source combinator.inc
# After that:
echo "You can (manually):"
echo "- header data.csv"
echo "- fill data.csv magyar rand"
echo "Or create a comparison for a specific data kind:"
echo "- genfor rand data.csv"
echo "Or just a big default mess:"
echo "- generate data.csv"
echo "To clean up data for LibreOffice Calc (the Hungarian one, that is):"
echo "- cleanup data.csv"
echo ""
echo "The generate gives a 'default set'; you can add your missing stuff with further 'fill' commands if needed"

basefile=5000000.txt

declare -a definputs=("worst" "smallrange" "rand" "constant")
declare -a sortalgs=(`awk '/worst/{getline; last=1} last{for(x=1;x<=NF;++x) print $x}' ORS=' ' $basefile`)

# header data.csv
header() {
    outfile="$1"

    echo -n 'alg ' > "$outfile"
    awk 'BEGINFILE{n=""} {if(n =="") n = $2} ENDFILE{print n}' ORS='\t' `ls -tr --time=birth *.txt` | sed 's/,$//' >> "$outfile"
    echo "" >> "$outfile"
}

# fill data.csv magyar rand
fill() {
    outfile="$1"
    alg="$2"
    input="$3"

    col=$(cat $basefile | grep copy | sed 's/^\s*//' | awk "{out[NR] = \$1} END {for(i=1;i<=NR;i++) if(out[i] == \"$alg\") print i;}" RS=' +')

    echo -n "$alg-$input " >> "$outfile"
    awk "/$input/{print \$($col+1)}" ORS='\t' `ls -tr --time=birth *.txt` >> "$outfile"
    echo "" >> "$outfile"
}

# genfor "rand" data.csv
genfor() {
    inp="$1"
    outfile="$2"
    header "$outfile"
    for alg in "${sortalgs[@]}"; do
        echo -n "Adding $alg-"; echo "$inp"
        fill "$outfile" "$alg" "$inp"
    done
}

# generate data.csv
generate() {
    outfile="$1"
    header "$outfile"

    # Rem.: fill directly per input and algorithm; calling genfor here would rerun header and truncate the file on every input
    for inp in "${definputs[@]}"; do
        for alg in "${sortalgs[@]}"; do
            echo "Adding $alg-$inp"
            fill "$outfile" "$alg" "$inp"
        done
    done
}

# cleanup data.csv prepared.csv
cleanup() {
    in="$1"
    out="$2"
    # turn dot decimals into comma decimals (the regex also eats any trailing 's' right after them)
    sed "s/\([0-9][0-9]*\)\.\([0-9][0-9]*\)s*/\1,\2/g" "$in" > "$out"
}
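
# Rem.: a usage sketch (my addition; it just follows the help text above and expects
# the *.txt measurement files to be in the current directory):
#   . combinator.inc
#   generate data.csv
#   cleanup data.csv prepared.csv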
num1.awk (Normal file)
@@ -0,0 +1 @@
function hn(x) { gsub(",", ".", x); return x+0 } function ihn(x) { gsub("\\.", ",", x); return x } BEGIN{getline; for(i=1; i<=NF; ++i) saved[i]=$i; print $0} {for(i=1; i<=NF; ++i) if(i == 1) printf "%s%s", $i, OFS; else printf "%s%s", ihn(hn($i)/hn(saved[i])), (i==NF ? ORS : OFS)}
num2.awk (Normal file)
@@ -0,0 +1,19 @@
function hn(x) {
    gsub(",", ".", x);
    return x+0
}

function ihn(x) {
    gsub("\\.", ",", x);
    return x
}

BEGIN {
    getline;
    for(i=1; i<=NF; ++i) saved[i]=$i;
    print $0
}

{
    for(i=1; i<=NF; ++i) if(i == 1) printf "%s%s", $i, OFS; else printf "%s%s", ihn(hn($i)/hn(saved[i])), (i==NF ? ORS : OFS)
}
parancs.awk (Normal file)
@@ -0,0 +1 @@
{print $2}
pelda.asd (Normal file)
@@ -0,0 +1,8 @@
Errors, at:
1 2 3 4
4pasu copy frewr gptbuck
Warnings:
14:22 turned off machine
21:38 file is written with bad group
23:22 turned off machine
23:42 file is written with bad group
steps.sh (Normal file)
@@ -0,0 +1,189 @@
#!/bin/bash

# We have a bunch of these files
cat 5000000.txt
ls *.txt

# And we want graph-able output data like this:
# alg\n          10      100    1000..
# magyar-rand    0,005   0,03   0,3...
# magyar-worst   0,007   0,06   0,4...
# 4pasu-rand     0,017   0,11   0,7...
# 4pasu-worst    0,0237  0,42   1,3...

# One can start out thinking grep + sed as usual, but it quickly becomes tedious
# At least we can grab the number of elements from the file itself (so the filename being wrong does not matter)
cat 5000000.txt | grep Sorting | sed "s/.* \([0-9]*\) .*/\1/"

# But enter AWK!
awk '{print $1}' 5000000.txt
awk '{if(n =="") n = $2} END{print n}' 5000000.txt
awk 'BEGIN{getline; print $2}' 5000000.txt

# More interesting stuff
awk '{if(NR % 2 == 0) elemek[NR/2] = $2} END{for(i = 1; i <= NR/2; ++i) print elemek[i]}' 5000000.txt
awk '{getline; print $2}' 5000000.txt

# And to be fancy (also showing that ls can sort by creation time nowadays; check without it to see the bad ordering)
awk 'BEGINFILE{n=""} {if(n =="") n = $2} ENDFILE{print n}' `ls -t --time=birth *.txt`
awk 'BEGINFILE{n=""} {if(n =="") n = $2} ENDFILE{print n}' `ls -tr --time=birth *.txt`
awk 'BEGINFILE{n=""} {if(n =="") n = $2} ENDFILE{print n}' `ls -t *.txt`

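# Rem.: BEGINFILE/ENDFILE are gawk extensions. A rough portable sketch of the same idea (my addition,
# assuming a POSIX awk) keys off FNR==1, which is true on the first line of each input file:
awk 'FNR==1{if(NR>1) print n; n=""} n==""{n=$2} END{print n}' `ls -tr --time=birth *.txt`
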
######################################
# Column to row translation with AWK #
######################################

# Getting the second field of every line with awk is really simple (OFS is the output separator, FS the input separator)
awk '{print $2}' 5000000.txt

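# Rem.: a tiny made-up FS/OFS sketch (my addition, not part of the measurement workflow):
# read colon-separated input, write it tab-separated
echo 'a:b:c' | awk -v FS=':' -v OFS='\t' '{print $1, $3}'
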
# This is closer to what we want, because the last row's value sits in a different column ($1), but it still prints junk
awk '{print $2; last=$1} END {print last}' 5000000.txt

# This way we never print while processing the lines, only manually in END with a loop - that is what we want
awk '{out[NR]=$2; first=$1} END {out[NR]=first; for(i=1;i<=NR;i++) print out[i]}' 5000000.txt

# This leaves out the first few lines but is otherwise the same
awk '{out[NR]=$2; first=$1} END {out[NR]=first; for(i=3;i<=NR;i++) print out[i]}' 5000000.txt

# This way we write the output tab-separated
awk '{out[NR]=$2; first=$1} END {out[NR]=first; for(i=3;i<=NR;i++) print out[i]}' ORS='\t' 5000000.txt
# Alternative: you can change ORS (the output record separator) on the fly. The same goes for OFS, FS and RS!
awk '{out[NR]=$2; first=$1} END {ORS="\t"; out[NR]=first; for(i=3;i<=NR;i++) print out[i]}' 5000000.txt

# I wanted to omit empty lines of the input
awk '{out[NR]=$2; first=$1} END {out[NR]=first; for(i=3;i<=NR;i++) if(out[i] != "") print out[i]}' ORS='\t' 5000000.txt

# A way to store the first line's second column (n) in the last position, next to the alg name
awk '{if(n == "") n = $2; out[NR]=$2; first=$1} END {out[NR]=first; for(i=3;i<=NR;i++) if(out[i] != "") print out[i]; print n}' ORS='\t' 5000000.txt

# NOT what I want (but worth showing)
awk '{if(n == "") n = $2; out[NR]=$2; first=$1} END {out[NR]=first; for(i=3;i<=NR;i++) if(out[i] != "") print out[i] n}' ORS='\t' 5000000.txt
awk '{if(n == "") n = $2; out[NR]=$2; first=$1} END {out[NR]=first; for(i=3;i<NR;i++) if(out[i] != "") print out[i]; print out[i] n}' ORS='\t' 5000000.txt

# The same in a more bash-friendly form - just so that I can use bash variables inside the AWK program from now on
awk "{if(n == \"\") n = \$2; out[NR]=\$2; first=\$1} END {out[NR]=first; for(i=3;i<=NR;i++) if(out[i] != \"\") print out[i]; print n}" ORS="\t" 5000000.txt

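# Rem.: an alternative sketch (my addition) that avoids most of the escaping: pass the bash
# variable in with awk's -v option and keep the program single-quoted
start=3
awk -v start="$start" '{if(n == "") n = $2; out[NR]=$2; first=$1} END {out[NR]=first; for(i=start;i<=NR;i++) if(out[i] != "") print out[i]; print n}' ORS='\t' 5000000.txt
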
######################################
# Getting the index of the algorithm #
######################################

# This gets the line listing the algorithms
cat 5000000.txt | grep copy | sed 's/^\s*//'

# Get the column (column index) of the given algorithm
# RS is used here instead of FS, because I want the individual values as records so I can count them;
# BEWARE the ' +': RS is not the special default (usually the line end), and it does not behave like FS,
# where a single ' ' means 'any run of whitespace' and '[ ]' means a single space
alg='magyar'
cat 5000000.txt | grep copy | sed 's/^\s*//' | awk "{out[NR] = \$1} END {for(i=1;i<NR;i++) if(out[i] == \"$alg\") print i;}" RS=' +'

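# Rem.: a tiny standalone sketch of the RS=' +' record splitting on made-up input (my addition);
# gawk treats a multi-character RS as a regex, so runs of spaces separate the records
echo 'magyar   4pasu copy' | awk '{print NR": "$1}' RS=' +'
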
# Get the value into a bash variable (backticks fail here, so use $(..) instead)
col=$(cat 5000000.txt | grep copy | sed 's/^\s*//' | awk "{out[NR] = \$1} END {for(i=1;i<NR;i++) if(out[i] == \"$alg\") print i;}" RS=' +')
echo "$col"

#########################################################################
# Getting row-column based value from file + add extracted 'n' above it #
#########################################################################

# So we already have $col for alg=magyar; how do we get the value for input="rand", for example?
# This is really simple and I wanted to show it

alg='magyar'
input='rand'

col=$(cat 5000000.txt | grep copy | sed 's/^\s*//' | awk "{out[NR] = \$1} END {for(i=1;i<NR;i++) if(out[i] == \"$alg\") print i;}" RS=' +')

input='rand'
awk "{if(\$1 == \"$input\") print \$($col+1)}" 5000000.txt
cat 5000000.txt # check

# Or actually even simpler if you regex-search your way to the position with AWK
awk "/rand/{print \$($col+1)}" 5000000.txt
awk "/$input/{print \$($col+1)}" 5000000.txt

# So after a search we can actually write out a CSV-like list of values too!
# This one is maybe not working as you would expect, and you can start thinking about getlines, loops, double searches, etc.
# The reason: a block without any pattern prefix runs for every line of the file...
awk "/asc/{print \$($col+1)} {print \$($col+1)}" ORS='\t' 5000000.txt
# But this is the way - because variables can act as "flags" for the blocks!
# This writes out the target column from where ascdesc was found (inclusive) until descdesc (exclusive)
awk "/ascdesc/{flag=1} /descdesc/{flag=0} flag{print \$($col+1)}" ORS=',' 5000000.txt

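# Rem.: a tiny standalone sketch of the flag trick on made-up lines (my addition, not the real measurement files):
printf 'a\nascdesc\nb\nc\ndescdesc\nd\n' | awk '/ascdesc/{flag=1} /descdesc/{flag=0} flag{print}'
# prints: ascdesc, b, c - i.e. from the start marker (inclusive) up to the end marker (exclusive)
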
# Yeah... But didn't we want data like this?
#
# alg\n          10      100    1000..
# magyar-rand    0,005   0,03   0,3...
# magyar-worst   0,007   0,06   0,4...
# 4pasu-rand     0,017   0,11   0,7...
# 4pasu-worst    0,0237  0,42   1,3...

# Let's put it together

header() {
    outfile=$1

    echo -n 'alg ' > $outfile
    awk 'BEGINFILE{n=""} {if(n =="") n = $2} ENDFILE{print n}' ORS='\t' `ls -tr --time=birth *.txt` | sed 's/,$//' >> $outfile
}
header data.csv

# Just look at this awesomeness... we do not even need to leave AWK to work out which column index it is!!!
awk "/worst/{getline; last=1} last{for(i=1;i<=NF;++i) if (\$i == \"$alg\") col=i} END{print col}" 5000000.txt

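# Rem.: a sketch (my addition) chaining that with the earlier idea: capture the index, then use it right away
col=$(awk "/worst/{getline; last=1} last{for(i=1;i<=NF;++i) if (\$i == \"$alg\") col=i} END{print col}" 5000000.txt)
awk "/rand/{print \$($col+1)}" 5000000.txt
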
# Let's really put this together too, for filling the file

basefile=5000000.txt
# fill data.csv magyar rand
fill() {
    outfile=$1
    alg=$2
    input=$3

    col=$(cat $basefile | grep copy | sed 's/^\s*//' | awk "{out[NR] = \$1} END {for(i=1;i<=NR;i++) if(out[i] == \"$alg\") print i;}" RS=' +')

    echo -n "$alg-$input " >> $outfile
    awk "/$input/{print \$($col+1)}" ORS='\t' `ls -tr --time=birth *.txt` >> $outfile
    echo "" >> $outfile
}

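# Rem.: a usage sketch (my addition), matching the help text at the top of combinator.inc:
header data.csv
fill data.csv magyar rand
fill data.csv magyar worst
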
# See: combinator.inc

######################
# Dividing data by n #
######################

# Look at this random other file
awk '{for(i=1; i<=NF; ++i) print $i}' a.num

# We can quite simply save every column's data per line if we want - so let's save the first line's data
# Rem.: the getline in BEGIN removes that line from the later main block! Yes...
awk 'BEGIN{getline; for(i=1; i<=NF; ++i) saved[i]=$i} {for(i=1; i<=NF; ++i) print saved[i]}' a.num

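# Rem.: a tiny made-up sketch of that getline behaviour (my addition, not the real data files):
printf '1\n2\n3\n' | awk 'BEGIN{getline; print "BEGIN saw " $0} {print "main saw " $0}'
# prints: BEGIN saw 1, main saw 2, main saw 3 - the first line was consumed in BEGIN
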
# Instead of printing the real data (except for i==1), print it divided by n (the first line/row, same column position)
awk 'BEGIN{getline; for(i=1; i<=NF; ++i){saved[i]=$i; print $i}} {for(i=1; i<=NF; ++i) if(i == 1) print $i; else print ($i/saved[i])}' a.num

# Try tab-separated (but well... turns out this becomes a single line now)
awk 'BEGIN{getline; for(i=1; i<=NF; ++i){saved[i]=$i; print $i}} {for(i=1; i<=NF; ++i) if(i == 1) print $i; else print ($i/saved[i])}' ORS='\t' a.num

# Use printf and handle ORS/OFS manually
# Nearly...
awk 'BEGIN{getline; for(i=1; i<=NF; ++i){saved[i]=$i; print $i}} {for(i=1; i<=NF; ++i) if(i == 1) printf "%s%s", $i, OFS; else printf "%s%s", ($i/saved[i]), (i==NF ? ORS : OFS)}' OFS='\t' a.num
# Working...
awk 'BEGIN{getline; for(i=1; i<=NF; ++i) saved[i]=$i; print $0} {for(i=1; i<=NF; ++i) if(i == 1) printf "%s%s", $i, OFS; else printf "%s%s", ($i/saved[i]), (i==NF ? ORS : OFS)}' OFS='\t' a.num

# But what about the 'Hungarian' floating point numbers, with a comma instead of a dot?
awk 'BEGIN{getline; for(i=1; i<=NF; ++i) saved[i]=$i; print $0} {for(i=1; i<=NF; ++i) if(i == 1) printf "%s%s", $i, OFS; else printf "%s%s", ($i/saved[i]), (i==NF ? ORS : OFS)}' OFS='\t' b.num

# Solvable (enter AWK helper functions)
awk 'function hn(x) { gsub(",", ".", x); return x+0 } BEGIN{getline; for(i=1; i<=NF; ++i) saved[i]=$i; print $0} {for(i=1; i<=NF; ++i) if(i == 1) printf "%s%s", $i, OFS; else printf "%s%s", (hn($i)/hn(saved[i])), (i==NF ? ORS : OFS)}' OFS='\t' b.num

# But the above prints with a dot again - so convert back
# WARNING (inside a string, "\." is just ".", which as a regex matches any character):
awk 'function hn(x) { gsub(",", ".", x); return x+0 } function ihn(x) { gsub("\.", ",", x); return x } BEGIN{getline; for(i=1; i<=NF; ++i) saved[i]=$i; print $0} {for(i=1; i<=NF; ++i) if(i == 1) printf "%s%s", $i, OFS; else printf "%s%s", ihn(hn($i)/hn(saved[i])), (i==NF ? ORS : OFS)}' OFS='\t' b.num
# GOOD ("\\." becomes the regex \. and matches only the literal dot):
awk 'function hn(x) { gsub(",", ".", x); return x+0 } function ihn(x) { gsub("\\.", ",", x); return x } BEGIN{getline; for(i=1; i<=NF; ++i) saved[i]=$i; print $0} {for(i=1; i<=NF; ++i) if(i == 1) printf "%s%s", $i, OFS; else printf "%s%s", ihn(hn($i)/hn(saved[i])), (i==NF ? ORS : OFS)}' OFS='\t' b.num

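# Rem.: a tiny standalone sketch of hn()/ihn() on one made-up value (my addition):
echo '0,5' | awk 'function hn(x) { gsub(",", ".", x); return x+0 } function ihn(x) { gsub("\\.", ",", x); return x } {print ihn(hn($1)/2)}'
# prints 0,25
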
# But at this complexity you often put things into an external .awk file
awk -f num1.awk OFS='\t' b.num

# And at that point you can organize it better
awk -f num2.awk OFS='\t' b.num