dodb.cr/bin/stats.sh

67 lines
1.6 KiB
Bash
Raw Normal View History

2024-05-12 16:47:53 +02:00
#!/bin/sh
# Helper programs used by this script. The paths are relative, so they are
# resolved against the directory the script is launched from.
extract='./bin/extract-final-data.sh'
summary='./bin/summary.r'
summary_to_line='./bin/rsum2line.awk'
# Exactly one argument is expected: the directory that holds the results.
# NOTE(review): the usage message goes to stdout and the exit status is 0 even
# though the invocation was wrong -- confirm whether callers rely on that.
[ $# -eq 1 ] || {
  echo "usage: $0 result-directory"
  exit 0
}

# Result directory read by every function below.
dir="$1"
# Build one ".summary" file for every ".raw" file found in $dir.
#
# For each "<name>.raw":
#   1. Rscript runs $summary on it and the output is stored in
#      "<name>.unconveniently_formated_summary";
#   2. $summary_to_line converts that intermediate file into "<name>.summary".
# A step is skipped (with a progress message on stdout) when its output file
# already exists.
#
# Globals read: dir, summary, summary_to_line.
# Globals written: i, summary_with_bad_format, target.
raw_to_summary() {
  for i in "$dir"/*.raw; do
    # An unmatched glob is left as a literal pattern: do not process it.
    [ -e "$i" ] || continue

    summary_with_bad_format="${i%.raw}.unconveniently_formated_summary"
    target="${i%.raw}.summary"

    # printf is used because "echo -n" and "\r" are not portable in /bin/sh.
    if [ -f "$summary_with_bad_format" ]; then
      printf '\r%s already exists: skipping ' "$summary_with_bad_format"
    else
      Rscript "$summary" "$i" > "$summary_with_bad_format"
    fi

    if [ -f "$target" ]; then
      printf '\r%s already exists: skipping ' "$target"
    else
      "$summary_to_line" "$summary_with_bad_format" > "$target"
    fi
  done
  printf '\n'

  # Beyond a certain number of entries, retrieving data from partitions and
  # tags isn't tested anymore. This creates "fake entries" with a duration of
  # 0, which causes problems with the statistical analysis. So "NaN" is
  # replaced by "0" in every summary.
  # "set --" only changes the positional parameters of this function.
  set -- "$dir"/*.summary
  if [ -e "$1" ]; then
    sed -i "s/NaN/0/g" "$@"
  fi
}
# List the summary files of $dir, one line per file as formatted by f(), and
# sort the lines numerically so that they are ordered by their leading number.
#
# Globals read: dir. Output: sorted lines on stdout.
sort_summary_files() {
  for i in "$dir"/*.summary; do
    # An unmatched glob is left as a literal pattern: do not list it.
    [ -e "$i" ] || continue
    f "$i"
  done | sort -n
}
# Describe one summary file as "<key> <target> <path>" on stdout.
#
# The base name of the path is split on "_" and "."; with parts p1, p2, p3:
#   <key>    is p2 (the field sort_summary_files sorts on),
#   <target> is "<directory of the path>/<p1>_<p3>",
#   <path>   is the argument, unchanged.
# Only the base name is tokenised, so the directory part may be absolute,
# nested, or contain "_" and "." without shifting the fields.
#
# Arguments: $* - path of a summary file.
f() {
  printf '%s\n' "$*" | awk '{
    path = $0
    dirpart = "."
    base = path
    # Greedy match: everything up to and including the last "/".
    if (match(path, /.*\//)) {
      dirpart = substr(path, 1, RLENGTH - 1)
      base = substr(path, RLENGTH + 1)
    }
    split(base, p, /[_.]/)
    printf "%s %s/%s_%s %s\n", p[2], dirpart, p[1], p[3], path
  }'
}
# Read "<nb_it> <target> <fname>" lines from stdin and, for each of them,
# append one line to "<target>.d": <nb_it> followed by the content of <fname>
# (xargs flattens that content onto a single line).
#
# NOTE(review): output is appended (">>"), so running the script twice on the
# same directory adds the same lines again -- confirm whether the ".d" files
# should be emptied first.
fill() {
  while read -r nb_it target fname; do
    # Skip blank or incomplete lines instead of reading from a missing file.
    [ -n "$fname" ] || continue
    xargs echo "$nb_it " < "$fname" >> "$target.d"
  done
}
# Step 1: produce the ".summary" files from the ".raw" files of $dir.
raw_to_summary
# Step 2: list the summary files in numeric order and append their content
# to the matching ".d" files.
sort_summary_files | fill
# Run the extraction helper ($extract) on the result directory ($dir).
# Both expansions are quoted so that a directory name containing whitespace
# or glob characters is passed as a single, unmodified argument.
extract_final_data() {
  "$extract" "$dir"
}
# Last step: hand the result directory over to the extraction helper.
extract_final_data