# dodb.cr/bin/stats.sh

#!/bin/sh

# Helper programs used by this script. The paths are relative to the current
# working directory.
extract="./bin/extract-final-data.sh"
summary="./bin/summary.r"
summary_to_line="./bin/rsum2line.awk"

# Exactly one argument is expected: the directory containing the results.
if [ $# -ne 1 ]
then
	# A wrong invocation is an error: report it on stderr and fail.
	echo "usage: $0 result-directory" >&2
	exit 1
fi

dir="$1"

# Fail early instead of letting every later step trip over a bad path.
if [ ! -d "$dir" ]
then
	echo "$0: $dir: not a directory" >&2
	exit 1
fi

# Turn every "$dir"/*.raw file into a "$dir"/*.summary file.
#
# Two files are produced next to each raw file:
#   <name>.unconveniently_formated_summary: output of `Rscript $summary <raw file>`
#   <name>.summary: output of `$summary_to_line` run on the file above
# A file that already exists is not generated again. Once all files are
# handled, every "NaN" found in the summaries is replaced by "0".
#
# Globals read: dir, summary, summary_to_line.
# Output: progress messages on stdout, error messages on stderr.
raw_to_summary() {
	for raw_file in "$dir"/*.raw
	do
		# Without any match the pattern is kept as is: there is nothing to do.
		[ -f "$raw_file" ] || continue

		summary_with_bad_format="${raw_file%.raw}.unconveniently_formated_summary"
		target="${raw_file%.raw}.summary"

		if [ -f "$summary_with_bad_format" ]; then
			# printf, since the handling of "-n" and "\r" by echo depends on the shell.
			printf '\r%s already exists: skipping                         ' "$summary_with_bad_format"
		elif ! Rscript "$summary" "$raw_file" > "$summary_with_bad_format"; then
			# Do not keep a partial file: it would be skipped on the next run.
			rm -f "$summary_with_bad_format"
			echo "$0: cannot summarize $raw_file" >&2
			continue
		fi

		if [ -f "$target" ]; then
			printf '\r%s already exists: skipping                         ' "$target"
		elif ! "$summary_to_line" "$summary_with_bad_format" > "$target"; then
			# Same reason as above.
			rm -f "$target"
			echo "$0: cannot convert $summary_with_bad_format" >&2
			continue
		fi
	done
	echo ""

	# Beyond a certain number of entries, retrieving data from partitions and tags isn't tested anymore.
	# "Fake entries" with a duration of 0 are created instead, which causes problems with the
	# statistical analysis. So "NaN" has to be replaced by "0" in the summaries.
	for summary_file in "$dir"/*.summary
	do
		[ -f "$summary_file" ] || continue
		sed -i "s/NaN/0/g" "$summary_file"
	done
}

# List the summary files of "$dir", one line per file as printed by f (which
# puts the number of iterations first), sorted numerically.
# Nothing is printed when the directory contains no summary file.
sort_summary_files() {
	for summary_file in "$dir"/*.summary
	do
		# Without any match the pattern is kept as is: it must not reach f.
		[ -f "$summary_file" ] || continue
		f "$summary_file"
	done | sort -n
}

# Describe a summary file on a single line.
#
# The path is cut into pieces at each "_", "." and "/". With P1, P2, ... being
# these pieces, the printed line is:
#   P3 P1/P2_P4 <path>
# Example: "results/db_10_op.summary" gives
#   10 results/db_op results/db_10_op.summary
f() {
	echo $* | awk '{
		# Copy of the line in which the separators are turned into blanks.
		pieces = $0
		gsub("[_./]", " ", pieces)

		# w[1] is the untouched path, the pieces come right after it.
		split($0 " " pieces, w, " ")

		printf "%s %s/%s_%s %s\n", w[4], w[2], w[3], w[5], w[1]
	}'
}

# Read lines formatted as "<number of iterations> <target> <file>" on stdin.
# For each of them, append one line to "<target>.d": the number of iterations
# followed by the whole content of <file> flattened on a single line.
#
# Blank lines are ignored. Lines without the three fields and lines naming an
# unreadable file are reported on stderr and skipped.
#
# NOTE(review): lines are always appended, so running the script twice on the
# same directory presumably duplicates the content of the .d files. To confirm.
fill() {
	while read -r nb_it target fname rest
	do
		# Blank line: nothing to do.
		[ -n "$nb_it" ] || continue

		# Without a file name, the data would be read from the stdin of the loop,
		# consuming all the remaining lines.
		if [ -z "$fname" ]; then
			echo "$0: malformed line: $nb_it $target" >&2
			continue
		fi

		if [ ! -r "$fname" ]; then
			echo "$0: cannot read $fname" >&2
			continue
		fi

		# xargs joins all the words of the file on the line printed by echo.
		xargs echo "$nb_it " < "$fname" >> "$target.d"
	done
}

# First, create the summary files from the raw files.
raw_to_summary

# Then, take the summary files sorted by their number of iterations and
# append their content to the .d files.
sort_summary_files | fill

# Run the "$extract" program on the result directory.
# Globals read: extract, dir.
extract_final_data() {
	# Quoted, so the directory is passed as a single, unaltered argument.
	"$extract" "$dir"
}

# Last step: hand the result directory over to the extraction program.
extract_final_data