#!/bin/sh
#
# Post-process a result directory:
#   1. turn every <dir>/*.raw file into a one-line <dir>/*.summary file,
#   2. collect the summaries, ordered by number of iterations, into
#      per-series <dir>/<name>_<tag>.d files,
#   3. run the final extraction script on the directory.
#
# Usage: <this script> result-directory
#
# The helper paths below are relative, so the script has to be started from
# the directory that contains ./bin.
# File names must not contain whitespace: the list of summaries is handed
# from one step to the next as space-separated lines.

set -u

extract="./bin/extract-final-data.sh"
summary="./bin/summary.r"
summary_to_line="./bin/rsum2line.awk"

if [ $# -ne 1 ]; then
  # A usage error is a failure: report it on stderr with a non-zero status.
  printf 'usage: %s result-directory\n' "$0" >&2
  exit 1
fi

dir="$1"

if [ ! -d "$dir" ]; then
  printf '%s: %s: not a directory\n' "$0" "$dir" >&2
  exit 1
fi

# Build <stem>.unconveniently_formated_summary (output of the R script) and
# <stem>.summary (the same data reformatted by the awk script) for every
# <stem>.raw in $dir. Files that already exist are not regenerated.
# Returns non-zero if any helper failed.
raw_to_summary() {
  rts_status=0

  for rts_raw in "$dir"/*.raw; do
    # An unmatched glob is left as the literal pattern: nothing to do then.
    [ -e "$rts_raw" ] || continue

    rts_stem=${rts_raw%.raw}
    rts_rsum=$rts_stem.unconveniently_formated_summary
    rts_line=$rts_stem.summary

    if [ -f "$rts_rsum" ]; then
      printf '\r%s already exists: skipping ' "$rts_rsum"
    elif ! Rscript "$summary" "$rts_raw" > "$rts_rsum"; then
      # Do not leave a partial file behind, otherwise the next run would
      # consider this step done and skip it.
      rm -f -- "$rts_rsum"
      printf '\n%s: %s failed on %s\n' "$0" "$summary" "$rts_raw" >&2
      rts_status=1
      continue
    fi

    if [ -f "$rts_line" ]; then
      printf '\r%s already exists: skipping ' "$rts_line"
    elif ! "$summary_to_line" "$rts_rsum" > "$rts_line"; then
      rm -f -- "$rts_line"
      printf '\n%s: %s failed on %s\n' "$0" "$summary_to_line" "$rts_rsum" >&2
      rts_status=1
      continue
    fi
  done
  printf '\n'

  # Beyond a certain number of entries, retrieving data from partitions and
  # tags is no longer tested. This creates "fake entries" with a duration of
  # 0, which causes problems for the statistical analysis, so "NaN" is
  # replaced by "0" in the summaries.
  # (Positional parameters set here are local to the function call.)
  set -- "$dir"/*.summary
  if [ -e "$1" ]; then
    sed -i 's/NaN/0/g' -- "$@" || rts_status=1
  fi

  return "$rts_status"
}

# Describe one summary file on stdout as "<nb_it> <target> <path>".
#
# $1 - path of a summary file whose base name is made of the words
#      <name> <nb_it> <tag> ..., separated by "_" or "."
#
# <target> is <directory of $1>/<name>_<tag>. Only the base name is split,
# so the directory part may itself contain "_", "." or "/".
f() {
  f_path=$1
  case $f_path in
    */*) f_dir=${f_path%/*} ;;
    *) f_dir=. ;;
  esac

  printf '%s\n' "${f_path##*/}" | tr '_.' '  ' | {
    read -r f_name f_nb f_tag f_rest
    printf '%s %s/%s_%s %s\n' "$f_nb" "$f_dir" "$f_name" "$f_tag" "$f_path"
  }
}

# List the summary files with their number of iterations as a prefix, sorted
# numerically on that prefix.
sort_summary_files() {
  for ssf_path in "$dir"/*.summary; do
    [ -e "$ssf_path" ] || continue
    f "$ssf_path"
  done | sort -n
}

# Read "<nb_it> <target> <path>" lines on stdin and write, for each of them,
# the content of <path> prefixed with <nb_it> to <target>.d.
fill() {
  fill_seen=
  while read -r fill_nb fill_target fill_src; do
    # Start each .d file afresh the first time it is met in this run, so
    # that running the script again does not duplicate its lines.
    case $fill_seen in
      *" $fill_target "*) ;;
      *)
        : > "$fill_target.d"
        fill_seen="$fill_seen $fill_target "
        ;;
    esac
    xargs echo "$fill_nb " < "$fill_src" >> "$fill_target.d"
  done
}

# Run the final extraction script on the result directory.
extract_final_data() {
  "$extract" "$dir"
}

status=0
raw_to_summary || status=1
sort_summary_files | fill || status=1
extract_final_data || status=$?
exit "$status"