From dc34e5b8817e9e7542cdd49c8fc384bebdec1838 Mon Sep 17 00:00:00 2001
From: Philippe PITTOLI
Date: Thu, 23 May 2024 22:58:57 +0200
Subject: [PATCH] stats

---
Review notes (free-form commentary area between "---" and the first diff):
 * Line structure restored; the line counts of every hunk match its @@ header.
 * Leading indentation was lost in the collapsed copy and is rebuilt here with
   tabs. TODO confirm against the tree, or apply with --ignore-whitespace.
 * Added lines were hardened (quoted expansions, "rm -f", usage error sent to
   stderr with a non-zero exit status, unmatched-glob guard), so the post-image
   blob ids on the "index" lines no longer describe the resulting files.

 ...data.sh => extract-data-benchmark-cars.sh} | 10 +----
 bin/stats.sh                                  | 42 ++++---------------
 bin/summary-to-truncated-data.sh              | 21 ++++++++++
 3 files changed, 31 insertions(+), 42 deletions(-)
 rename bin/{extract-final-data.sh => extract-data-benchmark-cars.sh} (74%)
 create mode 100755 bin/summary-to-truncated-data.sh

diff --git a/bin/extract-final-data.sh b/bin/extract-data-benchmark-cars.sh
similarity index 74%
rename from bin/extract-final-data.sh
rename to bin/extract-data-benchmark-cars.sh
index 034c79e..b976d61 100755
--- a/bin/extract-final-data.sh
+++ b/bin/extract-data-benchmark-cars.sh
@@ -1,5 +1,4 @@
 #!/bin/sh
-
 if [ $# -ne 1 ]
 then
 	echo "usage: $0 result-directory"
@@ -8,14 +7,7 @@ fi
 
 d=$1
 
-echo "from data (.d) to truncated data (.t)"
-for i in $d/*.d
-do
-	fname=$(echo $i | sed "s/[.]d$/.t/")
-	awk '{ print $2, $3, $5 }' < $i > $fname
-done
-
-awk '{ print $1 }' < $d/ram_index.d > it
+awk '{ print $1 }' < "$d/ram_index.d" | sort -n | uniq > it
 
 mkdir data
 echo "from truncated data (.t) to graphed data data/XXX.d"
diff --git a/bin/stats.sh b/bin/stats.sh
index c176365..4a660dd 100755
--- a/bin/stats.sh
+++ b/bin/stats.sh
@@ -1,8 +1,10 @@
 #!/bin/sh
 
-extract="./bin/extract-final-data.sh"
-summary="./bin/summary.r"
-summary_to_line="./bin/rsum2line.awk"
+# .raw -> bad format -> .summary (great format)
+raw2sum="./bin/raw-to-summary.sh"
+# .summary (with too much data) -> truncated data (.t)
+truncate_data="./bin/summary-to-truncated-data.sh"
+# ./bin/extract-data-*.sh: .t -> data/XXX.d (paste an index + *.t)
 
 if [ $# -ne 1 ]
 then
@@ -12,29 +14,7 @@ fi
 
 dir="$1"
 
-raw_to_summary() {
-	for i in $dir/*.raw
-	do
-		summary_with_bad_format=$(echo $i | sed "s/.raw$/.unconveniently_formated_summary/")
-		target=$(echo $i | sed "s/.raw$/.summary/")
-		if [ -f $summary_with_bad_format ]; then
-			echo -n "\r$summary_with_bad_format already exists: skipping "
-		else
-			Rscript $summary $i > $summary_with_bad_format
-		fi
-		if [ -f $target ]; then
-			echo -n "\r$target already exists: skipping "
-		else
-			$summary_to_line $summary_with_bad_format > $target
-		fi
-	done
-	echo ""
-
-	# Beyond a certain number of entries, retrieving data from partitions and tags isn't tested anymore.
-	# This leads to create "fake entries" with a duration of 0, resulting to causing some problems with
-	# statistical analysis. So, we need to replace "NaN" by "0" in summaries.
-	sed -i "s/NaN/0/g" $dir/*.summary
-}
+$raw2sum "${dir}"
 
 # List raw files with the number of iterations as a prefix so they can then be sorted.
 sort_summary_files() {
@@ -46,6 +26,8 @@ f() {
 }
 
 fill() {
+	# Remove previous computations (-f: stay quiet when no .d file exists yet).
+	rm -f -- "${dir}"/*.d
 	while read LINE; do
 		nb_it=$(echo $LINE | awk '{ print $1 }')
 		target=$(echo $LINE | awk '{ print $2 }')
@@ -55,12 +37,6 @@ fill() {
 	done
 }
 
-raw_to_summary
-
 sort_summary_files | fill
 
-extract_final_data() {
-	$extract $dir
-}
-
-extract_final_data
+$truncate_data "${dir}"
diff --git a/bin/summary-to-truncated-data.sh b/bin/summary-to-truncated-data.sh
new file mode 100755
index 0000000..f01885e
--- /dev/null
+++ b/bin/summary-to-truncated-data.sh
@@ -0,0 +1,21 @@
+#!/bin/sh
+# Derive a truncated data file (.t) from every data file (.d) of a result
+# directory, keeping only fields 2, 3 and 5 of each line.
+# usage: summary-to-truncated-data.sh result-directory
+
+if [ $# -ne 1 ]
+then
+	echo "usage: $0 result-directory" >&2
+	exit 1
+fi
+
+dir=$1
+
+echo "from data (.d) to truncated data (.t)"
+for i in "$dir"/*.d
+do
+	# An unmatched glob stays literal: skip it instead of feeding awk a missing file.
+	[ -e "$i" ] || continue
+	fname="${i%.d}.t"
+	awk '{ print $2, $3, $5 }' < "$i" > "$fname"
+done