From dc34e5b8817e9e7542cdd49c8fc384bebdec1838 Mon Sep 17 00:00:00 2001
From: Philippe PITTOLI <karchnu@karchnu.fr>
Date: Thu, 23 May 2024 22:58:57 +0200
Subject: [PATCH] stats

---
 ...data.sh => extract-data-benchmark-cars.sh} | 10 +----
 bin/stats.sh                                  | 42 ++++---------------
 bin/summary-to-truncated-data.sh              | 16 +++++++
 3 files changed, 26 insertions(+), 42 deletions(-)
 rename bin/{extract-final-data.sh => extract-data-benchmark-cars.sh} (74%)
 create mode 100755 bin/summary-to-truncated-data.sh

diff --git a/bin/extract-final-data.sh b/bin/extract-data-benchmark-cars.sh
similarity index 74%
rename from bin/extract-final-data.sh
rename to bin/extract-data-benchmark-cars.sh
index 034c79e..b976d61 100755
--- a/bin/extract-final-data.sh
+++ b/bin/extract-data-benchmark-cars.sh
@@ -1,5 +1,4 @@
 #!/bin/sh
-
 if [ $# -ne 1 ]
 then
 	echo "usage: $0 result-directory"
@@ -8,14 +7,7 @@ fi
 
 d=$1
 
-echo "from data (.d) to truncated data (.t)"
-for i in $d/*.d
-do
-	fname=$(echo $i | sed "s/[.]d$/.t/")
-	awk '{ print $2, $3, $5 }' < $i > $fname
-done
-
-awk '{ print $1 }' < $d/ram_index.d > it
+awk '{ print $1 }' < $d/ram_index.d | sort -n | uniq > it
 mkdir data
 
 echo "from truncated data (.t) to graphed data data/XXX.d"
diff --git a/bin/stats.sh b/bin/stats.sh
index c176365..4a660dd 100755
--- a/bin/stats.sh
+++ b/bin/stats.sh
@@ -1,8 +1,10 @@
 #!/bin/sh
 
-extract="./bin/extract-final-data.sh"
-summary="./bin/summary.r"
-summary_to_line="./bin/rsum2line.awk"
+# .raw -> bad format -> .summary (great format)
+raw2sum="./bin/raw-to-summary.sh"
+# .summary (with too much data) -> truncated data (.t)
+truncate_data="./bin/summary-to-truncated-data.sh"
+# Next step, not invoked from this script: ./bin/extract-data-*.sh turns .t into data/XXX.d (pastes an index + *.t)
 
 if [ $# -ne 1 ]
 then
@@ -12,29 +14,7 @@ fi
 
 dir="$1"
 
-raw_to_summary() {
-	for i in $dir/*.raw
-	do
-		summary_with_bad_format=$(echo $i | sed "s/.raw$/.unconveniently_formated_summary/")
-		target=$(echo $i | sed "s/.raw$/.summary/")
-		if [ -f $summary_with_bad_format ]; then
-			echo -n "\r$summary_with_bad_format already exists: skipping                         "
-		else
-			Rscript $summary $i > $summary_with_bad_format
-		fi
-		if [ -f $target ]; then
-			echo -n "\r$target already exists: skipping                         "
-		else
-			$summary_to_line $summary_with_bad_format > $target
-		fi
-	done
-	echo ""
-
-	# Beyond a certain number of entries, retrieving data from partitions and tags isn't tested anymore.
-	# This leads to create "fake entries" with a duration of 0, resulting to causing some problems with
-	# statistical analysis. So, we need to replace "NaN" by "0" in summaries.
-	sed -i "s/NaN/0/g" $dir/*.summary
-}
+$raw2sum "${dir}"
 
 # List raw files with the number of iterations as a prefix so they can then be sorted.
 sort_summary_files() {
@@ -46,6 +26,8 @@ f() {
 }
 
 fill() {
+	# Remove previous computations (-f: no error if none exist; :? aborts if dir is empty).
+	rm -f -- "${dir:?}"/*.d
 	while read LINE; do
 		nb_it=$(echo $LINE | awk '{ print $1 }')
 		target=$(echo $LINE | awk '{ print $2 }')
@@ -55,12 +37,6 @@ fill() {
 	done
 }
 
-raw_to_summary
-
 sort_summary_files | fill
 
-extract_final_data() {
-	$extract $dir
-}
-
-extract_final_data
+$truncate_data "${dir}"
diff --git a/bin/summary-to-truncated-data.sh b/bin/summary-to-truncated-data.sh
new file mode 100755
index 0000000..f01885e
--- /dev/null
+++ b/bin/summary-to-truncated-data.sh
@@ -0,0 +1,16 @@
+#!/bin/sh
+# Write a truncated copy (fields 2, 3 and 5) of every DIR/*.d file to DIR/*.t.
+
+if [ $# -ne 1 ]; then
+	echo "usage: $0 result-directory"
+	exit 0
+fi
+
+dir=$1
+
+echo "from data (.d) to truncated data (.t)"
+for i in "$dir"/*.d; do
+	# An unmatched glob stays literal: skip anything that is not a real file.
+	[ -f "$i" ] || continue
+	awk '{ print $2, $3, $5 }' < "$i" > "${i%.d}.t"
+done