high volume db test - spec/ dir is currently a mess

Philippe Pittoli 2025-01-29 02:42:54 +01:00
parent 0f6607b54d
commit 5602a6a7ef
4 changed files with 146 additions and 96 deletions

@@ -25,6 +25,9 @@ release:
doc:
	crystal docs src/dodb.cr

high-volume-db:
	$(Q)crystal build spec/high-volume-db.cr $(OPTS) --release

HTTPD_ACCESS_LOGS ?= /tmp/access-dodb-docs.log
HTTPD_ADDR ?= 127.0.0.1
HTTPD_PORT ?= 9000
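
For reference, the new target builds the benchmark tool in release mode. Assuming the compiler drops the binary in the working directory (crystal build's default), it is invoked as:

	make high-volume-db    # produces ./high-volume-db, driven by spec/high-volume-db.cr below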

@@ -14,91 +14,7 @@ require "./db-cars.cr"
# ENV["MAXINDEXES"] rescue 5_000
# ENV["LRU_SIZE"] rescue 10_000
class Context
  class_property report_dir = "results"
  class_property max_indexes = 5_000
  class_property nb_run = 100
  class_property from = 1_000
  class_property to = 50_000
  class_property incr = 1_000
  class_property lru_size : UInt32 = 10_000
end

# To simplify the creation of graphs, it's better to have fake data for
# partitions and tags that won't be actually covered.
# 0 means the absence of data.
def fake_report(name)
  durations = Array(Int32).new Context.nb_run, 0
  File.open("#{Context.report_dir}/#{name}.raw", "w") do |file|
    durations.each do |d|
      file.puts d
    end
  end
  puts "#{name}: no report"
end

def report(storage, name, &block)
  durations = run_n_times Context.nb_run, &block
  File.open("#{Context.report_dir}/#{name}.raw", "w") do |file|
    durations.each do |d|
      file.puts d
    end
  end
  avr = durations.reduce { |a, b| a + b } / Context.nb_run
  puts "#{name}: #{avr}"
  avr
end

def verbose_add_cars(storage, nbcars, name, max_indexes)
  long_operation "add #{nbcars} values to #{name}" do
    add_cars storage, nbcars, max_indexes: max_indexes
  end
end

# Add first entries, then loop: speed tests, add entries.
def prepare_env(storage, name, s_index, s_partition, s_tags, &)
  verbose_add_cars storage, Context.from, name, max_indexes: Context.max_indexes
  current = Context.from
  to = Context.to
  incr = Context.incr
  while current < to
    yield storage, current, name, s_index, s_partition, s_tags
    break if current + incr >= to
    verbose_add_cars storage, incr, name, max_indexes: Context.max_indexes
    current += incr
  end
  long_operation "removing #{name} data" { storage.rm_storage_dir }
end

def search_benchmark(storage : DODB::Storage(Car),
                     current_db_size : Int32,
                     name : String,
                     search_name : DODB::Trigger::Index(Car),
                     search_color : DODB::Trigger::Partition(Car),
                     search_keywords : DODB::Trigger::Tags(Car))
  name_to_search = ENV["CARNAME"] rescue "Corvet-#{(current_db_size/2).to_i}"
  color_to_search = ENV["CARCOLOR"] rescue "red"
  keyword_to_search = ENV["CARKEYWORD"] rescue "spacious"
  puts "NEW BATCH: db-size #{current_db_size}, name: '#{name_to_search}', color: '#{color_to_search}', tag: '#{keyword_to_search}'"
  report(storage, "#{name}_#{current_db_size}_index") do
    corvet = search_name.get name_to_search
  end
  if current_db_size <= Context.max_indexes
    report(storage, "#{name}_#{current_db_size}_partitions") do
      corvet = search_color.get? color_to_search
    end
    report(storage, "#{name}_#{current_db_size}_tags") do
      corvet = search_keywords.get? keyword_to_search
    end
  else
    fake_report("#{name}_#{current_db_size}_partitions")
    fake_report("#{name}_#{current_db_size}_tags")
  end
end

require "./utilities-cars.cr"

def bench_searches()
  cars_ram = SPECDB::RAMOnly(Car).new
@@ -122,17 +38,6 @@ def bench_searches()
  prepare_env cars_uncached, "uncached", uncached_Sby_name, uncached_Sby_color, uncached_Sby_keywords, &fn
end

def perform_add(storage : DODB::Storage(Car))
  corvet0 = Car.new "Corvet", "red", [ "shiny", "impressive", "fast", "elegant" ]
  i = 0
  perform_benchmark_average Context.nb_run, do
    corvet = corvet0.clone
    corvet.name = "Corvet-#{i}"
    storage.unsafe_add corvet
    i += 1
  end
end

def bench_add()
  cars_ram = SPECDB::RAMOnly(Car).new
  cars_cached = SPECDB::Cached(Car).new

spec/high-volume-db.cr (new file, 45 lines)

@@ -0,0 +1,45 @@
require "./db-cars.cr"
require "./utilities.cr"

class Context
  class_property dbsize = 1_000_000
  class_property nb_run = 100
end

ENV["DBSIZE"]?.try { |it| Context.dbsize = it.to_i }
ENV["NBRUN"]?.try { |it| Context.nb_run = it.to_i }

if ARGV.size == 0
  puts "Usage: high-volume-db (add|get)"
  exit 0
end

db = DODB::Storage::Common(Car).new "TESTDB", 5000
by_name = db.new_index "name", &.name

something = Hash(String,Bool).new

case ARGV[0]
when /get/
  counter = 0
  car_number_to_get = (Context.dbsize/2).to_i
  puts "let's get the car #{car_number_to_get}"
  avr = perform_benchmark_average Context.nb_run, do
    car = by_name.get "somecar-#{car_number_to_get}"
    something[car.name] = true
    #STDOUT.write "\rgot the car #{car.name}: #{counter}/#{Context.nb_run}".to_slice
    counter += 1
  end
  puts
  puts "average time was: #{avr}"
  puts "done!"
when /add/
  counter = db.last_key
  while counter < Context.dbsize
    STDOUT.write "\radding car #{counter}/#{Context.dbsize}".to_slice
    db << Car.new "somecar-#{counter}", "red", [] of String
    counter += 1
  end
  puts
  puts "done!"
end
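
The tool is configured through the DBSIZE and NBRUN environment variables read above. A plausible session (sizes are illustrative; add resumes from db.last_key, get times lookups of the middle car):

	DBSIZE=1000000 ./high-volume-db add
	DBSIZE=1000000 NBRUN=100 ./high-volume-db get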

spec/utilities-cars.cr (new file, 97 lines)

@@ -0,0 +1,97 @@
class Context
  class_property report_dir = "results"
  class_property max_indexes = 5_000
  class_property nb_run = 100
  class_property from = 1_000
  class_property to = 50_000
  class_property incr = 1_000
  class_property lru_size : UInt32 = 10_000
end

# To simplify the creation of graphs, it's better to have fake data for
# partitions and tags that won't be actually covered.
# 0 means the absence of data.
def fake_report(name)
  durations = Array(Int32).new Context.nb_run, 0
  File.open("#{Context.report_dir}/#{name}.raw", "w") do |file|
    durations.each do |d|
      file.puts d
    end
  end
  puts "#{name}: no report"
end

def report(storage, name, &block)
  durations = run_n_times Context.nb_run, &block
  File.open("#{Context.report_dir}/#{name}.raw", "w") do |file|
    durations.each do |d|
      file.puts d
    end
  end
  avr = durations.reduce { |a, b| a + b } / Context.nb_run
  puts "#{name}: #{avr}"
  avr
end

def verbose_add_cars(storage, nbcars, name, max_indexes)
  long_operation "add #{nbcars} values to #{name}" do
    add_cars storage, nbcars, max_indexes: max_indexes
  end
end

# Add first entries, then loop: speed tests, add entries.
def prepare_env(storage, name, s_index, s_partition, s_tags, &)
  verbose_add_cars storage, Context.from, name, max_indexes: Context.max_indexes
  current = Context.from
  to = Context.to
  incr = Context.incr
  while current < to
    puts "current number of cars: #{current}/#{to}"
    yield storage, current, name, s_index, s_partition, s_tags
    break if current + incr >= to
    verbose_add_cars storage, incr, name, max_indexes: Context.max_indexes
    current += incr
  end
  long_operation "removing #{name} data" { storage.rm_storage_dir }
end

def perform_add(storage : DODB::Storage(Car))
  corvet0 = Car.new "Corvet", "red", [ "shiny", "impressive", "fast", "elegant" ]
  i = 0
  perform_benchmark_average Context.nb_run, do
    corvet = corvet0.clone
    corvet.name = "Corvet-#{i}"
    storage.unsafe_add corvet
    i += 1
  end
end

def search_benchmark(storage : DODB::Storage(Car),
                     current_db_size : Int32,
                     name : String,
                     search_name : DODB::Trigger::Index(Car),
                     search_color : DODB::Trigger::Partition(Car),
                     search_keywords : DODB::Trigger::Tags(Car))
  name_to_search = ENV["CARNAME"] rescue "Corvet-#{(current_db_size/2).to_i}"
  color_to_search = ENV["CARCOLOR"] rescue "red"
  keyword_to_search = ENV["CARKEYWORD"] rescue "spacious"
  puts "NEW BATCH: db-size #{current_db_size}, name: '#{name_to_search}', color: '#{color_to_search}', tag: '#{keyword_to_search}'"
  report(storage, "#{name}_#{current_db_size}_index") do
    corvet = search_name.get name_to_search
  end
  if current_db_size <= Context.max_indexes
    report(storage, "#{name}_#{current_db_size}_partitions") do
      corvet = search_color.get? color_to_search
    end
    report(storage, "#{name}_#{current_db_size}_tags") do
      corvet = search_keywords.get? keyword_to_search
    end
  else
    fake_report("#{name}_#{current_db_size}_partitions")
    fake_report("#{name}_#{current_db_size}_tags")
  end
end
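
For orientation, a minimal caller of these helpers might look like the sketch below. It mirrors the bench_searches hunk earlier in this commit, but the trigger constructors for color and keywords (new_partition, new_tags) and the Car fields they read are assumptions for illustration, not part of this diff:

require "./db-cars.cr"
require "./utilities-cars.cr"

# Hypothetical wiring, for illustration only.
cars_ram  = SPECDB::RAMOnly(Car).new
by_name   = cars_ram.new_index     "name",     &.name      # same call as in high-volume-db.cr
by_color  = cars_ram.new_partition "color",    &.color     # assumed partition trigger
by_kwords = cars_ram.new_tags      "keywords", &.keywords  # assumed tags trigger

# search_benchmark becomes the per-size block that prepare_env yields to.
fn = ->search_benchmark(DODB::Storage(Car), Int32, String,
                        DODB::Trigger::Index(Car),
                        DODB::Trigger::Partition(Car),
                        DODB::Trigger::Tags(Car))
prepare_env cars_ram, "ram", by_name, by_color, by_kwords, &fn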