From fefc4e9b26d3b321d7546c11c2fefeff9396e85a Mon Sep 17 00:00:00 2001 From: Philippe PITTOLI Date: Fri, 24 May 2024 00:25:28 +0200 Subject: [PATCH] FIFO + paper --- Makefile | 6 ++ paper/graph_query_partition.grap | 21 +++++-- paper/graph_query_tag.grap | 21 +++++-- paper/legend.grap | 9 ++- spec/benchmark-cars.cr | 104 ++++++++++++++++++++----------- spec/spec-database.cr | 6 +- src/dodb/storage/cached.cr | 1 - src/dodb/storage/stacked.cr | 29 ++++++++- src/fifo.cr | 7 ++- 9 files changed, 146 insertions(+), 58 deletions(-) diff --git a/Makefile b/Makefile index 42e6dbf..68cd6a1 100644 --- a/Makefile +++ b/Makefile @@ -5,9 +5,15 @@ Q ?= @ SHOULD_UPDATE = ./bin/should-update DBDIR=/tmp/tests-on-dodb +RESULTS_DIR=results benchmark-cars: $(Q)crystal build spec/benchmark-cars.cr $(OPTS) --release +benchmark-cars-run: benchmark-cars + ./benchmark-cars search # by default, test search durations + ./bin/stats.sh $(RESULTS_DIR) + ./bin/extract-data-benchmark-cars.sh $(RESULTS_DIR) + build: benchmark-cars wipe-db: diff --git a/paper/graph_query_partition.grap b/paper/graph_query_partition.grap index f7d9a65..8bb765d 100644 --- a/paper/graph_query_partition.grap +++ b/paper/graph_query_partition.grap @@ -7,8 +7,8 @@ ticks left out from 0 to 350 by 50 label left "Request duration" unaligned "for a partition (ms)" "(Median)" left 0.8 label bot "Number of cars matching the partition" down 0.1 -obram = obuncache = obcache = obsemi = 0 -cbram = cbuncache = cbcache = cbsemi = 0 +obram = obuncache = obfifo = obcache = obsemi = 0 +cbram = cbuncache = cbfifo = cbcache = cbsemi = 0 legendxleft = 1000 legendxright = 6500 @@ -23,21 +23,26 @@ copy "../data/partitions.d" thru X y_scale = 1000000 - # ram cached semi uncached + # ram cached fifo semi uncached line from cx,$2/y_scale to cx,$4/y_scale line from cx,$5/y_scale to cx,$7/y_scale line from cx,$8/y_scale to cx,$10/y_scale line from cx,$11/y_scale to cx,$13/y_scale + line from cx,$14/y_scale to cx,$16/y_scale #ty = $3 cbram = 
$3/y_scale cbcache = $6/y_scale - cbsemi = $9/y_scale - cbuncache = $12/y_scale + cbfifo = $9/y_scale + cbsemi = $12/y_scale + cbuncache = $15/y_scale if (obram > 0) then {line from cx,cbram to ox,obram} if (obcache > 0) then {line from cx,cbcache to ox,obcache} +.gcolor pink + if (obfifo > 0) then {line from cx,cbfifo to ox,obfifo} +.gcolor .gcolor blue if (obsemi > 0) then {line from cx,cbsemi to ox,obsemi} .gcolor @@ -47,15 +52,19 @@ copy "../data/partitions.d" thru X obram = cbram obcache = cbcache + obfifo = cbfifo obsemi = cbsemi obuncache = cbuncache ox = cx - # ram cached semi uncached + # ram cached fifo semi uncached .gcolor red bullet at cx,cbram .gcolor bullet at cx,cbcache +.gcolor pink + bullet at cx,cbfifo +.gcolor .gcolor blue bullet at cx,cbsemi .gcolor diff --git a/paper/graph_query_tag.grap b/paper/graph_query_tag.grap index 84a91a0..d4efd86 100644 --- a/paper/graph_query_tag.grap +++ b/paper/graph_query_tag.grap @@ -6,8 +6,8 @@ ticks left out from 0 to 170 by 20 label left "Request duration" unaligned "for a tag (ms)" "(Median)" left 0.8 label bot "Number of cars matching the tag" down 0.1 -obram = obuncache = obcache = obsemi = 0 -cbram = cbuncache = cbcache = cbsemi = 0 +obram = obuncache = obfifo = obcache = obsemi = 0 +cbram = cbuncache = cbfifo = cbcache = cbsemi = 0 legendxleft = 200 legendxright = 3000 @@ -22,21 +22,26 @@ copy "../data/tags.d" thru X y_scale = 1000000 - # ram cached semi uncached + # ram cached fifo semi uncached line from cx,$2/y_scale to cx,$4/y_scale line from cx,$5/y_scale to cx,$7/y_scale line from cx,$8/y_scale to cx,$10/y_scale line from cx,$11/y_scale to cx,$13/y_scale + line from cx,$14/y_scale to cx,$16/y_scale #ty = $3 cbram = $3/y_scale cbcache = $6/y_scale - cbsemi = $9/y_scale - cbuncache = $12/y_scale + cbfifo = $9/y_scale + cbsemi = $12/y_scale + cbuncache = $15/y_scale if (obram > 0) then {line from cx,cbram to ox,obram} if (obcache > 0) then {line from cx,cbcache to ox,obcache} +.gcolor pink + if (obfifo 
> 0) then {line from cx,cbfifo to ox,obfifo} +.gcolor .gcolor blue if (obsemi > 0) then {line from cx,cbsemi to ox,obsemi} .gcolor @@ -46,15 +51,19 @@ copy "../data/tags.d" thru X obram = cbram obcache = cbcache + obfifo = cbfifo obsemi = cbsemi obuncache = cbuncache ox = cx - # ram cached semi uncached + # ram cached fifo semi uncached .gcolor red bullet at cx,cbram .gcolor bullet at cx,cbcache +.gcolor pink + bullet at cx,cbfifo +.gcolor .gcolor blue bullet at cx,cbsemi .gcolor diff --git a/paper/legend.grap b/paper/legend.grap index af31174..3af51b1 100644 --- a/paper/legend.grap +++ b/paper/legend.grap @@ -19,7 +19,7 @@ define legend { diffx = xright - xleft diffy = yup - ydown - hdiff = diffy/4.3 + hdiff = diffy/5.7 cy = yup - (diffy/6) cx = (diffx/20) + xleft @@ -27,6 +27,7 @@ define legend { lendx = cx + diffx/8 tstartx = lendx + diffx/20 +.ps -2 .gcolor red line from lstartx,cy to lendx,cy .gcolor @@ -35,6 +36,11 @@ define legend { line from lstartx,cy to lendx,cy "Cached db and index" ljust at tstartx,cy cy = cy - hdiff + .gcolor pink + line from lstartx,cy to lendx,cy + .gcolor + "FIFO db and cached index" ljust at tstartx,cy + cy = cy - hdiff .gcolor blue line from lstartx,cy to lendx,cy .gcolor @@ -44,4 +50,5 @@ define legend { line from lstartx,cy to lendx,cy .gcolor "Uncached db and index" ljust at tstartx,cy +.ps +2 } diff --git a/spec/benchmark-cars.cr b/spec/benchmark-cars.cr index eb8ce1f..637cf68 100644 --- a/spec/benchmark-cars.cr +++ b/spec/benchmark-cars.cr @@ -13,13 +13,6 @@ require "./db-cars.cr" # ENV["NBRUN"] rescue 100 # ENV["MAXINDEXES"] rescue 5_000 -class DODB::Storage(V) - def empty_db - while pop - end - end -end - class Context class_property report_dir = "results" class_property max_indexes = 5_000 @@ -85,10 +78,36 @@ def prepare_env(storage, name, s_index, s_partition, s_tags, &) long_operation "removing #{name} data" { storage.rm_storage_dir } end -def batch() +def search_benchmark(storage : DODB::Storage(Car), + current_db_size 
: Int32, + name : String, + search_name : DODB::Index::Basic(Car), + search_color : DODB::Index::Partition(Car), + search_keywords : DODB::Index::Tags(Car)) + name_to_search = ENV["CARNAME"] rescue "Corvet-#{(current_db_size/2).to_i}" + color_to_search = ENV["CARCOLOR"] rescue "red" + keyword_to_search = ENV["CARKEYWORD"] rescue "spacious" + puts "NEW BATCH: db-size #{current_db_size}, name: '#{name_to_search}', color: '#{color_to_search}', tag: '#{keyword_to_search}'" + report(storage, "#{name}_#{current_db_size}_index") do + corvet = search_name.get name_to_search + end + if current_db_size <= Context.max_indexes + report(storage, "#{name}_#{current_db_size}_partitions") do + corvet = search_color.get? color_to_search + end + report(storage, "#{name}_#{current_db_size}_tags") do + corvet = search_keywords.get? keyword_to_search + end + else + fake_report("#{name}_#{current_db_size}_partitions") + fake_report("#{name}_#{current_db_size}_tags") + end +end + +def bench_searches() cars_ram = SPECDB::RAMOnly(Car).new cars_cached = SPECDB::Cached(Car).new - cars_fifo = DODB::FIFOSpecDataBase(Car).new + cars_fifo = SPECDB::FIFO(Car).new "", 5000 # With only 5_000 entries cars_semi = SPECDB::Uncached(Car).new "-semi" cars_uncached = SPECDB::Uncached(Car).new @@ -98,32 +117,7 @@ def batch() semi_Sby_name, semi_Sby_color, semi_Sby_keywords = cached_indexes cars_semi uncached_Sby_name, uncached_Sby_color, uncached_Sby_keywords = uncached_indexes cars_uncached - fn = ->(storage : DODB::Storage(Car), - current_db_size : Int32, - name : String, - search_name : DODB::Index::Basic(Car), - search_color : DODB::Index::Partition(Car), - search_keywords : DODB::Index::Tags(Car)) { - - name_to_search = ENV["CARNAME"] rescue "Corvet-#{(current_db_size/2).to_i}" - color_to_search = ENV["CARCOLOR"] rescue "red" - keyword_to_search = ENV["CARKEYWORD"] rescue "spacious" - puts "NEW BATCH: db-size #{current_db_size}, name: '#{name_to_search}', color: '#{color_to_search}', tag: 
'#{keyword_to_search}'" - report(storage, "#{name}_#{current_db_size}_index") do - corvet = search_name.get name_to_search - end - if current_db_size <= Context.max_indexes - report(storage, "#{name}_#{current_db_size}_partitions") do - corvet = search_color.get? color_to_search - end - report(storage, "#{name}_#{current_db_size}_tags") do - corvet = search_keywords.get? keyword_to_search - end - else - fake_report("#{name}_#{current_db_size}_partitions") - fake_report("#{name}_#{current_db_size}_tags") - end - } + fn = ->search_benchmark(DODB::Storage(Car), Int32, String, DODB::Index::Basic(Car), DODB::Index::Partition(Car), DODB::Index::Tags(Car)) prepare_env cars_ram, "ram", ram_Sby_name, ram_Sby_color, ram_Sby_keywords, &fn prepare_env cars_cached, "cached", cached_Sby_name, cached_Sby_color, cached_Sby_keywords, &fn @@ -143,14 +137,16 @@ def perform_add(storage : DODB::Storage(Car)) end end -def batch_add() +def bench_add() cars_ram = SPECDB::RAMOnly(Car).new cars_cached = SPECDB::Cached(Car).new + cars_fifo = SPECDB::FIFO(Car).new "", 5_000 cars_semi = SPECDB::Uncached(Car).new "-semi" cars_uncached = SPECDB::Uncached(Car).new ram_indexes cars_ram cached_indexes cars_cached + cached_indexes cars_fifo cached_indexes cars_semi uncached_indexes cars_uncached @@ -160,6 +156,9 @@ def batch_add() avr = perform_add(cars_cached) puts "(cached db and indexes) add a value (average on #{Context.nb_run} tries): #{avr}" + avr = perform_add(cars_fifo) + puts "(fifo db and cached indexes) add a value (average on #{Context.nb_run} tries): #{avr}" + avr = perform_add(cars_semi) puts "(uncached db but cached indexes) add a value (average on #{Context.nb_run} tries): #{avr}" @@ -172,6 +171,22 @@ def batch_add() cars_uncached.rm_storage_dir end +def bench_50_shades_of_fifo() + cars_fifo1 = SPECDB::FIFO(Car).new "", 1_000 + cars_fifo5 = SPECDB::FIFO(Car).new "", 5_000 + cars_fifo10 = SPECDB::FIFO(Car).new "", 10_000 + + fifo_Sby_name1, fifo_Sby_color1, fifo_Sby_keywords1 = 
cached_indexes cars_fifo1 + fifo_Sby_name5, fifo_Sby_color5, fifo_Sby_keywords5 = cached_indexes cars_fifo5 + fifo_Sby_name10, fifo_Sby_color10, fifo_Sby_keywords10 = cached_indexes cars_fifo10 + + fn = ->search_benchmark(DODB::Storage(Car), Int32, String, DODB::Index::Basic(Car), DODB::Index::Partition(Car), DODB::Index::Tags(Car)) + + prepare_env cars_fifo1, "fifo1", fifo_Sby_name1, fifo_Sby_color1, fifo_Sby_keywords1, &fn + prepare_env cars_fifo5, "fifo5", fifo_Sby_name5, fifo_Sby_color5, fifo_Sby_keywords5, &fn + prepare_env cars_fifo10, "fifo10", fifo_Sby_name10, fifo_Sby_color10, fifo_Sby_keywords10, &fn +end + ENV["REPORT_DIR"]?.try { |report_dir| Context.report_dir = report_dir } Dir.mkdir_p Context.report_dir @@ -187,5 +202,18 @@ pp! Context.to pp! Context.incr pp! Context.max_indexes -batch -batch_add +if ARGV.size == 0 + puts "Usage: benchmark-cars (fifo|search|add)" + exit 0 +end + +case ARGV[0] +when /fifo/ + bench_50_shades_of_fifo +when /search/ + bench_searches +when /add/ + bench_add +else + puts "Usage: benchmark-cars (fifo|search|add)" +end diff --git a/spec/spec-database.cr b/spec/spec-database.cr index fc0cbfb..5fd9319 100644 --- a/spec/spec-database.cr +++ b/spec/spec-database.cr @@ -24,10 +24,10 @@ class SPECDB::Cached(V) < DODB::Storage::Cached(V) end end -class DODB::FIFOSpecDataBase(V) < DODB::Storage::Stacked(V) +class SPECDB::FIFO(V) < DODB::Storage::Stacked(V) property storage_dir : String - def initialize(storage_ext = "", remove_previous_data = true) - @storage_dir = "specdb-storage-fifo#{storage_ext}" + def initialize(storage_ext = "", @max_entries = 100_000, remove_previous_data = true) + @storage_dir = "specdb-storage-fifo-#{@max_entries}#{storage_ext}" ::FileUtils.rm_rf storage_dir if remove_previous_data super storage_dir end diff --git a/src/dodb/storage/cached.cr b/src/dodb/storage/cached.cr index 86fd660..546f6ff 100644 --- a/src/dodb/storage/cached.cr +++ b/src/dodb/storage/cached.cr @@ -73,7 +73,6 @@ class 
DODB::Storage::Cached(V) < DODB::Storage(V) @data[key] rescue raise MissingEntry.new(key) end - # :inherit: def []=(key : Int32, value : V) old_value = self.[key]? diff --git a/src/dodb/storage/stacked.cr b/src/dodb/storage/stacked.cr index 23630c9..6f20256 100644 --- a/src/dodb/storage/stacked.cr +++ b/src/dodb/storage/stacked.cr @@ -31,7 +31,6 @@ # ``` # # NOTE: fast for frequently requested data and requires a stable (and configurable) amount of memory. -# TODO: not yet implemented. class DODB::Storage::Stacked(V) < DODB::Storage::Cached(V) # The *stack* a simple FIFO instance where the key of the requested data is pushed. # In case the number of stored entries exceeds what is allowed, the least recently used entry is removed. @@ -52,9 +51,35 @@ class DODB::Storage::Stacked(V) < DODB::Storage::Cached(V) def [](key : Int32) : V val = @data[key] rescue raise MissingEntry.new(key) + push_fifo key + val + end + + # Assumes new entries are more requested than old ones. + def []=(key : Int32, value : V) + super key, value + push_fifo key + end + + # :inherit: + # + # Assumes new entries are more requested than old ones. + def <<(item : V) + key = super item + push_fifo key + end + + def unsafe_delete(key : Int32) + @stack.delete key if super key + end + + def delete(key : Int32) + @stack.delete key if super key + end + + private def push_fifo(key : Int32) if entry_to_remove = @stack << key @data.delete entry_to_remove end - val end end diff --git a/src/fifo.cr b/src/fifo.cr index 33b4185..81b7093 100644 --- a/src/fifo.cr +++ b/src/fifo.cr @@ -27,11 +27,16 @@ class FIFO(V) end # Pushes a value in the FIFO and gets the oldest value whether it exceeds the allowed number of entries. - # NOTE: `#<<(v : V)` is the only function since it's enough for the intended use, feel free to improve this. + # NOTE: `#<<(v : V)` is (almost) the only function since it's enough for the intended use, feel free to improve this. 
-	# WARNING: implementation is extremely simple (3 lines) and not designed to be highly efficient.
+	# WARNING: implementation is extremely simple (3 lines) and not designed to be highly efficient: each push scans the whole FIFO.
 	def <<(v : V) : V?
-		@data.select! { |x| v != v } # remove dups
+		@data.select! { |x| x != v } # remove dups: keep only the entries that differ from *v*
 		@data.unshift v # push on top of the stack
 		@data.pop if @data.size > @max_entries # remove least recently used entry if `@data` is too big
 	end
+
+	# Removes every occurrence of *v* from the FIFO; does nothing if *v* is absent.
+	def delete(v : V)
+		@data.select! { |x| x != v }
+	end
 end