Compare commits

..

37 commits

Author SHA1 Message Date
7cf7104757 Small fixes. 2025-04-20 11:54:35 +02:00
d113026d40 Minor changes. 2025-04-20 11:54:35 +02:00
bc2fb503f2 Almost finished!! 2025-04-20 11:54:35 +02:00
3d79a94f8f Feedback. 2025-04-20 11:54:35 +02:00
d7e378f121 Filesystem introduction. 2025-04-20 11:54:35 +02:00
1420f4b10a More filesystem explanations. 2025-04-20 11:54:35 +02:00
3fef72ab8a Introduction to filesystems. 2025-04-20 11:54:35 +02:00
2493249fb4 Improvements. 2025-04-20 11:54:35 +02:00
4cadc96181 The review continues. 2025-04-20 11:54:35 +02:00
71d8acf85f Introduction + s/hash/lookup table/ 2025-04-20 11:54:35 +02:00
9df8a20c2e PAPER: conclusion and minor improvements. 2025-04-20 11:54:35 +02:00
7e0c32628e Minor sentence. 2025-04-20 11:54:35 +02:00
62ba6ecf8d Alternatives++. 2025-04-20 11:54:35 +02:00
8ba6105d82 Filesystems. 2025-04-20 11:54:35 +02:00
6daa23f2f1 Introduction++ 2025-04-20 11:54:35 +02:00
d4efe64d83 Tag-based filesystems. 2025-04-20 11:54:35 +02:00
279213e292 More exotic filesystems. 2025-04-20 11:54:35 +02:00
d84ebcbf7e Limits of DODB++. 2025-04-20 11:54:35 +02:00
ff14fd73b5 Limits of DODB++. 2025-04-20 11:54:35 +02:00
4a3be277af Alternatives++ 2025-04-20 11:54:35 +02:00
e8ffee2c7a Alternatives++ 2025-04-20 11:54:35 +02:00
d66f5ba14c KS/KE 2025-04-20 11:54:35 +02:00
3f320f71bb Minor update. 2025-04-20 11:54:35 +02:00
5ec06cb44e Talking about filesystems. 2025-04-20 11:54:35 +02:00
90f43f14cc Paper improved. Slowing reaching a first readable version. 2025-04-20 11:54:35 +02:00
8a06e3f286 Rewrite. 2025-04-20 11:54:35 +02:00
cba95ebe3b Introduction++. 2025-04-20 11:54:35 +02:00
b0e08f0698 Triggers 2025-04-20 11:54:35 +02:00
91244cb815 Add a few explanations (basic stuff). 2025-04-20 11:54:35 +02:00
75f433ff95 Enable nested bullets. 2025-04-20 11:54:35 +02:00
3e4da1a36d netlibre explanation and data leak prevention methods. 2025-04-20 11:54:35 +02:00
e91d83713a Data-leak++ 2025-04-20 11:54:35 +02:00
b3333df82e FIFO -> LRU 2025-04-20 11:54:35 +02:00
2edf490517 Add missing files. 2025-04-20 11:54:35 +02:00
49184960f0 bin/ 2025-04-20 11:54:35 +02:00
b069f202f8 Paper, advanced draft. 2025-04-20 11:54:35 +02:00
5602a6a7ef high volume db test - spec/ dir is currently a mess 2025-01-29 02:42:54 +01:00
4 changed files with 146 additions and 96 deletions

View file

@ -25,6 +25,9 @@ release:
doc: doc:
crystal docs src/dodb.cr crystal docs src/dodb.cr
high-volume-db:
$(Q)crystal build spec/high-volume-db.cr $(OPTS) --release
HTTPD_ACCESS_LOGS ?= /tmp/access-dodb-docs.log HTTPD_ACCESS_LOGS ?= /tmp/access-dodb-docs.log
HTTPD_ADDR ?= 127.0.0.1 HTTPD_ADDR ?= 127.0.0.1
HTTPD_PORT ?= 9000 HTTPD_PORT ?= 9000

View file

@ -14,91 +14,7 @@ require "./db-cars.cr"
# ENV["MAXINDEXES"] rescue 5_000 # ENV["MAXINDEXES"] rescue 5_000
# ENV["LRU_SIZE"] rescue 10_000 # ENV["LRU_SIZE"] rescue 10_000
class Context require "./utilities-cars.cr"
class_property report_dir = "results"
class_property max_indexes = 5_000
class_property nb_run = 100
class_property from = 1_000
class_property to = 50_000
class_property incr = 1_000
class_property lru_size : UInt32 = 10_000
end
# To simplify the creation of graphs, it's better to have fake data for
# partitions and tags that won't be actually covered.
# 0 means the absence of data.
def fake_report(name)
durations = Array(Int32).new Context.nb_run, 0
File.open("#{Context.report_dir}/#{name}.raw", "w") do |file|
durations.each do |d|
file.puts d
end
end
puts "#{name}: no report"
end
def report(storage, name, &block)
durations = run_n_times Context.nb_run, &block
File.open("#{Context.report_dir}/#{name}.raw", "w") do |file|
durations.each do |d|
file.puts d
end
end
avr = durations.reduce { |a, b| a + b } / Context.nb_run
puts "#{name}: #{avr}"
avr
end
def verbose_add_cars(storage, nbcars, name, max_indexes)
long_operation "add #{nbcars} values to #{name}" do
add_cars storage, nbcars, max_indexes: max_indexes
end
end
# Add first entries, then loop: speed tests, add entries.
def prepare_env(storage, name, s_index, s_partition, s_tags, &)
verbose_add_cars storage, Context.from, name, max_indexes: Context.max_indexes
current = Context.from
to = Context.to
incr = Context.incr
while current < to
yield storage, current, name, s_index, s_partition, s_tags
break if current + incr >= to
verbose_add_cars storage, incr, name, max_indexes: Context.max_indexes
current += incr
end
long_operation "removing #{name} data" { storage.rm_storage_dir }
end
def search_benchmark(storage : DODB::Storage(Car),
current_db_size : Int32,
name : String,
search_name : DODB::Trigger::Index(Car),
search_color : DODB::Trigger::Partition(Car),
search_keywords : DODB::Trigger::Tags(Car))
name_to_search = ENV["CARNAME"] rescue "Corvet-#{(current_db_size/2).to_i}"
color_to_search = ENV["CARCOLOR"] rescue "red"
keyword_to_search = ENV["CARKEYWORD"] rescue "spacious"
puts "NEW BATCH: db-size #{current_db_size}, name: '#{name_to_search}', color: '#{color_to_search}', tag: '#{keyword_to_search}'"
report(storage, "#{name}_#{current_db_size}_index") do
corvet = search_name.get name_to_search
end
if current_db_size <= Context.max_indexes
report(storage, "#{name}_#{current_db_size}_partitions") do
corvet = search_color.get? color_to_search
end
report(storage, "#{name}_#{current_db_size}_tags") do
corvet = search_keywords.get? keyword_to_search
end
else
fake_report("#{name}_#{current_db_size}_partitions")
fake_report("#{name}_#{current_db_size}_tags")
end
end
def bench_searches() def bench_searches()
cars_ram = SPECDB::RAMOnly(Car).new cars_ram = SPECDB::RAMOnly(Car).new
@ -122,17 +38,6 @@ def bench_searches()
prepare_env cars_uncached, "uncached", uncached_Sby_name, uncached_Sby_color, uncached_Sby_keywords, &fn prepare_env cars_uncached, "uncached", uncached_Sby_name, uncached_Sby_color, uncached_Sby_keywords, &fn
end end
def perform_add(storage : DODB::Storage(Car))
corvet0 = Car.new "Corvet", "red", [ "shiny", "impressive", "fast", "elegant" ]
i = 0
perform_benchmark_average Context.nb_run, do
corvet = corvet0.clone
corvet.name = "Corvet-#{i}"
storage.unsafe_add corvet
i += 1
end
end
def bench_add() def bench_add()
cars_ram = SPECDB::RAMOnly(Car).new cars_ram = SPECDB::RAMOnly(Car).new
cars_cached = SPECDB::Cached(Car).new cars_cached = SPECDB::Cached(Car).new

45
spec/high-volume-db.cr Normal file
View file

@ -0,0 +1,45 @@
require "./db-cars.cr"
require "./utilities.cr"
# Tuning knobs for the high-volume DB test.
class Context
  # Target number of cars in the database (overridable via the DBSIZE env var).
  class_property dbsize = 1_000_000
  # Number of timed runs to average (overridable via the NBRUN env var).
  class_property nb_run = 100
end
# Allow the environment to override the defaults.
ENV["DBSIZE"]?.try { |it| Context.dbsize = it.to_i }
ENV["NBRUN"]?.try { |it| Context.nb_run = it.to_i }

if ARGV.size == 0
  puts "Usage: high-volume-db (add|get)"
  exit 0
end

# Common storage backed by the TESTDB/ directory, with a 5000-entry cache.
db = DODB::Storage::Common(Car).new "TESTDB", 5000
by_name = db.new_index "name", &.name

# Sink for fetched values so the lookup in the benchmark loop has an
# observable effect and cannot be elided.
something = Hash(String, Bool).new

case ARGV[0]
when /get/
  counter = 0
  # Probe a car in the middle of the key space.
  car_number_to_get = (Context.dbsize / 2).to_i
  puts "let's get the car #{car_number_to_get}"
  avr = perform_benchmark_average Context.nb_run, do
    car = by_name.get "somecar-#{car_number_to_get}"
    something[car.name] = true
    #STDOUT.write "\rgot the car #{car.name}: #{counter}/#{Context.nb_run}".to_slice
    counter += 1
  end
  puts
  puts "average time was: #{avr}"
  puts "done!"
when /add/
  # Resume from the last stored key, so an interrupted fill can continue
  # where it left off.  NOTE(review): assumes `last_key` is sensible on an
  # empty database — confirm against DODB.
  counter = db.last_key
  while counter < Context.dbsize
    STDOUT.write "\radding car #{counter}/#{Context.dbsize}".to_slice
    db << Car.new "somecar-#{counter}", "red", [] of String
    counter += 1
  end
  puts
  puts "done!"
else
  # Previously an unknown command exited silently with success; reject it
  # explicitly instead.
  STDERR.puts "unknown command: #{ARGV[0]}"
  puts "Usage: high-volume-db (add|get)"
  exit 1
end

97
spec/utilities-cars.cr Normal file
View file

@ -0,0 +1,97 @@
# Tuning knobs shared by the car-database benchmarks.
class Context
  # Directory where the .raw timing reports are written.
  class_property report_dir = "results"
  # Above this DB size, partition/tag searches are skipped (see search_benchmark).
  class_property max_indexes = 5_000
  # Number of timed runs per measurement.
  class_property nb_run = 100
  # The database grows from `from` to `to` entries, in steps of `incr`.
  class_property from = 1_000
  class_property to = 50_000
  class_property incr = 1_000
  # LRU cache size for cached storage back-ends.
  class_property lru_size : UInt32 = 10_000
end
# Write a report made only of zeros for a measurement that was skipped.
# The graph-generation tooling expects a value for every (name, size)
# pair; 0 stands for the absence of data.
def fake_report(name)
  File.open("#{Context.report_dir}/#{name}.raw", "w") do |file|
    Context.nb_run.times { file.puts 0 }
  end
  puts "#{name}: no report"
end
# Run `block` Context.nb_run times, write every measured duration to
# "#{Context.report_dir}/#{name}.raw" (one value per line), print and
# return the average.  `storage` is unused but kept for call-site
# compatibility with the benchmark drivers.
def report(storage, name, &block)
  durations = run_n_times Context.nb_run, &block
  File.open("#{Context.report_dir}/#{name}.raw", "w") do |file|
    durations.each { |d| file.puts d }
  end
  # `sum` instead of a bare `reduce`: reduce without an initial value
  # raises Enumerable::EmptyError on an empty array.
  avr = durations.sum / Context.nb_run
  puts "#{name}: #{avr}"
  avr
end
# Insert `nbcars` cars into `storage`, wrapped in a progress message.
def verbose_add_cars(storage, nbcars, name, max_indexes)
  long_operation("add #{nbcars} values to #{name}") { add_cars storage, nbcars, max_indexes: max_indexes }
end
# Seed `storage` with Context.from entries, then alternate between running
# the speed tests (the yielded block) and growing the database by
# Context.incr entries, stopping before Context.to would be exceeded.
# The storage directory is removed once the run is over.
def prepare_env(storage, name, s_index, s_partition, s_tags, &)
  verbose_add_cars storage, Context.from, name, max_indexes: Context.max_indexes
  db_size = Context.from
  target  = Context.to
  step    = Context.incr
  while db_size < target
    puts "current number of cars: #{db_size}/#{target}"
    yield storage, db_size, name, s_index, s_partition, s_tags
    # Stop before overshooting the requested maximum size.
    break if db_size + step >= target
    verbose_add_cars storage, step, name, max_indexes: Context.max_indexes
    db_size += step
  end
  long_operation("removing #{name} data") { storage.rm_storage_dir }
end
# Measure the average insertion time: clone a template car, give it a
# unique name and `unsafe_add` it, Context.nb_run times.
def perform_add(storage : DODB::Storage(Car))
  template = Car.new "Corvet", "red", [ "shiny", "impressive", "fast", "elegant" ]
  counter = 0
  perform_benchmark_average Context.nb_run, do
    car = template.clone
    car.name = "Corvet-#{counter}"
    storage.unsafe_add car
    counter += 1
  end
end
# Time index/partition/tag lookups against a database holding
# `current_db_size` entries.  Partition and tag searches are only run up
# to Context.max_indexes entries; beyond that a zero-filled fake report
# is written so the graphs keep one data point per size.
def search_benchmark(storage : DODB::Storage(Car),
                     current_db_size : Int32,
                     name : String,
                     search_name : DODB::Trigger::Index(Car),
                     search_color : DODB::Trigger::Partition(Car),
                     search_keywords : DODB::Trigger::Tags(Car))
  # `ENV[...]?` instead of `ENV[...] rescue`: only a *missing* key falls
  # back to the default; other errors are no longer silently swallowed.
  name_to_search    = ENV["CARNAME"]?    || "Corvet-#{(current_db_size/2).to_i}"
  color_to_search   = ENV["CARCOLOR"]?   || "red"
  keyword_to_search = ENV["CARKEYWORD"]? || "spacious"
  puts "NEW BATCH: db-size #{current_db_size}, name: '#{name_to_search}', color: '#{color_to_search}', tag: '#{keyword_to_search}'"
  report(storage, "#{name}_#{current_db_size}_index") do
    corvet = search_name.get name_to_search
  end
  if current_db_size <= Context.max_indexes
    report(storage, "#{name}_#{current_db_size}_partitions") do
      corvet = search_color.get? color_to_search
    end
    report(storage, "#{name}_#{current_db_size}_tags") do
      corvet = search_keywords.get? keyword_to_search
    end
  else
    fake_report("#{name}_#{current_db_size}_partitions")
    fake_report("#{name}_#{current_db_size}_tags")
  end
end