diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..16f299c
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+docs/
+bin/
diff --git a/Makefile b/Makefile
index f5628b1..68cd6a1 100644
--- a/Makefile
+++ b/Makefile
@@ -1,12 +1,18 @@
all: build
-OPTS ?= --progress
+OPTS ?= --progress --no-debug
Q ?= @
SHOULD_UPDATE = ./bin/should-update
DBDIR=/tmp/tests-on-dodb
+RESULTS_DIR=results
benchmark-cars:
-	$(Q)crystal build spec/benchmark-cars.cr $(OPTS)
+	$(Q)crystal build spec/benchmark-cars.cr $(OPTS) --release
+
+benchmark-cars-run: benchmark-cars
+	./benchmark-cars search # by default, test search durations
+	./bin/stats.sh $(RESULTS_DIR)
+	./bin/extract-data-benchmark-cars.sh $(RESULTS_DIR)
build: benchmark-cars
@@ -15,3 +21,13 @@ wipe-db:
release:
	make build OPTS="--release --progress"
+
+doc:
+	crystal docs src/dodb.cr
+
+HTTPD_ACCESS_LOGS ?= /tmp/access-dodb-docs.log
+HTTPD_ADDR ?= 127.0.0.1
+HTTPD_PORT ?= 9000
+DIR ?= docs
+serve-doc:
+	darkhttpd $(DIR) --addr $(HTTPD_ADDR) --port $(HTTPD_PORT) --log $(HTTPD_ACCESS_LOGS)
diff --git a/README.md b/README.md
index daea8aa..28a2028 100644
--- a/README.md
+++ b/README.md
@@ -11,11 +11,12 @@ The objective is to get rid of DBMS when storing simple files directly on the fi
A brief summary:
- no SQL
- objects are serialized (currently in JSON)
-- indexes (simple symlinks on the FS) can be created to improve significantly searches in the db
+- data is indexed to significantly improve searches in the db
- db is fully integrated in the language (basically a simple array with a few more functions)
-
-Also, data can be `cached`.
-The entire base will be kept in memory (if you can), enabling incredible speeds.
+- symlinks on the FS can be generated to enable data searches **outside the application, with UNIX tools**
+- configurable data cache size
+- RAM-only databases for short-lived data
+- triggers can be easily implemented to extend indexes beyond your wildest expectations
## Limitations
@@ -41,15 +42,8 @@ Since DODB doesn't use SQL and doesn't even try to handle stuff like atomicity o
Reading data from disk takes about a few dozen microseconds, and not much more when searching an indexed data.
**On my more-than-decade-old, slow-as-fuck machine**, the simplest possible SQL request to Postgres takes about 100 to 900 microseconds.
-With DODB, to reach on-disk data: 13 microseconds.
-To search then retrieve indexed data: almost the same thing, 16 microseconds on average, since it's just a path to a symlink we have to build.
-
-With the `cached` version of DODB, there is not even deserialization happening, so 7 nanoseconds.
-
-Indexes (indexes, partitions and tags) are also cached **by default**.
-The speed up is great compared to the uncached version since you won't walk the file-system.
-Searching an index takes about 35 nanoseconds when cached.
-To avoid the memory cost of cached indexes, you can explicitely ask for uncached ones.
+With DODB, to reach on-disk data: 15 microseconds; and just a few dozen **nanoseconds** for cached data,
+even when searching a specific value with an index.
**NOTE:** of course SQL and DODB cannot be fairly compared based on performance since they don't have the same properties.
But still, this is the kind of speed you can get with the tool.
@@ -65,12 +59,11 @@ dependencies:
    git: https://git.baguette.netlib.re/Baguette/dodb.cr
```
-
# Basic usage
```crystal
-# Database creation
-db = DODB::DataBase(Thing).new "path/to/storage/directory"
+# Database creation, with a data cache of 100k entries.
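+# (The cache keeps up to 100k recently-used entries in RAM; once full, the
+# least recently used entry is evicted. See `spec/test-common.cr` for the
+# exact eviction behavior.)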
+db = DODB::Storage::Common(Thing).new "path/to/storage/directory", 100_000 # Adding an element to the db db << Thing.new @@ -88,7 +81,7 @@ end The DB creation is simply creating a few directories on the file-system. ```crystal -db = DODB::DataBase(Thing).new "path/to/storage/directory" +db = DODB::Storage::Common(Thing).new "path/to/storage/directory", 100_000 ``` ## Adding a new object @@ -101,8 +94,8 @@ db << Thing.new To speed-up searches in the DB, we can sort them, based on their attributes for example. There are 3 sorting methods: -- index, 1-1 relations, an attribute value is bound to a single object (an identifier) -- partition, 1-n relations, an attribute value may be related to several objects (the color of a car, for instance) +- basic indexes, 1-1 relations, an attribute value is bound to a single object (an identifier) +- partitions, 1-n relations, an attribute value may be related to several objects (the color of a car, for instance) - tags, n-n relations, each object may have several tags, each tag may be related to several objects Let's take an example. @@ -123,7 +116,7 @@ end We want to store `cars` in a database and index them on their `id` attribute: ```Crystal -cars = DODB::DataBase(Car).new "path/to/storage/directory" +cars = DODB::Storage::Common(Car).new "path/to/storage/directory", 100_000 # We give a name to the index, then the code to extract the id from a Car instance cars_by_id = cars.new_index "id", &.id @@ -214,8 +207,7 @@ car = cars_by_id.get "86a07924-ab3a-4f46-a975-e9803acba22d" # we modify it car.color = "Blue" -# update -# simple case: no change in the index +# update, simple case: no change in the index cars_by_id.update car # otherwise car.id = "something-else-than-before" @@ -250,6 +242,7 @@ end # Remove a value based on a tag. cars_by_keyword.delete "shiny" +cars_by_keyword.delete ["slow", "expensive"] # Remove cars that are both slow and expensive. cars_by_keyword.delete "elegant", do |car| car.name == "GTI" end @@ -282,7 +275,7 @@ end # Database creation # ##################### -cars = DODB::DataBase(Car).new "./bin/storage" +cars = DODB::Storage::Common(Car).new "./db-storage", 100_000 ########################## @@ -334,6 +327,8 @@ pp! cars_by_color.get "red" # based on a tag (print all fast cars) pp! cars_by_keyword.get "fast" +# based on several tags (print all cars that are both slow and expensive) +pp! cars_by_keyword.get ["slow", "expensive"] ############ # Updating # @@ -355,7 +350,7 @@ cars_by_name.update_or_create car.name, car # We all know it, elegant cars are also expensive. cars_by_keyword.get("elegant").each do |car| car.keywords << "expensive" - cars_by_name.update car.name, car + cars_by_name.update car end ############### @@ -372,6 +367,6 @@ cars_by_color.delete "blue", &.name.==("GTI") # based on a keyword cars_by_keyword.delete "solid" -# based on a keyword (but not only) -cars_by_keyword.delete "fast", &.name.==("Corvet") +# based on a few keywords (but not only) +cars_by_keyword.delete ["slow", "expensive"], &.name.==("Corvet") ``` diff --git a/TODO.md b/TODO.md index 1b66136..2b4ee32 100644 --- a/TODO.md +++ b/TODO.md @@ -1,8 +1,13 @@ -# API - -Cached indexes (index, partition, tags) should be used by default. -Uncached indexes should be an option, through a new function `add_uncached_index` or something. - # Performance -Search with some kind of "pagination" system: ask entries with a limit on the number of elements and an offset. 
+- search functions of *index objects* with a "pagination" system: ask entries with a limit on the number of elements and an offset.
+
+# Memory and file-system management
+
+- When a value is removed, the related partitions (and tags) may be empty, leaving both an empty array
+  in memory and a directory on the file-system. Should they be removed?
+
+# Documentation
+
+- Finish the PDF to explain *why DODB*.
+- Change *index* to *key* in `DODB::Storage` and inherited classes.
diff --git a/shard.yml b/shard.yml
index 474f220..a3bef0e 100644
--- a/shard.yml
+++ b/shard.yml
@@ -1,5 +1,5 @@
name: dodb
-version: 0.3.0
+version: 0.5.0
authors:
- Luka Vandervelden
@@ -8,4 +8,4 @@ authors:
description: |
  Simple, embeddable Document-Oriented DataBase in Crystal.
-license: MIT
+license: ISC
diff --git a/spec/benchmark-cars.cr b/spec/benchmark-cars.cr
index 988a4fb..a3c06c9 100644
--- a/spec/benchmark-cars.cr
+++ b/spec/benchmark-cars.cr
@@ -1,181 +1,221 @@
require "benchmark"
-require "./benchmark-utilities.cr"
+require "./utilities.cr"
+require "./db-cars.cr"
-require "../src/dodb.cr"
-require "./test-data.cr"
+# List of environment variables and default values:
+# ENV["CARNAME"] rescue "Corvet-#{(db_size/2).to_i}"
+# ENV["CARCOLOR"] rescue "red"
+# ENV["CARKEYWORD"] rescue "spacious"
+# ENV["DBSIZE"] rescue 50_000
+# ENV["DBSIZE_START"] rescue 1_000
+# ENV["DBSIZE_INCREMENT"] rescue 1_000
+# ENV["REPORT_DIR"] rescue "results"
+# ENV["NBRUN"] rescue 100
+# ENV["MAXINDEXES"] rescue 5_000
+# ENV["FIFO_SIZE"] rescue 10_000
-class DODBCachedCars < DODB::CachedDataBase(Car)
-	property storage_dir : String
-	def initialize(storage_ext = "", remove_previous_data = true)
-		@storage_dir = "test-storage-cars-cached#{storage_ext}"
+class Context
+	class_property report_dir = "results"
+	class_property max_indexes = 5_000
+	class_property nb_run = 100
+	class_property from = 1_000
+	class_property to = 50_000
+	class_property incr = 1_000
+	class_property fifo_size : UInt32 = 10_000
+end
-	if remove_previous_data
-		::FileUtils.rm_rf storage_dir
+# To simplify the creation of graphs, it's better to have fake data for
+# partitions and tags that won't actually be covered.
+# 0 means the absence of data.
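+# A zero-filled report file is still written (by `fake_report` below) so the
+# post-processing scripts (presumably bin/stats.sh from the Makefile) always
+# find one file per (db size, index type) pair.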
+def fake_report(name) + durations = Array(Int32).new Context.nb_run, 0 + File.open("#{Context.report_dir}/#{name}.raw", "w") do |file| + durations.each do |d| + file.puts d end - - super storage_dir - end - - def rm_storage_dir - ::FileUtils.rm_rf @storage_dir end + puts "#{name}: no report" end - -class DODBUnCachedCars < DODB::DataBase(Car) - property storage_dir : String - def initialize(storage_ext = "", remove_previous_data = true) - @storage_dir = "test-storage-cars-uncached#{storage_ext}" - - if remove_previous_data - ::FileUtils.rm_rf storage_dir +def report(storage, name, &block) + durations = run_n_times Context.nb_run, &block + File.open("#{Context.report_dir}/#{name}.raw", "w") do |file| + durations.each do |d| + file.puts d end - - super storage_dir end + avr = durations.reduce { |a, b| a + b } / Context.nb_run + puts "#{name}: #{avr}" + avr +end - def rm_storage_dir - ::FileUtils.rm_rf @storage_dir +def verbose_add_cars(storage, nbcars, name, max_indexes) + long_operation "add #{nbcars} values to #{name}" do + add_cars storage, nbcars, max_indexes: max_indexes end end -class DODBSemiCachedCars < DODB::DataBase(Car) - property storage_dir : String - def initialize(storage_ext = "", remove_previous_data = true) - @storage_dir = "test-storage-cars-semi#{storage_ext}" +# Add first entries, then loop: speed tests, add entries. +def prepare_env(storage, name, s_index, s_partition, s_tags, &) + verbose_add_cars storage, Context.from, name, max_indexes: Context.max_indexes - if remove_previous_data - ::FileUtils.rm_rf storage_dir + current = Context.from + to = Context.to + incr = Context.incr + + while current < to + yield storage, current, name, s_index, s_partition, s_tags + + break if current + incr >= to + + verbose_add_cars storage, incr, name, max_indexes: Context.max_indexes + current += incr + end + + long_operation "removing #{name} data" { storage.rm_storage_dir } +end + +def search_benchmark(storage : DODB::Storage(Car), + current_db_size : Int32, + name : String, + search_name : DODB::Trigger::Index(Car), + search_color : DODB::Trigger::Partition(Car), + search_keywords : DODB::Trigger::Tags(Car)) + name_to_search = ENV["CARNAME"] rescue "Corvet-#{(current_db_size/2).to_i}" + color_to_search = ENV["CARCOLOR"] rescue "red" + keyword_to_search = ENV["CARKEYWORD"] rescue "spacious" + puts "NEW BATCH: db-size #{current_db_size}, name: '#{name_to_search}', color: '#{color_to_search}', tag: '#{keyword_to_search}'" + report(storage, "#{name}_#{current_db_size}_index") do + corvet = search_name.get name_to_search + end + if current_db_size <= Context.max_indexes + report(storage, "#{name}_#{current_db_size}_partitions") do + corvet = search_color.get? color_to_search end - - super storage_dir - end - - def rm_storage_dir - ::FileUtils.rm_rf @storage_dir + report(storage, "#{name}_#{current_db_size}_tags") do + corvet = search_keywords.get? 
keyword_to_search + end + else + fake_report("#{name}_#{current_db_size}_partitions") + fake_report("#{name}_#{current_db_size}_tags") end end -def init_indexes(storage : DODB::Storage) - n = storage.new_index "name", &.name - c = storage.new_partition "color", &.color - k = storage.new_tags "keyword", &.keywords - return n, c, k +def bench_searches() + cars_ram = SPECDB::RAMOnly(Car).new + cars_cached = SPECDB::Cached(Car).new + cars_fifo = SPECDB::Common(Car).new "-#{Context.fifo_size}", Context.fifo_size + cars_semi = SPECDB::Uncached(Car).new "-semi" + cars_uncached = SPECDB::Uncached(Car).new + + ram_Sby_name, ram_Sby_color, ram_Sby_keywords = ram_indexes cars_ram + cached_Sby_name, cached_Sby_color, cached_Sby_keywords = cached_indexes cars_cached + fifo_Sby_name, fifo_Sby_color, fifo_Sby_keywords = cached_indexes cars_fifo + semi_Sby_name, semi_Sby_color, semi_Sby_keywords = cached_indexes cars_semi + uncached_Sby_name, uncached_Sby_color, uncached_Sby_keywords = uncached_indexes cars_uncached + + fn = ->search_benchmark(DODB::Storage(Car), Int32, String, DODB::Trigger::Index(Car), DODB::Trigger::Partition(Car), DODB::Trigger::Tags(Car)) + + prepare_env cars_ram, "ram", ram_Sby_name, ram_Sby_color, ram_Sby_keywords, &fn + prepare_env cars_cached, "cached", cached_Sby_name, cached_Sby_color, cached_Sby_keywords, &fn + prepare_env cars_fifo, "fifo", fifo_Sby_name, fifo_Sby_color, fifo_Sby_keywords, &fn + prepare_env cars_semi, "semi", semi_Sby_name, semi_Sby_color, semi_Sby_keywords, &fn + prepare_env cars_uncached, "uncached", uncached_Sby_name, uncached_Sby_color, uncached_Sby_keywords, &fn end -def init_uncached_indexes(storage : DODB::Storage) - n = storage.new_uncached_index "name", &.name - c = storage.new_uncached_partition "color", &.color - k = storage.new_uncached_tags "keyword", &.keywords - return n, c, k -end - -def add_cars(storage : DODB::Storage, nb_iterations : Int32) +def perform_add(storage : DODB::Storage(Car)) + corvet0 = Car.new "Corvet", "red", [ "shiny", "impressive", "fast", "elegant" ] i = 0 - car1 = Car.new "Corvet", "red", [ "shiny", "impressive", "fast", "elegant" ] - car2 = Car.new "Bullet-GT", "blue", [ "shiny", "fast", "expensive" ] - car3 = Car.new "Deudeuche", "beige", [ "curvy", "sublime" ] - car4 = Car.new "Ford-5", "red", [ "unknown" ] - car5 = Car.new "C-MAX", "gray", [ "spacious", "affordable" ] - - while i < nb_iterations - car1.name = "Corvet-#{i}" - car2.name = "Bullet-GT-#{i}" - car3.name = "Deudeuche-#{i}" - car4.name = "Ford-5-#{i}" - car5.name = "C-MAX-#{i}" - - storage << car1 - storage << car2 - storage << car3 - storage << car4 - storage << car5 + perform_benchmark_average Context.nb_run, do + corvet = corvet0.clone + corvet.name = "Corvet-#{i}" + storage.unsafe_add corvet i += 1 - STDOUT.write "\radding value #{i}".to_slice - end - puts "" -end - -cars_cached = DODBCachedCars.new -cars_uncached = DODBUnCachedCars.new -cars_semi = DODBSemiCachedCars.new - -cached_searchby_name, cached_searchby_color, cached_searchby_keywords = init_indexes cars_cached -uncached_searchby_name, uncached_searchby_color, uncached_searchby_keywords = init_uncached_indexes cars_uncached -semi_searchby_name, semi_searchby_color, semi_searchby_keywords = init_indexes cars_semi - -add_cars cars_cached, 1_000 -add_cars cars_uncached, 1_000 -add_cars cars_semi, 1_000 - -# Searching for data with an index. 
-Benchmark.ips do |x| - x.report("(cars db) searching a data with an index (with a cache)") do - corvet = cached_searchby_name.get "Corvet-500" - end - - x.report("(cars db) searching a data with an index (semi: cache is only on index)") do - corvet = semi_searchby_name.get "Corvet-500" - end - - x.report("(cars db) searching a data with an index (without a cache)") do - corvet = uncached_searchby_name.get "Corvet-500" end end -# Searching for data with a partition. -Benchmark.ips do |x| - x.report("(cars db) searching a data with a partition (with a cache)") do - red_cars = cached_searchby_color.get "red" - end +def bench_add() + cars_ram = SPECDB::RAMOnly(Car).new + cars_cached = SPECDB::Cached(Car).new + cars_fifo = SPECDB::Common(Car).new "-#{Context.fifo_size}", Context.fifo_size + cars_semi = SPECDB::Uncached(Car).new "-semi" + cars_uncached = SPECDB::Uncached(Car).new - x.report("(cars db) searching a data with a partition (semi: cache is only on partition)") do - red_cars = semi_searchby_color.get "red" - end + ram_indexes cars_ram + cached_indexes cars_cached + cached_indexes cars_fifo + cached_indexes cars_semi + uncached_indexes cars_uncached - x.report("(cars db) searching a data with a partition (without a cache)") do - red_cars = uncached_searchby_color.get "red" - end + avr = perform_add(cars_ram) + puts "(ram db and indexes) add a value (average on #{Context.nb_run} tries): #{avr}" + + avr = perform_add(cars_cached) + puts "(cached db and indexes) add a value (average on #{Context.nb_run} tries): #{avr}" + + avr = perform_add(cars_fifo) + puts "(fifo db and cached indexes) add a value (average on #{Context.nb_run} tries): #{avr}" + + avr = perform_add(cars_semi) + puts "(uncached db but cached indexes) add a value (average on #{Context.nb_run} tries): #{avr}" + + avr = perform_add(cars_uncached) + puts "(uncached db and indexes) add a value (average on #{Context.nb_run} tries): #{avr}" + + cars_ram.rm_storage_dir + cars_cached.rm_storage_dir + cars_semi.rm_storage_dir + cars_uncached.rm_storage_dir end -# Searching for data with a tag. 
-Benchmark.ips do |x| - x.report("(cars db) searching a data with a tag (with a cache)") do - red_cars = cached_searchby_keywords.get "spacious" - end +def bench_50_shades_of_fifo() + cars_fifo1 = SPECDB::Common(Car).new "-1k", 1_000 + cars_fifo5 = SPECDB::Common(Car).new "-5k", 5_000 + cars_fifo10 = SPECDB::Common(Car).new "-10k", 10_000 + cars_fifo20 = SPECDB::Common(Car).new "-20k", 20_000 - x.report("(cars db) searching a data with a tag (semi: cache is only on tags)") do - red_cars = semi_searchby_keywords.get "spacious" - end + fifo_Sby_name1, fifo_Sby_color1, fifo_Sby_keywords1 = cached_indexes cars_fifo1 + fifo_Sby_name5, fifo_Sby_color5, fifo_Sby_keywords5 = cached_indexes cars_fifo5 + fifo_Sby_name10, fifo_Sby_color10, fifo_Sby_keywords10 = cached_indexes cars_fifo10 + fifo_Sby_name20, fifo_Sby_color20, fifo_Sby_keywords20 = cached_indexes cars_fifo20 - x.report("(cars db) searching a data with a tag (without a cache)") do - red_cars = uncached_searchby_keywords.get "spacious" - end + fn = ->search_benchmark(DODB::Storage(Car), Int32, String, DODB::Trigger::Index(Car), DODB::Trigger::Partition(Car), DODB::Trigger::Tags(Car)) + + prepare_env cars_fifo1, "fifo1", fifo_Sby_name1, fifo_Sby_color1, fifo_Sby_keywords1, &fn + prepare_env cars_fifo5, "fifo5", fifo_Sby_name5, fifo_Sby_color5, fifo_Sby_keywords5, &fn + prepare_env cars_fifo10, "fifo10", fifo_Sby_name10, fifo_Sby_color10, fifo_Sby_keywords10, &fn + prepare_env cars_fifo20, "fifo20", fifo_Sby_name20, fifo_Sby_color20, fifo_Sby_keywords20, &fn end -cars_cached.rm_storage_dir -cars_uncached.rm_storage_dir +ENV["REPORT_DIR"]?.try { |report_dir| Context.report_dir = report_dir } +Dir.mkdir_p Context.report_dir -cars_cached = DODBCachedCars.new -cars_uncached = DODBUnCachedCars.new +ENV["MAXINDEXES"]?.try { |it| Context.max_indexes = it.to_i } +ENV["NBRUN"]?.try { |it| Context.nb_run = it.to_i } +ENV["DBSIZE"]?.try { |it| Context.to = it.to_i } +ENV["DBSIZE_START"]?.try { |it| Context.from = it.to_i } +ENV["DBSIZE_INCREMENT"]?.try { |it| Context.incr = it.to_i } +ENV["FIFO_SIZE"]?.try { |it| Context.fifo_size = it.to_u32 } -#init_indexes cars_cached -#init_indexes cars_uncached -cached_searchby_name, cached_searchby_color, cached_searchby_keywords = init_indexes cars_cached -uncached_searchby_name, uncached_searchby_color, uncached_searchby_keywords = init_uncached_indexes cars_uncached +puts "REPORT_DIR: #{Context.report_dir}" +puts "MAXINDEXES: #{Context.max_indexes}" +puts "NBRUN: #{Context.nb_run}" +puts "DBSIZE: #{Context.to}" +puts "DBSIZE_START: #{Context.from}" +puts "DBSIZE_INCREMENT: #{Context.incr}" +puts "FIFO_SIZE: #{Context.fifo_size}" -add_cars cars_cached, 1_000 -add_cars cars_uncached, 1_000 - -nb_run = 1000 - -perform_benchmark_average_verbose "(cached) search db with an index", nb_run, do - cached_searchby_name.get "Corvet-500" +if ARGV.size == 0 + puts "Usage: benchmark-cars (fifo|search|add)" + exit 0 end -perform_benchmark_average_verbose "(uncached) search db with an index", nb_run, do - uncached_searchby_name.get "Corvet-500" +case ARGV[0] +when /fifo/ + bench_50_shades_of_fifo +when /search/ + bench_searches +when /add/ + bench_add +else + puts "Usage: benchmark-cars (fifo|search|add)" end - -cars_cached.rm_storage_dir -cars_uncached.rm_storage_dir -cars_semi.rm_storage_dir diff --git a/spec/benchmark-fifo.cr b/spec/benchmark-fifo.cr new file mode 100644 index 0000000..4effcbf --- /dev/null +++ b/spec/benchmark-fifo.cr @@ -0,0 +1,70 @@ +require "benchmark" +require "./utilities.cr" +require 
"../src/fifo.cr" + +def add(fifo : FIFO(Int32) | EfficientFIFO(Int32), nb : UInt32) + i = 0 + while i < nb + fifo << i + i += 1 + end +end + +def report_add(fifo : FIFO(Int32) | EfficientFIFO(Int32), nb : UInt32, fname : String) + File.open("#{Context.report_dir}/#{fname}.raw", "w") do |file| + i = 0 + while i < nb + elapsed_time = perform_something { fifo << i } + i += 1 + file.puts "#{i} #{elapsed_time.total_nanoseconds}" + end + end +end + +class Context + class_property nb_values : UInt32 = 100_000 + class_property fifo_size : UInt32 = 10_000 + class_property report_dir = "results" +end + +if nb_values = ENV["NBVAL"]? + Context.nb_values = nb_values.to_u32 +end + +if fifo_size = ENV["FIFOSIZE"]? + Context.fifo_size = fifo_size.to_u32 +end + +if ARGV.size > 0 + puts "Usage: benchmark-fifo" + puts "" + puts "envvar: REPORT_DIR= where to put the results" + puts "envvar: REPORT_EACH_ADD= to report the duration of each addition of a value in the structure" + puts "envvar: NBVAL= (default: 100_000) nb of values to add to the structure" + puts "envvar: FIFOSIZE= (default: 10_000) max number of values in the structure" + exit 0 +end + +ENV["REPORT_DIR"]?.try { |report_dir| Context.report_dir = report_dir } +Dir.mkdir_p Context.report_dir + +if ENV["REPORT_EACH_ADD"]? + FIFO(Int32).new(Context.fifo_size).tap do |fifo| + report_add fifo, Context.nb_values, "fifo_#{Context.fifo_size}_#{Context.nb_values}" + end + EfficientFIFO(Int32).new(Context.fifo_size).tap do |fifo| + report_add fifo, Context.nb_values, "efficientfifo_#{Context.fifo_size}_#{Context.nb_values}" + end +else + Benchmark.ips do |x| + x.report("adding #{Context.nb_values} values, FIFO limited to #{Context.fifo_size}") do + fifo = FIFO(Int32).new Context.fifo_size + add fifo, Context.nb_values + end + + x.report("adding #{Context.nb_values} values, EfficientFIFO limited to #{Context.fifo_size}") do + fifo = EfficientFIFO(Int32).new Context.fifo_size + add fifo, Context.nb_values + end + end +end diff --git a/spec/benchmark.cr b/spec/benchmark-todo.cr similarity index 89% rename from spec/benchmark.cr rename to spec/benchmark-todo.cr index 16dd521..0f564b3 100644 --- a/spec/benchmark.cr +++ b/spec/benchmark-todo.cr @@ -1,9 +1,7 @@ require "benchmark" +require "./db-ships.cr" -require "../src/dodb.cr" -require "./test-data.cr" - -class DODBCached < DODB::CachedDataBase(Ship) +class DODBCached < DODB::Storage::Cached(Ship) def initialize(storage_ext = "", remove_previous_data = true) storage_dir = "test-storage#{storage_ext}" @@ -15,7 +13,7 @@ class DODBCached < DODB::CachedDataBase(Ship) end end -class DODBUnCached < DODB::DataBase(Ship) +class DODBUnCached < DODB::Storage::Uncached(Ship) def initialize(storage_ext = "", remove_previous_data = true) storage_dir = "test-storage#{storage_ext}" diff --git a/spec/benchmark-utilities.cr b/spec/benchmark-utilities.cr deleted file mode 100644 index 5627560..0000000 --- a/spec/benchmark-utilities.cr +++ /dev/null @@ -1,32 +0,0 @@ -def perform_something(&block) - start = Time.monotonic - yield - Time.monotonic - start -end - -def perform_benchmark_average(ntimes : Int32, &block) - i = 1 - sum = Time::Span.zero - while i <= ntimes - elapsed_time = perform_something &block - sum += elapsed_time - i += 1 - end - - sum / ntimes -end - -def perform_benchmark_average_verbose(title : String, ntimes : Int32, &block) - i = 1 - sum = Time::Span.zero - puts "Execute '#{title}' × #{ntimes}" - while i <= ntimes - elapsed_time = perform_something &block - sum += elapsed_time - STDOUT.write "\relapsed_time: 
#{elapsed_time}, average: #{sum/i}".to_slice - - i += 1 - end - puts "" - puts "Average: #{sum/ntimes}" -end diff --git a/spec/cached.cr b/spec/cached.cr deleted file mode 100644 index c9dd575..0000000 --- a/spec/cached.cr +++ /dev/null @@ -1,402 +0,0 @@ -require "spec" -require "file_utils" - -require "../src/dodb.cr" -require "./test-data.cr" - - -class DODB::SpecDataBase < DODB::CachedDataBase(Ship) - def initialize(storage_ext = "", remove_previous_data = true) - storage_dir = "test-storage#{storage_ext}" - - if remove_previous_data - ::FileUtils.rm_rf storage_dir - end - - super storage_dir - end -end - -describe "DODB::DataBase::Cached" do - describe "basics" do - it "store and get data" do - db = DODB::SpecDataBase.new - - Ship.all_ships.each do |ship| - db << ship - end - - db.to_a.sort.should eq(Ship.all_ships.sort) - end - - it "rewrite already stored data" do - db = DODB::SpecDataBase.new - ship = Ship.all_ships[0] - - key = db << ship - - db[key] = Ship.new "broken" - db[key] = ship - - db[key].should eq(ship) - end - - it "properly remove data" do - db = DODB::SpecDataBase.new - - Ship.all_ships.each do |ship| - db << ship - end - - Ship.all_ships.each do |ship| - db.pop - end - - Ship.all_ships.each_with_index do |ship, i| - # FIXME: Should it raise a particular exception? - expect_raises DODB::MissingEntry do - db[i] - end - - db[i]?.should be_nil - end - end - - it "preserves data on reopening" do - db1 = DODB::SpecDataBase.new - db1 << Ship.kisaragi - - db1.to_a.size.should eq(1) - - db2 = DODB::SpecDataBase.new remove_previous_data: false - db2 << Ship.mutsuki - - # Only difference with DODB::DataBase: for now, concurrent DB cannot coexists. - db2.to_a.size.should eq(2) - end - - it "iterates in normal and reversed order" do - db = DODB::SpecDataBase.new - - Ship.all_ships.each do |ship| - db << ship - end - - # The two #each test iteration. - db.each_with_index do |item, index| - item.should eq Ship.all_ships[index] - end - - db.each_with_index(reversed: true) do |item, index| - item.should eq Ship.all_ships[index] - end - - # Actual reversal is tested here. - db.to_a(reversed: true).should eq db.to_a.reverse - end - - it "respects the provided offsets if any" do - db = DODB::SpecDataBase.new - - Ship.all_ships.each do |ship| - db << ship - end - - db.to_a(start_offset: 0, end_offset: 0)[0]?.should eq Ship.mutsuki - db.to_a(start_offset: 1, end_offset: 1)[0]?.should eq Ship.kisaragi - db.to_a(start_offset: 2, end_offset: 2)[0]?.should eq Ship.yayoi - - db.to_a(start_offset: 0, end_offset: 2).should eq [ - Ship.mutsuki, Ship.kisaragi, Ship.yayoi - ] - end - end - - describe "indices" do - it "do basic indexing" do - db = DODB::SpecDataBase.new - - db_ships_by_name = db.new_index "name", &.name - - Ship.all_ships.each do |ship| - db << ship - end - - Ship.all_ships.each_with_index do |ship| - db_ships_by_name.get?(ship.name).should eq(ship) - end - end - - it "raise on index overload" do - db = DODB::SpecDataBase.new - - db_ships_by_name = db.new_index "name", &.name - - db << Ship.kisaragi - - # Should not be allowed to store an entry whose “name” field - # already exists. 
- expect_raises(DODB::IndexOverload) do - db << Ship.kisaragi - end - end - - it "properly deindex" do - db = DODB::SpecDataBase.new - - db_ships_by_name = db.new_index "name", &.name - - Ship.all_ships.each do |ship| - db << ship - end - - Ship.all_ships.each_with_index do |ship, i| - db.delete i - end - - Ship.all_ships.each do |ship| - db_ships_by_name.get?(ship.name).should be_nil - end - end - - it "properly reindex" do - db = DODB::SpecDataBase.new - - db_ships_by_name = db.new_index "name", &.name - - key = db << Ship.kisaragi - - # We give the old id to the new ship, to get it replaced in - # the database. - some_new_ship = Ship.all_ships[2].clone - - db[key] = some_new_ship - - db[key].should eq(some_new_ship) - - db_ships_by_name.get?(some_new_ship.name).should eq(some_new_ship) - end - - it "properly updates" do - db = DODB::SpecDataBase.new - - db_ships_by_name = db.new_index "name", &.name - - Ship.all_ships.each do |ship| - db << ship - end - - new_kisaragi = Ship.kisaragi.clone.tap do |s| - s.name = "Kisaragi Kai" # Don’t think about it too much. - end - - # We’re changing an indexed value on purpose. - db_ships_by_name.update "Kisaragi", new_kisaragi - - db_ships_by_name.get?("Kisaragi").should be_nil - db_ships_by_name.get?(new_kisaragi.name).should eq new_kisaragi - end - end - - describe "partitions" do - it "do basic partitioning" do - db = DODB::SpecDataBase.new - - db_ships_by_class = db.new_partition "class", &.klass - - Ship.all_ships.each do |ship| - db << ship - end - - Ship.all_ships.each do |ship| - db_ships_by_class.get(ship.klass).should contain(ship) - end - - # We extract the possible classes to do test on them. - ship_classes = Ship.all_ships.map(&.klass).uniq - ship_classes.each do |klass| - partition = db_ships_by_class.get klass - - # A partition on “class” should contain entries that all - # share the same value of “class”. - partition.map(&.klass.==(klass)).reduce { |a, b| - a && b - }.should be_true - end - - db_ships_by_class.get("does-not-exist").should eq [] of Ship - end - - it "removes select elements from partitions" do - db = DODB::SpecDataBase.new - - db_ships_by_class = db.new_partition "class", &.klass - - Ship.all_ships.each do |ship| - db << ship - end - - db_ships_by_class.delete "Mutsuki", &.name.==("Kisaragi") - - Ship.all_ships.map(&.klass).uniq.each do |klass| - partition = db_ships_by_class.get klass - - partition.any?(&.name.==("Kisaragi")).should be_false - end - end - end - - describe "tags" do - it "do basic tagging" do - db = DODB::SpecDataBase.new - - db_ships_by_tags = db.new_tags "tags", &.tags - - Ship.all_ships.each do |ship| - db << ship - end - - db_ships_by_tags.get("flagship").should eq([Ship.flagship]) - - # All returned entries should have the requested tag. - db_ships_by_tags.get("name ship") - .map(&.tags.includes?("name ship")) - .reduce { |a, e| a && e } - .should be_true - - # There shouldn’t be one in our data about WWII Japanese warships… - db_ships_by_tags.get("starship").should eq([] of Ship) - end - - it "properly removes tags" do - db = DODB::SpecDataBase.new - - db_ships_by_tags = db.new_tags "tags", &.tags - - Ship.all_ships.each do |ship| - db << ship - end - - # Removing the “flagship” tag, brace for impact. 
- flagship, index = db_ships_by_tags.get_with_indices("flagship")[0] - flagship.tags = [] of String - db[index] = flagship - - - -# ship, index = db_ships_by_tags.update(tag: "flagship") do |ship, index| -# ship.tags = [] of String -# db[index] = ship -# end - - db_ships_by_tags.get("flagship").should eq([] of Ship) - end - - it "gets items that have multiple tags" do - db = DODB::SpecDataBase.new - - db_ships_by_tags = db.new_tags "tags", &.tags - - Ship.all_ships.each do |ship| - db << ship - end - - results = db_ships_by_tags.get(["flagship", "name ship"]) - results.should eq([Ship.yamato]) - - results = db_ships_by_tags.get(["name ship", "flagship"]) - results.should eq([Ship.yamato]) - - results = db_ships_by_tags.get(["flagship"]) - results.should eq([Ship.yamato]) - end - end - - describe "atomic operations" do - it "safe_get and safe_get?" do - db = DODB::SpecDataBase.new - - db_ships_by_name = db.new_index "name", &.name - - Ship.all_ships.each do |ship| - db << ship - end - - Ship.all_ships.each do |ship| - db_ships_by_name.safe_get ship.name do |results| - results.should eq(ship) - end - - db_ships_by_name.safe_get? ship.name do |results| - results.should eq(ship) - end - end - end - end - - describe "tools" do - it "rebuilds indexes" do - db = DODB::SpecDataBase.new - - db_ships_by_name = db.new_index "name", &.name - db_ships_by_class = db.new_partition "class", &.klass - db_ships_by_tags = db.new_tags "tags", &.tags - - Ship.all_ships.each do |ship| - db << ship - end - - db.reindex_everything! - - Ship.all_ships.each do |ship| - db_ships_by_name.get?(ship.name).should eq(ship) - db_ships_by_class.get(ship.klass).should contain(ship) - end - end - - it "migrates properly" do - ::FileUtils.rm_rf "test-storage-migration-origin" - old_db = DODB::DataBase(PrimitiveShip).new "test-storage-migration-origin" - - old_ships_by_name = old_db.new_index "name", &.name - old_ships_by_class = old_db.new_partition "class", &.class_name - - PrimitiveShip.all_ships.each do |ship| - old_db << ship - end - - # At this point, the “old” DB is filled. Now we need to convert - # to the new DB. - - new_db = DODB::SpecDataBase.new "-migration-target" - - new_ships_by_name = new_db.new_index "name", &.name - new_ships_by_class = new_db.new_partition "class", &.klass - new_ships_by_tags = new_db.new_tags "tags", &.tags - - old_db.each_with_index do |ship, index| - new_ship = Ship.new ship.name, - klass: ship.class_name, - id: ship.id, - tags: Array(String).new.tap { |tags| - tags << "name ship" if ship.name == ship.class_name - } - - new_db[index] = new_ship - end - - # At this point, the conversion is done, so… we’re making a few - # arbitrary tests on the new data. - - old_db.each_with_index do |old_ship, old_index| - ship = new_db[old_index] - - ship.id.should eq(old_ship.id) - ship.klass.should eq(old_ship.class_name) - - ship.tags.any?(&.==("name ship")).should be_true if ship.name == ship.klass - end - end - end -end - diff --git a/spec/db-cars.cr b/spec/db-cars.cr new file mode 100644 index 0000000..65bc407 --- /dev/null +++ b/spec/db-cars.cr @@ -0,0 +1,104 @@ +# This file contains all the necessary code to perform tests based on the following Car database. 
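+# It provides: the `Car` value type, the `ram_indexes`, `cached_indexes` and
+# `uncached_indexes` helpers, and `add_cars` to fill a database with
+# predictable, numbered cars.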
+
+require "json"
+require "../src/dodb.cr"
+require "./spec-database.cr"
+
+class Car
+	include JSON::Serializable
+
+	property name : String # unique to each instance (1-1 relations)
+	property color : String | DODB::NoIndex # a simple attribute (1-n relations)
+	property keywords : Array(String) | DODB::NoIndex # tags about a car, example: "shiny" (n-n relations)
+
+	def_clone
+
+	def initialize(@name, @color, @keywords)
+	end
+	class_getter cars = [
+		Car.new("Corvet", "red", [ "shiny", "impressive", "fast", "elegant" ]),
+		Car.new("SUV", "red", [ "solid", "impressive" ]),
+		Car.new("Mustang", "red", [ "shiny", "impressive", "elegant" ]),
+		Car.new("Bullet-GT", "red", [ "shiny", "impressive", "fast", "elegant" ]),
+		Car.new("GTI", "blue", [ "average" ]),
+		Car.new("Deudeuch", "violet", [ "dirty", "slow", "only French will understand" ])
+	]
+
+	# Equality is true if every property is identical.
+	def ==(other : Car)
+		@name == other.name && @color == other.color && @keywords == other.keywords
+	end
+
+	# Comparison (used for sorting) is based on the name only.
+	def <=>(other : Car)
+		@name <=> other.name
+	end
+end
+
+def ram_indexes(storage : DODB::Storage)
+	n = storage.new_RAM_index "name", &.name
+	c = storage.new_RAM_partition "color", &.color
+	k = storage.new_RAM_tags "keyword", &.keywords
+	return n, c, k
+end
+
+def cached_indexes(storage : DODB::Storage)
+	n = storage.new_index "name", &.name
+	c = storage.new_partition "color", &.color
+	k = storage.new_tags "keyword", &.keywords
+	return n, c, k
+end
+
+def uncached_indexes(storage : DODB::Storage)
+	n = storage.new_uncached_index "name", &.name
+	c = storage.new_uncached_partition "color", &.color
+	k = storage.new_uncached_tags "keyword", &.keywords
+	return n, c, k
+end
+
+# `max_indexes` limits the number of indexes (partitions and tags).
+# Once the last index (db last_key/5) is above this value, the following
+# cars won't be tagged or partitioned.
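+# For instance (hypothetical numbers): `add_cars db, 1_000, max_indexes: 100`
+# adds 5_000 cars (5 per iteration), of which only roughly the first 500 keep
+# their color and keywords indexed; the rest get `DODB.no_index`.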
+def add_cars(storage : DODB::Storage, nb_iterations : Int32, max_indexes = 5000) + last_key = ((storage.last_key + 1) / 5).to_i + i = 0 + car1 = Car.new "Corvet", "red", [ "shiny", "impressive", "fast", "elegant" ] + car2 = Car.new "Bullet-GT", "blue", [ "shiny", "fast", "expensive" ] + car3 = Car.new "Deudeuche", "beige", [ "curvy", "sublime" ] + car4 = Car.new "Ford-5", "red", [ "unknown" ] + car5 = Car.new "C-MAX", "gray", [ "spacious", "affordable" ] + + while i < nb_iterations + car1.name = "Corvet-#{last_key}" + car2.name = "Bullet-GT-#{last_key}" + car3.name = "Deudeuche-#{last_key}" + car4.name = "Ford-5-#{last_key}" + car5.name = "C-MAX-#{last_key}" + + last_key += 1 + + if last_key > max_indexes + car1.color = DODB.no_index + car2.color = DODB.no_index + car3.color = DODB.no_index + car4.color = DODB.no_index + car5.color = DODB.no_index + + car1.keywords = DODB.no_index + car2.keywords = DODB.no_index + car3.keywords = DODB.no_index + car4.keywords = DODB.no_index + car5.keywords = DODB.no_index + end + + storage.unsafe_add car1.clone + storage.unsafe_add car2.clone + storage.unsafe_add car3.clone + storage.unsafe_add car4.clone + storage.unsafe_add car5.clone + + i += 1 + #STDOUT.write "\radding value #{i}".to_slice + end + #puts "" +end diff --git a/spec/test-data.cr b/spec/db-ships.cr similarity index 75% rename from spec/test-data.cr rename to spec/db-ships.cr index 42da67e..b792d5b 100644 --- a/spec/test-data.cr +++ b/spec/db-ships.cr @@ -1,6 +1,9 @@ require "uuid" require "json" +require "../src/dodb.cr" +require "./spec-database.cr" + # FIXME: Split the test data in separate files. We don’t care about those here. class Ship @@ -85,24 +88,3 @@ class PrimitiveShip @@asakaze ] end - -class Car - include JSON::Serializable - - property name : String # unique to each instance (1-1 relations) - property color : String # a simple attribute (1-n relations) - property keywords : Array(String) # tags about a car, example: "shiny" (n-n relations) - - def_clone - - def initialize(@name, @color, @keywords) - end - class_getter cars = [ - Car.new("Corvet", "red", [ "shiny", "impressive", "fast", "elegant" ]), - Car.new("SUV", "red", [ "solid", "impressive" ]), - Car.new("Mustang", "red", [ "shiny", "impressive", "elegant" ]), - Car.new("Bullet-GT", "red", [ "shiny", "impressive", "fast", "elegant" ]), - Car.new("GTI", "blue", [ "average" ]), - Car.new("Deudeuch", "violet", [ "dirty", "slow", "only French will understand" ]) - ] -end diff --git a/spec/spec-database.cr b/spec/spec-database.cr new file mode 100644 index 0000000..f5aa922 --- /dev/null +++ b/spec/spec-database.cr @@ -0,0 +1,51 @@ +class SPECDB::Uncached(V) < DODB::Storage::Uncached(V) + property storage_dir : String + def initialize(storage_ext = "", remove_previous_data = true) + @storage_dir = "specdb-storage-uncached#{storage_ext}" + ::FileUtils.rm_rf storage_dir if remove_previous_data + super storage_dir + end + + def rm_storage_dir + ::FileUtils.rm_rf @storage_dir + end +end + +class SPECDB::Cached(V) < DODB::Storage::Cached(V) + property storage_dir : String + def initialize(storage_ext = "", remove_previous_data = true) + @storage_dir = "specdb-storage-cached#{storage_ext}" + ::FileUtils.rm_rf storage_dir if remove_previous_data + super storage_dir + end + + def rm_storage_dir + ::FileUtils.rm_rf @storage_dir + end +end + +class SPECDB::Common(V) < DODB::Storage::Common(V) + property storage_dir : String + def initialize(storage_ext = "", @max_entries : UInt32 = 5_000, remove_previous_data = true) + @storage_dir = 
"specdb-storage-common-#{@max_entries}#{storage_ext}" + ::FileUtils.rm_rf storage_dir if remove_previous_data + super storage_dir, max_entries + end + + def rm_storage_dir + ::FileUtils.rm_rf @storage_dir + end +end + +class SPECDB::RAMOnly(V) < DODB::Storage::RAMOnly(V) + property storage_dir : String + def initialize(storage_ext = "", remove_previous_data = true) + @storage_dir = "specdb-storage-ram#{storage_ext}" + ::FileUtils.rm_rf storage_dir if remove_previous_data + super storage_dir + end + + def rm_storage_dir + ::FileUtils.rm_rf @storage_dir + end +end diff --git a/spec/test-cars.cr b/spec/test-cars.cr new file mode 100644 index 0000000..a7dd740 --- /dev/null +++ b/spec/test-cars.cr @@ -0,0 +1,103 @@ +require "spec" +require "./db-cars.cr" + +corvet0 = Car.new "Corvet-0", "red", [ "shiny", "impressive", "fast", "elegant" ] + +describe "uncached, cached and ram indexes" do + it "RAM DB - add items, add indexes, search, reindex, search" do + + cars_ram0 = SPECDB::RAMOnly(Car).new "-0" + cars_ram1 = SPECDB::RAMOnly(Car).new "-1" + cars_ram2 = SPECDB::RAMOnly(Car).new "-2" + + add_cars cars_ram0, 1 + add_cars cars_ram1, 1 + add_cars cars_ram2, 1 + + uncached_searchby_name, uncached_searchby_color, uncached_searchby_keywords = uncached_indexes cars_ram0 + cached_searchby_name, cached_searchby_color, cached_searchby_keywords = cached_indexes cars_ram1 + ram_searchby_name, ram_searchby_color, ram_searchby_keywords = ram_indexes cars_ram2 + + uncached_searchby_name.get?("Corvet-0").should be_nil + cached_searchby_name.get?("Corvet-0").should be_nil + ram_searchby_name.get?("Corvet-0").should be_nil + + cars_ram0.reindex_everything! + cars_ram1.reindex_everything! + cars_ram2.reindex_everything! + + # Get the value even if not written on the disk since the index was written on the disk. + # The value is retrieved by the database, the index only reads its key in the database. + uncached_searchby_name.get?("Corvet-0").should eq corvet0 + + # Both cached and RAM indexes can retrieve the value since they store the key. + cached_searchby_name.get?("Corvet-0").should eq corvet0 + ram_searchby_name.get?("Corvet-0").should eq corvet0 + + cars_ram0.rm_storage_dir + cars_ram1.rm_storage_dir + cars_ram2.rm_storage_dir + end +end + +describe "tracking inconsistencies between implementations" do + it "index - partitions - tags" do + cars_ram0 = SPECDB::RAMOnly(Car).new "-0" + cars_ram1 = SPECDB::RAMOnly(Car).new "-1" + cars_ram2 = SPECDB::RAMOnly(Car).new "-2" + cars_fifo = SPECDB::Common(Car).new "-2", 5 + + uncached_searchby_name, uncached_searchby_color, uncached_searchby_keywords = uncached_indexes cars_ram0 + cached_searchby_name, cached_searchby_color, cached_searchby_keywords = cached_indexes cars_ram1 + ram_searchby_name, ram_searchby_color, ram_searchby_keywords = ram_indexes cars_ram2 + fifo_cached_searchby_name, fifo_cached_searchby_color, fifo_cached_searchby_keywords = cached_indexes cars_fifo + + add_cars cars_ram0, 1 + add_cars cars_ram1, 1 + add_cars cars_ram2, 1 + add_cars cars_fifo, 1 + + # Searches should be consistent between all implementations of basic indexes, partitions and tags. + + # Basic index. + uncached_corvet_car = uncached_searchby_name.get? "Corvet-0" + cached_corvet_car = cached_searchby_name.get? "Corvet-0" + ram_corvet_car = ram_searchby_name.get? "Corvet-0" + fifo_cached_corvet_car = fifo_cached_searchby_name.get? 
"Corvet-0" + uncached_corvet_car.should eq cached_corvet_car + uncached_corvet_car.should eq ram_corvet_car + uncached_corvet_car.should eq fifo_cached_corvet_car + uncached_corvet_car.should eq corvet0 + + # Partitions. + red_cars = [ Car.new("Corvet-0", "red", [ "shiny", "impressive", "fast", "elegant" ]), + Car.new("Ford-5-0", "red", [ "unknown" ]) + ] + uncached_red_cars = uncached_searchby_color.get? "red" + cached_red_cars = cached_searchby_color.get? "red" + ram_red_cars = ram_searchby_color.get? "red" + fifo_cached_red_cars = fifo_cached_searchby_color.get? "red" + uncached_red_cars.sort.should eq cached_red_cars.sort + uncached_red_cars.sort.should eq ram_red_cars.sort + uncached_red_cars.sort.should eq fifo_cached_red_cars.sort + uncached_red_cars.sort.should eq red_cars.sort + + # Tags. + fast_cars = [ Car.new("Corvet-0", "red", [ "shiny", "impressive", "fast", "elegant" ]), + Car.new("Bullet-GT-0", "blue", [ "shiny", "fast", "expensive" ]) + ] + uncached_fast_cars = uncached_searchby_keywords.get? "fast" + cached_fast_cars = cached_searchby_keywords.get? "fast" + ram_fast_cars = ram_searchby_keywords.get? "fast" + fifo_cached_fast_cars = fifo_cached_searchby_keywords.get? "fast" + uncached_fast_cars.sort.should eq cached_fast_cars.sort + uncached_fast_cars.sort.should eq ram_fast_cars.sort + uncached_fast_cars.sort.should eq fifo_cached_fast_cars.sort + uncached_fast_cars.sort.should eq fast_cars.sort + + cars_ram0.rm_storage_dir + cars_ram1.rm_storage_dir + cars_ram2.rm_storage_dir + cars_fifo.rm_storage_dir + end +end diff --git a/spec/test-common.cr b/spec/test-common.cr new file mode 100644 index 0000000..bbd78fd --- /dev/null +++ b/spec/test-common.cr @@ -0,0 +1,33 @@ +require "spec" +require "./db-cars.cr" + +describe "SPECDB::Common" do + it "basics, 3 values" do + car0 = Car.new "Corvet-0", "red", [] of String + car1 = Car.new "Corvet-1", "red", [] of String + car2 = Car.new "Corvet-2", "red", [] of String + car3 = Car.new "Corvet-3", "red", [] of String + + db = SPECDB::Common(Car).new "", 3 + + db.data.keys.sort.should eq([] of Int32) + + db << car0 + db.data.keys.sort.should eq([0] of Int32) + + db << car1 + db.data.keys.sort.should eq([0, 1] of Int32) + + db << car2 + db.data.keys.sort.should eq([0, 1, 2] of Int32) + db[0] # Let's use the first value, it shouldn't be the one to be dropped. + + db << car3 + db.data.keys.sort.should eq([0, 2, 3] of Int32) + db.fifo.to_s.should eq "[ 3, 0, 2 ]" + + db.delete 2 + db.data.keys.sort.should eq([0, 3] of Int32) + db.fifo.to_s.should eq "[ 3, 0 ]" + end +end diff --git a/spec/test-fifo.cr b/spec/test-fifo.cr new file mode 100644 index 0000000..5e30118 --- /dev/null +++ b/spec/test-fifo.cr @@ -0,0 +1,54 @@ +require "spec" +require "../src/fifo.cr" + +describe "FIFO" do + it "add and remove values" do + fifo = FIFO(Int32).new 3 # Only 3 allowed entries. + (fifo << 1).should be_nil # there is still room in the fifo + (fifo << 2).should be_nil # there is still room in the fifo + (fifo << 3).should be_nil # last entry without exceeding the allowed size + (fifo << 4).should eq 1 # -> 1 (least recently used data) + (fifo << 4).should be_nil # -> nil (already in the fifo) + (fifo << 2).should be_nil # -> nil (already in the fifo) + (fifo << 5).should eq 3 # -> 3 (least recently used data) + fifo.data.should eq([5, 2, 4] of Int32) + + fifo.delete 2 + fifo.data.should eq([5, 4] of Int32) + end +end + +describe "EfficientFIFO" do + it "add and remove values" do + fifo = EfficientFIFO(Int32).new 3 # Only 3 allowed entries. 
+ (fifo << 1).should be_nil # there is still room in the fifo + (fifo << 2).should be_nil # there is still room in the fifo + (fifo << 3).should be_nil # last entry without exceeding the allowed size + (fifo << 4).should eq 1 # -> 1 (least recently used data) + (fifo << 4).should be_nil # -> nil (already in the fifo) + (fifo << 2).should be_nil # -> nil (already in the fifo) + (fifo << 5).should eq 3 # -> 3 (least recently used data) + fifo.list.to_s.should eq "[ 5, 2, 4 ]" + + fifo.delete 2 + fifo.list.to_s.should eq "[ 5, 4 ]" + + (fifo << 4).should be_nil # -> nil (just a re-order) + fifo.list.to_s.should eq "[ 4, 5 ]" + + fifo.delete 5 + (fifo << 0).should be_nil + fifo.list.to_s.should eq "[ 0, 4 ]" + + (fifo << 1).should be_nil + fifo.list.to_s.should eq "[ 1, 0, 4 ]" + + fifo.delete 4 + fifo.list.to_s.should eq "[ 1, 0 ]" + + fifo.delete 4 + fifo.list.to_s.should eq "[ 1, 0 ]" + fifo.list.size.should eq 2 + fifo.hash.size.should eq 2 + end +end diff --git a/spec/test-lists.cr b/spec/test-lists.cr new file mode 100644 index 0000000..8ff2472 --- /dev/null +++ b/spec/test-lists.cr @@ -0,0 +1,124 @@ +require "spec" +require "../src/list.cr" + +describe "DoubleLinkedList" do + it "add and remove values" do + list = DoubleLinkedList(Int32).new + list.push 1 + list.to_s.should eq "[ 1 ]" + list.push 2 + list.to_s.should eq "[ 1, 2 ]" + list << 3 << 4 + list.to_s.should eq "[ 1, 2, 3, 4 ]" + list.insert_at(8, 1) + list.to_s.should eq "[ 1, 8, 2, 3, 4 ]" + list.insert_at(0, 0) + list.to_s.should eq "[ 0, 1, 8, 2, 3, 4 ]" + list.insert_at(5, 6) + list.to_s.should eq "[ 0, 1, 8, 2, 3, 4, 5 ]" + v = list.pop + v.value.should eq 5 + list.to_s.should eq "[ 0, 1, 8, 2, 3, 4 ]" + v = list.pop + v.value.should eq 4 + list.to_s.should eq "[ 0, 1, 8, 2, 3 ]" + v = list.pop + v.value.should eq 3 + list.to_s.should eq "[ 0, 1, 8, 2 ]" + v = list.pop + v.value.should eq 2 + list.to_s.should eq "[ 0, 1, 8 ]" + v = list.pop + v.value.should eq 8 + list.to_s.should eq "[ 0, 1 ]" + v = list.pop + v.value.should eq 1 + list.to_s.should eq "[ 0 ]" + v = list.pop + v.value.should eq 0 + list.to_s.should eq "[ ]" + + expect_raises DoubleLinkedList::OutOfBounds do + list.pop + end + + expect_raises DoubleLinkedList::OutOfBounds do + list[0] + end + + list.size.should eq 0 + end + + it "insert_at" do + list = DoubleLinkedList(Int32).new + list.insert_at 2, 0 + list.insert_at 1, 0 + list.insert_at 3, 2 + list.insert_at 0, 0 + list.to_s.should eq "[ 0, 1, 2, 3 ]" + end + + it "reverse" do + list = DoubleLinkedList(Int32).new + list.reverse.to_s.should eq "[ ]" + list << 1 << 2 << 3 << 4 + list.reverse.to_s.should eq "[ 4, 3, 2, 1 ]" + end + + it "concat" do + list1 = DoubleLinkedList(Int32).new + list2 = DoubleLinkedList(Int32).new + list1 << 1 << 2 + list2 << 3 << 4 + list1.concat list2 + list1.to_s.should eq "[ 1, 2, 3, 4 ]" + end + + it "+" do + list1 = DoubleLinkedList(Int32).new + list2 = DoubleLinkedList(Int32).new + list1 << 1 << 2 + list2 << 3 << 4 + list3 = list1 + list2 + list1.to_s.should eq "[ 1, 2 ]" + list2.to_s.should eq "[ 3, 4 ]" + list3.to_s.should eq "[ 1, 2, 3, 4 ]" + end + + it "shift" do + list = DoubleLinkedList(Int32).new + list << 1 << 2 << 3 << 4 + list.shift.value.should eq 1 + list.shift.value.should eq 2 + list.shift.value.should eq 3 + list.shift.value.should eq 4 + end + + it "unshift" do + list = DoubleLinkedList(Int32).new + list.unshift 1 + list.unshift 2 + list.unshift 3 + list.unshift 4 + list.pop + node = list.unshift 4 + list.to_s.should eq "[ 4, 4, 3, 2 ]" + list.delete node + 
list.to_s.should eq "[ 4, 3, 2 ]" + end + + it "peek" do + list = DoubleLinkedList(Int32).new + list << 1 << 2 << 3 << 4 + list.peek.value.should eq 4 + list.pop + list.peek.value.should eq 3 + end + + it "delete_at" do + list = DoubleLinkedList(Int32).new + list << 1 << 2 << 3 << 4 + list.delete_at(2).value.should eq 3 + list.to_s.should eq "[ 1, 2, 4 ]" + end +end diff --git a/spec/test-ships.cr b/spec/test-ships.cr new file mode 100644 index 0000000..0bc20ac --- /dev/null +++ b/spec/test-ships.cr @@ -0,0 +1,949 @@ +require "spec" +require "file_utils" +require "./db-ships.cr" + +def fork_process(&) + Process.new Crystal::System::Process.fork { yield } +end + +describe "DODB::Storage::Uncached" do + describe "basics" do + it "store and get data" do + db = SPECDB::Uncached(Ship).new + + Ship.all_ships.each do |ship| + db << ship + end + + db.to_a.sort.should eq(Ship.all_ships.sort) + + db.rm_storage_dir + end + + it "rewrite already stored data" do + db = SPECDB::Uncached(Ship).new + ship = Ship.all_ships[0] + + key = db << ship + + db[key] = Ship.new "broken" + db[key] = ship + + db[key].should eq(ship) + + db.rm_storage_dir + end + + it "properly remove data" do + db = SPECDB::Uncached(Ship).new + + Ship.all_ships.each do |ship| + db << ship + end + + Ship.all_ships.each do |ship| + db.pop + end + + Ship.all_ships.each_with_index do |ship, i| + # FIXME: Should it raise a particular exception? + expect_raises DODB::MissingEntry do + db[i] + end + + db[i]?.should be_nil + end + + db.rm_storage_dir + end + + it "preserves data on reopening" do + db1 = SPECDB::Uncached(Ship).new + db1 << Ship.kisaragi + + db1.to_a.size.should eq(1) + + db2 = SPECDB::Uncached(Ship).new remove_previous_data: false + db2 << Ship.mutsuki + + db1.to_a.size.should eq(2) + + db1.rm_storage_dir + db2.rm_storage_dir + end + + it "iterates in normal and reversed order" do + db = SPECDB::Uncached(Ship).new + + Ship.all_ships.each do |ship| + db << ship + end + + # The two #each test iteration. + db.each_with_key do |item, index| + item.should eq Ship.all_ships[index] + end + + db.each_with_key(reversed: true) do |item, index| + item.should eq Ship.all_ships[index] + end + + # Actual reversal is tested here. + db.to_a(reversed: true).should eq db.to_a.reverse + + db.rm_storage_dir + end + + it "respects the provided offsets if any" do + db = SPECDB::Uncached(Ship).new + + Ship.all_ships.each do |ship| + db << ship + end + + db.to_a(offset: 0, limit: 1)[0]?.should eq Ship.mutsuki + db.to_a(offset: 1, limit: 1)[0]?.should eq Ship.kisaragi + db.to_a(offset: 2, limit: 1)[0]?.should eq Ship.yayoi + + db.to_a(offset: 0, limit: 3).should eq [ + Ship.mutsuki, Ship.kisaragi, Ship.yayoi + ] + + db.rm_storage_dir + end + end + + describe "indices" do + it "do basic indexing" do + db = SPECDB::Uncached(Ship).new + + db_ships_by_name = db.new_index "name", &.name + + Ship.all_ships.each do |ship| + db << ship + end + + Ship.all_ships.each_with_index do |ship| + db_ships_by_name.get?(ship.name).should eq(ship) + end + + db.rm_storage_dir + end + + it "raise on index overload" do + db = SPECDB::Uncached(Ship).new + + db_ships_by_name = db.new_index "name", &.name + + db << Ship.kisaragi + + # Should not be allowed to store an entry whose “name” field + # already exists. 
+ expect_raises(DODB::IndexOverload) do + db << Ship.kisaragi + end + + db.rm_storage_dir + end + + it "properly deindex" do + db = SPECDB::Uncached(Ship).new + + db_ships_by_name = db.new_index "name", &.name + + Ship.all_ships.each do |ship| + db << ship + end + + Ship.all_ships.each_with_index do |ship, i| + db.delete i + end + + Ship.all_ships.each do |ship| + db_ships_by_name.get?(ship.name).should be_nil + end + + db.rm_storage_dir + end + + it "properly reindex" do + db = SPECDB::Uncached(Ship).new + + db_ships_by_name = db.new_index "name", &.name + + key = db << Ship.kisaragi + + # We give the old id to the new ship, to get it replaced in + # the database. + some_new_ship = Ship.all_ships[2].clone + + db[key] = some_new_ship + + db[key].should eq(some_new_ship) + + db_ships_by_name.get?(some_new_ship.name).should eq(some_new_ship) + + db.rm_storage_dir + end + + it "properly updates" do + db = SPECDB::Uncached(Ship).new + + db_ships_by_name = db.new_index "name", &.name + + Ship.all_ships.each do |ship| + db << ship + end + + new_kisaragi = Ship.kisaragi.clone.tap do |s| + s.name = "Kisaragi Kai" # Don’t think about it too much. + end + + # We’re changing an indexed value on purpose. + db_ships_by_name.update "Kisaragi", new_kisaragi + + db_ships_by_name.get?("Kisaragi").should be_nil + db_ships_by_name.get?(new_kisaragi.name).should eq new_kisaragi + + db.rm_storage_dir + end + end + + describe "partitions" do + it "do basic partitioning" do + db = SPECDB::Uncached(Ship).new + + db_ships_by_class = db.new_partition "class", &.klass + + Ship.all_ships.each do |ship| + db << ship + end + + Ship.all_ships.each do |ship| + db_ships_by_class.get(ship.klass).should contain(ship) + end + + # We extract the possible classes to do test on them. + ship_classes = Ship.all_ships.map(&.klass).uniq + ship_classes.each do |klass| + partition = db_ships_by_class.get klass + + # A partition on “class” should contain entries that all + # share the same value of “class”. + partition.map(&.klass.==(klass)).reduce { |a, b| + a && b + }.should be_true + end + + db_ships_by_class.get?("does-not-exist").should eq([] of Ship) + + db.rm_storage_dir + end + + it "removes select elements from partitions" do + db = SPECDB::Uncached(Ship).new + + db_ships_by_class = db.new_partition "class", &.klass + + Ship.all_ships.each do |ship| + db << ship + end + + db_ships_by_class.delete "Mutsuki", &.name.==("Kisaragi") + + Ship.all_ships.map(&.klass).uniq.each do |klass| + partition = db_ships_by_class.get klass + + partition.any?(&.name.==("Kisaragi")).should be_false + end + + db.rm_storage_dir + end + end + + describe "tags" do + it "do basic tagging" do + db = SPECDB::Uncached(Ship).new + + db_ships_by_tags = db.new_tags "tags", &.tags + + Ship.all_ships.each do |ship| + db << ship + end + + db_ships_by_tags.get("flagship").should eq([Ship.flagship]) + + # All returned entries should have the requested tag. + db_ships_by_tags.get("name ship") + .map(&.tags.includes?("name ship")) + .reduce { |a, e| a && e } + .should be_true + + # There shouldn’t be one in our data about WWII Japanese warships… + db_ships_by_tags.get?("starship").should eq([] of Ship) + + db.rm_storage_dir + end + + it "properly removes tags" do + db = SPECDB::Uncached(Ship).new + + db_ships_by_tags = db.new_tags "tags", &.tags + + Ship.all_ships.each do |ship| + db << ship + end + + # Removing the “flagship” tag, brace for impact. 
+ flagship, index = db_ships_by_tags.get_with_keys("flagship")[0] + flagship.tags = [] of String + db[index] = flagship + + + +# ship, index = db_ships_by_tags.update(tag: "flagship") do |ship, index| +# ship.tags = [] of String +# db[index] = ship +# end + + db_ships_by_tags.get("flagship").should eq([] of Ship) + + db.rm_storage_dir + end + + it "gets items that have multiple tags" do + db = SPECDB::Uncached(Ship).new + + db_ships_by_tags = db.new_tags "tags", &.tags + + Ship.all_ships.each do |ship| + db << ship + end + + results = db_ships_by_tags.get(["flagship", "name ship"]) + results.should eq([Ship.yamato]) + + results = db_ships_by_tags.get(["name ship", "flagship"]) + results.should eq([Ship.yamato]) + + results = db_ships_by_tags.get(["flagship"]) + results.should eq([Ship.yamato]) + + db.rm_storage_dir + end + end + + describe "atomic operations" do + it "safe_get and safe_get?" do + db = SPECDB::Uncached(Ship).new + + db_ships_by_name = db.new_index "name", &.name + + Ship.all_ships.each do |ship| + db << ship + end + + Ship.all_ships.each do |ship| + db_ships_by_name.safe_get ship.name do |results| + results.should eq(ship) + end + + db_ships_by_name.safe_get? ship.name do |results| + results.should eq(ship) + end + end + + db.rm_storage_dir + end + end + + describe "tools" do + it "rebuilds indexes" do + db = SPECDB::Uncached(Ship).new + + db_ships_by_name = db.new_index "name", &.name + db_ships_by_class = db.new_partition "class", &.klass + db_ships_by_tags = db.new_tags "tags", &.tags + + Ship.all_ships.each do |ship| + db << ship + end + + db.reindex_everything! + + Ship.all_ships.each do |ship| + db_ships_by_name.get?(ship.name).should eq(ship) + db_ships_by_class.get(ship.klass).should contain(ship) + end + + db.rm_storage_dir + end + + it "migrates properly" do + old_db = SPECDB::Uncached(PrimitiveShip).new "-migration-origin" + + old_ships_by_name = old_db.new_index "name", &.name + old_ships_by_class = old_db.new_partition "class", &.class_name + + PrimitiveShip.all_ships.each do |ship| + old_db << ship + end + + # At this point, the “old” DB is filled. Now we need to convert + # to the new DB. + + new_db = SPECDB::Uncached(Ship).new "-migration-target" + + new_ships_by_name = new_db.new_index "name", &.name + new_ships_by_class = new_db.new_partition "class", &.klass + new_ships_by_tags = new_db.new_tags "tags", &.tags + + old_db.each_with_key do |ship, index| + new_ship = Ship.new ship.name, + klass: ship.class_name, + id: ship.id, + tags: Array(String).new.tap { |tags| + tags << "name ship" if ship.name == ship.class_name + } + + new_db[index] = new_ship + end + + # At this point, the conversion is done, so… we’re making a few + # arbitrary tests on the new data. + + old_db.each_with_key do |old_ship, old_index| + ship = new_db[old_index] + + ship.id.should eq(old_ship.id) + ship.klass.should eq(old_ship.class_name) + + ship.tags.any?(&.==("name ship")).should be_true if ship.name == ship.klass + end + + old_db.rm_storage_dir + new_db.rm_storage_dir + end + end + + describe "parallel support" do + # Not sure how many forks would be safe in a test like that. 
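+		# Each fork is a real OS process (see fork_process above), so these
+		# tests exercise the file-based locks (O_EXCL lock files), which
+		# synchronize access across processes, not just fibers.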
+ fork_count = 25 + entries_per_fork = 100 + + it "works for pushing values" do + db = SPECDB::Uncached(Ship).new + + processes = [] of Process + + fork_count.times do |fork_id| + processes << fork_process do + entries_per_fork.times do |entry_id| + db << Ship.new("entry-#{fork_id}-#{entry_id}", "???") + end + end + end + + processes.each &.wait + + dump = db.to_a + + dump.size.should eq fork_count * entries_per_fork + + db.rm_storage_dir + end + + it "works for updating values" do + db = SPECDB::Uncached(Ship).new + db_entries_by_name = db.new_index "name", &.name + + # First pass, creating data. + processes = [] of Process + fork_count.times do |fork_id| + processes << fork_process do + entries_per_fork.times do |entry_id| + db << Ship.new("entry-#{fork_id}-#{entry_id}", "???") + end + end + end + processes.each &.wait + + # Second pass, updating data. + processes = [] of Process + fork_count.times do |fork_id| + processes << fork_process do + entries_per_fork.times do |entry_id| + db_entries_by_name.update Ship.new("entry-#{fork_id}-#{entry_id}", "???", tags: ["updated"]) + end + end + end + processes.each &.wait + + # Third pass, testing database content. + dump = db.to_a + + fork_count.times do |fork_id| + entries_per_fork.times do |entry_id| + entry = db_entries_by_name.get "entry-#{fork_id}-#{entry_id}" + + entry.tags.should eq ["updated"] + end + end + + db.rm_storage_dir + end + + it "does parallel-safe updates" do + db = SPECDB::Uncached(Ship).new + db_entries_by_name = db.new_index "name", &.name + + # We’ll be storing an integer in the "klass" field, and incrementing + # it in forks in a second time. + db << Ship.new("test", "0") + + processes = [] of Process + fork_count.times do |fork_id| + processes << fork_process do + entries_per_fork.times do |entry_id| + db_entries_by_name.safe_get "test" do |entry| + entry.klass = (entry.klass.to_i + 1).to_s + + db_entries_by_name.update "test", entry + end + end + end + end + processes.each &.wait + + db_entries_by_name.get("test").klass.should eq((fork_count * entries_per_fork).to_s) + + db.rm_storage_dir + end + end +end + +# Basically the same thing as before, with some slight +# differences based on the fact that changing the on-disk data +# won't change the cached one. +describe "DODB::Storage::Cached" do + describe "basics" do + it "store and get data" do + db = SPECDB::Cached(Ship).new + + Ship.all_ships.each do |ship| + db << ship + end + + db.to_a.sort.should eq(Ship.all_ships.sort) + + db.rm_storage_dir + end + + it "rewrite already stored data" do + db = SPECDB::Cached(Ship).new + ship = Ship.all_ships[0] + + key = db << ship + + db[key] = Ship.new "broken" + db[key] = ship + + db[key].should eq(ship) + + db.rm_storage_dir + end + + it "properly remove data" do + db = SPECDB::Cached(Ship).new + + Ship.all_ships.each do |ship| + db << ship + end + + Ship.all_ships.each do |ship| + db.pop + end + + Ship.all_ships.each_with_index do |ship, i| + # FIXME: Should it raise a particular exception? + expect_raises DODB::MissingEntry do + db[i] + end + + db[i]?.should be_nil + end + + db.rm_storage_dir + end + + it "preserves data on reopening" do + db1 = SPECDB::Cached(Ship).new + db1 << Ship.kisaragi + + db1.to_a.size.should eq(1) + + db2 = SPECDB::Cached(Ship).new remove_previous_data: false + db2 << Ship.mutsuki + + # Only difference with DODB::Storage::Uncached: concurrent DB cannot coexists. 
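+			# db1 keeps its own in-RAM copy and never sees db2's write;
+			# db2, however, loaded db1's entry from disk at start-up,
+			# hence the assertion is made on db2.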
+ db2.to_a.size.should eq(2) + + db1.rm_storage_dir + db2.rm_storage_dir + end + + it "iterates in normal and reversed order" do + db = SPECDB::Cached(Ship).new + + Ship.all_ships.each do |ship| + db << ship + end + + # The two #each test iteration. + db.each_with_key do |item, index| + item.should eq Ship.all_ships[index] + end + + db.each_with_key(reversed: true) do |item, index| + item.should eq Ship.all_ships[index] + end + + # Actual reversal is tested here. + db.to_a(reversed: true).should eq db.to_a.reverse + + db.rm_storage_dir + end + + it "respects the provided offsets if any" do + db = SPECDB::Cached(Ship).new + + Ship.all_ships.each do |ship| + db << ship + end + + db.to_a(offset: 0, limit: 1)[0]?.should eq Ship.mutsuki + db.to_a(offset: 1, limit: 1)[0]?.should eq Ship.kisaragi + db.to_a(offset: 2, limit: 1)[0]?.should eq Ship.yayoi + + db.to_a(offset: 0, limit: 3).should eq [ + Ship.mutsuki, Ship.kisaragi, Ship.yayoi + ] + + db.rm_storage_dir + end + end + + describe "indices" do + it "do basic indexing" do + db = SPECDB::Cached(Ship).new + + db_ships_by_name = db.new_index "name", &.name + + Ship.all_ships.each do |ship| + db << ship + end + + Ship.all_ships.each_with_index do |ship| + db_ships_by_name.get?(ship.name).should eq(ship) + end + + db.rm_storage_dir + end + + it "raise on index overload" do + db = SPECDB::Cached(Ship).new + + db_ships_by_name = db.new_index "name", &.name + + db << Ship.kisaragi + + # Should not be allowed to store an entry whose “name” field + # already exists. + expect_raises(DODB::IndexOverload) do + db << Ship.kisaragi + end + + db.rm_storage_dir + end + + it "properly deindex" do + db = SPECDB::Cached(Ship).new + + db_ships_by_name = db.new_index "name", &.name + + Ship.all_ships.each do |ship| + db << ship + end + + Ship.all_ships.each_with_index do |ship, i| + db.delete i + end + + Ship.all_ships.each do |ship| + db_ships_by_name.get?(ship.name).should be_nil + end + + db.rm_storage_dir + end + + it "properly reindex" do + db = SPECDB::Cached(Ship).new + + db_ships_by_name = db.new_index "name", &.name + + key = db << Ship.kisaragi + + # We give the old id to the new ship, to get it replaced in + # the database. + some_new_ship = Ship.all_ships[2].clone + + db[key] = some_new_ship + + db[key].should eq(some_new_ship) + + db_ships_by_name.get?(some_new_ship.name).should eq(some_new_ship) + + db.rm_storage_dir + end + + it "properly updates" do + db = SPECDB::Cached(Ship).new + + db_ships_by_name = db.new_index "name", &.name + + Ship.all_ships.each do |ship| + db << ship + end + + new_kisaragi = Ship.kisaragi.clone.tap do |s| + s.name = "Kisaragi Kai" # Don’t think about it too much. + end + + # We’re changing an indexed value on purpose. + db_ships_by_name.update "Kisaragi", new_kisaragi + + db_ships_by_name.get?("Kisaragi").should be_nil + db_ships_by_name.get?(new_kisaragi.name).should eq new_kisaragi + + db.rm_storage_dir + end + end + + describe "partitions" do + it "do basic partitioning" do + db = SPECDB::Cached(Ship).new + + db_ships_by_class = db.new_partition "class", &.klass + + Ship.all_ships.each do |ship| + db << ship + end + + Ship.all_ships.each do |ship| + db_ships_by_class.get(ship.klass).should contain(ship) + end + + # We extract the possible classes to do test on them. + ship_classes = Ship.all_ships.map(&.klass).uniq + ship_classes.each do |klass| + partition = db_ships_by_class.get klass + + # A partition on “class” should contain entries that all + # share the same value of “class”. 
+ partition.map(&.klass.==(klass)).reduce { |a, b| + a && b + }.should be_true + end + + db_ships_by_class.get?("does-not-exist").should eq([] of Ship) + + db.rm_storage_dir + end + + it "removes select elements from partitions" do + db = SPECDB::Cached(Ship).new + + db_ships_by_class = db.new_partition "class", &.klass + + Ship.all_ships.each do |ship| + db << ship + end + + db_ships_by_class.delete "Mutsuki", &.name.==("Kisaragi") + + Ship.all_ships.map(&.klass).uniq.each do |klass| + partition = db_ships_by_class.get klass + + partition.any?(&.name.==("Kisaragi")).should be_false + end + + db.rm_storage_dir + end + end + + describe "tags" do + it "do basic tagging" do + db = SPECDB::Cached(Ship).new + + db_ships_by_tags = db.new_tags "tags", &.tags + + Ship.all_ships.each do |ship| + db << ship + end + + db_ships_by_tags.get("flagship").should eq([Ship.flagship]) + + # All returned entries should have the requested tag. + db_ships_by_tags.get("name ship") + .map(&.tags.includes?("name ship")) + .reduce { |a, e| a && e } + .should be_true + + # There shouldn’t be one in our data about WWII Japanese warships… + db_ships_by_tags.get?("starship").should eq([] of Ship) + + db.rm_storage_dir + end + + it "properly removes tags" do + db = SPECDB::Cached(Ship).new + + db_ships_by_tags = db.new_tags "tags", &.tags + + Ship.all_ships.each do |ship| + db << ship + end + + # Removing the “flagship” tag, brace for impact. + flagship, index = db_ships_by_tags.get_with_keys("flagship")[0] + flagship = flagship.clone + flagship.tags = [] of String + db[index] = flagship + + + +# ship, index = db_ships_by_tags.update(tag: "flagship") do |ship, index| +# ship.tags = [] of String +# db[index] = ship +# end + + db_ships_by_tags.get("flagship").should eq([] of Ship) + + db.rm_storage_dir + end + + it "gets items that have multiple tags" do + db = SPECDB::Cached(Ship).new + + db_ships_by_tags = db.new_tags "tags", &.tags + + Ship.all_ships.each do |ship| + db << ship + end + + results = db_ships_by_tags.get(["flagship", "name ship"]) + results.should eq([Ship.yamato]) + + results = db_ships_by_tags.get(["name ship", "flagship"]) + results.should eq([Ship.yamato]) + + results = db_ships_by_tags.get(["flagship"]) + results.should eq([Ship.yamato]) + + db.rm_storage_dir + end + end + + describe "atomic operations" do + it "safe_get and safe_get?" do + db = SPECDB::Cached(Ship).new + + db_ships_by_name = db.new_index "name", &.name + + Ship.all_ships.each do |ship| + db << ship + end + + Ship.all_ships.each do |ship| + db_ships_by_name.safe_get ship.name do |results| + results.should eq(ship) + end + + db_ships_by_name.safe_get? ship.name do |results| + results.should eq(ship) + end + end + + db.rm_storage_dir + end + end + + describe "tools" do + it "rebuilds indexes" do + db = SPECDB::Cached(Ship).new + + db_ships_by_name = db.new_index "name", &.name + db_ships_by_class = db.new_partition "class", &.klass + db_ships_by_tags = db.new_tags "tags", &.tags + + Ship.all_ships.each do |ship| + db << ship + end + + db.reindex_everything! 
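+			# reindex_everything! wipes every trigger directory, then
+			# replays all on-disk entries through the triggers.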
+ + Ship.all_ships.each do |ship| + db_ships_by_name.get?(ship.name).should eq(ship) + db_ships_by_class.get(ship.klass).should contain(ship) + end + + db.rm_storage_dir + end + + it "migrates properly" do + old_db = SPECDB::Cached(PrimitiveShip).new "-migration-origin" + + old_ships_by_name = old_db.new_index "name", &.name + old_ships_by_class = old_db.new_partition "class", &.class_name + + PrimitiveShip.all_ships.each do |ship| + old_db << ship + end + + # At this point, the “old” DB is filled. Now we need to convert + # to the new DB. + + new_db = SPECDB::Cached(Ship).new "-migration-target" + + new_ships_by_name = new_db.new_index "name", &.name + new_ships_by_class = new_db.new_partition "class", &.klass + new_ships_by_tags = new_db.new_tags "tags", &.tags + + old_db.each_with_key do |ship, index| + new_ship = Ship.new ship.name, + klass: ship.class_name, + id: ship.id, + tags: Array(String).new.tap { |tags| + tags << "name ship" if ship.name == ship.class_name + } + + new_db[index] = new_ship + end + + # At this point, the conversion is done, so… we’re making a few + # arbitrary tests on the new data. + + old_db.each_with_key do |old_ship, old_index| + ship = new_db[old_index] + + ship.id.should eq(old_ship.id) + ship.klass.should eq(old_ship.class_name) + + ship.tags.any?(&.==("name ship")).should be_true if ship.name == ship.klass + end + + old_db.rm_storage_dir + new_db.rm_storage_dir + end + end +end diff --git a/spec/test.cr b/spec/test.cr deleted file mode 100644 index 13e0685..0000000 --- a/spec/test.cr +++ /dev/null @@ -1,490 +0,0 @@ -require "spec" -require "file_utils" - -require "../src/dodb.cr" -require "./test-data.cr" - - -class DODB::SpecDataBase < DODB::DataBase(Ship) - def initialize(storage_ext = "", remove_previous_data = true) - storage_dir = "test-storage#{storage_ext}" - - if remove_previous_data - ::FileUtils.rm_rf storage_dir - end - - super storage_dir - end -end - -describe "DODB::DataBase" do - describe "basics" do - it "store and get data" do - db = DODB::SpecDataBase.new - - Ship.all_ships.each do |ship| - db << ship - end - - db.to_a.sort.should eq(Ship.all_ships.sort) - end - - it "rewrite already stored data" do - db = DODB::SpecDataBase.new - ship = Ship.all_ships[0] - - key = db << ship - - db[key] = Ship.new "broken" - db[key] = ship - - db[key].should eq(ship) - end - - it "properly remove data" do - db = DODB::SpecDataBase.new - - Ship.all_ships.each do |ship| - db << ship - end - - Ship.all_ships.each do |ship| - db.pop - end - - Ship.all_ships.each_with_index do |ship, i| - # FIXME: Should it raise a particular exception? - expect_raises DODB::MissingEntry do - db[i] - end - - db[i]?.should be_nil - end - end - - it "preserves data on reopening" do - db1 = DODB::SpecDataBase.new - db1 << Ship.kisaragi - - db1.to_a.size.should eq(1) - - db2 = DODB::SpecDataBase.new remove_previous_data: false - db2 << Ship.mutsuki - - db1.to_a.size.should eq(2) - end - - it "iterates in normal and reversed order" do - db = DODB::SpecDataBase.new - - Ship.all_ships.each do |ship| - db << ship - end - - # The two #each test iteration. - db.each_with_index do |item, index| - item.should eq Ship.all_ships[index] - end - - db.each_with_index(reversed: true) do |item, index| - item.should eq Ship.all_ships[index] - end - - # Actual reversal is tested here. 
- db.to_a(reversed: true).should eq db.to_a.reverse - end - - it "respects the provided offsets if any" do - db = DODB::SpecDataBase.new - - Ship.all_ships.each do |ship| - db << ship - end - - db.to_a(start_offset: 0, end_offset: 0)[0]?.should eq Ship.mutsuki - db.to_a(start_offset: 1, end_offset: 1)[0]?.should eq Ship.kisaragi - db.to_a(start_offset: 2, end_offset: 2)[0]?.should eq Ship.yayoi - - db.to_a(start_offset: 0, end_offset: 2).should eq [ - Ship.mutsuki, Ship.kisaragi, Ship.yayoi - ] - end - end - - describe "indices" do - it "do basic indexing" do - db = DODB::SpecDataBase.new - - db_ships_by_name = db.new_index "name", &.name - - Ship.all_ships.each do |ship| - db << ship - end - - Ship.all_ships.each_with_index do |ship| - db_ships_by_name.get?(ship.name).should eq(ship) - end - end - - it "raise on index overload" do - db = DODB::SpecDataBase.new - - db_ships_by_name = db.new_index "name", &.name - - db << Ship.kisaragi - - # Should not be allowed to store an entry whose “name” field - # already exists. - expect_raises(DODB::IndexOverload) do - db << Ship.kisaragi - end - end - - it "properly deindex" do - db = DODB::SpecDataBase.new - - db_ships_by_name = db.new_index "name", &.name - - Ship.all_ships.each do |ship| - db << ship - end - - Ship.all_ships.each_with_index do |ship, i| - db.delete i - end - - Ship.all_ships.each do |ship| - db_ships_by_name.get?(ship.name).should be_nil - end - end - - it "properly reindex" do - db = DODB::SpecDataBase.new - - db_ships_by_name = db.new_index "name", &.name - - key = db << Ship.kisaragi - - # We give the old id to the new ship, to get it replaced in - # the database. - some_new_ship = Ship.all_ships[2].clone - - db[key] = some_new_ship - - db[key].should eq(some_new_ship) - - db_ships_by_name.get?(some_new_ship.name).should eq(some_new_ship) - end - - it "properly updates" do - db = DODB::SpecDataBase.new - - db_ships_by_name = db.new_index "name", &.name - - Ship.all_ships.each do |ship| - db << ship - end - - new_kisaragi = Ship.kisaragi.clone.tap do |s| - s.name = "Kisaragi Kai" # Don’t think about it too much. - end - - # We’re changing an indexed value on purpose. - db_ships_by_name.update "Kisaragi", new_kisaragi - - db_ships_by_name.get?("Kisaragi").should be_nil - db_ships_by_name.get?(new_kisaragi.name).should eq new_kisaragi - end - end - - describe "partitions" do - it "do basic partitioning" do - db = DODB::SpecDataBase.new - - db_ships_by_class = db.new_partition "class", &.klass - - Ship.all_ships.each do |ship| - db << ship - end - - Ship.all_ships.each do |ship| - db_ships_by_class.get(ship.klass).should contain(ship) - end - - # We extract the possible classes to do test on them. - ship_classes = Ship.all_ships.map(&.klass).uniq - ship_classes.each do |klass| - partition = db_ships_by_class.get klass - - # A partition on “class” should contain entries that all - # share the same value of “class”. 
- partition.map(&.klass.==(klass)).reduce { |a, b| - a && b - }.should be_true - end - - db_ships_by_class.get("does-not-exist").should eq [] of Ship - end - - it "removes select elements from partitions" do - db = DODB::SpecDataBase.new - - db_ships_by_class = db.new_partition "class", &.klass - - Ship.all_ships.each do |ship| - db << ship - end - - db_ships_by_class.delete "Mutsuki", &.name.==("Kisaragi") - - Ship.all_ships.map(&.klass).uniq.each do |klass| - partition = db_ships_by_class.get klass - - partition.any?(&.name.==("Kisaragi")).should be_false - end - end - end - - describe "tags" do - it "do basic tagging" do - db = DODB::SpecDataBase.new - - db_ships_by_tags = db.new_tags "tags", &.tags - - Ship.all_ships.each do |ship| - db << ship - end - - db_ships_by_tags.get("flagship").should eq([Ship.flagship]) - - # All returned entries should have the requested tag. - db_ships_by_tags.get("name ship") - .map(&.tags.includes?("name ship")) - .reduce { |a, e| a && e } - .should be_true - - # There shouldn’t be one in our data about WWII Japanese warships… - db_ships_by_tags.get("starship").should eq([] of Ship) - end - - it "properly removes tags" do - db = DODB::SpecDataBase.new - - db_ships_by_tags = db.new_tags "tags", &.tags - - Ship.all_ships.each do |ship| - db << ship - end - - # Removing the “flagship” tag, brace for impact. - flagship, index = db_ships_by_tags.get_with_indice("flagship")[0] - flagship.tags = [] of String - db[index] = flagship - - - -# ship, index = db_ships_by_tags.update(tag: "flagship") do |ship, index| -# ship.tags = [] of String -# db[index] = ship -# end - - db_ships_by_tags.get("flagship").should eq([] of Ship) - end - - it "gets items that have multiple tags" do - db = DODB::SpecDataBase.new - - db_ships_by_tags = db.new_tags "tags", &.tags - - Ship.all_ships.each do |ship| - db << ship - end - - results = db_ships_by_tags.get(["flagship", "name ship"]) - results.should eq([Ship.yamato]) - - results = db_ships_by_tags.get(["name ship", "flagship"]) - results.should eq([Ship.yamato]) - - results = db_ships_by_tags.get(["flagship"]) - results.should eq([Ship.yamato]) - end - end - - describe "atomic operations" do - it "safe_get and safe_get?" do - db = DODB::SpecDataBase.new - - db_ships_by_name = db.new_index "name", &.name - - Ship.all_ships.each do |ship| - db << ship - end - - Ship.all_ships.each do |ship| - db_ships_by_name.safe_get ship.name do |results| - results.should eq(ship) - end - - db_ships_by_name.safe_get? ship.name do |results| - results.should eq(ship) - end - end - end - end - - describe "tools" do - it "rebuilds indexes" do - db = DODB::SpecDataBase.new - - db_ships_by_name = db.new_index "name", &.name - db_ships_by_class = db.new_partition "class", &.klass - db_ships_by_tags = db.new_tags "tags", &.tags - - Ship.all_ships.each do |ship| - db << ship - end - - db.reindex_everything! - - Ship.all_ships.each do |ship| - db_ships_by_name.get?(ship.name).should eq(ship) - db_ships_by_class.get(ship.klass).should contain(ship) - end - end - - it "migrates properly" do - ::FileUtils.rm_rf "test-storage-migration-origin" - old_db = DODB::DataBase(PrimitiveShip).new "test-storage-migration-origin" - - old_ships_by_name = old_db.new_index "name", &.name - old_ships_by_class = old_db.new_partition "class", &.class_name - - PrimitiveShip.all_ships.each do |ship| - old_db << ship - end - - # At this point, the “old” DB is filled. Now we need to convert - # to the new DB. 
- - new_db = DODB::SpecDataBase.new "-migration-target" - - new_ships_by_name = new_db.new_index "name", &.name - new_ships_by_class = new_db.new_partition "class", &.klass - new_ships_by_tags = new_db.new_tags "tags", &.tags - - old_db.each_with_index do |ship, index| - new_ship = Ship.new ship.name, - klass: ship.class_name, - id: ship.id, - tags: Array(String).new.tap { |tags| - tags << "name ship" if ship.name == ship.class_name - } - - new_db[index] = new_ship - end - - # At this point, the conversion is done, so… we’re making a few - # arbitrary tests on the new data. - - old_db.each_with_index do |old_ship, old_index| - ship = new_db[old_index] - - ship.id.should eq(old_ship.id) - ship.klass.should eq(old_ship.class_name) - - ship.tags.any?(&.==("name ship")).should be_true if ship.name == ship.klass - end - end - end - - describe "parallel support" do - # Not sure how many forks would be safe in a test like that. - fork_count = 25 - entries_per_fork = 100 - - it "works for pushing values" do - db = DODB::SpecDataBase.new - - processes = [] of Process - - fork_count.times do |fork_id| - processes << Process.fork do - entries_per_fork.times do |entry_id| - db << Ship.new("entry-#{fork_id}-#{entry_id}", "???") - end - end - end - - processes.each &.wait - - dump = db.to_a - - dump.size.should eq fork_count * entries_per_fork - end - - it "works for updating values" do - db = DODB::SpecDataBase.new - db_entries_by_name = db.new_index "name", &.name - - # First pass, creating data. - processes = [] of Process - fork_count.times do |fork_id| - processes << Process.fork do - entries_per_fork.times do |entry_id| - db << Ship.new("entry-#{fork_id}-#{entry_id}", "???") - end - end - end - processes.each &.wait - - # Second pass, updating data. - processes = [] of Process - fork_count.times do |fork_id| - processes << Process.fork do - entries_per_fork.times do |entry_id| - db_entries_by_name.update Ship.new("entry-#{fork_id}-#{entry_id}", "???", tags: ["updated"]) - end - end - end - processes.each &.wait - - # Third pass, testing database content. - dump = db.to_a - - fork_count.times do |fork_id| - entries_per_fork.times do |entry_id| - entry = db_entries_by_name.get "entry-#{fork_id}-#{entry_id}" - - entry.tags.should eq ["updated"] - end - end - end - - it "does parallel-safe updates" do - db = DODB::SpecDataBase.new - db_entries_by_name = db.new_index "name", &.name - - # We’ll be storing an integer in the "klass" field, and incrementing - # it in forks in a second time. 
- db << Ship.new("test", "0") - - processes = [] of Process - fork_count.times do |fork_id| - processes << Process.fork do - entries_per_fork.times do |entry_id| - db_entries_by_name.safe_get "test" do |entry| - entry.klass = (entry.klass.to_i + 1).to_s - - db_entries_by_name.update "test", entry - end - end - end - end - processes.each &.wait - - db_entries_by_name.get("test").klass.should eq((fork_count * entries_per_fork).to_s) - end - end -end - diff --git a/spec/utilities.cr b/spec/utilities.cr new file mode 100644 index 0000000..ec20b5e --- /dev/null +++ b/spec/utilities.cr @@ -0,0 +1,38 @@ +def perform_something(&block) + start = Time.monotonic + yield + Time.monotonic - start +end + +def perform_benchmark_average(ntimes : Int32, &block) + i = 1 + sum = Time::Span.zero + while i <= ntimes + elapsed_time = perform_something &block + sum += elapsed_time + i += 1 + end + sum / ntimes +end + +def run_n_times(ntimes : Int32, &block) + i = 1 + durations = Array(Float64).new + while i <= ntimes + elapsed_time = perform_something &block + durations << elapsed_time.total_nanoseconds + i += 1 + end + durations +end + +# TODO +def should_nb_files(path : String, expected_nb_files : UInt32) + raise Exception.new "should_nb_files: not implemented yet" +end + +def long_operation(text) + STDOUT.write "#{text}\r".to_slice + yield + STDOUT.write " \r".to_slice +end diff --git a/src/cached.cr b/src/cached.cr deleted file mode 100644 index ad1f6c1..0000000 --- a/src/cached.cr +++ /dev/null @@ -1,115 +0,0 @@ -require "file_utils" -require "json" - -class Hash(K,V) - def reverse - rev = Array(Tuple(K,V)).new - keys = Array(K).new - each_key do |k| - keys << k - end - keys.reverse.each do |k| - rev << {k, self.[k]} - end - - rev - end -end - -class DODB::CachedDataBase(V) < DODB::Storage(V) - @indexers = [] of Indexer(V) - property data = Hash(Int32, V).new - - def initialize(@directory_name : String) - Dir.mkdir_p data_path - Dir.mkdir_p locks_directory - - begin - self.last_index - rescue - self.last_index = -1 - end - - # TODO: load the database in RAM at start-up - DODB::DataBase(V).new(@directory_name).each_with_index do |v, index| - puts "loading value #{v} at index #{index}" - self[index] = v - end - end - - # Getting data from the hash in RAM. - def []?(key : Int32) : V? - @data[key] - rescue e - # FIXME: rescues any error the same way. - return nil - end - - # WARNING: data isn't cloned. - # You have to do it yourself in case you modify any value, - # otherwise you may encounter problems (at least with indexes). - def [](key : Int32) : V - @data[key] rescue raise MissingEntry.new(key) - end - - def []=(index : Int32, value : V) - old_value = self.[index]? - - check_collisions! index, value, old_value - - # Removes any old indices or partitions pointing to a value about - # to be replaced. - if old_value - remove_indexes index, old_value - end - - # Avoids corruption in case the application crashes while writing. - file_path(index).tap do |path| - ::File.write "#{path}.new", value.to_json - ::FileUtils.mv "#{path}.new", path - end - - write_partitions index, value - - if index > last_index - self.last_index = index - end - - @data[index] = value - end - - ## - # Can be useful for making dumps or to restore a database. - def each_with_index(reversed : Bool = false, start_offset = 0, end_offset : Int32? = nil) - i = -1 # do not trust key to be the right index - (reversed ? @data.reverse : @data).each do |index, v| - i += 1 - next if start_offset > i - break unless end_offset.nil? 
|| i <= end_offset - - yield v, index - end - end - - def delete(key : Int32) - value = self[key]? - - return if value.nil? - - begin - ::File.delete file_path key - rescue - # FIXME: Only intercept “no such file" errors - end - - remove_indexes key, value - - @data.delete key - value - end - - private def remove_data! - super - @data = Hash(Int32, V).new - end -end diff --git a/src/dodb.cr b/src/dodb.cr index 82dc5b8..a656200 100644 --- a/src/dodb.cr +++ b/src/dodb.cr @@ -1,400 +1,6 @@ require "file_utils" require "json" -require "./dodb/*" - -abstract class DODB::Storage(V) - property directory_name : String - - def initialize(@directory_name : String) - end - - def request_lock(name, subname = nil) - r = -1 - file_path = get_lock_file_path name, subname - file_perms = 0o644 - - flags = LibC::O_EXCL | LibC::O_CREAT - while (r = LibC.open file_path, flags, file_perms) == -1 - sleep 1.milliseconds - end - - LibC.close r - end - def release_lock(name, subname = nil) - File.delete get_lock_file_path name, subname - end - - private def index_file - "#{@directory_name}/last-index" - end - def last_index : Int32 - File.read(index_file).to_i - end - def last_index=(x : Int32) - file = File.open(index_file, "w") - - file << x.to_s - - file.close - - x - rescue - raise Exception.new "could not update index file" - end - - def stringify_key(key : Int32) - # Negative numbers give strange results with Crystal’s printf. - if key >= 0 - "%010i" % key - else - key.to_s - end - end - - def <<(item : V) - request_lock "index" - index = last_index + 1 - self[index] = item - self.last_index = index - - release_lock "index" - - index # FIXME: Should we really return the internal key? - rescue e - release_lock "index" - - raise e - end - - def each(reversed : Bool = false, start_offset = 0, end_offset : Int32? = nil) - each_with_index( - reversed: reversed, - start_offset: start_offset, - end_offset: end_offset - ) do |item, index| - yield item - end - end - - ## - # CAUTION: Very slow. Try not to use. - def to_a(reversed : Bool = false, start_offset = 0, end_offset : Int32? = nil) - array = ::Array(V).new - - each( - reversed: reversed, - start_offset: start_offset, - end_offset: end_offset - ) do |value| - array << value - end - - array - end - - ## - # CAUTION: Very slow. Try not to use. - def to_h(reversed : Bool = false, start_offset = 0, end_offset : Int32? = nil) - hash = ::Hash(Int32, V).new - - each_with_index( - reversed: reversed, - start_offset: start_offset, - end_offset: end_offset - ) do |element, index| - hash[index] = element - end - - hash - end - - ## - # name is the name that will be used on the file system. 
- def new_index(name : String, &block : Proc(V, String)) - CachedIndex(V).new(self, @directory_name, name, block).tap do |indexer| - @indexers << indexer - end - end - - def new_nilable_index(name : String, &block : Proc(V, String | DODB::NoIndex)) - CachedIndex(V).new(self, @directory_name, name, block).tap do |indexer| - @indexers << indexer - end - end - - def new_uncached_index(name : String, &block : Proc(V, String)) - Index(V).new(self, @directory_name, name, block).tap do |indexer| - @indexers << indexer - end - end - - def new_nilable_uncached_index(name : String, &block : Proc(V, String | DODB::NoIndex)) - Index(V).new(self, @directory_name, name, block).tap do |indexer| - @indexers << indexer - end - end - - def get_index(name : String, key) - index = @indexers.find &.name.==(name) - - index.not_nil!.as(DODB::Index).get key - end - - ## - # name is the name that will be used on the file system. - def new_partition(name : String, &block : Proc(V, String)) - CachedPartition(V).new(self, @directory_name, name, block).tap do |table| - @indexers << table - end - end - - def new_uncached_partition(name : String, &block : Proc(V, String)) - Partition(V).new(self, @directory_name, name, block).tap do |table| - @indexers << table - end - end - - def get_partition(table_name : String, partition_name : String) - partition = @indexers.find &.name.==(table_name) - - partition.not_nil!.as(DODB::Partition).get partition_name - end - - def write_partitions(key : Int32, value : V) - @indexers.each &.index(stringify_key(key), value) - end - - def new_tags(name : String, &block : Proc(V, Array(String))) - CachedTags(V).new(self, @directory_name, name, block).tap do |tags| - @indexers << tags - end - end - - def new_uncached_tags(name : String, &block : Proc(V, Array(String))) - Tags(V).new(self, @directory_name, name, block).tap do |tags| - @indexers << tags - end - end - - def get_tags(name, key : String) - tag = @indexers.find &.name.==(name) - - tag.not_nil!.as(DODB::Tags).get name, key - end - - def new_directed_graph(name : String, index : DODB::Index(V), &block : Proc(V, Array(String))) : DirectedGraph(V) - DirectedGraph(V).new(self, @directory_name, index, name, block).tap do |table| - @indexers << table - end - end - - def check_collisions!(key : Int32, value : V, old_value : V?) - @indexers.each &.check!(stringify_key(key), value, old_value) - end - - def pop - request_lock "index" - - index = last_index - - # Some entries may have been removed. We’ll skip over those. - # Not the most efficient if a large number of indices are empty. - while index >= 0 && self[index]?.nil? - index = index - 1 - end - - if index < 0 - return nil - end - - poped = self[index] - - self.delete index - - last_index = index - 1 - - release_lock "index" - - poped - end - - private def data_path - "#{@directory_name}/data" - end - - private def file_path(key : Int32) - "#{data_path}/%010i" % key - end - - private def locks_directory : String - "#{@directory_name}/locks" - end - - private def get_lock_file_path(name : String, subname : String? = nil) - if subname - "#{locks_directory}/#{name}-#{subname}.lock" # FIXME: Separator that causes less collisions? - else - "#{locks_directory}/#{name}.lock" - end - end - - private def read(file_path : String) - V.from_json ::File.read file_path - end - - private def remove_data! - FileUtils.rm_rf data_path - Dir.mkdir_p data_path - end - - private def remove_indexing! 
- @indexers.each do |indexer| - FileUtils.rm_rf indexer.indexing_directory - end - end - - # A very slow operation that removes all indices and then rewrites - # them all. - # FIXME: Is this really useful in its current form? We should remove the - # index directories, not the indices based on our current (and - # possiblly different from what’s stored) data. - def reindex_everything! - old_data = to_h - - remove_indexing! - remove_data! - - old_data.each do |index, item| - self[index] = item - end - end - - def remove_indexes(key : Int32, value : V) - @indexers.each &.deindex(stringify_key(key), value) - end - - def []?(key : Int32) : V? - self[key] - rescue MissingEntry - # FIXME: Only rescue JSON and “no such file” errors. - return nil - end - - abstract def [](key : Int32) - abstract def delete(key : Int32) -end - -class DODB::DataBase(V) < DODB::Storage(V) - @indexers = [] of Indexer(V) - - def initialize(@directory_name : String) - Dir.mkdir_p data_path - Dir.mkdir_p locks_directory - - begin - self.last_index - rescue - self.last_index = -1 - end - end - - def [](key : Int32) : V - raise MissingEntry.new(key) unless ::File.exists? file_path key - - read file_path key - end - - def []=(index : Int32, value : V) - old_value = self.[index]? - - check_collisions! index, value, old_value - - # Removes any old indices or partitions pointing to a value about - # to be replaced. - if old_value - remove_indexes index, old_value - end - - # Avoids corruption in case the application crashes while writing. - file_path(index).tap do |path| - ::File.write "#{path}.new", value.to_json - ::FileUtils.mv "#{path}.new", path - end - - write_partitions index, value - - if index > last_index - self.last_index = index - end - end - - def delete(key : Int32) - value = self[key]? - - return if value.nil? - - begin - ::File.delete file_path key - rescue File::NotFoundError - end - - remove_indexes key, value - - value - end - - private def each_key(reversed = false) - start = 0 - _end = last_index - step = 1 - - if reversed - start = _end - _end = 0 - step = -1 - end - - key = start - while step == 1 ? key <= _end : key >= _end - full_path = file_path key - - if File.exists? full_path - yield key, full_path - end - - key = key + step - end - end - - ## - # CAUTION: Very slow. Try not to use. - # Can be useful for making dumps or to restore a database, however. - def each_with_index(reversed : Bool = false, start_offset = 0, end_offset : Int32? = nil) - dirname = data_path - - offset = -1 - - each_key(reversed) do |key, path| - offset += 1 - - if offset < start_offset - next - end - if !end_offset.nil? && offset > end_offset - next - end - - begin - # FIXME: Only intercept JSON parsing errors. - field = read path - rescue - next - end - - yield field, key - end - end -end - -require "./cached.cr" +require "./fifo.cr" # FIFO class to implement a cache policy. +require "./list.cr" # Double Linked List. +require "./dodb/*" # Databases and indexes (basic indexes, partitions, tags, etc.). diff --git a/src/dodb/exceptions.cr b/src/dodb/exceptions.cr index d41bb9f..501d3c7 100644 --- a/src/dodb/exceptions.cr +++ b/src/dodb/exceptions.cr @@ -1,17 +1,30 @@ - +# Exception `DODB::MissingEntry` is thrown anytime the database +# tries to retrieve a value with an invalid key. class DODB::MissingEntry < Exception getter index : String? 
- getter key : String | Int32 + getter key : Array(String) | String | Int32 - def initialize(@index, @key) + # The exception will contain both the key and the name of the index, + # and a human-readable string. + def initialize(@index, @key : String | Int32) super "no entry in index '#{@index}' for key '#{@key}'" end + # The exception will contain the name of the index and all requested keys, + # and a human-readable string. + def initialize(@index, @key : Array(String)) + super "no entry in index '#{@index}' for keys '#{key.join('-')}'" + end + + # The exception will contain the key and a human-readable string. def initialize(@key) super "no entry for key '#{@key}' in database" end end +# Exception `DODB::IndexOverload` is thrown anytime there is a +# collision with an index. This is currently only used by `DODB::Index` +# since other indexes don't have collisions. class DODB::IndexOverload < Exception end diff --git a/src/dodb/index.cr b/src/dodb/index.cr deleted file mode 100644 index c559532..0000000 --- a/src/dodb/index.cr +++ /dev/null @@ -1,202 +0,0 @@ -require "file_utils" - -require "./exceptions.cr" -require "./indexer.cr" - -class DODB::Index(V) < DODB::Indexer(V) - property name : String - property key_proc : Proc(V, String | NoIndex) | Proc(V, String) - getter storage_root : String - - @storage : DODB::Storage(V) - - def initialize(@storage, @storage_root, @name, @key_proc) - Dir.mkdir_p indexing_directory - end - - def check!(key, value, old_value) - index_key = key_proc.call value - - symlink = file_path_index index_key.to_s - - # FIXME: Check it’s not pointing to “old_value”, if any, before raising. - if ::File.exists? symlink - if old_value - old_key = key_proc.call old_value - return if symlink == file_path_index old_key.to_s - end - - raise IndexOverload.new "index '#{@name}' is overloaded for key '#{key}', file #{symlink} exists" - end - end - - def index(key, value) - index_key = key_proc.call value - - return if index_key.is_a? NoIndex - - symlink = file_path_index index_key - - Dir.mkdir_p ::File.dirname symlink - - # FIXME: Now that this is done in check!, can we remove it? - if ::File.exists? symlink - raise Exception.new "symlink already exists: #{symlink}" - end - - ::File.symlink get_data_symlink_index(key), symlink - end - - def deindex(key, value) - index_key = key_proc.call value - - return if index_key.is_a? NoIndex - - symlink = file_path_index index_key - - begin - ::File.delete symlink - rescue File::NotFoundError - end - end - - # Get the key (ex: 343) for an entry in the DB. - # Without caching, it translates to walk the file-system in `db/indices/by_#{name}/`. - def get_key(index : String) : Int32 - get_key_on_fs index - end - - def get(index : String) : V - @storage[get_key index] - end - - def get?(index : String) : V? - get index - rescue MissingEntry - nil - end - - # FIXME: Unlock on exception. - def safe_get(index : String) : Nil - @storage.request_lock @name, index - internal_key = get_key(index).to_s - @storage.request_lock internal_key - - yield get index - - @storage.release_lock internal_key - @storage.release_lock @name, index - end - - def safe_get?(index : String, &block : Proc(V | Nil, Nil)) : Nil - safe_get index, &block - rescue MissingEntry - yield nil - end - - def get_key_on_fs(index : String) : Int32 - file_path = file_path_index index - - raise MissingEntry.new(@name, index) unless ::File.exists? 
file_path - - ::File.readlink(file_path).sub(/^.*\//, "").to_i - end - - def get_with_key(index : String) : Tuple(V, Int32) - key = get_key index - - value = @storage[key] - - {value, key} - end - - # in case new_value hasn't changed its index - def update(new_value : V) - index = key_proc.call new_value - - raise Exception.new "new value is not indexable" if index.is_a? NoIndex - - update index, new_value - end - - def update(index : String, new_value : V) - key = get_key index - - @storage[key] = new_value - end - - def update_or_create(index : String, new_value : V) - update index, new_value - rescue MissingEntry - @storage << new_value - end - - def delete(index : String) - key = get_key index - - @storage.delete key - end - - def indexing_directory : String - "#{@storage_root}/indices/by_#{@name}" - end - - # FIXME: Now that it’s being used outside of this class, name it properly. - def file_path_index(index_key : String) - "#{indexing_directory}/#{index_key}" - end - - private def get_data_symlink_index(key : String) - "../../data/#{key}" - end -end - -class DODB::CachedIndex(V) < DODB::Index(V) - # This hash contains the relation between the index key and the data key. - property data = Hash(String, Int32).new - - def check!(key, value, old_value) - index_key = key_proc.call value - - # FIXME: Check it’s not pointing to “old_value”, if any, before raising. - if data[index_key]? - if old_value - old_key = key_proc.call old_value - return if index_key == old_key - end - - raise IndexOverload.new "index '#{@name}' is overloaded for key '#{key}'" - end - end - - def index(key, value) - super(key, value) - - index_key = key_proc.call value - return if index_key.is_a? NoIndex - - @data[index_key] = key.to_i - end - - def deindex(key, value) - super(key, value) - - index_key = key_proc.call value - return if index_key.is_a? NoIndex - - @data.delete index_key - end - - # Get the key (ex: 343) for an entry in the DB. - # With caching, the key is probably stored in a hash, or we'll search in the FS. - def get_key(index : String) : Int32 - if k = @data[index]? - k - elsif k = get_key_on_fs(index) - @data[index] = k - k - else - raise MissingEntry.new(@name, index) - end - end -end diff --git a/src/dodb/indexer.cr b/src/dodb/indexer.cr deleted file mode 100644 index 807cfd9..0000000 --- a/src/dodb/indexer.cr +++ /dev/null @@ -1,10 +0,0 @@ - -abstract class DODB::Indexer(V) - abstract def index (key : String, value : V) - abstract def deindex (key : String, value : V) - abstract def check! (key : String, value : V, old_value : V?) - abstract def name : String - - abstract def indexing_directory : String -end - diff --git a/src/dodb/no_index.cr b/src/dodb/no_index.cr index 37e7a1c..8c0d929 100644 --- a/src/dodb/no_index.cr +++ b/src/dodb/no_index.cr @@ -1,8 +1,17 @@ - +# In case a value doesn't have the attribute to be indexed. class DODB::NoIndex + include JSON::Serializable + + def_clone + + def initialize() + end end module DODB + # Since the `NoIndex` class doesn't convey any value, + # there is no point creating multiple instances. + # Use `DODB#no_index` any time a `NoIndex` instance is required. 
class_getter no_index = NoIndex.new end diff --git a/src/dodb/partition.cr b/src/dodb/partition.cr deleted file mode 100644 index 53d68e1..0000000 --- a/src/dodb/partition.cr +++ /dev/null @@ -1,153 +0,0 @@ -require "file_utils" - -require "./indexer.cr" - -class DODB::Partition(V) < DODB::Indexer(V) - property name : String - property key_proc : Proc(V, String) - getter storage_root : String - - # Required to remove an entry in the DB. - @storage : DODB::Storage(V) - - def initialize(@storage, @storage_root, @name, @key_proc) - ::Dir.mkdir_p indexing_directory - end - - def check!(key, value, old_value) - return true # Partitions don’t have collisions or overloads. - end - - def index(key, value) - partition = key_proc.call value - - symlink = get_partition_symlink(partition, key) - - Dir.mkdir_p ::File.dirname symlink - - # FIXME: Should not happen anymore. Should we remove this? - ::File.delete symlink if ::File.exists? symlink - - ::File.symlink get_data_symlink(key), symlink - end - - def deindex(key, value) - partition = key_proc.call value - - symlink = get_partition_symlink(partition, key) - - begin - ::File.delete symlink - rescue File::NotFoundError - end - end - - def get(partition) : Array(V) - r_value = Array(V).new - - partition_directory = indexing_directory partition - - return r_value unless Dir.exists? partition_directory - - Dir.each_child partition_directory do |child| - r_value << @storage[get_key child] - end - - r_value - end - - def get?(partition) : Array(V)? - get partition - rescue MissingEntry - nil - end - - def delete(partition) - delete partition, do true end - end - - def delete(partition, &matcher) - partition_directory = indexing_directory partition - - return unless Dir.exists? partition_directory - - Dir.each_child partition_directory do |child| - key = get_key child - item = @storage[key] - - if yield item - @storage.delete key - end - end - end - - def indexing_directory : String - "#{@storage_root}/partitions/by_#{@name}" - end - - private def get_key(path : String) : Int32 - path.sub(/^.*\//, "").to_i - end - - private def indexing_directory(partition) - "#{indexing_directory}/#{partition}" - end - - private def get_partition_symlink(partition : String, key : String) - "#{indexing_directory partition}/#{key}" - end - - private def get_data_symlink(key : String) - "../../../data/#{key}" - end -end - -class DODB::CachedPartition(V) < DODB::Partition(V) - # This hash contains the relation between the index key and the data keys. - property data = Hash(String, Array(Int32)).new - - def index(key, value) - super(key, value) - partition = key_proc.call value - - array = if v = @data[partition]? - v - else - Array(Int32).new - end - array << key.to_i - - @data[partition] = array - end - - def deindex(key, value) - super(key, value) - partition = key_proc.call value - - if v = @data[partition]? - v.delete key.to_i - @data[partition] = v - end - end - - def get(partition) - r_value = Array(Tuple(V, Int32)).new - - if keys = @data[partition]? - keys.each do |data_key| - r_value << { @storage[data_key], data_key } - end - else - # Get the key from the database representation on the file-system. - partition_directory = indexing_directory partition - raise MissingEntry.new(@name, partition) unless Dir.exists? 
partition_directory - - Dir.each_child partition_directory do |child| - r_value << { @storage[get_key child], get_key child } - end - - @data[partition] = r_value.map &.[1] - end - r_value.map &.[0] - end -end diff --git a/src/dodb/storage.cr b/src/dodb/storage.cr new file mode 100644 index 0000000..41e9d6c --- /dev/null +++ b/src/dodb/storage.cr @@ -0,0 +1,477 @@ +# The `DODB::Storage` abstract class defines the specifications of +# subsequent DODB databases (uncached, cached, RAM-only, etc.). +abstract class DODB::Storage(V) + # List of triggers (basic indexes, partitions, tags, etc.). + @triggers = [] of Trigger(V) + + property cached_last_key : Int32 + + # Directory where data and triggers will be written. + property directory_name : String + + # Creates a database. + # + # A DODB database is instanciated with a *path* where data will be written. + # Another directory is created where locks can be written. + # In case the database is empty, the *last_key* is set to *-1*. + def initialize(@directory_name : String) + Dir.mkdir_p data_path + Dir.mkdir_p locks_directory + + @cached_last_key = init_last_key + end + + # Requests a (named) lock. + # Locks prevent concurrent access to the same data. + # + # In case of a request for a lock that is already in use, + # wait for a millisecond then retry, loop until it works. + # A lock is simply an opened file with the `LibC::O_EXCL` flag. + def request_lock(name, subname = nil) + r = -1 + file_path = get_lock_file_path name, subname + file_perms = 0o644 + + flags = LibC::O_EXCL | LibC::O_CREAT + while (r = LibC.open file_path, flags, file_perms) == -1 + sleep 1.milliseconds + end + + LibC.close r + end + + # Releases a (named) lock. + # + # The implementation is simple, it just removes the file. + def release_lock(name, subname = nil) + File.delete get_lock_file_path name, subname + end + + private def key_file + "#{@directory_name}/last-key" + end + + # Reads the last database *key* from the storage device. + def init_last_key : Int32 + File.read(key_file).to_i + rescue + -1 + end + + # Reads the (cached) last key. + def last_key : Int32 + @cached_last_key + end + + # Changes the last *key* in the database. + def last_key=(x : Int32) + file = File.open(key_file, "w") + file << x.to_s + file.close + + @cached_last_key = x + + x + rescue + raise Exception.new "could not update last-key file" + end + + # Take a database key and convert it in a formated string. Example: 343 -> "0000000343" + def stringify_key(key : Int32) + # Negative numbers give strange results with Crystal’s printf. + if key >= 0 + "%010i" % key + else + key.to_s + end + end + + # Adds a value to the database without a locking mechanism. + # + # For a thread-safe version, use `#<<`. + # + # WARNING: not thread-safe. + def unsafe_add(item : V) + key = last_key + 1 + + self[key] = item + self.last_key = key + + key # FIXME: Should we really return the internal key? + rescue e + raise e + end + + # Adds a value to the database, with a locking mechanism to prevent race conditions. + # + # This operation should be thread-safe since a lock is required before tinkering with the database. + # Because of the file-system operations, this function may be a bit slow. + # For single-thread applications, use the `#unsafe_add` operation instead. + def <<(item : V) + request_lock "key" + key = init_last_key + 1 + self[key] = item + self.last_key = key + + release_lock "key" + + key # FIXME: Should we really return the internal key? 
+ rescue e + release_lock "key" + + raise e + end + + # Lists all entries in the database. + # + # WARNING: Very slow. Try not to use. + def each(reversed : Bool = false, offset = 0, limit : Int32? = nil) + each_with_key( + reversed: reversed, + offset: offset, + limit: limit + ) do |item, key| + yield item + end + end + + # Converts all the database into an array. + # + # WARNING: Very slow. Try not to use. + def to_a(reversed : Bool = false, offset = 0, limit : Int32? = nil) + array = ::Array(V).new + + each( + reversed: reversed, + offset: offset, + limit: limit + ) do |value| + array << value + end + + array + end + + # Converts the entire database into a hash. + # + # WARNING: Very slow. Try not to use. + def to_h(reversed : Bool = false, offset = 0, limit : Int32? = nil) + hash = ::Hash(Int32, V).new + + each_with_key( + reversed: reversed, + offset: offset, + limit: limit + ) do |element, key| + hash[key] = element + end + + hash + end + + # Run triggers (indexes, partitions, tags, etc.) for a value. + def run_triggers(key : Int32, value : V) + @triggers.each &.index(stringify_key(key), value) + end + + # Creates a new basic index **with a cache**. + # The *name* parameter is the name of the directory that will be created. + def new_index(name : String, &block : Proc(V, String | DODB::NoIndex)) + Trigger::IndexCached(V).new(self, @directory_name, name, block).tap do |trigger| + @triggers << trigger + end + end + + # Creates a new basic index **without a cache**. + # The *name* parameter is the name of the directory that will be created. + # + # NOTE: this will be a lot slower than the cached version. + def new_uncached_index(name : String, &block : Proc(V, String | DODB::NoIndex)) + Trigger::Index(V).new(self, @directory_name, name, block).tap do |trigger| + @triggers << trigger + end + end + + # Creates a new basic index **only in RAM**. + # The *name* parameter is the name of the directory that will be created. + # + # NOTE: this index is the fastest, but doesn't have a file-system representation. + def new_RAM_index(name : String, &block : Proc(V, String | DODB::NoIndex)) + Trigger::IndexRAMOnly(V).new(self, @directory_name, name, block).tap do |trigger| + @triggers << trigger + end + end + + # Gets an *index object* based on its name. + def get_index(name : String, key) + index = @triggers.find &.name.==(name) + index.not_nil!.as(Trigger).get key + end + + # Creates a new partition **with a cache**. + # The *name* parameter is the name of the directory that will be created. + def new_partition(name : String, &block : Proc(V, String | DODB::NoIndex)) + Trigger::PartitionCached(V).new(self, @directory_name, name, block).tap do |table| + @triggers << table + end + end + + # Creates a new partition **without a cache**. + # The *name* parameter is the name of the directory that will be created. + # + # NOTE: this will be a lot slower than the cached version. + def new_uncached_partition(name : String, &block : Proc(V, String | DODB::NoIndex)) + Trigger::Partition(V).new(self, @directory_name, name, block).tap do |table| + @triggers << table + end + end + + # Creates a new partition **only in RAM**. + # The *name* parameter is the name of the directory that will be created. + # + # NOTE: this partition index is the fastest but doesn't have a file-system representation. 
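+	#
+	# ```
+	# # Hypothetical sketch: partition cars by color, kept only in RAM
+	# # (contents are lost when the process stops).
+	# cars_by_color = car_database.new_RAM_partition "color", &.color
+	# red_cars = cars_by_color.get "red"
+	# ```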
+ def new_RAM_partition(name : String, &block : Proc(V, String | DODB::NoIndex)) + Trigger::PartitionRAMOnly(V).new(self, @directory_name, name, block).tap do |table| + @triggers << table + end + end + + # Gets an *index (partition) object* based on its name. + def get_partition(table_name : String, partition_name : String) + partition = @triggers.find &.name.==(table_name) + partition.not_nil!.as(DODB::Partition).get partition_name + end + + # Creates a new tag **with a cache**. + # The *name* parameter is the name of the directory that will be created. + def new_tags(name : String, &block : Proc(V, Array(String) | DODB::NoIndex)) + Trigger::TagsCached(V).new(self, @directory_name, name, block).tap do |tags| + @triggers << tags + end + end + + # Creates a new tag **without a cache**. + # The *name* parameter is the name of the directory that will be created. + # + # NOTE: this will be a lot slower than the cached version. + def new_uncached_tags(name : String, &block : Proc(V, Array(String) | DODB::NoIndex)) + Trigger::Tags(V).new(self, @directory_name, name, block).tap do |tags| + @triggers << tags + end + end + + # Creates a new partition **only in RAM**. + # The *name* parameter is the name of the directory that will be created. + # + # NOTE: this tag index is the fastest but doesn't have a file-system representation. + def new_RAM_tags(name : String, &block : Proc(V, Array(String) | DODB::NoIndex)) + Trigger::TagsRAMOnly(V).new(self, @directory_name, name, block).tap do |tags| + @triggers << tags + end + end + + # Gets an *index (tag) object* based on its name. + def get_tags(name, key : String) + tag = @triggers.find &.name.==(name) + tag.not_nil!.as(DODB::Tags).get name, key + end + + # WARNING: directed graphs haven't been reviewed in YEARS, assume as dead code. + def new_directed_graph(name : String, index : DODB::Trigger(V), &block : Proc(V, Array(String))) : DirectedGraph(V) + Trigger::DirectedGraph(V).new(self, @directory_name, index, name, block).tap do |table| + @triggers << table + end + end + + # Checks for collisions in the indexes. + def check_collisions!(key : Int32, value : V, old_value : V?) + @triggers.each &.check!(stringify_key(key), value, old_value) + end + + # Retrieves a value and remove it from the database. + def pop + request_lock "key" + + key = last_key + + # Some entries may have been removed. We’ll skip over those. + # Not the most efficient if a large number of indices are empty. + while key >= 0 && self[key]?.nil? + key = key - 1 + end + + if key < 0 + return nil + end + + poped = self[key] + + self.unsafe_delete key + + last_key = key - 1 + + release_lock "key" + + poped + end + + private def data_path + "#{@directory_name}/data" + end + + private def file_path(key : Int32) + "#{data_path}/%010i" % key + end + + private def locks_directory : String + "#{@directory_name}/locks" + end + + private def get_lock_file_path(name : String, subname : String? = nil) + if subname + "#{locks_directory}/#{name}-#{subname}.lock" # FIXME: Separator that causes less collisions? + else + "#{locks_directory}/#{name}.lock" + end + end + + private def read(file_path : String) + V.from_json ::File.read file_path + end + + private def remove_data! + FileUtils.rm_rf data_path + Dir.mkdir_p data_path + end + + private def remove_triggers! + @triggers.each do |trigger| + trigger.nuke_trigger + end + end + + # Removes all indices and then rewrites them all. + # + # WARNING: slow operation. + def reindex_everything! + remove_triggers! 
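+		# Rebuild from the data currently on disk (not from a snapshot
+		# taken beforehand): each entry is replayed through run_triggers.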
+		each_with_key() do |item, key|
+			run_triggers key, item
+		end
+	end
+
+	# Removes all indexes of a value.
+	def remove_triggers(key : Int32, value : V)
+		@triggers.each &.deindex(stringify_key(key), value)
+	end
+
+	# Gets the data with the *key*.
+	# In case the data is missing, returns *nil*.
+	def []?(key : Int32) : V?
+		self[key]
+	rescue MissingEntry
+		# FIXME: Only rescue JSON and “no such file” errors.
+		return nil
+	end
+
+	# Gets the data with the *key*.
+	# In case the data is missing, raises a `DODB::MissingEntry` exception.
+	def [](key : Int32) : V
+		raise MissingEntry.new(key) unless ::File.exists? file_path key
+		read file_path key
+	end
+
+	# Replaces the data at the given *key*.
+	# In case a previous value exists, it is deindexed first.
+	def []=(key : Int32, value : V)
+		old_value = self.[key]?
+
+		check_collisions! key, value, old_value
+
+		# Removes any old indices or partitions pointing to a value about
+		# to be replaced.
+		if old_value
+			remove_triggers key, old_value
+		end
+
+		# Avoids corruption in case the application crashes while writing.
+		file_path(key).tap do |path|
+			::File.write "#{path}.new", value.to_json
+			::FileUtils.mv "#{path}.new", path
+		end
+
+		run_triggers key, value
+
+		if key > @cached_last_key
+			self.last_key = key
+		end
+	end
+
+	# Deletes the data with the *key* without requesting a lock.
+	# This function is required since `#pop` already holds the lock when it removes an entry,
+	# so it calls `#unsafe_delete` directly.
+	def unsafe_delete(key : Int32)
+		value = self[key]?
+
+		return if value.nil?
+
+		begin
+			::File.delete file_path key
+		rescue File::NotFoundError
+		end
+
+		remove_triggers key, value
+
+		value
+	end
+
+	# Deletes the data with the *key*.
+	def delete(key : Int32)
+		request_lock "key"
+		value = unsafe_delete key
+		release_lock "key"
+
+		value
+	end
+
+	# Lists all the keys in the database.
+	private def each_key(reversed = false)
+		# Skips the first two entries: the "." and ".." directories.
+		keys = Dir.entries(data_path).[2..].map(&.to_i).sort
+		(reversed ? keys.reverse : keys).each do |key|
+			yield key
+		end
+	end
+
+	# Lists all database entries with their key.
+	# Can be useful for making dumps or to restore a database.
+	#
+	# WARNING: Very slow. Try not to use.
+	def each_with_key(reversed : Bool = false, offset = 0, limit : Int32? = -1)
+		limit = limit || -1 # a nil limit means no limit at all
+
+		each_key(reversed) do |key|
+			offset -= 1 if offset >= 0
+			next if offset >= 0
+
+			return if limit == 0
+			limit -= 1 if limit > 0
+
+			begin
+				# FIXME: Only intercept JSON parsing errors.
+				value = self[key]
+			rescue
+				next
+			end
+
+			yield value, key
+		end
+	end
+end
+
+require "./storage/*"
diff --git a/src/dodb/storage/cached.cr b/src/dodb/storage/cached.cr
new file mode 100644
index 0000000..cd688c5
--- /dev/null
+++ b/src/dodb/storage/cached.cr
@@ -0,0 +1,140 @@
+class Hash(K,V)
+	# Returns the entries as an array of {key, value} tuples, in reverse insertion order.
+	def reverse
+		to_a.reverse
+	end
+end
+
+# Cached database: stores data on the file-system and keeps a full copy in RAM.
+#
+# ```
+# # Creates a DODB cached database.
+# car_database = DODB::Storage::Cached(Car).new "/path/to/db"
+#
+# # Creates a (cached) index.
+# cars_by_name = car_database.new_index "name", &.name
+#
+# # Add a value in the database.
+# car_database << Car.new "Corvet"
+# ```
+# On the file-system:
+# ```plain
+# storage
+# ├── data
+# │   └── 0000000000
+# ├── indices
+# │   └── by_name <- the "name" basic index
+# │       └── Corvet -> ../../data/0000000000
+# ```
+#
+# WARNING: beware of the RAM use, see `DODB::Storage::Common` for a less memory-hungry option.
class DODB::Storage::Cached(V) < DODB::Storage(V)
+	property data = Hash(Int32, V).new
+
+	def initialize(@directory_name : String)
+		Dir.mkdir_p data_path
+		Dir.mkdir_p locks_directory
+
+		@cached_last_key = init_last_key
+
+		# Load the database in RAM at start-up.
+		DODB::Storage::Uncached(V).new(@directory_name).each_with_key do |v, key|
+			puts "\rloading data from #{@directory_name} at key #{key}"
+			self[key] = v
+		end
+	end
+
+	# Gets the data with the *key*.
+	#
+	# The returned value is **not** cloned; clone it yourself before modifying it,
+	# otherwise the indexes may become inconsistent.
+	#
+	# WARNING: data isn't cloned.
+	# WARNING: raises a `MissingEntry` exception if the key is absent.
+	def [](key : Int32) : V
+		@data[key] rescue raise MissingEntry.new(key)
+	end
+
+	def []=(key : Int32, value : V)
+		old_value = self.[key]?
+
+		check_collisions! key, value, old_value
+
+		# Removes any old indices or partitions pointing to a value about
+		# to be replaced.
+		if old_value
+			remove_triggers key, old_value
+		end
+
+		# Avoids corruption in case the application crashes while writing.
+		file_path(key).tap do |path|
+			::File.write "#{path}.new", value.to_json
+			::FileUtils.mv "#{path}.new", path
+		end
+
+		run_triggers key, value
+
+		if key > @cached_last_key
+			self.last_key = key
+		end
+
+		@data[key] = value
+	end
+
+	# :inherit:
+	#
+	# As with `DODB::Storage#each_key`, keys are sorted in order to keep the behavior consistent.
+	private def each_key(reversed : Bool = false)
+		keys = @data.keys.sort
+		(reversed ? keys.reverse : keys).each do |key|
+			yield key
+		end
+	end
+
+	# :inherit:
+	def each_with_key(reversed : Bool = false, offset = 0, limit : Int32? = -1)
+		limit = limit || -1 # a nil limit means no limit at all
+
+		(reversed ? @data.reverse : @data).each do |key, v|
+			offset -= 1 if offset >= 0
+			next if offset >= 0
+
+			return if limit == 0
+			limit -= 1 if limit > 0
+
+			yield v, key
+		end
+	end
+
+	def unsafe_delete(key : Int32)
+		value = self[key]?
+
+		return if value.nil?
+
+		begin
+			::File.delete file_path key
+		rescue File::NotFoundError
+		end
+
+		remove_triggers key, value
+
+		@data.delete key
+		value
+	end
+
+	private def remove_data!
+		super
+		@data = Hash(Int32, V).new
+	end
+end
diff --git a/src/dodb/storage/common.cr b/src/dodb/storage/common.cr
new file mode 100644
index 0000000..fe67ac1
--- /dev/null
+++ b/src/dodb/storage/common.cr
@@ -0,0 +1,89 @@
+# Common database: only **recently added or requested** entries are kept in memory.
+#
+# Least recently used entries may be removed from the cache in order to keep the amount of memory used reasonable.
+#
+# The number of entries to keep in memory is **configurable**.
+#
+# This database is relevant for high-demand applications, meaning both a high number of entries
+# (data cannot fit entirely in RAM) and a high number of requests, often for the same entries.
+# Typically a retail website.
+# In such applications, the "keep the most recently used data in cache" policy works well since users
+# constantly ask for the same data over and over.
+#
+# ```
+# # Creates a DODB database for common usage (a limited number of cached entries).
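+# # The cache below keeps the 100,000 most recently used entries (an arbitrary figure for this sketch).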
+# car_database = DODB::Storage::Common(Car).new "/path/to/db", 100_000
+#
+# # Creates a (cached) index.
+# cars_by_name = car_database.new_index "name", &.name
+#
+# # Add a value in the database.
+# car_database << Car.new "Corvet"
+# ```
+# On the file-system:
+# ```plain
+# storage
+# ├── data
+# │   └── 0000000000
+# ├── indices
+# │   └── by_name <- the "name" basic index
+# │       └── Corvet -> ../../data/0000000000
+# ```
+#
+# NOTE: fast for frequently requested data, and requires a stable (and configurable) amount of memory.
+class DODB::Storage::Common(V) < DODB::Storage::Cached(V)
+	# The *fifo* is an instance of `EfficientFIFO` where the key of the requested data is pushed.
+	# In case the number of stored entries exceeds what is allowed, the least recently used entry is removed.
+	property fifo : EfficientFIFO(Int32)
+
+	# Initializes the `DODB::Storage::Common` database with a maximum number of entries in the cache.
+	def initialize(@directory_name : String, max_entries : UInt32)
+		@fifo = EfficientFIFO(Int32).new max_entries
+		Dir.mkdir_p data_path
+		Dir.mkdir_p locks_directory
+
+		@cached_last_key = init_last_key
+	end
+
+	# Returns the value from the cache, reading it from disk on a cache miss.
+	# Pushes the key to the FIFO.
+	def [](key : Int32) : V
+		val = @data[key]?
+		if val.nil?
+			raise MissingEntry.new(key) unless ::File.exists? file_path key
+			val = read file_path key
+			@data[key] = val
+		end
+		push_fifo key
+		val
+	end
+
+	# Assumes new entries are more requested than old ones.
+	def []=(key : Int32, value : V)
+		super key, value
+		push_fifo key
+	end
+
+	# :inherit:
+	#
+	# Assumes new entries are more requested than old ones.
+	def <<(item : V)
+		key = super item
+		push_fifo key
+	end
+
+	def unsafe_delete(key : Int32)
+		value = super key
+		@fifo.delete key if value
+		value
+	end
+
+	def delete(key : Int32)
+		value = super key
+		@fifo.delete key if value
+		value
+	end
+
+	private def push_fifo(key : Int32)
+		if entry_to_remove = @fifo << key
+			@data.delete entry_to_remove
+		end
+	end
+end
diff --git a/src/dodb/storage/ramonly.cr b/src/dodb/storage/ramonly.cr
new file mode 100644
index 0000000..58b5000
--- /dev/null
+++ b/src/dodb/storage/ramonly.cr
@@ -0,0 +1,74 @@
+# RAM-only database, without a file-system representation.
+#
+# This database implementation enables the use of DODB to store data with the same lifetime as the application.
+#
+# Triggers (basic indexes, partitions, tags) will behave the same way.
+# ```
+# # Creates a DODB RAM-only database (yes, the path is still required).
+# car_database = DODB::Storage::RAMOnly(Car).new "/path/to/db"
+#
+# # Creates a (cached) index (same as for all other DODB database implementations).
+# cars_by_name = car_database.new_index "name", &.name
+#
+# # Add a value in the database.
+# car_database << Car.new "Corvet"
+# ```
+# In this example there is a cached index, so on the file-system:
+# ```plain
+# storage
+# ├── data <- this directory stays empty (RAM-only database, remember?)
+# ├── indices
+# │   └── by_name <- the "name" basic index
+# │       └── Corvet -> ../../data/0000000000 <- the index works despite not pointing to a real file
+# ```
+class DODB::Storage::RAMOnly(V) < DODB::Storage::Cached(V)
+	# Initialization still uses a directory name and creates a few paths.
+	# This is an implementation detail to re-use the code of `DODB::Storage` and to get the triggers to work.
+	def initialize(@directory_name : String)
+		Dir.mkdir_p data_path
+		Dir.mkdir_p locks_directory
+		@cached_last_key = -1
+	end
+
+	# The `last_key=` function doesn't write to a file in the `DODB::Storage::RAMOnly` database.
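+	# Only the in-memory `@cached_last_key` counter is updated.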
+	def last_key=(key : Int32)
+		@cached_last_key = key
+	end
+
+	# WARNING: the `[]?` and `[]` implementations are inherited from `DODB::Storage::Cached`.
+	# This will lead to errors in case the implementations change, be aware.
+	def []=(key : Int32, value : V)
+		old_value = self.[key]?
+
+		check_collisions! key, value, old_value
+
+		# Removes any old indices or partitions pointing to a value about to be replaced.
+		if old_value
+			remove_triggers key, old_value
+		end
+
+		run_triggers key, value
+
+		if key > @cached_last_key
+			self.last_key = key
+		end
+
+		@data[key] = value
+	end
+
+	def unsafe_delete(key : Int32)
+		value = self[key]?
+
+		return if value.nil?
+
+		remove_triggers key, value
+
+		@data.delete key
+		value
+	end
+
+	private def remove_data!
+		super
+		@data = Hash(Int32, V).new
+	end
+end
diff --git a/src/dodb/storage/uncached.cr b/src/dodb/storage/uncached.cr
new file mode 100644
index 0000000..c8f7708
--- /dev/null
+++ b/src/dodb/storage/uncached.cr
@@ -0,0 +1,27 @@
+# Basic database of DODB.
+# Data isn't cached.
+#
+# ```
+# # Creates a DODB (uncached) database.
+# car_database = DODB::Storage::Uncached(Car).new "/path/to/db"
+#
+# # Creates a (cached) index.
+# cars_by_name = car_database.new_index "name", &.name
+#
+# # Add a value in the database.
+# car_database << Car.new "Corvet"
+# ```
+# On the file-system:
+# ```plain
+# storage
+# ├── data
+# │   └── 0000000000
+# ├── indices
+# │   └── by_name <- the "name" basic index
+# │       └── Corvet -> ../../data/0000000000
+# ```
+#
+# NOTE: slow but doesn't require much memory.
+# NOTE: for a database with a configurable data cache size, use `DODB::Storage::Common`.
+class DODB::Storage::Uncached(V) < DODB::Storage(V)
+end
diff --git a/src/dodb/tags.cr b/src/dodb/tags.cr
deleted file mode 100644
index 5ef7df0..0000000
--- a/src/dodb/tags.cr
+++ /dev/null
@@ -1,174 +0,0 @@
-require "file_utils"
-
-class DODB::Tags(V) < DODB::Indexer(V)
-	property name : String
-	property key_proc : Proc(V, Array(String))
-	getter storage_root : String
-
-	# Required to remove an entry in the DB.
-	@storage : DODB::Storage(V)
-
-	def initialize(@storage, @storage_root, @name, @key_proc)
-		::Dir.mkdir_p indexing_directory
-	end
-
-	def check!(key, value, old_value)
-		return true # Tags don’t have collisions or overloads.
-	end
-
-	def index(key, value)
-		indices = key_proc.call(value)
-
-		indices.each do |i|
-			symlink = get_tagged_entry_path(i, key)
-			Dir.mkdir_p ::File.dirname symlink
-			# FIXME: Should not happen anymore. Should we remove this?
-			::File.delete symlink if ::File.exists? symlink
-			::File.symlink get_data_symlink(key), symlink
-		end
-	end
-
-	def deindex(key, value)
-		indices = key_proc.call(value)
-
-		indices.each do |i|
-			symlink = get_tagged_entry_path(i, key)
-
-			begin
-				::File.delete symlink
-			rescue File::NotFoundError
-			end
-		end
-	end
-
-	def get_with_indice(tag : String) : Array(Tuple(V, Int32))
-		r_value = Array(Tuple(V, Int32)).new
-
-		tag_directory = indexing_directory tag
-
-		return r_value unless Dir.exists? tag_directory
-
-		Dir.each_child tag_directory do |child|
-			key = get_key child
-			r_value << { @storage[key], key }
-		end
-
-		r_value
-	end
-
-	def get_with_indices(keys : Array(String)) : Array(Tuple(V, Int32))
-		r_value = Array(Tuple(V, Int32)).new
-		keys.each do |tag|
-			r_value.concat get_with_indice tag
-		end
-		r_value
-	end
-
-	def get(tag : String) : Array(V)
-		get_with_indice(tag).map &.[0]
-	end
-
-	def get?(tag : String) : Array(V)?
- get tag - rescue MissingEntry - nil - end - - def get(keys : Array(String)) : Array(V) - get_with_indices(keys.sort).map &.[0] - end - - def delete(tag) - delete tag, do true end - end - - def delete(tag, &matcher) - tag_directory = indexing_directory tag - - return unless Dir.exists? tag_directory - - Dir.each_child tag_directory do |child| - key = get_key child - item = @storage[key] - - if yield item - @storage.delete key - end - end - end - - private def get_key(path : String) : Int32 - path.sub(/^.*\//, "").to_i - end - - def indexing_directory : String - "#{@storage_root}/tags/by_#{@name}" - end - - private def indexing_directory(tag) - "#{indexing_directory}/#{tag}" - end - - private def get_tagged_entry_path(tag : String, key : String) - "#{indexing_directory}/#{tag}/#{key}" - end - - private def get_data_symlink(key : String) - "../../../data/#{key}" - end -end - -class DODB::CachedTags(V) < DODB::Tags(V) - # This hash contains the relation between the index key and the data keys. - property data = Hash(String, Array(Int32)).new - - def index(key, value) - super(key, value) - indices = key_proc.call value - - indices.each do |tag| - array = if v = @data[tag]? - v - else - Array(Int32).new - end - array << key.to_i - - @data[tag] = array - end - end - - def deindex(key, value) - super(key, value) - indices = key_proc.call value - - indices.each do |tag| - if v = @data[tag]? - v.delete key.to_i - @data[tag] = v - end - end - end - - def get_with_indice(tag : String) : Array(Tuple(V, Int32)) - r_value = Array(Tuple(V, Int32)).new - - if keys = @data[tag]? - keys.each do |data_key| - r_value << { @storage[data_key], data_key } - end - else - # Get the key from the database representation on the file-system. - tag_directory = indexing_directory tag - raise MissingEntry.new(@name, tag) unless Dir.exists? tag_directory - - Dir.each_child tag_directory do |child| - r_value << { @storage[get_key child], get_key child } - end - - @data[tag] = r_value.map &.[1] - end - - r_value - end -end diff --git a/src/dodb/trigger.cr b/src/dodb/trigger.cr new file mode 100644 index 0000000..57a1171 --- /dev/null +++ b/src/dodb/trigger.cr @@ -0,0 +1,40 @@ +# Abstract class `DODB::Trigger(V)` represents the specifications for +# triggers (indexes, partitions, tags, etc.). +abstract class DODB::Trigger(V) + + # Indexes a value, used for **internal operations**. + # + # NOTE: used for internal operations. + abstract def index (key : String, value : V) + + # Removes the index of a value, used for **internal operations**. + # + # NOTE: used for internal operations. + abstract def deindex (key : String, value : V) + + # Verifies whether a new value will create a collision with the index of + # currently stored value, used for **internal operations**. + # + # NOTE: used for internal operations. + abstract def check! (key : String, value : V, old_value : V?) + + # Name of the index, such as *id* or *color* for example. + # This is an arbitrary value, mostly to create the index directory. + # + # NOTE: used for internal operations. + abstract def name : String + + # Directory where the values will be written. + # + # NOTE: used for internal operations. + abstract def trigger_directory : String + + # Removes all the index entries, removes the `#trigger_directory` by default. + # + # NOTE: used for internal operations. 
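+	#
+	# A minimal sketch (`cars_by_color` is an assumed partition index):
+	# ```
+	# cars_by_color.nuke_trigger # drops every symlink under partitions/by_color
+	# ```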
+	def nuke_trigger
+		FileUtils.rm_rf trigger_directory
+	end
+end
+
+require "./trigger/*"
diff --git a/src/dodb/directed_graph.cr b/src/dodb/trigger/directed_graph.cr
similarity index 90%
rename from src/dodb/directed_graph.cr
rename to src/dodb/trigger/directed_graph.cr
index e80f990..53f1251 100644
--- a/src/dodb/directed_graph.cr
+++ b/src/dodb/trigger/directed_graph.cr
@@ -1,21 +1,19 @@
 require "file_utils"
 require "json"
 
-require "./indexer.cr"
-
 # WARNING: this code hasn't been reviewed nor used in years.
-class DODB::DirectedGraph(V) < DODB::Indexer(V)
+class DODB::Trigger::DirectedGraph(V) < DODB::Trigger(V)
 	property name : String
 	property key_proc : Proc(V, Array(String))
 	getter storage_root : String
 
-	getter index : Index(V)
+	getter index : Trigger::Index(V)
 
 	@storage : DODB::Storage(V)
 
 	def initialize(@storage, @storage_root, @index, @name, @key_proc)
-		::Dir.mkdir_p indexing_directory
+		::Dir.mkdir_p trigger_directory
 	end
 
 	def check!(key, value, old_value)
@@ -128,7 +126,7 @@ class DODB::DirectedGraph(V) < DODB::Indexer(V)
 		r_value
 	end
 
-	def indexing_directory : String
+	def trigger_directory : String
 		"#{@storage_root}/graphs/by_#{@name}"
 	end
 
@@ -139,16 +137,16 @@ class DODB::DirectedGraph(V) < DODB::Indexer(V)
 			.to_i
 	end
 
-	private def indexing_directory(node)
-		"#{indexing_directory}/#{node}"
+	private def trigger_directory(node)
+		"#{trigger_directory}/#{node}"
 	end
 
 	private def get_node_symlink(node : String, key : String)
-		"#{indexing_directory node}/#{key}.json"
+		"#{trigger_directory node}/#{key}.json"
 	end
 
 	private def get_outgoing_links_directory(node)
-		"#{indexing_directory node}/outgoing"
+		"#{trigger_directory node}/outgoing"
 	end
 
 	private def get_outgoing_symlink(node, link)
@@ -156,7 +154,7 @@ class DODB::DirectedGraph(V) < DODB::Indexer(V)
 	end
 
 	private def get_incoming_links_directory(node)
-		"#{indexing_directory node}/incoming"
+		"#{trigger_directory node}/incoming"
 	end
 
 	private def get_incoming_symlink(node, link)
@@ -167,7 +165,7 @@ class DODB::DirectedGraph(V) < DODB::Indexer(V)
 		"../../../../data/#{key}.json"
 	end
 
-	# Roughly matches Index#file_path_index, but works if @storage_root
+	# Roughly matches `Trigger#file_path_index`, but works if @storage_root
 	# is an absolute path as well.
 	private def get_cross_index_data_symlink(node : String)
 		"../../../../indices/by_#{@index.name}/#{node}.json"
diff --git a/src/dodb/trigger/index.cr b/src/dodb/trigger/index.cr
new file mode 100644
index 0000000..654022c
--- /dev/null
+++ b/src/dodb/trigger/index.cr
@@ -0,0 +1,428 @@
+require "file_utils"
+
+# Basic indexes for 1-to-1 relations.
+# Uncached version.
+#
+# ```
+# cars_by_name = car_database.new_uncached_index "name", &.name
+# ```
+#
+# This index provides a file-system representation, enabling the administrators to
+# select a value based on its index. The following example presents an index named "id"
+# with some data indexed by a UUID attribute.
+#
+# ```plain
+# storage
+# ├── data
+# │   ├── 0000000000
+# │   ├── 0000000001
+# │   └── 0000000002
+# ├── indices
+# │   └── by_id <- this is an example of index named "id"
+# │       ├── 6e109b82-25de-4250-9c67-e7e8415ad5a7 -> ../../data/0000000000
+# │       ├── 2080131b-97d7-4300-afa9-55b93cdfd124 -> ../../data/0000000001
+# │       └── 8b4e83e3-ef95-40dc-a6e5-e6e697ce6323 -> ../../data/0000000002
+# ```
+#
+# NOTE: no cache, thus considered as *slow* for creation, deletion **and retrieval**.
+# NOTE: see `IndexCached` for a cached version, faster for retrieval.
+# NOTE: for fast operations without fs representation, see `IndexRAMOnly`.
+class DODB::Trigger::Index(V) < DODB::Trigger(V)
+	# Name of the index, such as *id* or *color* for example.
+	# This is an arbitrary value, mostly to create the index directory.
+	#
+	# NOTE: used for internal operations.
+	property name : String
+
+	# Procedure to retrieve the index attribute from the value, used for **internal operations**.
+	property key_proc : Proc(V, String | NoIndex)
+
+	# Root database directory, used for **internal operations**.
+	getter storage_root : String
+
+	# Reference to the database instance, used for **internal operations**.
+	@storage : DODB::Storage(V)
+
+	# To create an index from a database, use `DODB::Storage#new_index` to create
+	# a cached index, `DODB::Storage#new_uncached_index` for an uncached index or
+	# `DODB::Storage#new_RAM_index` for a RAM-only index.
+	#
+	# WARNING: this is an internal operation, do not instantiate an index by hand.
+	def initialize(@storage : DODB::Storage(V), @storage_root : String, @name : String, @key_proc : Proc(V, String | NoIndex))
+		Dir.mkdir_p trigger_directory
+	end
+
+	def check!(key : String, value : V, old_value : V?)
+		index_key = key_proc.call value
+
+		return if index_key.is_a? NoIndex
+
+		symlink = file_path_index index_key.to_s
+
+		if ::File.symlink? symlink
+			# In case both old and new values are pointing to the same key,
+			# this is not considered a collision.
+			if old_value
+				old_key = key_proc.call old_value
+				return if index_key == old_key
+			end
+
+			raise IndexOverload.new "index '#{@name}' is overloaded for key '#{key}', file #{symlink} exists"
+		end
+	end
+
+	# :inherit:
+	def index(key : String, value : V)
+		index_key = key_proc.call value
+		return if index_key.is_a? NoIndex
+
+		symlink = file_path_index index_key
+
+		Dir.mkdir_p ::File.dirname symlink
+
+		::File.symlink get_data_symlink_index(key), symlink
+	end
+
+	# :inherit:
+	def deindex(key : String, value : V)
+		index_key = key_proc.call value
+		return if index_key.is_a? NoIndex
+
+		symlink = file_path_index index_key
+
+		begin
+			::File.delete symlink
+		rescue File::NotFoundError
+		end
+	end
+
+	# Gets the key (ex: 343) for an entry in the DB from an indexed value, used for **internal operations**.
+	#
+	# Reads the link in `db/indices/by_#{name}/`.
+	#
+	# Useful for internal purposes; to retrieve a value, use `#get`.
+	#
+	# ```
+	# internal_database_key_for_the_corvet = cars_by_name.get_key "Corvet"
+	# ```
+	#
+	# NOTE: used for internal operations.
+	def get_key(index : String) : Int32
+		get_key_on_fs index
+	end
+
+	# Gets data from an indexed value (throws an exception on a missing entry).
+	#
+	# ```
+	# corvet = cars_by_name.get "Corvet"
+	# ```
+	#
+	# WARNING: throws an exception if the value isn't found.
+	# NOTE: for a safe version, use `#get?`.
+	def get(index : String) : V
+		@storage[get_key index]
+	end
+
+	# Gets data from an indexed value without throwing an exception on a missing entry.
+	#
+	# ```
+	# corvet = cars_by_name.get? "Corvet"
+	# ```
+	#
+	# NOTE: safe version of `#get`, returns a *nil* value in case of a missing entry instead of an exception.
+	def get?(index : String) : V?
+		get index
+	rescue MissingEntry
+		nil
+	end
+
+	# Gets data from an indexed value (thread-safe via two file locks) and gives it to the provided block of code.
+	#
+	# WARNING: thread-safe only with respect to other `#safe_get` and `#safe_get?` calls;
+	# index creations and deletions do not use the same locks!
+	# NOTE: on exception, releases all locks.
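+	#
+	# A minimal usage sketch (`cars_by_name` and the `Car` type are assumptions for illustration):
+	# ```
+	# cars_by_name.safe_get "Corvet" do |car|
+	#   pp! car
+	# end
+	# ```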
+	def safe_get(index : String) : Nil
+		@storage.request_lock @name, index
+		internal_key = get_key(index).to_s
+		@storage.request_lock internal_key
+
+		begin
+			yield get index
+		rescue e
+			# On exception, returns the exception after releasing locks.
+			@storage.release_lock internal_key
+			@storage.release_lock @name, index
+			raise e
+		end
+
+		@storage.release_lock internal_key
+		@storage.release_lock @name, index
+	end
+
+	# Same as `#safe_get` but doesn't throw an exception on a missing value
+	# (the provided block of code receives a *nil* value).
+	#
+	# WARNING: thread-safe only with respect to other `#safe_get` and `#safe_get?` calls;
+	# index creations and deletions do not use the same locks!
+	# NOTE: on exception, releases all locks.
+	def safe_get?(index : String, &block : Proc(V | Nil, Nil)) : Nil
+		safe_get index, &block
+	rescue MissingEntry
+		yield nil
+	end
+
+	# Reads the indexed symlink to find its related key, used for **internal operations**.
+	#
+	# For example, for a car indexed by its name:
+	#
+	# ```
+	# storage
+	# ├── data
+	# │   └── 0000000343
+	# └── indices
+	#     └── by_name
+	#         └── Corvet -> ../../data/0000000343
+	# ```
+	#
+	# `#get_key_on_fs` reads the *storage/indices/by_name/Corvet* symlink and gets
+	# the name of the data file ("0000000343"), which it converts to an integer:
+	# the key in the database.
+	#
+	# NOTE: used for internal operations.
+	def get_key_on_fs(index : String) : Int32
+		file_path = file_path_index index
+		raise MissingEntry.new(@name, index) unless ::File.symlink? file_path
+		::File.readlink(file_path).sub(/^.*\//, "").to_i
+	end
+
+	# Updates a value based on its indexed attribute (which must not have changed).
+	#
+	# ```
+	# # Update the car "corvet" in the database.
+	# cars_by_name.update corvet
+	# ```
+	# WARNING: in case the indexed attribute has changed, use `#update(index, value)`.
+	def update(new_value : V)
+		index = key_proc.call new_value
+
+		raise Exception.new "new value is not indexable" if index.is_a? NoIndex
+
+		update index, new_value
+	end
+
+	# Updates a value based on its indexed attribute (which may have changed).
+	#
+	# ```
+	# # Update the car "corvet" in the database.
+	# cars_by_name.update "Corvet", corvet
+	# ```
+	# NOTE: in case the indexed attribute hasn't changed, you may prefer `#update(value)`.
+	def update(index : String, new_value : V)
+		key = get_key index
+
+		@storage[key] = new_value
+	end
+
+	# Updates a value. Creates it if necessary.
+	#
+	# ```
+	# # Update or create the car "corvet" in the database.
+	# cars_by_name.update_or_create corvet
+	# ```
+	# WARNING: use `#update_or_create(index, value)` if the indexed value may have changed.
+	def update_or_create(new_value : V)
+		update new_value
+	rescue MissingEntry
+		@storage << new_value
+	end
+
+	# Same as `#update_or_create(value)` but handles changed indexes.
+	#
+	# ```
+	# # Update or create the car named "Corvet" in the database.
+	# # Its name may have changed in the object "corvet".
+	# cars_by_name.update_or_create "Corvet", corvet
+	# ```
+	# NOTE: safe version in case the index has changed.
+	def update_or_create(index : String, new_value : V)
+		update index, new_value
+	rescue MissingEntry
+		@storage << new_value
+	end
+
+	# Deletes a value based on its index.
+	#
+	# ```
+	# # Deletes the car named "Corvet".
+	# cars_by_name.delete "Corvet"
+	# ```
+	def delete(index : String)
+		key = get_key index
+
+		@storage.delete key
+	end
+
+	# :inherit:
+	def trigger_directory : String
+		"#{@storage_root}/indices/by_#{@name}"
+	end
+
+	# FIXME: Now that it’s being used outside of this class, name it properly.
+	def file_path_index(index_key : String)
+		"#{trigger_directory}/#{index_key}"
+	end
+
+	# Creates the relative path to the data from the indexing directory.
+	private def get_data_symlink_index(key : String)
+		"../../data/#{key}"
+	end
+end
+
+# Basic indexes for 1-to-1 relations.
+# Cached version.
+#
+# ```
+# cars_by_name = car_database.new_index "name", &.name
+# ```
+#
+# The cache makes this index fast.
+# Since the index stores only an attribute and a key, not the full value, memory usage remains reasonable.
+#
+# A few file-system operations are required on index creation and deletion,
+# so this version is still slow for both of these operations.
+#
+# ```plain
+# storage
+# ├── data
+# │   ├── 0000000000
+# │   ├── 0000000001
+# │   └── 0000000002
+# ├── indices
+# │   └── by_id <- this is an example of index named "id"
+# │       ├── 6e109b82-25de-4250-9c67-e7e8415ad5a7 -> ../../data/0000000000
+# │       ├── 2080131b-97d7-4300-afa9-55b93cdfd124 -> ../../data/0000000001
+# │       └── 8b4e83e3-ef95-40dc-a6e5-e6e697ce6323 -> ../../data/0000000002
+# ```
+#
+# NOTE: cached, reasonable amount of memory used since it's just an index.
+# NOTE: fast for retrieval, slow for index creation and deletion (fs operations).
+# NOTE: see `DODB::Trigger::Index` for an uncached version, even less memory-hungry.
+# NOTE: for fast operations without fs representation, see `IndexRAMOnly`.
+class DODB::Trigger::IndexCached(V) < DODB::Trigger::Index(V)
+	# This hash contains the relation between the index key and the data key, used for
+	# **internal operations**.
+	#
+	# WARNING: used for internal operations, do not change its content or access it directly.
+	property data = Hash(String, Int32).new
+
+	def check!(key : String, value : V, old_value : V?)
+		index_key = key_proc.call value
+		return if index_key.is_a? NoIndex
+
+		if data[index_key]?
+			# In case both old and new values are pointing to the same key,
+			# this is not considered a collision.
+			if old_value
+				old_key = key_proc.call old_value
+				return if index_key == old_key
+			end
+
+			raise IndexOverload.new "index '#{@name}' is overloaded for key '#{key}'"
+		end
+	end
+
+	# Clears the cache and removes the `#trigger_directory`.
+	def nuke_trigger
+		super
+		data.clear
+	end
+
+	# Indexes the value on the file-system as `DODB::Trigger::Index#index` does, but also puts the index in a cache.
+	#
+	# NOTE: used for internal operations.
+	def index(key, value)
+		index_key = key_proc.call value
+		return if index_key.is_a? NoIndex
+		super(key, value)
+
+		@data[index_key] = key.to_i
+	end
+
+	# Removes the index of a value on the file-system as `DODB::Trigger::Index#deindex` does, but also from
+	# the cache, used for **internal operations**.
+	#
+	# NOTE: used for internal operations.
+	def deindex(key, value)
+		index_key = key_proc.call value
+		return if index_key.is_a? NoIndex
+		super(key, value)
+
+		@data.delete index_key
+	end
+
+	# Gets the key (ex: 343) for an entry in the DB.
+	# The key is first searched in the cache, and read from the file-system on a cache miss.
+	#
+	# NOTE: used for internal operations.
+	def get_key(index : String) : Int32
+		if k = @data[index]?
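+			# Cache hit: the key is already known.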
+ k + elsif k = get_key_on_fs(index) + @data[index] = k + k + else + raise MissingEntry.new(@name, index) + end + end +end + +# Basic indexes for 1-to-1 relations. +# RAM-only version, no file-system representation. +# +# ``` +# cars_by_name = car_database.new_RAM_index "name", &.name +# ``` +# +# Since there is no file-system operations, all the operations are fast. +# `DODB::Trigger::IndexRAMOnly` enables the flexibility of indexes without a file-system representation +# for absolute efficiency. +# Exactly as easy to use as the other index implementations. +# +# NOTE: reasonable amount of memory used since it's just an index. +# NOTE: fast for all operations, but no file-system representation. +class DODB::Trigger::IndexRAMOnly(V) < DODB::Trigger::IndexCached(V) + # Indexes a value in RAM, no file-system operation. + # + # NOTE: used for internal operations. + def index(key, value) + index_key = key_proc.call value + return if index_key.is_a? NoIndex + @data[index_key] = key.to_i + end + + # Removes the index of a value in RAM, no file-system operation. + # + # NOTE: used for internal operations. + def deindex(key, value) + index_key = key_proc.call value + return if index_key.is_a? NoIndex + @data.delete index_key + end + + # Gets the key (ex: 343) for an entry in the DB. + # With a RAM-only index, the key is necessarily stored in the hash. + # + # NOTE: used for internal operations. + def get_key(index : String) : Int32 + if k = @data[index]? + k + else + raise MissingEntry.new(@name, index) + end + end + + # Clears the index. + def nuke_trigger + data.clear + end +end diff --git a/src/dodb/trigger/partition.cr b/src/dodb/trigger/partition.cr new file mode 100644 index 0000000..79936e5 --- /dev/null +++ b/src/dodb/trigger/partition.cr @@ -0,0 +1,372 @@ +require "file_utils" + +# Partitions for 1-to-n relations. +# Uncached version. +# +# ``` +# cars_by_color = car_database.new_uncached_partition "color", &.color +# ``` +# +# This (partition) index provides a file-system representation, enabling the administrators to +# select a value based on its index. +# +# The following example presents an index named "color" with some data indexed by a color attribute. +# +# ```plain +# storage +# ├── data +# │   ├── 0000000000 +# │   ├── 0000000001 +# │   └── 0000000002 +# ├── partitions +# │   └── by_color <- this is an example of index named "color" +# │   ├── red +# │   │ └── 0000000000 -> ../../data/0000000000 +# │   └── blue +# │   ├── 0000000001 -> ../../data/0000000001 +# │   └── 0000000002 -> ../../data/0000000002 +# ``` +# +# NOTE: no cache, thus considered as *slow* for creation, deletion **and retrieval**. +# NOTE: see `PartitionCached` for a cached version, faster for retrieval. +# NOTE: for fast operations without fs representation, see `PartitionRAMOnly`. +class DODB::Trigger::Partition(V) < DODB::Trigger(V) + # Name of the index, such as *color* for example. + # This is an arbitrary value, mostly to create the index directory. + # + # NOTE: used for internal operations. + property name : String + + # Procedure to retrieve the index attribute from the value. + property key_proc : Proc(V, String | NoIndex) + + # Root database directory. + getter storage_root : String + + # Reference to the database instance. 
+	@storage : DODB::Storage(V)
+
+	# To create a *partition index* from a database, use `DODB::Storage#new_partition` to create
+	# a cached partition, `DODB::Storage#new_uncached_partition` for an uncached partition or
+	# `DODB::Storage#new_RAM_partition` for a RAM-only partition.
+	#
+	# WARNING: this is an internal operation, do not instantiate a partition by hand.
+	def initialize(@storage : DODB::Storage(V), @storage_root : String, @name : String, @key_proc : Proc(V, String | NoIndex))
+		::Dir.mkdir_p trigger_directory
+	end
+
+	# Checks for collisions.
+	#
+	# NOTE: always returns true, no collision can happen in a partition.
+	def check!(key : String, value : V, old_value : V?)
+		return true # Partitions don’t have collisions or overloads.
+	end
+
+	def index(key : String, value : V)
+		partition = key_proc.call value
+		return if partition.is_a? NoIndex
+
+		symlink = get_partition_symlink(partition, key)
+
+		Dir.mkdir_p ::File.dirname symlink
+
+		::File.symlink get_data_symlink(key), symlink
+	end
+
+	def deindex(key : String, value : V)
+		partition = key_proc.call value
+		return if partition.is_a? NoIndex
+
+		symlink = get_partition_symlink(partition, key)
+
+		begin
+			::File.delete symlink
+		rescue File::NotFoundError
+		end
+	end
+
+	# Gets data from an indexed value (throws an exception on a missing entry).
+	#
+	# ```
+	# red_cars = cars_by_color.get "red" # No red cars = MissingEntry exception
+	# ```
+	#
+	# WARNING: throws an exception if no value is found.
+	# NOTE: for a safe version, use `#get?`.
+	def get(partition : String) : Array(V)
+		get_with_keys(partition).map &.[0]
+	end
+
+	# Safe version of `#get`, gets data and returns *an empty array* in case of
+	# a missing entry instead of an exception.
+	#
+	# ```
+	# red_cars = cars_by_color.get? "red"
+	# ```
+	def get?(partition : String) : Array(V)?
+		get partition
+	rescue MissingEntry
+		Array(V).new
+	end
+
+	# Gets partition entries (and their keys) from the file-system representation.
+	#
+	# ```
+	# # Gets all red cars.
+	# cars_by_color.get_with_keys "red"
+	# # Returns something like:
+	# # [ (@storage[42], 42)
+	# # , (@storage[91], 91)
+	# # ]
+	# # Each tuple is composed of a car and its key in the database.
+	# ```
+	# WARNING: throws a MissingEntry exception on non-existing partition.
+	def get_with_keys(partition : String) : Array(Tuple(V, Int32))
+		partition_directory = trigger_directory partition
+		raise MissingEntry.new(@name, partition) unless Dir.exists? partition_directory
+
+		r_value = Array(Tuple(V, Int32)).new
+
+		Dir.each_child partition_directory do |child|
+			key = get_key child
+			r_value << { @storage[key], key }
+		end
+
+		r_value
+	end
+
+	# Safe version of `#get_with_keys`.
+	# NOTE: returns an empty list on empty or non-existing partition.
+	def get_with_keys?(partition : String) : Array(Tuple(V, Int32))
+		get_with_keys partition
+	rescue MissingEntry
+		Array(Tuple(V, Int32)).new
+	end
+
+	# Deletes all entries within the provided partition.
+	#
+	# ```
+	# cars_by_color.delete "red" # Deletes all red cars.
+	# ```
+	# WARNING: throws a MissingEntry exception on non-existing partition.
+	def delete(partition : String)
+		delete partition, do true end
+	end
+
+	# Deletes entries within the provided partition that match the provided block of code.
+	#
+	# ```
+	# # Deletes all red Corvets.
+	# cars_by_color.delete "red", do |car|
+	#   car.name == "Corvet"
+	# end
+	# ```
+	# WARNING: throws a MissingEntry exception on non-existing partition.
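+	# NOTE: rescue `MissingEntry` at the call site if the partition may not exist.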
+ def delete(partition : String, &matcher : Proc(V, Bool)) + get_with_keys(partition).each do |entry, key| + if yield entry + @storage.delete key + end + end + end + + # :inherit: + def trigger_directory : String + "#{@storage_root}/partitions/by_#{@name}" + end + + private def get_key(path : String) : Int32 + path.sub(/^.*\//, "").to_i + end + + private def trigger_directory(partition) + "#{trigger_directory}/#{partition}" + end + + private def get_partition_symlink(partition : String, key : String) + "#{trigger_directory partition}/#{key}" + end + + private def get_data_symlink(key : String) + "../../../data/#{key}" + end +end + +# Partitions for 1-to-n relations. +# Cached version. +# +# ``` +# cars_by_color = car_database.new_partition "color", &.color +# ``` +# +# This (partition) index provides a file-system representation, enabling the administrators to +# select a value based on its index. +# +# The following example presents an index named "color" with some data indexed by a color attribute. +# +# ```plain +# storage +# ├── data +# │   ├── 0000000000 +# │   ├── 0000000001 +# │   └── 0000000002 +# ├── partitions +# │   └── by_color <- this is an example of index named "color" +# │   ├── red +# │   │ └── 0000000000 -> ../../data/0000000000 +# │   └── blue +# │   ├── 0000000001 -> ../../data/0000000001 +# │   └── 0000000002 -> ../../data/0000000002 +# ``` +# +# NOTE: cached, reasonable amount of memory used since it's just an index. +# NOTE: fast for retrieval, slow for index creation and deletion (fs operations). +# NOTE: see `Partition` for an uncached version, even less memory-hungry. +# NOTE: for fast operations without fs representation, see `PartitionRAMOnly`. +class DODB::Trigger::PartitionCached(V) < DODB::Trigger::Partition(V) + # This hash contains the relation between the index key and the data key, used for + # **internal operations**. + # + # WARNING: used for internal operations, do not change its content or access it directly. + property data = Hash(String, Array(Int32)).new + + # Indexes the value on the file-system as `DODB::Trigger::Partition#index` but also puts the index in a cache. + # + # NOTE: used for internal operations. + def index(key : String, value : V) + partition = key_proc.call value + return if partition.is_a? NoIndex + super(key, value) + + array = if v = @data[partition]? + v + else + Array(Int32).new + end + array << key.to_i + + @data[partition] = array + end + + # Removes the index of a value on the file-system as `DODB::Trigger::Partition#deindex` but also from + # the cache, used for **internal operations**. + # + # NOTE: used for internal operations. + def deindex(key : String, value : V) + partition = key_proc.call value + return if partition.is_a? NoIndex + super(key, value) + + if v = @data[partition]? + v.delete key.to_i + @data[partition] = v + end + end + + # Gets partition entries and the database key for each entry. + # In `DODB::Trigger::PartitionCached`, `#get_with_keys(partition : String)` is modified to retrieve data keys from + # the index cache. + # In case the data isn't already in the cache, it is retrieved from the file-system. + # + # ``` + # # For example, get all red cars. + # cars_by_color.get_with_keys "red" + # # Will return something like: + # # [ (@storage[42], 42) + # # , (@storage[91], 91) + # # ] + # # Each tuple is composed of a car and its key in the database. + # ``` + # WARNING: throws a MissingEntry exception on non-existing partition. 
+ def get_with_keys(partition : String) : Array(Tuple(V, Int32)) + r_value = Array(Tuple(V, Int32)).new + + # In case the partition is cached. + if keys = @data[partition]? + keys.each do |data_key| + r_value << { @storage[data_key], data_key } + end + else + # Gets data from the database representation on the file-system. + r_value = super(partition) + @data[partition] = r_value.map &.[1] + end + r_value + end + + # Clears the cache and removes the `#trigger_directory`. + def nuke_trigger + super + data.clear + end +end + +# Partitions for 1-to-n relations. +# RAM-only version. +# +# ``` +# cars_by_color = car_database.new_RAM_partition "color", &.color +# ``` +# +# Since there is no file-system operations, all the operations are fast. +# `DODB::Trigger::PartitionRAMOnly` enables the flexibility of partitions without a file-system representation. +# Absolute efficiency, exactly as easy to use as the other partition implementations. +# +# NOTE: reasonable amount of memory used since it's just an index. +# NOTE: fast for all operations, but no file-system representation. +# NOTE: see `Partition` for an uncached version, even less memory-hungry. +# NOTE: for an fs representation but still fast for retrieval, see `PartitionCached`. +class DODB::Trigger::PartitionRAMOnly(V) < DODB::Trigger::PartitionCached(V) + def index(key : String, value : V) + partition = key_proc.call value + return if partition.is_a? NoIndex + + array = if v = @data[partition]? + v + else + Array(Int32).new + end + array << key.to_i + + @data[partition] = array + end + + def deindex(key : String, value : V) + partition = key_proc.call value + return if partition.is_a? NoIndex + + if v = @data[partition]? + v.delete key.to_i + @data[partition] = v + end + end + + # Gets partition entries and the database key for each entry, from the in-memory partition index. + # + # ``` + # # Get all red cars. + # cars_by_color.get_with_keys "red" + # # Returns something like: + # # [ (@storage[42], 42) + # # , (@storage[91], 91) + # # ] + # # Each tuple is composed of a car and its key in the database. + # ``` + # WARNING: FOR CONSISTENCY, throws a MissingEntry exception on non-existing partition. + def get_with_keys(partition : String) : Array(Tuple(V, Int32)) + r_value = Array(Tuple(V, Int32)).new + if keys = @data[partition]? + keys.each do |data_key| + r_value << { @storage[data_key], data_key } + end + else + raise MissingEntry.new(@name, partition) + end + r_value + end + + # Clears the cache. + def nuke_trigger + data.clear + end +end diff --git a/src/dodb/trigger/tags.cr b/src/dodb/trigger/tags.cr new file mode 100644 index 0000000..b8bda22 --- /dev/null +++ b/src/dodb/trigger/tags.cr @@ -0,0 +1,413 @@ +require "file_utils" + +# Tags for n-to-n relations. +# Uncached version. +# +# ``` +# cars_by_keywords = car_database.new_uncached_tags "keywords", &.keywords +# ``` +# +# This (tag) index provides a file-system representation, enabling the administrators to +# select a value based on its index. +# +# The following example presents an index named "keywords" with some data indexed by a keywords attribute. 
+#
+# ```plain
+# storage
+# ├── data
+# │   ├── 0000000000 <- this car is expensive and fast
+# │   ├── 0000000001 <- this car is expensive
+# │   └── 0000000002 <- this car is expensive
+# ├── tags
+# │   └── by_keywords <- this is an example of index named "keywords"
+# │       ├── fast
+# │       │   └── 0000000000 -> ../../data/0000000000
+# │       └── expensive
+# │           ├── 0000000000 -> ../../data/0000000000
+# │           ├── 0000000001 -> ../../data/0000000001
+# │           └── 0000000002 -> ../../data/0000000002
+# ```
+#
+# NOTE: no cache, thus considered as *slow* for creation, deletion **and retrieval**.
+# NOTE: see `TagsCached` for a cached version, faster for retrieval.
+# NOTE: for fast operations without fs representation, see `TagsRAMOnly`.
+class DODB::Trigger::Tags(V) < DODB::Trigger(V)
+	# Name of the index, such as *keywords* for example.
+	# This is an arbitrary value, mostly to create the index directory.
+	#
+	# NOTE: used for internal operations.
+	property name : String
+
+	# Procedure to retrieve the index attribute from the value.
+	property key_proc : Proc(V, Array(String) | NoIndex)
+
+	# Root database directory.
+	getter storage_root : String
+
+	# Required to remove an entry in the DB.
+	@storage : DODB::Storage(V)
+
+	# To create a *tag index* from a database, use `DODB::Storage#new_tags` to create
+	# a cached tag, `DODB::Storage#new_uncached_tags` for an uncached tag or
+	# `DODB::Storage#new_RAM_tags` for a RAM-only tag.
+	#
+	# WARNING: this is an internal operation, do not instantiate a tag index by hand.
+	def initialize(@storage : DODB::Storage(V), @storage_root : String, @name : String, @key_proc : Proc(V, Array(String) | NoIndex))
+		::Dir.mkdir_p trigger_directory
+	end
+
+	# Checks for collisions.
+	#
+	# NOTE: always returns true, no collision can happen in a tag.
+	def check!(key : String, value : V, old_value : V?)
+		return true # Tags don’t have collisions or overloads.
+	end
+
+	def index(key : String, value : V)
+		tags = key_proc.call(value)
+		return if tags.is_a? NoIndex
+
+		tags.each do |tag|
+			symlink = get_tagged_entry_path(tag, key)
+			Dir.mkdir_p ::File.dirname symlink
+			::File.symlink get_data_symlink(key), symlink
+		end
+	end
+
+	# :inherit:
+	# TODO: in case the tag is left empty, should the tag directory be removed?
+	def deindex(key : String, value : V)
+		tags = key_proc.call(value)
+		return if tags.is_a? NoIndex
+
+		tags.each do |tag|
+			symlink = get_tagged_entry_path(tag, key)
+
+			begin
+				::File.delete symlink
+			rescue File::NotFoundError
+			end
+		end
+	end
+
+	# Gets tag entries (and their keys) from the file-system representation of the tag.
+	#
+	# ```
+	# # Gets all slow cars.
+	# cars_by_keywords.get_with_keys "slow"
+	# # Returns something like:
+	# # [ (@storage[42], 42)
+	# # , (@storage[91], 91)
+	# # ]
+	# # Each tuple is composed of a car and its key in the database.
+	# ```
+	# WARNING: throws a MissingEntry exception on non-existing tag.
+	def get_with_keys(tag : String) : Array(Tuple(V, Int32))
+		tag_directory = trigger_directory tag
+		raise MissingEntry.new(@name, tag) unless Dir.exists? tag_directory
+
+		r_value = Array(Tuple(V, Int32)).new
+
+		Dir.each_child tag_directory do |child|
+			key = get_key child
+			r_value << { @storage[key], key }
+		end
+
+		r_value
+	end
+
+	# Gets values (and their keys) matching multiple tags (values must have all the provided tags).
+	#
+	# ```
+	# # Get cars that are both fast and expensive.
+	# fast_expensive_cars = cars_by_keywords.get_with_keys ["fast", "expensive"]
+	# ```
+	#
+	# `#get_with_keys` is not designed to be fast, but should be fine for most applications.
+	# Nothing can beat a custom implementation tailored to a specific problem, so in case this
+	# algorithm isn't good enough for you, feel free to override this function for your specific data-set.
+	#
+	# NOTE: to seriously boost performance, use `DODB::Trigger::TagsCached`.
+	# WARNING: throws a MissingEntry exception on non-existing tag or no match.
+	def get_with_keys(keys : Array(String)) : Array(Tuple(V, Int32))
+		r_value = Array(Tuple(V, Int32)).new
+		return r_value if keys.size < 1
+
+		# Dups the array: the caller's array must not be mutated.
+		remaining_keys = keys.dup
+		first_key = remaining_keys.pop
+		r_value = get_with_keys(first_key)
+
+		remaining_keys.each do |tag|
+			r_value &= get_with_keys(tag)
+			raise MissingEntry.new(@name, keys) if r_value.size < 1
+		end
+		r_value
+	end
+
+	# Safe version of `#get_with_keys`.
+	# NOTE: returns an empty list on empty or non-existing tag.
+	def get_with_keys?(tag : String | Array(String)) : Array(Tuple(V, Int32))
+		get_with_keys tag
+	rescue MissingEntry
+		Array(Tuple(V, Int32)).new
+	end
+
+	# Gets data from an indexed value (throws an exception on a missing entry).
+	# In case of multiple tags, gets entries matching all the provided tags.
+	#
+	# ```
+	# # Get cars that are both fast and expensive.
+	# fast_expensive_cars = cars_by_keywords.get ["fast", "expensive"]
+	# ```
+	#
+	# `#get` is not designed to be fast, but should be fine for most applications.
+	# Nothing can beat a custom implementation tailored to a specific problem, so in case this
+	# algorithm isn't good enough for you, feel free to override this function for your specific data-set.
+	# WARNING: throws an exception if no value is found.
+	# NOTE: for a safe version, use `#get?`.
+	def get(keys : String | Array(String)) : Array(V)
+		get_with_keys(keys).map &.[0]
+	end
+
+	# Safe version of `#get`, gets data and returns *an empty array* in case of
+	# a missing entry instead of an exception.
+	#
+	# ```
+	# fast_cars = cars_by_keywords.get? "fast"
+	# cars_both_fast_and_expensive = cars_by_keywords.get? ["fast", "expensive"]
+	# ```
+	def get?(tag : String | Array(String)) : Array(V)
+		get tag
+	rescue MissingEntry
+		Array(V).new
+	end
+
+	# Deletes entries with the provided tag, or matching all the provided tags.
+	#
+	# ```
+	# cars_by_keywords.delete "slow"                # Deletes all slow cars.
+	# cars_by_keywords.delete ["slow", "expensive"] # Deletes all cars that are both slow and expensive.
+	# ```
+	# WARNING: throws an exception if no value is found.
+	def delete(tag : String | Array(String))
+		delete tag, do true end
+	end
+
+	# Deletes entries within the provided tag index that match the provided block of code.
+	#
+	# ```
+	# # Deletes all slow Corvets.
+	# cars_by_keywords.delete "slow", do |car|
+	#   car.name == "Corvet"
+	# end
+	#
+	# # Deletes all slow and expensive Bullet-GTs.
+	# cars_by_keywords.delete ["slow", "expensive"], do |car|
+	#   car.name == "Bullet-GT"
+	# end
+	# ```
+	def delete(tags : String | Array(String), &matcher : Proc(V, Bool))
+		get_with_keys(tags).each do |entry, key|
+			if yield entry
+				@storage.delete key
+			end
+		end
+	end
+
+	private def get_key(path : String) : Int32
+		path.sub(/^.*\//, "").to_i
+	end
+
+	def trigger_directory : String
+		"#{@storage_root}/tags/by_#{@name}"
+	end
+
+	private def trigger_directory(tag)
+		"#{trigger_directory}/#{tag}"
+	end
+
+	private def get_tagged_entry_path(tag : String, key : String)
+		"#{trigger_directory}/#{tag}/#{key}"
+	end
+
+	private def get_data_symlink(key : String)
+		"../../../data/#{key}"
+	end
+end
+
+# Tags for n-to-n relations.
+# Cached version.
+#
+# ```
+# cars_by_keywords = car_database.new_tags "keywords", &.keywords
+# ```
+#
+# This (tag) index provides a file-system representation, enabling the administrators to
+# select a value based on its index.
+#
+# The following example presents an index named "keywords" with some data indexed by a "keywords" attribute.
+#
+# ```plain
+# storage
+# ├── data
+# │   ├── 0000000000 <- this car is expensive and fast
+# │   ├── 0000000001 <- this car is expensive
+# │   └── 0000000002 <- this car is expensive
+# ├── tags
+# │   └── by_keywords <- this is an example of index named "keywords"
+# │       ├── fast
+# │       │   └── 0000000000 -> ../../data/0000000000
+# │       └── expensive
+# │           ├── 0000000000 -> ../../data/0000000000
+# │           ├── 0000000001 -> ../../data/0000000001
+# │           └── 0000000002 -> ../../data/0000000002
+# ```
+#
+# NOTE: cached, reasonable amount of memory used since it's just an index.
+# NOTE: fast for retrieval, slow for index creation and deletion (fs operations).
+# NOTE: see `Tags` for an uncached version, even less memory-hungry.
+# NOTE: for fast operations without fs representation, see `TagsRAMOnly`.
+class DODB::Trigger::TagsCached(V) < DODB::Trigger::Tags(V)
+	# This hash contains the relation between the index key and the data keys.
+	property data = Hash(String, Array(Int32)).new
+
+	def index(key : String, value : V)
+		tags = key_proc.call value
+		return if tags.is_a? NoIndex
+		super(key, value)
+
+		tags.each do |tag|
+			array = if v = @data[tag]?
+				v
+			else
+				Array(Int32).new
+			end
+			array << key.to_i
+
+			@data[tag] = array
+		end
+	end
+
+	# :inherit:
+	# TODO: in case the tag is left empty, should it be removed from the cache?
+	def deindex(key : String, value : V)
+		tags = key_proc.call value
+		return if tags.is_a? NoIndex
+		super(key, value)
+
+		tags.each do |tag|
+			if v = @data[tag]?
+				v.delete key.to_i
+				@data[tag] = v
+			end
+		end
+	end
+
+	# In `DODB::Trigger::TagsCached`, `#get_with_keys(tag : String)` is modified to retrieve data keys from the index cache.
+	# In case the data isn't already in the cache, it is retrieved from the file-system.
+	#
+	# ```
+	# # Get all slow cars.
+	# cars_by_keywords.get_with_keys "slow"
+	# # Returns something like:
+	# # [ (@storage[42], 42)
+	# # , (@storage[91], 91)
+	# # ]
+	# # Each tuple is composed of a car and its key in the database.
+	# ```
+	# WARNING: throws a MissingEntry exception on non-existing tag.
+	def get_with_keys(tag : String) : Array(Tuple(V, Int32))
+		r_value = Array(Tuple(V, Int32)).new
+
+		# In case the tag is cached.
+		if keys = @data[tag]?
+			keys.each do |data_key|
+				r_value << { @storage[data_key], data_key }
+			end
+		else
+			# Gets data from the database representation on the file-system.
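+			# The keys are then cached for subsequent calls.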
+			r_value = super(tag)
+			@data[tag] = r_value.map &.[1]
+		end
+		r_value
+	end
+
+	# Clears the cache and removes the `#trigger_directory`.
+	def nuke_trigger
+		super
+		data.clear
+	end
+end
+
+# Tags for n-to-n relations.
+# RAM-only version.
+#
+# ```
+# cars_by_keywords = car_database.new_RAM_tags "keywords", &.keywords
+# ```
+#
+# Since there are no file-system operations, all the operations are fast.
+# `DODB::Trigger::TagsRAMOnly` enables the flexibility of tags without a file-system representation.
+# Absolute efficiency, exactly as easy to use as the other tag implementations.
+#
+# NOTE: reasonable amount of memory used since it's just an index.
+# NOTE: fast for all operations, but no file-system representation.
+# NOTE: see `Tags` for an uncached version, even less memory-hungry.
+# NOTE: for an fs representation but still fast for retrieval, see `TagsCached`.
+class DODB::Trigger::TagsRAMOnly(V) < DODB::Trigger::TagsCached(V)
+	def index(key : String, value : V)
+		tags = key_proc.call value
+		return if tags.is_a? NoIndex
+
+		tags.each do |tag|
+			array = if v = @data[tag]?
+				v
+			else
+				Array(Int32).new
+			end
+			array << key.to_i
+
+			@data[tag] = array
+		end
+	end
+
+	def deindex(key : String, value : V)
+		tags = key_proc.call value
+		return if tags.is_a? NoIndex
+
+		tags.each do |tag|
+			if v = @data[tag]?
+				v.delete key.to_i
+				@data[tag] = v
+			end
+		end
+	end
+
+	# Gets tag entries from the in-memory tag cache.
+	#
+	# ```
+	# # Get all slow cars.
+	# cars_by_keywords.get_with_keys "slow"
+	# # Returns something like:
+	# # [ (@storage[42], 42)
+	# # , (@storage[91], 91)
+	# # ]
+	# # Each tuple is composed of a car and its key in the database.
+	# ```
+	# WARNING: FOR CONSISTENCY, throws a MissingEntry exception on non-existing tag.
+	def get_with_keys(tag : String) : Array(Tuple(V, Int32))
+		r_value = Array(Tuple(V, Int32)).new
+		if keys = @data[tag]?
+			keys.each do |data_key|
+				r_value << { @storage[data_key], data_key }
+			end
+		else
+			raise MissingEntry.new(@name, tag)
+		end
+		r_value
+	end
+
+	# Clears the cache.
+	def nuke_trigger
+		data.clear
+	end
+end
diff --git a/src/fifo.cr b/src/fifo.cr
new file mode 100644
index 0000000..77801a0
--- /dev/null
+++ b/src/fifo.cr
@@ -0,0 +1,128 @@
+require "./list.cr"
+
+# This class is a simpler implementation of `EfficientFIFO`, used to implement an eviction policy for the data cache
+# of `DODB::Storage::Common`.
+# It keeps track of recently used data.
+#
+# **How this works.**
+# Each time a value is added to the database, its key is pushed into this "FIFO" structure.
+# In this structure, **values are unique**: adding a value several times is considered as "using the value",
+# so the value is pushed back to the front of the FIFO, as if it were new.
+# In case the number of entries exceeds what is allowed, the least recently used value is removed.
+# ```
+# fifo = FIFO(Int32).new 3 # Only 3 allowed entries.
+#
+# pp! fifo << 1 # -> nil (there is still room in the FIFO structure)
+# pp! fifo << 2 # -> nil (there is still room in the FIFO structure)
+# pp! fifo << 3 # -> nil (last entry without exceeding the allowed size)
+# pp! fifo << 4 # -> 1 (least recently used data)
+# pp! fifo << 4 # -> nil (already in the structure)
+# pp! fifo << 2 # -> nil (already in the structure)
+# pp! fifo << 5 # -> 3 (least recently used data)
+# ```
+#
+# The number of entries in the FIFO structure is configurable.
+# WARNING: this implementation becomes slow very fast (O(n) complexity), but doesn't cost much memory.
+# WARNING: this *FIFO* class doesn't allow the same value multiple times.
+class FIFO(V)
+	# This array is used as the *fifo structure*.
+	property data : Array(V)
+
+	# Maximum allowed entries in the structure.
+	property max_entries : UInt32
+
+	def initialize(@max_entries : UInt32)
+		@data = Array(V).new
+	end
+
+	# Pushes a value into the FIFO and returns the least recently used value in case the allowed number of entries is exceeded, `nil` otherwise.
+	# NOTE: `#<<(v : V)` is (almost) the only function since it's enough for the intended use, feel free to improve this.
+	# WARNING: implementation is extremely simple (3 lines) and not designed to be highly efficient.
+	def <<(v : V) : V?
+		@data.select! { |x| v != x }           # Remove the value if already present.
+		@data.unshift v                        # Push it (back) at the front of the structure.
+		@data.pop if @data.size > @max_entries # Remove and return the least recently used entry if `@data` is too big.
+	end
+
+	# Removes a value.
+	def delete(v : V)
+		@data.select! { |x| v != x }
+	end
+
+	def to_s(io : IO)
+		data.to_s(io)
+	end
+end
+
+# This class is used to implement the data-cache eviction policy of `DODB::Storage::Common`.
+# It keeps track of recently used data.
+#
+# **How this works**.
+# Each time a value is added to the database, its key is put in this "FIFO" structure.
+# In this structure, **values are unique** and adding a value several times counts as *using* it,
+# so it is moved back to the front of the FIFO, as if it were a new value.
+# When the number of entries exceeds the configured limit, the least recently used value is removed.
+# ```
+# fifo = EfficientFIFO(Int32).new 3 # Only 3 allowed entries.
+#
+# pp! fifo << 1 # -> nil (there is still room in the FIFO structure)
+# pp! fifo << 2 # -> nil (there is still room in the FIFO structure)
+# pp! fifo << 3 # -> nil (last entry without exceeding the allowed size)
+# pp! fifo << 4 # -> 1 (least recently used data)
+# pp! fifo << 4 # -> nil (already in the structure)
+# pp! fifo << 2 # -> nil (already in the structure)
+# pp! fifo << 5 # -> 3 (least recently used data)
+# ```
+#
+# **Implementation details.**
+# Contrary to the `FIFO` class, this implementation is time-efficient.
+# However, this efficiency comes with a memory tradeoff: each entry is added to a doubly-linked list to keep
+# track of the order **and** to a hash enabling efficient searches of the values in the list.
+# Thus, each node is referenced twice: once in the list, once in the hash.
+#
+# The number of entries in the FIFO structure is configurable.
+# NOTE: this implementation is time-efficient, but costs some memory.
+class EfficientFIFO(V)
+	# Both the list and the hash are used as the *fifo structure*.
+	# The list preserves the *order* of the entries while the hash enables fast retrieval of entries in the list.
+	property list : DoubleLinkedList(V)
+	property hash : Hash(V, DoubleLinkedList::Node(V))
+
+	def initialize(@max_entries : UInt32)
+		@list = DoubleLinkedList(V).new
+		@hash = Hash(V, DoubleLinkedList::Node(V)).new
+	end
+
+	# Pushes a value into the FIFO and returns the least recently used value in case the allowed number of entries is exceeded, `nil` otherwise.
+	# NOTE: `#<<(v : V)` is (almost) the only function since it's enough for the intended use, feel free to improve this.
+	def <<(v : V) : V?
+		if node = hash[v]?
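+			# The value is already in the FIFO: unlink its node from the list,
+			# it will be pushed back at the front right below.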
+			list.delete node
+		end
+
+		# Push (back) at the front of the structure.
+		node = @list.unshift v
+		hash[v] = node
+
+		# Remove the least recently used entry if `@list` is too big.
+		if list.size > @max_entries
+			node = @list.pop
+			hash.delete node.value
+			node.value
+		else
+			nil
+		end
+	end
+
+	# Removes a value.
+	def delete(v : V)
+		if node = hash[v]?
+			list.delete node
+			hash.delete v
+		end
+	end
+
+	def to_s(io : IO)
+		list.to_s(io)
+	end
+end
diff --git a/src/list.cr b/src/list.cr
new file mode 100644
index 0000000..bc0a0d2
--- /dev/null
+++ b/src/list.cr
@@ -0,0 +1,447 @@
+# A linked list is a data structure with each value stored in a small structure called a "node".
+# Each node points to the next one, making the whole structure *Enumerable*.
+# The following code implements a *doubly-linked list*, meaning each node points to the next **and the previous**.
+# Pointers to neighboring nodes are required since nodes are not stored in contiguous memory locations.
+#
+# ```
+# list = DoubleLinkedList(Int32).new
+# list.push(42)
+# pp! list.pop.value # -> 42
+# ```
class DoubleLinkedList(V)
+	# Each value of a linked list is put in a *node*.
+	# Nodes are *linked* together thanks to pointers to both the next node
+	# and the previous one, since this is a *doubly-linked* list.
+	#
+	# ```
+	# node1 = Node.new(42)
+	# node2 = Node.new(10)
+	#
+	# pp! node1.value # -> 42
+	# pp! node2.value # -> 10
+	#
+	# node1.next = node2
+	# node2.previous = node1
+	# ```
+	class Node(V)
+		property next : Node(V)? = nil
+		property previous : Node(V)? = nil
+		property value : V
+
+		# Creates a node.
+		def initialize(@value : V)
+		end
+
+		def to_s(io)
+			io << @value
+		end
+	end
+
+	class OutOfBounds < ::Exception
+	end
+	class BrokenList < ::Exception
+	end
+
+	property first : Node(V) | Nil
+	property last : Node(V) | Nil
+	property size : UInt32 = 0
+
+	include Enumerable(V | Nil)
+
+	# Creates an empty doubly-linked list.
+	def initialize
+		@first = nil
+		@last = nil
+	end
+
+	# Creates a linked list from the values of any `Enumerable`.
+	def initialize(values : Enumerable(V))
+		@first = nil
+		@last = nil
+		values.each do |value|
+			push(value)
+		end
+	end
+
+	# Pushes a value at the end of the list.
+	#
+	# ```
+	# list = DoubleLinkedList(Int32).new
+	# list.push 1
+	# list.push 2
+	# list.pop.value # => 2
+	# list.pop.value # => 1
+	# ```
+	def push(value : V) : Node(V)
+		# `@first` is nil = pushing the first entry.
+		if @first.nil?
+			new_node = Node(V).new(value)
+			@first = new_node
+			@last = @first
+		else
+			new_node = Node(V).new(value)
+			@last.not_nil!.next = new_node
+			new_node.previous = @last
+			@last = new_node
+		end
+		@size += 1
+		new_node
+	end
+
+	# Removes a node from the list.
+	# Nodes are compared by reference: *n* must be a node of this list, not a copy.
+	#
+	# ```
+	# list = DoubleLinkedList(Int32).new
+	# list << 1 << 2 << 3 << 4 # -> [ 1, 2, 3, 4 ]
+	# list.delete list[1]      # -> [ 1, 3, 4 ]
+	# ```
+	def delete(n : Node(V)) : Node(V)
+		if first = @first
+			@first = n.next if n.same? first
+		end
+		if last = @last
+			@last = n.previous if n.same? last
+		end
+		if prev_node = n.previous
+			prev_node.next = n.next
+		end
+		if next_node = n.next
+			next_node.previous = n.previous
+		end
+		@size -= 1
+		n
+	end
+
+	# Removes an entry at an index.
+	#
+	# ```
+	# list = DoubleLinkedList(Int32).new
+	# list << 1 << 2 << 3 << 4 # -> [ 1, 2, 3, 4 ]
+	# list.delete_at 2         # -> [ 1, 2, 4 ]
+	# ```
+	def delete_at(index : Int32) : Node(V)
+		if index == 0
+			shift
+		elsif index == @size - 1
+			pop
+		else
+			v = self[index]
+			prev_node = v.previous.not_nil!
+			next_node = v.next.not_nil!
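+			# Unlink the node by bridging its two neighbors.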
+			prev_node.next = next_node
+			next_node.previous = prev_node
+			@size -= 1
+			v
+		end
+	end
+
+	# Adds a *value* to the linked list at a specified index.
+	#
+	# ```
+	# list = DoubleLinkedList(Int32).new
+	# list << 1            # -> [1]
+	# list << 2            # -> [1] [2]
+	# list.insert_at(3, 1) # -> [1] [3] [2]
+	# ```
+	# WARNING: this operation is slow, at worst O(n).
+	def insert_at(value : V, index : Int32) : Node(V)
+		# Error case.
+		raise OutOfBounds.new if index < 0 || index > size
+
+		# Special case: insert the first value.
+		return unshift(value) if index == 0
+
+		new_node = Node(V).new(value)
+
+		# Special case: insert the last value.
+		if size == index
+			new_node.previous = @last
+			@last.not_nil!.next = new_node
+			@last = new_node
+			@size += 1
+			return new_node
+		end
+
+		# General case.
+		current = @first.not_nil!.next.not_nil!
+		i = 1
+
+		while i < index
+			if next_entry = current.next
+				current = next_entry
+			else
+				raise BrokenList.new "unlinked list at index #{i}"
+			end
+			i += 1
+		end
+
+		new_node.next = current
+		new_node.previous = current.previous
+
+		current.previous.not_nil!.next = new_node
+		current.previous = new_node
+
+		@size += 1
+		new_node
+	end
+
+	# Adds a value to the end of the linked list.
+	# Can be chained.
+	#
+	# ```
+	# list = DoubleLinkedList(Int32).new
+	# list << 1
+	# list << 2 << 3
+	# list.pop.value # => 3
+	# list.pop.value # => 2
+	# list.pop.value # => 1
+	# ```
+	def <<(value : V) : DoubleLinkedList(V)
+		push(value)
+		self
+	end
+
+	# Adds several values to the end of the list.
+	#
+	# ```
+	# list = DoubleLinkedList(Int32).new
+	# list.push(1, 2)
+	# list.pop.value # => 2
+	# list.pop.value # => 1
+	# ```
+	def push(*values)
+		values.each do |value|
+			push(value)
+		end
+	end
+
+	# Adds a value to the beginning of the list.
+	#
+	# ```
+	# list = DoubleLinkedList(Int32).new
+	# list << 1
+	# list.unshift 2
+	# list.pop.value # => 1
+	# list.pop.value # => 2
+	# ```
+	def unshift(value : V) : Node(V)
+		if first = @first
+			new_node = Node(V).new(value)
+			new_node.next = first
+			first.previous = new_node
+			@first = new_node
+			@size += 1
+			new_node
+		else
+			push value
+		end
+	end
+
+	# Removes the first node of the list and returns it.
+	#
+	# ```
+	# list = DoubleLinkedList(Int32).new
+	# list << 1
+	# list << 2
+	# list.shift.value # -> 1
+	# ```
+	def shift : Node(V)
+		if first = @first
+			@first = first.next
+			if new_first = @first
+				new_first.previous = nil
+			else
+				# The list is now empty.
+				@last = nil
+			end
+			@size -= 1
+			first
+		else
+			raise OutOfBounds.new "shifting while no value in the list"
+		end
+	end
+
+	# Returns the last node of the linked list **without removing it**.
+	#
+	# ```
+	# list = DoubleLinkedList(Int32).new([1])
+	# list << 2
+	# list.peek.value # => 2
+	# ```
+	def peek : Node(V)
+		if last = @last
+			last
+		else
+			raise OutOfBounds.new "peek at an empty list"
+		end
+	end
+
+	# Removes the last node of the list and returns it.
+	#
+	# ```
+	# list = DoubleLinkedList(Int32).new
+	# list << 1
+	# list << 2
+	# list.pop.value  # => 2
+	# list.peek.value # => 1
+	# ```
+	def pop : Node(V)
+		if @size == 0
+			raise OutOfBounds.new "pop an empty list"
+		end
+
+		@size -= 1
+
+		if @size == 0 # Size was just decremented above: the list had a single element.
+			if current = @first
+				@first = nil
+				@last = nil
+				return current
+			else
+				raise BrokenList.new "pop a list of 1 element but 'first' is nil"
+			end
+		end
+
+		if last = @last
+			@last = last.previous
+			@last.not_nil!.next = nil
+			last
+		else
+			raise BrokenList.new "'last' element is nil despite size > 0"
+		end
+	end
+
+	# Iterates over all the values in the linked list.
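+	# Yields each value in order, from the first node to the last.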
+	#
+	# ```
+	# values = [1, 2, 3]
+	# list = DoubleLinkedList(Int32).new(values)
+	# list.each do |elem|
+	#   puts elem
+	# end
+	# ```
+	def each
+		each_node do |node|
+			yield node.value
+		end
+		self
+	end
+
+	# Returns a **new** `DoubleLinkedList` with all the elements of this list
+	# followed by all the elements of *list*.
+	#
+	# ```
+	# list1 = DoubleLinkedList(Int32).new([1, 2])
+	# list2 = DoubleLinkedList(String).new(["3", "4"])
+	#
+	# list3 = list1 + list2
+	# list3.peek.value  # => "4"
+	# list3.shift.value # => 1
+	# ```
+	def +(list : Enumerable(C)) forall C
+		DoubleLinkedList(V | C).new.tap do |new_list|
+			each do |value|
+				new_list.push(value)
+			end
+			list.each do |value|
+				new_list.push(value)
+			end
+		end
+	end
+
+	# Provides the node at a given index in the list.
+	#
+	# TODO: either start with the first entry or the last depending on the index.
+	def [](index : Int32) : Node(V)
+		raise OutOfBounds.new if index < 0 || index >= @size
+
+		return @first.not_nil! if index == 0
+		return @last.not_nil! if index == @size - 1
+
+		i = 0
+		each_node do |node|
+			return node if i == index
+			i += 1
+		end
+
+		raise BrokenList.new "couldn't find the node, the list must be broken"
+	end
+
+	# Concatenates two lists.
+	#
+	# ```
+	# list1 = DoubleLinkedList(Int32).new([1, 2])
+	# list2 = DoubleLinkedList(Int32).new([3, 4])
+	#
+	# list1.concat list2
+	# # list1: 1 2 3 4
+	# ```
+	def concat(list : DoubleLinkedList(V)) : DoubleLinkedList(V)
+		if @size == 0
+			@first = list.first
+			@last = list.last
+		elsif last = @last
+			last.next = list.first
+			if list_first = list.first
+				list_first.previous = last
+				# The last node of the appended list becomes the last node of this one.
+				@last = list.last
+			end
+		end
+		@size += list.size
+		self
+	end
+
+	# Returns true if and only if there are no elements in the list.
+	#
+	# ```
+	# list = DoubleLinkedList(Int32).new
+	# list.empty? # => true
+	# list << 1
+	# list.empty? # => false
+	# ```
+	def empty?
+		@size == 0
+	end
+
+	# Creates a copy of the `DoubleLinkedList` with the order reversed.
+	#
+	# ```
+	# list = DoubleLinkedList(Int32).new([1, 2, 3])
+	# reversed_list = list.reverse
+	# reversed_list.pop.value # => 1
+	# reversed_list.pop.value # => 2
+	# reversed_list.pop.value # => 3
+	# ```
+	def reverse
+		DoubleLinkedList(V).new.tap do |new_list|
+			each do |value|
+				new_list.unshift value
+			end
+		end
+	end
+
+	# Iterates over the nodes.
+	#
+	# ```
+	# values = [1, 2, 3]
+	# list = DoubleLinkedList(Int32).new(values)
+	# list.each_node do |elem|
+	#   puts elem.value
+	# end
+	# ```
+	private def each_node
+		return if @first.nil?
+
+		current = @first
+		yield current.not_nil!
+
+		while current = current.not_nil!.next
+			yield current
+		end
+	end
+
+	# Fancy print of the list's content.
+	def to_s(io : IO)
+		io << "[ "
+		remaining_values = @size
+		each do |value|
+			io << value
+			remaining_values -= 1
+			io << ", " unless remaining_values == 0
+		end
+		io << " ]"
+	end
+end
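+
+# A minimal usage sketch of the structure above (illustrative only, not part of
+# the test suite):
+#
+# ```
+# list = DoubleLinkedList(Int32).new([1, 2, 3])
+# list.push 4      # [ 1, 2, 3, 4 ]
+# list.shift.value # => 1
+# list.pop.value   # => 4
+# puts list        # prints "[ 2, 3 ]"
+# ```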