From 183432eb32e72b69f0f5d30248fdb72068ba76d9 Mon Sep 17 00:00:00 2001 From: Philippe PITTOLI Date: Mon, 6 May 2024 18:31:52 +0200 Subject: [PATCH] Cached indexes by default. --- README.md | 7 +++++-- spec/benchmark-cars.cr | 44 ++++++++++++++++++++++++++++++++++++++++-- src/cached.cr | 18 ----------------- src/dodb.cr | 26 ++++++++++++++++++++++++- 4 files changed, 72 insertions(+), 23 deletions(-) diff --git a/README.md b/README.md index a4210b2..daea8aa 100644 --- a/README.md +++ b/README.md @@ -45,8 +45,11 @@ With DODB, to reach on-disk data: 13 microseconds. To search then retrieve indexed data: almost the same thing, 16 microseconds on average, since it's just a path to a symlink we have to build. With the `cached` version of DODB, there is not even deserialization happening, so 7 nanoseconds. -For indexes (indexes, partitions and tags), the speed up *"only"* is about 14 compared to the uncached version, because indexes still walk the file-system. -I may develop fully cached indexes at some point, but keep in mind that this costs memory (but yeah, again, insane speeds). + +Indexes (indexes, partitions and tags) are also cached **by default**. +The speed up is great compared to the uncached version since you won't walk the file-system. +Searching an index takes about 35 nanoseconds when cached. +To avoid the memory cost of cached indexes, you can explicitly ask for uncached ones. **NOTE:** of course SQL and DODB cannot be fairly compared based on performance since they don't have the same properties. But still, this is the kind of speed you can get with the tool. 
diff --git a/spec/benchmark-cars.cr b/spec/benchmark-cars.cr index f850c4c..988a4fb 100644 --- a/spec/benchmark-cars.cr +++ b/spec/benchmark-cars.cr @@ -38,6 +38,23 @@ class DODBUnCachedCars < DODB::DataBase(Car) end end +class DODBSemiCachedCars < DODB::DataBase(Car) + property storage_dir : String + def initialize(storage_ext = "", remove_previous_data = true) + @storage_dir = "test-storage-cars-semi#{storage_ext}" + + if remove_previous_data + ::FileUtils.rm_rf storage_dir + end + + super storage_dir + end + + def rm_storage_dir + ::FileUtils.rm_rf @storage_dir + end +end + def init_indexes(storage : DODB::Storage) n = storage.new_index "name", &.name c = storage.new_partition "color", &.color @@ -45,6 +62,13 @@ def init_indexes(storage : DODB::Storage) return n, c, k end +def init_uncached_indexes(storage : DODB::Storage) + n = storage.new_uncached_index "name", &.name + c = storage.new_uncached_partition "color", &.color + k = storage.new_uncached_tags "keyword", &.keywords + return n, c, k +end + def add_cars(storage : DODB::Storage, nb_iterations : Int32) i = 0 car1 = Car.new "Corvet", "red", [ "shiny", "impressive", "fast", "elegant" ] @@ -73,12 +97,15 @@ end cars_cached = DODBCachedCars.new cars_uncached = DODBUnCachedCars.new +cars_semi = DODBSemiCachedCars.new cached_searchby_name, cached_searchby_color, cached_searchby_keywords = init_indexes cars_cached -uncached_searchby_name, uncached_searchby_color, uncached_searchby_keywords = init_indexes cars_uncached +uncached_searchby_name, uncached_searchby_color, uncached_searchby_keywords = init_uncached_indexes cars_uncached +semi_searchby_name, semi_searchby_color, semi_searchby_keywords = init_indexes cars_semi add_cars cars_cached, 1_000 add_cars cars_uncached, 1_000 +add_cars cars_semi, 1_000 # Searching for data with an index. 
Benchmark.ips do |x| @@ -86,6 +113,10 @@ Benchmark.ips do |x| corvet = cached_searchby_name.get "Corvet-500" end + x.report("(cars db) searching a data with an index (semi: cache is only on index)") do + corvet = semi_searchby_name.get "Corvet-500" + end + x.report("(cars db) searching a data with an index (without a cache)") do corvet = uncached_searchby_name.get "Corvet-500" end @@ -97,6 +128,10 @@ Benchmark.ips do |x| red_cars = cached_searchby_color.get "red" end + x.report("(cars db) searching a data with a partition (semi: cache is only on partition)") do + red_cars = semi_searchby_color.get "red" + end + x.report("(cars db) searching a data with a partition (without a cache)") do red_cars = uncached_searchby_color.get "red" end @@ -108,6 +143,10 @@ Benchmark.ips do |x| red_cars = cached_searchby_keywords.get "spacious" end + x.report("(cars db) searching a data with a tag (semi: cache is only on tags)") do + red_cars = semi_searchby_keywords.get "spacious" + end + x.report("(cars db) searching a data with a tag (without a cache)") do red_cars = uncached_searchby_keywords.get "spacious" end @@ -122,7 +161,7 @@ cars_uncached = DODBUnCachedCars.new #init_indexes cars_cached #init_indexes cars_uncached cached_searchby_name, cached_searchby_color, cached_searchby_keywords = init_indexes cars_cached -uncached_searchby_name, uncached_searchby_color, uncached_searchby_keywords = init_indexes cars_uncached +uncached_searchby_name, uncached_searchby_color, uncached_searchby_keywords = init_uncached_indexes cars_uncached add_cars cars_cached, 1_000 add_cars cars_uncached, 1_000 @@ -139,3 +178,4 @@ end cars_cached.rm_storage_dir cars_uncached.rm_storage_dir +cars_semi.rm_storage_dir diff --git a/src/cached.cr b/src/cached.cr index c4f436b..b3a88e7 100644 --- a/src/cached.cr +++ b/src/cached.cr @@ -108,22 +108,4 @@ class DODB::CachedDataBase(V) < DODB::Storage(V) super @data = Hash(Int32, V).new end - - def new_index(name : String, &block : Proc(V, String)) - 
CachedIndex(V).new(self, @directory_name, name, block).tap do |indexer| - @indexers << indexer - end - end - - def new_partition(name : String, &block : Proc(V, String)) - CachedPartition(V).new(self, @directory_name, name, block).tap do |table| - @indexers << table - end - end - - def new_tags(name : String, &block : Proc(V, Array(String))) - CachedTags(V).new(self, @directory_name, name, block).tap do |tags| - @indexers << tags - end - end end diff --git a/src/dodb.cr b/src/dodb.cr index 074b3b4..f81f7ab 100644 --- a/src/dodb.cr +++ b/src/dodb.cr @@ -112,12 +112,24 @@ abstract class DODB::Storage(V) ## # name is the name that will be used on the file system. def new_index(name : String, &block : Proc(V, String)) - Index(V).new(self, @directory_name, name, block).tap do |indexer| + CachedIndex(V).new(self, @directory_name, name, block).tap do |indexer| @indexers << indexer end end def new_nilable_index(name : String, &block : Proc(V, String | DODB::NoIndex)) + CachedIndex(V).new(self, @directory_name, name, block).tap do |indexer| + @indexers << indexer + end + end + + def new_uncached_index(name : String, &block : Proc(V, String)) + Index(V).new(self, @directory_name, name, block).tap do |indexer| + @indexers << indexer + end + end + + def new_nilable_uncached_index(name : String, &block : Proc(V, String | DODB::NoIndex)) Index(V).new(self, @directory_name, name, block).tap do |indexer| @indexers << indexer end @@ -132,6 +144,12 @@ abstract class DODB::Storage(V) ## # name is the name that will be used on the file system. 
def new_partition(name : String, &block : Proc(V, String)) + CachedPartition(V).new(self, @directory_name, name, block).tap do |table| + @indexers << table + end + end + + def new_uncached_partition(name : String, &block : Proc(V, String)) Partition(V).new(self, @directory_name, name, block).tap do |table| @indexers << table end @@ -148,6 +166,12 @@ abstract class DODB::Storage(V) end def new_tags(name : String, &block : Proc(V, Array(String))) + CachedTags(V).new(self, @directory_name, name, block).tap do |tags| + @indexers << tags + end + end + + def new_uncached_tags(name : String, &block : Proc(V, Array(String))) Tags(V).new(self, @directory_name, name, block).tap do |tags| @indexers << tags end