Cached indexes by default.

This commit is contained in:
Philippe PITTOLI 2024-05-06 18:31:52 +02:00
parent a92b27d80d
commit 183432eb32
4 changed files with 72 additions and 23 deletions

View file

@ -45,8 +45,11 @@ With DODB, to reach on-disk data: 13 microseconds.
To search then retrieve indexed data: almost the same thing, 16 microseconds on average, since it's just a path to a symlink we have to build. To search then retrieve indexed data: almost the same thing, 16 microseconds on average, since it's just a path to a symlink we have to build.
With the `cached` version of DODB, there is not even deserialization happening, so 7 nanoseconds. With the `cached` version of DODB, there is not even deserialization happening, so 7 nanoseconds.
For indexes (indexes, partitions and tags), the speed-up is *"only"* about 14x compared to the uncached version, because indexes still walk the file-system.
I may develop fully cached indexes at some point, but keep in mind that this costs memory (but yeah, again, insane speeds). Indexes (indexes, partitions and tags) are also cached **by default**.
The speed up is great compared to the uncached version since you won't walk the file-system.
Searching an index takes about 35 nanoseconds when cached.
To avoid the memory cost of cached indexes, you can explicitly ask for uncached ones.
**NOTE:** of course SQL and DODB cannot be fairly compared based on performance since they don't have the same properties. **NOTE:** of course SQL and DODB cannot be fairly compared based on performance since they don't have the same properties.
But still, this is the kind of speed you can get with the tool. But still, this is the kind of speed you can get with the tool.

View file

@ -38,6 +38,23 @@ class DODBUnCachedCars < DODB::DataBase(Car)
end end
end end
# Car database for the "semi" benchmark case: it subclasses DODB::DataBase(Car)
# and stores its data under a "test-storage-cars-semi…" directory.
# NOTE(review): presumably DODB::DataBase is the non-cached data store, so only
# the indexes attached to it are cached — confirm against the DODB sources.
class DODBSemiCachedCars < DODB::DataBase(Car)
# Path of the storage directory given to the parent constructor.
property storage_dir : String
# storage_ext: suffix appended to the "test-storage-cars-semi" directory name.
# remove_previous_data: when true, the directory is deleted (rm_rf) before the
# parent class is initialized with it.
def initialize(storage_ext = "", remove_previous_data = true)
@storage_dir = "test-storage-cars-semi#{storage_ext}"
if remove_previous_data
::FileUtils.rm_rf storage_dir
end
super storage_dir
end
# Recursively removes the storage directory of this database.
def rm_storage_dir
::FileUtils.rm_rf @storage_dir
end
end
def init_indexes(storage : DODB::Storage) def init_indexes(storage : DODB::Storage)
n = storage.new_index "name", &.name n = storage.new_index "name", &.name
c = storage.new_partition "color", &.color c = storage.new_partition "color", &.color
@ -45,6 +62,13 @@ def init_indexes(storage : DODB::Storage)
return n, c, k return n, c, k
end end
# Registers three uncached indexers on `storage`: an index on the car name,
# a partition on the car color and tags on the car keywords.
# Returns the three indexers in that order (name, color, keywords).
def init_uncached_indexes(storage : DODB::Storage)
n = storage.new_uncached_index "name", &.name
c = storage.new_uncached_partition "color", &.color
k = storage.new_uncached_tags "keyword", &.keywords
return n, c, k
end
def add_cars(storage : DODB::Storage, nb_iterations : Int32) def add_cars(storage : DODB::Storage, nb_iterations : Int32)
i = 0 i = 0
car1 = Car.new "Corvet", "red", [ "shiny", "impressive", "fast", "elegant" ] car1 = Car.new "Corvet", "red", [ "shiny", "impressive", "fast", "elegant" ]
@ -73,12 +97,15 @@ end
cars_cached = DODBCachedCars.new cars_cached = DODBCachedCars.new
cars_uncached = DODBUnCachedCars.new cars_uncached = DODBUnCachedCars.new
cars_semi = DODBSemiCachedCars.new
cached_searchby_name, cached_searchby_color, cached_searchby_keywords = init_indexes cars_cached cached_searchby_name, cached_searchby_color, cached_searchby_keywords = init_indexes cars_cached
uncached_searchby_name, uncached_searchby_color, uncached_searchby_keywords = init_indexes cars_uncached uncached_searchby_name, uncached_searchby_color, uncached_searchby_keywords = init_uncached_indexes cars_uncached
semi_searchby_name, semi_searchby_color, semi_searchby_keywords = init_indexes cars_semi
add_cars cars_cached, 1_000 add_cars cars_cached, 1_000
add_cars cars_uncached, 1_000 add_cars cars_uncached, 1_000
add_cars cars_semi, 1_000
# Searching for data with an index. # Searching for data with an index.
Benchmark.ips do |x| Benchmark.ips do |x|
@ -86,6 +113,10 @@ Benchmark.ips do |x|
corvet = cached_searchby_name.get "Corvet-500" corvet = cached_searchby_name.get "Corvet-500"
end end
x.report("(cars db) searching a data with an index (semi: cache is only on index)") do
corvet = semi_searchby_name.get "Corvet-500"
end
x.report("(cars db) searching a data with an index (without a cache)") do x.report("(cars db) searching a data with an index (without a cache)") do
corvet = uncached_searchby_name.get "Corvet-500" corvet = uncached_searchby_name.get "Corvet-500"
end end
@ -97,6 +128,10 @@ Benchmark.ips do |x|
red_cars = cached_searchby_color.get "red" red_cars = cached_searchby_color.get "red"
end end
x.report("(cars db) searching a data with a partition (semi: cache is only on partition)") do
red_cars = semi_searchby_color.get "red"
end
x.report("(cars db) searching a data with a partition (without a cache)") do x.report("(cars db) searching a data with a partition (without a cache)") do
red_cars = uncached_searchby_color.get "red" red_cars = uncached_searchby_color.get "red"
end end
@ -108,6 +143,10 @@ Benchmark.ips do |x|
red_cars = cached_searchby_keywords.get "spacious" red_cars = cached_searchby_keywords.get "spacious"
end end
x.report("(cars db) searching a data with a tag (semi: cache is only on tags)") do
red_cars = semi_searchby_keywords.get "spacious"
end
x.report("(cars db) searching a data with a tag (without a cache)") do x.report("(cars db) searching a data with a tag (without a cache)") do
red_cars = uncached_searchby_keywords.get "spacious" red_cars = uncached_searchby_keywords.get "spacious"
end end
@ -122,7 +161,7 @@ cars_uncached = DODBUnCachedCars.new
#init_indexes cars_cached #init_indexes cars_cached
#init_indexes cars_uncached #init_indexes cars_uncached
cached_searchby_name, cached_searchby_color, cached_searchby_keywords = init_indexes cars_cached cached_searchby_name, cached_searchby_color, cached_searchby_keywords = init_indexes cars_cached
uncached_searchby_name, uncached_searchby_color, uncached_searchby_keywords = init_indexes cars_uncached uncached_searchby_name, uncached_searchby_color, uncached_searchby_keywords = init_uncached_indexes cars_uncached
add_cars cars_cached, 1_000 add_cars cars_cached, 1_000
add_cars cars_uncached, 1_000 add_cars cars_uncached, 1_000
@ -139,3 +178,4 @@ end
cars_cached.rm_storage_dir cars_cached.rm_storage_dir
cars_uncached.rm_storage_dir cars_uncached.rm_storage_dir
cars_semi.rm_storage_dir

View file

@ -108,22 +108,4 @@ class DODB::CachedDataBase(V) < DODB::Storage(V)
super super
@data = Hash(Int32, V).new @data = Hash(Int32, V).new
end end
##
# Builds a CachedIndex(V) for this database, registers it in @indexers and
# returns it.
# name is the name that will be used on the file system.
# block computes the index key (a String) from a value.
def new_index(name : String, &block : Proc(V, String))
CachedIndex(V).new(self, @directory_name, name, block).tap do |indexer|
@indexers << indexer
end
end
##
# Builds a CachedPartition(V) for this database, registers it in @indexers and
# returns it.
# name is the name that will be used on the file system.
# block computes the partition key (a String) from a value.
def new_partition(name : String, &block : Proc(V, String))
CachedPartition(V).new(self, @directory_name, name, block).tap do |table|
@indexers << table
end
end
##
# Builds a CachedTags(V) for this database, registers it in @indexers and
# returns it.
# name is the name that will be used on the file system.
# block computes the list of tags (an Array(String)) from a value.
def new_tags(name : String, &block : Proc(V, Array(String)))
CachedTags(V).new(self, @directory_name, name, block).tap do |tags|
@indexers << tags
end
end
end end

View file

@ -112,12 +112,24 @@ abstract class DODB::Storage(V)
## ##
# name is the name that will be used on the file system. # name is the name that will be used on the file system.
def new_index(name : String, &block : Proc(V, String)) def new_index(name : String, &block : Proc(V, String))
Index(V).new(self, @directory_name, name, block).tap do |indexer| CachedIndex(V).new(self, @directory_name, name, block).tap do |indexer|
@indexers << indexer @indexers << indexer
end end
end end
def new_nilable_index(name : String, &block : Proc(V, String | DODB::NoIndex)) def new_nilable_index(name : String, &block : Proc(V, String | DODB::NoIndex))
CachedIndex(V).new(self, @directory_name, name, block).tap do |indexer|
@indexers << indexer
end
end
##
# Builds an Index(V) (as opposed to a CachedIndex(V)), registers it in
# @indexers and returns it.
# name is the name that will be used on the file system.
# block computes the index key (a String) from a value.
def new_uncached_index(name : String, &block : Proc(V, String))
Index(V).new(self, @directory_name, name, block).tap do |indexer|
@indexers << indexer
end
end
def new_nilable_uncached_index(name : String, &block : Proc(V, String | DODB::NoIndex))
Index(V).new(self, @directory_name, name, block).tap do |indexer| Index(V).new(self, @directory_name, name, block).tap do |indexer|
@indexers << indexer @indexers << indexer
end end
@ -132,6 +144,12 @@ abstract class DODB::Storage(V)
## ##
# name is the name that will be used on the file system. # name is the name that will be used on the file system.
def new_partition(name : String, &block : Proc(V, String)) def new_partition(name : String, &block : Proc(V, String))
CachedPartition(V).new(self, @directory_name, name, block).tap do |table|
@indexers << table
end
end
def new_uncached_partition(name : String, &block : Proc(V, String))
Partition(V).new(self, @directory_name, name, block).tap do |table| Partition(V).new(self, @directory_name, name, block).tap do |table|
@indexers << table @indexers << table
end end
@ -148,6 +166,12 @@ abstract class DODB::Storage(V)
end end
def new_tags(name : String, &block : Proc(V, Array(String))) def new_tags(name : String, &block : Proc(V, Array(String)))
CachedTags(V).new(self, @directory_name, name, block).tap do |tags|
@indexers << tags
end
end
def new_uncached_tags(name : String, &block : Proc(V, Array(String)))
Tags(V).new(self, @directory_name, name, block).tap do |tags| Tags(V).new(self, @directory_name, name, block).tap do |tags|
@indexers << tags @indexers << tags
end end