Cached indexes by default.

paper
Philippe PITTOLI 2024-05-06 18:31:52 +02:00
parent a92b27d80d
commit 183432eb32
4 changed files with 72 additions and 23 deletions

View File

@ -45,8 +45,11 @@ With DODB, to reach on-disk data: 13 microseconds.
To search then retrieve indexed data: almost the same thing, 16 microseconds on average, since it's just a path to a symlink we have to build.
With the `cached` version of DODB, there is not even deserialization happening, so 7 nanoseconds.
For indexes (indexes, partitions and tags), the speed-up is *"only"* about 14× compared to the uncached version, because indexes still walk the file-system.
I may develop fully cached indexes at some point, but keep in mind that this costs memory (but yeah, again, insane speeds).
Indexes (indexes, partitions and tags) are also cached **by default**.
The speed up is great compared to the uncached version since you won't walk the file-system.
Searching an index takes about 35 nanoseconds when cached.
To avoid the memory cost of cached indexes, you can explicitly ask for uncached ones.
**NOTE:** of course SQL and DODB cannot be fairly compared based on performance since they don't have the same properties.
But still, this is the kind of speed you can get with the tool.

View File

@ -38,6 +38,23 @@ class DODBUnCachedCars < DODB::DataBase(Car)
end
end
# Car database built on `DODB::DataBase(Car)` that keeps its files in a
# dedicated "test-storage-cars-semi<ext>" directory.
class DODBSemiCachedCars < DODB::DataBase(Car)
  # Directory handed to the parent class as storage location.
  property storage_dir : String

  # storage_ext is appended to the storage directory name.
  # When remove_previous_data is true, the directory is deleted before it is
  # passed to the parent constructor.
  def initialize(storage_ext = "", remove_previous_data = true)
    @storage_dir = "test-storage-cars-semi#{storage_ext}"
    ::FileUtils.rm_rf @storage_dir if remove_previous_data
    super @storage_dir
  end

  # Recursively deletes the storage directory.
  def rm_storage_dir
    ::FileUtils.rm_rf storage_dir
  end
end
def init_indexes(storage : DODB::Storage)
n = storage.new_index "name", &.name
c = storage.new_partition "color", &.color
@ -45,6 +62,13 @@ def init_indexes(storage : DODB::Storage)
return n, c, k
end
# Registers three uncached indexers on `storage`: an index on `name`,
# a partition on `color` and tags on `keywords`.
# Returns them as a tuple, in that order.
def init_uncached_indexes(storage : DODB::Storage)
  by_name = storage.new_uncached_index "name", &.name
  by_color = storage.new_uncached_partition "color", &.color
  by_keywords = storage.new_uncached_tags "keyword", &.keywords

  {by_name, by_color, by_keywords}
end
def add_cars(storage : DODB::Storage, nb_iterations : Int32)
i = 0
car1 = Car.new "Corvet", "red", [ "shiny", "impressive", "fast", "elegant" ]
@ -73,12 +97,15 @@ end
cars_cached = DODBCachedCars.new
cars_uncached = DODBUnCachedCars.new
cars_semi = DODBSemiCachedCars.new
cached_searchby_name, cached_searchby_color, cached_searchby_keywords = init_indexes cars_cached
uncached_searchby_name, uncached_searchby_color, uncached_searchby_keywords = init_indexes cars_uncached
uncached_searchby_name, uncached_searchby_color, uncached_searchby_keywords = init_uncached_indexes cars_uncached
semi_searchby_name, semi_searchby_color, semi_searchby_keywords = init_indexes cars_semi
add_cars cars_cached, 1_000
add_cars cars_uncached, 1_000
add_cars cars_semi, 1_000
# Searching for data with an index.
Benchmark.ips do |x|
@ -86,6 +113,10 @@ Benchmark.ips do |x|
corvet = cached_searchby_name.get "Corvet-500"
end
x.report("(cars db) searching a data with an index (semi: cache is only on index)") do
corvet = semi_searchby_name.get "Corvet-500"
end
x.report("(cars db) searching a data with an index (without a cache)") do
corvet = uncached_searchby_name.get "Corvet-500"
end
@ -97,6 +128,10 @@ Benchmark.ips do |x|
red_cars = cached_searchby_color.get "red"
end
x.report("(cars db) searching a data with a partition (semi: cache is only on partition)") do
red_cars = semi_searchby_color.get "red"
end
x.report("(cars db) searching a data with a partition (without a cache)") do
red_cars = uncached_searchby_color.get "red"
end
@ -108,6 +143,10 @@ Benchmark.ips do |x|
red_cars = cached_searchby_keywords.get "spacious"
end
x.report("(cars db) searching a data with a tag (semi: cache is only on tags)") do
red_cars = semi_searchby_keywords.get "spacious"
end
x.report("(cars db) searching a data with a tag (without a cache)") do
red_cars = uncached_searchby_keywords.get "spacious"
end
@ -122,7 +161,7 @@ cars_uncached = DODBUnCachedCars.new
#init_indexes cars_cached
#init_indexes cars_uncached
cached_searchby_name, cached_searchby_color, cached_searchby_keywords = init_indexes cars_cached
uncached_searchby_name, uncached_searchby_color, uncached_searchby_keywords = init_indexes cars_uncached
uncached_searchby_name, uncached_searchby_color, uncached_searchby_keywords = init_uncached_indexes cars_uncached
add_cars cars_cached, 1_000
add_cars cars_uncached, 1_000
@ -139,3 +178,4 @@ end
cars_cached.rm_storage_dir
cars_uncached.rm_storage_dir
cars_semi.rm_storage_dir

View File

@ -108,22 +108,4 @@ class DODB::CachedDataBase(V) < DODB::Storage(V)
super
@data = Hash(Int32, V).new
end
# Builds a `CachedIndex(V)` called `name` from `block`, adds it to
# `@indexers` and returns it.
def new_index(name : String, &block : Proc(V, String))
  index = CachedIndex(V).new(self, @directory_name, name, block)
  @indexers << index
  index
end
# Builds a `CachedPartition(V)` called `name` from `block`, adds it to
# `@indexers` and returns it.
def new_partition(name : String, &block : Proc(V, String))
  partition = CachedPartition(V).new(self, @directory_name, name, block)
  @indexers << partition
  partition
end
# Builds a `CachedTags(V)` called `name` from `block`, adds it to
# `@indexers` and returns it.
def new_tags(name : String, &block : Proc(V, Array(String)))
  tag_indexer = CachedTags(V).new(self, @directory_name, name, block)
  @indexers << tag_indexer
  tag_indexer
end
end

View File

@ -112,12 +112,24 @@ abstract class DODB::Storage(V)
##
# name is the name that will be used on the file system.
def new_index(name : String, &block : Proc(V, String))
Index(V).new(self, @directory_name, name, block).tap do |indexer|
CachedIndex(V).new(self, @directory_name, name, block).tap do |indexer|
@indexers << indexer
end
end
# Like a cached index, except that `block` may return `DODB::NoIndex`
# instead of a String (presumably to leave a value out of the index;
# confirm against `CachedIndex`).
# The new `CachedIndex(V)` is added to `@indexers` and returned.
def new_nilable_index(name : String, &block : Proc(V, String | DODB::NoIndex))
  index = CachedIndex(V).new(self, @directory_name, name, block)
  @indexers << index
  index
end
# Builds a plain (non-cached) `Index(V)` called `name` from `block`,
# adds it to `@indexers` and returns it.
def new_uncached_index(name : String, &block : Proc(V, String))
  index = Index(V).new(self, @directory_name, name, block)
  @indexers << index
  index
end
def new_nilable_uncached_index(name : String, &block : Proc(V, String | DODB::NoIndex))
Index(V).new(self, @directory_name, name, block).tap do |indexer|
@indexers << indexer
end
@ -132,6 +144,12 @@ abstract class DODB::Storage(V)
##
# name is the name that will be used on the file system.
# Builds a `CachedPartition(V)` called `name` from `block`, adds it to
# `@indexers` and returns it.
def new_partition(name : String, &block : Proc(V, String))
  partition = CachedPartition(V).new(self, @directory_name, name, block)
  @indexers << partition
  partition
end
def new_uncached_partition(name : String, &block : Proc(V, String))
Partition(V).new(self, @directory_name, name, block).tap do |table|
@indexers << table
end
@ -148,6 +166,12 @@ abstract class DODB::Storage(V)
end
# Builds a `CachedTags(V)` called `name` from `block`, adds it to
# `@indexers` and returns it.
def new_tags(name : String, &block : Proc(V, Array(String)))
  tag_indexer = CachedTags(V).new(self, @directory_name, name, block)
  @indexers << tag_indexer
  tag_indexer
end
def new_uncached_tags(name : String, &block : Proc(V, Array(String)))
Tags(V).new(self, @directory_name, name, block).tap do |tags|
@indexers << tags
end