From 6beccbb96bb75172f28beff69cf85cadcb65b670 Mon Sep 17 00:00:00 2001 From: Philippe PITTOLI Date: Wed, 22 May 2024 19:00:48 +0200 Subject: [PATCH] Rename indexes, once again. --- README.md | 17 ++++++++++------- src/dodb/index/{index.cr => basic.cr} | 20 ++++++++++---------- src/dodb/index/partition.cr | 16 ++++++++-------- src/dodb/index/tags.cr | 18 +++++++++--------- src/dodb/storage.cr | 10 +++++----- 5 files changed, 42 insertions(+), 39 deletions(-) rename src/dodb/index/{index.cr => basic.cr} (95%) diff --git a/README.md b/README.md index daea8aa..bae69d1 100644 --- a/README.md +++ b/README.md @@ -46,7 +46,7 @@ To search then retrieve indexed data: almost the same thing, 16 microseconds on With the `cached` version of DODB, there is not even deserialization happening, so 7 nanoseconds. -Indexes (indexes, partitions and tags) are also cached **by default**. +Indexes (basic indexes, partitions and tags) are also cached **by default**. The speed up is great compared to the uncached version since you won't walk the file-system. Searching an index takes about 35 nanoseconds when cached. To avoid the memory cost of cached indexes, you can explicitely ask for uncached ones. @@ -70,7 +70,7 @@ dependencies: ```crystal # Database creation -db = DODB::DataBase(Thing).new "path/to/storage/directory" +db = DODB::Storage::Basic(Thing).new "path/to/storage/directory" # Adding an element to the db db << Thing.new @@ -88,7 +88,7 @@ end The DB creation is simply creating a few directories on the file-system. ```crystal -db = DODB::DataBase(Thing).new "path/to/storage/directory" +db = DODB::Storage::Basic(Thing).new "path/to/storage/directory" ``` ## Adding a new object @@ -123,7 +123,7 @@ end We want to store `cars` in a database and index them on their `id` attribute: ```Crystal -cars = DODB::DataBase(Car).new "path/to/storage/directory" +cars = DODB::Storage::Basic(Car).new "path/to/storage/directory" # We give a name to the index, then the code to extract the id from a Car instance cars_by_id = cars.new_index "id", &.id @@ -250,6 +250,7 @@ end # Remove a value based on a tag. cars_by_keyword.delete "shiny" +cars_by_keyword.delete ["slow", "expensive"] # Remove cars that are both slow and expensive. cars_by_keyword.delete "elegant", do |car| car.name == "GTI" end @@ -282,7 +283,7 @@ end # Database creation # ##################### -cars = DODB::DataBase(Car).new "./bin/storage" +cars = DODB::Storage::Basic(Car).new "./bin/storage" ########################## @@ -334,6 +335,8 @@ pp! cars_by_color.get "red" # based on a tag (print all fast cars) pp! cars_by_keyword.get "fast" +# based on several tags (print all cars that are both slow and expensive) +pp! cars_by_keyword.get ["slow", "expensive"] ############ # Updating # @@ -372,6 +375,6 @@ cars_by_color.delete "blue", &.name.==("GTI") # based on a keyword cars_by_keyword.delete "solid" -# based on a keyword (but not only) -cars_by_keyword.delete "fast", &.name.==("Corvet") +# based on a few keywords (but not only) +cars_by_keyword.delete ["slow", "expensive"], &.name.==("Corvet") ``` diff --git a/src/dodb/index/index.cr b/src/dodb/index/basic.cr similarity index 95% rename from src/dodb/index/index.cr rename to src/dodb/index/basic.cr index 0d9ab22..13baa31 100644 --- a/src/dodb/index/index.cr +++ b/src/dodb/index/basic.cr @@ -25,9 +25,9 @@ require "file_utils" # ``` # # NOTE: no cache, thus considered as *slow* for creation, deletion **and retrieval**. -# NOTE: see `CachedIndex` for a cached version, faster for retrieval. -# NOTE: for fast operations without fs representation, see `RAMOnlyIndex`. -class DODB::Index::Index(V) < DODB::Index(V) +# NOTE: see `BasicCached` for a cached version, faster for retrieval. +# NOTE: for fast operations without fs representation, see `BasicRAMOnly`. +class DODB::Index::Basic(V) < DODB::Index(V) # Name of the index, such as *id* or *color* for example. # This is an arbitrary value, mostly to create the index directory. # @@ -306,9 +306,9 @@ end # # NOTE: cached, reasonable amount of memory used since it's just an index. # NOTE: fast for retrieval, slow for index creation and deletion (fs operations). -# NOTE: see `Index` for an uncached version, even less memory-hungry. -# NOTE: for fast operations without fs representation, see `RAMOnlyIndex`. -class DODB::Index::CachedIndex(V) < DODB::Index::Index(V) +# NOTE: see `DODB::Index::Basic` for an uncached version, even less memory-hungry. +# NOTE: for fast operations without fs representation, see `BasicRAMOnly`. +class DODB::Index::BasicCached(V) < DODB::Index::Basic(V) # This hash contains the relation between the index key and the data key, used for # **internal operations**. # @@ -337,7 +337,7 @@ class DODB::Index::CachedIndex(V) < DODB::Index::Index(V) data.clear end - # Indexes the value on the file-system as `DODB::Index::Index#index` but also puts the index in a cache. + # Indexes the value on the file-system as `DODB::Index::Basic#index` but also puts the index in a cache. # # NOTE: used for internal operations. def index(key, value) @@ -348,7 +348,7 @@ class DODB::Index::CachedIndex(V) < DODB::Index::Index(V) @data[index_key] = key.to_i end - # Removes the index of a value on the file-system as `DODB::Index::Index#deindex` but also from + # Removes the index of a value on the file-system as `DODB::Index::Basic#deindex` but also from # the cache, used for **internal operations**. # # NOTE: used for internal operations. @@ -384,13 +384,13 @@ end # ``` # # Since there is no file-system operations, all the operations are fast. -# `DODB::Index::RAMOnlyIndex` enables the flexibility of indexes without a file-system representation +# `DODB::Index::BasicRAMOnly` enables the flexibility of indexes without a file-system representation # for absolute efficiency. # Exactly as easy to use as the other index implementations. # # NOTE: reasonable amount of memory used since it's just an index. # NOTE: fast for all operations, but no file-system representation. -class DODB::Index::RAMOnlyIndex(V) < DODB::Index::CachedIndex(V) +class DODB::Index::BasicRAMOnly(V) < DODB::Index::BasicCached(V) # Indexes a value in RAM, no file-system operation. # # NOTE: used for internal operations. diff --git a/src/dodb/index/partition.cr b/src/dodb/index/partition.cr index 1b244d6..8e7d30a 100644 --- a/src/dodb/index/partition.cr +++ b/src/dodb/index/partition.cr @@ -28,8 +28,8 @@ require "file_utils" # ``` # # NOTE: no cache, thus considered as *slow* for creation, deletion **and retrieval**. -# NOTE: see `CachedPartition` for a cached version, faster for retrieval. -# NOTE: for fast operations without fs representation, see `RAMOnlyPartition`. +# NOTE: see `PartitionCached` for a cached version, faster for retrieval. +# NOTE: for fast operations without fs representation, see `PartitionRAMOnly`. class DODB::Index::Partition(V) < DODB::Index(V) # Name of the index, such as *color* for example. # This is an arbitrary value, mostly to create the index directory. @@ -222,8 +222,8 @@ end # NOTE: cached, reasonable amount of memory used since it's just an index. # NOTE: fast for retrieval, slow for index creation and deletion (fs operations). # NOTE: see `Partition` for an uncached version, even less memory-hungry. -# NOTE: for fast operations without fs representation, see `RAMOnlyPartition`. -class DODB::Index::CachedPartition(V) < DODB::Index::Partition(V) +# NOTE: for fast operations without fs representation, see `PartitionRAMOnly`. +class DODB::Index::PartitionCached(V) < DODB::Index::Partition(V) # This hash contains the relation between the index key and the data key, used for # **internal operations**. # @@ -264,7 +264,7 @@ class DODB::Index::CachedPartition(V) < DODB::Index::Partition(V) end # Gets partition entries and the database key for each entry. - # In `DODB::Index::CachedPartition`, `#get_with_keys(partition : String)` is modified to retrieve data keys from + # In `DODB::Index::PartitionCached`, `#get_with_keys(partition : String)` is modified to retrieve data keys from # the index cache. # In case the data isn't already in the cache, it is retrieved from the file-system. # @@ -309,14 +309,14 @@ end # ``` # # Since there is no file-system operations, all the operations are fast. -# `DODB::Index::RAMOnlyPartition` enables the flexibility of partitions without a file-system representation. +# `DODB::Index::PartitionRAMOnly` enables the flexibility of partitions without a file-system representation. # Absolute efficiency, exactly as easy to use as the other partition implementations. # # NOTE: reasonable amount of memory used since it's just an index. # NOTE: fast for all operations, but no file-system representation. # NOTE: see `Partition` for an uncached version, even less memory-hungry. -# NOTE: for an fs representation but still fast for retrieval, see `CachedPartition`. -class DODB::Index::RAMOnlyPartition(V) < DODB::Index::CachedPartition(V) +# NOTE: for an fs representation but still fast for retrieval, see `PartitionCached`. +class DODB::Index::PartitionRAMOnly(V) < DODB::Index::PartitionCached(V) def index(key : String, value : V) partition = key_proc.call value return if partition.is_a? NoIndex diff --git a/src/dodb/index/tags.cr b/src/dodb/index/tags.cr index 8dc819f..7c6ae29 100644 --- a/src/dodb/index/tags.cr +++ b/src/dodb/index/tags.cr @@ -29,8 +29,8 @@ require "file_utils" # ``` # # NOTE: no cache, thus considered as *slow* for creation, deletion **and retrieval**. -# NOTE: see `CachedTags` for a cached version, faster for retrieval. -# NOTE: for fast operations without fs representation, see `RAMOnlyTags`. +# NOTE: see `TagsCached` for a cached version, faster for retrieval. +# NOTE: for fast operations without fs representation, see `TagsRAMOnly`. class DODB::Index::Tags(V) < DODB::Index(V) # Name of the index, such as *keywords* for example. # This is an arbitrary value, mostly to create the index directory. @@ -127,7 +127,7 @@ class DODB::Index::Tags(V) < DODB::Index(V) # Nothing can beat custom implementations tailored with specific problems in mind, so in case this # algorithm isn't fine for you, feel free to override this function for your specific data-set. # - # NOTE: to seriously boost performance, use `DODB::Index::CachedTags`. + # NOTE: to seriously boost performance, use `DODB::Index::TagsCached`. # WARNING: throws a MissingEntry exception on non-existing tag or no match. def get_with_keys(keys : Array(String)) : Array(Tuple(V, Int32)) r_value = Array(Tuple(V, Int32)).new @@ -265,8 +265,8 @@ end # NOTE: cached, reasonable amount of memory used since it's just an index. # NOTE: fast for retrieval, slow for index creation and deletion (fs operations). # NOTE: see `Tags` for an uncached version, even less memory-hungry. -# NOTE: for fast operations without fs representation, see `RAMOnlyTags`. -class DODB::Index::CachedTags(V) < DODB::Index::Tags(V) +# NOTE: for fast operations without fs representation, see `TagsRAMOnly`. +class DODB::Index::TagsCached(V) < DODB::Index::Tags(V) # This hash contains the relation between the index key and the data keys. property data = Hash(String, Array(Int32)).new @@ -302,7 +302,7 @@ class DODB::Index::CachedTags(V) < DODB::Index::Tags(V) end end - # In `DODB::Index::CachedTags`, `#get_with_keys(tag : String)` is modified to retrieve data keys from the index cache. + # In `DODB::Index::TagsCached`, `#get_with_keys(tag : String)` is modified to retrieve data keys from the index cache. # In case the data isn't already in the cache, it is retrieved from the file-system. # # ``` @@ -346,14 +346,14 @@ end # ``` # # Since there is no file-system operations, all the operations are fast. -# `DODB::Index::RAMOnlyTags` enables the flexibility of tags without a file-system representation. +# `DODB::Index::TagsRAMOnly` enables the flexibility of tags without a file-system representation. # Absolute efficiency, exactly as easy to use as the other tag implementations. # # NOTE: reasonable amount of memory used since it's just an index. # NOTE: fast for all operations, but no file-system representation. # NOTE: see `Tags` for an uncached version, even less memory-hungry. -# NOTE: for an fs representation but still fast for retrieval, see `CachedTags`. -class DODB::Index::RAMOnlyTags(V) < DODB::Index::CachedTags(V) +# NOTE: for an fs representation but still fast for retrieval, see `TagsCached`. +class DODB::Index::TagsRAMOnly(V) < DODB::Index::TagsCached(V) def index(key : String, value : V) tags = key_proc.call value return if tags.is_a? NoIndex diff --git a/src/dodb/storage.cr b/src/dodb/storage.cr index 2d8d277..ae9ee81 100644 --- a/src/dodb/storage.cr +++ b/src/dodb/storage.cr @@ -189,7 +189,7 @@ abstract class DODB::Storage(V) # # NOTE: this index is the fastest, but doesn't have a file-system representation. def new_RAM_index(name : String, &block : Proc(V, String | DODB::NoIndex)) - Index::RAMOnlyIndex(V).new(self, @directory_name, name, block).tap do |indexer| + Index::BasicRAMOnly(V).new(self, @directory_name, name, block).tap do |indexer| @indexers << indexer end end @@ -203,7 +203,7 @@ abstract class DODB::Storage(V) # Creates a new partition **with a cache**. # The *name* parameter is the name of the directory that will be created. def new_partition(name : String, &block : Proc(V, String | DODB::NoIndex)) - Index::CachedPartition(V).new(self, @directory_name, name, block).tap do |table| + Index::PartitionCached(V).new(self, @directory_name, name, block).tap do |table| @indexers << table end end @@ -223,7 +223,7 @@ abstract class DODB::Storage(V) # # NOTE: this partition index is the fastest but doesn't have a file-system representation. def new_RAM_partition(name : String, &block : Proc(V, String | DODB::NoIndex)) - Index::RAMOnlyPartition(V).new(self, @directory_name, name, block).tap do |table| + Index::PartitionRAMOnly(V).new(self, @directory_name, name, block).tap do |table| @indexers << table end end @@ -237,7 +237,7 @@ abstract class DODB::Storage(V) # Creates a new tag **with a cache**. # The *name* parameter is the name of the directory that will be created. def new_tags(name : String, &block : Proc(V, Array(String) | DODB::NoIndex)) - Index::CachedTags(V).new(self, @directory_name, name, block).tap do |tags| + Index::TagsCached(V).new(self, @directory_name, name, block).tap do |tags| @indexers << tags end end @@ -257,7 +257,7 @@ abstract class DODB::Storage(V) # # NOTE: this tag index is the fastest but doesn't have a file-system representation. def new_RAM_tags(name : String, &block : Proc(V, Array(String) | DODB::NoIndex)) - Index::RAMOnlyTags(V).new(self, @directory_name, name, block).tap do |tags| + Index::TagsRAMOnly(V).new(self, @directory_name, name, block).tap do |tags| @indexers << tags end end