Rename indexes, once again.

This commit is contained in:
Philippe PITTOLI 2024-05-22 19:00:48 +02:00
parent 3b9e56451a
commit 6beccbb96b
5 changed files with 42 additions and 39 deletions

View File

@ -46,7 +46,7 @@ To search then retrieve indexed data: almost the same thing, 16 microseconds on
With the `cached` version of DODB, there is not even deserialization happening, so 7 nanoseconds.
Indexes (indexes, partitions and tags) are also cached **by default**.
Indexes (basic indexes, partitions and tags) are also cached **by default**.
The speed up is great compared to the uncached version since you won't walk the file-system.
Searching an index takes about 35 nanoseconds when cached.
To avoid the memory cost of cached indexes, you can explicitly ask for uncached ones.
@ -70,7 +70,7 @@ dependencies:
```crystal
# Database creation
db = DODB::DataBase(Thing).new "path/to/storage/directory"
db = DODB::Storage::Basic(Thing).new "path/to/storage/directory"
# Adding an element to the db
db << Thing.new
@ -88,7 +88,7 @@ end
The DB creation is simply creating a few directories on the file-system.
```crystal
db = DODB::DataBase(Thing).new "path/to/storage/directory"
db = DODB::Storage::Basic(Thing).new "path/to/storage/directory"
```
## Adding a new object
@ -123,7 +123,7 @@ end
We want to store `cars` in a database and index them on their `id` attribute:
```Crystal
cars = DODB::DataBase(Car).new "path/to/storage/directory"
cars = DODB::Storage::Basic(Car).new "path/to/storage/directory"
# We give a name to the index, then the code to extract the id from a Car instance
cars_by_id = cars.new_index "id", &.id
@ -250,6 +250,7 @@ end
# Remove a value based on a tag.
cars_by_keyword.delete "shiny"
cars_by_keyword.delete ["slow", "expensive"] # Remove cars that are both slow and expensive.
cars_by_keyword.delete "elegant", do |car|
car.name == "GTI"
end
@ -282,7 +283,7 @@ end
# Database creation #
#####################
cars = DODB::DataBase(Car).new "./bin/storage"
cars = DODB::Storage::Basic(Car).new "./bin/storage"
##########################
@ -334,6 +335,8 @@ pp! cars_by_color.get "red"
# based on a tag (print all fast cars)
pp! cars_by_keyword.get "fast"
# based on several tags (print all cars that are both slow and expensive)
pp! cars_by_keyword.get ["slow", "expensive"]
############
# Updating #
@ -372,6 +375,6 @@ cars_by_color.delete "blue", &.name.==("GTI")
# based on a keyword
cars_by_keyword.delete "solid"
# based on a keyword (but not only)
cars_by_keyword.delete "fast", &.name.==("Corvet")
# based on a few keywords (but not only)
cars_by_keyword.delete ["slow", "expensive"], &.name.==("Corvet")
```

View File

@ -25,9 +25,9 @@ require "file_utils"
# ```
#
# NOTE: no cache, thus considered as *slow* for creation, deletion **and retrieval**.
# NOTE: see `CachedIndex` for a cached version, faster for retrieval.
# NOTE: for fast operations without fs representation, see `RAMOnlyIndex`.
class DODB::Index::Index(V) < DODB::Index(V)
# NOTE: see `BasicCached` for a cached version, faster for retrieval.
# NOTE: for fast operations without fs representation, see `BasicRAMOnly`.
class DODB::Index::Basic(V) < DODB::Index(V)
# Name of the index, such as *id* or *color* for example.
# This is an arbitrary value, mostly to create the index directory.
#
@ -306,9 +306,9 @@ end
#
# NOTE: cached, reasonable amount of memory used since it's just an index.
# NOTE: fast for retrieval, slow for index creation and deletion (fs operations).
# NOTE: see `Index` for an uncached version, even less memory-hungry.
# NOTE: for fast operations without fs representation, see `RAMOnlyIndex`.
class DODB::Index::CachedIndex(V) < DODB::Index::Index(V)
# NOTE: see `DODB::Index::Basic` for an uncached version, even less memory-hungry.
# NOTE: for fast operations without fs representation, see `BasicRAMOnly`.
class DODB::Index::BasicCached(V) < DODB::Index::Basic(V)
# This hash contains the relation between the index key and the data key, used for
# **internal operations**.
#
@ -337,7 +337,7 @@ class DODB::Index::CachedIndex(V) < DODB::Index::Index(V)
data.clear
end
# Indexes the value on the file-system as `DODB::Index::Index#index` but also puts the index in a cache.
# Indexes the value on the file-system as `DODB::Index::Basic#index` but also puts the index in a cache.
#
# NOTE: used for internal operations.
def index(key, value)
@ -348,7 +348,7 @@ class DODB::Index::CachedIndex(V) < DODB::Index::Index(V)
@data[index_key] = key.to_i
end
# Removes the index of a value on the file-system as `DODB::Index::Index#deindex` but also from
# Removes the index of a value on the file-system as `DODB::Index::Basic#deindex` but also from
# the cache, used for **internal operations**.
#
# NOTE: used for internal operations.
@ -384,13 +384,13 @@ end
# ```
#
# Since there are no file-system operations, all the operations are fast.
# `DODB::Index::RAMOnlyIndex` enables the flexibility of indexes without a file-system representation
# `DODB::Index::BasicRAMOnly` enables the flexibility of indexes without a file-system representation
# for absolute efficiency.
# Exactly as easy to use as the other index implementations.
#
# NOTE: reasonable amount of memory used since it's just an index.
# NOTE: fast for all operations, but no file-system representation.
class DODB::Index::RAMOnlyIndex(V) < DODB::Index::CachedIndex(V)
class DODB::Index::BasicRAMOnly(V) < DODB::Index::BasicCached(V)
# Indexes a value in RAM, no file-system operation.
#
# NOTE: used for internal operations.

View File

@ -28,8 +28,8 @@ require "file_utils"
# ```
#
# NOTE: no cache, thus considered as *slow* for creation, deletion **and retrieval**.
# NOTE: see `CachedPartition` for a cached version, faster for retrieval.
# NOTE: for fast operations without fs representation, see `RAMOnlyPartition`.
# NOTE: see `PartitionCached` for a cached version, faster for retrieval.
# NOTE: for fast operations without fs representation, see `PartitionRAMOnly`.
class DODB::Index::Partition(V) < DODB::Index(V)
# Name of the index, such as *color* for example.
# This is an arbitrary value, mostly to create the index directory.
@ -222,8 +222,8 @@ end
# NOTE: cached, reasonable amount of memory used since it's just an index.
# NOTE: fast for retrieval, slow for index creation and deletion (fs operations).
# NOTE: see `Partition` for an uncached version, even less memory-hungry.
# NOTE: for fast operations without fs representation, see `RAMOnlyPartition`.
class DODB::Index::CachedPartition(V) < DODB::Index::Partition(V)
# NOTE: for fast operations without fs representation, see `PartitionRAMOnly`.
class DODB::Index::PartitionCached(V) < DODB::Index::Partition(V)
# This hash contains the relation between the index key and the data key, used for
# **internal operations**.
#
@ -264,7 +264,7 @@ class DODB::Index::CachedPartition(V) < DODB::Index::Partition(V)
end
# Gets partition entries and the database key for each entry.
# In `DODB::Index::CachedPartition`, `#get_with_keys(partition : String)` is modified to retrieve data keys from
# In `DODB::Index::PartitionCached`, `#get_with_keys(partition : String)` is modified to retrieve data keys from
# the index cache.
# In case the data isn't already in the cache, it is retrieved from the file-system.
#
@ -309,14 +309,14 @@ end
# ```
#
# Since there are no file-system operations, all the operations are fast.
# `DODB::Index::RAMOnlyPartition` enables the flexibility of partitions without a file-system representation.
# `DODB::Index::PartitionRAMOnly` enables the flexibility of partitions without a file-system representation.
# Absolute efficiency, exactly as easy to use as the other partition implementations.
#
# NOTE: reasonable amount of memory used since it's just an index.
# NOTE: fast for all operations, but no file-system representation.
# NOTE: see `Partition` for an uncached version, even less memory-hungry.
# NOTE: for an fs representation but still fast for retrieval, see `CachedPartition`.
class DODB::Index::RAMOnlyPartition(V) < DODB::Index::CachedPartition(V)
# NOTE: for an fs representation but still fast for retrieval, see `PartitionCached`.
class DODB::Index::PartitionRAMOnly(V) < DODB::Index::PartitionCached(V)
def index(key : String, value : V)
partition = key_proc.call value
return if partition.is_a? NoIndex

View File

@ -29,8 +29,8 @@ require "file_utils"
# ```
#
# NOTE: no cache, thus considered as *slow* for creation, deletion **and retrieval**.
# NOTE: see `CachedTags` for a cached version, faster for retrieval.
# NOTE: for fast operations without fs representation, see `RAMOnlyTags`.
# NOTE: see `TagsCached` for a cached version, faster for retrieval.
# NOTE: for fast operations without fs representation, see `TagsRAMOnly`.
class DODB::Index::Tags(V) < DODB::Index(V)
# Name of the index, such as *keywords* for example.
# This is an arbitrary value, mostly to create the index directory.
@ -127,7 +127,7 @@ class DODB::Index::Tags(V) < DODB::Index(V)
# Nothing can beat custom implementations tailored with specific problems in mind, so in case this
# algorithm isn't fine for you, feel free to override this function for your specific data-set.
#
# NOTE: to seriously boost performance, use `DODB::Index::CachedTags`.
# NOTE: to seriously boost performance, use `DODB::Index::TagsCached`.
# WARNING: throws a MissingEntry exception on non-existing tag or no match.
def get_with_keys(keys : Array(String)) : Array(Tuple(V, Int32))
r_value = Array(Tuple(V, Int32)).new
@ -265,8 +265,8 @@ end
# NOTE: cached, reasonable amount of memory used since it's just an index.
# NOTE: fast for retrieval, slow for index creation and deletion (fs operations).
# NOTE: see `Tags` for an uncached version, even less memory-hungry.
# NOTE: for fast operations without fs representation, see `RAMOnlyTags`.
class DODB::Index::CachedTags(V) < DODB::Index::Tags(V)
# NOTE: for fast operations without fs representation, see `TagsRAMOnly`.
class DODB::Index::TagsCached(V) < DODB::Index::Tags(V)
# This hash contains the relation between the index key and the data keys.
property data = Hash(String, Array(Int32)).new
@ -302,7 +302,7 @@ class DODB::Index::CachedTags(V) < DODB::Index::Tags(V)
end
end
# In `DODB::Index::CachedTags`, `#get_with_keys(tag : String)` is modified to retrieve data keys from the index cache.
# In `DODB::Index::TagsCached`, `#get_with_keys(tag : String)` is modified to retrieve data keys from the index cache.
# In case the data isn't already in the cache, it is retrieved from the file-system.
#
# ```
@ -346,14 +346,14 @@ end
# ```
#
# Since there are no file-system operations, all the operations are fast.
# `DODB::Index::RAMOnlyTags` enables the flexibility of tags without a file-system representation.
# `DODB::Index::TagsRAMOnly` enables the flexibility of tags without a file-system representation.
# Absolute efficiency, exactly as easy to use as the other tag implementations.
#
# NOTE: reasonable amount of memory used since it's just an index.
# NOTE: fast for all operations, but no file-system representation.
# NOTE: see `Tags` for an uncached version, even less memory-hungry.
# NOTE: for an fs representation but still fast for retrieval, see `CachedTags`.
class DODB::Index::RAMOnlyTags(V) < DODB::Index::CachedTags(V)
# NOTE: for an fs representation but still fast for retrieval, see `TagsCached`.
class DODB::Index::TagsRAMOnly(V) < DODB::Index::TagsCached(V)
def index(key : String, value : V)
tags = key_proc.call value
return if tags.is_a? NoIndex

View File

@ -189,7 +189,7 @@ abstract class DODB::Storage(V)
#
# NOTE: this index is the fastest, but doesn't have a file-system representation.
def new_RAM_index(name : String, &block : Proc(V, String | DODB::NoIndex))
Index::RAMOnlyIndex(V).new(self, @directory_name, name, block).tap do |indexer|
Index::BasicRAMOnly(V).new(self, @directory_name, name, block).tap do |indexer|
@indexers << indexer
end
end
@ -203,7 +203,7 @@ abstract class DODB::Storage(V)
# Creates a new partition **with a cache**.
# The *name* parameter is the name of the directory that will be created.
def new_partition(name : String, &block : Proc(V, String | DODB::NoIndex))
Index::CachedPartition(V).new(self, @directory_name, name, block).tap do |table|
Index::PartitionCached(V).new(self, @directory_name, name, block).tap do |table|
@indexers << table
end
end
@ -223,7 +223,7 @@ abstract class DODB::Storage(V)
#
# NOTE: this partition index is the fastest but doesn't have a file-system representation.
def new_RAM_partition(name : String, &block : Proc(V, String | DODB::NoIndex))
Index::RAMOnlyPartition(V).new(self, @directory_name, name, block).tap do |table|
Index::PartitionRAMOnly(V).new(self, @directory_name, name, block).tap do |table|
@indexers << table
end
end
@ -237,7 +237,7 @@ abstract class DODB::Storage(V)
# Creates a new tag **with a cache**.
# The *name* parameter is the name of the directory that will be created.
def new_tags(name : String, &block : Proc(V, Array(String) | DODB::NoIndex))
Index::CachedTags(V).new(self, @directory_name, name, block).tap do |tags|
Index::TagsCached(V).new(self, @directory_name, name, block).tap do |tags|
@indexers << tags
end
end
@ -257,7 +257,7 @@ abstract class DODB::Storage(V)
#
# NOTE: this tag index is the fastest but doesn't have a file-system representation.
def new_RAM_tags(name : String, &block : Proc(V, Array(String) | DODB::NoIndex))
Index::RAMOnlyTags(V).new(self, @directory_name, name, block).tap do |tags|
Index::TagsRAMOnly(V).new(self, @directory_name, name, block).tap do |tags|
@indexers << tags
end
end