Rename indexes, once again.
This commit is contained in:
parent
3b9e56451a
commit
6beccbb96b
17
README.md
17
README.md
@ -46,7 +46,7 @@ To search then retrieve indexed data: almost the same thing, 16 microseconds on
|
|||||||
|
|
||||||
With the `cached` version of DODB, there is not even deserialization happening, so 7 nanoseconds.
|
With the `cached` version of DODB, there is not even deserialization happening, so 7 nanoseconds.
|
||||||
|
|
||||||
Indexes (indexes, partitions and tags) are also cached **by default**.
|
Indexes (basic indexes, partitions and tags) are also cached **by default**.
|
||||||
The speed up is great compared to the uncached version since you won't walk the file-system.
|
The speed up is great compared to the uncached version since you won't walk the file-system.
|
||||||
Searching an index takes about 35 nanoseconds when cached.
|
Searching an index takes about 35 nanoseconds when cached.
|
||||||
To avoid the memory cost of cached indexes, you can explicitly ask for uncached ones.
|
To avoid the memory cost of cached indexes, you can explicitly ask for uncached ones.
|
||||||
@ -70,7 +70,7 @@ dependencies:
|
|||||||
|
|
||||||
```crystal
|
```crystal
|
||||||
# Database creation
|
# Database creation
|
||||||
db = DODB::DataBase(Thing).new "path/to/storage/directory"
|
db = DODB::Storage::Basic(Thing).new "path/to/storage/directory"
|
||||||
|
|
||||||
# Adding an element to the db
|
# Adding an element to the db
|
||||||
db << Thing.new
|
db << Thing.new
|
||||||
@ -88,7 +88,7 @@ end
|
|||||||
The DB creation is simply creating a few directories on the file-system.
|
The DB creation is simply creating a few directories on the file-system.
|
||||||
|
|
||||||
```crystal
|
```crystal
|
||||||
db = DODB::DataBase(Thing).new "path/to/storage/directory"
|
db = DODB::Storage::Basic(Thing).new "path/to/storage/directory"
|
||||||
```
|
```
|
||||||
|
|
||||||
## Adding a new object
|
## Adding a new object
|
||||||
@ -123,7 +123,7 @@ end
|
|||||||
|
|
||||||
We want to store `cars` in a database and index them on their `id` attribute:
|
We want to store `cars` in a database and index them on their `id` attribute:
|
||||||
```Crystal
|
```Crystal
|
||||||
cars = DODB::DataBase(Car).new "path/to/storage/directory"
|
cars = DODB::Storage::Basic(Car).new "path/to/storage/directory"
|
||||||
|
|
||||||
# We give a name to the index, then the code to extract the id from a Car instance
|
# We give a name to the index, then the code to extract the id from a Car instance
|
||||||
cars_by_id = cars.new_index "id", &.id
|
cars_by_id = cars.new_index "id", &.id
|
||||||
@ -250,6 +250,7 @@ end
|
|||||||
|
|
||||||
# Remove a value based on a tag.
|
# Remove a value based on a tag.
|
||||||
cars_by_keyword.delete "shiny"
|
cars_by_keyword.delete "shiny"
|
||||||
|
cars_by_keyword.delete ["slow", "expensive"] # Remove cars that are both slow and expensive.
|
||||||
cars_by_keyword.delete "elegant", do |car|
|
cars_by_keyword.delete "elegant", do |car|
|
||||||
car.name == "GTI"
|
car.name == "GTI"
|
||||||
end
|
end
|
||||||
@ -282,7 +283,7 @@ end
|
|||||||
# Database creation #
|
# Database creation #
|
||||||
#####################
|
#####################
|
||||||
|
|
||||||
cars = DODB::DataBase(Car).new "./bin/storage"
|
cars = DODB::Storage::Basic(Car).new "./bin/storage"
|
||||||
|
|
||||||
|
|
||||||
##########################
|
##########################
|
||||||
@ -334,6 +335,8 @@ pp! cars_by_color.get "red"
|
|||||||
# based on a tag (print all fast cars)
|
# based on a tag (print all fast cars)
|
||||||
pp! cars_by_keyword.get "fast"
|
pp! cars_by_keyword.get "fast"
|
||||||
|
|
||||||
|
# based on several tags (print all cars that are both slow and expensive)
|
||||||
|
pp! cars_by_keyword.get ["slow", "expensive"]
|
||||||
|
|
||||||
############
|
############
|
||||||
# Updating #
|
# Updating #
|
||||||
@ -372,6 +375,6 @@ cars_by_color.delete "blue", &.name.==("GTI")
|
|||||||
|
|
||||||
# based on a keyword
|
# based on a keyword
|
||||||
cars_by_keyword.delete "solid"
|
cars_by_keyword.delete "solid"
|
||||||
# based on a keyword (but not only)
|
# based on a few keywords (but not only)
|
||||||
cars_by_keyword.delete "fast", &.name.==("Corvet")
|
cars_by_keyword.delete ["slow", "expensive"], &.name.==("Corvet")
|
||||||
```
|
```
|
||||||
|
@ -25,9 +25,9 @@ require "file_utils"
|
|||||||
# ```
|
# ```
|
||||||
#
|
#
|
||||||
# NOTE: no cache, thus considered as *slow* for creation, deletion **and retrieval**.
|
# NOTE: no cache, thus considered as *slow* for creation, deletion **and retrieval**.
|
||||||
# NOTE: see `CachedIndex` for a cached version, faster for retrieval.
|
# NOTE: see `BasicCached` for a cached version, faster for retrieval.
|
||||||
# NOTE: for fast operations without fs representation, see `RAMOnlyIndex`.
|
# NOTE: for fast operations without fs representation, see `BasicRAMOnly`.
|
||||||
class DODB::Index::Index(V) < DODB::Index(V)
|
class DODB::Index::Basic(V) < DODB::Index(V)
|
||||||
# Name of the index, such as *id* or *color* for example.
|
# Name of the index, such as *id* or *color* for example.
|
||||||
# This is an arbitrary value, mostly to create the index directory.
|
# This is an arbitrary value, mostly to create the index directory.
|
||||||
#
|
#
|
||||||
@ -306,9 +306,9 @@ end
|
|||||||
#
|
#
|
||||||
# NOTE: cached, reasonable amount of memory used since it's just an index.
|
# NOTE: cached, reasonable amount of memory used since it's just an index.
|
||||||
# NOTE: fast for retrieval, slow for index creation and deletion (fs operations).
|
# NOTE: fast for retrieval, slow for index creation and deletion (fs operations).
|
||||||
# NOTE: see `Index` for an uncached version, even less memory-hungry.
|
# NOTE: see `DODB::Index::Basic` for an uncached version, even less memory-hungry.
|
||||||
# NOTE: for fast operations without fs representation, see `RAMOnlyIndex`.
|
# NOTE: for fast operations without fs representation, see `BasicRAMOnly`.
|
||||||
class DODB::Index::CachedIndex(V) < DODB::Index::Index(V)
|
class DODB::Index::BasicCached(V) < DODB::Index::Basic(V)
|
||||||
# This hash contains the relation between the index key and the data key, used for
|
# This hash contains the relation between the index key and the data key, used for
|
||||||
# **internal operations**.
|
# **internal operations**.
|
||||||
#
|
#
|
||||||
@ -337,7 +337,7 @@ class DODB::Index::CachedIndex(V) < DODB::Index::Index(V)
|
|||||||
data.clear
|
data.clear
|
||||||
end
|
end
|
||||||
|
|
||||||
# Indexes the value on the file-system as `DODB::Index::Index#index` but also puts the index in a cache.
|
# Indexes the value on the file-system as `DODB::Index::Basic#index` but also puts the index in a cache.
|
||||||
#
|
#
|
||||||
# NOTE: used for internal operations.
|
# NOTE: used for internal operations.
|
||||||
def index(key, value)
|
def index(key, value)
|
||||||
@ -348,7 +348,7 @@ class DODB::Index::CachedIndex(V) < DODB::Index::Index(V)
|
|||||||
@data[index_key] = key.to_i
|
@data[index_key] = key.to_i
|
||||||
end
|
end
|
||||||
|
|
||||||
# Removes the index of a value on the file-system as `DODB::Index::Index#deindex` but also from
|
# Removes the index of a value on the file-system as `DODB::Index::Basic#deindex` but also from
|
||||||
# the cache, used for **internal operations**.
|
# the cache, used for **internal operations**.
|
||||||
#
|
#
|
||||||
# NOTE: used for internal operations.
|
# NOTE: used for internal operations.
|
||||||
@ -384,13 +384,13 @@ end
|
|||||||
# ```
|
# ```
|
||||||
#
|
#
|
||||||
# Since there are no file-system operations, all the operations are fast.
|
# Since there are no file-system operations, all the operations are fast.
|
||||||
# `DODB::Index::RAMOnlyIndex` enables the flexibility of indexes without a file-system representation
|
# `DODB::Index::BasicRAMOnly` enables the flexibility of indexes without a file-system representation
|
||||||
# for absolute efficiency.
|
# for absolute efficiency.
|
||||||
# Exactly as easy to use as the other index implementations.
|
# Exactly as easy to use as the other index implementations.
|
||||||
#
|
#
|
||||||
# NOTE: reasonable amount of memory used since it's just an index.
|
# NOTE: reasonable amount of memory used since it's just an index.
|
||||||
# NOTE: fast for all operations, but no file-system representation.
|
# NOTE: fast for all operations, but no file-system representation.
|
||||||
class DODB::Index::RAMOnlyIndex(V) < DODB::Index::CachedIndex(V)
|
class DODB::Index::BasicRAMOnly(V) < DODB::Index::BasicCached(V)
|
||||||
# Indexes a value in RAM, no file-system operation.
|
# Indexes a value in RAM, no file-system operation.
|
||||||
#
|
#
|
||||||
# NOTE: used for internal operations.
|
# NOTE: used for internal operations.
|
@ -28,8 +28,8 @@ require "file_utils"
|
|||||||
# ```
|
# ```
|
||||||
#
|
#
|
||||||
# NOTE: no cache, thus considered as *slow* for creation, deletion **and retrieval**.
|
# NOTE: no cache, thus considered as *slow* for creation, deletion **and retrieval**.
|
||||||
# NOTE: see `CachedPartition` for a cached version, faster for retrieval.
|
# NOTE: see `PartitionCached` for a cached version, faster for retrieval.
|
||||||
# NOTE: for fast operations without fs representation, see `RAMOnlyPartition`.
|
# NOTE: for fast operations without fs representation, see `PartitionRAMOnly`.
|
||||||
class DODB::Index::Partition(V) < DODB::Index(V)
|
class DODB::Index::Partition(V) < DODB::Index(V)
|
||||||
# Name of the index, such as *color* for example.
|
# Name of the index, such as *color* for example.
|
||||||
# This is an arbitrary value, mostly to create the index directory.
|
# This is an arbitrary value, mostly to create the index directory.
|
||||||
@ -222,8 +222,8 @@ end
|
|||||||
# NOTE: cached, reasonable amount of memory used since it's just an index.
|
# NOTE: cached, reasonable amount of memory used since it's just an index.
|
||||||
# NOTE: fast for retrieval, slow for index creation and deletion (fs operations).
|
# NOTE: fast for retrieval, slow for index creation and deletion (fs operations).
|
||||||
# NOTE: see `Partition` for an uncached version, even less memory-hungry.
|
# NOTE: see `Partition` for an uncached version, even less memory-hungry.
|
||||||
# NOTE: for fast operations without fs representation, see `RAMOnlyPartition`.
|
# NOTE: for fast operations without fs representation, see `PartitionRAMOnly`.
|
||||||
class DODB::Index::CachedPartition(V) < DODB::Index::Partition(V)
|
class DODB::Index::PartitionCached(V) < DODB::Index::Partition(V)
|
||||||
# This hash contains the relation between the index key and the data key, used for
|
# This hash contains the relation between the index key and the data key, used for
|
||||||
# **internal operations**.
|
# **internal operations**.
|
||||||
#
|
#
|
||||||
@ -264,7 +264,7 @@ class DODB::Index::CachedPartition(V) < DODB::Index::Partition(V)
|
|||||||
end
|
end
|
||||||
|
|
||||||
# Gets partition entries and the database key for each entry.
|
# Gets partition entries and the database key for each entry.
|
||||||
# In `DODB::Index::CachedPartition`, `#get_with_keys(partition : String)` is modified to retrieve data keys from
|
# In `DODB::Index::PartitionCached`, `#get_with_keys(partition : String)` is modified to retrieve data keys from
|
||||||
# the index cache.
|
# the index cache.
|
||||||
# In case the data isn't already in the cache, it is retrieved from the file-system.
|
# In case the data isn't already in the cache, it is retrieved from the file-system.
|
||||||
#
|
#
|
||||||
@ -309,14 +309,14 @@ end
|
|||||||
# ```
|
# ```
|
||||||
#
|
#
|
||||||
# Since there are no file-system operations, all the operations are fast.
|
# Since there are no file-system operations, all the operations are fast.
|
||||||
# `DODB::Index::RAMOnlyPartition` enables the flexibility of partitions without a file-system representation.
|
# `DODB::Index::PartitionRAMOnly` enables the flexibility of partitions without a file-system representation.
|
||||||
# Absolute efficiency, exactly as easy to use as the other partition implementations.
|
# Absolute efficiency, exactly as easy to use as the other partition implementations.
|
||||||
#
|
#
|
||||||
# NOTE: reasonable amount of memory used since it's just an index.
|
# NOTE: reasonable amount of memory used since it's just an index.
|
||||||
# NOTE: fast for all operations, but no file-system representation.
|
# NOTE: fast for all operations, but no file-system representation.
|
||||||
# NOTE: see `Partition` for an uncached version, even less memory-hungry.
|
# NOTE: see `Partition` for an uncached version, even less memory-hungry.
|
||||||
# NOTE: for an fs representation but still fast for retrieval, see `CachedPartition`.
|
# NOTE: for an fs representation but still fast for retrieval, see `PartitionCached`.
|
||||||
class DODB::Index::RAMOnlyPartition(V) < DODB::Index::CachedPartition(V)
|
class DODB::Index::PartitionRAMOnly(V) < DODB::Index::PartitionCached(V)
|
||||||
def index(key : String, value : V)
|
def index(key : String, value : V)
|
||||||
partition = key_proc.call value
|
partition = key_proc.call value
|
||||||
return if partition.is_a? NoIndex
|
return if partition.is_a? NoIndex
|
||||||
|
@ -29,8 +29,8 @@ require "file_utils"
|
|||||||
# ```
|
# ```
|
||||||
#
|
#
|
||||||
# NOTE: no cache, thus considered as *slow* for creation, deletion **and retrieval**.
|
# NOTE: no cache, thus considered as *slow* for creation, deletion **and retrieval**.
|
||||||
# NOTE: see `CachedTags` for a cached version, faster for retrieval.
|
# NOTE: see `TagsCached` for a cached version, faster for retrieval.
|
||||||
# NOTE: for fast operations without fs representation, see `RAMOnlyTags`.
|
# NOTE: for fast operations without fs representation, see `TagsRAMOnly`.
|
||||||
class DODB::Index::Tags(V) < DODB::Index(V)
|
class DODB::Index::Tags(V) < DODB::Index(V)
|
||||||
# Name of the index, such as *keywords* for example.
|
# Name of the index, such as *keywords* for example.
|
||||||
# This is an arbitrary value, mostly to create the index directory.
|
# This is an arbitrary value, mostly to create the index directory.
|
||||||
@ -127,7 +127,7 @@ class DODB::Index::Tags(V) < DODB::Index(V)
|
|||||||
# Nothing can beat custom implementations tailored with specific problems in mind, so in case this
|
# Nothing can beat custom implementations tailored with specific problems in mind, so in case this
|
||||||
# algorithm isn't fine for you, feel free to override this function for your specific data-set.
|
# algorithm isn't fine for you, feel free to override this function for your specific data-set.
|
||||||
#
|
#
|
||||||
# NOTE: to seriously boost performance, use `DODB::Index::CachedTags`.
|
# NOTE: to seriously boost performance, use `DODB::Index::TagsCached`.
|
||||||
# WARNING: throws a MissingEntry exception on non-existing tag or no match.
|
# WARNING: throws a MissingEntry exception on non-existing tag or no match.
|
||||||
def get_with_keys(keys : Array(String)) : Array(Tuple(V, Int32))
|
def get_with_keys(keys : Array(String)) : Array(Tuple(V, Int32))
|
||||||
r_value = Array(Tuple(V, Int32)).new
|
r_value = Array(Tuple(V, Int32)).new
|
||||||
@ -265,8 +265,8 @@ end
|
|||||||
# NOTE: cached, reasonable amount of memory used since it's just an index.
|
# NOTE: cached, reasonable amount of memory used since it's just an index.
|
||||||
# NOTE: fast for retrieval, slow for index creation and deletion (fs operations).
|
# NOTE: fast for retrieval, slow for index creation and deletion (fs operations).
|
||||||
# NOTE: see `Tags` for an uncached version, even less memory-hungry.
|
# NOTE: see `Tags` for an uncached version, even less memory-hungry.
|
||||||
# NOTE: for fast operations without fs representation, see `RAMOnlyTags`.
|
# NOTE: for fast operations without fs representation, see `TagsRAMOnly`.
|
||||||
class DODB::Index::CachedTags(V) < DODB::Index::Tags(V)
|
class DODB::Index::TagsCached(V) < DODB::Index::Tags(V)
|
||||||
# This hash contains the relation between the index key and the data keys.
|
# This hash contains the relation between the index key and the data keys.
|
||||||
property data = Hash(String, Array(Int32)).new
|
property data = Hash(String, Array(Int32)).new
|
||||||
|
|
||||||
@ -302,7 +302,7 @@ class DODB::Index::CachedTags(V) < DODB::Index::Tags(V)
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
# In `DODB::Index::CachedTags`, `#get_with_keys(tag : String)` is modified to retrieve data keys from the index cache.
|
# In `DODB::Index::TagsCached`, `#get_with_keys(tag : String)` is modified to retrieve data keys from the index cache.
|
||||||
# In case the data isn't already in the cache, it is retrieved from the file-system.
|
# In case the data isn't already in the cache, it is retrieved from the file-system.
|
||||||
#
|
#
|
||||||
# ```
|
# ```
|
||||||
@ -346,14 +346,14 @@ end
|
|||||||
# ```
|
# ```
|
||||||
#
|
#
|
||||||
# Since there are no file-system operations, all the operations are fast.
|
# Since there are no file-system operations, all the operations are fast.
|
||||||
# `DODB::Index::RAMOnlyTags` enables the flexibility of tags without a file-system representation.
|
# `DODB::Index::TagsRAMOnly` enables the flexibility of tags without a file-system representation.
|
||||||
# Absolute efficiency, exactly as easy to use as the other tag implementations.
|
# Absolute efficiency, exactly as easy to use as the other tag implementations.
|
||||||
#
|
#
|
||||||
# NOTE: reasonable amount of memory used since it's just an index.
|
# NOTE: reasonable amount of memory used since it's just an index.
|
||||||
# NOTE: fast for all operations, but no file-system representation.
|
# NOTE: fast for all operations, but no file-system representation.
|
||||||
# NOTE: see `Tags` for an uncached version, even less memory-hungry.
|
# NOTE: see `Tags` for an uncached version, even less memory-hungry.
|
||||||
# NOTE: for an fs representation but still fast for retrieval, see `CachedTags`.
|
# NOTE: for an fs representation but still fast for retrieval, see `TagsCached`.
|
||||||
class DODB::Index::RAMOnlyTags(V) < DODB::Index::CachedTags(V)
|
class DODB::Index::TagsRAMOnly(V) < DODB::Index::TagsCached(V)
|
||||||
def index(key : String, value : V)
|
def index(key : String, value : V)
|
||||||
tags = key_proc.call value
|
tags = key_proc.call value
|
||||||
return if tags.is_a? NoIndex
|
return if tags.is_a? NoIndex
|
||||||
|
@ -189,7 +189,7 @@ abstract class DODB::Storage(V)
|
|||||||
#
|
#
|
||||||
# NOTE: this index is the fastest, but doesn't have a file-system representation.
|
# NOTE: this index is the fastest, but doesn't have a file-system representation.
|
||||||
def new_RAM_index(name : String, &block : Proc(V, String | DODB::NoIndex))
|
def new_RAM_index(name : String, &block : Proc(V, String | DODB::NoIndex))
|
||||||
Index::RAMOnlyIndex(V).new(self, @directory_name, name, block).tap do |indexer|
|
Index::BasicRAMOnly(V).new(self, @directory_name, name, block).tap do |indexer|
|
||||||
@indexers << indexer
|
@indexers << indexer
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
@ -203,7 +203,7 @@ abstract class DODB::Storage(V)
|
|||||||
# Creates a new partition **with a cache**.
|
# Creates a new partition **with a cache**.
|
||||||
# The *name* parameter is the name of the directory that will be created.
|
# The *name* parameter is the name of the directory that will be created.
|
||||||
def new_partition(name : String, &block : Proc(V, String | DODB::NoIndex))
|
def new_partition(name : String, &block : Proc(V, String | DODB::NoIndex))
|
||||||
Index::CachedPartition(V).new(self, @directory_name, name, block).tap do |table|
|
Index::PartitionCached(V).new(self, @directory_name, name, block).tap do |table|
|
||||||
@indexers << table
|
@indexers << table
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
@ -223,7 +223,7 @@ abstract class DODB::Storage(V)
|
|||||||
#
|
#
|
||||||
# NOTE: this partition index is the fastest but doesn't have a file-system representation.
|
# NOTE: this partition index is the fastest but doesn't have a file-system representation.
|
||||||
def new_RAM_partition(name : String, &block : Proc(V, String | DODB::NoIndex))
|
def new_RAM_partition(name : String, &block : Proc(V, String | DODB::NoIndex))
|
||||||
Index::RAMOnlyPartition(V).new(self, @directory_name, name, block).tap do |table|
|
Index::PartitionRAMOnly(V).new(self, @directory_name, name, block).tap do |table|
|
||||||
@indexers << table
|
@indexers << table
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
@ -237,7 +237,7 @@ abstract class DODB::Storage(V)
|
|||||||
# Creates a new tag **with a cache**.
|
# Creates a new tag **with a cache**.
|
||||||
# The *name* parameter is the name of the directory that will be created.
|
# The *name* parameter is the name of the directory that will be created.
|
||||||
def new_tags(name : String, &block : Proc(V, Array(String) | DODB::NoIndex))
|
def new_tags(name : String, &block : Proc(V, Array(String) | DODB::NoIndex))
|
||||||
Index::CachedTags(V).new(self, @directory_name, name, block).tap do |tags|
|
Index::TagsCached(V).new(self, @directory_name, name, block).tap do |tags|
|
||||||
@indexers << tags
|
@indexers << tags
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
@ -257,7 +257,7 @@ abstract class DODB::Storage(V)
|
|||||||
#
|
#
|
||||||
# NOTE: this tag index is the fastest but doesn't have a file-system representation.
|
# NOTE: this tag index is the fastest but doesn't have a file-system representation.
|
||||||
def new_RAM_tags(name : String, &block : Proc(V, Array(String) | DODB::NoIndex))
|
def new_RAM_tags(name : String, &block : Proc(V, Array(String) | DODB::NoIndex))
|
||||||
Index::RAMOnlyTags(V).new(self, @directory_name, name, block).tap do |tags|
|
Index::TagsRAMOnly(V).new(self, @directory_name, name, block).tap do |tags|
|
||||||
@indexers << tags
|
@indexers << tags
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
Loading…
Reference in New Issue
Block a user