diff --git a/src/cached.cr b/src/cached.cr index 9f5d7db..c4c72d7 100644 --- a/src/cached.cr +++ b/src/cached.cr @@ -15,6 +15,26 @@ end # Cached database, stores data on the file-system and keeps it in RAM. # +# ``` +# # Creates a DODB cached database. +# car_database = DODB::CachedDataBase.new "/path/to/db" +# +# # Creates a (cached) index. +# cars_by_name = car_database.new_index "name", &.name +# +# # Add a value in the database. +# car_database << Car.new "Corvet" +# ``` +# On the file-system: +# ```plain +# storage +# ├── data +# │   └── 0000000000 +# ├── indices +# │   └── by_name <- the "name" basic index +# │   └── Corvet -> ../../data/0000000000 +# ``` +# # WARNING: beware of the RAM use, see `DODB::StackedDataBase` for a less memory-hungry option. class DODB::CachedDataBase(V) < DODB::Storage(V) @indexers = [] of Indexer(V) @@ -109,7 +129,6 @@ class DODB::CachedDataBase(V) < DODB::Storage(V) end end - # :inherit: def unsafe_delete(key : Int32) value = self[key]? diff --git a/src/dodb/partition.cr b/src/dodb/partition.cr index 01bf034..1bfc3de 100644 --- a/src/dodb/partition.cr +++ b/src/dodb/partition.cr @@ -10,8 +10,9 @@ require "./indexer.cr" # ``` # # This (partition) index provides a file-system representation, enabling the administrators to -# select a value based on its index. The following example presents an index named "color" -# with some data indexed by a color attribute. +# select a value based on its index. +# +# The following example presents an index named "color" with some data indexed by a color attribute. # # ```plain # storage @@ -32,7 +33,7 @@ require "./indexer.cr" # NOTE: see `CachedPartition` for a cached version, faster for retrieval. # NOTE: for fast operations without fs representation, see `RAMOnlyPartition`. class DODB::Partition(V) < DODB::Indexer(V) - # Name of the index, such as *id* or *color* for example. + # Name of the index, such as *color* for example. # This is an arbitrary value, mostly to create the index directory. 
# # NOTE: used for internal operations. @@ -47,7 +48,7 @@ class DODB::Partition(V) < DODB::Indexer(V) # Reference to the database instance. @storage : DODB::Storage(V) - # To create a partition from a database, use `DODB::Storage#new_partition` to create + # To create a *partition index* from a database, use `DODB::Storage#new_partition` to create # a cached partition, `DODB::Storage#new_uncached_partition` for an uncached partition or # `DODB::Storage#new_RAM_partition` for a RAM-only partition. # @@ -56,6 +57,9 @@ class DODB::Partition(V) < DODB::Indexer(V) ::Dir.mkdir_p indexing_directory end + # Checks for collisions. + # + # NOTE: always returns true, no collision can happen in a partition. def check!(key : String, value : V, old_value : V?) return true # Partitions don’t have collisions or overloads. end @@ -71,7 +75,7 @@ class DODB::Partition(V) < DODB::Indexer(V) ::File.symlink get_data_symlink(key), symlink end - def deindex (key : String, value : V) + def deindex(key : String, value : V) partition = key_proc.call value return if partition.is_a? NoIndex @@ -108,7 +112,7 @@ class DODB::Partition(V) < DODB::Indexer(V) # a missing entry instead of an exception. # # ``` - # red_cars = cars_by_color.get? "red" # No red cars = nil + # red_cars = cars_by_color.get? "red" # No red cars = nil. # ``` def get?(partition : String) : Array(V)? get partition @@ -119,7 +123,7 @@ class DODB::Partition(V) < DODB::Indexer(V) # Deletes all entries within the provided partition. # # ``` - # cars_by_color.delete "red" # Deletes all red cars + # cars_by_color.delete "red" # Deletes all red cars. # ``` def delete(partition : String) delete partition, do true end @@ -179,8 +183,9 @@ end # ``` # # This (partition) index provides a file-system representation, enabling the administrators to -# select a value based on its index. The following example presents an index named "color" -# with some data indexed by a color attribute. +# select a value based on its index. 
+# +# The following example presents an index named "color" with some data indexed by a color attribute. # # ```plain # storage @@ -247,7 +252,7 @@ class DODB::CachedPartition(V) < DODB::Partition(V) end end - # Gets a partition entries and the database key for each entry. + # Gets partition entries and the database key for each entry. # # ``` # # For example, get all red cars. @@ -280,7 +285,7 @@ class DODB::CachedPartition(V) < DODB::Partition(V) r_value end - # Gets a partition entries. + # Gets partition entries from the cache or the file-system representation. # # ``` # # For example, get all red cars. @@ -377,9 +382,19 @@ class DODB::RAMOnlyPartition(V) < DODB::CachedPartition(V) end end + # Gets partition entries and the database key for each entry, from the in-memory partition index. + # + # ``` + # # Get all red cars. + # cars_by_color.get_with_indexes "red" + # # Returns something like: + # # [ (@storage[42], 42) + # # , (@storage[91], 91) + # # ] + # # Each tuple is composed of a car and its key in the database. + # ``` def get_with_indexes(partition : String) : Array(Tuple(V, Int32)) r_value = Array(Tuple(V, Int32)).new - if keys = @data[partition]? keys.each do |data_key| r_value << { @storage[data_key], data_key } @@ -388,6 +403,23 @@ class DODB::RAMOnlyPartition(V) < DODB::CachedPartition(V) r_value end + # Gets partition entries from the in-memory partition cache. + # + # ``` + # # Get all red cars. + # cars_by_color.get "red" + # ``` + # NOTE: returns an empty list on empty or non-existing partition. + def get(partition : String) : Array(V) + r_value = Array(V).new + if keys = @data[partition]? + keys.each do |data_key| + r_value << @storage[data_key] + end + end + r_value + end + # Deletes entries within the provided partition and matching the provided block of code. 
# # ``` @@ -396,8 +428,8 @@ class DODB::RAMOnlyPartition(V) < DODB::CachedPartition(V) # car.name == "Corvet" # end # ``` - # TODO: in case the partition is left empty, should the partition be removed from the cache? - def delete(partition, &matcher : Proc(V, Bool)) + # TODO: in case the partition is left empty, should it be removed from the cache? + def delete(partition : String, &matcher : Proc(V, Bool)) if keys = @data[partition]? new_partition = keys.select do |key| item = @storage[key] diff --git a/src/dodb/tags.cr b/src/dodb/tags.cr index fb939f7..280aa90 100644 --- a/src/dodb/tags.cr +++ b/src/dodb/tags.cr @@ -1,22 +1,69 @@ require "file_utils" +# Tags for n-to-n relations. +# Uncached version. +# +# ``` +# cars_by_keywords = car_database.new_uncached_tags "keywords", &.keywords +# ``` +# +# This (tag) index provides a file-system representation, enabling the administrators to +# select a value based on its index. +# +# The following example presents an index named "keywords" with some data indexed by a keywords attribute. +# +# ```plain +# storage +# ├── data +# │   ├── 0000000000 <- this car is expensive and fast +# │   ├── 0000000001 <- this car is expensive +# │   └── 0000000002 <- this car is expensive +# ├── tags +# │   └── by_keywords <- this is an example of index named "keywords" +# │   ├── fast +# │   │ └── 0000000000 -> ../../data/0000000000 +# │   └── expensive +# │   ├── 0000000000 -> ../../data/0000000000 +# │   ├── 0000000001 -> ../../data/0000000001 +# │   └── 0000000002 -> ../../data/0000000002 +# ``` +# +# NOTE: no cache, thus considered as *slow* for creation, deletion **and retrieval**. +# NOTE: see `CachedTags` for a cached version, faster for retrieval. +# NOTE: for fast operations without fs representation, see `RAMOnlyTags`. class DODB::Tags(V) < DODB::Indexer(V) + # Name of the index, such as *keywords* for example. + # This is an arbitrary value, mostly to create the index directory. + # + # NOTE: used for internal operations. 
property name : String - property key_proc : Proc(V, Array(String) | NoIndex) | Proc(V, Array(String)) + + # Procedure to retrieve the index attribute from the value. + property key_proc : Proc(V, Array(String) | NoIndex) + + # Root database directory. getter storage_root : String # Required to remove an entry in the DB. @storage : DODB::Storage(V) - def initialize(@storage, @storage_root, @name, @key_proc) + # To create a *tag index* from a database, use `DODB::Storage#new_tags` to create + # a cached tag, `DODB::Storage#new_uncached_tags` for an uncached tag or + # `DODB::Storage#new_RAM_tags` for a RAM-only tag. + # + # WARNING: this is an internal operation, do not instantiate a tag index by hand. + def initialize(@storage : DODB::Storage(V), @storage_root : String, @name : String, @key_proc : Proc(V, Array(String) | NoIndex)) ::Dir.mkdir_p indexing_directory end - def check!(key, value, old_value) + # Checks for collisions. + # + # NOTE: always returns true, no collision can happen in a tag. + def check!(key : String, value : V, old_value : V?) return true # Tags don’t have collisions or overloads. end - def index(key, value) + def index(key : String, value : V) indices = key_proc.call(value) return if indices.is_a? NoIndex @@ -29,7 +76,7 @@ class DODB::Tags(V) < DODB::Indexer(V) end end - def deindex(key, value) + def deindex(key : String, value : V) indices = key_proc.call(value) return if indices.is_a? NoIndex @@ -43,7 +90,19 @@ class DODB::Tags(V) < DODB::Indexer(V) end end - def get_with_indice(tag : String) : Array(Tuple(V, Int32)) + # Gets tag entries (and their keys) from the file-system representation of the tag. + # + # ``` + # # Get all slow cars. + # cars_by_keywords.get_with_keys "slow" + # # Returns something like: + # # [ (@storage[42], 42) + # # , (@storage[91], 91) + # # ] + # # Each tuple is composed of a car and its key in the database. + # ``` + # NOTE: raises a `MissingEntry` exception on a non-existing tag (an existing but empty tag gives an empty list).
+ def get_with_keys(tag : String) : Array(Tuple(V, Int32)) tag_directory = indexing_directory tag raise MissingEntry.new(@name, tag) unless Dir.exists? tag_directory @@ -57,42 +116,111 @@ class DODB::Tags(V) < DODB::Indexer(V) r_value end - # `get_with_indices` gets values with all the tags. - - def get_with_indices(keys : Array(String)) : Array(Tuple(V, Int32)) + # Gets values (and their keys) matching multiple tags (values must have all the provided tags). + # + # ``` + # # Get cars that are both fast and expensive. + # fast_expensive_cars = cars_by_keywords.get_with_keys ["fast", "expensive"] + # ``` + # + # `#get_with_keys` is not designed to be fast, but should be fine for most applications. + # Nothing can beat custom implementations tailored with specific problems in mind, so in case this + # algorithm isn't fine for you, feel free to override this function for your specific data-set. + # + # NOTE: to seriously boost performance, use `DODB::CachedTags`. + def get_with_keys(keys : Array(String)) : Array(Tuple(V, Int32)) r_value = Array(Tuple(V, Int32)).new return r_value if keys.size < 1 first_key = keys.pop - r_value = get_with_indice(first_key) rescue return r_value + r_value = get_with_keys(first_key) rescue return r_value keys.each do |tag| - values = get_with_indice(tag) rescue return [] of Tuple(V, Int32) + values = get_with_keys(tag) rescue return [] of Tuple(V, Int32) r_value &= values return r_value if r_value.size < 1 end r_value end + # Gets data from an indexed value (throws an exception on a missing entry). + # + # ``` + # fast_cars = cars_by_keywords.get "fast" # No fast cars = MissingEntry exception. + # ``` + # + # WARNING: throws an exception if no value is found. + # NOTE: for a safe version, use `#get?`. def get(tag : String) : Array(V) - get_with_indice(tag).map &.[0] + tag_directory = indexing_directory tag + raise MissingEntry.new(@name, tag) unless Dir.exists?
tag_directory + + r_value = Array(V).new + + Dir.each_child tag_directory do |child| + key = get_key child + r_value << @storage[key] + end + + r_value end + # Safe version of `#get`, gets data and returns a *nil* value in case of + # a missing entry instead of an exception. + # + # ``` + # fast_cars = cars_by_keywords.get? "fast" # No fast cars = nil. + # ``` def get?(tag : String) : Array(V)? get tag rescue MissingEntry nil end + # Gets values matching multiple tags (values must have all the provided tags). + # + # ``` + # # Get cars that are both fast and expensive. + # fast_expensive_cars = cars_by_keywords.get ["fast", "expensive"] + # ``` + # + # `#get` is not designed to be fast, but should be fine for most applications. + # Nothing can beat custom implementations tailored with specific problems in mind, so in case this + # algorithm isn't fine for you, feel free to override this function for your specific data-set. def get(keys : Array(String)) : Array(V) - get_with_indices(keys.sort).map &.[0] + r_value = Array(V).new + return r_value if keys.size < 1 + + first_key = keys.pop + r_value = get(first_key) rescue return r_value + + keys.each do |tag| + values = get(tag) rescue return [] of V + r_value &= values + return r_value if r_value.size < 1 + end + r_value end - def delete(tag) + # Deletes all entries within the provided tag. + # + # ``` + # cars_by_keywords.delete "slow" # Deletes all slow cars. + # ``` + def delete(tag : String) delete tag, do true end end - def delete(tag, &matcher) + # Deletes entries within the provided tag index and matching the provided block of code. + # + # ``` + # # Deletes all slow Corvets. + # cars_by_keywords.delete "slow", do |car| + # car.name == "Corvet" + # end + # ``` + # TODO: in case the tag is left empty, should the tag directory be removed? + def delete(tag : String, &matcher) tag_directory = indexing_directory tag return unless Dir.exists?
tag_directory @@ -128,11 +256,43 @@ class DODB::Tags(V) < DODB::Indexer(V) end end +# Tags for n-to-n relations. +# Cached version. +# +# ``` +# cars_by_keywords = car_database.new_tags "keywords", &.keywords +# ``` +# +# This (tag) index provides a file-system representation, enabling the administrators to +# select a value based on its index. +# +# The following example presents an index named "keywords" with some data indexed by a "keywords" attribute. +# +# ```plain +# storage +# ├── data +# │   ├── 0000000000 <- this car is expensive and fast +# │   ├── 0000000001 <- this car is expensive +# │   └── 0000000002 <- this car is expensive +# ├── tags +# │   └── by_keywords <- this is an example of index named "keywords" +# │   ├── fast +# │   │ └── 0000000000 -> ../../data/0000000000 +# │   └── expensive +# │   ├── 0000000000 -> ../../data/0000000000 +# │   ├── 0000000001 -> ../../data/0000000001 +# │   └── 0000000002 -> ../../data/0000000002 +# ``` +# +# NOTE: cached, reasonable amount of memory used since it's just an index. +# NOTE: fast for retrieval, slow for index creation and deletion (fs operations). +# NOTE: see `Tags` for an uncached version, even less memory-hungry. +# NOTE: for fast operations without fs representation, see `RAMOnlyTags`. class DODB::CachedTags(V) < DODB::Tags(V) # This hash contains the relation between the index key and the data keys. property data = Hash(String, Array(Int32)).new - def index(key, value) + def index(key : String, value : V) indices = key_proc.call value return if indices.is_a? NoIndex super(key, value) @@ -149,7 +309,7 @@ class DODB::CachedTags(V) < DODB::Tags(V) end end - def deindex(key, value) + def deindex(key : String, value : V) indices = key_proc.call value return if indices.is_a? 
NoIndex super(key, value) @@ -162,12 +322,19 @@ class DODB::CachedTags(V) < DODB::Tags(V) end end - def nuke_index - super - data.clear - end - - def get_with_indice(tag : String) : Array(Tuple(V, Int32)) + # Gets tag entries (and their keys) for a single tag, from the cache. + # + # ``` + # # Get all fast cars (and their keys). + # fast_cars = cars_by_keywords.get_with_keys "fast" + # ``` + # + # In case the values aren't in cache, the file-system is checked. + # + # `#get_with_keys` is not designed to be fast, but should be fine for most applications. + # Nothing can beat custom implementations tailored with specific problems in mind, so in case this + # algorithm isn't fine for you, feel free to override this function for your specific data-set. + def get_with_keys(tag : String) : Array(Tuple(V, Int32)) r_value = Array(Tuple(V, Int32)).new if keys = @data[tag]? @@ -189,24 +356,53 @@ class DODB::CachedTags(V) < DODB::Tags(V) r_value end - def delete(tag, &matcher) - # Use `get_with_indexes` to retrieve data on-disk, if necessary. - new_tag = get_with_indexes(tag).map(&.[1]).select do |key| + # Deletes entries within the provided tag and matching the provided block of code. + # + # ``` + # # Deletes all slow Corvets. + # cars_by_keywords.delete "slow", do |car| + # car.name == "Corvet" + # end + # ``` + # + # TODO: in case the tag is left empty, should it be removed from the cache? + def delete(tag : String, &matcher : Proc(V, Bool)) + # Use `get_with_keys` to retrieve data on-disk, if necessary. + new_tag = get_with_keys(tag).map(&.[1]).select do |key| item = @storage[key] ! yield item end + # TODO: remove the tag if `new_tag` is empty? @data[tag] = new_tag super(tag, &matcher) end + + # Clears the cache and removes the `#indexing_directory`. + def nuke_index + super + data.clear + end end +# Tags for n-to-n relations. +# RAM-only version.
+# +# ``` +# cars_by_keywords = car_database.new_RAM_tags "keywords", &.keywords +# ``` +# +# Since there are no file-system operations, all operations are fast. # `DODB::RAMOnlyTags` enables the flexibility of tags without a file-system representation. # Absolute efficiency, exactly as easy to use as the other tag implementations. - +# +# NOTE: reasonable amount of memory used since it's just an index. +# NOTE: fast for all operations, but no file-system representation. +# NOTE: see `Tags` for an uncached version, even less memory-hungry. +# NOTE: for an fs representation but still fast for retrieval, see `CachedTags`. class DODB::RAMOnlyTags(V) < DODB::CachedTags(V) - def index(key, value) + def index(key : String, value : V) indices = key_proc.call value return if indices.is_a? NoIndex @@ -222,7 +418,7 @@ class DODB::RAMOnlyTags(V) < DODB::CachedTags(V) end end - def deindex(key, value) + def deindex(key : String, value : V) indices = key_proc.call value return if indices.is_a? NoIndex @@ -234,20 +430,55 @@ class DODB::RAMOnlyTags(V) < DODB::CachedTags(V) end end - def get_with_indice(tag : String) : Array(Tuple(V, Int32)) + # Gets tag entries (and their keys) from the in-memory tag cache. + # + # ``` + # # Get all slow cars. + # cars_by_keywords.get_with_keys "slow" + # # Returns something like: + # # [ (@storage[42], 42) + # # , (@storage[91], 91) + # # ] + # # Each tuple is composed of a car and its key in the database. + # ``` + # NOTE: returns an empty list on empty or non-existing tag. + def get_with_keys(tag : String) : Array(Tuple(V, Int32)) r_value = Array(Tuple(V, Int32)).new - if keys = @data[tag]? keys.each do |data_key| r_value << { @storage[data_key], data_key } end end - r_value end - def delete(tag, &matcher) - # Use `get_with_indexes` to retrieve data on-disk, if necessary. + # Gets tag entries from the in-memory tag cache. + # + # ``` + # # Get all slow cars. + # cars_by_keywords.get "slow" + # ``` + # NOTE: returns an empty list on empty or non-existing tag.
+ def get(tag : String) : Array(V) + r_value = Array(V).new + if keys = @data[tag]? + keys.each do |data_key| + r_value << @storage[data_key] + end + end + r_value + end + + # Deletes entries within the provided tag and matching the provided block of code. + # + # ``` + # # Deletes all slow Corvets. + # cars_by_keywords.delete "slow", do |car| + # car.name == "Corvet" + # end + # ``` + # TODO: in case the tag is left empty, should it be removed from the cache? + def delete(tag : String, &matcher : Proc(V, Bool)) if keys = @data[tag]? new_tag = keys.select do |key| item = @storage[key] @@ -257,4 +488,9 @@ class DODB::RAMOnlyTags(V) < DODB::CachedTags(V) @data[tag] = new_tag end end + + # Clears the cache. + def nuke_index + data.clear + end end diff --git a/src/ramonly.cr b/src/ramonly.cr index 85bb470..4df9422 100644 --- a/src/ramonly.cr +++ b/src/ramonly.cr @@ -1,6 +1,26 @@ -# `DODB::RAMOnlyDataBase` is a database without a file-system representation, -# enabling the use of DODB to store data which have the same lifetime as the application. -# Indexing (indexes, partitions, tags) will behave the same way. +# RAM-only database, without a file-system representation. +# +# This database implementation enables the use of DODB to store data with the same lifetime as the application. +# +# Indexing (basic indexes, partitions, tags) will behave the same way. +# ``` +# # Creates a DODB RAM-only database (yes, the path is still required). +# car_database = DODB::RAMOnlyDataBase.new "/path/to/db" +# +# # Creates a (cached) index (same as for all other DODB database implementations). +# cars_by_name = car_database.new_index "name", &.name +# +# # Add a value in the database. +# car_database << Car.new "Corvet" +# ``` +# In this example there is a cached index, so on the file-system: +# ```plain +# storage +# ├── data <- this directory stays empty (RAM-only database, remember?) 
+# ├── indices +# │   └── by_name <- the "name" basic index +# │   └── Corvet -> ../../data/0000000000 <- the index works despite not pointing to a real file +# ``` class DODB::RAMOnlyDataBase(V) < DODB::CachedDataBase(V) # Initialization still uses a directory name and creates a few paths. # This is an implementation detail to re-use code of `DODB::Storage` and to get the indexers to work. @@ -41,7 +61,6 @@ class DODB::RAMOnlyDataBase(V) < DODB::CachedDataBase(V) @data[key] = value end - # :inherit: def unsafe_delete(key : Int32) value = self[key]? diff --git a/src/uncached.cr b/src/uncached.cr index 16a1a45..933d747 100644 --- a/src/uncached.cr +++ b/src/uncached.cr @@ -1,6 +1,26 @@ # Basic database of DODB. # Data isn't cached. # +# ``` +# # Creates a DODB (uncached) database. +# car_database = DODB::DataBase.new "/path/to/db" +# +# # Creates a (cached) index. +# cars_by_name = car_database.new_index "name", &.name +# +# # Add a value in the database. +# car_database << Car.new "Corvet" +# ``` +# On the file-system: +# ```plain +# storage +# ├── data +# │   └── 0000000000 +# ├── indices +# │   └── by_name <- the "name" basic index +# │   └── Corvet -> ../../data/0000000000 +# ``` +# # NOTE: slow but doesn't require much memory. class DODB::DataBase(V) < DODB::Storage(V) end