From 087784a6209ac080a0c3302ba91603e7a5232f5d Mon Sep 17 00:00:00 2001 From: Philippe PITTOLI Date: Wed, 22 May 2024 03:56:44 +0200 Subject: [PATCH] Tags: code refactoring and consistency improvement. --- src/dodb/exceptions.cr | 10 ++- src/dodb/tags.cr | 180 ++++++++++++++--------------------------- 2 files changed, 69 insertions(+), 121 deletions(-) diff --git a/src/dodb/exceptions.cr b/src/dodb/exceptions.cr index a2c9c37..501d3c7 100644 --- a/src/dodb/exceptions.cr +++ b/src/dodb/exceptions.cr @@ -2,14 +2,20 @@ # tries to retrieve a value with an invalid key. class DODB::MissingEntry < Exception getter index : String? - getter key : String | Int32 + getter key : Array(String) | String | Int32 # The exception will contain both the key and the name of the index, # and a human-readable string. - def initialize(@index, @key) + def initialize(@index, @key : String | Int32) super "no entry in index '#{@index}' for key '#{@key}'" end + # The exception will contain the name of the index and all requested keys, + # and a human-readable string. + def initialize(@index, @key : Array(String)) + super "no entry in index '#{@index}' for keys '#{key.join('-')}'" + end + # The exception will contain the key and a human-readable string. def initialize(@key) super "no entry for key '#{@key}' in database" diff --git a/src/dodb/tags.cr b/src/dodb/tags.cr index 280aa90..86103d8 100644 --- a/src/dodb/tags.cr +++ b/src/dodb/tags.cr @@ -76,6 +76,8 @@ class DODB::Tags(V) < DODB::Indexer(V) end end + # :inherit: + # TODO: in case the tag is left empty, should the tag directory be removed? def deindex(key : String, value : V) indices = key_proc.call(value) return if indices.is_a? NoIndex @@ -101,7 +103,7 @@ class DODB::Tags(V) < DODB::Indexer(V) # # ] # # Each tuple is composed of a car and its key in the database. # ``` - # NOTE: returns an empty list on empty or non-existing tag. + # WARNING: throws a MissingEntry exception on non-existing tag. 
def get_with_keys(tag : String) : Array(Tuple(V, Int32)) tag_directory = indexing_directory tag raise MissingEntry.new(@name, tag) unless Dir.exists? tag_directory @@ -128,56 +130,31 @@ class DODB::Tags(V) < DODB::Indexer(V) # algorithm isn't fine for you, feel free to override this function for your specific data-set. # # NOTE: to seriously boost performance, use `DODB::CachedTags`. + # WARNING: throws a MissingEntry exception on non-existing tag or no match. def get_with_keys(keys : Array(String)) : Array(Tuple(V, Int32)) r_value = Array(Tuple(V, Int32)).new return r_value if keys.size < 1 first_key = keys.pop - r_value = get_with_keys(first_key) rescue return r_value + r_value = get_with_keys(first_key) keys.each do |tag| - values = get_with_keys(tag) rescue return [] of Tuple(V, Int32) - r_value &= values - return r_value if r_value.size < 1 + r_value &= get_with_keys(tag) + raise MissingEntry.new(@name, keys) if r_value.size < 1 end r_value end + # Safe version of `#get_with_keys`. + # NOTE: returns an empty list on empty or non-existing tag. + def get_with_keys?(tag : String | Array(String)) : Array(Tuple(V, Int32)) + get_with_keys tag + rescue MissingEntry + Array(Tuple(V, Int32)).new + end + # Gets data from an indexed value (throws an exception on a missing entry). - # - # ``` - # fast_cars = cars_by_keywords.get? "fast" # No fast cars = MissingEntry exception. - # ``` - # - # WARNING: throws an exception if no value is found. - # NOTE: for a safe version, use `#get?`. - def get(tag : String) : Array(V) - tag_directory = indexing_directory tag - raise MissingEntry.new(@name, tag) unless Dir.exists? tag_directory - - r_value = Array(V).new - - Dir.each_child tag_directory do |child| - key = get_key child - r_value << @storage[key] - end - - r_value - end - - # Safe version of `#get`, gets data and returns a *nil* value in case of - # a missing entry instead of an exception. - # - # ``` - # fast_cars = cars_by_keywords.get? "fast" # No fast cars = nil. 
- # ``` - def get?(tag : String) : Array(V)? - get tag - rescue MissingEntry - nil - end - - # Gets values matching multiple tags (values must have all the provided tags). + # In case of multiple tags, gets entries matching all the provided tags. # # ``` # # Get cars that are both fast and expensive. @@ -187,27 +164,33 @@ class DODB::Tags(V) < DODB::Indexer(V) # `#get` is not designed to be fast, but should be fine for most applications. # Nothing can beat custom implementations tailored with specific problems in mind, so in case this # algorithm isn't fine for you, feel free to override this function for your specific data-set. - def get(keys : Array(String)) : Array(V) - r_value = Array(V).new - return r_value if keys.size < 1 - - first_key = keys.pop - r_value = get(first_key) rescue return r_value - - keys.each do |tag| - values = get(tag) rescue return [] of V - r_value &= values - return r_value if r_value.size < 1 - end - r_value + # WARNING: throws an exception if no value is found. + # NOTE: for a safe version, use `#get?`. + def get(keys : String | Array(String)) : Array(V) + get_with_keys(keys).map &.[0] end - # Deletes all entries within the provided tag. + # Safe version of `#get`, gets data and returns *an empty array* in case of + # a missing entry instead of an exception. # # ``` - # cars_by_keywords.delete "slow" # Deletes all slow cars. + # fast_cars = cars_by_keywords.get? "fast" + # cars_both_fast_and_expensive = cars_by_keywords.get? ["fast", "expensive"] # ``` - def delete(tag : String) + def get?(tag : String | Array(String)) : Array(V) + get tag + rescue MissingEntry + Array(V).new + end + + # Deletes entries with the provided tag, or matching all the provided tags. + # + # ``` + # cars_by_keywords.delete "slow" # Deletes all slow cars. + # cars_by_keywords.delete ["slow", "expensive"] # Deletes all cars that are both slow and expensive. + # ``` + # WARNING: throws an exception if no value is found. 
+ def delete(tag : String | Array(String)) delete tag, do true end end @@ -218,18 +201,15 @@ class DODB::Tags(V) < DODB::Indexer(V) # cars_by_keywords.delete "slow", do |car| # car.name == "Corvet" # end + # + # # Deletes all slow and expensive Bullet-GTs. + # cars_by_keywords.delete ["slow", "expensive"], do |car| + # car.name == "Bullet-GT" + # end # ``` - # TODO: in case the tag is left empty, should the tag directory be removed? - def delete(tag : String, &matcher) - tag_directory = indexing_directory tag - - return unless Dir.exists? tag_directory - - Dir.each_child tag_directory do |child| - key = get_key child - item = @storage[key] - - if yield item + def delete(tags : String | Array(String), &matcher : Proc(V, Bool)) + get_with_keys(tags).each do |entry, key| + if yield entry @storage.delete key end end @@ -309,6 +289,8 @@ class DODB::CachedTags(V) < DODB::Tags(V) end end + # :inherit: + # TODO: in case the tag is left empty, should it be removed from the cache? def deindex(key : String, value : V) indices = key_proc.call value return if indices.is_a? NoIndex @@ -322,18 +304,19 @@ class DODB::CachedTags(V) < DODB::Tags(V) end end - # Gets values (and their keys) matching multiple tags, from the cache. + # In `DODB::CachedTags`, `#get_with_keys(tag : String)` is modified to retrieve data keys from the index cache. + # In case the data isn't already in the cache, it is retrieved from the file-system. # # ``` - # # Get cars that are both fast and expensive. - # fast_expensive_cars = car_database.get_with_keys ["fast", "expensive"] + # # Get all slow cars. + # cars_by_keywords.get_with_keys "slow" + # # Returns something like: + # # [ (@storage[42], 42) + # # , (@storage[91], 91) + # # ] + # # Each tuple is composed of a car and its key in the database. # ``` - # - # In case the values aren't in cache, the file-system is checked. - # - # `#get_with_keys` is not designed to be fast, but should be fine for most applications.
- # Nothing can beat custom implementations tailored with specific problems in mind, so in case this - # algorithm isn't fine for you, feel free to override this function for your specific data-set. + # WARNING: throws a MissingEntry exception on non-existing tag. def get_with_keys(tag : String) : Array(Tuple(V, Int32)) r_value = Array(Tuple(V, Int32)).new @@ -356,29 +339,6 @@ class DODB::CachedTags(V) < DODB::Tags(V) r_value end - # Deletes entries within the provided tag and matching the provided block of code. - # - # ``` - # # Deletes all slow Corvets. - # cars_by_keywords.delete "slow", do |car| - # car.name == "Corvet" - # end - # ``` - # - # TODO: in case the tag is left empty, should it be removed from the cache? - def delete(tag : String, &matcher : Proc(V, Bool)) - # Use `get_with_keys` to retrieve data on-disk, if necessary. - new_tag = get_with_keys(tag).map(&.[1]).select do |key| - item = @storage[key] - ! yield item - end - - # TODO: remove the tag if `new_tag` is empty? - @data[tag] = new_tag - - super(tag, &matcher) - end - # Clears the cache and removes the `#indexing_directory`. def nuke_index super @@ -441,13 +401,15 @@ class DODB::RAMOnlyTags(V) < DODB::CachedTags(V) # # ] # # Each tuple is composed of a car and its key in the database. # ``` - # NOTE: returns an empty list on empty or non-existing tag. + # WARNING: throws a MissingEntry exception on non-existing tag. def get_with_keys(tag : String) : Array(Tuple(V, Int32)) r_value = Array(Tuple(V, Int32)).new if keys = @data[tag]? keys.each do |data_key| r_value << { @storage[data_key], data_key } end + else + raise MissingEntry.new(@name, tag) end r_value end @@ -469,26 +431,6 @@ class DODB::RAMOnlyTags(V) < DODB::CachedTags(V) r_value end - # Deletes entries within the provided tag and matching the provided block of code. - # - # ``` - # # Deletes all slow Corvets. 
- # cars_by_keywords.delete "slow", do |car| - # car.name == "Corvet" - # end - # ``` - # TODO: in case the tag is left empty, should it be removed from the cache? - def delete(tag : String, &matcher : Proc(V, Bool)) - if keys = @data[tag]? - new_tag = keys.select do |key| - item = @storage[key] - ! yield item - end - - @data[tag] = new_tag - end - end - # Clears the cache. def nuke_index data.clear