From c5f57b589d8339fa8962daac9962154f7f184381 Mon Sep 17 00:00:00 2001 From: Philippe PITTOLI Date: Tue, 21 May 2024 13:50:25 +0200 Subject: [PATCH] Documentation goes brrrr. --- src/dodb/index.cr | 49 +++++++++--- src/dodb/partition.cr | 169 +++++++++++++++++++++++++++++++++++++----- 2 files changed, 190 insertions(+), 28 deletions(-) diff --git a/src/dodb/index.cr b/src/dodb/index.cr index a63461c..148fede 100644 --- a/src/dodb/index.cr +++ b/src/dodb/index.cr @@ -6,6 +6,10 @@ require "./indexer.cr" # Basic indexes for 1-to-1 relations. # Uncached version. # +# ``` +# cars_by_name = car_database.new_uncached_index "name", &.name +# ``` +# # This index provides a file-system representation, enabling the administrators to # select a value based on its index. The following example presents an index named "id" # with some data indexed by an UUID attribute. @@ -27,16 +31,27 @@ require "./indexer.cr" # NOTE: see `CachedIndex` for a cached version, faster for retrieval. # NOTE: for fast operations without fs representation, see `RAMOnlyIndex`. class DODB::Index(V) < DODB::Indexer(V) + # Name of the index, such as *id* or *color* for example. + # This is an arbitrary value, mostly to create the index directory. + # + # NOTE: used for internal operations. property name : String - property key_proc : Proc(V, String | NoIndex) | Proc(V, String) + + # Procedure to retrieve the index attribute from the value, used for **internal operations**. + property key_proc : Proc(V, String | NoIndex) + + # Root database directory, used for **internal operations**. getter storage_root : String + # Reference to the database instance, used for **internal operations**. @storage : DODB::Storage(V) # To create an index from a database, use `DODB::Storage#new_index` to create # a cached index, `DODB::Storage#new_uncached_index` for an uncached index or - # `DODB::Storage#new_RAM_index` for a RAM-only index. 
- def initialize(@storage, @storage_root, @name, @key_proc) + # `DODB::Storage#new_RAM_index` for a RAM-only index. + # + # WARNING: this is an internal operation, do not instantiate an index by hand. + def initialize(@storage : DODB::Storage(V), @storage_root : String, @name : String, @key_proc : Proc(V, String | NoIndex)) Dir.mkdir_p indexing_directory end @@ -84,7 +99,7 @@ class DODB::Index(V) < DODB::Indexer(V) end end - # Gets the key (ex: 343) for an entry in the DB from an indexed value. + # Gets the key (ex: 343) for an entry in the DB from an indexed value, used for **internal operations**. # # Reads the link in `db/indices/by_#{name}/`. # @@ -159,7 +174,7 @@ class DODB::Index(V) < DODB::Indexer(V) yield nil end - # Reads the indexed symlink to find its related key. + # Reads the indexed symlink to find its related key, used for **internal operations**. # # For example, for a car indexed by its name: # @@ -175,6 +190,8 @@ class DODB::Index(V) < DODB::Indexer(V) # `#get_key_on_fs` reads the *storage/indices/by_name/Corvet* symlink and gets # the name of the data file ("000000343") and converts it in an integer, # which is the key in the database. + # + # NOTE: used for internal operations. def get_key_on_fs(index : String) : Int32 file_path = file_path_index index raise MissingEntry.new(@name, index) unless ::File.symlink? file_path @@ -267,6 +284,10 @@ end # Basic indexes for 1-to-1 relations. # Cached version. # +# ``` +# cars_by_name = car_database.new_index "name", &.name +# ``` +# # The cache makes this index fast and since the index doesn't contain # the full value but just an attribute and a key, memory usage is still reasonable. # @@ -291,7 +312,8 @@ end # NOTE: see `Index` for an uncached version, even less memory-hungry. # NOTE: for fast operations without fs representation, see `RAMOnlyIndex`. class DODB::CachedIndex(V) < DODB::Index(V) - # This hash contains the relation between the index key and the data key. 
+ # This hash contains the relation between the index key and the data key, used for + # **internal operations**. # # WARNING: used for internal operations, do not change its content or access it directly. property data = Hash(String, Int32).new @@ -312,8 +334,7 @@ class DODB::CachedIndex(V) < DODB::Index(V) end end - # Clears the cache. - # :inherit: + # Clears the cache and removes the `#indexing_directory`. def nuke_index super data.clear @@ -330,7 +351,8 @@ class DODB::CachedIndex(V) < DODB::Index(V) @data[index_key] = key.to_i end - # Removes the index of a value on the file-system as `DODB::Index#deindex` but also from the cache. + # Removes the index of a value on the file-system as `DODB::Index#deindex` but also from + # the cache, used for **internal operations**. # # NOTE: used for internal operations. def deindex(key, value) @@ -360,6 +382,10 @@ end # Basic indexes for 1-to-1 relations. # RAM-only version, no file-system representation. # +# ``` +# cars_by_name = car_database.new_RAM_index "name", &.name +# ``` +# # Since there is no file-system operations, all the operations are fast. # `DODB::RAMOnlyIndex` enables the flexibility of indexes without a file-system representation # for absolute efficiency. @@ -397,4 +423,9 @@ class DODB::RAMOnlyIndex(V) < DODB::CachedIndex(V) raise MissingEntry.new(@name, index) end end + + # Clears the index. + def nuke_index + data.clear + end end diff --git a/src/dodb/partition.cr b/src/dodb/partition.cr index a730e5d..01bf034 100644 --- a/src/dodb/partition.cr +++ b/src/dodb/partition.cr @@ -5,6 +5,10 @@ require "./indexer.cr" # Partitions for 1-to-n relations. # Uncached version. # +# ``` +# cars_by_color = car_database.new_uncached_partition "color", &.color +# ``` +# # This (partition) index provides a file-system representation, enabling the administrators to # select a value based on its index. The following example presents an index named "color" # with some data indexed by a color attribute. 
@@ -28,22 +32,35 @@ require "./indexer.cr" # NOTE: see `CachedPartition` for a cached version, faster for retrieval. # NOTE: for fast operations without fs representation, see `RAMOnlyPartition`. class DODB::Partition(V) < DODB::Indexer(V) + # Name of the index, such as *id* or *color* for example. + # This is an arbitrary value, mostly to create the index directory. + # + # NOTE: used for internal operations. property name : String - property key_proc : Proc(V, String | NoIndex) | Proc(V, String) + + # Procedure to retrieve the index attribute from the value. + property key_proc : Proc(V, String | NoIndex) + + # Root database directory. getter storage_root : String - # Required to remove an entry in the DB. + # Reference to the database instance. @storage : DODB::Storage(V) - def initialize(@storage, @storage_root, @name, @key_proc) + # To create a partition from a database, use `DODB::Storage#new_partition` to create + # a cached partition, `DODB::Storage#new_uncached_partition` for an uncached partition or + # `DODB::Storage#new_RAM_partition` for a RAM-only partition. + # + # WARNING: this is an internal operation, do not instantiate a partition by hand. + def initialize(@storage : DODB::Storage(V), @storage_root : String, @name : String, @key_proc : Proc(V, String | NoIndex)) ::Dir.mkdir_p indexing_directory end - def check!(key, value, old_value) + def check!(key : String, value : V, old_value : V?) return true # Partitions don’t have collisions or overloads. end - def index(key, value) + def index(key : String, value : V) partition = key_proc.call value return if partition.is_a? NoIndex @@ -54,7 +71,7 @@ class DODB::Partition(V) < DODB::Indexer(V) ::File.symlink get_data_symlink(key), symlink end - def deindex(key, value) + def deindex (key : String, value : V) partition = key_proc.call value return if partition.is_a? 
NoIndex @@ -66,7 +83,15 @@ class DODB::Partition(V) < DODB::Indexer(V) end end - def get(partition) : Array(V) + # Gets data from an indexed value (throws an exception on a missing entry). + # + # ``` + # red_cars = cars_by_color.get "red" # No red cars = MissingEntry exception + # ``` + # + # WARNING: throws an exception if no value is found. + # NOTE: for a safe version, use `#get?`. + def get(partition : String) : Array(V) partition_directory = indexing_directory partition raise MissingEntry.new(@name, partition) unless Dir.exists? partition_directory @@ -79,16 +104,36 @@ class DODB::Partition(V) < DODB::Indexer(V) r_value end - def get?(partition) : Array(V)? + # Safe version of `#get`, gets data and returns a *nil* value in case of + # a missing entry instead of an exception. + # + # ``` + # red_cars = cars_by_color.get? "red" # No red cars = nil + # ``` + def get?(partition : String) : Array(V)? get partition rescue MissingEntry nil end - def delete(partition) + # Deletes all entries within the provided partition. + # + # ``` + # cars_by_color.delete "red" # Deletes all red cars + # ``` + def delete(partition : String) delete partition, do true end end + # Deletes entries within the provided partition and matching the provided block of code. + # + # ``` + # # Deletes all red Corvets. + # cars_by_color.delete "red", do |car| + # car.name == "Corvet" + # end + # ``` + # TODO: in case the partition is left empty, should the partition's directory be removed? def delete(partition, &matcher : Proc(V, Bool)) partition_directory = indexing_directory partition @@ -104,6 +149,7 @@ class DODB::Partition(V) < DODB::Indexer(V) end end + # :inherit: def indexing_directory : String "#{@storage_root}/partitions/by_#{@name}" end @@ -128,6 +174,10 @@ end # Partitions for 1-to-n relations. # Cached version. 
# +# ``` +# cars_by_color = car_database.new_partition "color", &.color +# ``` +# # This (partition) index provides a file-system representation, enabling the administrators to # select a value based on its index. The following example presents an index named "color" # with some data indexed by a color attribute. @@ -152,15 +202,22 @@ end # NOTE: see `Partition` for an uncached version, even less memory-hungry. # NOTE: for fast operations without fs representation, see `RAMOnlyPartition`. class DODB::CachedPartition(V) < DODB::Partition(V) - # This hash contains the relation between the index key and the data keys. + # This hash contains the relation between the index key and the data keys, used for + # **internal operations**. + # + # WARNING: used for internal operations, do not change its content or access it directly. property data = Hash(String, Array(Int32)).new + # Clears the cache and removes the `#indexing_directory`. def nuke_index super data.clear end - def index(key, value) + # Indexes the value on the file-system as `DODB::Partition#index` but also puts the index in a cache. + # + # NOTE: used for internal operations. + def index(key : String, value : V) partition = key_proc.call value return if partition.is_a? NoIndex super(key, value) @@ -175,7 +232,11 @@ class DODB::CachedPartition(V) < DODB::Partition(V) @data[partition] = array end - def deindex(key, value) + # Removes the index of a value on the file-system as `DODB::Partition#deindex` but also from + # the cache, used for **internal operations**. + # + # NOTE: used for internal operations. + def deindex(key : String, value : V) partition = key_proc.call value return if partition.is_a? NoIndex super(key, value) @@ -186,9 +247,21 @@ class DODB::CachedPartition(V) < DODB::Partition(V) end end - def get_with_indexes(partition) : Array(Tuple(V, Int32)) + # Gets a partition's entries and the database key for each entry. + # + # ``` + # # For example, get all red cars. 
+ # cars_by_color.get_with_indexes "red" + # # Will return something like: + # # [ (@storage[42], 42) + # # , (@storage[91], 91) + # # ] + # # Each tuple is composed of a car and its key in the database. + # ``` + def get_with_indexes(partition : String) : Array(Tuple(V, Int32)) r_value = Array(Tuple(V, Int32)).new + # In case the partition is cached. if keys = @data[partition]? keys.each do |data_key| r_value << { @storage[data_key], data_key } @@ -207,11 +280,51 @@ class DODB::CachedPartition(V) < DODB::Partition(V) r_value end - def get(partition) : Array(V) - get_with_indexes(partition).map &.[0] + # Gets a partition's entries. + # + # ``` + # # For example, get all red cars. + # cars_by_color.get "red" + # ``` + # NOTE: returns an empty list on an empty partition, raises `MissingEntry` on a non-existing one. + def get(partition : String) : Array(V) + r_value = Array(V).new + + # In case the partition is cached. + if keys = @data[partition]? + keys.each do |data_key| + r_value << @storage[data_key] + end + else + # The keys to put in the partition. + p_value = Array(Int32).new + + # Get the key from the database representation on the file-system. + partition_directory = indexing_directory partition + raise MissingEntry.new(@name, partition) unless Dir.exists? partition_directory + + Dir.each_child partition_directory do |child| + key = get_key child + r_value << @storage[key] + p_value << key + end + + @data[partition] = p_value + end + r_value end - def delete(partition, &matcher : Proc(V, Bool)) + # Deletes entries within the provided partition and matching the provided block of code, + # both from the file-system representation and from the cache. + # + # ``` + # # Deletes all red Corvets. + # cars_by_color.delete "red", do |car| + # car.name == "Corvet" + # end + # ``` + # TODO: in case the partition is left empty, should the partition be removed from the cache? + def delete(partition : String, &matcher : Proc(V, Bool)) # Use `get_with_indexes` to retrieve data on-disk, if necessary. 
new_partition = get_with_indexes(partition).map(&.[1]).select do |key| item = @storage[key] @@ -227,6 +340,10 @@ end # Partitions for 1-to-n relations. # RAM-only version. # +# ``` +# cars_by_color = car_database.new_RAM_partition "color", &.color +# ``` +# # Since there is no file-system operations, all the operations are fast. # `DODB::RAMOnlyPartition` enables the flexibility of partitions without a file-system representation. # Absolute efficiency, exactly as easy to use as the other partition implementations. @@ -236,7 +353,7 @@ end # NOTE: see `Partition` for an uncached version, even less memory-hungry. # NOTE: for an fs representation but still fast for retrieval, see `CachedPartition`. class DODB::RAMOnlyPartition(V) < DODB::CachedPartition(V) - def index(key, value) + def index(key : String, value : V) partition = key_proc.call value return if partition.is_a? NoIndex @@ -250,7 +367,7 @@ class DODB::RAMOnlyPartition(V) < DODB::CachedPartition(V) @data[partition] = array end - def deindex(key, value) + def deindex(key : String, value : V) partition = key_proc.call value return if partition.is_a? NoIndex @@ -260,7 +377,7 @@ class DODB::RAMOnlyPartition(V) < DODB::CachedPartition(V) end end - def get_with_indexes(partition) : Array(Tuple(V, Int32)) + def get_with_indexes(partition : String) : Array(Tuple(V, Int32)) r_value = Array(Tuple(V, Int32)).new if keys = @data[partition]? @@ -271,6 +388,15 @@ class DODB::RAMOnlyPartition(V) < DODB::CachedPartition(V) r_value end + # Deletes entries within the provided partition and matching the provided block of code. + # + # ``` + # # Deletes all red Corvets. + # cars_by_color.delete "red", do |car| + # car.name == "Corvet" + # end + # ``` + # TODO: in case the partition is left empty, should the partition be removed from the cache? def delete(partition, &matcher : Proc(V, Bool)) if keys = @data[partition]? 
new_partition = keys.select do |key| @@ -280,4 +406,9 @@ class DODB::RAMOnlyPartition(V) < DODB::CachedPartition(V) @data[partition] = new_partition end end + + # Clears the cache. + def nuke_index + data.clear + end end