Documentation, again. Some very few re-implementations.

2024-05-21 20:50:28 +02:00 · 2024-05-21 20:50:28 +02:00 · 378f8c76db
commit 378f8c76db
parent 5c45311926
5 changed files with 379 additions and 53 deletions
--- a/src/cached.cr
+++ b/src/cached.cr
@ -15,6 +15,26 @@ end
 # Cached database, stores data on the file-system and keeps it in RAM.
 #
 # ```
 # # Creates a DODB cached database.
 # car_database = DODB::CachedDataBase.new "/path/to/db"
 #
 # # Creates a (cached) index.
 # cars_by_name = car_database.new_index "name", &.name
 #
 # # Add a value in the database.
 # car_database << Car.new "Corvet"
 # ```
 # On the file-system:
 # ```plain
 # storage
 # ├── data
 # │   └── 0000000000
 # ├── indices
 # │   └── by_name   <- the "name" basic index
 # │       └── Corvet -> ../../data/0000000000
 # ```
 #
 # WARNING: beware of the RAM use, see `DODB::StackedDataBase` for a less memory-hungry option.
 class DODB::CachedDataBase(V) < DODB::Storage(V)
 	@indexers = [] of Indexer(V)
@ -109,7 +129,6 @@ class DODB::CachedDataBase(V) < DODB::Storage(V)
 		end
 	end
 	# :inherit:
 	def unsafe_delete(key : Int32)
 		value = self[key]?
--- a/src/dodb/partition.cr
+++ b/src/dodb/partition.cr
@ -10,8 +10,9 @@ require "./indexer.cr"
 # ```
 #
 # This (partition) index provides a file-system representation, enabling the administrators to
-# select a value based on its index. The following example presents an index named "color"
+# select a value based on its index.
-# with some data indexed by a color attribute.
+#
 # The following example presents an index named "color" with some data indexed by a color attribute.
 #
 # ```plain
 # storage
@ -32,7 +33,7 @@ require "./indexer.cr"
 # NOTE: see `CachedPartition` for a cached version, faster for retrieval.
 # NOTE: for fast operations without fs representation, see `RAMOnlyPartition`.
 class DODB::Partition(V) < DODB::Indexer(V)
-	# Name of the index, such as *id* or *color* for example.
+	# Name of the index, such as *color* for example.
 	# This is an arbitrary value, mostly to create the index directory.
 	#
 	# NOTE: used for internal operations.
@ -47,7 +48,7 @@ class DODB::Partition(V) < DODB::Indexer(V)
 	# Reference to the database instance.
 	@storage : DODB::Storage(V)
-	# To create a partition from a database, use `DODB::Storage#new_partition` to create
+	# To create a *partition index* from a database, use `DODB::Storage#new_partition` to create
 	# a cached partition, `DODB::Storage#new_uncached_partition` for an uncached partition or
 	# `DODB::Storage#new_RAM_partition` for a RAM-only partition.
 	#
@ -56,6 +57,9 @@ class DODB::Partition(V) < DODB::Indexer(V)
 		::Dir.mkdir_p indexing_directory
 	end
 	# Checks for collisions.
 	#
 	# NOTE: always returns true, no collision can happen in a partition.
 	def check!(key : String, value : V, old_value : V?)
 		return true # Partitions don’t have collisions or overloads.
 	end
@ -108,7 +112,7 @@ class DODB::Partition(V) < DODB::Indexer(V)
 	# a missing entry instead of an exception.
 	#
 	# ```
-	# red_cars = cars_by_color.get? "red"  # No red cars = nil
+	# red_cars = cars_by_color.get? "red"  # No red cars = nil.
 	# ```
 	def get?(partition : String) : Array(V)?
 		get partition
@ -119,7 +123,7 @@ class DODB::Partition(V) < DODB::Indexer(V)
 	# Deletes all entries within the provided partition.
 	#
 	# ```
-	# cars_by_color.delete "red"  # Deletes all red cars
+	# cars_by_color.delete "red"  # Deletes all red cars.
 	# ```
 	def delete(partition : String)
 		delete partition, do true end
@ -179,8 +183,9 @@ end
 # ```
 #
 # This (partition) index provides a file-system representation, enabling the administrators to
-# select a value based on its index. The following example presents an index named "color"
+# select a value based on its index.
-# with some data indexed by a color attribute.
+#
 # The following example presents an index named "color" with some data indexed by a color attribute.
 #
 # ```plain
 # storage
@ -247,7 +252,7 @@ class DODB::CachedPartition(V) < DODB::Partition(V)
 		end
 	end
-	# Gets a partition entries and the database key for each entry.
+	# Gets partition entries and the database key for each entry.
 	#
 	# ```
 	# # For example, get all red cars.
@ -280,7 +285,7 @@ class DODB::CachedPartition(V) < DODB::Partition(V)
 		r_value
 	end
-	# Gets a partition entries.
+	# Gets partition entries from the cache or the file-system representation.
 	#
 	# ```
 	# # For example, get all red cars.
@ -377,9 +382,19 @@ class DODB::RAMOnlyPartition(V) < DODB::CachedPartition(V)
 		end
 	end
 	# Gets partition entries and the database key for each entry, from the in-memory partition index.
 	#
 	# ```
 	# # Get all red cars.
 	# cars_by_color.get_with_indexes "red"
 	# # Returns something like:
 	# # [ (@storage[42], 42)
 	# # , (@storage[91], 91)
 	# # ]
 	# # Each tuple is composed of a car and its key in the database.
 	# ```
 	def get_with_indexes(partition : String) : Array(Tuple(V, Int32))
 		r_value = Array(Tuple(V, Int32)).new
 		if keys = @data[partition]?
 			keys.each do |data_key|
 				r_value << { @storage[data_key], data_key }
@ -388,6 +403,23 @@ class DODB::RAMOnlyPartition(V) < DODB::CachedPartition(V)
 		r_value
 	end
 	# Gets partition entries from the in-memory partition cache.
 	#
 	# ```
 	# # Get all red cars.
 	# cars_by_color.get "red"
 	# ```
 	# NOTE: returns an empty list on empty or non-existing partition.
 	def get(partition : String) : Array(V)
 		r_value = Array(V).new
 		if keys = @data[partition]?
 			keys.each do |data_key|
 				r_value << @storage[data_key]
 			end
 		end
 		r_value
 	end
 	# Deletes entries within the provided partition and matching the provided block of code.
 	#
 	# ```
@ -396,8 +428,8 @@ class DODB::RAMOnlyPartition(V) < DODB::CachedPartition(V)
 	#   car.name == "Corvet"
 	# end
 	# ```
-	# TODO: in case the partition is left empty, should the partition be removed from the cache?
+	# TODO: in case the partition is left empty, should it be removed from the cache?
-	def delete(partition, &matcher : Proc(V, Bool))
+	def delete(partition : String, &matcher : Proc(V, Bool))
 		if keys = @data[partition]?
 			new_partition = keys.select do |key|
 				item = @storage[key]
--- a/src/dodb/tags.cr
+++ b/src/dodb/tags.cr
@ -1,22 +1,69 @@
 require "file_utils"
 # Tags for n-to-n relations.
 # Uncached version.
 #
 # ```
 # cars_by_keywords = car_database.new_uncached_tags "keywords", &.keywords
 # ```
 #
 # This (tag) index provides a file-system representation, enabling the administrators to
 # select a value based on its index.
 #
 # The following example presents an index named "keywords" with some data indexed by a keywords attribute.
 #
 # ```plain
 # storage
 # ├── data
 # │   ├── 0000000000      <- this car is expensive and fast
 # │   ├── 0000000001      <- this car is expensive
 # │   └── 0000000002      <- this car is expensive
 # ├── tags
 # │   └── by_keywords  <- this is an example of index named "keywords"
 # │       ├── fast
 # │       │   └── 0000000000 -> ../../data/0000000000
 # │       └── expensive
 # │           ├── 0000000000 -> ../../data/0000000000
 # │           ├── 0000000001 -> ../../data/0000000001
 # │           └── 0000000002 -> ../../data/0000000002
 # ```
 #
 # NOTE: no cache, thus considered as *slow* for creation, deletion **and retrieval**.
 # NOTE: see `CachedTags` for a cached version, faster for retrieval.
 # NOTE: for fast operations without fs representation, see `RAMOnlyTags`.
 class DODB::Tags(V) < DODB::Indexer(V)
 	# Name of the index, such as *keywords* for example.
 	# This is an arbitrary value, mostly to create the index directory.
 	#
 	# NOTE: used for internal operations.
 	property name         : String
-	property key_proc     : Proc(V, Array(String) | NoIndex) | Proc(V, Array(String))
+
 	# Procedure to retrieve the index attribute from the value.
 	property key_proc     : Proc(V, Array(String) | NoIndex)
 	# Root database directory.
 	getter   storage_root : String
 	# Required to remove an entry in the DB.
 	@storage : DODB::Storage(V)
-	def initialize(@storage, @storage_root, @name, @key_proc)
+	# To create a *tag index* from a database, use `DODB::Storage#new_tags` to create
 	# a cached tag, `DODB::Storage#new_uncached_tags` for an uncached tag or
 	# `DODB::Storage#new_RAM_tags` for a RAM-only tag.
 	#
 	# WARNING: this is an internal operation, do not instantiate a tag index by hand.
 	def initialize(@storage : DODB::Storage(V), @storage_root : String, @name : String, @key_proc : Proc(V, Array(String) | NoIndex))
 		::Dir.mkdir_p indexing_directory
 	end
-	def check!(key, value, old_value)
+	# Checks for collisions.
 	#
 	# NOTE: always returns true, no collision can happen in a tag.
 	def check!(key : String, value : V, old_value : V?)
 		return true # Tags don’t have collisions or overloads.
 	end
-	def index(key, value)
+	def index(key : String, value : V)
 		indices = key_proc.call(value)
 		return if indices.is_a? NoIndex
@ -29,7 +76,7 @@ class DODB::Tags(V) < DODB::Indexer(V)
 		end
 	end
-	def deindex(key, value)
+	def deindex(key : String, value : V)
 		indices = key_proc.call(value)
 		return if indices.is_a? NoIndex
@ -43,7 +90,19 @@ class DODB::Tags(V) < DODB::Indexer(V)
 		end
 	end
-	def get_with_indice(tag : String) : Array(Tuple(V, Int32))
+	# Gets tag entries (and their keys) from the file-system representation of the tag.
 	#
 	# ```
 	# # Get all slow cars.
 	# cars_by_keywords.get_with_keys "slow"
 	# # Returns something like:
 	# # [ (@storage[42], 42)
 	# # , (@storage[91], 91)
 	# # ]
 	# # Each tuple is composed of a car and its key in the database.
 	# ```
 	# WARNING: throws a `MissingEntry` exception on a non-existing tag.
 	def get_with_keys(tag : String) : Array(Tuple(V, Int32))
 		tag_directory = indexing_directory tag
 		raise MissingEntry.new(@name, tag) unless Dir.exists? tag_directory
@ -57,42 +116,111 @@ class DODB::Tags(V) < DODB::Indexer(V)
 		r_value
 	end
-	# `get_with_indices` gets values with all the tags.
+	# Gets values (and their keys) matching multiple tags (values must have all the provided tags).
-
+	#
-	def get_with_indices(keys : Array(String)) : Array(Tuple(V, Int32))
+	# ```
 	# # Get cars that are both fast and expensive.
 	# fast_expensive_cars = cars_by_keywords.get_with_keys ["fast", "expensive"]
 	# ```
 	#
 	# `#get_with_keys` is not designed to be fast, but should be fine for most applications.
 	# Nothing can beat custom implementations tailored with specific problems in mind, so in case this
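 	# algorithm isn't fine for you, feel free to override this function for your specific data-set.
 	# WARNING: the provided array is modified (its last element is removed with `pop`); pass a copy (`keys.dup`) to keep the original intact.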
 	#
 	# NOTE: to seriously boost performance, use `DODB::CachedTags`.
 	def get_with_keys(keys : Array(String)) : Array(Tuple(V, Int32))
 		r_value = Array(Tuple(V, Int32)).new
 		return r_value if keys.size < 1
 		first_key = keys.pop
-		r_value = get_with_indice(first_key) rescue return r_value
+		r_value = get_with_keys(first_key) rescue return r_value
 		keys.each do |tag|
-			values = get_with_indice(tag) rescue return [] of Tuple(V, Int32)
+			values = get_with_keys(tag) rescue return [] of Tuple(V, Int32)
 			r_value &= values
 			return r_value if r_value.size < 1
 		end
 		r_value
 	end
 	# Gets data from an indexed value (throws an exception on a missing entry).
 	#
 	# ```
 	# fast_cars = cars_by_keywords.get "fast"  # No fast cars = MissingEntry exception.
 	# ```
 	#
 	# WARNING: throws an exception if no value is found.
 	# NOTE: for a safe version, use `#get?`.
 	def get(tag : String) : Array(V)
-		get_with_indice(tag).map &.[0]
+		tag_directory = indexing_directory tag
 		raise MissingEntry.new(@name, tag) unless Dir.exists? tag_directory
 		r_value = Array(V).new
 		Dir.each_child tag_directory do |child|
 			key = get_key child
 			r_value << @storage[key]
 		end
 		r_value
 	end
 	# Safe version of `#get`, gets data and returns a *nil* value in case of
 	# a missing entry instead of an exception.
 	#
 	# ```
 	# fast_cars = cars_by_keywords.get? "fast"  # No fast cars = nil.
 	# ```
 	def get?(tag : String) : Array(V)?
 		get tag
 	rescue MissingEntry
 		nil
 	end
 	# Gets values matching multiple tags (values must have all the provided tags).
 	#
 	# ```
 	# # Get cars that are both fast and expensive.
 	# fast_expensive_cars = cars_by_keywords.get ["fast", "expensive"]
 	# ```
 	#
 	# `#get` is not designed to be fast, but should be fine for most applications.
 	# Nothing can beat custom implementations tailored with specific problems in mind, so in case this
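 	# algorithm isn't fine for you, feel free to override this function for your specific data-set.
 	# WARNING: the provided array is modified (its last element is removed with `pop`); pass a copy (`keys.dup`) to keep the original intact.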
 	def get(keys : Array(String)) : Array(V)
-		get_with_indices(keys.sort).map &.[0]
+		r_value = Array(V).new
 		return r_value if keys.size < 1
 		first_key = keys.pop
 		r_value = get(first_key) rescue return r_value
 		keys.each do |tag|
 			values = get(tag) rescue return [] of V
 			r_value &= values
 			return r_value if r_value.size < 1
 		end
 		r_value
 	end
-	def delete(tag)
+	# Deletes all entries within the provided tag.
 	#
 	# ```
 	# cars_by_keywords.delete "slow"  # Deletes all slow cars.
 	# ```
 	def delete(tag : String)
 		delete tag, do true end
 	end
-	def delete(tag, &matcher)
+	# Deletes entries within the provided tag index and matching the provided block of code.
 	#
 	# ```
 	# # Deletes all slow Corvets.
 	# cars_by_keywords.delete "slow", do |car|
 	#   car.name == "Corvet"
 	# end
 	# ```
 	# TODO: in case the tag is left empty, should the tag directory be removed?
 	def delete(tag : String, &matcher)
 		tag_directory = indexing_directory tag
 		return unless Dir.exists? tag_directory
@ -128,11 +256,43 @@ class DODB::Tags(V) < DODB::Indexer(V)
 	end
 end
 # Tags for n-to-n relations.
 # Cached version.
 #
 # ```
 # cars_by_keywords = car_database.new_tags "keywords", &.keywords
 # ```
 #
 # This (tag) index provides a file-system representation, enabling the administrators to
 # select a value based on its index.
 #
 # The following example presents an index named "keywords" with some data indexed by a "keywords" attribute.
 #
 # ```plain
 # storage
 # ├── data
 # │   ├── 0000000000      <- this car is expensive and fast
 # │   ├── 0000000001      <- this car is expensive
 # │   └── 0000000002      <- this car is expensive
 # ├── tags
 # │   └── by_keywords  <- this is an example of index named "keywords"
 # │       ├── fast
 # │       │   └── 0000000000 -> ../../data/0000000000
 # │       └── expensive
 # │           ├── 0000000000 -> ../../data/0000000000
 # │           ├── 0000000001 -> ../../data/0000000001
 # │           └── 0000000002 -> ../../data/0000000002
 # ```
 #
 # NOTE: cached, reasonable amount of memory used since it's just an index.
 # NOTE: fast for retrieval, slow for index creation and deletion (fs operations).
 # NOTE: see `Tags` for an uncached version, even less memory-hungry.
 # NOTE: for fast operations without fs representation, see `RAMOnlyTags`.
 class DODB::CachedTags(V) < DODB::Tags(V)
 	# This hash contains the relation between the index key and the data keys.
 	property data = Hash(String, Array(Int32)).new
-	def index(key, value)
+	def index(key : String, value : V)
 		indices = key_proc.call value
 		return if indices.is_a? NoIndex
 		super(key, value)
@ -149,7 +309,7 @@ class DODB::CachedTags(V) < DODB::Tags(V)
 		end
 	end
-	def deindex(key, value)
+	def deindex(key : String, value : V)
 		indices = key_proc.call value
 		return if indices.is_a? NoIndex
 		super(key, value)
@ -162,12 +322,19 @@ class DODB::CachedTags(V) < DODB::Tags(V)
 		end
 	end
-	def nuke_index
+	# Gets tag entries (and their keys) for a single tag, from the cache.
-		super
+	#
-		data.clear
+	# ```
-	end
+	# # Get all fast cars (each entry comes with its key in the database).
-
+	# fast_cars = cars_by_keywords.get_with_keys "fast"
-	def get_with_indice(tag : String) : Array(Tuple(V, Int32))
+	# ```
 	#
 	# In case the values aren't in cache, the file-system is checked.
 	#
 	# `#get_with_keys` is not designed to be fast, but should be fine for most applications.
 	# Nothing can beat custom implementations tailored with specific problems in mind, so in case this
 	# algorithm isn't fine for you, feel free to override this function for your specific data-set.
 	def get_with_keys(tag : String) : Array(Tuple(V, Int32))
 		r_value = Array(Tuple(V, Int32)).new
 		if keys = @data[tag]?
@ -189,24 +356,53 @@ class DODB::CachedTags(V) < DODB::Tags(V)
 		r_value
 	end
-	def delete(tag, &matcher)
+	# Deletes entries within the provided tag and matching the provided block of code.
-		# Use `get_with_indexes` to retrieve data on-disk, if necessary.
+	#
-		new_tag = get_with_indexes(tag).map(&.[1]).select do |key|
+	# ```
 	# # Deletes all slow Corvets.
 	# cars_by_keywords.delete "slow", do |car|
 	#   car.name == "Corvet"
 	# end
 	# ```
 	#
 	# TODO: in case the tag is left empty, should it be removed from the cache?
 	def delete(tag : String, &matcher : Proc(V, Bool))
 		# Use `get_with_keys` to retrieve data on-disk, if necessary.
 		new_tag = get_with_keys(tag).map(&.[1]).select do |key|
 			item = @storage[key]
 			! yield item
 		end
 		# TODO: remove the tag if `new_tag` is empty?
 		@data[tag] = new_tag
 		super(tag, &matcher)
 	end
 	# Clears the cache and removes the `#indexing_directory`.
 	def nuke_index
 		super
 		data.clear
 	end
 end
 # Tags for n-to-n relations.
 # RAM-only version.
 #
 # ```
 # cars_by_keywords = car_database.new_RAM_tags "keywords", &.keywords
 # ```
 #
 # Since there are no file-system operations, all the operations are fast.
 # `DODB::RAMOnlyTags` enables the flexibility of tags without a file-system representation.
 # Absolute efficiency, exactly as easy to use as the other tag implementations.
-
+#
 # NOTE: reasonable amount of memory used since it's just an index.
 # NOTE: fast for all operations, but no file-system representation.
 # NOTE: see `Tags` for an uncached version, even less memory-hungry.
 # NOTE: for an fs representation but still fast for retrieval, see `CachedTags`.
 class DODB::RAMOnlyTags(V) < DODB::CachedTags(V)
-	def index(key, value)
+	def index(key : String, value : V)
 		indices = key_proc.call value
 		return if indices.is_a? NoIndex
@ -222,7 +418,7 @@ class DODB::RAMOnlyTags(V) < DODB::CachedTags(V)
 		end
 	end
-	def deindex(key, value)
+	def deindex(key : String, value : V)
 		indices = key_proc.call value
 		return if indices.is_a? NoIndex
@ -234,20 +430,55 @@ class DODB::RAMOnlyTags(V) < DODB::CachedTags(V)
 		end
 	end
-	def get_with_indice(tag : String) : Array(Tuple(V, Int32))
+	# Gets tag entries (and their keys) from the in-memory tag cache.
 	#
 	# ```
 	# # Get all slow cars.
 	# cars_by_keywords.get_with_keys "slow"
 	# # Returns something like:
 	# # [ (@storage[42], 42)
 	# # , (@storage[91], 91)
 	# # ]
 	# # Each tuple is composed of a car and its key in the database.
 	# ```
 	# NOTE: returns an empty list on empty or non-existing tag.
 	def get_with_keys(tag : String) : Array(Tuple(V, Int32))
 		r_value = Array(Tuple(V, Int32)).new
 		if keys = @data[tag]?
 			keys.each do |data_key|
 				r_value << { @storage[data_key], data_key }
 			end
 		end
 		r_value
 	end
-	def delete(tag, &matcher)
+	# Gets tag entries from the in-memory tag cache.
-		# Use `get_with_indexes` to retrieve data on-disk, if necessary.
+	#
 	# ```
 	# # Get all slow cars.
 	# cars_by_keywords.get "slow"
 	# ```
 	# NOTE: returns an empty list on empty or non-existing tag.
 	def get(tag : String) : Array(V)
 		r_value = Array(V).new
 		if keys = @data[tag]?
 			keys.each do |data_key|
 				r_value << @storage[data_key]
 			end
 		end
 		r_value
 	end
 	# Deletes entries within the provided tag and matching the provided block of code.
 	#
 	# ```
 	# # Deletes all slow Corvets.
 	# cars_by_keywords.delete "slow", do |car|
 	#   car.name == "Corvet"
 	# end
 	# ```
 	# TODO: in case the tag is left empty, should it be removed from the cache?
 	def delete(tag : String, &matcher : Proc(V, Bool))
 		if keys = @data[tag]?
 			new_tag = keys.select do |key|
 				item = @storage[key]
@ -257,4 +488,9 @@ class DODB::RAMOnlyTags(V) < DODB::CachedTags(V)
 			@data[tag] = new_tag
 		end
 	end
 	# Clears the cache.
 	def nuke_index
 		data.clear
 	end
 end
--- a/src/ramonly.cr
+++ b/src/ramonly.cr
@ -1,6 +1,26 @@
-# `DODB::RAMOnlyDataBase` is a database without a file-system representation,
+# RAM-only database, without a file-system representation.
-# enabling the use of DODB to store data which have the same lifetime as the application.
+#
-# Indexing (indexes, partitions, tags) will behave the same way.
+# This database implementation enables the use of DODB to store data with the same lifetime as the application.
 #
 # Indexing (basic indexes, partitions, tags) will behave the same way.
 # ```
 # # Creates a DODB RAM-only database (yes, the path is still required).
 # car_database = DODB::RAMOnlyDataBase.new "/path/to/db"
 #
 # # Creates a (cached) index (same as for all other DODB database implementations).
 # cars_by_name = car_database.new_index "name", &.name
 #
 # # Add a value in the database.
 # car_database << Car.new "Corvet"
 # ```
 # In this example there is a cached index, so on the file-system:
 # ```plain
 # storage
 # ├── data          <- this directory stays empty (RAM-only database, remember?)
 # ├── indices
 # │   └── by_name   <- the "name" basic index
 # │       └── Corvet -> ../../data/0000000000 <- the index works despite not pointing to a real file
 # ```
 class DODB::RAMOnlyDataBase(V) < DODB::CachedDataBase(V)
 	# Initialization still uses a directory name and creates a few paths.
 	# This is an implementation detail to re-use code of `DODB::Storage` and to get the indexers to work.
@ -41,7 +61,6 @@ class DODB::RAMOnlyDataBase(V) < DODB::CachedDataBase(V)
 		@data[key] = value
 	end
 	# :inherit:
 	def unsafe_delete(key : Int32)
 		value = self[key]?
--- a/src/uncached.cr
+++ b/src/uncached.cr
@ -1,6 +1,26 @@
 # Basic database of DODB.
 # Data isn't cached.
 #
 # ```
 # # Creates a DODB (uncached) database.
 # car_database = DODB::DataBase.new "/path/to/db"
 #
 # # Creates a (cached) index.
 # cars_by_name = car_database.new_index "name", &.name
 #
 # # Add a value in the database.
 # car_database << Car.new "Corvet"
 # ```
 # On the file-system:
 # ```plain
 # storage
 # ├── data
 # │   └── 0000000000
 # ├── indices
 # │   └── by_name   <- the "name" basic index
 # │       └── Corvet -> ../../data/0000000000
 # ```
 #
 # NOTE: slow but doesn't require much memory.
 class DODB::DataBase(V) < DODB::Storage(V)
 end