Documentation, again. Some very few re-implementations.

This commit is contained in:
Philippe PITTOLI 2024-05-21 20:50:28 +02:00
parent 5c45311926
commit 378f8c76db
5 changed files with 379 additions and 53 deletions

View File

@ -15,6 +15,26 @@ end
# Cached database, stores data on the file-system and keeps it in RAM.
#
# ```
# # Creates a DODB cached database.
# car_database = DODB::CachedDataBase.new "/path/to/db"
#
# # Creates a (cached) index.
# cars_by_name = car_database.new_index "name", &.name
#
# # Add a value in the database.
# car_database << Car.new "Corvet"
# ```
# On the file-system:
# ```plain
# storage
# ├── data
# │   └── 0000000000
# ├── indices
# │   └── by_name <- the "name" basic index
# │       └── Corvet -> ../../data/0000000000
# ```
#
# WARNING: beware of the RAM use, see `DODB::StackedDataBase` for a less memory-hungry option.
class DODB::CachedDataBase(V) < DODB::Storage(V)
@indexers = [] of Indexer(V)
@ -109,7 +129,6 @@ class DODB::CachedDataBase(V) < DODB::Storage(V)
end
end
# :inherit:
def unsafe_delete(key : Int32)
value = self[key]?

View File

@ -10,8 +10,9 @@ require "./indexer.cr"
# ```
#
# This (partition) index provides a file-system representation, enabling the administrators to
# select a value based on its index. The following example presents an index named "color"
# with some data indexed by a color attribute.
# select a value based on its index.
#
# The following example presents an index named "color" with some data indexed by a color attribute.
#
# ```plain
# storage
@ -32,7 +33,7 @@ require "./indexer.cr"
# NOTE: see `CachedPartition` for a cached version, faster for retrieval.
# NOTE: for fast operations without fs representation, see `RAMOnlyPartition`.
class DODB::Partition(V) < DODB::Indexer(V)
# Name of the index, such as *id* or *color* for example.
# Name of the index, such as *color* for example.
# This is an arbitrary value, mostly to create the index directory.
#
# NOTE: used for internal operations.
@ -47,7 +48,7 @@ class DODB::Partition(V) < DODB::Indexer(V)
# Reference to the database instance.
@storage : DODB::Storage(V)
# To create a partition from a database, use `DODB::Storage#new_partition` to create
# To create a *partition index* from a database, use `DODB::Storage#new_partition` to create
# a cached partition, `DODB::Storage#new_uncached_partition` for an uncached partition or
# `DODB::Storage#new_RAM_partition` for a RAM-only partition.
#
@ -56,6 +57,9 @@ class DODB::Partition(V) < DODB::Indexer(V)
::Dir.mkdir_p indexing_directory
end
# Checks for collisions.
#
# NOTE: always returns true, no collision can happen in a partition.
def check!(key : String, value : V, old_value : V?)
  # Partitions don't have collisions or overloads: the check always succeeds.
  true
end
@ -71,7 +75,7 @@ class DODB::Partition(V) < DODB::Indexer(V)
::File.symlink get_data_symlink(key), symlink
end
def deindex (key : String, value : V)
def deindex(key : String, value : V)
partition = key_proc.call value
return if partition.is_a? NoIndex
@ -108,7 +112,7 @@ class DODB::Partition(V) < DODB::Indexer(V)
# a missing entry instead of an exception.
#
# ```
# red_cars = cars_by_color.get? "red" # No red cars = nil
# red_cars = cars_by_color.get? "red" # No red cars = nil.
# ```
def get?(partition : String) : Array(V)?
get partition
@ -119,7 +123,7 @@ class DODB::Partition(V) < DODB::Indexer(V)
# Deletes all entries within the provided partition.
#
# ```
# cars_by_color.delete "red" # Deletes all red cars
# cars_by_color.delete "red" # Deletes all red cars.
# ```
def delete(partition : String)
delete partition, do true end
@ -179,8 +183,9 @@ end
# ```
#
# This (partition) index provides a file-system representation, enabling the administrators to
# select a value based on its index. The following example presents an index named "color"
# with some data indexed by a color attribute.
# select a value based on its index.
#
# The following example presents an index named "color" with some data indexed by a color attribute.
#
# ```plain
# storage
@ -247,7 +252,7 @@ class DODB::CachedPartition(V) < DODB::Partition(V)
end
end
# Gets a partition entries and the database key for each entry.
# Gets partition entries and the database key for each entry.
#
# ```
# # For example, get all red cars.
@ -280,7 +285,7 @@ class DODB::CachedPartition(V) < DODB::Partition(V)
r_value
end
# Gets a partition entries.
# Gets partition entries from the cache or the file-system representation.
#
# ```
# # For example, get all red cars.
@ -377,9 +382,19 @@ class DODB::RAMOnlyPartition(V) < DODB::CachedPartition(V)
end
end
# Gets partition entries and the database key for each entry, from the in-memory partition index.
#
# ```
# # Get all red cars.
# cars_by_color.get_with_indexes "red"
# # Returns something like:
# # [ (@storage[42], 42)
# # , (@storage[91], 91)
# # ]
# # Each tuple is composed of a car and its key in the database.
# ```
def get_with_indexes(partition : String) : Array(Tuple(V, Int32))
r_value = Array(Tuple(V, Int32)).new
if keys = @data[partition]?
keys.each do |data_key|
r_value << { @storage[data_key], data_key }
@ -388,6 +403,23 @@ class DODB::RAMOnlyPartition(V) < DODB::CachedPartition(V)
r_value
end
# Gets partition entries from the in-memory partition cache.
#
# ```
# # Get all red cars.
# cars_by_color.get "red"
# ```
# NOTE: returns an empty list on empty or non-existing partition.
def get(partition : String) : Array(V)
  # A partition absent from the in-memory index yields an empty list.
  data_keys = @data[partition]?
  return Array(V).new if data_keys.nil?

  # Resolve each indexed database key into its stored value.
  data_keys.map { |data_key| @storage[data_key] }
end
# Deletes entries within the provided partition and matching the provided block of code.
#
# ```
@ -396,8 +428,8 @@ class DODB::RAMOnlyPartition(V) < DODB::CachedPartition(V)
# car.name == "Corvet"
# end
# ```
# TODO: in case the partition is left empty, should the partition be removed from the cache?
def delete(partition, &matcher : Proc(V, Bool))
# TODO: in case the partition is left empty, should it be removed from the cache?
def delete(partition : String, &matcher : Proc(V, Bool))
if keys = @data[partition]?
new_partition = keys.select do |key|
item = @storage[key]

View File

@ -1,22 +1,69 @@
require "file_utils"
# Tags for n-to-n relations.
# Uncached version.
#
# ```
# cars_by_keywords = car_database.new_uncached_tags "keywords", &.keywords
# ```
#
# This (tag) index provides a file-system representation, enabling the administrators to
# select a value based on its index.
#
# The following example presents an index named "keywords" with some data indexed by a keywords attribute.
#
# ```plain
# storage
# ├── data
# │   ├── 0000000000 <- this car is expensive and fast
# │   ├── 0000000001 <- this car is expensive
# │   └── 0000000002 <- this car is expensive
# ├── tags
# │   └── by_keywords <- this is an example of index named "keywords"
# │       ├── fast
# │       │   └── 0000000000 -> ../../data/0000000000
# │       └── expensive
# │           ├── 0000000000 -> ../../data/0000000000
# │           ├── 0000000001 -> ../../data/0000000001
# │           └── 0000000002 -> ../../data/0000000002
# ```
#
# NOTE: no cache, thus considered as *slow* for creation, deletion **and retrieval**.
# NOTE: see `CachedTags` for a cached version, faster for retrieval.
# NOTE: for fast operations without fs representation, see `RAMOnlyTags`.
class DODB::Tags(V) < DODB::Indexer(V)
# Name of the index, such as *keywords* for example.
# This is an arbitrary value, mostly to create the index directory.
#
# NOTE: used for internal operations.
property name : String
property key_proc : Proc(V, Array(String) | NoIndex) | Proc(V, Array(String))
# Procedure to retrieve the index attribute from the value.
property key_proc : Proc(V, Array(String) | NoIndex)
# Root database directory.
getter storage_root : String
# Required to remove an entry in the DB.
@storage : DODB::Storage(V)
def initialize(@storage, @storage_root, @name, @key_proc)
# To create a *tag index* from a database, use `DODB::Storage#new_tags` to create
# a cached tag, `DODB::Storage#new_uncached_tags` for an uncached tag or
# `DODB::Storage#new_RAM_tags` for a RAM-only tag.
#
# WARNING: this is an internal operation, do not instantiate a tag index by hand.
def initialize(@storage : DODB::Storage(V), @storage_root : String, @name : String, @key_proc : Proc(V, Array(String) | NoIndex))
::Dir.mkdir_p indexing_directory
end
def check!(key, value, old_value)
# Checks for collisions.
#
# NOTE: always returns true, no collision can happen in a tag.
def check!(key : String, value : V, old_value : V?)
  # Tags don't have collisions or overloads: the check always succeeds.
  true
end
def index(key, value)
def index(key : String, value : V)
indices = key_proc.call(value)
return if indices.is_a? NoIndex
@ -29,7 +76,7 @@ class DODB::Tags(V) < DODB::Indexer(V)
end
end
def deindex(key, value)
def deindex(key : String, value : V)
indices = key_proc.call(value)
return if indices.is_a? NoIndex
@ -43,7 +90,19 @@ class DODB::Tags(V) < DODB::Indexer(V)
end
end
def get_with_indice(tag : String) : Array(Tuple(V, Int32))
# Gets tag entries (and their keys) from the file-system representation of the tag.
#
# ```
# # Get all slow cars.
# cars_by_keywords.get_with_keys "slow"
# # Returns something like:
# # [ (@storage[42], 42)
# # , (@storage[91], 91)
# # ]
# # Each tuple is composed of a car and its key in the database.
# ```
# NOTE: returns an empty list on an empty tag.
# WARNING: raises `MissingEntry` when the tag directory doesn't exist.
def get_with_keys(tag : String) : Array(Tuple(V, Int32))
tag_directory = indexing_directory tag
raise MissingEntry.new(@name, tag) unless Dir.exists? tag_directory
@ -57,42 +116,111 @@ class DODB::Tags(V) < DODB::Indexer(V)
r_value
end
# `get_with_indices` gets values with all the tags.
def get_with_indices(keys : Array(String)) : Array(Tuple(V, Int32))
# Gets values (and their keys) matching multiple tags (values must have all the provided tags).
#
# ```
# # Get cars that are both fast and expensive.
# fast_expensive_cars = cars_by_keywords.get_with_keys ["fast", "expensive"]
# ```
#
# `#get_with_keys` is not designed to be fast, but should be fine for most applications.
# Nothing can beat custom implementations tailored with specific problems in mind, so in case this
# algorithm isn't fine for you, feel free to override this function for your specific data-set.
#
# NOTE: to seriously boost performance, use `DODB::CachedTags`.
def get_with_keys(keys : Array(String)) : Array(Tuple(V, Int32))
  r_value = Array(Tuple(V, Int32)).new
  return r_value if keys.empty?

  # Work on a copy: popping from `keys` directly would silently remove
  # a tag from the array owned by the caller.
  tags = keys.dup
  first_key = tags.pop

  # If the first tag cannot be retrieved, nothing can match all the tags.
  r_value = get_with_keys(first_key) rescue return r_value

  # Intersect with the entries of each remaining tag, stopping as soon as
  # the intersection is empty.
  tags.each do |tag|
    values = get_with_keys(tag) rescue return [] of Tuple(V, Int32)
    r_value &= values
    return r_value if r_value.empty?
  end
  r_value
end
# Gets data from an indexed value (throws an exception on a missing entry).
#
# ```
# fast_cars = cars_by_keywords.get "fast" # No fast cars = MissingEntry exception.
# ```
#
# WARNING: throws an exception if no value is found.
# NOTE: for a safe version, use `#get?`.
def get(tag : String) : Array(V)
get_with_indice(tag).map &.[0]
tag_directory = indexing_directory tag
raise MissingEntry.new(@name, tag) unless Dir.exists? tag_directory
r_value = Array(V).new
Dir.each_child tag_directory do |child|
key = get_key child
r_value << @storage[key]
end
r_value
end
# Safe version of `#get`, gets data and returns a *nil* value in case of
# a missing entry instead of an exception.
#
# ```
# fast_cars = cars_by_keywords.get? "fast" # No fast cars = nil.
# ```
def get?(tag : String) : Array(V)?
  begin
    get tag
  rescue MissingEntry
    # A missing entry is reported as nil instead of an exception.
    nil
  end
end
# Gets values matching multiple tags (values must have all the provided tags).
#
# ```
# # Get cars that are both fast and expensive.
# fast_expensive_cars = cars_by_keywords.get ["fast", "expensive"]
# ```
#
# `#get` is not designed to be fast, but should be fine for most applications.
# Nothing can beat custom implementations tailored with specific problems in mind, so in case this
# algorithm isn't fine for you, feel free to override this function for your specific data-set.
def get(keys : Array(String)) : Array(V)
  r_value = Array(V).new
  return r_value if keys.empty?

  # Work on a copy: popping from `keys` directly would silently remove
  # a tag from the array owned by the caller.
  tags = keys.dup
  first_key = tags.pop

  # If the first tag cannot be retrieved, nothing can match all the tags.
  r_value = get(first_key) rescue return r_value

  # Intersect with the values of each remaining tag, stopping as soon as
  # the intersection is empty.
  tags.each do |tag|
    values = get(tag) rescue return [] of V
    r_value &= values
    return r_value if r_value.empty?
  end
  r_value
end
def delete(tag)
# Deletes all entries within the provided tag.
#
# ```
# cars_by_keywords.delete "slow" # Deletes all slow cars.
# ```
def delete(tag : String)
delete tag, do true end
end
def delete(tag, &matcher)
# Deletes entries within the provided tag index and matching the provided block of code.
#
# ```
# # Deletes all slow Corvets.
# cars_by_keywords.delete "slow", do |car|
# car.name == "Corvet"
# end
# ```
# TODO: in case the tag is left empty, should the tag directory be removed?
def delete(tag : String, &matcher)
tag_directory = indexing_directory tag
return unless Dir.exists? tag_directory
@ -128,11 +256,43 @@ class DODB::Tags(V) < DODB::Indexer(V)
end
end
# Tags for n-to-n relations.
# Cached version.
#
# ```
# cars_by_keywords = car_database.new_tags "keywords", &.keywords
# ```
#
# This (tag) index provides a file-system representation, enabling the administrators to
# select a value based on its index.
#
# The following example presents an index named "keywords" with some data indexed by a "keywords" attribute.
#
# ```plain
# storage
# ├── data
# │   ├── 0000000000 <- this car is expensive and fast
# │   ├── 0000000001 <- this car is expensive
# │   └── 0000000002 <- this car is expensive
# ├── tags
# │   └── by_keywords <- this is an example of index named "keywords"
# │       ├── fast
# │       │   └── 0000000000 -> ../../data/0000000000
# │       └── expensive
# │           ├── 0000000000 -> ../../data/0000000000
# │           ├── 0000000001 -> ../../data/0000000001
# │           └── 0000000002 -> ../../data/0000000002
# ```
#
# NOTE: cached, reasonable amount of memory used since it's just an index.
# NOTE: fast for retrieval, slow for index creation and deletion (fs operations).
# NOTE: see `Tags` for an uncached version, even less memory-hungry.
# NOTE: for fast operations without fs representation, see `RAMOnlyTags`.
class DODB::CachedTags(V) < DODB::Tags(V)
# This hash contains the relation between the index key and the data keys.
property data = Hash(String, Array(Int32)).new
def index(key, value)
def index(key : String, value : V)
indices = key_proc.call value
return if indices.is_a? NoIndex
super(key, value)
@ -149,7 +309,7 @@ class DODB::CachedTags(V) < DODB::Tags(V)
end
end
def deindex(key, value)
def deindex(key : String, value : V)
indices = key_proc.call value
return if indices.is_a? NoIndex
super(key, value)
@ -162,12 +322,19 @@ class DODB::CachedTags(V) < DODB::Tags(V)
end
end
def nuke_index
super
data.clear
end
def get_with_indice(tag : String) : Array(Tuple(V, Int32))
# Gets tag entries (and their keys) from the cache.
#
# ```
# # Get all slow cars.
# cars_by_keywords.get_with_keys "slow"
# ```
#
# In case the values aren't in cache, the file-system is checked.
#
# `#get_with_keys` is not designed to be fast, but should be fine for most applications.
# Nothing can beat custom implementations tailored with specific problems in mind, so in case this
# algorithm isn't fine for you, feel free to override this function for your specific data-set.
def get_with_keys(tag : String) : Array(Tuple(V, Int32))
r_value = Array(Tuple(V, Int32)).new
if keys = @data[tag]?
@ -189,24 +356,53 @@ class DODB::CachedTags(V) < DODB::Tags(V)
r_value
end
def delete(tag, &matcher)
# Use `get_with_indexes` to retrieve data on-disk, if necessary.
new_tag = get_with_indexes(tag).map(&.[1]).select do |key|
# Deletes entries within the provided tag and matching the provided block of code.
#
# ```
# # Deletes all slow Corvets.
# cars_by_keywords.delete "slow", do |car|
# car.name == "Corvet"
# end
# ```
#
# TODO: in case the tag is left empty, should it be removed from the cache?
def delete(tag : String, &matcher : Proc(V, Bool))
  # `get_with_keys` provides the database keys currently bound to the tag
  # (data is retrieved on-disk, if necessary).
  tagged_keys = get_with_keys(tag).map { |entry| entry[1] }

  # Keep in the cache only the keys whose value isn't matched by the block.
  # TODO: remove the tag if the remaining list is empty?
  @data[tag] = tagged_keys.reject { |key| yield @storage[key] }

  # The parent implementation performs the actual deletion.
  super(tag, &matcher)
end
# Clears the cache and removes the `#indexing_directory`.
def nuke_index
super
data.clear
end
end
# Tags for n-to-n relations.
# RAM-only version.
#
# ```
# cars_by_keywords = car_database.new_RAM_tags "keywords", &.keywords
# ```
#
# Since there are no file-system operations, all the operations are fast.
# `DODB::RAMOnlyTags` enables the flexibility of tags without a file-system representation.
# Absolute efficiency, exactly as easy to use as the other tag implementations.
#
# NOTE: reasonable amount of memory used since it's just an index.
# NOTE: fast for all operations, but no file-system representation.
# NOTE: see `Tags` for an uncached version, even less memory-hungry.
# NOTE: for an fs representation but still fast for retrieval, see `CachedTags`.
class DODB::RAMOnlyTags(V) < DODB::CachedTags(V)
def index(key, value)
def index(key : String, value : V)
indices = key_proc.call value
return if indices.is_a? NoIndex
@ -222,7 +418,7 @@ class DODB::RAMOnlyTags(V) < DODB::CachedTags(V)
end
end
def deindex(key, value)
def deindex(key : String, value : V)
indices = key_proc.call value
return if indices.is_a? NoIndex
@ -234,20 +430,55 @@ class DODB::RAMOnlyTags(V) < DODB::CachedTags(V)
end
end
def get_with_indice(tag : String) : Array(Tuple(V, Int32))
# Gets tag entries (and their keys) from the in-memory tag cache.
#
# ```
# # Get all slow cars.
# cars_by_keywords.get_with_keys "slow"
# # Returns something like:
# # [ (@storage[42], 42)
# # , (@storage[91], 91)
# # ]
# # Each tuple is composed of a car and its key in the database.
# ```
# NOTE: returns an empty list on empty or non-existing tag.
def get_with_keys(tag : String) : Array(Tuple(V, Int32))
  # A tag absent from the in-memory index yields an empty list.
  data_keys = @data[tag]?
  return Array(Tuple(V, Int32)).new if data_keys.nil?

  # Pair each stored value with its database key.
  data_keys.map { |data_key| {@storage[data_key], data_key} }
end
def delete(tag, &matcher)
# Use `get_with_indexes` to retrieve data on-disk, if necessary.
# Gets tag entries from the in-memory tag cache.
#
# ```
# # Get all slow cars.
# cars_by_keywords.get "slow"
# ```
# NOTE: returns an empty list on empty or non-existing tag.
def get(tag : String) : Array(V)
  # A tag absent from the in-memory index yields an empty list.
  data_keys = @data[tag]?
  return Array(V).new if data_keys.nil?

  # Resolve each indexed database key into its stored value.
  data_keys.map { |data_key| @storage[data_key] }
end
# Deletes entries within the provided tag and matching the provided block of code.
#
# ```
# # Deletes all slow Corvets.
# cars_by_keywords.delete "slow", do |car|
# car.name == "Corvet"
# end
# ```
# TODO: in case the tag is left empty, should it be removed from the cache?
def delete(tag : String, &matcher : Proc(V, Bool))
if keys = @data[tag]?
new_tag = keys.select do |key|
item = @storage[key]
@ -257,4 +488,9 @@ class DODB::RAMOnlyTags(V) < DODB::CachedTags(V)
@data[tag] = new_tag
end
end
# Clears the cache.
def nuke_index
data.clear
end
end

View File

@ -1,6 +1,26 @@
# `DODB::RAMOnlyDataBase` is a database without a file-system representation,
# enabling the use of DODB to store data which have the same lifetime as the application.
# Indexing (indexes, partitions, tags) will behave the same way.
# RAM-only database, without a file-system representation.
#
# This database implementation enables the use of DODB to store data with the same lifetime as the application.
#
# Indexing (basic indexes, partitions, tags) will behave the same way.
# ```
# # Creates a DODB RAM-only database (yes, the path is still required).
# car_database = DODB::RAMOnlyDataBase.new "/path/to/db"
#
# # Creates a (cached) index (same as for all other DODB database implementations).
# cars_by_name = car_database.new_index "name", &.name
#
# # Add a value in the database.
# car_database << Car.new "Corvet"
# ```
# In this example there is a cached index, so on the file-system:
# ```plain
# storage
# ├── data <- this directory stays empty (RAM-only database, remember?)
# ├── indices
# │   └── by_name <- the "name" basic index
# │       └── Corvet -> ../../data/0000000000 <- the index works despite not pointing to a real file
# ```
class DODB::RAMOnlyDataBase(V) < DODB::CachedDataBase(V)
# Initialization still uses a directory name and creates a few paths.
# This is an implementation detail to re-use code of `DODB::Storage` and to get the indexers to work.
@ -41,7 +61,6 @@ class DODB::RAMOnlyDataBase(V) < DODB::CachedDataBase(V)
@data[key] = value
end
# :inherit:
def unsafe_delete(key : Int32)
value = self[key]?

View File

@ -1,6 +1,26 @@
# Basic database of DODB.
# Data isn't cached.
#
# ```
# # Creates a DODB (uncached) database.
# car_database = DODB::DataBase.new "/path/to/db"
#
# # Creates a (cached) index.
# cars_by_name = car_database.new_index "name", &.name
#
# # Add a value in the database.
# car_database << Car.new "Corvet"
# ```
# On the file-system:
# ```plain
# storage
# ├── data
# │   └── 0000000000
# ├── indices
# │   └── by_name <- the "name" basic index
# │       └── Corvet -> ../../data/0000000000
# ```
#
# NOTE: slow but doesn't require much memory.
class DODB::DataBase(V) < DODB::Storage(V)
end