Compare commits

..

3 Commits

4 changed files with 242 additions and 30 deletions

View File

@ -5,6 +5,11 @@
- RAMOnly: do not read/write the `last_entry` file.
- Use the `#unsafe_add` function when possible.
# Memory management
- When a value is removed, the related partitions (and tags) may be empty, leaving both an empty array
in memory and a directory on the file-system. Should they be removed?
# Documentation
- Write the API documentation.

View File

@ -16,6 +16,8 @@ describe "DODB::DataBase" do
end
db.to_a.sort.should eq(Ship.all_ships.sort)
db.rm_storage_dir
end
it "rewrite already stored data" do
@ -28,6 +30,8 @@ describe "DODB::DataBase" do
db[key] = ship
db[key].should eq(ship)
db.rm_storage_dir
end
it "properly remove data" do
@ -49,6 +53,8 @@ describe "DODB::DataBase" do
db[i]?.should be_nil
end
db.rm_storage_dir
end
it "preserves data on reopening" do
@ -61,6 +67,9 @@ describe "DODB::DataBase" do
db2 << Ship.mutsuki
db1.to_a.size.should eq(2)
db1.rm_storage_dir
db2.rm_storage_dir
end
it "iterates in normal and reversed order" do
@ -81,6 +90,8 @@ describe "DODB::DataBase" do
# Actual reversal is tested here.
db.to_a(reversed: true).should eq db.to_a.reverse
db.rm_storage_dir
end
it "respects the provided offsets if any" do
@ -97,6 +108,8 @@ describe "DODB::DataBase" do
db.to_a(offset: 0, limit: 3).should eq [
Ship.mutsuki, Ship.kisaragi, Ship.yayoi
]
db.rm_storage_dir
end
end
@ -113,6 +126,8 @@ describe "DODB::DataBase" do
Ship.all_ships.each_with_index do |ship|
db_ships_by_name.get?(ship.name).should eq(ship)
end
db.rm_storage_dir
end
it "raise on index overload" do
@ -127,6 +142,8 @@ describe "DODB::DataBase" do
expect_raises(DODB::IndexOverload) do
db << Ship.kisaragi
end
db.rm_storage_dir
end
it "properly deindex" do
@ -145,6 +162,8 @@ describe "DODB::DataBase" do
Ship.all_ships.each do |ship|
db_ships_by_name.get?(ship.name).should be_nil
end
db.rm_storage_dir
end
it "properly reindex" do
@ -163,6 +182,8 @@ describe "DODB::DataBase" do
db[key].should eq(some_new_ship)
db_ships_by_name.get?(some_new_ship.name).should eq(some_new_ship)
db.rm_storage_dir
end
it "properly updates" do
@ -183,6 +204,8 @@ describe "DODB::DataBase" do
db_ships_by_name.get?("Kisaragi").should be_nil
db_ships_by_name.get?(new_kisaragi.name).should eq new_kisaragi
db.rm_storage_dir
end
end
@ -213,6 +236,8 @@ describe "DODB::DataBase" do
end
db_ships_by_class.get?("does-not-exist").should be_nil
db.rm_storage_dir
end
it "removes select elements from partitions" do
@ -231,6 +256,8 @@ describe "DODB::DataBase" do
partition.any?(&.name.==("Kisaragi")).should be_false
end
db.rm_storage_dir
end
end
@ -254,6 +281,8 @@ describe "DODB::DataBase" do
# There shouldn't be one in our data about WWII Japanese warships…
db_ships_by_tags.get?("starship").should be_nil
db.rm_storage_dir
end
it "properly removes tags" do
@ -278,6 +307,8 @@ describe "DODB::DataBase" do
# end
db_ships_by_tags.get("flagship").should eq([] of Ship)
db.rm_storage_dir
end
it "gets items that have multiple tags" do
@ -297,6 +328,8 @@ describe "DODB::DataBase" do
results = db_ships_by_tags.get(["flagship"])
results.should eq([Ship.yamato])
db.rm_storage_dir
end
end
@ -319,6 +352,8 @@ describe "DODB::DataBase" do
results.should eq(ship)
end
end
db.rm_storage_dir
end
end
@ -340,11 +375,12 @@ describe "DODB::DataBase" do
db_ships_by_name.get?(ship.name).should eq(ship)
db_ships_by_class.get(ship.klass).should contain(ship)
end
db.rm_storage_dir
end
it "migrates properly" do
::FileUtils.rm_rf "test-storage-migration-origin"
old_db = DODB::DataBase(PrimitiveShip).new "test-storage-migration-origin"
old_db = DODB::SpecDataBase(PrimitiveShip).new "-migration-origin"
old_ships_by_name = old_db.new_index "name", &.name
old_ships_by_class = old_db.new_partition "class", &.class_name
@ -384,6 +420,9 @@ describe "DODB::DataBase" do
ship.tags.any?(&.==("name ship")).should be_true if ship.name == ship.klass
end
old_db.rm_storage_dir
new_db.rm_storage_dir
end
end
@ -410,6 +449,8 @@ describe "DODB::DataBase" do
dump = db.to_a
dump.size.should eq fork_count * entries_per_fork
db.rm_storage_dir
end
it "works for updating values" do
@ -448,6 +489,8 @@ describe "DODB::DataBase" do
entry.tags.should eq ["updated"]
end
end
db.rm_storage_dir
end
it "does parallel-safe updates" do
@ -473,6 +516,8 @@ describe "DODB::DataBase" do
processes.each &.wait
db_entries_by_name.get("test").klass.should eq((fork_count * entries_per_fork).to_s)
db.rm_storage_dir
end
end
end

View File

@ -6,6 +6,10 @@ require "./indexer.cr"
# Basic indexes for 1-to-1 relations.
# Uncached version.
#
# ```
# cars_by_name = car_database.new_uncached_index "name", &.name
# ```
#
# This index provides a file-system representation, enabling the administrators to
# select a value based on its index. The following example presents an index named "id"
# with some data indexed by a UUID attribute.
@ -27,16 +31,27 @@ require "./indexer.cr"
# NOTE: see `CachedIndex` for a cached version, faster for retrieval.
# NOTE: for fast operations without fs representation, see `RAMOnlyIndex`.
class DODB::Index(V) < DODB::Indexer(V)
# Name of the index, such as *id* or *color* for example.
# This is an arbitrary value, mostly to create the index directory.
#
# NOTE: used for internal operations.
property name : String
property key_proc : Proc(V, String | NoIndex) | Proc(V, String)
# Procedure to retrieve the index attribute from the value, used for **internal operations**.
property key_proc : Proc(V, String | NoIndex)
# Root database directory, used for **internal operations**.
getter storage_root : String
# Reference to the database instance, used for **internal operations**.
@storage : DODB::Storage(V)
# To create an index from a database, use `DODB::Storage#new_index` to create
# a cached index, `DODB::Storage#new_uncached_index` for an uncached index or
# `DODB::Storage#new_RAM_index` for a RAM-only index.
def initialize(@storage, @storage_root, @name, @key_proc)
# `DODB::Storage#new_RAM_index` for a RAM-only index.
#
# WARNING: this is an internal operation, do not instantiate an index by hand.
def initialize(@storage : DODB::Storage(V), @storage_root : String, @name : String, @key_proc : Proc(V, String | NoIndex))
# Create the directory of this index (and any missing parent) if it does not exist yet.
Dir.mkdir_p indexing_directory
end
@ -84,7 +99,7 @@ class DODB::Index(V) < DODB::Indexer(V)
end
end
# Gets the key (ex: 343) for an entry in the DB from an indexed value.
# Gets the key (ex: 343) for an entry in the DB from an indexed value, used for **internal operations**.
#
# Reads the link in `db/indices/by_#{name}/<index>`.
#
@ -159,7 +174,7 @@ class DODB::Index(V) < DODB::Indexer(V)
yield nil
end
# Reads the indexed symlink to find its related key.
# Reads the indexed symlink to find its related key, used for **internal operations**.
#
# For example, for a car indexed by its name:
#
@ -175,6 +190,8 @@ class DODB::Index(V) < DODB::Indexer(V)
# `#get_key_on_fs` reads the *storage/indices/by_name/Corvet* symlink and gets
# the name of the data file ("000000343") and converts it into an integer,
# which is the key in the database.
#
# NOTE: used for internal operations.
def get_key_on_fs(index : String) : Int32
file_path = file_path_index index
raise MissingEntry.new(@name, index) unless ::File.symlink? file_path
@ -267,6 +284,10 @@ end
# Basic indexes for 1-to-1 relations.
# Cached version.
#
# ```
# cars_by_name = car_database.new_index "name", &.name
# ```
#
# The cache makes this index fast and since the index doesn't contain
# the full value but just an attribute and a key, memory usage is still reasonable.
#
@ -291,7 +312,8 @@ end
# NOTE: see `Index` for an uncached version, even less memory-hungry.
# NOTE: for fast operations without fs representation, see `RAMOnlyIndex`.
class DODB::CachedIndex(V) < DODB::Index(V)
# This hash contains the relation between the index key and the data key.
# This hash contains the relation between the index key and the data key, used for
# **internal operations**.
#
# WARNING: used for internal operations, do not change its content or access it directly.
property data = Hash(String, Int32).new
@ -312,8 +334,7 @@ class DODB::CachedIndex(V) < DODB::Index(V)
end
end
# Clears the cache.
# :inherit:
# Clears the cache and removes the `#indexing_directory`.
def nuke_index
super
data.clear
@ -330,7 +351,8 @@ class DODB::CachedIndex(V) < DODB::Index(V)
@data[index_key] = key.to_i
end
# Removes the index of a value on the file-system as `DODB::Index#deindex` but also from the cache.
# Removes the index of a value on the file-system as `DODB::Index#deindex` but also from
# the cache, used for **internal operations**.
#
# NOTE: used for internal operations.
def deindex(key, value)
@ -360,6 +382,10 @@ end
# Basic indexes for 1-to-1 relations.
# RAM-only version, no file-system representation.
#
# ```
# cars_by_name = car_database.new_RAM_index "name", &.name
# ```
#
# Since there are no file-system operations, all the operations are fast.
# `DODB::RAMOnlyIndex` enables the flexibility of indexes without a file-system representation
# for absolute efficiency.
@ -397,4 +423,9 @@ class DODB::RAMOnlyIndex(V) < DODB::CachedIndex(V)
raise MissingEntry.new(@name, index)
end
end
# Clears the index.
#
# Only the in-memory `data` hash is emptied; `super` is not called here.
# NOTE(review): presumably because a RAM-only index has no directory to remove — confirm.
def nuke_index
data.clear
end
end

View File

@ -5,6 +5,10 @@ require "./indexer.cr"
# Partitions for 1-to-n relations.
# Uncached version.
#
# ```
# cars_by_color = car_database.new_uncached_partition "color", &.color
# ```
#
# This (partition) index provides a file-system representation, enabling the administrators to
# select a value based on its index. The following example presents an index named "color"
# with some data indexed by a color attribute.
@ -28,22 +32,35 @@ require "./indexer.cr"
# NOTE: see `CachedPartition` for a cached version, faster for retrieval.
# NOTE: for fast operations without fs representation, see `RAMOnlyPartition`.
class DODB::Partition(V) < DODB::Indexer(V)
# Name of the index, such as *id* or *color* for example.
# This is an arbitrary value, mostly to create the index directory.
#
# NOTE: used for internal operations.
property name : String
property key_proc : Proc(V, String | NoIndex) | Proc(V, String)
# Procedure to retrieve the index attribute from the value.
property key_proc : Proc(V, String | NoIndex)
# Root database directory.
getter storage_root : String
# Required to remove an entry in the DB.
# Reference to the database instance.
@storage : DODB::Storage(V)
def initialize(@storage, @storage_root, @name, @key_proc)
# To create a partition from a database, use `DODB::Storage#new_partition` to create
# a cached partition, `DODB::Storage#new_uncached_partition` for an uncached partition or
# `DODB::Storage#new_RAM_partition` for a RAM-only partition.
#
# WARNING: this is an internal operation, do not instantiate a partition by hand.
def initialize(@storage : DODB::Storage(V), @storage_root : String, @name : String, @key_proc : Proc(V, String | NoIndex))
# Create the directory of this partition index (and any missing parent) if it does not exist yet.
::Dir.mkdir_p indexing_directory
end
def check!(key, value, old_value)
def check!(key : String, value : V, old_value : V?)
  # Partitions don't have collisions or overloads, so the check always succeeds.
  true
end
def index(key, value)
def index(key : String, value : V)
partition = key_proc.call value
return if partition.is_a? NoIndex
@ -54,7 +71,7 @@ class DODB::Partition(V) < DODB::Indexer(V)
::File.symlink get_data_symlink(key), symlink
end
def deindex(key, value)
def deindex (key : String, value : V)
partition = key_proc.call value
return if partition.is_a? NoIndex
@ -66,7 +83,15 @@ class DODB::Partition(V) < DODB::Indexer(V)
end
end
def get(partition) : Array(V)
# Gets data from an indexed value (throws an exception on a missing entry).
#
# ```
# red_cars = cars_by_color.get "red" # No red cars = MissingEntry exception
# ```
#
# WARNING: throws an exception if no value is found.
# NOTE: for a safe version, use `#get?`.
def get(partition : String) : Array(V)
partition_directory = indexing_directory partition
raise MissingEntry.new(@name, partition) unless Dir.exists? partition_directory
@ -79,16 +104,36 @@ class DODB::Partition(V) < DODB::Indexer(V)
r_value
end
def get?(partition) : Array(V)?
# Safe version of `#get`: returns the entries of the partition, or *nil*
# instead of raising an exception when the entry is missing.
#
# ```
# red_cars = cars_by_color.get? "red" # No red cars = nil
# ```
def get?(partition : String) : Array(V)?
  begin
    get partition
  rescue MissingEntry
    # A missing partition is reported as nil rather than as an error.
    nil
  end
end
def delete(partition)
# Deletes all entries within the provided partition.
#
# Delegates to the block version of `#delete` with a matcher that accepts
# every entry.
#
# ```
# cars_by_color.delete "red" # Deletes all red cars
# ```
def delete(partition : String)
# The block always returns true, so no entry is filtered out.
delete partition, do true end
end
# Deletes entries within the provided partition and matching the provided block of code.
#
# ```
# # Deletes all red Corvets.
# cars_by_color.delete "red", do |car|
# car.name == "Corvet"
# end
# ```
# TODO: in case the partition is left empty, should the partition's directory be removed?
def delete(partition, &matcher : Proc(V, Bool))
partition_directory = indexing_directory partition
@ -104,6 +149,7 @@ class DODB::Partition(V) < DODB::Indexer(V)
end
end
# :inherit:
def indexing_directory : String
# Partition indexes are stored under "<storage_root>/partitions/by_<name>".
"#{@storage_root}/partitions/by_#{@name}"
end
@ -128,6 +174,10 @@ end
# Partitions for 1-to-n relations.
# Cached version.
#
# ```
# cars_by_color = car_database.new_partition "color", &.color
# ```
#
# This (partition) index provides a file-system representation, enabling the administrators to
# select a value based on its index. The following example presents an index named "color"
# with some data indexed by a color attribute.
@ -152,15 +202,22 @@ end
# NOTE: see `Partition` for an uncached version, even less memory-hungry.
# NOTE: for fast operations without fs representation, see `RAMOnlyPartition`.
class DODB::CachedPartition(V) < DODB::Partition(V)
# This hash contains the relation between the index key and the data keys.
# This hash contains the relation between the index key and the data keys, used for
# **internal operations**.
#
# WARNING: used for internal operations, do not change its content or access it directly.
property data = Hash(String, Array(Int32)).new
# Clears the cache and removes the `#indexing_directory`.
#
# NOTE(review): the directory removal is presumably performed by the parent
# implementation reached through `super` — confirm.
def nuke_index
super
# Forget every cached partition.
data.clear
end
def index(key, value)
# Indexes the value on the file-system as `DODB::Partition#index` but also puts the index in a cache.
#
# NOTE: used for internal operations.
def index(key : String, value : V)
partition = key_proc.call value
return if partition.is_a? NoIndex
super(key, value)
@ -175,7 +232,11 @@ class DODB::CachedPartition(V) < DODB::Partition(V)
@data[partition] = array
end
def deindex(key, value)
# Removes the index of a value on the file-system as `DODB::Partition#deindex` but also from
# the cache, used for **internal operations**.
#
# NOTE: used for internal operations.
def deindex(key : String, value : V)
partition = key_proc.call value
return if partition.is_a? NoIndex
super(key, value)
@ -186,9 +247,21 @@ class DODB::CachedPartition(V) < DODB::Partition(V)
end
end
def get_with_indexes(partition) : Array(Tuple(V, Int32))
# Gets a partition's entries and the database key for each entry.
#
# ```
# # For example, get all red cars.
# cars_by_color.get_with_indexes "red"
# # Will return something like:
# # [ (@storage[42], 42)
# # , (@storage[91], 91)
# # ]
# # Each tuple is composed of a car and its key in the database.
# ```
def get_with_indexes(partition : String) : Array(Tuple(V, Int32))
r_value = Array(Tuple(V, Int32)).new
# In case the partition is cached.
if keys = @data[partition]?
keys.each do |data_key|
r_value << { @storage[data_key], data_key }
@ -207,11 +280,51 @@ class DODB::CachedPartition(V) < DODB::Partition(V)
r_value
end
def get(partition) : Array(V)
get_with_indexes(partition).map &.[0]
# Gets the entries of a partition.
#
# ```
# # For example, get all red cars.
# cars_by_color.get "red"
# ```
#
# On a cache miss, the keys are read from the partition's directory on the
# file-system and stored in the cache for the next calls.
#
# WARNING: raises `MissingEntry` when the partition is neither cached nor
# present on the file-system.
# NOTE: returns an empty list when the partition exists but holds no entry.
def get(partition : String) : Array(V)
  r_value = Array(V).new

  # Fast path: the partition is already cached.
  if keys = @data[partition]?
    keys.each { |data_key| r_value << @storage[data_key] }
    return r_value
  end

  # Cache miss: rebuild the key list from the file-system representation.
  partition_directory = indexing_directory partition
  raise MissingEntry.new(@name, partition) unless Dir.exists? partition_directory

  # The keys to put in the cache for this partition.
  p_value = Array(Int32).new
  Dir.each_child partition_directory do |child|
    key = get_key child
    r_value << @storage[key]
    p_value << key
  end

  # The cache is only filled once every entry has been read successfully.
  @data[partition] = p_value
  r_value
end
def delete(partition, &matcher : Proc(V, Bool))
# Deletes entries within the provided partition and matching the provided block of code,
# both from the file-system representation and from the cache.
#
# ```
# # Deletes all red Corvets.
# cars_by_color.delete "red", do |car|
# car.name == "Corvet"
# end
# ```
# TODO: in case the partition is left empty, should the partition be removed from the cache?
def delete(partition : String, &matcher : Proc(V, Bool))
# Use `get_with_indexes` to retrieve data on-disk, if necessary.
new_partition = get_with_indexes(partition).map(&.[1]).select do |key|
item = @storage[key]
@ -227,6 +340,10 @@ end
# Partitions for 1-to-n relations.
# RAM-only version.
#
# ```
# cars_by_color = car_database.new_RAM_partition "color", &.color
# ```
#
# Since there are no file-system operations, all the operations are fast.
# `DODB::RAMOnlyPartition` enables the flexibility of partitions without a file-system representation.
# Absolute efficiency, exactly as easy to use as the other partition implementations.
@ -236,7 +353,7 @@ end
# NOTE: see `Partition` for an uncached version, even less memory-hungry.
# NOTE: for an fs representation but still fast for retrieval, see `CachedPartition`.
class DODB::RAMOnlyPartition(V) < DODB::CachedPartition(V)
def index(key, value)
def index(key : String, value : V)
partition = key_proc.call value
return if partition.is_a? NoIndex
@ -250,7 +367,7 @@ class DODB::RAMOnlyPartition(V) < DODB::CachedPartition(V)
@data[partition] = array
end
def deindex(key, value)
def deindex(key : String, value : V)
partition = key_proc.call value
return if partition.is_a? NoIndex
@ -260,7 +377,7 @@ class DODB::RAMOnlyPartition(V) < DODB::CachedPartition(V)
end
end
def get_with_indexes(partition) : Array(Tuple(V, Int32))
def get_with_indexes(partition : String) : Array(Tuple(V, Int32))
r_value = Array(Tuple(V, Int32)).new
if keys = @data[partition]?
@ -271,6 +388,15 @@ class DODB::RAMOnlyPartition(V) < DODB::CachedPartition(V)
r_value
end
# Deletes entries within the provided partition and matching the provided block of code.
#
# ```
# # Deletes all red Corvets.
# cars_by_color.delete "red", do |car|
# car.name == "Corvet"
# end
# ```
# TODO: in case the partition is left empty, should the partition be removed from the cache?
def delete(partition, &matcher : Proc(V, Bool))
if keys = @data[partition]?
new_partition = keys.select do |key|
@ -280,4 +406,9 @@ class DODB::RAMOnlyPartition(V) < DODB::CachedPartition(V)
@data[partition] = new_partition
end
end
# Clears the cache.
#
# Only the in-memory `data` hash is emptied; `super` is not called here.
# NOTE(review): presumably because a RAM-only partition has no directory to remove — confirm.
def nuke_index
data.clear
end
end