From df705543b6ea757b6a74d9bfdb5597a26b33679f Mon Sep 17 00:00:00 2001 From: Philippe PITTOLI Date: Mon, 20 May 2024 13:10:17 +0200 Subject: [PATCH] Documentation, a few implementations rewritten, make source more consistent. --- Makefile | 2 +- src/cached.cr | 78 ++------ src/dodb.cr | 503 +----------------------------------------------- src/ramonly.cr | 60 ++++++ src/stacked.cr | 15 ++ src/storage.cr | 467 ++++++++++++++++++++++++++++++++++++++++++++ src/uncached.cr | 6 + 7 files changed, 568 insertions(+), 563 deletions(-) create mode 100644 src/ramonly.cr create mode 100644 src/stacked.cr create mode 100644 src/storage.cr create mode 100644 src/uncached.cr diff --git a/Makefile b/Makefile index 919f689..42e6dbf 100644 --- a/Makefile +++ b/Makefile @@ -17,7 +17,7 @@ release: make build OPTS="--release --progress" doc: - crystal docs + crystal docs src/dodb.cr HTTPD_ACCESS_LOGS ?= /tmp/access-dodb-docs.log HTTPD_ADDR ?= 127.0.0.1 diff --git a/src/cached.cr b/src/cached.cr index 579c649..9f5d7db 100644 --- a/src/cached.cr +++ b/src/cached.cr @@ -1,7 +1,3 @@ -require "file_utils" -require "json" -require "./dodb.cr" - class Hash(K,V) def reverse rev = Array(Tuple(K,V)).new @@ -17,6 +13,9 @@ class Hash(K,V) end end +# Cached database, stores data on the file-system and keeps it in RAM. +# +# WARNING: beware of the RAM use, see `DODB::StackedDataBase` for a less memory-hungry option. class DODB::CachedDataBase(V) < DODB::Storage(V) @indexers = [] of Indexer(V) property data = Hash(Int32, V).new @@ -82,6 +81,16 @@ class DODB::CachedDataBase(V) < DODB::Storage(V) @data[key] = value end + # :inherit: + # + # As `DODB::Storage#each_key`, keys are sorted in order to keep the behavior consistent. + private def each_key(reversed : Bool = false) + keys = @data.keys.sort + (reversed ? keys.reverse : keys).each do |key| + yield key + end + end + # :inherit: def each_with_key(reversed : Bool = false, offset = 0, limit : Int32? = -1) limit = if l = limit @@ -122,64 +131,3 @@ class DODB::CachedDataBase(V) < DODB::Storage(V) @data = Hash(Int32, V).new end end - -# `DODB::RAMOnlyDataBase` is a database without a file-system representation, -# enabling the use of DODB to store data which have the same lifetime as the application. -# Indexing (indexes, partitions, tags) will behave the same way. -class DODB::RAMOnlyDataBase(V) < DODB::CachedDataBase(V) - # Initialization still uses a directory name and creates a few paths. - # This is an implementation detail to re-use code of `DODB::Storage` and to get the indexers to work. - def initialize(@directory_name : String) - Dir.mkdir_p data_path - Dir.mkdir_p locks_directory - @last_key = -1 - end - - # The `last_key` function doesn't read a file in the `DODB::RAMOnlyDataBase` database. - def last_key - @last_key - end - - # The `last_key=` function doesn't write to a file in the `DODB::RAMOnlyDataBase` database. - def last_key=(key : Int32) - @last_key = key - end - - # WARNING: takes `[]?` and `[]` implementations from `CachedDataBase`. - # This will lead to errors in case the implementations change, be aware. - def []=(key : Int32, value : V) - old_value = self.[key]? - - check_collisions! key, value, old_value - - # Removes any old indices or partitions pointing to a value about to be replaced. - if old_value - remove_indexes key, old_value - end - - write_indexes key, value - - if key > last_key - self.last_key = key - end - - @data[key] = value - end - - # :inherit: - def unsafe_delete(key : Int32) - value = self[key]? - - return if value.nil? - - remove_indexes key, value - - @data.delete key - value - end - - private def remove_data! - super - @data = Hash(Int32, V).new - end -end diff --git a/src/dodb.cr b/src/dodb.cr index 13b1e71..94f77b2 100644 --- a/src/dodb.cr +++ b/src/dodb.cr @@ -1,501 +1,10 @@ require "file_utils" require "json" -require "./dodb/*" +require "./dodb/*" # Indexes (basic indexes, partitions, tags, etc.). -# The `DODB::Storage` abstract class defines the specifications of -# subsequent DODB databases (uncached, cached, RAM-only, etc.). -abstract class DODB::Storage(V) - property directory_name : String - - # A DODB database is instanciated with a *path* where data will be written. - def initialize(@directory_name : String) - end - - # Requests a (named) lock. - # Locks prevent concurrent access to the same data. - # - # In case of a request for a lock that is already in use, - # wait for a millisecond then retry, loop until it works. - # A lock is simply an opened file with the `LibC::O_EXCL` flag. - def request_lock(name, subname = nil) - r = -1 - file_path = get_lock_file_path name, subname - file_perms = 0o644 - - flags = LibC::O_EXCL | LibC::O_CREAT - while (r = LibC.open file_path, flags, file_perms) == -1 - sleep 1.milliseconds - end - - LibC.close r - end - - # Releases a (named) lock. - # - # The implementation is simple, it just removes the file. - def release_lock(name, subname = nil) - File.delete get_lock_file_path name, subname - end - - private def key_file - "#{@directory_name}/last-key" - end - - # Reads the last *key* in the database. - def last_key : Int32 - File.read(key_file).to_i - end - - # Changes the last *key* in the database. - def last_key=(x : Int32) - file = File.open(key_file, "w") - - file << x.to_s - - file.close - - x - rescue - raise Exception.new "could not update last-key file" - end - - # Take a database key and convert it in a formated string. Example: 343 -> "0000000343" - def stringify_key(key : Int32) - # Negative numbers give strange results with Crystal’s printf. - if key >= 0 - "%010i" % key - else - key.to_s - end - end - - # Adds a value to the database without a locking mechanism. - # - # For a thread-safe version, use `#<<`. - # - # WARNING: not thread-safe. - def unsafe_add(item : V) - key = last_key + 1 - - self[key] = item - self.last_key = key - - key # FIXME: Should we really return the internal key? - rescue e - raise e - end - - # Adds a value to the database, with a locking mechanism to prevent race conditions. - # - # This operation should be thread-safe since a lock is required before tinkering with the database. - # Because of the file-system operations, this function may be a bit slow. - # For single-thread applications, use the `#unsafe_add` operation instead. - def <<(item : V) - request_lock "key" - key = last_key + 1 - self[key] = item - self.last_key = key - - release_lock "key" - - key # FIXME: Should we really return the internal key? - rescue e - release_lock "key" - - raise e - end - - # Lists all entries in the database. - # - # WARNING: Very slow. Try not to use. - def each(reversed : Bool = false, offset = 0, limit : Int32? = nil) - each_with_key( - reversed: reversed, - offset: offset, - limit: limit - ) do |item, key| - yield item - end - end - - # Converts all the database into an array. - # - # WARNING: Very slow. Try not to use. - def to_a(reversed : Bool = false, offset = 0, limit : Int32? = nil) - array = ::Array(V).new - - each( - reversed: reversed, - offset: offset, - limit: limit - ) do |value| - array << value - end - - array - end - - # Converts the entire database into a hash. - # - # WARNING: Very slow. Try not to use. - def to_h(reversed : Bool = false, offset = 0, limit : Int32? = nil) - hash = ::Hash(Int32, V).new - - each_with_key( - reversed: reversed, - offset: offset, - limit: limit - ) do |element, key| - hash[key] = element - end - - hash - end - - # Writes all indexes (basic indexes, partitions, tags, etc.) for a value. - def write_indexes(key : Int32, value : V) - @indexers.each &.index(stringify_key(key), value) - end - - # Creates a new basic index **with a cache**. - # The *name* parameter is the name of the directory that will be created. - def new_index(name : String, &block : Proc(V, String | DODB::NoIndex)) - CachedIndex(V).new(self, @directory_name, name, block).tap do |indexer| - @indexers << indexer - end - end - - # Creates a new basic index **without a cache**. - # The *name* parameter is the name of the directory that will be created. - # - # NOTE: this will be a lot slower than the cached version. - def new_uncached_index(name : String, &block : Proc(V, String | DODB::NoIndex)) - Index(V).new(self, @directory_name, name, block).tap do |indexer| - @indexers << indexer - end - end - - # Creates a new basic index **only in RAM**. - # The *name* parameter is the name of the directory that will be created. - # - # NOTE: this index is the fastest, but doesn't have a file-system representation. - def new_RAM_index(name : String, &block : Proc(V, String | DODB::NoIndex)) - RAMOnlyIndex(V).new(self, @directory_name, name, block).tap do |indexer| - @indexers << indexer - end - end - - # Gets an *index object* based on its name. - def get_index(name : String, key) - index = @indexers.find &.name.==(name) - index.not_nil!.as(DODB::Index).get key - end - - # Creates a new partition **with a cache**. - # The *name* parameter is the name of the directory that will be created. - def new_partition(name : String, &block : Proc(V, String | DODB::NoIndex)) - CachedPartition(V).new(self, @directory_name, name, block).tap do |table| - @indexers << table - end - end - - # Creates a new partition **without a cache**. - # The *name* parameter is the name of the directory that will be created. - # - # NOTE: this will be a lot slower than the cached version. - def new_uncached_partition(name : String, &block : Proc(V, String | DODB::NoIndex)) - Partition(V).new(self, @directory_name, name, block).tap do |table| - @indexers << table - end - end - - # Creates a new partition **only in RAM**. - # The *name* parameter is the name of the directory that will be created. - # - # NOTE: this partition index is the fastest but doesn't have a file-system representation. - def new_RAM_partition(name : String, &block : Proc(V, String | DODB::NoIndex)) - RAMOnlyPartition(V).new(self, @directory_name, name, block).tap do |table| - @indexers << table - end - end - - # Gets an *index (partition) object* based on its name. - def get_partition(table_name : String, partition_name : String) - partition = @indexers.find &.name.==(table_name) - partition.not_nil!.as(DODB::Partition).get partition_name - end - - # Creates a new tag **with a cache**. - # The *name* parameter is the name of the directory that will be created. - def new_tags(name : String, &block : Proc(V, Array(String) | DODB::NoIndex)) - CachedTags(V).new(self, @directory_name, name, block).tap do |tags| - @indexers << tags - end - end - - # Creates a new tag **without a cache**. - # The *name* parameter is the name of the directory that will be created. - # - # NOTE: this will be a lot slower than the cached version. - def new_uncached_tags(name : String, &block : Proc(V, Array(String) | DODB::NoIndex)) - Tags(V).new(self, @directory_name, name, block).tap do |tags| - @indexers << tags - end - end - - # Creates a new partition **only in RAM**. - # The *name* parameter is the name of the directory that will be created. - # - # NOTE: this tag index is the fastest but doesn't have a file-system representation. - def new_RAM_tags(name : String, &block : Proc(V, Array(String) | DODB::NoIndex)) - RAMOnlyTags(V).new(self, @directory_name, name, block).tap do |tags| - @indexers << tags - end - end - - # Gets an *index (tag) object* based on its name. - def get_tags(name, key : String) - tag = @indexers.find &.name.==(name) - tag.not_nil!.as(DODB::Tags).get name, key - end - - # WARNING: directed graphs haven't been reviewed in YEARS. Assume as dead code. - def new_directed_graph(name : String, index : DODB::Index(V), &block : Proc(V, Array(String))) : DirectedGraph(V) - DirectedGraph(V).new(self, @directory_name, index, name, block).tap do |table| - @indexers << table - end - end - - # Checks for collisions in the indexes. - def check_collisions!(key : Int32, value : V, old_value : V?) - @indexers.each &.check!(stringify_key(key), value, old_value) - end - - # Retrieves a value and remove it from the database. - def pop - request_lock "key" - - key = last_key - - # Some entries may have been removed. We’ll skip over those. - # Not the most efficient if a large number of indices are empty. - while key >= 0 && self[key]?.nil? - key = key - 1 - end - - if key < 0 - return nil - end - - poped = self[key] - - self.unsafe_delete key - - last_key = key - 1 - - release_lock "key" - - poped - end - - private def data_path - "#{@directory_name}/data" - end - - private def file_path(key : Int32) - "#{data_path}/%010i" % key - end - - private def locks_directory : String - "#{@directory_name}/locks" - end - - private def get_lock_file_path(name : String, subname : String? = nil) - if subname - "#{locks_directory}/#{name}-#{subname}.lock" # FIXME: Separator that causes less collisions? - else - "#{locks_directory}/#{name}.lock" - end - end - - private def read(file_path : String) - V.from_json ::File.read file_path - end - - private def remove_data! - FileUtils.rm_rf data_path - Dir.mkdir_p data_path - end - - private def remove_indexing! - @indexers.each do |indexer| - indexer.nuke_index - end - end - - # Removes all indices and then rewrites them all. - # - # WARNING: slow operation. - def reindex_everything! - remove_indexing! - - each_with_key() do |item, key| - write_indexes key, item - end - end - - # Removes all indexes of a value. - def remove_indexes(key : Int32, value : V) - @indexers.each &.deindex(stringify_key(key), value) - end - - # Gets the data with the *key*. - # In case the data is missing, returns *nil*. - def []?(key : Int32) : V? - self[key] - rescue MissingEntry - # FIXME: Only rescue JSON and “no such file” errors. - return nil - end - - # Gets the data with the *key*. - # In case the data is missing, returns an exception `DODB::MissingEntry`. - abstract def [](key : Int32) - - # Replaces the data with the *key*. - # In case the data is missing, returns an exception `DODB::MissingEntry`. - abstract def []=(key : Int32, value : V) - - # Deletes the data with the *key* but doesn't request for a lock. - # This function is required since `#pop` is already locked before trying to remove an entry, - # thus calling `#unsafe_delete`. - abstract def unsafe_delete(key : Int32) - - # Deletes the data with the *key*. - def delete(key : Int32) - request_lock "key" - value = unsafe_delete key - release_lock "key" - - value - end -end - -class DODB::DataBase(V) < DODB::Storage(V) - @indexers = [] of Indexer(V) - - def initialize(@directory_name : String) - Dir.mkdir_p data_path - Dir.mkdir_p locks_directory - - begin - self.last_key - rescue - self.last_key = -1 - end - end - - def [](key : Int32) : V - raise MissingEntry.new(key) unless ::File.exists? file_path key - - read file_path key - end - - def []=(key : Int32, value : V) - old_value = self.[key]? - - check_collisions! key, value, old_value - - # Removes any old indices or partitions pointing to a value about - # to be replaced. - if old_value - remove_indexes key, old_value - end - - # Avoids corruption in case the application crashes while writing. - file_path(key).tap do |path| - ::File.write "#{path}.new", value.to_json - ::FileUtils.mv "#{path}.new", path - end - - write_indexes key, value - - if key > last_key - self.last_key = key - end - end - - # :inherit: - def unsafe_delete(key : Int32) - value = self[key]? - - return if value.nil? - - begin - ::File.delete file_path key - rescue File::NotFoundError - end - - remove_indexes key, value - - value - end - - - private def each_key(reversed = false) - start = 0 - _end = last_key - step = 1 - - if reversed - start = _end - _end = 0 - step = -1 - end - - key = start - while step == 1 ? key <= _end : key >= _end - full_path = file_path key - - if File.exists? full_path - yield key, full_path - end - - key = key + step - end - end - - # Lists all database entries with their key. - # Can be useful for making dumps or to restore a database. - # - # WARNING: Very slow. Try not to use. - def each_with_key(reversed : Bool = false, offset = 0, limit : Int32? = -1) - dirname = data_path - - limit = if l = limit - l - else - -1 - end - - each_key(reversed) do |key, path| - offset -= 1 if offset >= 0 - next if offset >= 0 - - return if limit == 0 - limit -= 1 if limit > 0 - - begin - # FIXME: Only intercept JSON parsing errors. - field = read path - rescue - next - end - - yield field, key - end - end -end - -require "./cached.cr" +require "./storage.cr" # Database specs. +require "./uncached.cr" # Uncached database. +require "./cached.cr" # Cached database. +require "./stacked.cr" # Reasonably cached database. +require "./ramonly.cr" # RAM-only database. diff --git a/src/ramonly.cr b/src/ramonly.cr new file mode 100644 index 0000000..85bb470 --- /dev/null +++ b/src/ramonly.cr @@ -0,0 +1,60 @@ +# `DODB::RAMOnlyDataBase` is a database without a file-system representation, +# enabling the use of DODB to store data which have the same lifetime as the application. +# Indexing (indexes, partitions, tags) will behave the same way. +class DODB::RAMOnlyDataBase(V) < DODB::CachedDataBase(V) + # Initialization still uses a directory name and creates a few paths. + # This is an implementation detail to re-use code of `DODB::Storage` and to get the indexers to work. + def initialize(@directory_name : String) + Dir.mkdir_p data_path + Dir.mkdir_p locks_directory + @last_key = -1 + end + + # The `last_key` function doesn't read a file in the `DODB::RAMOnlyDataBase` database. + def last_key + @last_key + end + + # The `last_key=` function doesn't write to a file in the `DODB::RAMOnlyDataBase` database. + def last_key=(key : Int32) + @last_key = key + end + + # WARNING: takes `[]?` and `[]` implementations from `CachedDataBase`. + # This will lead to errors in case the implementations change, be aware. + def []=(key : Int32, value : V) + old_value = self.[key]? + + check_collisions! key, value, old_value + + # Removes any old indices or partitions pointing to a value about to be replaced. + if old_value + remove_indexes key, old_value + end + + write_indexes key, value + + if key > last_key + self.last_key = key + end + + @data[key] = value + end + + # :inherit: + def unsafe_delete(key : Int32) + value = self[key]? + + return if value.nil? + + remove_indexes key, value + + @data.delete key + value + end + + private def remove_data! + super + @data = Hash(Int32, V).new + end +end diff --git a/src/stacked.cr b/src/stacked.cr new file mode 100644 index 0000000..06ffd89 --- /dev/null +++ b/src/stacked.cr @@ -0,0 +1,15 @@ +# Stacked database: only recently requested entries are kept in memory. +# Most recently used entries are in cache and put on the top of the stack. +# Least recently used entries may be removed from the cache in order to keep the amount of memory used reasonable. +# +# NOTE: fast for frequently requested data and requires a stable (and configurable) amount of memory. +# TODO: not yet implemented. +class DODB::StackedDataBase(V) < DODB::CachedDataBase(V) + # The maximum number of accepted entries in the cache. + property max_entries : Int32 + + # Initializes the `StackedDataBase` with a maximum number of entries in the cache. + def initialize(directory_name : String, @max_entries : Int32 = 100_000) + super directory_name + end +end diff --git a/src/storage.cr b/src/storage.cr new file mode 100644 index 0000000..ab3d45b --- /dev/null +++ b/src/storage.cr @@ -0,0 +1,467 @@ +# The `DODB::Storage` abstract class defines the specifications of +# subsequent DODB databases (uncached, cached, RAM-only, etc.). +abstract class DODB::Storage(V) + @indexers = [] of Indexer(V) + property directory_name : String + + # Creates a database. + # + # A DODB database is instanciated with a *path* where data will be written. + # Another directory is created where locks can be written. + # In case the database is empty, the *last_key* is set to *-1*. + def initialize(@directory_name : String) + Dir.mkdir_p data_path + Dir.mkdir_p locks_directory + + begin + self.last_key + rescue + self.last_key = -1 + end + end + + # Requests a (named) lock. + # Locks prevent concurrent access to the same data. + # + # In case of a request for a lock that is already in use, + # wait for a millisecond then retry, loop until it works. + # A lock is simply an opened file with the `LibC::O_EXCL` flag. + def request_lock(name, subname = nil) + r = -1 + file_path = get_lock_file_path name, subname + file_perms = 0o644 + + flags = LibC::O_EXCL | LibC::O_CREAT + while (r = LibC.open file_path, flags, file_perms) == -1 + sleep 1.milliseconds + end + + LibC.close r + end + + # Releases a (named) lock. + # + # The implementation is simple, it just removes the file. + def release_lock(name, subname = nil) + File.delete get_lock_file_path name, subname + end + + private def key_file + "#{@directory_name}/last-key" + end + + # Reads the last *key* in the database. + def last_key : Int32 + File.read(key_file).to_i + end + + # Changes the last *key* in the database. + def last_key=(x : Int32) + file = File.open(key_file, "w") + + file << x.to_s + + file.close + + x + rescue + raise Exception.new "could not update last-key file" + end + + # Take a database key and convert it in a formated string. Example: 343 -> "0000000343" + def stringify_key(key : Int32) + # Negative numbers give strange results with Crystal’s printf. + if key >= 0 + "%010i" % key + else + key.to_s + end + end + + # Adds a value to the database without a locking mechanism. + # + # For a thread-safe version, use `#<<`. + # + # WARNING: not thread-safe. + def unsafe_add(item : V) + key = last_key + 1 + + self[key] = item + self.last_key = key + + key # FIXME: Should we really return the internal key? + rescue e + raise e + end + + # Adds a value to the database, with a locking mechanism to prevent race conditions. + # + # This operation should be thread-safe since a lock is required before tinkering with the database. + # Because of the file-system operations, this function may be a bit slow. + # For single-thread applications, use the `#unsafe_add` operation instead. + def <<(item : V) + request_lock "key" + key = last_key + 1 + self[key] = item + self.last_key = key + + release_lock "key" + + key # FIXME: Should we really return the internal key? + rescue e + release_lock "key" + + raise e + end + + # Lists all entries in the database. + # + # WARNING: Very slow. Try not to use. + def each(reversed : Bool = false, offset = 0, limit : Int32? = nil) + each_with_key( + reversed: reversed, + offset: offset, + limit: limit + ) do |item, key| + yield item + end + end + + # Converts all the database into an array. + # + # WARNING: Very slow. Try not to use. + def to_a(reversed : Bool = false, offset = 0, limit : Int32? = nil) + array = ::Array(V).new + + each( + reversed: reversed, + offset: offset, + limit: limit + ) do |value| + array << value + end + + array + end + + # Converts the entire database into a hash. + # + # WARNING: Very slow. Try not to use. + def to_h(reversed : Bool = false, offset = 0, limit : Int32? = nil) + hash = ::Hash(Int32, V).new + + each_with_key( + reversed: reversed, + offset: offset, + limit: limit + ) do |element, key| + hash[key] = element + end + + hash + end + + # Writes all indexes (basic indexes, partitions, tags, etc.) for a value. + def write_indexes(key : Int32, value : V) + @indexers.each &.index(stringify_key(key), value) + end + + # Creates a new basic index **with a cache**. + # The *name* parameter is the name of the directory that will be created. + def new_index(name : String, &block : Proc(V, String | DODB::NoIndex)) + CachedIndex(V).new(self, @directory_name, name, block).tap do |indexer| + @indexers << indexer + end + end + + # Creates a new basic index **without a cache**. + # The *name* parameter is the name of the directory that will be created. + # + # NOTE: this will be a lot slower than the cached version. + def new_uncached_index(name : String, &block : Proc(V, String | DODB::NoIndex)) + Index(V).new(self, @directory_name, name, block).tap do |indexer| + @indexers << indexer + end + end + + # Creates a new basic index **only in RAM**. + # The *name* parameter is the name of the directory that will be created. + # + # NOTE: this index is the fastest, but doesn't have a file-system representation. + def new_RAM_index(name : String, &block : Proc(V, String | DODB::NoIndex)) + RAMOnlyIndex(V).new(self, @directory_name, name, block).tap do |indexer| + @indexers << indexer + end + end + + # Gets an *index object* based on its name. + def get_index(name : String, key) + index = @indexers.find &.name.==(name) + index.not_nil!.as(DODB::Index).get key + end + + # Creates a new partition **with a cache**. + # The *name* parameter is the name of the directory that will be created. + def new_partition(name : String, &block : Proc(V, String | DODB::NoIndex)) + CachedPartition(V).new(self, @directory_name, name, block).tap do |table| + @indexers << table + end + end + + # Creates a new partition **without a cache**. + # The *name* parameter is the name of the directory that will be created. + # + # NOTE: this will be a lot slower than the cached version. + def new_uncached_partition(name : String, &block : Proc(V, String | DODB::NoIndex)) + Partition(V).new(self, @directory_name, name, block).tap do |table| + @indexers << table + end + end + + # Creates a new partition **only in RAM**. + # The *name* parameter is the name of the directory that will be created. + # + # NOTE: this partition index is the fastest but doesn't have a file-system representation. + def new_RAM_partition(name : String, &block : Proc(V, String | DODB::NoIndex)) + RAMOnlyPartition(V).new(self, @directory_name, name, block).tap do |table| + @indexers << table + end + end + + # Gets an *index (partition) object* based on its name. + def get_partition(table_name : String, partition_name : String) + partition = @indexers.find &.name.==(table_name) + partition.not_nil!.as(DODB::Partition).get partition_name + end + + # Creates a new tag **with a cache**. + # The *name* parameter is the name of the directory that will be created. + def new_tags(name : String, &block : Proc(V, Array(String) | DODB::NoIndex)) + CachedTags(V).new(self, @directory_name, name, block).tap do |tags| + @indexers << tags + end + end + + # Creates a new tag **without a cache**. + # The *name* parameter is the name of the directory that will be created. + # + # NOTE: this will be a lot slower than the cached version. + def new_uncached_tags(name : String, &block : Proc(V, Array(String) | DODB::NoIndex)) + Tags(V).new(self, @directory_name, name, block).tap do |tags| + @indexers << tags + end + end + + # Creates a new partition **only in RAM**. + # The *name* parameter is the name of the directory that will be created. + # + # NOTE: this tag index is the fastest but doesn't have a file-system representation. + def new_RAM_tags(name : String, &block : Proc(V, Array(String) | DODB::NoIndex)) + RAMOnlyTags(V).new(self, @directory_name, name, block).tap do |tags| + @indexers << tags + end + end + + # Gets an *index (tag) object* based on its name. + def get_tags(name, key : String) + tag = @indexers.find &.name.==(name) + tag.not_nil!.as(DODB::Tags).get name, key + end + + # WARNING: directed graphs haven't been reviewed in YEARS, assume as dead code. + def new_directed_graph(name : String, index : DODB::Index(V), &block : Proc(V, Array(String))) : DirectedGraph(V) + DirectedGraph(V).new(self, @directory_name, index, name, block).tap do |table| + @indexers << table + end + end + + # Checks for collisions in the indexes. + def check_collisions!(key : Int32, value : V, old_value : V?) + @indexers.each &.check!(stringify_key(key), value, old_value) + end + + # Retrieves a value and remove it from the database. + def pop + request_lock "key" + + key = last_key + + # Some entries may have been removed. We’ll skip over those. + # Not the most efficient if a large number of indices are empty. + while key >= 0 && self[key]?.nil? + key = key - 1 + end + + if key < 0 + return nil + end + + poped = self[key] + + self.unsafe_delete key + + last_key = key - 1 + + release_lock "key" + + poped + end + + private def data_path + "#{@directory_name}/data" + end + + private def file_path(key : Int32) + "#{data_path}/%010i" % key + end + + private def locks_directory : String + "#{@directory_name}/locks" + end + + private def get_lock_file_path(name : String, subname : String? = nil) + if subname + "#{locks_directory}/#{name}-#{subname}.lock" # FIXME: Separator that causes less collisions? + else + "#{locks_directory}/#{name}.lock" + end + end + + private def read(file_path : String) + V.from_json ::File.read file_path + end + + private def remove_data! + FileUtils.rm_rf data_path + Dir.mkdir_p data_path + end + + private def remove_indexing! + @indexers.each do |indexer| + indexer.nuke_index + end + end + + # Removes all indices and then rewrites them all. + # + # WARNING: slow operation. + def reindex_everything! + remove_indexing! + + each_with_key() do |item, key| + write_indexes key, item + end + end + + # Removes all indexes of a value. + def remove_indexes(key : Int32, value : V) + @indexers.each &.deindex(stringify_key(key), value) + end + + # Gets the data with the *key*. + # In case the data is missing, returns *nil*. + def []?(key : Int32) : V? + self[key] + rescue MissingEntry + # FIXME: Only rescue JSON and “no such file” errors. + return nil + end + + # Gets the data with the *key*. + # In case the data is missing, returns an exception `DODB::MissingEntry`. + def [](key : Int32) : V + raise MissingEntry.new(key) unless ::File.exists? file_path key + read file_path key + end + + # Replaces the data with the *key*. + # In case the data is missing, returns an exception `DODB::MissingEntry`. + def []=(key : Int32, value : V) + old_value = self.[key]? + + check_collisions! key, value, old_value + + # Removes any old indices or partitions pointing to a value about + # to be replaced. + if old_value + remove_indexes key, old_value + end + + # Avoids corruption in case the application crashes while writing. + file_path(key).tap do |path| + ::File.write "#{path}.new", value.to_json + ::FileUtils.mv "#{path}.new", path + end + + write_indexes key, value + + if key > last_key + self.last_key = key + end + end + + # Deletes the data with the *key* but doesn't request for a lock. + # This function is required since `#pop` is already locked before trying to remove an entry, + # thus calling `#unsafe_delete`. + def unsafe_delete(key : Int32) + value = self[key]? + + return if value.nil? + + begin + ::File.delete file_path key + rescue File::NotFoundError + end + + remove_indexes key, value + + value + end + + # Deletes the data with the *key*. + def delete(key : Int32) + request_lock "key" + value = unsafe_delete key + release_lock "key" + + value + end + + # Lists all the keys in the database. + private def each_key(reversed = false) + # Removes the first two "." and ".." directories. + keys = Dir.entries(data_path).[2..].map(&.to_i).sort + (reversed ? keys.reverse : keys).each do |key| + yield key + end + end + + # Lists all database entries with their key. + # Can be useful for making dumps or to restore a database. + # + # WARNING: Very slow. Try not to use. + def each_with_key(reversed : Bool = false, offset = 0, limit : Int32? = -1) + limit = if l = limit + l + else + -1 + end + + each_key(reversed) do |key| + offset -= 1 if offset >= 0 + next if offset >= 0 + + return if limit == 0 + limit -= 1 if limit > 0 + + begin + # FIXME: Only intercept JSON parsing errors. + value = self[key] + rescue + next + end + + yield value, key + end + end +end diff --git a/src/uncached.cr b/src/uncached.cr new file mode 100644 index 0000000..16a1a45 --- /dev/null +++ b/src/uncached.cr @@ -0,0 +1,6 @@ +# Basic database of DODB. +# Data isn't cached. +# +# NOTE: slow but doesn't require much memory. +class DODB::DataBase(V) < DODB::Storage(V) +end