diff --git a/TODO.md b/TODO.md index 229af2f..6cd0700 100644 --- a/TODO.md +++ b/TODO.md @@ -9,3 +9,4 @@ - Write the API documentation. - Finish the PDF to explain *why DODB*. +- Change *index* by *key* in `DODB::Storage` and inherited classes. diff --git a/src/cached.cr b/src/cached.cr index 9bdd945..1e2378f 100644 --- a/src/cached.cr +++ b/src/cached.cr @@ -26,15 +26,15 @@ class DODB::CachedDataBase(V) < DODB::Storage(V) Dir.mkdir_p locks_directory begin - self.last_index + self.last_key rescue - self.last_index = -1 + self.last_key = -1 end # Load the database in RAM at start-up. - DODB::DataBase(V).new(@directory_name).each_with_index do |v, index| - puts "\rloading data from #{@directory_name} at index #{index}" - self[index] = v + DODB::DataBase(V).new(@directory_name).each_with_key do |v, key| + puts "\rloading data from #{@directory_name} at key #{key}" + self[key] = v end end @@ -46,53 +46,66 @@ class DODB::CachedDataBase(V) < DODB::Storage(V) return nil end + # :inherit: + # + # Data needs to be cloned in case it will be modified, otherwise it will mess with indexes. + # # WARNING: data isn't cloned. - # You have to do it yourself in case you modify any value, - # otherwise you may encounter problems (at least with indexes). def [](key : Int32) : V @data[key] rescue raise MissingEntry.new(key) end - def []=(index : Int32, value : V) - old_value = self.[index]? + # :inherit: + def []=(key : Int32, value : V) + old_value = self.[key]? - check_collisions! index, value, old_value + check_collisions! key, value, old_value # Removes any old indices or partitions pointing to a value about # to be replaced. if old_value - remove_indexes index, old_value + remove_indexes key, old_value end # Avoids corruption in case the application crashes while writing. 
- file_path(index).tap do |path| + file_path(key).tap do |path| ::File.write "#{path}.new", value.to_json ::FileUtils.mv "#{path}.new", path end - write_partitions index, value + write_indexes key, value - if index > last_index - self.last_index = index + if key > last_key + self.last_key = key end - @data[index] = value + @data[key] = value end - ## - # Can be useful for making dumps or to restore a database. - def each_with_index(reversed : Bool = false, start_offset = 0, end_offset : Int32? = nil) - i = -1 # do not trust key to be the right index - (reversed ? @data.reverse : @data).each do |index, v| - i += 1 - next if start_offset > i - break unless end_offset.nil? || i <= end_offset + # :inherit: + def each_with_key(reversed : Bool = false, offset = 0, limit : Int32? = -1) + (reversed ? @data.reverse : @data).each do |key, v| + offset -= 1 if offset >= 0 + next if offset >= 0 - yield v, index + return if limit == 0 + limit -= 1 if limit > 0 + + yield v, key end end + # :inherit: def delete(key : Int32) + request_lock "key" + value = unsafe_delete key + release_lock "key" + + value + end + + # :inherit: + def unsafe_delete(key : Int32) value = self[key]? return if value.nil? @@ -117,40 +130,48 @@ end # `DODB::RAMOnlyDataBase` is a database without a file-system representation, # enabling the use of DODB to store data which have the same lifetime as the application. # Indexing (indexes, partitions, tags) will behave the same way. - class DODB::RAMOnlyDataBase(V) < DODB::CachedDataBase(V) # Initialization still uses a directory name and creates a few paths. # This is an implementation detail to re-use code of `DODB::Storage` and to get the indexers to work. def initialize(@directory_name : String) Dir.mkdir_p data_path Dir.mkdir_p locks_directory - self.last_index = -1 + @last_key = -1 + end + + # The `last_key` function doesn't read a file in the `DODB::RAMOnlyDataBase` database. 
+ def last_key + @last_key + end + + # The `last_key=` function doesn't write to a file in the `DODB::RAMOnlyDataBase` database. + def last_key=(key : Int32) + @last_key = key end # WARNING: takes `[]?` and `[]` implementations from `CachedDataBase`. # This will lead to errors in case the implementations change, be aware. + def []=(key : Int32, value : V) + old_value = self.[key]? - def []=(index : Int32, value : V) - old_value = self.[index]? + check_collisions! key, value, old_value - check_collisions! index, value, old_value - - # Removes any old indices or partitions pointing to a value about - # to be replaced. + # Removes any old indices or partitions pointing to a value about to be replaced. if old_value - remove_indexes index, old_value + remove_indexes key, old_value end - write_partitions index, value + write_indexes key, value - if index > last_index - self.last_index = index + if key > last_key + self.last_key = key end - @data[index] = value + @data[key] = value end - def delete(key : Int32) + # :inherit: + def unsafe_delete(key : Int32) value = self[key]? return if value.nil? diff --git a/src/dodb.cr b/src/dodb.cr index 2a9d1fa..9cba340 100644 --- a/src/dodb.cr +++ b/src/dodb.cr @@ -3,12 +3,21 @@ require "json" require "./dodb/*" +# The `DODB::Storage` abstract class defines the specifications of +# subsequent DODB databases (uncached, cached, RAM-only, etc.). abstract class DODB::Storage(V) property directory_name : String + # A DODB database is instantiated with a *path* where data will be written. def initialize(@directory_name : String) end + # Requests a (named) lock. + # Locks prevent concurrent access to the same data. + # + # In case of a request for a lock that is already in use, + # wait for a millisecond then retry, loop until it works. + # A lock is simply an opened file with the `LibC::O_EXCL` flag. 
def request_lock(name, subname = nil) r = -1 file_path = get_lock_file_path name, subname @@ -21,18 +30,26 @@ abstract class DODB::Storage(V) LibC.close r end + + # Releases a (named) lock. + # + # The implementation is simple, it just removes the file. def release_lock(name, subname = nil) File.delete get_lock_file_path name, subname end - private def index_file - "#{@directory_name}/last-index" + private def key_file + "#{@directory_name}/last-key" end - def last_index : Int32 - File.read(index_file).to_i + + # Reads the last *key* in the database. + def last_key : Int32 + File.read(key_file).to_i end - def last_index=(x : Int32) - file = File.open(index_file, "w") + + # Changes the last *key* in the database. + def last_key=(x : Int32) + file = File.open(key_file, "w") file << x.to_s @@ -40,9 +57,10 @@ abstract class DODB::Storage(V) x rescue - raise Exception.new "could not update index file" + raise Exception.new "could not update last-key file" end + # Takes a database key and converts it into a formatted string. Example: 343 -> "0000000343" def stringify_key(key : Int32) # Negative numbers give strange results with Crystal’s printf. if key >= 0 @@ -52,40 +70,65 @@ abstract class DODB::Storage(V) end end - def <<(item : V) - request_lock "index" - index = last_index + 1 - self[index] = item - self.last_index = index + # Adds a value to the database without a locking mechanism. + # + # For a thread-safe version, use `#<<`. + # + # WARNING: not thread-safe. + def unsafe_add(item : V) + key = last_key + 1 - release_lock "index" + self[key] = item + self.last_key = key - index # FIXME: Should we really return the internal key? + key # FIXME: Should we really return the internal key? rescue e - release_lock "index" + raise e + end + + # Adds a value to the database, with a locking mechanism to prevent race conditions. + # + # This operation should be thread-safe since a lock is required before tinkering with the database. 
+ # Because of the file-system operations, this function may be a bit slow. + # For single-thread applications, use the `#unsafe_add` operation instead. + def <<(item : V) + request_lock "key" + key = last_key + 1 + self[key] = item + self.last_key = key + + release_lock "key" + + key # FIXME: Should we really return the internal key? + rescue e + release_lock "key" raise e end - def each(reversed : Bool = false, start_offset = 0, end_offset : Int32? = nil) - each_with_index( + # Lists all entries in the database. + # + # WARNING: Very slow. Try not to use. + def each(reversed : Bool = false, offset = 0, limit : Int32? = nil) + each_with_key( reversed: reversed, - start_offset: start_offset, - end_offset: end_offset - ) do |item, index| + offset: offset, + limit: limit + ) do |item, key| yield item end end - ## - # CAUTION: Very slow. Try not to use. - def to_a(reversed : Bool = false, start_offset = 0, end_offset : Int32? = nil) + # Converts all the database into an array. + # + # WARNING: Very slow. Try not to use. + def to_a(reversed : Bool = false, offset = 0, limit : Int32? = nil) array = ::Array(V).new each( reversed: reversed, - start_offset: start_offset, - end_offset: end_offset + offset: offset, + limit: limit ) do |value| array << value end @@ -93,132 +136,165 @@ abstract class DODB::Storage(V) array end - ## - # CAUTION: Very slow. Try not to use. - def to_h(reversed : Bool = false, start_offset = 0, end_offset : Int32? = nil) + # Converts the entire database into a hash. + # + # WARNING: Very slow. Try not to use. + def to_h(reversed : Bool = false, offset = 0, limit : Int32? = nil) hash = ::Hash(Int32, V).new - each_with_index( + each_with_key( reversed: reversed, - start_offset: start_offset, - end_offset: end_offset - ) do |element, index| - hash[index] = element + offset: offset, + limit: limit + ) do |element, key| + hash[key] = element end hash end - # name is the name that will be used on the file system. 
+ # Writes all indexes (basic indexes, partitions, tags, etc.) for a value. + def write_indexes(key : Int32, value : V) + @indexers.each &.index(stringify_key(key), value) + end + + # Creates a new basic index **with a cache**. + # The *name* parameter is the name of the directory that will be created. def new_index(name : String, &block : Proc(V, String) | Proc(V, String | DODB::NoIndex)) CachedIndex(V).new(self, @directory_name, name, block).tap do |indexer| @indexers << indexer end end + # Creates a new basic index **without a cache**. + # The *name* parameter is the name of the directory that will be created. + # + # NOTE: this will be a lot slower than the cached version. def new_uncached_index(name : String, &block : Proc(V, String) | Proc(V, String | DODB::NoIndex)) Index(V).new(self, @directory_name, name, block).tap do |indexer| @indexers << indexer end end + # Creates a new basic index **only in RAM**. + # The *name* parameter is the name of the directory that will be created. + # + # NOTE: this index is the fastest, but doesn't have a file-system representation. def new_RAM_index(name : String, &block : Proc(V, String) | Proc(V, String | DODB::NoIndex)) RAMOnlyIndex(V).new(self, @directory_name, name, block).tap do |indexer| @indexers << indexer end end + # Gets an *index object* based on its name. def get_index(name : String, key) index = @indexers.find &.name.==(name) - index.not_nil!.as(DODB::Index).get key end - # name is the name that will be used on the file system. + # Creates a new partition **with a cache**. + # The *name* parameter is the name of the directory that will be created. def new_partition(name : String, &block : Proc(V, String) | Proc(V, String | DODB::NoIndex)) CachedPartition(V).new(self, @directory_name, name, block).tap do |table| @indexers << table end end + # Creates a new partition **without a cache**. + # The *name* parameter is the name of the directory that will be created. 
+ # + # NOTE: this will be a lot slower than the cached version. def new_uncached_partition(name : String, &block : Proc(V, String) | Proc(V, String | DODB::NoIndex)) Partition(V).new(self, @directory_name, name, block).tap do |table| @indexers << table end end + # Creates a new partition **only in RAM**. + # The *name* parameter is the name of the directory that will be created. + # + # NOTE: this partition index is the fastest but doesn't have a file-system representation. def new_RAM_partition(name : String, &block : Proc(V, String) | Proc(V, String | DODB::NoIndex)) RAMOnlyPartition(V).new(self, @directory_name, name, block).tap do |table| @indexers << table end end + # Gets an *index (partition) object* based on its name. def get_partition(table_name : String, partition_name : String) partition = @indexers.find &.name.==(table_name) - partition.not_nil!.as(DODB::Partition).get partition_name end - def write_partitions(key : Int32, value : V) - @indexers.each &.index(stringify_key(key), value) - end - + # Creates a new tag **with a cache**. + # The *name* parameter is the name of the directory that will be created. def new_tags(name : String, &block : Proc(V, Array(String)) | Proc(V, Array(String) | DODB::NoIndex)) CachedTags(V).new(self, @directory_name, name, block).tap do |tags| @indexers << tags end end + # Creates a new tag **without a cache**. + # The *name* parameter is the name of the directory that will be created. + # + # NOTE: this will be a lot slower than the cached version. def new_uncached_tags(name : String, &block : Proc(V, Array(String)) | Proc(V, Array(String) | DODB::NoIndex)) Tags(V).new(self, @directory_name, name, block).tap do |tags| @indexers << tags end end + # Creates a new tag **only in RAM**. + # The *name* parameter is the name of the directory that will be created. + # + # NOTE: this tag index is the fastest but doesn't have a file-system representation. 
def new_RAM_tags(name : String, &block : Proc(V, Array(String)) | Proc(V, Array(String) | DODB::NoIndex)) RAMOnlyTags(V).new(self, @directory_name, name, block).tap do |tags| @indexers << tags end end + # Gets an *index (tag) object* based on its name. def get_tags(name, key : String) tag = @indexers.find &.name.==(name) - tag.not_nil!.as(DODB::Tags).get name, key end + # WARNING: directed graphs haven't been reviewed in YEARS. Treat them as dead code. def new_directed_graph(name : String, index : DODB::Index(V), &block : Proc(V, Array(String))) : DirectedGraph(V) DirectedGraph(V).new(self, @directory_name, index, name, block).tap do |table| @indexers << table end end + # Checks for collisions in the indexes. def check_collisions!(key : Int32, value : V, old_value : V?) @indexers.each &.check!(stringify_key(key), value, old_value) end + # Retrieves a value and removes it from the database. def pop - request_lock "index" + request_lock "key" - index = last_index + key = last_key # Some entries may have been removed. We’ll skip over those. # Not the most efficient if a large number of indices are empty. - while index >= 0 && self[index]?.nil? - index = index - 1 + while key >= 0 && self[key]?.nil? + key = key - 1 end - if index < 0 + if key < 0 return nil end - poped = self[index] + poped = self[key] - self.delete index + self.unsafe_delete key - last_index = index - 1 + last_key = key - 1 - release_lock "index" + release_lock "key" poped end @@ -258,20 +334,24 @@ abstract class DODB::Storage(V) end end - # A very slow operation that removes all indices and then rewrites - # them all. + # Removes all indices and then rewrites them all. + # + # WARNING: slow operation. def reindex_everything! remove_indexing! - each_with_index() do |item, index| - write_partitions index, item + each_with_key() do |item, key| + write_indexes key, item end end + # Removes all indexes of a value. 
def remove_indexes(key : Int32, value : V) @indexers.each &.deindex(stringify_key(key), value) end + # Gets the data with the *key*. + # In case the data is missing, returns *nil*. def []?(key : Int32) : V? self[key] rescue MissingEntry @@ -279,7 +359,20 @@ abstract class DODB::Storage(V) return nil end + # Gets the data with the *key*. + # In case the data is missing, returns an exception `DODB::MissingEntry`. abstract def [](key : Int32) + + # Replaces the data with the *key*. + # In case the data is missing, returns an exception `DODB::MissingEntry`. + abstract def []=(key : Int32, value : V) + + # Deletes the data with the *key* but doesn't request for a lock. + # This function is required since `#pop` is already locked before trying to remove an entry, + # thus calling `#unsafe_delete`. + abstract def unsafe_delete(key : Int32) + + # Deletes the data with the *key*. abstract def delete(key : Int32) end @@ -291,9 +384,9 @@ class DODB::DataBase(V) < DODB::Storage(V) Dir.mkdir_p locks_directory begin - self.last_index + self.last_key rescue - self.last_index = -1 + self.last_key = -1 end end @@ -303,31 +396,41 @@ class DODB::DataBase(V) < DODB::Storage(V) read file_path key end - def []=(index : Int32, value : V) - old_value = self.[index]? + def []=(key : Int32, value : V) + old_value = self.[key]? - check_collisions! index, value, old_value + check_collisions! key, value, old_value # Removes any old indices or partitions pointing to a value about # to be replaced. if old_value - remove_indexes index, old_value + remove_indexes key, old_value end # Avoids corruption in case the application crashes while writing. 
- file_path(index).tap do |path| + file_path(key).tap do |path| ::File.write "#{path}.new", value.to_json ::FileUtils.mv "#{path}.new", path end - write_partitions index, value + write_indexes key, value - if index > last_index - self.last_index = index + if key > last_key + self.last_key = key end end + # :inherit: def delete(key : Int32) + request_lock "key" + value = unsafe_delete key + release_lock "key" + + value + end + + # :inherit: + def unsafe_delete(key : Int32) value = self[key]? return if value.nil? @@ -342,9 +445,10 @@ class DODB::DataBase(V) < DODB::Storage(V) value end + private def each_key(reversed = false) start = 0 - _end = last_index + _end = last_key step = 1 if reversed @@ -365,23 +469,19 @@ class DODB::DataBase(V) < DODB::Storage(V) end end - ## - # CAUTION: Very slow. Try not to use. - # Can be useful for making dumps or to restore a database, however. - def each_with_index(reversed : Bool = false, start_offset = 0, end_offset : Int32? = nil) + # Lists all database entries with their key. + # Can be useful for making dumps or to restore a database. + # + # WARNING: Very slow. Try not to use. + def each_with_key(reversed : Bool = false, offset = 0, limit : Int32? = -1) dirname = data_path - offset = -1 - each_key(reversed) do |key, path| - offset += 1 + offset -= 1 if offset >= 0 + next if offset >= 0 - if offset < start_offset - next - end - if !end_offset.nil? && offset > end_offset - next - end + return if limit == 0 + limit -= 1 if limit > 0 begin # FIXME: Only intercept JSON parsing errors.