2019-12-12 00:44:05 +01:00
|
|
|
|
require "file_utils"
|
|
|
|
|
require "json"
|
|
|
|
|
|
|
|
|
|
require "./dodb/*"
|
|
|
|
|
|
2020-07-21 12:49:32 +02:00
|
|
|
|
abstract class DODB::Storage(V)
	# Root directory in which the database stores its data files,
	# indices and lock files.
	property directory_name : String

	def initialize(@directory_name : String)
	end

	# Acquires an advisory lock by atomically creating a lock file.
	# Spin-waits in 1 ms steps until the exclusive creation succeeds.
	def request_lock(name, subname = nil)
		r = -1
		file_path = get_lock_file_path name, subname
		file_perms = 0o644

		# O_EXCL | O_CREAT makes open(2) fail while the file already
		# exists, which is what makes the lock acquisition atomic.
		flags = LibC::O_EXCL | LibC::O_CREAT

		while (r = LibC.open file_path, flags, file_perms) == -1
			sleep 1.milliseconds
		end

		LibC.close r
	end

	# Releases a lock previously acquired with #request_lock.
	def release_lock(name, subname = nil)
		File.delete get_lock_file_path name, subname
	end

	# Path of the file storing the highest key ever used.
	private def index_file
		"#{@directory_name}/last-index"
	end

	# Reads the highest used key back from disk.
	def last_index : Int32
		File.read(index_file).to_i
	end

	# Persists the highest used key to disk.
	def last_index=(x : Int32)
		file = File.open(index_file, "w")

		file << x.to_s

		file.close

		x
	rescue
		raise Exception.new "could not update index file"
	end

	# Formats a key the way it appears on the file system
	# (zero-padded to ten digits).
	def stringify_key(key : Int32)
		# Negative numbers give strange results with Crystal’s printf.
		if key >= 0
			"%010i" % key
		else
			key.to_s
		end
	end

	# Appends an item under the next free key and returns that key.
	def <<(item : V)
		request_lock "index"

		index = last_index + 1
		self[index] = item
		self.last_index = index

		release_lock "index"

		index # FIXME: Should we really return the internal key?
	rescue e
		# Never leave the database locked when the insertion fails.
		release_lock "index"

		raise e
	end

	# Yields every stored value, optionally reversed and/or sliced by
	# offsets. Thin wrapper around #each_with_index.
	def each(reversed : Bool = false, start_offset = 0, end_offset : Int32? = nil)
		each_with_index(
			reversed: reversed,
			start_offset: start_offset,
			end_offset: end_offset
		) do |item, index|
			yield item
		end
	end

	##
	# CAUTION: Very slow. Try not to use.
	def to_a(reversed : Bool = false, start_offset = 0, end_offset : Int32? = nil)
		array = ::Array(V).new

		each(
			reversed: reversed,
			start_offset: start_offset,
			end_offset: end_offset
		) do |value|
			array << value
		end

		array
	end

	##
	# CAUTION: Very slow. Try not to use.
	def to_h(reversed : Bool = false, start_offset = 0, end_offset : Int32? = nil)
		hash = ::Hash(Int32, V).new

		each_with_index(
			reversed: reversed,
			start_offset: start_offset,
			end_offset: end_offset
		) do |element, index|
			hash[index] = element
		end

		hash
	end

	##
	# name is the name that will be used on the file system.
	def new_index(name : String, &block : Proc(V, String))
		CachedIndex(V).new(self, @directory_name, name, block).tap do |indexer|
			@indexers << indexer
		end
	end

	# Same as #new_index, but the block may return DODB::NoIndex to
	# leave some values unindexed.
	def new_nilable_index(name : String, &block : Proc(V, String | DODB::NoIndex))
		CachedIndex(V).new(self, @directory_name, name, block).tap do |indexer|
			@indexers << indexer
		end
	end

	# Same as #new_index, without the in-memory cache.
	def new_uncached_index(name : String, &block : Proc(V, String))
		Index(V).new(self, @directory_name, name, block).tap do |indexer|
			@indexers << indexer
		end
	end

	# Nilable variant of #new_uncached_index.
	def new_nilable_uncached_index(name : String, &block : Proc(V, String | DODB::NoIndex))
		Index(V).new(self, @directory_name, name, block).tap do |indexer|
			@indexers << indexer
		end
	end

	# Retrieves the value associated with `key` in the named index.
	def get_index(name : String, key)
		index = @indexers.find &.name.==(name)

		index.not_nil!.as(DODB::Index).get key
	end

	##
	# name is the name that will be used on the file system.
	def new_partition(name : String, &block : Proc(V, String))
		CachedPartition(V).new(self, @directory_name, name, block).tap do |table|
			@indexers << table
		end
	end

	# Same as #new_partition, without the in-memory cache.
	def new_uncached_partition(name : String, &block : Proc(V, String))
		Partition(V).new(self, @directory_name, name, block).tap do |table|
			@indexers << table
		end
	end

	# Retrieves every value belonging to the named partition.
	def get_partition(table_name : String, partition_name : String)
		partition = @indexers.find &.name.==(table_name)

		partition.not_nil!.as(DODB::Partition).get partition_name
	end

	# Updates every registered indexer (indices, partitions, tags…)
	# for the given key/value pair.
	def write_partitions(key : Int32, value : V)
		@indexers.each &.index(stringify_key(key), value)
	end

	# Registers a (cached) tags-based indexer under `name`.
	def new_tags(name : String, &block : Proc(V, Array(String)))
		CachedTags(V).new(self, @directory_name, name, block).tap do |tags|
			@indexers << tags
		end
	end

	# Same as #new_tags, without the in-memory cache.
	def new_uncached_tags(name : String, &block : Proc(V, Array(String)))
		Tags(V).new(self, @directory_name, name, block).tap do |tags|
			@indexers << tags
		end
	end

	# Retrieves every value carrying tag `key` in the named tag set.
	def get_tags(name, key : String)
		tag = @indexers.find &.name.==(name)

		tag.not_nil!.as(DODB::Tags).get name, key
	end

	# Registers a directed-graph indexer built on top of an existing index.
	def new_directed_graph(name : String, index : DODB::Index(V), &block : Proc(V, Array(String))) : DirectedGraph(V)
		DirectedGraph(V).new(self, @directory_name, index, name, block).tap do |table|
			@indexers << table
		end
	end

	# Raises if storing `value` under `key` would collide in any index.
	def check_collisions!(key : Int32, value : V, old_value : V?)
		@indexers.each &.check!(stringify_key(key), value, old_value)
	end

	# Removes and returns the last (highest-key) entry, or nil when the
	# database is empty.
	def pop
		request_lock "index"

		index = last_index

		# Some entries may have been removed. We’ll skip over those.
		# Not the most efficient if a large number of indices are empty.
		while index >= 0 && self[index]?.nil?
			index = index - 1
		end

		if index < 0
			# FIX: the "index" lock used to be leaked on this early return.
			release_lock "index"
			return nil
		end

		poped = self[index]

		self.delete index

		# FIX: was `last_index = index - 1`, which only created a local
		# variable and never invoked the #last_index= setter, so the
		# on-disk index was never decremented.
		self.last_index = index - 1

		release_lock "index"

		poped
	rescue e
		# Never leave the database locked on failure (mirrors #<<).
		release_lock "index"

		raise e
	end

	private def data_path
		"#{@directory_name}/data"
	end

	private def file_path(key : Int32)
		"#{data_path}/%010i" % key
	end

	private def locks_directory : String
		"#{@directory_name}/locks"
	end

	private def get_lock_file_path(name : String, subname : String? = nil)
		if subname
			"#{locks_directory}/#{name}-#{subname}.lock" # FIXME: Separator that causes less collisions?
		else
			"#{locks_directory}/#{name}.lock"
		end
	end

	# Deserializes the value stored at `file_path`.
	private def read(file_path : String)
		V.from_json ::File.read file_path
	end

	# Wipes and recreates the data directory.
	private def remove_data!
		FileUtils.rm_rf data_path
		Dir.mkdir_p data_path
	end

	# Removes every indexer’s on-disk directory.
	private def remove_indexing!
		@indexers.each do |indexer|
			FileUtils.rm_rf indexer.indexing_directory
		end
	end

	# A very slow operation that removes all indices and then rewrites
	# them all.
	# FIXME: Is this really useful in its current form? We should remove the
	#        index directories, not the indices based on our current (and
	#        possiblly different from what’s stored) data.
	def reindex_everything!
		old_data = to_h

		remove_indexing!
		remove_data!

		old_data.each do |index, item|
			self[index] = item
		end
	end

	# Removes `value` from every registered indexer.
	def remove_indexes(key : Int32, value : V)
		@indexers.each &.deindex(stringify_key(key), value)
	end

	# Nil-returning variant of #[].
	def []?(key : Int32) : V?
		self[key]
	rescue MissingEntry
		# FIXME: Only rescue JSON and “no such file” errors.
		return nil
	end

	abstract def [](key : Int32)
	abstract def delete(key : Int32)
end
|
|
|
|
|
|
|
|
|
|
class DODB::DataBase(V) < DODB::Storage(V)
	@indexers = [] of Indexer(V)

	def initialize(@directory_name : String)
		Dir.mkdir_p data_path
		Dir.mkdir_p locks_directory

		# A brand-new database starts empty: last-index is -1.
		begin
			self.last_index
		rescue
			self.last_index = -1
		end
	end

	# Fetches the value stored under `key`; raises MissingEntry when
	# no file exists for that key.
	def [](key : Int32) : V
		raise MissingEntry.new(key) unless ::File.exists? file_path key

		read file_path key
	end

	# Stores `value` under `index`, updating every index/partition/tag.
	def []=(index : Int32, value : V)
		old_value = self.[index]?

		check_collisions! index, value, old_value

		# Removes any old indices or partitions pointing to a value about
		# to be replaced.
		if old_value
			remove_indexes index, old_value
		end

		# Avoids corruption in case the application crashes while writing.
		file_path(index).tap do |path|
			::File.write "#{path}.new", value.to_json
			::FileUtils.mv "#{path}.new", path
		end

		write_partitions index, value

		if index > last_index
			self.last_index = index
		end
	end

	# Removes the entry under `key` (and its index entries); returns the
	# removed value, or nil when the key did not exist.
	def delete(key : Int32)
		value = self[key]?

		return if value.nil?

		begin
			::File.delete file_path key
		rescue File::NotFoundError
			# Already gone (e.g. removed concurrently); nothing left to do.
		end

		remove_indexes key, value

		value
	end

	# Yields every existing key and its file path, ascending by default,
	# descending when `reversed` is true. Deleted keys are skipped.
	private def each_key(reversed = false)
		start = 0
		_end = last_index
		step = 1

		if reversed
			start = _end
			_end = 0
			step = -1
		end

		key = start
		while step == 1 ? key <= _end : key >= _end
			full_path = file_path key

			if File.exists? full_path
				yield key, full_path
			end

			key = key + step
		end
	end

	##
	# CAUTION: Very slow. Try not to use.
	# Can be useful for making dumps or to restore a database, however.
	def each_with_index(reversed : Bool = false, start_offset = 0, end_offset : Int32? = nil)
		# FIX: removed the unused local `dirname = data_path`.
		offset = -1

		each_key(reversed) do |key, path|
			offset += 1

			if offset < start_offset
				next
			end
			if !end_offset.nil? && offset > end_offset
				next
			end

			begin
				# FIXME: Only intercept JSON parsing errors.
				field = read path
			rescue
				next
			end

			yield field, key
		end
	end
end
|
|
|
|
|
|
2020-07-21 12:49:32 +02:00
|
|
|
|
require "./cached.cr"
|