Compare commits

...

3 Commits

5 changed files with 116 additions and 101 deletions

View File

@ -162,25 +162,16 @@ $ tree storage/
... ...
└── tags └── tags
└── by_keyword └── by_keyword
└── other-tags ├── elegant
├── average │   ├── 0000000000.json -> ../../../data/0000000000.json
│   ├── data │   └── 0000000003.json -> ../../../data/0000000003.json
│   │   └── 0000000004.json -> ../../../../..//data/0000000004.json ├── impressive
... │   ├── 0000000000.json -> ../../../data/0000000000.json
├── dirty │   ├── 0000000001.json -> ../../../data/0000000001.json
│   ├── data │   └── 0000000003.json -> ../../../data/0000000003.json
│   │   └── 0000000005.json -> ../../../../..//data/0000000005.json
...
├── elegant
│   ├── data
│   │   ├── 0000000000.json -> ../../../../..//data/0000000000.json
│   │   └── 0000000003.json -> ../../../../..//data/0000000003.json
... ...
``` ```
This is very similar to partitions, but there is a bit more complexity here since we eventually search for a car matching a combination of keywords. Tags are very similar to partitions and are used in exactly the same way for search, update and deletion.
**TODO**: explanations about our tag-based search and an example.
## Updating an object ## Updating an object
@ -204,23 +195,41 @@ cars_by_id.update "86a07924-ab3a-4f46-a975-e9803acba22d", car
Or, in the case the object may not yet exist: Or, in the case the object may not yet exist:
```Crystal ```Crystal
cars_by_id.update_or_create car.id, car cars_by_id.update_or_create car.id, car
# Search by partitions: all blue cars.
pp! cars_by_color.get "blue"
# Search by tags: all elegant cars.
pp! cars_by_keyword.get "elegant"
``` ```
Changing a value that is related to a partition or a tag will automatically do what you would expect: de-index then re-index.
You won't find yourself with a bunch of invalid symbolic links all over the place.
## Removing an object ## Removing an object
```Crystal ```Crystal
# Remove a value based on an index.
cars_by_id.delete "86a07924-ab3a-4f46-a975-e9803acba22d" cars_by_id.delete "86a07924-ab3a-4f46-a975-e9803acba22d"
# Remove a value based on a partition.
cars_by_color.delete "red" cars_by_color.delete "red"
cars_by_color.delete "red", do |car| cars_by_color.delete "blue", do |car|
car.keywords.empty car.keywords.empty
end end
# Remove a value based on a tag.
cars_by_keyword.delete "shiny"
cars_by_keyword.delete "elegant", do |car|
car.name == "GTI"
end
``` ```
In this last example, we apply the function on red cars only. In this code snippet, we apply a function on blue cars only;
and blue cars are only removed if they don't have any associated keywords.
The same goes for elegant cars: only those named "GTI" are removed.
This represents a performance boost compared to applying the function on all the cars. This represents a performance boost compared to applying the function on all the cars.
# Complete example # Complete example
```Crystal ```Crystal
@ -292,11 +301,10 @@ pp! cars_by_name.get "Corvet"
# based on a partition (print all red cars) # based on a partition (print all red cars)
pp! cars_by_color.get "red" pp! cars_by_color.get "red"
# based on a tag # based on a tag (print all fast cars)
pp! cars_by_keyword.get "fast" pp! cars_by_keyword.get "fast"
############ ############
# Updating # # Updating #
############ ############
@ -314,7 +322,11 @@ cars_by_name.update "Bullet-GT", car # the name changed
car = Car.new "Mustang", "red", [] of String car = Car.new "Mustang", "red", [] of String
cars_by_name.update_or_create car.name, car cars_by_name.update_or_create car.name, car
# We all know it, elegant cars are also expensive.
cars_by_keyword.get("elegant").each do |car|
car.keywords << "expensive"
cars_by_name.update car.name, car
end
############### ###############
# Deleting... # # Deleting... #
@ -328,9 +340,8 @@ cars_by_color.delete "red"
# based on a color (but not only) # based on a color (but not only)
cars_by_color.delete "blue", &.name.==("GTI") cars_by_color.delete "blue", &.name.==("GTI")
## TAG-based deletion, soon. # based on a keyword
# # based on a keyword cars_by_keyword.delete "solid"
# cars_by_keyword.delete "solid" # based on a keyword (but not only)
# # based on a keyword (but not only) cars_by_keyword.delete "fast", &.name.==("Corvet")
# cars_by_keyword.delete "fast", &.name.==("Corvet")
``` ```

View File

@ -277,7 +277,7 @@ describe "DODB::DataBase" do
end end
# Removing the “flagship” tag, brace for impact. # Removing the “flagship” tag, brace for impact.
flagship, index = db_ships_by_tags.get_with_indices("flagship")[0] flagship, index = db_ships_by_tags.get_with_indice("flagship")[0]
flagship.tags = [] of String flagship.tags = [] of String
db[index] = flagship db[index] = flagship

View File

@ -148,15 +148,15 @@ abstract class DODB::Storage(V)
end end
def new_tags(name : String, &block : Proc(V, Array(String))) def new_tags(name : String, &block : Proc(V, Array(String)))
Tags(V).new(@directory_name, name, block).tap do |tags| Tags(V).new(self, @directory_name, name, block).tap do |tags|
@indexers << tags @indexers << tags
end end
end end
def get_tags(name, key : String) def get_tags(name, key : String)
partition = @indexers.find &.name.==(name) tag = @indexers.find &.name.==(name)
partition.not_nil!.as(DODB::Tags).get name, key tag.not_nil!.as(DODB::Tags).get name, key
end end
def new_directed_graph(name : String, index : DODB::Index(V), &block : Proc(V, Array(String))) : DirectedGraph(V) def new_directed_graph(name : String, index : DODB::Index(V), &block : Proc(V, Array(String))) : DirectedGraph(V)

View File

@ -8,6 +8,7 @@ class DODB::Partition(V) < DODB::Indexer(V)
property key_proc : Proc(V, String) property key_proc : Proc(V, String)
getter storage_root : String getter storage_root : String
# Required to remove an entry in the DB.
@storage : DODB::Storage(V) @storage : DODB::Storage(V)
def initialize(@storage, @storage_root, @name, @key_proc) def initialize(@storage, @storage_root, @name, @key_proc)

View File

@ -6,77 +6,48 @@ class DODB::Tags(V) < DODB::Indexer(V)
property key_proc : Proc(V, Array(String)) property key_proc : Proc(V, Array(String))
getter storage_root : String getter storage_root : String
def initialize(@storage_root, @name, @key_proc) # Required to remove an entry in the DB.
@storage : DODB::Storage(V)
def initialize(@storage, @storage_root, @name, @key_proc)
::Dir.mkdir_p indexing_directory ::Dir.mkdir_p indexing_directory
end end
# FIXME: The slow is damn too high.
def tag_combinations(tags)
combinations = [] of Array(String)
tags.size.times do |i|
combinations.concat tags.permutations (i+1)
end
return combinations
end
def index(key, value)
indices = key_proc.call(value).sort
tag_combinations(indices).each do |previous_indices|
# FIXME: Not on `index`, but on the list of all previous indices.
symdir = symlinks_directory previous_indices
otdir = other_tags_directory previous_indices
::Dir.mkdir_p symdir
::Dir.mkdir_p otdir
symlink = get_tagged_entry_path(key, previous_indices)
::File.delete symlink if ::File.exists? symlink
::File.symlink get_data_symlink(key, previous_indices), symlink
end
end
def deindex(key, value)
indices = key_proc.call(value).sort
tag_combinations(indices).each do |previous_indices|
# FIXME: Not on `index`, but on the list of all previous indices.
symdir = symlinks_directory previous_indices
otdir = other_tags_directory previous_indices
::Dir.mkdir_p symdir
::Dir.mkdir_p otdir
symlink = get_tagged_entry_path(key, previous_indices)
::File.delete symlink if ::File.exists? symlink
# FIXME: Remove directories if empty?
end
end
def check!(key, value, old_value) def check!(key, value, old_value)
return true # Tags dont have collisions or overloads. return true # Tags dont have collisions or overloads.
end end
def get_with_indices(key : String) : Array(Tuple(V, Int32)) def index(key, value)
get_with_indices [key] indices = key_proc.call(value)
indices.each do |i|
symlink = get_tagged_entry_path(i, key)
Dir.mkdir_p ::File.dirname symlink
# FIXME: Should not happen anymore. Should we remove this?
::File.delete symlink if ::File.exists? symlink
::File.symlink get_data_symlink(key), symlink
end
end end
def get_with_indices(keys : Array(String)) : Array(Tuple(V, Int32)) def deindex(key, value)
indices = key_proc.call(value)
indices.each do |i|
symlink = get_tagged_entry_path(i, key)
::File.delete symlink
end
end
def get_with_indice(tag : String) : Array(Tuple(V, Int32))
r_value = Array(Tuple(V, Int32)).new r_value = Array(Tuple(V, Int32)).new
partition_directory = symlinks_directory keys tag_directory = indexing_directory tag
return r_value unless Dir.exists? partition_directory return r_value unless Dir.exists? tag_directory
Dir.each_child partition_directory do |child| Dir.each_child tag_directory do |child|
r_value << { r_value << {
V.from_json(::File.read("#{partition_directory}/#{child}")), V.from_json(::File.read("#{tag_directory}/#{child}")),
File.basename(child).gsub(/\.json$/, "").to_i File.basename(child).gsub(/\.json$/, "").to_i
} }
end end
@ -84,30 +55,62 @@ class DODB::Tags(V) < DODB::Indexer(V)
r_value r_value
end end
def get(key : String) : Array(V) def get_with_indices(keys : Array(String)) : Array(Tuple(V, Int32))
get_with_indices(key).map &.[0] r_value = Array(Tuple(V, Int32)).new
keys.each do |tag|
r_value.concat get_with_indice tag
end
r_value
end
def get(tag : String) : Array(V)
get_with_indice(tag).map &.[0]
end end
def get(keys : Array(String)) : Array(V) def get(keys : Array(String)) : Array(V)
get_with_indices(keys.sort).map &.[0] get_with_indices(keys.sort).map &.[0]
end end
# Removes every entry carrying `tag`.
# Forwards to the block-taking overload with a block that always returns true,
# so no entry is filtered out.
def delete(tag)
delete tag, do true end
end
# Removes the entries carrying `tag` for which the given block returns a truthy value.
# The block receives each entry deserialized as a `V`.
# Does nothing when the directory for `tag` does not exist.
def delete(tag, &matcher)
tag_directory = indexing_directory tag
return unless Dir.exists? tag_directory
Dir.each_child tag_directory do |child|
path = "#{tag_directory}/#{child}"
# The child is read and parsed from JSON so the block can inspect the value.
item = V.from_json ::File.read path
if yield item
# The storage key is recovered from the symlink target (see `get_key`).
key = get_key path
# NOTE(review): presumably `@storage.delete` de-indexes the entry, which would
# remove links from `tag_directory` while it is being iterated — confirm this is safe.
@storage.delete key
end
end
end
# Extracts the integer key from the target of the symlink at `path`:
# reads the link target, strips a trailing ".json", drops everything up to and
# including the last "/", and converts what remains to an integer.
private def get_key(path : String) : Int32
::File.readlink(path)
.sub(/\.json$/, "")
.sub(/^.*\//, "")
.to_i
end
def indexing_directory : String def indexing_directory : String
"#{@storage_root}/tags/by_#{@name}" "#{@storage_root}/tags/by_#{@name}"
end end
private def symlinks_directory(previous_indices : Array(String)) private def indexing_directory(tag)
"#{indexing_directory}#{previous_indices.map { |i| "/other-tags/#{i}" }.join}/data" "#{indexing_directory}/#{tag}"
end
private def other_tags_directory(previous_indices : Array(String))
"#{indexing_directory}#{previous_indices.map { |i| "/other-tags/#{i}" }.join}/other-tags"
end end
private def get_tagged_entry_path(key : String, indices : Array(String)) private def get_tagged_entry_path(tag : String, key : String)
"#{indexing_directory}#{indices.map { |i| "/other-tags/#{i}" }.join}/data/#{key}.json" "#{indexing_directory}/#{tag}/#{key}.json"
end end
private def get_data_symlink(key : String, indices : Array(String))
"../../../#{indices.map { "../../" }.join}/data/#{key}.json" private def get_data_symlink(key : String)
"../../../data/#{key}.json"
end end
end end