Compare commits

..

No commits in common. "fce2b633ce8c39c63276ad60ad25764ac7f1bf10" and "35f5c52cbc0c88ae66413a135eaef763645e2aaf" have entirely different histories.

5 changed files with 101 additions and 116 deletions

View File

@ -162,16 +162,25 @@ $ tree storage/
... ...
└── tags └── tags
└── by_keyword └── by_keyword
└── other-tags
├── average
│   ├── data
│   │   └── 0000000004.json -> ../../../../..//data/0000000004.json
...
├── dirty
│   ├── data
│   │   └── 0000000005.json -> ../../../../..//data/0000000005.json
...
├── elegant ├── elegant
│   ├── 0000000000.json -> ../../../data/0000000000.json │   ├── data
│   └── 0000000003.json -> ../../../data/0000000003.json │   │   ├── 0000000000.json -> ../../../../..//data/0000000000.json
├── impressive │   │   └── 0000000003.json -> ../../../../..//data/0000000003.json
│   ├── 0000000000.json -> ../../../data/0000000000.json
│   ├── 0000000001.json -> ../../../data/0000000001.json
│   └── 0000000003.json -> ../../../data/0000000003.json
... ...
``` ```
Tags are very similar to partitions and are used the exact same way for search, update and deletion. This is very similar to partitions, but there is a bit more complexity here since we eventually search for a car matching a combination of keywords.
**TODO**: explanations about our tag-based search and an example.
## Updating an object ## Updating an object
@ -195,41 +204,23 @@ cars_by_id.update "86a07924-ab3a-4f46-a975-e9803acba22d", car
Or, in the case the object may not yet exist: Or, in the case the object may not yet exist:
```Crystal ```Crystal
cars_by_id.update_or_create car.id, car cars_by_id.update_or_create car.id, car
# Search by partitions: all blue cars.
pp! cars_by_color.get "blue"
# Search by tags: all elegant cars.
pp! cars_by_keyword.get "elegant"
``` ```
Changing a value that is related to a partition or a tag will automatically do what you would expect: de-index then re-index.
You won't find yourself with a bunch of invalid symbolic links all over the place.
## Removing an object ## Removing an object
```Crystal ```Crystal
# Remove a value based on an index.
cars_by_id.delete "86a07924-ab3a-4f46-a975-e9803acba22d" cars_by_id.delete "86a07924-ab3a-4f46-a975-e9803acba22d"
# Remove a value based on a partition.
cars_by_color.delete "red" cars_by_color.delete "red"
cars_by_color.delete "blue", do |car| cars_by_color.delete "red", do |car|
car.keywords.empty car.keywords.empty
end end
# Remove a value based on a tag.
cars_by_keyword.delete "shiny"
cars_by_keyword.delete "elegant", do |car|
car.name == "GTI"
end
``` ```
In this code snippet, we apply a function on blue cars only; In this last example, we apply the function on red cars only.
and blue cars are only removed if they don't have any associated keywords.
Same thing for elegant cars.
This represents a performance boost compared to applying the function on all the cars. This represents a performance boost compared to applying the function on all the cars.
# Complete example # Complete example
```Crystal ```Crystal
@ -301,10 +292,11 @@ pp! cars_by_name.get "Corvet"
# based on a partition (print all red cars) # based on a partition (print all red cars)
pp! cars_by_color.get "red" pp! cars_by_color.get "red"
# based on a tag (print all fast cars) # based on a tag
pp! cars_by_keyword.get "fast" pp! cars_by_keyword.get "fast"
############ ############
# Updating # # Updating #
############ ############
@ -322,11 +314,7 @@ cars_by_name.update "Bullet-GT", car # the name changed
car = Car.new "Mustang", "red", [] of String car = Car.new "Mustang", "red", [] of String
cars_by_name.update_or_create car.name, car cars_by_name.update_or_create car.name, car
# We all know it, elegant cars are also expensive.
cars_by_keyword.get("elegant").each do |car|
car.keywords << "expensive"
cars_by_name.update car.name, car
end
############### ###############
# Deleting... # # Deleting... #
@ -340,8 +328,9 @@ cars_by_color.delete "red"
# based on a color (but not only) # based on a color (but not only)
cars_by_color.delete "blue", &.name.==("GTI") cars_by_color.delete "blue", &.name.==("GTI")
# based on a keyword ## TAG-based deletion, soon.
cars_by_keyword.delete "solid" # # based on a keyword
# based on a keyword (but not only) # cars_by_keyword.delete "solid"
cars_by_keyword.delete "fast", &.name.==("Corvet") # # based on a keyword (but not only)
# cars_by_keyword.delete "fast", &.name.==("Corvet")
``` ```

View File

@ -277,7 +277,7 @@ describe "DODB::DataBase" do
end end
# Removing the “flagship” tag, brace for impact. # Removing the “flagship” tag, brace for impact.
flagship, index = db_ships_by_tags.get_with_indice("flagship")[0] flagship, index = db_ships_by_tags.get_with_indices("flagship")[0]
flagship.tags = [] of String flagship.tags = [] of String
db[index] = flagship db[index] = flagship

View File

@ -148,15 +148,15 @@ abstract class DODB::Storage(V)
end end
def new_tags(name : String, &block : Proc(V, Array(String))) def new_tags(name : String, &block : Proc(V, Array(String)))
Tags(V).new(self, @directory_name, name, block).tap do |tags| Tags(V).new(@directory_name, name, block).tap do |tags|
@indexers << tags @indexers << tags
end end
end end
def get_tags(name, key : String) def get_tags(name, key : String)
tag = @indexers.find &.name.==(name) partition = @indexers.find &.name.==(name)
tag.not_nil!.as(DODB::Tags).get name, key partition.not_nil!.as(DODB::Tags).get name, key
end end
def new_directed_graph(name : String, index : DODB::Index(V), &block : Proc(V, Array(String))) : DirectedGraph(V) def new_directed_graph(name : String, index : DODB::Index(V), &block : Proc(V, Array(String))) : DirectedGraph(V)

View File

@ -8,7 +8,6 @@ class DODB::Partition(V) < DODB::Indexer(V)
property key_proc : Proc(V, String) property key_proc : Proc(V, String)
getter storage_root : String getter storage_root : String
# Required to remove an entry in the DB.
@storage : DODB::Storage(V) @storage : DODB::Storage(V)
def initialize(@storage, @storage_root, @name, @key_proc) def initialize(@storage, @storage_root, @name, @key_proc)

View File

@ -6,48 +6,77 @@ class DODB::Tags(V) < DODB::Indexer(V)
property key_proc : Proc(V, Array(String)) property key_proc : Proc(V, Array(String))
getter storage_root : String getter storage_root : String
# Required to remove an entry in the DB. def initialize(@storage_root, @name, @key_proc)
@storage : DODB::Storage(V)
def initialize(@storage, @storage_root, @name, @key_proc)
::Dir.mkdir_p indexing_directory ::Dir.mkdir_p indexing_directory
end end
# Lists every ordered arrangement of `tags`, for each length from 1 up to
# `tags.size`; shorter arrangements come first.
#
# FIXME: This is far too slow: the number of arrangements explodes as the
# number of tags grows.
def tag_combinations(tags)
  (1..tags.size).flat_map { |length| tags.permutations(length) }
end
# Indexes the entry `key` under every arrangement of the tags that `key_proc`
# extracts from `value`.
#
# The tags are sorted, expanded with `tag_combinations`, and for each
# arrangement the `data` and `other-tags` directories are created before a
# symlink to the stored entry is written into the `data` directory.
def index(key, value)
  sorted_tags = key_proc.call(value).sort

  tag_combinations(sorted_tags).each do |combination|
    # FIXME: Not on `index`, but on the list of all previous indices.
    ::Dir.mkdir_p symlinks_directory(combination)
    ::Dir.mkdir_p other_tags_directory(combination)

    link_path = get_tagged_entry_path(key, combination)

    # Replace any link already present for this entry.
    ::File.delete link_path if ::File.exists? link_path
    ::File.symlink get_data_symlink(key, combination), link_path
  end
end
# Removes the symlinks to the entry `key` from every arrangement of the tags
# that `key_proc` extracts from `value`.
#
# The links are looked up the same way `index` lays them out: tags are sorted,
# then expanded with `tag_combinations`. This method only deletes; it does not
# create any directory, so deindexing a value that was never indexed leaves
# the index tree untouched.
def deindex(key, value)
  sorted_tags = key_proc.call(value).sort

  tag_combinations(sorted_tags).each do |combination|
    # FIXME: Not on `index`, but on the list of all previous indices.
    link_path = get_tagged_entry_path(key, combination)

    # A missing link is not an error: there is simply nothing to remove.
    ::File.delete link_path if ::File.exists? link_path

    # FIXME: Remove directories if empty?
  end
end
def check!(key, value, old_value) def check!(key, value, old_value)
return true # Tags dont have collisions or overloads. return true # Tags dont have collisions or overloads.
end end
def index(key, value) def get_with_indices(key : String) : Array(Tuple(V, Int32))
indices = key_proc.call(value) get_with_indices [key]
indices.each do |i|
symlink = get_tagged_entry_path(i, key)
Dir.mkdir_p ::File.dirname symlink
# FIXME: Should not happen anymore. Should we remove this?
::File.delete symlink if ::File.exists? symlink
::File.symlink get_data_symlink(key), symlink
end
end end
def deindex(key, value) def get_with_indices(keys : Array(String)) : Array(Tuple(V, Int32))
indices = key_proc.call(value)
indices.each do |i|
symlink = get_tagged_entry_path(i, key)
::File.delete symlink
end
end
def get_with_indice(tag : String) : Array(Tuple(V, Int32))
r_value = Array(Tuple(V, Int32)).new r_value = Array(Tuple(V, Int32)).new
tag_directory = indexing_directory tag partition_directory = symlinks_directory keys
return r_value unless Dir.exists? tag_directory return r_value unless Dir.exists? partition_directory
Dir.each_child tag_directory do |child| Dir.each_child partition_directory do |child|
r_value << { r_value << {
V.from_json(::File.read("#{tag_directory}/#{child}")), V.from_json(::File.read("#{partition_directory}/#{child}")),
File.basename(child).gsub(/\.json$/, "").to_i File.basename(child).gsub(/\.json$/, "").to_i
} }
end end
@ -55,62 +84,30 @@ class DODB::Tags(V) < DODB::Indexer(V)
r_value r_value
end end
def get_with_indices(keys : Array(String)) : Array(Tuple(V, Int32)) def get(key : String) : Array(V)
r_value = Array(Tuple(V, Int32)).new get_with_indices(key).map &.[0]
keys.each do |tag|
r_value.concat get_with_indice tag
end
r_value
end
def get(tag : String) : Array(V)
get_with_indice(tag).map &.[0]
end end
def get(keys : Array(String)) : Array(V) def get(keys : Array(String)) : Array(V)
get_with_indices(keys.sort).map &.[0] get_with_indices(keys.sort).map &.[0]
end end
# Deletes every entry indexed under `tag`, by delegating to the block form
# with a matcher that accepts everything.
def delete(tag)
  delete(tag) { true }
end
# Deletes the entries indexed under `tag` for which the block returns a
# truthy value.
#
# Each child of the tag directory is read and parsed with `V.from_json`, then
# yielded to the block. Matching entries are removed through `@storage`, using
# the key recovered from the symlink target by `get_key`.
# Does nothing when the tag directory does not exist.
def delete(tag, &matcher)
  directory = indexing_directory(tag)
  return unless Dir.exists?(directory)

  Dir.each_child(directory) do |entry|
    entry_path = "#{directory}/#{entry}"
    candidate = V.from_json(::File.read(entry_path))
    next unless yield candidate

    @storage.delete get_key(entry_path)
  end
end
# Recovers the numeric key of an entry from the symlink at `path`.
#
# The link target is read, its trailing `.json` and everything up to the last
# `/` are stripped, and what remains is converted to an integer.
private def get_key(path : String) : Int32
  target = ::File.readlink(path)
  without_extension = target.sub(/\.json$/, "")
  file_stem = without_extension.sub(/^.*\//, "")
  file_stem.to_i
end
def indexing_directory : String def indexing_directory : String
"#{@storage_root}/tags/by_#{@name}" "#{@storage_root}/tags/by_#{@name}"
end end
private def indexing_directory(tag) private def symlinks_directory(previous_indices : Array(String))
"#{indexing_directory}/#{tag}" "#{indexing_directory}#{previous_indices.map { |i| "/other-tags/#{i}" }.join}/data"
end
# Path of the `other-tags` directory nested under the given chain of tags:
# one `/other-tags/<tag>` level per element of `previous_indices`, below
# `indexing_directory`.
private def other_tags_directory(previous_indices : Array(String))
  nested_path = previous_indices.map { |tag| "/other-tags/#{tag}" }.join
  "#{indexing_directory}#{nested_path}/other-tags"
end
private def get_tagged_entry_path(tag : String, key : String) private def get_tagged_entry_path(key : String, indices : Array(String))
"#{indexing_directory}/#{tag}/#{key}.json" "#{indexing_directory}#{indices.map { |i| "/other-tags/#{i}" }.join}/data/#{key}.json"
end
# Relative symlink target leading from a tagged entry back to the stored
# entry `data/<key>.json`.
#
# Starts from three levels up and climbs two more levels per element of
# `indices`.
private def get_data_symlink(key : String, indices : Array(String))
  extra_ascent = "../../" * indices.size
  "../../../#{extra_ascent}/data/#{key}.json"
end
end end
# Relative symlink target leading from a tagged entry, three levels up, to the
# stored entry `data/<key>.json`.
private def get_data_symlink(key : String)
  "../../../data/" + key + ".json"
end
end