Compare commits

..

No commits in common. "fce2b633ce8c39c63276ad60ad25764ac7f1bf10" and "35f5c52cbc0c88ae66413a135eaef763645e2aaf" have entirely different histories.

5 changed files with 101 additions and 116 deletions

View File

@ -162,16 +162,25 @@ $ tree storage/
...
└── tags
└── by_keyword
└── other-tags
├── average
│   ├── data
│   │   └── 0000000004.json -> ../../../../..//data/0000000004.json
...
├── dirty
│   ├── data
│   │   └── 0000000005.json -> ../../../../..//data/0000000005.json
...
├── elegant
│   ├── 0000000000.json -> ../../../data/0000000000.json
│   └── 0000000003.json -> ../../../data/0000000003.json
├── impressive
│   ├── 0000000000.json -> ../../../data/0000000000.json
│   ├── 0000000001.json -> ../../../data/0000000001.json
│   └── 0000000003.json -> ../../../data/0000000003.json
│   ├── data
│   │   ├── 0000000000.json -> ../../../../..//data/0000000000.json
│   │   └── 0000000003.json -> ../../../../..//data/0000000003.json
...
```
Tags are very similar to partitions and are used the exact same way for search, update and deletion.
This is very similar to partitions, but there is a bit more complexity here since we eventually search for a car matching a combination of keywords.
**TODO**: explanations about our tag-based search and an example.
## Updating an object
@ -195,41 +204,23 @@ cars_by_id.update "86a07924-ab3a-4f46-a975-e9803acba22d", car
Or, in case the object may not exist yet:
```Crystal
cars_by_id.update_or_create car.id, car
# Search by partitions: all blue cars.
pp! cars_by_color.get "blue"
# Search by tags: all elegant cars.
pp! cars_by_keyword.get "elegant"
```
Changing a value that is related to a partition or a tag will automatically do what you would expect: de-index then re-index.
You won't find yourself with a bunch of invalid symbolic links all over the place.
## Removing an object
```Crystal
# Remove a value based on an index.
cars_by_id.delete "86a07924-ab3a-4f46-a975-e9803acba22d"
# Remove a value based on a partition.
cars_by_color.delete "red"
cars_by_color.delete "blue", do |car|
cars_by_color.delete "red", do |car|
car.keywords.empty?
end
# Remove a value based on a tag.
cars_by_keyword.delete "shiny"
cars_by_keyword.delete "elegant", do |car|
car.name == "GTI"
end
```
In this code snippet, we apply a function on blue cars only;
and blue cars are only removed if they don't have any associated keywords.
Same thing for elegant cars.
In this last example, the function is applied to red cars only.
This is faster than applying the function to all the cars.
# Complete example
```Crystal
@ -301,10 +292,11 @@ pp! cars_by_name.get "Corvet"
# based on a partition (print all red cars)
pp! cars_by_color.get "red"
# based on a tag (print all fast cars)
# based on a tag
pp! cars_by_keyword.get "fast"
############
# Updating #
############
@ -322,11 +314,7 @@ cars_by_name.update "Bullet-GT", car # the name changed
car = Car.new "Mustang", "red", [] of String
cars_by_name.update_or_create car.name, car
# We all know it, elegant cars are also expensive.
cars_by_keyword.get("elegant").each do |car|
car.keywords << "expensive"
cars_by_name.update car.name, car
end
###############
# Deleting... #
@ -340,8 +328,9 @@ cars_by_color.delete "red"
# based on a color (but not only)
cars_by_color.delete "blue", &.name.==("GTI")
# based on a keyword
cars_by_keyword.delete "solid"
# based on a keyword (but not only)
cars_by_keyword.delete "fast", &.name.==("Corvet")
## TAG-based deletion, soon.
# # based on a keyword
# cars_by_keyword.delete "solid"
# # based on a keyword (but not only)
# cars_by_keyword.delete "fast", &.name.==("Corvet")
```

View File

@ -277,7 +277,7 @@ describe "DODB::DataBase" do
end
# Removing the “flagship” tag, brace for impact.
flagship, index = db_ships_by_tags.get_with_indice("flagship")[0]
flagship, index = db_ships_by_tags.get_with_indices("flagship")[0]
flagship.tags = [] of String
db[index] = flagship

View File

@ -148,15 +148,15 @@ abstract class DODB::Storage(V)
end
def new_tags(name : String, &block : Proc(V, Array(String)))
Tags(V).new(self, @directory_name, name, block).tap do |tags|
Tags(V).new(@directory_name, name, block).tap do |tags|
@indexers << tags
end
end
def get_tags(name, key : String)
tag = @indexers.find &.name.==(name)
partition = @indexers.find &.name.==(name)
tag.not_nil!.as(DODB::Tags).get name, key
partition.not_nil!.as(DODB::Tags).get name, key
end
def new_directed_graph(name : String, index : DODB::Index(V), &block : Proc(V, Array(String))) : DirectedGraph(V)

View File

@ -8,7 +8,6 @@ class DODB::Partition(V) < DODB::Indexer(V)
property key_proc : Proc(V, String)
getter storage_root : String
# Required to remove an entry in the DB.
@storage : DODB::Storage(V)
def initialize(@storage, @storage_root, @name, @key_proc)

View File

@ -6,48 +6,77 @@ class DODB::Tags(V) < DODB::Indexer(V)
property key_proc : Proc(V, Array(String))
getter storage_root : String
# Required to remove an entry in the DB.
@storage : DODB::Storage(V)
def initialize(@storage, @storage_root, @name, @key_proc)
def initialize(@storage_root, @name, @key_proc)
::Dir.mkdir_p indexing_directory
end
# Expands a list of tags into every ordered arrangement of every non-empty
# size: all permutations of 1 tag, then of 2 tags, ... up to `tags.size`.
#
# FIXME: This is far too slow; the number of arrangements grows factorially
# with the number of tags.
def tag_combinations(tags)
  (1..tags.size).flat_map { |length| tags.permutations(length) }
end
# Indexes `value` (stored under `key`) for every arrangement of its tags.
#
# The tags come from `key_proc` and are sorted before being expanded by
# `tag_combinations`. For each arrangement, the `data` and `other-tags`
# directories are created, then a symlink to the stored entry is written
# in `data`, replacing any link already present at that path.
def index(key, value)
  sorted_tags = key_proc.call(value).sort

  tag_combinations(sorted_tags).each do |tag_path|
    # FIXME: Not on `index`, but on the list of all previous indices.
    ::Dir.mkdir_p symlinks_directory(tag_path)
    ::Dir.mkdir_p other_tags_directory(tag_path)

    link = get_tagged_entry_path(key, tag_path)

    # Drop a stale link first so that creating the new one cannot collide.
    ::File.delete link if ::File.exists? link
    ::File.symlink get_data_symlink(key, tag_path), link
  end
end
# Removes the symlinks that `index` created for `value` (stored under `key`).
#
# The tags are recomputed with `key_proc`, sorted and expanded with
# `tag_combinations`, so the paths visited are the ones `index` wrote.
#
# De-indexing must not create anything: the `Dir.mkdir_p` calls that used to
# be here (copied from `index`) built whole directory trees for every tag
# arrangement, even for values that were never indexed.
def deindex(key, value)
  indices = key_proc.call(value).sort

  tag_combinations(indices).each do |previous_indices|
    # FIXME: Not on `index`, but on the list of all previous indices.
    symlink = get_tagged_entry_path(key, previous_indices)

    # NOTE(review): if `File.exists?` follows symlinks, a dangling link
    # (target already removed) would be left in place -- confirm the order
    # in which the storage removes data and indices.
    ::File.delete symlink if ::File.exists? symlink

    # FIXME: Remove directories if empty?
  end
end
# Validation hook run before indexing; always accepts the value.
def check!(key, value, old_value)
  # Tags don't have collisions or overloads.
  true
end
def index(key, value)
indices = key_proc.call(value)
indices.each do |i|
symlink = get_tagged_entry_path(i, key)
Dir.mkdir_p ::File.dirname symlink
# FIXME: Should not happen anymore. Should we remove this?
::File.delete symlink if ::File.exists? symlink
::File.symlink get_data_symlink(key), symlink
end
def get_with_indices(key : String) : Array(Tuple(V, Int32))
get_with_indices [key]
end
def deindex(key, value)
indices = key_proc.call(value)
indices.each do |i|
symlink = get_tagged_entry_path(i, key)
::File.delete symlink
end
end
def get_with_indice(tag : String) : Array(Tuple(V, Int32))
def get_with_indices(keys : Array(String)) : Array(Tuple(V, Int32))
r_value = Array(Tuple(V, Int32)).new
tag_directory = indexing_directory tag
partition_directory = symlinks_directory keys
return r_value unless Dir.exists? tag_directory
return r_value unless Dir.exists? partition_directory
Dir.each_child tag_directory do |child|
Dir.each_child partition_directory do |child|
r_value << {
V.from_json(::File.read("#{tag_directory}/#{child}")),
V.from_json(::File.read("#{partition_directory}/#{child}")),
File.basename(child).gsub(/\.json$/, "").to_i
}
end
@ -55,62 +84,30 @@ class DODB::Tags(V) < DODB::Indexer(V)
r_value
end
def get_with_indices(keys : Array(String)) : Array(Tuple(V, Int32))
r_value = Array(Tuple(V, Int32)).new
keys.each do |tag|
r_value.concat get_with_indice tag
end
r_value
end
def get(tag : String) : Array(V)
get_with_indice(tag).map &.[0]
def get(key : String) : Array(V)
get_with_indices(key).map &.[0]
end
# Returns the values matching the given combination of tags.
#
# The tags are sorted before the lookup; only the value part of each
# `{value, index}` tuple returned by `get_with_indices` is kept.
def get(keys : Array(String)) : Array(V)
  sorted_keys = keys.sort
  get_with_indices(sorted_keys).map { |entry| entry[0] }
end
# Removes every value found under `tag`.
# Delegates to the block-taking `delete` with a matcher that is always true.
def delete(tag)
delete tag, do true end
end
# Removes from the storage the values found under `tag` for which the
# block returns a truthy value.
#
# Does nothing when the tag directory does not exist. Each entry of the
# directory is parsed from JSON and yielded; when the block accepts it, its
# key is recovered from the symlink target and the entry is deleted through
# `@storage`.
def delete(tag, &matcher)
  directory = indexing_directory tag
  return unless Dir.exists? directory

  Dir.each_child directory do |entry|
    entry_path = "#{directory}/#{entry}"
    candidate = V.from_json ::File.read entry_path

    next unless yield candidate

    @storage.delete get_key(entry_path)
  end
end
# Extracts the numeric key of an entry from the target of the symlink at `path`:
# the `.json` suffix and everything up to the last `/` are stripped, and the
# remainder is converted to an integer.
private def get_key(path : String) : Int32
  target = ::File.readlink(path)
  without_extension = target.sub(/\.json$/, "")
  file_name = without_extension.sub(/^.*\//, "")
  file_name.to_i
end
# Root directory of this tag index: `<storage_root>/tags/by_<name>`.
def indexing_directory : String
  String.build do |path|
    path << @storage_root << "/tags/by_" << @name
  end
end
private def indexing_directory(tag)
"#{indexing_directory}/#{tag}"
private def symlinks_directory(previous_indices : Array(String))
"#{indexing_directory}#{previous_indices.map { |i| "/other-tags/#{i}" }.join}/data"
end
# Path of the `other-tags` directory reached by following `previous_indices`
# from the index root, each tag adding one `/other-tags/<tag>` level.
private def other_tags_directory(previous_indices : Array(String))
  nested_levels = previous_indices.map { |tag| "/other-tags/#{tag}" }.join
  "#{indexing_directory}#{nested_levels}/other-tags"
end
private def get_tagged_entry_path(tag : String, key : String)
"#{indexing_directory}/#{tag}/#{key}.json"
private def get_tagged_entry_path(key : String, indices : Array(String))
"#{indexing_directory}#{indices.map { |i| "/other-tags/#{i}" }.join}/data/#{key}.json"
end
private def get_data_symlink(key : String)
"../../../data/#{key}.json"
private def get_data_symlink(key : String, indices : Array(String))
"../../../#{indices.map { "../../" }.join}/data/#{key}.json"
end
end