From 364887bde37e3a1e40a6730fc5e57243516b092b Mon Sep 17 00:00:00 2001 From: Luka Vandervelden Date: Wed, 29 Jan 2020 16:59:39 +0100 Subject: [PATCH] WIP for indexing by multiple tags. Basically, DODB::Tags#get(tags : Array(String)). --- spec/test.cr | 19 ++++++++++++ src/dodb/tags.cr | 80 ++++++++++++++++++++++++++++++++++++++---------- 2 files changed, 82 insertions(+), 17 deletions(-) diff --git a/spec/test.cr b/spec/test.cr index c56e977..199aff6 100644 --- a/spec/test.cr +++ b/spec/test.cr @@ -335,6 +335,25 @@ describe "DODB::DataBase" do db_ships_by_tags.get("flagship").should eq([] of Ship) end + + it "gets items that have multiple tags" do + db = DODB::SpecDataBase.new + + db_ships_by_tags = db.new_tags "tags", &.tags + + Ship.all_ships.each do |ship| + db << ship + end + + results = db_ships_by_tags.get(["flagship", "name ship"]) + results.should eq([Ship.yamato]) + + results = db_ships_by_tags.get(["name ship", "flagship"]) + results.should eq([Ship.yamato]) + + results = db_ships_by_tags.get(["flagship"]) + results.should eq([Ship.yamato]) + end end describe "tools" do diff --git a/src/dodb/tags.cr b/src/dodb/tags.cr index 51003eb..19e766b 100644 --- a/src/dodb/tags.cr +++ b/src/dodb/tags.cr @@ -10,27 +10,59 @@ class DODB::Tags(V) < DODB::Indexer(V) ::Dir.mkdir_p indexing_directory end + # FIXME: Too slow. That `uniq` call alone is a perf eater. + def tag_combinations(tags) + combinations = [] of Array(String) + + previous_tags = [] of String + while tag = tags.shift? + previous_tags.push tag + + combinations.push previous_tags.clone + + if tags.size > 0 + combinations.push tags.clone + end + end + + combinations.uniq + end + def index(key, value) - indices = key_proc.call value + indices = key_proc.call(value).sort - indices.each do |index| - symlink = get_tagged_entry_path(key, index) + tag_combinations(indices).each do |previous_indices| + # FIXME: Not on `index`, but on the list of all previous indices. + symdir = symlinks_directory previous_indices + otdir = other_tags_directory previous_indices - Dir.mkdir_p ::File.dirname symlink + ::Dir.mkdir_p symdir + ::Dir.mkdir_p otdir + + symlink = get_tagged_entry_path(key, previous_indices) ::File.delete symlink if ::File.exists? symlink - ::File.symlink get_data_symlink(key), symlink + ::File.symlink get_data_symlink(key, previous_indices), symlink end end def deindex(key, value) - indices = key_proc.call value + indices = key_proc.call(value).sort - indices.each do |index_key| - symlink = get_tagged_entry_path(key, index_key) + tag_combinations(indices).each do |previous_indices| + # FIXME: Not on `index`, but on the list of all previous indices. + symdir = symlinks_directory previous_indices + otdir = other_tags_directory previous_indices - ::File.delete symlink + ::Dir.mkdir_p symdir + ::Dir.mkdir_p otdir + + symlink = get_tagged_entry_path(key, previous_indices) + + ::File.delete symlink if ::File.exists? symlink + + # FIXME: Remove directories if empty? end end @@ -38,10 +70,14 @@ class DODB::Tags(V) < DODB::Indexer(V) return true # Tags don’t have collisions or overloads. end - def get_with_indices(key) : Array(Tuple(V, Int32)) + def get_with_indices(key : String) : Array(Tuple(V, Int32)) + get_with_indices [key] + end + + def get_with_indices(keys : Array(String)) : Array(Tuple(V, Int32)) r_value = Array(Tuple(V, Int32)).new - partition_directory = "#{indexing_directory}/#{key}" + partition_directory = symlinks_directory keys return r_value unless Dir.exists? partition_directory @@ -55,20 +91,30 @@ class DODB::Tags(V) < DODB::Indexer(V) r_value end - def get(key) : Array(V) + def get(key : String) : Array(V) get_with_indices(key).map &.[0] end + def get(keys : Array(String)) : Array(V) + get_with_indices(keys.sort).map &.[0] + end + def indexing_directory : String - "#{@storage_root}/by_tags/by_#{@name}" + "#{@storage_root}/tags/by_#{@name}" end - private def get_tagged_entry_path(key : String, index_key : String) - "#{indexing_directory}/#{index_key}/#{key}.json" + private def symlinks_directory(previous_indices : Array(String)) + "#{indexing_directory}#{previous_indices.map { |i| "/other-tags/#{i}" }.join}/data" + end + private def other_tags_directory(previous_indices : Array(String)) + "#{indexing_directory}#{previous_indices.map { |i| "/other-tags/#{i}" }.join}/other-tags" end - private def get_data_symlink(key : String) - "../../../data/#{key}.json" + private def get_tagged_entry_path(key : String, indices : Array(String)) + "#{indexing_directory}#{indices.map { |i| "/other-tags/#{i}" }.join}/data/#{key}.json" + end + private def get_data_symlink(key : String, indices : Array(String)) + "../../../#{indices.map { "../../" }.join}/data/#{key}.json" end end