Compare commits

...

40 Commits

Author SHA1 Message Date
Philippe PITTOLI 9a687a35cb Stuff. 2024-05-15 14:39:33 +02:00
Philippe PITTOLI 1e6814f4db Benchmarks: provide reports to simplify the code for statistical analysis. 2024-05-15 14:17:15 +02:00
Philippe PITTOLI f888353ffb benchmark-cars has been rewritten to perform WAY FASTER benchmarks. 2024-05-15 14:17:15 +02:00
Philippe PITTOLI e801a5a8c9 stats.sh 2024-05-15 14:17:15 +02:00
Philippe PITTOLI 714300bdb5 Readlink. 2024-05-15 14:17:15 +02:00
Philippe PITTOLI 01cc4633fe Example linuxfr. 2024-05-15 14:17:15 +02:00
Philippe PITTOLI cc5d6ff78f Fix various benchmark pain points. 2024-05-15 14:17:15 +02:00
Philippe PITTOLI c4fce3c4a4 Enable cars not to have color or keywords. 2024-05-15 14:17:15 +02:00
Philippe PITTOLI 5a2f17f7e0 Enable all indexes (index, partition, tag) to be nilable. 2024-05-15 14:17:15 +02:00
Philippe PITTOLI 56fde98492 Some cleaning. 2024-05-15 14:17:15 +02:00
Philippe PITTOLI 9d1da890f3 Makefile: slightly different options. 2024-05-15 14:17:15 +02:00
Philippe PITTOLI 8161ea3f82 Finally a concise way to perform tests on different databases. 2024-05-15 14:17:15 +02:00
Philippe PITTOLI 53c69d0c5d Rewriting the benchmarks to get better data. 2024-05-15 14:17:15 +02:00
Philippe PITTOLI c124b23d7a Tests and benchmarks are now more consistent. 2024-05-15 14:17:15 +02:00
Philippe PITTOLI 0a357446ef benchmark-ramdb: already taken into account in benchmark-cars. 2024-05-15 14:17:15 +02:00
Philippe PITTOLI 0389ed85ed Add some basic benchmark for adding values in different database types. 2024-05-15 14:17:15 +02:00
Philippe PITTOLI 5ce24184be Behavior of cached database is now covered by a few tests. 2024-05-15 14:17:15 +02:00
Philippe PITTOLI c475c4f584 Specs are fixed & reworked for the most part. New benchmarks. 2024-05-15 14:17:15 +02:00
Philippe PITTOLI 2a71254533 s/exists?/symlink?/ + remove useless check + remove useless call to a function. 2024-05-15 14:17:15 +02:00
Philippe PITTOLI d29b29bc1b Slight optimization in tag searches. 2024-05-15 14:17:15 +02:00
Philippe PITTOLI 0b4cdf00b7 Test some reindex stuff with the Car database. 2024-05-15 14:17:15 +02:00
Philippe PITTOLI 835626a6dd Benchmark RAM db. 2024-05-15 14:17:15 +02:00
Philippe PITTOLI 90b6563068 Proper Car database and functions. 2024-05-15 14:17:15 +02:00
Philippe PITTOLI 5bf6b21d5d Rewrites lead to a working spec file. 2024-05-15 14:17:15 +02:00
Philippe PITTOLI dd7ed3010d Specs changed. 2024-05-15 14:17:15 +02:00
Philippe PITTOLI 42b82c8fa5 More consistent API. 2024-05-15 14:17:15 +02:00
Philippe PITTOLI c88e738332 Spec: less errors. 2024-05-15 14:17:15 +02:00
Philippe PITTOLI b3e416da8d Block captures fixed. 2024-05-15 14:17:15 +02:00
Philippe PITTOLI a141849f2a Fix delete functions for tags & partition. 2024-05-15 14:17:15 +02:00
Philippe PITTOLI 031175a90a TODO: RAMOnlyTags + tests tests tests. 2024-05-15 14:17:15 +02:00
Philippe PITTOLI 66256c7650 Blah 2024-05-15 14:15:20 +02:00
Philippe PITTOLI afa96d8ae7 A few new explanations. 2024-05-15 03:10:59 +02:00
Philippe PITTOLI 6f170b60ab DODB 2024-05-14 16:19:46 +02:00
Philippe PITTOLI 03da23e8e2 Let's shit on SQL a bit more. 2024-05-14 13:51:13 +02:00
Philippe PITTOLI fc52757074 DODB PDF. 2024-05-13 21:46:02 +02:00
Philippe PITTOLI a986e56264 Graph: a few more sentenses. 2024-05-13 03:38:41 +02:00
Philippe PITTOLI 71d8fa53d5 Longer explanation of the experimental scenario. 2024-05-13 02:24:59 +02:00
Philippe PITTOLI 7428ba757c Graphs: starting to look good. 2024-05-12 20:47:09 +02:00
Philippe PITTOLI 20c7068e4c Graph: change the Y scale. 2024-05-12 19:24:50 +02:00
Philippe PITTOLI 3396c6a05c Graphs! 2024-05-12 16:47:53 +02:00
39 changed files with 3727 additions and 1130 deletions

View File

@ -1,12 +1,12 @@
all: build
OPTS ?= --progress
OPTS ?= --progress --no-debug
Q ?= @
SHOULD_UPDATE = ./bin/should-update
DBDIR=/tmp/tests-on-dodb
benchmark-cars:
$(Q)crystal build spec/benchmark-cars.cr $(OPTS)
$(Q)crystal build spec/benchmark-cars.cr $(OPTS) --release
build: benchmark-cars

View File

@ -1,8 +1,3 @@
# API
Cached indexes (index, partition, tags) should be used by default.
Uncached indexes should be an option, through a new function `add_uncached_index` or something.
# Performance
Search with some kind of "pagination" system: ask entries with a limit on the number of elements and an offset.

24
bin/extract-final-data.sh Executable file
View File

@ -0,0 +1,24 @@
#!/bin/sh
# Turn the "*.d" files of a result directory into the tables read by the graphs.
#
# usage: extract-final-data.sh result-directory
#
# For each "<dir>/*.d" file, columns 2, 3 and 5 are kept and written to a
# sibling "*.t" (truncated) file. Column 1 of "<dir>/ram_index.d" is saved in
# the temporary file "./it" and used as the first column of every final table.
# The final tables are written to ./data/{index,partitions,tags}.d, with the
# databases always in the same order: ram, cached, semi, uncached.

if [ $# -ne 1 ]
then
	# A usage error is a failure: report it on stderr with a non-zero status.
	echo "usage: $0 result-directory" >&2
	exit 1
fi

d=$1

echo "from data (.d) to truncated data (.t)"
for i in "$d"/*.d
do
	fname="${i%.d}.t"
	awk '{ print $2, $3, $5 }' < "$i" > "$fname"
done

awk '{ print $1 }' < "$d/ram_index.d" > it

# -p: do not fail when the script is run a second time.
mkdir -p data

echo "from truncated data (.t) to graphed data data/XXX.d"
paste it "$d/ram_index.t" "$d/cached_index.t" "$d/semi_index.t" "$d/uncached_index.t" > ./data/index.d
paste it "$d/ram_partitions.t" "$d/cached_partitions.t" "$d/semi_partitions.t" "$d/uncached_partitions.t" > ./data/partitions.d
paste it "$d/ram_tags.t" "$d/cached_tags.t" "$d/semi_tags.t" "$d/uncached_tags.t" > ./data/tags.d

39
bin/rsum2line.awk Executable file
View File

@ -0,0 +1,39 @@
#!/usr/bin/awk -f
# Condense a multi-line statistics report into a single output line:
#   pct_min median mean pct_max t df pvalue
# The report is presumably the output of bin/summary.r (R's summary() and
# t.test()) -- verify against an actual report before changing the patterns.
#
# Rule order matters: the two FOUND_* rules are placed BEFORE the rules that
# raise the flags, so a flag raised while reading one line is only acted upon
# when the NEXT line is read (the values sit on the line after their label).
BEGIN {
FOUND_95pct = 0
FOUND_mean = 0
}
# Line right after "95 percent confidence": the two bounds of the interval.
FOUND_95pct == 1 {
pct_min = $1
pct_max = $2
FOUND_95pct = 0
}
# Line right after "mean of x": the mean. This is where the single output
# line is printed, so every other value must have been seen earlier in the report.
FOUND_mean == 1 {
mean = $1
print pct_min, median, mean, pct_max, t, df, pvalue
FOUND_mean = 0
}
# Line starting with "t = ": fields 3, 6 and 9 are kept as t, df and p-value
# (trailing commas are stripped from the first two).
/^t = / {
gsub(",", "", $3)
t = $3
gsub(",", "", $6)
df = $6
pvalue = $9
}
# Label line: the mean is on the next line.
/mean of x/ {
FOUND_mean = 1
}
# "Median" line: once every ':' is removed from the line, the value is field 2.
/Median/ {
gsub(":", "")
median = $2
}
# Label line: the interval bounds are on the next line.
/95 percent confidence/ {
FOUND_95pct = 1
}

66
bin/stats.sh Executable file
View File

@ -0,0 +1,66 @@
#!/bin/sh
# Post-process the raw benchmark results found in a result directory:
#   1. summarize each "*.raw" file with R (bin/summary.r), then condense the
#      report into a one-line "*.summary" file (bin/rsum2line.awk);
#   2. append every summary, prefixed by its number of iterations, to the
#      matching "<dir>/<db>_<kind>.d" file, by increasing number of iterations;
#   3. hand the directory over to bin/extract-final-data.sh.
#
# usage: stats.sh result-directory

extract="./bin/extract-final-data.sh"
summary="./bin/summary.r"
summary_to_line="./bin/rsum2line.awk"

if [ $# -ne 1 ]
then
	# A usage error is a failure: report it on stderr with a non-zero status.
	echo "usage: $0 result-directory" >&2
	exit 1
fi

dir="$1"

# Create the ".summary" file of each ".raw" file, skipping work already done.
raw_to_summary() {
	for i in "$dir"/*.raw
	do
		# Unmatched glob: there is nothing to summarize.
		[ -e "$i" ] || continue

		# Parameter expansion strips exactly the ".raw" suffix.
		summary_with_bad_format="${i%.raw}.unconveniently_formated_summary"
		target="${i%.raw}.summary"

		# printf instead of `echo -n "\r..."`: neither "-n" nor backslash
		# escapes are portable with echo.
		if [ -f "$summary_with_bad_format" ]; then
			printf '\r%s already exists: skipping ' "$summary_with_bad_format"
		else
			Rscript "$summary" "$i" > "$summary_with_bad_format"
		fi

		if [ -f "$target" ]; then
			printf '\r%s already exists: skipping ' "$target"
		else
			"$summary_to_line" "$summary_with_bad_format" > "$target"
		fi
	done

	echo ""

	# Beyond a certain number of entries, retrieving data from partitions and tags isn't tested anymore.
	# "Fake entries" with a duration of 0 are created instead, which causes problems with the
	# statistical analysis. So, "NaN" has to be replaced by "0" in the summaries.
	sed -i "s/NaN/0/g" "$dir"/*.summary
}

# List summary files with the number of iterations as a prefix so they can then be sorted.
sort_summary_files() {
	for i in "$dir"/*.summary
	do
		[ -e "$i" ] || continue
		f "$i"
	done | sort -n
}

# Print "<nb> <dir>/<db>_<kind> <path>" for a path shaped like
# "<dir>/<db>_<nb>_<kind>.summary".
# The path is split on '_', '.' and '/', so <dir> has to be a single path
# component free of these characters.
f() {
	echo $* | sed "s/[_./]/ /g" | xargs echo "$* " | awk '{ printf "%s %s/%s_%s %s\n", $4, $2, $3, $5, $1 }'
}

# Read "<nb> <target> <summary-file>" lines and append "<nb> <summary>" to <target>.d
# NOTE(review): lines are appended, so running this script twice on the same
# directory duplicates the content of the ".d" files -- confirm whether they
# should be removed first.
fill() {
	while read -r nb_it target fname
	do
		xargs echo "$nb_it " < "$fname" >> "$target.d"
	done
}

raw_to_summary
sort_summary_files | fill

extract_final_data() {
	"$extract" "$dir"
}

extract_final_data

14
bin/summary.r Normal file
View File

@ -0,0 +1,14 @@
# Usage: Rscript bin/summary.r <data-file>
# Reads a table from the file given as first argument and reports, for its
# first column: summary(), the standard deviation and t.test() with default arguments.
# NOTE(review): bin/rsum2line.awk matches on the text printed here ("Median",
# "t = ", "95 percent confidence", "mean of x") -- confirm before changing
# anything that alters the output.
require(grDevices) # for colours -- NOTE(review): nothing below appears to use it; confirm before removing
tbl <- read.table(file=commandArgs(TRUE)[1])
# Only the first column is analysed.
val <- tbl[1]
summary(val)
# standarddeviation=sd(unlist(val))
sd(unlist(val))
# print (standarddeviation, zero.print="standard deviation: ")
# confint.default (val)
t.test (val)

47
exemple-linuxfr.cr Normal file
View File

@ -0,0 +1,47 @@
# Example use of DODB: a small database of cars ("voitures") searchable
# through an index on the name, a partition on the colour and tags.
require "./src/dodb.cr"
# A car: a name, a colour and a list of tags.
class Voiture
include JSON::Serializable
property nom : String
property couleur : String
property tags : Array(String)
def initialize(@nom, @couleur, @tags)
end
end
voitures = DODB::DataBase(Voiture).new "db-voitures"
# Three ways to look cars up: by name, by colour and by tag.
voiture_par_nom = voitures.new_index "nom", &.nom
voiture_par_couleur = voitures.new_partition "couleur", &.couleur
voiture_par_tag = voitures.new_tags "étiquettes", &.tags
voitures << Voiture.new "Corvet", "rouge", ["jolie", "classique"]
voitures << Voiture.new "Bullet-GT", "bleue", ["jolie", "rapide"]
# The Corvet becomes blue.
voiture = voiture_par_nom.get "Corvet"
voiture.couleur = "bleue"
voiture_par_nom.update voiture
# The Bullet-GT gets a new name.
voiture = voiture_par_nom.get "Bullet-GT"
voiture.nom = "Not-So-Fast-Bullet-GT"
voiture_par_nom.update "Bullet-GT", voiture # The name (the index) has changed.
# As we all know, pretty cars are also expensive.
voiture_par_tag.get("jolie").each do |voiture|
voiture.tags << "chère"
voiture_par_nom.update voiture
end
# Removal of the cars that are both blue and classic.
voiture_par_couleur.delete "bleue", do |voiture|
voiture.tags.includes? "classique"
end
# Print what is left in the database.
voitures.each do |voiture|
pp! voiture
end

View File

@ -0,0 +1,71 @@
extension "groff"
doctemplate
"
.MT 0
$header
.TL
$title
.AU \"\"
.ND
.SA 0
.DS I
"
".DE
$footer
"
end
nodoctemplate
"
"
"
"
end
bold "\f[CB]$text\fP"
italics "\f[CI]$text\fP"
underline "\f[CI]$text\fP"
fixed "\fC$text\fP"
color "\m[$style]$text\m[]"
anchor "$infilename : $linenum - $text"
reference "$text \(-> $infile:$linenum, page : $infilename:$linenum"
#lineprefix "\fC\(em\fP "
#lineprefix "\fC\n(ln\fP "
lineprefix ""
colormap
"green" "green"
"red" "red"
"darkred" "darkred"
"blue" "blue"
"brown" "brown"
"pink" "pink"
"yellow" "yellow"
"cyan" "cyan"
"purple" "purple"
"orange" "orange"
"brightorange" "brightorange"
"brightgreen" "brightgreen"
"darkgreen" "darkgreen"
"black" "black"
"teal" "teal"
"gray" "gray"
"darkblue" "darkblue"
default "black"
end
translations
"\\" "\\\\"
##"\n" " \\\\\n"
##" " "\\ "
##"\t" "\\ \\ \\ \\ \\ \\ \\ \\ "
"\t" " "
"|" "|"
"---" "\(em"
"--" "\(mi"
end

5
graphs/Makefile Normal file
View File

@ -0,0 +1,5 @@
SRC ?= graphs
ODIR ?= /tmp/
export ODIR SRC
include Makefile.in

79
graphs/Makefile.in Normal file
View File

@ -0,0 +1,79 @@
SRC ?= graphs
ODIR ?= .
BIBLIOGRAPHY ?= bibliography
ALLSRC = $(shell find .)
SOELIM_OPTS ?=
SOELIM = soelim $(SOELIM_OPTS)
PRECONV_OPTS ?= -e utf-8
PRECONV = preconv $(PRECONV_OPTS)
EQN_OPTS ?= -Tpdf
EQN = eqn $(EQN_OPTS)
# source-highlight stuff
# GH_INTRO: instructions before each source code provided by source-highlight
# GH_OUTRO: ------------ after ---- ------ ---- -------- -- ----------------
# GH_INTRO/GH_OUTRO: values are separated by ';'
#
GH_INTRO := .nr DI 0;.DS I;.fam C;.b1;.sp -0.1i
GH_OUTRO := .sp -0.2i;.b2;.fam;.DE
#
export GH_INTRO
export GH_OUTRO
#
# SHOPTS: cmd line parameter given to source-highlight
SHOPTS = --outlang-def=.source-highlight_groff-output-definition
export SHOPTS
# ghighlight brings `source-highlight` to troff
GHIGHLIGHT_OPTS ?=
GHIGHLIGHT = ./bin/ghighlight $(GHIGHLIGHT_OPTS)
GRAP_OPTS ?=
GRAP = grap $(GRAP_OPTS)
PIC_OPTS ?= -Tpdf
PIC = pic $(PIC_OPTS)
# -P => move punctuation after reference
# -S => label and bracket-label options
# -e => accumulate (use a reference section)
# -p bib => bibliography file
REFER_OPTS ?= -PS -e -p $(BIBLIOGRAPHY)
REFER = refer $(REFER_OPTS)
# -k => iconv conversion (did it ever work?)
# -ms => ms macro
# -U => unsafe (because of PDF inclusion)
# -Tpdf => output device is PDF
# -mspdf => include PDF (so, images converted in PDF) in the document
# NOTE: a custom troffrc (configuration file) is necessary on OpenBSD
# to have correctly justified paragraphs. Otherwise, the default
# configuration removes this possibility, for bullshit reasons. Sad.
# -M dir => path to custom troffrc
# TODO: no change with or without the following options -P -e
# This has to be investigated: how to make PDFs look nice in browsers?
# -P -e => provide "-e" to gropdf to embed fonts
GROFF_OPTS ?= -ms -t -Tpdf -U -mspdf -mpdfmark -M ./bin -P -e
GROFF = groff $(GROFF_OPTS)
$(SRC).pdf:
$(SOELIM) < $(SRC).ms |\
./bin/utf8-to-ms.sh |\
$(PRECONV) |\
$(EQN) |\
$(GHIGHLIGHT) |\
$(GRAP) |\
$(PIC) |\
$(REFER) |\
$(GROFF) > $(ODIR)/$@
# Keep options in memory for the recursive 'make' call
export SOELIM_OPTS PRECONV_OPTS EQN_OPTS GHIGHLIGHT_OPTS GRAP_OPTS PIC_OPTS REFER_OPTS
serve:
@#find . -name "*.ms" -or -name "*.d" | entr gmake -B $(SRC).pdf
find . | entr gmake -B $(SRC).pdf

0
graphs/bibliography Normal file
View File

286
graphs/bin/ghighlight Executable file
View File

@ -0,0 +1,286 @@
#! /usr/bin/env perl
# ghighlight - A simple preprocessor for adding code highlighting in a groff file
# Copyright (C) 2014-2018 Free Software Foundation, Inc.
# Written by Bernd Warken <groff-bernd.warken-72@web.de>.
my $version = '0.9.0';
# This file is part of 'ghighlight', which is part of 'groff'.
# 'groff' is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 2 of the License, or
# (at your option) any later version.
# 'groff' is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
# You can find a copy of the GNU General Public License in the internet
# at <http://www.gnu.org/licenses/gpl-2.0.html>.
########################################################################
use strict;
use warnings;
#use diagnostics;
# current working directory
use Cwd;
# $Bin is the directory where this script is located
use FindBin;
# open3 for a bidirectional communication with a child process
use IPC::Open3;
########################################################################
# system variables and exported variables
########################################################################
$\ = "\n"; # final part for print command
########################################################################
# read-only variables with double-@ construct
########################################################################
our $File_split_env_sh;
our $File_version_sh;
our $Groff_Version;
my $before_make; # script before run of 'make'
{
my $at = '@';
$before_make = 1 if '@VERSION@' eq "${at}VERSION${at}";
}
my %at_at;
my $file_perl_test_pl;
my $groffer_libdir;
if ($before_make) {
my $highlight_source_dir = $FindBin::Bin;
$at_at{'BINDIR'} = $highlight_source_dir;
$at_at{'G'} = '';
} else {
$at_at{'BINDIR'} = '@BINDIR@';
$at_at{'G'} = '@g@';
}
########################################################################
# options
########################################################################
foreach (@ARGV) {
if ( /^(-h|--h|--he|--hel|--help)$/ ) {
print q(Usage for the 'ghighlight' program:);
print 'ghighlight [-] [--] [filespec...] normal file name arguments';
print 'ghighlight [-h|--help] gives usage information';
print 'ghighlight [-v|--version] displays the version number';
print q(This program is a 'groff' preprocessor that handles highlighting source code ) .
q(parts in 'roff' files.);
exit;
} elsif ( /^(-v|--v|--ve|--ver|--vers|--versi|--versio|--version)$/ ) {
print q('ghighlight' version ) . $version;
exit;
}
}
my $macros = "groff_mm";
if ( $ENV{'GHLENABLECOLOR'} ) {
$macros = "groff_mm_color";
}
########################################################################
# input
########################################################################
my $source_mode = 0;
my @lines = ();
# Translate one code-block option into the troff request applying it:
#   "ps=<digits>" gives ".ps <digits>", "vs=<value>" gives ".vs <value>".
# Any other option is reported on STDERR and yields the empty string.
sub getTroffLine {
  my $option = shift;

  return ".ps $1" if $option =~ /^ps=([0-9]+)/;
  return ".vs $1" if $option =~ /^vs=(\S+)/;

  print STDERR "didn't recognised '$option'";
  return "";
}
# Give the troff request that undoes a code-block option: a bare ".ps" for a
# "ps=" option, a bare ".vs" for a "vs=" option.
# Any other option is reported on STDERR and yields the empty string.
sub getTroffLineOpposite {
  my $option = shift;

  return ".ps" if $option =~ /^ps=/;
  return ".vs" if $option =~ /^vs=/;

  print STDERR "didn't recognised '$option'";
  return "";
}
# language for codeblocks
my $lang = '';
my @options = ();
# Main loop: copy the input to the output, except for the lines enclosed
# between a starter request (".`` <lang> [options...]" or ".SOURCE ...") and an
# ending request (the same request without argument, or with 'stop').
# Enclosed lines are sent to `source-highlight` and replaced by its output,
# surrounded by the GH_INTRO / GH_OUTRO requests and the per-block options.
foreach (<>) {
  chomp;
  s/\s+$//;
  my $line = $_;
  my $is_dot_Source = $line =~ /^[.']\s*(``|SOURCE)(|\s+.*)$/;

  unless ( $is_dot_Source ) { # not a '.SOURCE' line
    if ( $source_mode ) { # is running in SOURCE mode
      push @lines, $line;
    } else { # normal line, not SOURCE-related
      print $line;
    }
    next;
  }

  ##########
  # now the line is a '.SOURCE' line
  my $args = $line;
  $args =~ s/\s+$//; # remove final spaces
  $args =~ s/^[.']\s*(``|SOURCE)\s*//; # omit .source part, leave the arguments
  my @args = split /\s+/, $args;

  ##########
  # start SOURCE mode
  if ( @args > 0 && $args[0] ne 'stop' ) {
    # A first argument other than 'stop' opens 'SOURCE' mode: it is the
    # language, the remaining arguments are options (ps=..., vs=...).
    # No argument, or 'stop', means an ending command.
    if ( $source_mode ) {
      # '.SOURCE' was started twice, ignore. The language and options of
      # the block being read are left untouched.
      print STDERR q('.``' starter was run several times);
      next;
    }

    # new SOURCE start
    $lang = shift @args;
    @options = @args;
    $source_mode = 1;
    @lines = ();
    next;
  }

  ##########
  # now the line must be a SOURCE ending line (stop)
  unless ( $source_mode ) {
    print STDERR 'ghighlight.pl: there was a SOURCE ending without being in ' .
      'SOURCE mode:';
    print STDERR ' ' . $line;
    next;
  }
  $source_mode = 0; # 'SOURCE' stop calling is correct

  my $shopts = $ENV{"SHOPTS"} || "";

  ##########
  # Run source-highlight on lines
  # Check if language was specified
  my $cmdline = "source-highlight -f $macros $shopts --output STDOUT";
  if ($lang ne '') {
    $cmdline .= " -s $lang";
  }

  # Start `source-highlight`
  # NOTE(review): $child_err is undefined when passed to open3(); per the
  # IPC::Open3 documentation the child's stderr is then delivered on
  # $child_out, i.e. mixed with the highlighted code -- confirm this is wanted.
  my $pid = open3(my $child_in, my $child_out, my $child_err, $cmdline)
    or die "open3() failed $!";

  # Provide source code to `source-highlight` in its standard input
  # NOTE(review): everything is written before anything is read back; a very
  # large code block could presumably fill the pipes and block both processes.
  print $child_in $_ for @lines;
  close $child_in;

  if (my $v = $ENV{"GH_INTRO"}) {
    print for split /;/, $v;
  }

  for (@options) {
    my $l = getTroffLine $_;
    print $l if ($l ne "");
  }

  # Print `source-highlight` output
  while (<$child_out>) {
    chomp;
    print;
  }
  close $child_out;

  # Reap the child: open3() does not wait for it, so without this every
  # highlighted block leaves a zombie process behind until ghighlight exits,
  # and a failure of `source-highlight` goes unnoticed.
  waitpid $pid, 0;
  print STDERR "ghighlight: source-highlight failed (wait status $?)" if $?;

  for (reverse @options) {
    my $l = getTroffLineOpposite $_;
    print $l if ($l ne "");
  }

  if (my $v = $ENV{"GH_OUTRO"}) {
    print for split /;/, $v;
  }

  my @print_res = (1);

  # Start argument processing

  # remove 'stop' arg if exists
  # shift @args if ( $args[0] eq 'stop' );
  # if ( @args == 0 ) {
  # # no args for saving, so @print_res doesn't matter
  # next;
  # }
  # my @var_names = ();
  # my @mode_names = ();
  # my $mode = '.ds';
  # for ( @args ) {
  # if ( /^\.?ds$/ ) {
  # $mode = '.ds';
  # next;
  # }
  # if ( /^\.?nr$/ ) {
  # $mode = '.nr';
  # next;
  # }
  # push @mode_names, $mode;
  # push @var_names, $_;
  # }
  # my $n_vars = @var_names;
  # if ( $n_vars < $n_res ) {
  # print STDERR 'ghighlight: not enough variables for Python part: ' .
  # $n_vars . ' variables for ' . $n_res . ' output lines.';
  # } elsif ( $n_vars > $n_res ) {
  # print STDERR 'ghighlight: too many variablenames for Python part: ' .
  # $n_vars . ' variables for ' . $n_res . ' output lines.';
  # }
  # if ( $n_vars < $n_res ) {
  # print STDERR 'ghighlight: not enough variables for Python part: ' .
  # $n_vars . ' variables for ' . $n_res . ' output lines.';
  # }
  # my $n_min = $n_res;
  # $n_min = $n_vars if ( $n_vars < $n_res );
  # exit unless ( $n_min );
  # $n_min -= 1; # for starting with 0
  # for my $i ( 0..$n_min ) {
  # my $value = $print_res[$i];
  # chomp $value;
  # print $mode_names[$i] . ' ' . $var_names[$i] . ' ' . $value;
  # }
}
1;
# Local Variables:
# mode: CPerl
# End:

69
graphs/bin/troffrc Normal file
View File

@ -0,0 +1,69 @@
.\" Startup file for troff.
.
.\" This is tested by pic.
.nr 0p 0
.
.\" Load composite mappings.
.do mso composite.tmac
.
.\" Load generic fallback mappings.
.do mso fallbacks.tmac
.
.\" Use .do here, so that it works with -C.
.\" The groff command defines the .X string if the -X option was given.
.ie r.X .do ds troffrc!ps Xps.tmac
.el .do ds troffrc!ps ps.tmac
.do ds troffrc!pdf pdf.tmac
.do ds troffrc!dvi dvi.tmac
.do ds troffrc!X75 X.tmac
.do ds troffrc!X75-12 X.tmac
.do ds troffrc!X100 X.tmac
.do ds troffrc!X100-12 X.tmac
.do ds troffrc!ascii tty.tmac
.do ds troffrc!latin1 tty.tmac
.do ds troffrc!utf8 tty.tmac
.do ds troffrc!cp1047 tty.tmac
.do ds troffrc!lj4 lj4.tmac
.do ds troffrc!lbp lbp.tmac
.do ds troffrc!html html.tmac
.do if d troffrc!\*[.T] \
. do mso \*[troffrc!\*[.T]]
.\" Remove the temporary strings defined above ("troffrc!lbp" was misspelled
.\" "troff!lbp", so that string was never removed).
.do rm troffrc!ps troffrc!Xps troffrc!dvi troffrc!X75 troffrc!X75-12 \
troffrc!X100 troffrc!X100-12 troffrc!lj4 troffrc!lbp troffrc!html troffrc!pdf
.
.\" Test whether we work under EBCDIC and map the no-breakable space
.\" character accordingly.
.do ie '\[char97]'a' \
. do tr \[char160]\~
.el \
. do tr \[char65]\~
.
.\" Set the hyphenation language to 'us'.
.do hla us
.
.\" Disable hyphenation:
.\" Do not load hyphenation patterns and exceptions.
.\"do hpf hyphen.us
.\"do hpfa hyphenex.us
.
.\" Disable adjustment by default,
.\" such that manuals look similar with groff and mandoc(1).
.\".ad l
.\".de ad
.\"..
.\" Handle paper formats.
.do mso papersize.tmac
.
.\" Handle PS images.
.do mso pspic.tmac
.do mso pdfpic.tmac
.
.\" ====================================================================
.\" Editor settings
.\" ====================================================================
.
.\" Local Variables:
.\" mode: nroff
.\" fill-column: 72
.\" End:
.\" vim: set filetype=groff textwidth=72:

154
graphs/bin/utf8-to-ms.sh Executable file
View File

@ -0,0 +1,154 @@
#!/bin/sh
# This program isn't by any means complete.
# Most text markers, accents and ligatures are handled.
# However, nothing else currently is.
# Please do provide more translations.
# Convert input into hexadecimal and a single byte per line.
to_hex_one_column() xxd -p -c 1
# Reverse hexadecimal to original value.
from_hex() xxd -p -r
# Glue the one-byte-per-line hexadecimal stream back into one output line per
# input line: bytes are joined with a single space and a line is emitted each
# time the byte 0a (newline) is read. A trailing line without 0a is emitted too.
regroup_lines() awk '
{
	# No separator in front of the first byte of a line.
	line = (pending ? line " " : "") $1
	pending = 1

	if ($1 == "0a") {
		print line
		pending = 0
	}
}
END {
	if (pending)
		print line
}
'
accents() sed \
-e "s/c3 81/5c 5b 27 41 5d/g"\
-e "s/c3 89/5c 5b 27 45 5d/g"\
-e "s/c3 8d/5c 5b 27 49 5d/g"\
-e "s/c3 93/5c 5b 27 4f 5d/g"\
-e "s/c3 9a/5c 5b 27 55 5d/g"\
-e "s/c3 9d/5c 5b 27 59 5d/g"\
-e "s/c3 a1/5c 5b 27 61 5d/g"\
-e "s/c3 a9/5c 5b 27 65 5d/g"\
-e "s/c3 ad/5c 5b 27 69 5d/g"\
-e "s/c3 b3/5c 5b 27 6f 5d/g"\
-e "s/c3 ba/5c 5b 27 75 5d/g"\
-e "s/c3 bd/5c 5b 27 79 5d/g"\
-e "s/c3 84/5c 5b 3a 41 5d/g"\
-e "s/c3 8b/5c 5b 3a 45 5d/g"\
-e "s/c3 8f/5c 5b 3a 49 5d/g"\
-e "s/c3 96/5c 5b 3a 4f 5d/g"\
-e "s/c3 9c/5c 5b 3a 55 5d/g"\
-e "s/c3 a4/5c 5b 3a 61 5d/g"\
-e "s/c3 ab/5c 5b 3a 65 5d/g"\
-e "s/c3 af/5c 5b 3a 69 5d/g"\
-e "s/c3 b6/5c 5b 3a 6f 5d/g"\
-e "s/c3 bc/5c 5b 3a 75 5d/g"\
-e "s/c3 bf/5c 5b 3a 79 5d/g"\
-e "s/c3 82/5c 5b 5e 41 5d/g"\
-e "s/c3 8a/5c 5b 5e 45 5d/g"\
-e "s/c3 8e/5c 5b 5e 49 5d/g"\
-e "s/c3 94/5c 5b 5e 4f 5d/g"\
-e "s/c3 9b/5c 5b 5e 55 5d/g"\
-e "s/c3 a2/5c 5b 5e 61 5d/g"\
-e "s/c3 aa/5c 5b 5e 65 5d/g"\
-e "s/c3 ae/5c 5b 5e 69 5d/g"\
-e "s/c3 b4/5c 5b 5e 6f 5d/g"\
-e "s/c3 bb/5c 5b 5e 75 5d/g"\
-e "s/c3 80/5c 5b 60 41 5d/g"\
-e "s/c3 88/5c 5b 60 45 5d/g"\
-e "s/c3 8c/5c 5b 60 49 5d/g"\
-e "s/c3 92/5c 5b 60 4f 5d/g"\
-e "s/c3 99/5c 5b 60 55 5d/g"\
-e "s/c3 a0/5c 5b 60 61 5d/g"\
-e "s/c3 a8/5c 5b 60 65 5d/g"\
-e "s/c3 ac/5c 5b 60 69 5d/g"\
-e "s/c3 b2/5c 5b 60 6f 5d/g"\
-e "s/c3 b9/5c 5b 60 75 5d/g"\
-e "s/c3 83/5c 5b 7e 41 5d/g"\
-e "s/c3 91/5c 5b 7e 4e 5d/g"\
-e "s/c3 95/5c 5b 7e 4f 5d/g"\
-e "s/c3 a3/5c 5b 7e 61 5d/g"\
-e "s/c3 b1/5c 5b 7e 6e 5d/g"\
-e "s/c3 b5/5c 5b 7e 6f 5d/g"\
-e "s/c3 87/5c 5b 2c 43 5d/g"\
-e "s/c3 a7/5c 5b 2c 63 5d/g"\
-e "s/c3 85/5c 5b 6f 41 5d/g"\
-e "s/c3 a5/5c 5b 6f 61 5d/g"\
-e "s/c5 b8/5c 5b 3a 59 5d/g"\
-e "s/c5 a0/5c 5b 76 53 5d/g"\
-e "s/c5 a1/5c 5b 76 73 5d/g"\
-e "s/c5 bd/5c 5b 76 5a 5d/g"\
-e "s/c5 be/5c 5b 76 7a 5d/g"
# Ligatures.
ligatures() sed \
-e "s/ef ac 80/5c 5b 66 66 5d/g"\
-e "s/ef ac 81/5c 5b 66 69 5d/g"\
-e "s/ef ac 82/5c 5b 66 6c 5d/g"\
-e "s/ef ac 83/5c 5b 46 69 5d/g"\
-e "s/ef ac 84/5c 5b 46 6c 5d/g"\
-e "s/c5 81/5c 5b 2f 4c 5d/g"\
-e "s/c5 82/5c 5b 2f 6c 5d/g"\
-e "s/c3 98/5c 5b 2f 4f 5d/g"\
-e "s/c3 b8/5c 5b 2f 6f 5d/g"\
-e "s/c3 86/5c 5b 41 45 5d/g"\
-e "s/c3 a6/5c 5b 61 65 5d/g"\
-e "s/c5 92/5c 5b 4f 45 5d/g"\
-e "s/c5 93/5c 5b 6f 65 5d/g"\
-e "s/c4 b2/5c 5b 49 4a 5d/g"\
-e "s/c4 b3/5c 5b 69 6a 5d/g"\
-e "s/c4 b1/5c 5b 2e 69 5d/g"\
-e "s/c8 b7/5c 5b 2e 6a 5d/g"
# Text markers.
text_markers() sed \
-e "s/e2 97 8b/5c 5b 63 69 5d/g"\
-e "s/e2 80 a2/5c 5b 62 75 5d/g"\
-e "s/e2 80 a1/5c 5b 64 64 5d/g"\
-e "s/e2 80 a0/5c 5b 64 67 5d/g"\
-e "s/e2 97 8a/5c 5b 6c 7a 5d/g"\
-e "s/e2 96 a1/5c 5b 73 71 5d/g"\
-e "s/c2 b6/5c 5b 70 73 5d/g"\
-e "s/c2 a7/5c 5b 73 63 5d/g"\
-e "s/e2 98 9c/5c 5b 6c 68 5d/g"\
-e "s/e2 98 9e/5c 5b 72 68 5d/g"\
-e "s/e2 86 b5/5c 5b 43 52 5d/g"\
-e "s/e2 9c 93/5c 5b 4f 4b 5d/g"
# These markers shouldn't be automatically translated in ms macros.
# @ "s/40/5c 5b 61 74 5d/g"
# # "s/23/5c 5b 73 68 5d/g"
# Legal symbols.
legal_symbols() sed \
-e "s/c2 a9/5c 5b 63 6f 5d/g"\
-e "s/c2 ae/5c 5b 72 67 5d/g"\
-e "s/e2 84 a2/5c 5b 74 6d 5d/g"
# TODO: ├─│└
# For now these box-drawing characters are approximated with ASCII: + - | +
# The stream handled here is hexadecimal text that this script later feeds to
# `xxd -p -r`, so each replacement has to be a hexadecimal byte value
# (2b is "+", 2d is "-", 7c is "|"): a literal "+", "-" or "|" is not a pair
# of hex digits and cannot be decoded back into that character.
misc() sed \
	-e "s/e2 94 9c/2b/g"\
	-e "s/e2 94 80/2d/g"\
	-e "s/e2 94 82/7c/g"\
	-e "s/e2 94 94/2b/g"
hexutf8_to_hexms() {
text_markers | accents | ligatures | legal_symbols | misc
}
to_hex_one_column | regroup_lines | hexutf8_to_hexms | from_hex

View File

@ -0,0 +1,69 @@
.G1
copy "legend.grap"
frame invis ht 3 wid 4 left solid bot solid
coord y 0,50
ticks left out from 0 to 50 by 10
ticks bot out at 50000 "50,000", 100000 "100,000", 150000 "150,000", 200000 "200,000", 250000 "250,000"
label left "Request duration with" unaligned "an index (µs)" "(Median)" left 0.8
label bot "Number of cars in the database" down 0.1
obram = obuncache = obcache = obsemi = 0 # old bullets
cbram = cbuncache = cbcache = cbsemi = 0 # current bullets
legendxleft = 100000
legendxright = 250000
legendyup = 15
legendydown = 2
boite(legendxleft,legendxright,legendyup,legendydown)
legend(legendxleft,legendxright,legendyup,legendydown)
copy "../data/index.d" thru X
cx = $1*5
y_scale = 1000
# ram cached semi uncached
line from cx,$2/y_scale to cx,$4/y_scale
line from cx,$5/y_scale to cx,$7/y_scale
line from cx,$8/y_scale to cx,$10/y_scale
line from cx,$11/y_scale to cx,$13/y_scale
#ty = $3
cx = $1*5
cbram = $3/y_scale
cbcache = $6/y_scale
cbsemi = $9/y_scale
cbuncache = $12/y_scale
if (obram > 0) then {line from cx,cbram to ox,obram}
if (obcache > 0) then {line from cx,cbcache to ox,obcache}
.gcolor blue
if (obsemi > 0) then {line from cx,cbsemi to ox,obsemi}
.gcolor
.gcolor green
if (obuncache > 0) then {line from cx,cbuncache to ox,obuncache}
.gcolor
obram = cbram
obcache = cbcache
obsemi = cbsemi
obuncache = cbuncache
ox = cx
# ram cached semi uncached
.gcolor red
bullet at cx,cbram
.gcolor
bullet at cx,cbcache
.gcolor blue
bullet at cx,cbsemi
.gcolor
.gcolor green
bullet at cx,cbuncache
.gcolor
X
.G2

View File

@ -0,0 +1,66 @@
.G1
copy "legend.grap"
frame invis ht 3 wid 4 left solid bot solid
coord x 0,5000*2 y 0,350
ticks left out from 0 to 350 by 50
label left "Request duration" unaligned "for a partition (ms)" "(Median)" left 0.8
label bot "Number of cars matching the partition" down 0.1
obram = obuncache = obcache = obsemi = 0
cbram = cbuncache = cbcache = cbsemi = 0
legendxleft = 1000
legendxright = 6500
legendyup = 330
legendydown = 230
boite(legendxleft,legendxright,legendyup,legendydown)
legend(legendxleft,legendxright,legendyup,legendydown)
copy "../data/partitions.d" thru X
cx = $1*2
y_scale = 1000000
# ram cached semi uncached
line from cx,$2/y_scale to cx,$4/y_scale
line from cx,$5/y_scale to cx,$7/y_scale
line from cx,$8/y_scale to cx,$10/y_scale
line from cx,$11/y_scale to cx,$13/y_scale
#ty = $3
cbram = $3/y_scale
cbcache = $6/y_scale
cbsemi = $9/y_scale
cbuncache = $12/y_scale
if (obram > 0) then {line from cx,cbram to ox,obram}
if (obcache > 0) then {line from cx,cbcache to ox,obcache}
.gcolor blue
if (obsemi > 0) then {line from cx,cbsemi to ox,obsemi}
.gcolor
.gcolor green
if (obuncache > 0) then {line from cx,cbuncache to ox,obuncache}
.gcolor
obram = cbram
obcache = cbcache
obsemi = cbsemi
obuncache = cbuncache
ox = cx
# ram cached semi uncached
.gcolor red
bullet at cx,cbram
.gcolor
bullet at cx,cbcache
.gcolor blue
bullet at cx,cbsemi
.gcolor
.gcolor green
bullet at cx,cbuncache
.gcolor
X
.G2

View File

@ -0,0 +1,65 @@
.G1
copy "legend.grap"
frame invis ht 3 wid 4 left solid bot solid
coord x 0,5000 y 0,170
ticks left out from 0 to 170 by 20
label left "Request duration" unaligned "for a tag (ms)" "(Median)" left 0.8
label bot "Number of cars matching the tag" down 0.1
obram = obuncache = obcache = obsemi = 0
cbram = cbuncache = cbcache = cbsemi = 0
legendxleft = 200
legendxright = 3000
legendyup = 170
legendydown = 120
boite(legendxleft,legendxright,legendyup,legendydown)
legend(legendxleft,legendxright,legendyup,legendydown)
copy "../data/tags.d" thru X
cx = $1
y_scale = 1000000
# ram cached semi uncached
line from cx,$2/y_scale to cx,$4/y_scale
line from cx,$5/y_scale to cx,$7/y_scale
line from cx,$8/y_scale to cx,$10/y_scale
line from cx,$11/y_scale to cx,$13/y_scale
#ty = $3
cbram = $3/y_scale
cbcache = $6/y_scale
cbsemi = $9/y_scale
cbuncache = $12/y_scale
if (obram > 0) then {line from cx,cbram to ox,obram}
if (obcache > 0) then {line from cx,cbcache to ox,obcache}
.gcolor blue
if (obsemi > 0) then {line from cx,cbsemi to ox,obsemi}
.gcolor
.gcolor green
if (obuncache > 0) then {line from cx,cbuncache to ox,obuncache}
.gcolor
obram = cbram
obcache = cbcache
obsemi = cbsemi
obuncache = cbuncache
ox = cx
# ram cached semi uncached
.gcolor red
bullet at cx,cbram
.gcolor
bullet at cx,cbcache
.gcolor blue
bullet at cx,cbsemi
.gcolor
.gcolor green
bullet at cx,cbuncache
.gcolor
X
.G2

347
graphs/graphs.ms Normal file
View File

@ -0,0 +1,347 @@
.so macros.roff
.de TREE1
.QP
.KS
.ft CW
.b1
.nf
..
.de TREE2
.ft
.fi
.b2
.KE
.QE
..
.
. \" The document starts here.
.
.TITLE Document Oriented DataBase (DODB)
.AUTHOR Philippe P.
.ABSTRACT1
DODB is a database-as-library, enabling a very simple way to store applications' data: storing serialized
.I documents
(basically any data type) in plain files.
To speed up searches, attributes of these documents can be used as indexes, which leads to the creation of a few symbolic links
.I symlinks ) (
on the disk.
This document briefly presents DODB and its main differences with other database engines.
An experiment is described and analysed to understand the performance that can be expected from this approach.
.ABSTRACT2
.SINGLE_COLUMN
.SECTION Introduction to DODB
A database consists in managing data, enabling queries (preferably fast) to retrieve, to modify, to add and to delete a piece of information.
Anything else is
.UL accessory .
Universities all around the world teach about Structured Query Language (SQL) and relational databases.
.
.de PRIMARY_KEY
.I \\$1 \\$2 \\$3
..
.de FOREIGN_KEY
.I \\$1 \\$2 \\$3
..
.UL "Relational databases"
are built around the idea to put data into
.I tables ,
with typed columns so the database can optimize operations and storage.
A database is a list of tables with relations between them.
For example, let's imagine a database of a movie theater.
The database will have a
.I table
for the list of movies they have
.PRIMARY_KEY idmovie , (
title, duration, synopsis),
a table for the scheduling
.PRIMARY_KEY idschedule , (
.FOREIGN_KEY idmovie ,
.FOREIGN_KEY idroom ,
time slot),
a table for the rooms
.PRIMARY_KEY idroom , (
name), etc.
Tables have relations, for example the table "scheduling" has a column
.I idmovie
which points to entries in the "movie" table.
.UL "The SQL language"
enables arbitrary operations on databases: add, search, modify and delete entries.
Furthermore, SQL also covers the administration of the databases themselves: creating databases and tables, managing users with fine-grained authorizations, etc.
SQL is used between the application and the database, to perform operations and to provide results when due.
SQL is also used
.UL outside
the application, by admins for managing databases and potentially by some
.I non-developer
users to retrieve some data without a dedicated interface\*[*].
.FOOTNOTE1
One of the first objectives of SQL was to enable a class of
.I non-developer
users to talk directly to the database so they can access the data without bothering the developers.
This has value for many companies and organizations.
.FOOTNOTE2
Many tools were used or even developed over the years specifically to alleviate the inherent complexity and limitations of SQL.
For example, designing databases becomes difficult when the list of tables grows;
Unified Modeling Language (UML) is then used to provide a graphical overview of the relations between tables.
SQL databases may be fast to retrieve data despite complicated operations, but when multiple sequential operations are required they become slow because of all the back-and-forths with the application;
thus, SQL databases can be scripted to automate operations and provide a massive speed up
.I "stored procedures" , (
see
.I "PL/SQL" ).
Writing SQL requests requires a lot of boilerplate since there is no integration in the programming languages, leading to multiple function calls for any operation on the database;
thus, object-relational mapping (ORM) libraries were created to reduce the massive code duplication.
And so on.
For many reasons, SQL is not a silver bullet to
.I solve
the database problem.
The encountered difficulties mentioned above and the original objectives of SQL not being universal\*[*], other database designs were created\*[*].
.FOOTNOTE1
To say the least!
Not everyone needs to let users access the database without going through the application.
For instance, writing a \f[I]blog\f[] for a small event or to share small stories about your life doesn't require manual operations on the database, fortunately.
.FOOTNOTE2
.FOOTNOTE1
A lot of designs won't be mentioned here.
The actual history of databases is often quite unclear since the categories of databases are sometimes vague, underspecified.
As mentioned, SQL is not a silver bullet and a lot of developers shifted towards other solutions, that's the important part.
.FOOTNOTE2
The NoSQL movement started because the stated goals of many actors from the early Web boom were different from SQL.
The need for very fast operations far exceeded what was practical at the moment with SQL.
This led to the use of more basic methods to manage data such as
.I "key-value stores" ,
which simply associate a value with an
.I index
for fast retrieval.
In this case, there is no need for the database to have
.I tables ,
data may be untyped, the entries may even have different attributes.
Since homogeneity is not necessary anymore, databases have fewer (or different) constraints.
Document-oriented databases are a sub-class of key-value stores, where metadata can be extracted from the entries for further optimizations.
And that's exactly what is being done in Document Oriented DataBase (DODB).
.UL "Contrary to SQL" ,
DODB has a very narrow scope: to provide a library enabling to store, retrieve, modify and delete data.
In this way, DODB transforms any application into a database manager.
DODB doesn't provide an interactive shell, there is no request language to perform arbitrary operations on the database, no statistical optimizations of the requests based on query frequencies, etc.
Instead, DODB reduces the complexity of the infrastructure, stores data in plain files and enables simple manual scripting with widespread unix tools.
Simplicity is key.
.UL "Contrary to other NoSQL databases" ,
DODB doesn't provide an application but a library, nothing else.
The idea is to help developers to store their data themselves, not depending on
. I yet-another-all-in-one
massive tool.
The library writes (and removes) data on a storage device, has a few retrieval and update mechanisms and that's it\*[*].
.FOOTNOTE1
The lack of features
.I is
the feature.
Even with that motto, the tool still is expected to be convenient for most applications.
.FOOTNOTE2
This document will provide an extensive documentation on how DODB works and how to use it.
The presented code is in Crystal, like the DODB library itself for now, but keep in mind that this document is all about the method more than the actual implementation; anyone could implement the exact same library in almost any other language.
Limitations are also clearly stated in a dedicated section.
A few experiments are described to provide an overview of the performance you can expect from this approach.
Finally, a conclusion is drawn based on a real-world usage of this library.
.
.SECTION How DODB works and basic usage
DODB is a hash table.
The key of the hash is an auto-incremented number and the value is the stored data.
The following section will explain how to use DODB for basic cases including the few added mechanisms to speed-up searches.
Also, the file-system representation of the data will be presented since it enables easy off-application searches.
.SS Before starting: the example database
First things first, the following code is the structure used in the rest of the document to present the different aspects of DODB.
This is a simple object
.I Car ,
with a name, a color and a list of associated keywords (fast, elegant, etc.).
.SOURCE Ruby ps=10
class Car
property name : String
property color : String
property keywords : Array(String)
end
.SOURCE
.SS DODB basic usage
Let's create a DODB database for our cars.
.SOURCE Ruby ps=10
# Database creation
db = DODB::DataBase(Car).new "path/to/db-cars"
# Adding an element to the db
db << Car.new "Corvet", "red", ["elegant", "fast"]
# Reaching all objects in the db
db.each do |car|
pp! car
end
.SOURCE
When a value is added, it is serialized\*[*] and written in a dedicated file.
.FOOTNOTE1
Serialization is currently in JSON.
CBOR is a work-in-progress.
Nothing binds DODB to a particular format.
.FOOTNOTE2
The key of the hash is a number, auto-incremented, used as the name of the stored file.
The following example shows the content of the file system after adding the first car.
.TREE1
$ tree db-cars/
db-cars/
|-- data
| `-- 0000000000 <- the first car in the database
`-- last-index
.TREE2
In this example, the directory
.I db-cars/data
contains the serialized value, with a formatted number as file name.
The file "0000000000" contains the following:
.QP
.SOURCE JSON ps=10
{
"name": "Corvet",
"color": "red",
"keywords": [
"elegant",
"fast"
]
}
.SOURCE
.QE
.SS Indexes
Database entries can be
.I indexed
based on their attributes.
There are currently three main ways to search a value by its attributes: basic indexes, partitions and tags.
.SSS Basic indexes (1 to 1 relation)
Basic indexes represent one-to-one relations, such as an index in SQL.
For example, in a database of
.I cars ,
each car can have a dedicated (unique) name.
This
.I name
attribute can be used to speed-up searches.
On the file-system, this will be translated as this:
.TREE1
storage
+-- data
|  `-- 0000000000
`-- indexes
   `-- by_name
   `-- Ford C-MAX -> ../../data/0000000000
.TREE2
As shown, the file "Ford C-MAX" is a symbolic link to a data file.
The name of the symlink file has been extracted from the value itself, enabling to list all the cars and their names with a simple
.UL ls
in the
.I storage/indexes/by_name/
directory.
.TBD
.SECTION A few more options
.TBD
.SECTION Limits of DODB
.TBD
.SECTION Experimental scenario
.LP
The following experiment shows the performance of DODB based on querying durations.
Data can be searched via
.I indexes ,
as for SQL databases.
Three possible indexes exist in DODB:
(a) basic indexes, representing 1 to 1 relations, the document's attribute is related to a value and each value of this attribute is unique,
(b) partitions, representing 1 to n relations, the attribute has a value and this value can be shared by other documents,
(c) tags, representing n to n relations, enabling the attribute to have multiple values which are shared by other documents.
The scenario is simple: adding values to a database with indexes (basic, partitions and tags) then query 100 times a value based on the different indexes.
Loop and repeat.
Four instances of DODB are tested:
.BULLET \fIuncached database\f[] shows the achievable performance with a strong memory constraint (nothing can be kept in-memory) ;
.BULLET \fIuncached data but cached index\f[] shows the improvement you can expect by having a cache on indexes ;
.BULLET \fIcached database\f[] shows the most basic use of DODB\*[*] ;
.BULLET \fIRAM only\f[], the database doesn't have a representation on disk (no data is written on it).
The \fIRAM only\f[] instance shows a possible way to use DODB: to keep a consistent API to store data, including in-memory data with a lifetime related to the application's.
.ENDBULLET
.FOOTNOTE1
Having a cached database will probably be the most widespread use of DODB.
When memory isn't scarce, there is no point not using it to achieve better performance.
.FOOTNOTE2
The computer on which this test is performed\*[*] is an AMD PRO A10-8770E R7 (4 cores), 2.8 GHz.
When mentioned, the
.I disk
is actually a
.I "temporary file-system (tmpfs)"
to enable maximum efficiency.
.FOOTNOTE1
A very simple $50 PC, bought online.
Nothing fancy.
.FOOTNOTE2
The library is written in Crystal and so is the benchmark (\f[CW]spec/benchmark-cars.cr\f[]).
Nonetheless, despite a few technicalities, the objective of this document is to provide an insight on the approach used in DODB more than this particular implementation.
The manipulated data type can be found in \f[CW]spec/db-cars.cr\f[].
.SOURCE Ruby ps=9 vs=9p
class Car
property name : String # 1-1 relation
property color : String # 1-n relation
property keywords : Array(String) # n-n relation
end
.SOURCE
.
.SS Basic indexes (1 to 1 relations)
.LP
An index enables to match a single value based on a small string.
In our example, each \f[CW]car\f[] has a unique \fIname\f[] which is used as an index.
The following graph represents the result of 100 queries of a car based on its name.
The experiment starts with a database containing 1,000 cars and goes up to 250,000 cars.
.so graph_query_index.grap
Since there is only one value to retrieve, the request is quick and time is almost constant.
When the value and the index are kept in memory (see \f[CW]RAM only\f[] and \f[CW]Cached db\f[]), the retrieval is almost instantaneous (about 50 to 120 ns).
In case the value is on the disk, deserialization takes about 15 µs (see \f[CW]Uncached db, cached index\f[]).
The request is a little longer when the index isn't cached (see \f[CW]Uncached db and index\f[]); in this case DODB walks the file-system to find the right symlink to follow, thus slowing the process even more, by up to 20%.
.TS
allbox tab(:);
c | lw(4.0i) | cew(1.4i).
DODB instance:Comment and database usage:T{
compared to RAM only
T}
RAM only:T{
Worst memory footprint (all data must be in memory), best performance.
T}:-
Cached db and index:T{
Performance for retrieving a value is the same as RAM only while
enabling the admin to manually search for data on-disk.
T}:about the same perfs
Uncached db, cached index::300 to 400x slower
Uncached db and index:T{
Best memory footprint, worst performance.
T}:400 to 500x slower
.TE
.B Conclusion :
as expected, retrieving a single value is fast and the size of the database doesn't matter much.
Each deserialization and, more importantly, each disk access is a pain point.
Caching the value enables a massive performance gain, data can be retrieved several hundred times quicker.
.bp
.SS Partitions (1 to n relations)
.LP
.so graph_query_partition.grap
.bp
.SS Tags (n to n relations)
.LP
.so graph_query_tag.grap
.
.SECTION Future work
.TBD
.SECTION Conclusion
.TBD

47
graphs/legend.grap Normal file
View File

@ -0,0 +1,47 @@
define boite {
xleft = $1
xright = $2
yup = $3
ydown = $4
line from xleft,ydown to xright,ydown
line from xleft,yup to xright,yup
line from xleft,yup to xleft,ydown
line from xright,yup to xright,ydown
}
define legend {
xleft = $1
xright = $2
yup = $3
ydown = $4
diffx = xright - xleft
diffy = yup - ydown
hdiff = diffy/4.3
cy = yup - (diffy/6)
cx = (diffx/20) + xleft
lstartx = cx
lendx = cx + diffx/8
tstartx = lendx + diffx/20
.gcolor red
line from lstartx,cy to lendx,cy
.gcolor
"RAM only" ljust at tstartx,cy
cy = cy - hdiff
line from lstartx,cy to lendx,cy
"Cached db and index" ljust at tstartx,cy
cy = cy - hdiff
.gcolor blue
line from lstartx,cy to lendx,cy
.gcolor
"Uncached db, cached index" ljust at tstartx,cy
cy = cy - hdiff
.gcolor green
line from lstartx,cy to lendx,cy
.gcolor
"Uncached db and index" ljust at tstartx,cy
}

624
graphs/macros.roff Normal file
View File

@ -0,0 +1,624 @@
.\" .RP = report document
.nr PO 0.5i \" page offset default 1i
.nr LL 7.0i \" line length default 6i
.nr FM 0.3i \" page foot margin default 1i
.nr DI 0
.nr FF 3 \" footnotes' type: numbered, with point, indented
.nr PS 12
.
.nr LIST_NUMBER 0 +1
.
.R1
no-label-in-reference
accumulate
.R2
.
. \" COLORS
.defcolor darkgreen rgb 0.1 0.5 0.2
.defcolor darkblue rgb 0.3 0.3 0.7
.defcolor darkred rgb 0.7 0.3 0.3
.defcolor black rgb 0 0 0
.defcolor color_box rgb 1 1 .6
.
. \" with semantic
.defcolor citation rgb 0.4 0.4 0.4
.defcolor citationbar rgb 0.3 0.3 0.7
.defcolor explanation rgb 0.7 0.4 0.4
.defcolor explanationbar rgb 0.8 0.3 0.3
.
.defcolor specialcolor_command rgb 0.7 0.3 0.3
.defcolor specialcolor_type rgb 0.6 0.3 0.5
.defcolor specialcolor_constructor rgb 0.1 0.5 0.2
.defcolor specialcolor_module rgb 0.1 0.5 0.2
.defcolor specialcolor_function rgb 0.4 0.4 0.7
.defcolor specialcolor_question rgb 0.0 0.0 0.7
.defcolor specialcolor_operator rgb 0.3 0.8 0.3
.defcolor specialcolor_shine rgb 0.3 0.3 0.7
.
. \" SIZES
.nr specialsize_command 10
.nr specialsize_type 8
.nr specialsize_constructor 8
.nr specialsize_module 8
.nr specialsize_function 8
.nr specialsize_operator 9
.nr specialsize_question 10 \" Current point size, no change.
.nr specialsize_shine 11
.
. \" FONTS
.ds specialfont_command CW
.ds specialfont_type CW
.ds specialfont_constructor CW
.ds specialfont_module CW
.ds specialfont_function CW
.ds specialfont_operator CW
.ds specialfont_question I
.ds specialfont_shine B
.
.
.de BELLOWEXPLANATION1
.sp 0.5
.ps 7 \" point size (~= font size)
.vs 8p \" vertical spacing between lines
..
.de BELLOWEXPLANATION2
.br
.ps 9
.vs 11p
..
.
.\" BULLET and ENUM => do not add space when no parameter are provided
.de BULLET \" Bullet points
.IP \(bu 2
.ie '\\$1'' \
.
.el \\$*
..
.de ENDBULLET
.in -2 \" indent
..
.
.de ENUM \" Numbered list
.nr LIST_NUMBER +1
.IP \\n[LIST_NUMBER] 2
.ie '\\$1'' \
.
.el \\$*
..
.de ENDENUM
.nr LIST_NUMBER 0
.in -2 \" indent
..
.
.de b1 \" Begin code box
.B1
.sp 0.2
.ft CW
..
.de b2 \" End code box
.sp 0.5
.B2
.ft
..
.
.de CITATION1
.KS \" start a keep
.ft I \" citation in italics
.mk C \" set a marker for line drawing
.in +1 \" indent a bit
.gcolor citation
..
.ig
The CITATION2 macro closes the quote then draws a line
from current line to the start of the quote.
..
.de CITATION2
.mk D \" set second marker to come back here
.ft \" back to previous font
.in -1 \" remove indent
.gcolor \" remove previous color
.gcolor citationbar
.\" r = move upward
.\" Z D t = drawing thickness
.\" L = draw the line
\r\
\Z'\D't 1p''\
\L'|\\nCu' \" draw line
.gcolor black \" remove previous color
.sp -2 \" get two lines back
\Z'\D't 1'' \" get the previous drawing thickness back
.KE \" end of the keep
..
.
.de NAMECITATION
.QP
.vs -\\n[legendps]p
.ps -\\n[legendps]
.in -1.2
.ll +1.2
\h'|-2'\(em\h'|-0.4'
\\$*
.br
.LP
..
.
.\" EXPLANATION1: opens an explanation block (closed by EXPLANATION2).
.\" Starts a keep, switches to bold, records the current vertical position
.\" in register C (read back by EXPLANATION2 to draw its vertical bar),
.\" indents the text and switches to the `explanation' color.
.de EXPLANATION1
.KS \" start a keep
.ft B \" explanation in bold
.mk C \" set a marker for line drawing
.in +1 \" indent a bit
.gcolor explanation
..
.\" EXPLANATION2: closes an explanation block opened by EXPLANATION1.
.\" Restores font and indentation, then draws a vertical line in the
.\" `explanationbar' color up to the position stored in register C,
.\" and finally ends the keep.
.de EXPLANATION2
.ft \" back to previous font
.in -1 \" remove indent
.gcolor \" back to the previous color
.gcolor explanationbar
\r\L'|\\nCu' \" draw line (\r moves upward, \L draws the line up to mark C)
.gcolor \" back to the previous color
.sp -1 \" move one line back up
.KE \" end of the keep
..
.
.de METAINFO1
.ft CW \" constant width font
.ps 8 \" small font
.vs 9p \" smaller vertical spacing between lines
..
.de METAINFO2
.sp 1
.vs \" come back to the previous vertical spacing
.ps \" come back to the previous point size
.ft \" come back to the previous font
.sp -1 \" return one line above
..
.
.
.de FRAC
.ie '\\$3'' \{\
\v'-.7m\s[\\n(.s*6u/10u]+.7m'\\$1\v'-.7m\s0+.7m'\
\(f/\s[\\n(.s*6u/10u]\\$2\s0
\}
.el \{\
\v'-.7m\s[\\n(.s*6u/10u]+.7m'\\$1\v'-.7m\s0+.7m'\
\(f/\s[\\n(.s*6u/10u]\\$2\s0\\$3
\}
..
.de FOOTNOTE_TO_COLUMN_WIDTH
.nr pg@fn-colw \\n[pg@colw] \" footnotes' column width
..
.de SINGLE_COLUMN
.1C
.\" .FOOTNOTE_TO_COLUMN_WIDTH
.nr FL (\n[LL]*97/100)
..
.de TWO_COLUMNS
.2C
.FOOTNOTE_TO_COLUMN_WIDTH
..
.de HORIZONTALLINE
\l'15'
.FOOTNOTE_TO_COLUMN_WIDTH
..
.
. \" Fonts and colors.
.
.\" SPECIAL_WORDS word [suffix [prefix]]
.\" Typesets $1 with the font, point size and color registered for the
.\" semantic token whose name is stored in the string `semantictoken'
.\" (set by the calling macro: COMMANDNAME, FUNCTION, TYPE, ...).
.\"   $1  the word to highlight
.\"   $2  optional text glued right after the word, printed after the
.\"       font, size and color have been reset
.\"   $3  optional text glued right before the word, printed before the
.\"       special color and size are applied
.\" The color is reset to black afterwards (see FIXME below).
.de SPECIAL_WORDS
.\" Plain `.if': there is no matching `.el', so `.ie' must not be used.
.if !'\\$3'' \\$3\c
.nr current_size \\n[.s] \" Current point size.
.gcolor specialcolor_\\*[semantictoken]
.
.\" Change the point size only when the token requires a different one.
.if !(\\n[current_size] == \\n[specialsize_\\*[semantictoken]]) \
.ps \\n[specialsize_\\*[semantictoken]]
.
.ie '\\$2'' \{\
\f[\\*[specialfont_\\*[semantictoken]]]\\$1\f[]
. ps \\n[current_size]
. gcolor black \" FIXME: should be the previous color
\}
.el \{\
\f[\\*[specialfont_\\*[semantictoken]]]\\$1\f[]\c
. ps \\n[current_size]
. gcolor black \" FIXME: should be the previous color
\\$2
\}
..
.de SMALLFONT
.ps 8
.vs 9p
..
.de NORMALFONT
.vs
.ps
..
.de COMMAND1
.b1
..
.de COMMAND2
.b2
..
.de COMMANDNAME
.ds semantictoken command
.SPECIAL_WORDS \\$@
..
.de FUNCTION
.ds semantictoken function
.SPECIAL_WORDS \\$@
..
.de TYPE
.ds semantictoken type
.SPECIAL_WORDS \\$@
..
.de TYPECLASS
.I "\\$1" "\\$2"
..
.de OPERATOR
.ds semantictoken operator
.SPECIAL_WORDS \\$@
..
.de QUESTION
.ds semantictoken question
.SPECIAL_WORDS \\$@
\h'5p'
..
.de CONSTRUCTOR
.ds semantictoken constructor
.SPECIAL_WORDS \\$@
..
.de MODULE
.ds semantictoken module
.SPECIAL_WORDS \\$@
..
.de SHINE
.ds semantictoken shine
.SPECIAL_WORDS \\$@
..
.de MODULEX
.MODULE \\$1 ,
..
.de TBD
.ft B
To be defined or to finish.
.ft R
..
.de ARROW
.br
\(->\h'5p' \\$*
..
.af dy 00
.af mo 00
.ds CURRENT_DATE \\n(dy/\\n(mo/\\n[year]
.ds WEBSITE https://t.karchnu.fr/doc
.ds EMAIL karchnu@karchnu.fr
.de INFORMATIONS
Check out for newer versions:
.ft CW
.ps 8
\h'2p' \\$1
.ps
.ft
.br
And if you have questions:
.ft CW
\h'13p' \\$2
.ft
.\" .DE
.LP
Lastly compiled the
.SHINE \*[CURRENT_DATE]
(day/month/year, you know, like in any sane civilization).
..
.de INFORMATIONS_FR
.LP
Nouvelles versions :
.ft CW
.ps 8
\h'2p' \\$1
.ps
.ft
.br
Questions :
.ft CW
\h'36p' \\$2
.ft
.\" .DE
.LP
Compilé pour la dernière fois le
.SHINE \*[CURRENT_DATE]
..
.
.\" RENAMING REQUESTS
.
.de SECTION
.NH
.ps +3
.fam H \" helvetica family
\\$*
.fam \" back to previous font family
.ps
.PARAGRAPH_INDENTED
..
.de SUBSECTION
.NH 2
.ps +1
.fam H \" helvetica family
\\$*
.fam \" back to previous font family
.ps
.PARAGRAPH_INDENTED
..
.de SUBSUBSECTION
.NH 3
.fam H \" helvetica family
\\$*
.fam \" back to previous font family
.ps
.PARAGRAPH_INDENTED
..
.de SUBSUBSUBSECTION
.NH 4
.fam H \" helvetica family
\\$*
.fam \" back to previous font family
.PARAGRAPH_INDENTED
..
.de SECTION_NO_NUMBER
.SH
.fam H \" helvetica family
\\$*
.fam \" back to previous font family
.PARAGRAPH_INDENTED
..
.de SUBSECTION_NO_NUMBER
.SH 2
.fam H \" helvetica family
\\$*
.fam \" back to previous font family
.PARAGRAPH_INDENTED
..
.de PARAGRAPH_INDENTED
.PP
..
.de PARAGRAPH_UNINDENTED
.LP
..
.de NO_ABSTRACT
.AB no
..
.de ABSTRACT1
.AB
..
.de ABSTRACT2
.AE
..
.ds CH Page %
.de TITLE
.TL
\\$*
.ds LH \\$*
.de HD .XX
.sp -2.3
.nr LINEWIDTH (\n[LL]/1.0i)
\l'\\\\n[LINEWIDTH]i'
.sp +1.5
.br
..XX
..
.\" AUTHOR [name ...]
.\" Starts the ms author block (.AU) and, when arguments are given,
.\" prints them as the author line.
.de AUTHOR
. AU
.\" Plain `.if': there is no matching `.el', so `.ie' must not be used.
. if !'\\$1'' \\$*
..
.de FOOTNOTE1
. FS
..
.de FOOTNOTE2
. FE
..
.de VOCABULARY1
. KS
. BULLET
. UL "\\$*" :
..
.de VOCABULARY2
. KE
..
.
.
.de HIGHLIGHT
.
. nr @wd \w'\\$1'
. nr x1 0
. nr y1 (\\n[rst]u - \\n[rsb]u + .4m)
. nr x2 (\\n[@wd]u + .4m)
. nr y2 0
. nr x3 0
. nr y3 (\\n[rst]u - \\n[rsb]u + .4m)
. nr x4 (\\n[@wd]u + .4m)
. nr y4 0
.
\h'.2m'\
\h'-.2m'\v'(.2m - \\n[rsb]u)'\
\M[color_box]\
\D'P \\n[x1] -\\n[y1]u \\n[x2]u \\n[y2]u \\n[x3]u \\n[y3]u -\\n[x4]u \\n[y4]u '\
\h'.2m'\v'-(.2m - \\n[rsb]u)'\
\M[]\
\\$1\
\h'.2m'
..
.
.
.
.ds SPACE_SS_NUMBER_TITLE 0.5\" not a number register because of leading 0
.nr CURRENT_SECTION 0 +1
.nr CURRENT_APPENDIX 0
.af CURRENT_APPENDIX I
.nr CURRENT_SUBSECTION 0 +1
.nr CURRENT_SSSECTION 0 +1
.rm SECTION
.\" SECTION title ...
.\" Prints a numbered section heading (bold Helvetica, +2 points) and
.\" stores it in the string RH.  Subsection and subsubsection counters are
.\" reset.  Outside appendices the section counter is auto-incremented;
.\" once APPENDIX_TIME is set the appendix counter is used instead and the
.\" section starts on a new page.
.de SECTION
. nr CURRENT_SUBSECTION 0 \" reset current subsection numbering
. nr CURRENT_SSSECTION 0 \" reset current subsubsection numbering
.\" Extra vertical space before every section but the first one.
.\" Plain `.if': there is no matching `.el', so `.ie' must not be used.
. if !(\\n[CURRENT_SECTION]=0) .sp +1
. br
. ie (\\n[APPENDIX_TIME]=0) \
. ds RH \\n+[CURRENT_SECTION].\h'\\*[SPACE_SS_NUMBER_TITLE]' \\$*
. el \{
. ds RH \\n[CURRENT_APPENDIX].\h'\\*[SPACE_SS_NUMBER_TITLE]' \\$*
. bp \}
. ps +2
. fam H \" helvetica family
. ft B
. ne 4 \" should be at least a few lines left at the bottom of the page
\\*[RH]
. ft
. fam \" back to previous font family
. ps -2
. PARAGRAPH_INDENTED
..
.nr APPENDIX_TIME 0
.de APPENDIX
. nr CURRENT_APPENDIX +1
. nr APPENDIX_TIME 1
. SECTION \\$*
..
.de SS
. nr CURRENT_SSSECTION 0
. ie (\\n[APPENDIX_TIME]=0) \
. SUBSECTION_NO_NUMBER \\n[CURRENT_SECTION].\
\\n+[CURRENT_SUBSECTION]\h'\\*[SPACE_SS_NUMBER_TITLE]' \\$*
.el \
. SUBSECTION_NO_NUMBER \\n[CURRENT_APPENDIX].\
\\n+[CURRENT_SUBSECTION]\h'\\*[SPACE_SS_NUMBER_TITLE]' \\$*
..
.de SSS
. br
. ps -2
. fam H \" helvetica family
. ft B
. ie (\\n[APPENDIX_TIME]=0) \
. SUBSECTION_NO_NUMBER \\n[CURRENT_SECTION].\
\\n[CURRENT_SUBSECTION].\\n+[CURRENT_SSSECTION]\h'\
\\*[SPACE_SS_NUMBER_TITLE]' \\$*
. el \
\\n[CURRENT_APPENDIX].\
\\n[CURRENT_SUBSECTION].\\n+[CURRENT_SSSECTION]\h'\
\\*[SPACE_SS_NUMBER_TITLE]' \\$*
. ft
. fam \" back to previous font family
. ps +2
. PARAGRAPH_INDENTED
..
.de INNERBULLET
. in +1
. br
\(bu
. in +1
. sp -1
\\$*
. in -2
..
.\" EENUM text ...
.\" Emits one item of a numbered list: prints the auto-incremented
.\" LIST_NUMBER followed by a dot, then the item text indented by
.\" ENUM_INDENTATION.  The list is closed with EENDENUM.
.de EENUM \" Numbered list
. nr ENUM_INDENTATION 2
.\" For every item but the first, undo the indentation of the previous item.
.\" Plain `.if': there is no matching `.el', so `.ie' must not be used.
. if !(\\n[LIST_NUMBER]=0) .in -\\n[ENUM_INDENTATION]
. br
\\n+[LIST_NUMBER].
. in +\\n[ENUM_INDENTATION]
. sp -1
\\$*
..
.de EENDENUM
. nr LIST_NUMBER 0
. in -\\n[ENUM_INDENTATION]
..
.nr legendps 2
.de LEGEND1
. QP
. vs -\\n[legendps]p
. ps -\\n[legendps]
. in -1.2
. ll +1.2
. br
..
.de LEGEND2
. br
. vs +\\n[legendps]p
. ps +\\n[legendps]
. br
. LP
..
.de IEME
\\$1\u\s-4\\$2\s+4\d
..
.de CENTERED
. ce
\\$*
. br
..
.de GIVEEXAMPLE1
. in +1
. ll -1
. KS \" start a keep
. \" .ft I \" citation in italics
. mk C \" set a marker for line drawing
. in +1 \" indent a bit
. gcolor citation
..
.de GIVEEXAMPLE2
. mk D \" set second marker to come back here
. \" .ft \" back to previous font
. in -1 \" remove indent
. gcolor black\" remove previous color
. gcolor citationbar
. \" r = move upward
. \" Z D t = drawing thickness
. \" L = draw the line
\r\
\Z'\D't 1p''\
\L'|\\nCu' \" draw line
. gcolor black \" remove previous color
. sp -2 \" get two lines back
\Z'\D't 0.5p'' \" get the previous drawing thickness back
. KE \" end of the keep
. ll +1
. in -1
..
.de ST
.nr ww \w'\\$1'
\Z@\v'-.25m'\l'\\n[ww]u'@\\$1
..
.de INCREMENT
.br
.in \\*[PINCREMENT]
.br
\h'-\\*[DECALAGE]'\\*[CHARACTER]\h'|0'\\$*
..
.de D
.ds DECALAGE 1.0
.ds PINCREMENT 2
.ds CHARACTER \(bu
.INCREMENT \\$*
..
.de DD
.ds DECALAGE 1.0
.ds PINCREMENT 3
.ds CHARACTER \(bu
.INCREMENT \\$*
..
.de AA
.ds DECALAGE 1.5
.ds PINCREMENT 3
.ds CHARACTER \(->
.INCREMENT \\$*
..
.de AAA
.ds DECALAGE 1.5
.ds PINCREMENT 4
.ds CHARACTER \(->
.INCREMENT \\$*
..
.de ED
.br
.in 0
..

4
readlink.cr Normal file
View File

@ -0,0 +1,4 @@
# Small debugging helper: inspects the path given as first command-line
# argument, which is expected to be a symbolic link.
# Usage: readlink <path>

# The path as received on the command line.
pp! ARGV[0]
# Target of the symlink, stripped of everything up to the last '/' and
# converted to an integer.
pp! File.readlink(ARGV[0]).sub(/^.*\//, "").to_i
# Whether the path exists.
pp! File.exists? ARGV[0]
# Whether the path itself is a symbolic link.
pp! File.symlink? ARGV[0]

View File

@ -1,181 +1,188 @@
require "benchmark"
require "./benchmark-utilities.cr"
require "./utilities.cr"
require "./db-cars.cr"
require "../src/dodb.cr"
require "./test-data.cr"
# List of environment variables and default values:
# ENV["CARNAME"] rescue "Corvet-#{(db_size/2).to_i}"
# ENV["CARCOLOR"] rescue "red"
# ENV["CARKEYWORD"] rescue "spacious"
# ENV["DBSIZE"] rescue 50_000
# ENV["DBSIZE_START"] rescue 1_000
# ENV["DBSIZE_INCREMENT"] rescue 1_000
# ENV["REPORT_DIR"] rescue "results"
# ENV["NBRUN"] rescue 100
# ENV["MAXINDEXES"] rescue 5_000
class DODBCachedCars < DODB::CachedDataBase(Car)
property storage_dir : String
def initialize(storage_ext = "", remove_previous_data = true)
@storage_dir = "test-storage-cars-cached#{storage_ext}"
if remove_previous_data
::FileUtils.rm_rf storage_dir
class DODB::Storage(V)
def empty_db
while pop
end
super storage_dir
end
def rm_storage_dir
::FileUtils.rm_rf @storage_dir
end
end
class DODBUnCachedCars < DODB::DataBase(Car)
property storage_dir : String
def initialize(storage_ext = "", remove_previous_data = true)
@storage_dir = "test-storage-cars-uncached#{storage_ext}"
class Context
class_property report_dir = "results"
class_property max_indexes = 5_000
class_property nb_run = 100
class_property from = 1_000
class_property to = 50_000
class_property incr = 1_000
end
if remove_previous_data
::FileUtils.rm_rf storage_dir
# To simplify the creation of graphs, it's better to have fake data for
# partitions and tags that won't be actually covered.
# 0 means the absence of data.
def fake_report(name)
durations = Array(Int32).new Context.nb_run, 0
File.open("#{Context.report_dir}/#{name}.raw", "w") do |file|
durations.each do |d|
file.puts d
end
super storage_dir
end
def rm_storage_dir
::FileUtils.rm_rf @storage_dir
end
puts "#{name}: no report"
end
class DODBSemiCachedCars < DODB::DataBase(Car)
property storage_dir : String
def initialize(storage_ext = "", remove_previous_data = true)
@storage_dir = "test-storage-cars-semi#{storage_ext}"
if remove_previous_data
::FileUtils.rm_rf storage_dir
def report(storage, name, &block)
durations = run_n_times Context.nb_run, &block
File.open("#{Context.report_dir}/#{name}.raw", "w") do |file|
durations.each do |d|
file.puts d
end
super storage_dir
end
avr = durations.reduce { |a, b| a + b } / Context.nb_run
puts "#{name}: #{avr}"
avr
end
def rm_storage_dir
::FileUtils.rm_rf @storage_dir
def long_operation(text)
STDOUT.write "#{text}\r".to_slice
yield
STDOUT.write " \r".to_slice
end
def verbose_add_cars(storage, nbcars, name, max_indexes)
long_operation "add #{nbcars} values to #{name}" do
add_cars storage, nbcars, max_indexes: max_indexes
end
end
def init_indexes(storage : DODB::Storage)
n = storage.new_index "name", &.name
c = storage.new_partition "color", &.color
k = storage.new_tags "keyword", &.keywords
return n, c, k
# Drives one benchmark campaign on a single database.
#
# First fills `storage` with `Context.from` cars, then loops: the block is
# called with the storage, the current database size, the database `name` and
# the three search handles (`s_index`, `s_partition`, `s_tags`); afterwards
# `Context.incr` more cars are added and the loop goes on until the size
# reaches `Context.to`. Once done, the storage directory is removed.
def prepare_env(storage, name, s_index, s_partition, s_tags, &)
verbose_add_cars storage, Context.from, name, max_indexes: Context.max_indexes
current = Context.from
to = Context.to
incr = Context.incr
while current < to
yield storage, current, name, s_index, s_partition, s_tags
# Stop before adding cars that would never be measured.
break if current + incr >= to
verbose_add_cars storage, incr, name, max_indexes: Context.max_indexes
current += incr
end
long_operation "removing #{name} data" { storage.rm_storage_dir }
end
def init_uncached_indexes(storage : DODB::Storage)
n = storage.new_uncached_index "name", &.name
c = storage.new_uncached_partition "color", &.color
k = storage.new_uncached_tags "keyword", &.keywords
return n, c, k
# Runs the search benchmarks on four databases: RAM only, cached,
# "semi" (a `DODB::SpecDataBase` given the cached indexes) and uncached.
#
# For each database size handed over by `prepare_env`, the time to search a
# car by name (index) is reported. Searches by color (partition) and by
# keyword (tags) are only measured while the database size does not exceed
# `Context.max_indexes`; above that, fake (zero) reports are written so the
# report files stay complete.
#
# The searched values can be overridden with the CARNAME, CARCOLOR and
# CARKEYWORD environment variables.
def batch()
  cars_ram = DODB::RAMOnlySpecDataBase(Car).new
  cars_cached = DODB::CachedSpecDataBase(Car).new
  cars_semi = DODB::SpecDataBase(Car).new "-semi"
  cars_uncached = DODB::SpecDataBase(Car).new

  # NOTE(review): ram_indexes, cached_indexes and uncached_indexes are defined
  # outside this file; they are expected to return (index, partition, tags).
  ram_Sby_name, ram_Sby_color, ram_Sby_keywords = ram_indexes cars_ram
  cached_Sby_name, cached_Sby_color, cached_Sby_keywords = cached_indexes cars_cached
  semi_Sby_name, semi_Sby_color, semi_Sby_keywords = cached_indexes cars_semi
  uncached_Sby_name, uncached_Sby_color, uncached_Sby_keywords = uncached_indexes cars_uncached

  fn = ->(storage : DODB::Storage(Car),
          current_db_size : Int32,
          name : String,
          search_name : DODB::Index(Car),
          search_color : DODB::Partition(Car),
          search_keywords : DODB::Tags(Car)) {
    # Use the nilable ENV accessor: a missing variable falls back to the
    # default without hiding unrelated exceptions behind a blanket `rescue`.
    name_to_search = ENV["CARNAME"]? || "Corvet-#{(current_db_size/2).to_i}"
    color_to_search = ENV["CARCOLOR"]? || "red"
    keyword_to_search = ENV["CARKEYWORD"]? || "spacious"
    puts "NEW BATCH: db-size #{current_db_size}, name: '#{name_to_search}', color: '#{color_to_search}', tag: '#{keyword_to_search}'"

    report(storage, "#{name}_#{current_db_size}_index") do
      corvet = search_name.get name_to_search
    end

    if current_db_size <= Context.max_indexes
      report(storage, "#{name}_#{current_db_size}_partitions") do
        corvet = search_color.get? color_to_search
      end
      report(storage, "#{name}_#{current_db_size}_tags") do
        corvet = search_keywords.get? keyword_to_search
      end
    else
      # Partitions and tags are not measured on large databases.
      fake_report("#{name}_#{current_db_size}_partitions")
      fake_report("#{name}_#{current_db_size}_tags")
    end
  }

  prepare_env cars_cached, "cached", cached_Sby_name, cached_Sby_color, cached_Sby_keywords, &fn
  prepare_env cars_ram, "ram", ram_Sby_name, ram_Sby_color, ram_Sby_keywords, &fn
  prepare_env cars_semi, "semi", semi_Sby_name, semi_Sby_color, semi_Sby_keywords, &fn
  prepare_env cars_uncached, "uncached", uncached_Sby_name, uncached_Sby_color, uncached_Sby_keywords, &fn
end
def add_cars(storage : DODB::Storage, nb_iterations : Int32)
def perform_add(storage : DODB::Storage(Car))
corvet0 = Car.new "Corvet", "red", [ "shiny", "impressive", "fast", "elegant" ]
i = 0
car1 = Car.new "Corvet", "red", [ "shiny", "impressive", "fast", "elegant" ]
car2 = Car.new "Bullet-GT", "blue", [ "shiny", "fast", "expensive" ]
car3 = Car.new "Deudeuche", "beige", [ "curvy", "sublime" ]
car4 = Car.new "Ford-5", "red", [ "unknown" ]
car5 = Car.new "C-MAX", "gray", [ "spacious", "affordable" ]
while i < nb_iterations
car1.name = "Corvet-#{i}"
car2.name = "Bullet-GT-#{i}"
car3.name = "Deudeuche-#{i}"
car4.name = "Ford-5-#{i}"
car5.name = "C-MAX-#{i}"
storage << car1
storage << car2
storage << car3
storage << car4
storage << car5
perform_benchmark_average Context.nb_run, do
corvet = corvet0.clone
corvet.name = "Corvet-#{i}"
storage << corvet
i += 1
STDOUT.write "\radding value #{i}".to_slice
end
puts ""
end
cars_cached = DODBCachedCars.new
cars_uncached = DODBUnCachedCars.new
cars_semi = DODBSemiCachedCars.new
cached_searchby_name, cached_searchby_color, cached_searchby_keywords = init_indexes cars_cached
uncached_searchby_name, uncached_searchby_color, uncached_searchby_keywords = init_uncached_indexes cars_uncached
semi_searchby_name, semi_searchby_color, semi_searchby_keywords = init_indexes cars_semi
add_cars cars_cached, 1_000
add_cars cars_uncached, 1_000
add_cars cars_semi, 1_000
# Searching for data with an index.
Benchmark.ips do |x|
x.report("(cars db) searching a data with an index (with a cache)") do
corvet = cached_searchby_name.get "Corvet-500"
end
x.report("(cars db) searching a data with an index (semi: cache is only on index)") do
corvet = semi_searchby_name.get "Corvet-500"
end
x.report("(cars db) searching a data with an index (without a cache)") do
corvet = uncached_searchby_name.get "Corvet-500"
end
end
# Searching for data with a partition.
Benchmark.ips do |x|
x.report("(cars db) searching a data with a partition (with a cache)") do
red_cars = cached_searchby_color.get "red"
end
def batch_add()
cars_ram = DODB::RAMOnlySpecDataBase(Car).new
cars_cached = DODB::CachedSpecDataBase(Car).new
cars_semi = DODB::SpecDataBase(Car).new "-semi"
cars_uncached = DODB::SpecDataBase(Car).new
x.report("(cars db) searching a data with a partition (semi: cache is only on partition)") do
red_cars = semi_searchby_color.get "red"
end
ram_indexes cars_ram
cached_indexes cars_cached
cached_indexes cars_semi
uncached_indexes cars_uncached
x.report("(cars db) searching a data with a partition (without a cache)") do
red_cars = uncached_searchby_color.get "red"
end
avr = perform_add(cars_ram)
puts "(ram db and indexes) add a value (average on #{Context.nb_run} tries): #{avr}"
avr = perform_add(cars_cached)
puts "(cached db and indexes) add a value (average on #{Context.nb_run} tries): #{avr}"
avr = perform_add(cars_semi)
puts "(uncached db but cached indexes) add a value (average on #{Context.nb_run} tries): #{avr}"
avr = perform_add(cars_uncached)
puts "(uncached db and indexes) add a value (average on #{Context.nb_run} tries): #{avr}"
cars_ram.rm_storage_dir
cars_cached.rm_storage_dir
cars_semi.rm_storage_dir
cars_uncached.rm_storage_dir
end
# Searching for data with a tag.
Benchmark.ips do |x|
x.report("(cars db) searching a data with a tag (with a cache)") do
red_cars = cached_searchby_keywords.get "spacious"
end
ENV["REPORT_DIR"]?.try { |report_dir| Context.report_dir = report_dir }
Dir.mkdir_p Context.report_dir
x.report("(cars db) searching a data with a tag (semi: cache is only on tags)") do
red_cars = semi_searchby_keywords.get "spacious"
end
ENV["MAXINDEXES"]?.try { |it| Context.max_indexes = it.to_i }
ENV["NBRUN"]?.try { |it| Context.nb_run = it.to_i }
ENV["DBSIZE"]?.try { |it| Context.to = it.to_i }
ENV["DBSIZE_START"]?.try { |it| Context.from = it.to_i }
ENV["DBSIZE_INCREMENT"]?.try { |it| Context.incr = it.to_i }
x.report("(cars db) searching a data with a tag (without a cache)") do
red_cars = uncached_searchby_keywords.get "spacious"
end
end
pp! Context.nb_run
pp! Context.from
pp! Context.to
pp! Context.incr
pp! Context.max_indexes
cars_cached.rm_storage_dir
cars_uncached.rm_storage_dir
cars_cached = DODBCachedCars.new
cars_uncached = DODBUnCachedCars.new
#init_indexes cars_cached
#init_indexes cars_uncached
cached_searchby_name, cached_searchby_color, cached_searchby_keywords = init_indexes cars_cached
uncached_searchby_name, uncached_searchby_color, uncached_searchby_keywords = init_uncached_indexes cars_uncached
add_cars cars_cached, 1_000
add_cars cars_uncached, 1_000
nb_run = 1000
perform_benchmark_average_verbose "(cached) search db with an index", nb_run, do
cached_searchby_name.get "Corvet-500"
end
perform_benchmark_average_verbose "(uncached) search db with an index", nb_run, do
uncached_searchby_name.get "Corvet-500"
end
cars_cached.rm_storage_dir
cars_uncached.rm_storage_dir
cars_semi.rm_storage_dir
batch
batch_add

View File

@ -1,7 +1,5 @@
require "benchmark"
require "../src/dodb.cr"
require "./test-data.cr"
require "./db-ships.cr"
class DODBCached < DODB::CachedDataBase(Ship)
def initialize(storage_ext = "", remove_previous_data = true)

View File

@ -1,402 +0,0 @@
require "spec"
require "file_utils"
require "../src/dodb.cr"
require "./test-data.cr"
# Cached `Ship` database used by these specs, stored in "test-storage<storage_ext>".
class DODB::SpecDataBase < DODB::CachedDataBase(Ship)
  # Opens the storage directory. Unless `remove_previous_data` is false,
  # the directory is deleted first so the spec starts from scratch.
  def initialize(storage_ext = "", remove_previous_data = true)
    directory = "test-storage#{storage_ext}"
    ::FileUtils.rm_rf(directory) if remove_previous_data
    super directory
  end
end
describe "DODB::DataBase::Cached" do
# Basic storage specs: adding, overwriting, removing, reopening, iterating
# over and slicing (with offsets) the content of the database.
describe "basics" do
it "store and get data" do
db = DODB::SpecDataBase.new
Ship.all_ships.each do |ship|
db << ship
end
db.to_a.sort.should eq(Ship.all_ships.sort)
end
it "rewrite already stored data" do
db = DODB::SpecDataBase.new
ship = Ship.all_ships[0]
key = db << ship
# Overwrite the entry twice: the last written value must be the one read back.
db[key] = Ship.new "broken"
db[key] = ship
db[key].should eq(ship)
end
it "properly remove data" do
db = DODB::SpecDataBase.new
Ship.all_ships.each do |ship|
db << ship
end
# One `pop` per stored ship.
Ship.all_ships.each do |ship|
db.pop
end
Ship.all_ships.each_with_index do |ship, i|
# FIXME: Should it raise a particular exception?
expect_raises DODB::MissingEntry do
db[i]
end
db[i]?.should be_nil
end
end
it "preserves data on reopening" do
db1 = DODB::SpecDataBase.new
db1 << Ship.kisaragi
db1.to_a.size.should eq(1)
# Second instance on the same directory, previous data kept.
db2 = DODB::SpecDataBase.new remove_previous_data: false
db2 << Ship.mutsuki
# Only difference with DODB::DataBase: for now, concurrent DB cannot coexist.
db2.to_a.size.should eq(2)
end
it "iterates in normal and reversed order" do
db = DODB::SpecDataBase.new
Ship.all_ships.each do |ship|
db << ship
end
# The two #each test iteration.
db.each_with_index do |item, index|
item.should eq Ship.all_ships[index]
end
db.each_with_index(reversed: true) do |item, index|
item.should eq Ship.all_ships[index]
end
# Actual reversal is tested here.
db.to_a(reversed: true).should eq db.to_a.reverse
end
it "respects the provided offsets if any" do
db = DODB::SpecDataBase.new
Ship.all_ships.each do |ship|
db << ship
end
# The assertions below show that both offsets are inclusive.
db.to_a(start_offset: 0, end_offset: 0)[0]?.should eq Ship.mutsuki
db.to_a(start_offset: 1, end_offset: 1)[0]?.should eq Ship.kisaragi
db.to_a(start_offset: 2, end_offset: 2)[0]?.should eq Ship.yayoi
db.to_a(start_offset: 0, end_offset: 2).should eq [
Ship.mutsuki, Ship.kisaragi, Ship.yayoi
]
end
end
# Index specs (unique "name" index): lookup, duplicate rejection,
# de-indexing on delete, re-indexing on overwrite and update through the index.
describe "indices" do
it "do basic indexing" do
db = DODB::SpecDataBase.new
db_ships_by_name = db.new_index "name", &.name
Ship.all_ships.each do |ship|
db << ship
end
Ship.all_ships.each_with_index do |ship|
db_ships_by_name.get?(ship.name).should eq(ship)
end
end
it "raise on index overload" do
db = DODB::SpecDataBase.new
db_ships_by_name = db.new_index "name", &.name
db << Ship.kisaragi
# Should not be allowed to store an entry whose “name” field
# already exists.
expect_raises(DODB::IndexOverload) do
db << Ship.kisaragi
end
end
it "properly deindex" do
db = DODB::SpecDataBase.new
db_ships_by_name = db.new_index "name", &.name
Ship.all_ships.each do |ship|
db << ship
end
# Entries are deleted by key, using the position in `Ship.all_ships` as the key.
Ship.all_ships.each_with_index do |ship, i|
db.delete i
end
Ship.all_ships.each do |ship|
db_ships_by_name.get?(ship.name).should be_nil
end
end
it "properly reindex" do
db = DODB::SpecDataBase.new
db_ships_by_name = db.new_index "name", &.name
key = db << Ship.kisaragi
# We give the old id to the new ship, to get it replaced in
# the database.
some_new_ship = Ship.all_ships[2].clone
db[key] = some_new_ship
db[key].should eq(some_new_ship)
db_ships_by_name.get?(some_new_ship.name).should eq(some_new_ship)
end
it "properly updates" do
db = DODB::SpecDataBase.new
db_ships_by_name = db.new_index "name", &.name
Ship.all_ships.each do |ship|
db << ship
end
new_kisaragi = Ship.kisaragi.clone.tap do |s|
s.name = "Kisaragi Kai" # Do not think about it too much.
end
# We are changing an indexed value on purpose.
db_ships_by_name.update "Kisaragi", new_kisaragi
db_ships_by_name.get?("Kisaragi").should be_nil
db_ships_by_name.get?(new_kisaragi.name).should eq new_kisaragi
end
end
# Partition specs ("class" partition): lookup by class and selective
# removal of entries from a partition.
describe "partitions" do
  it "do basic partitioning" do
    db = DODB::SpecDataBase.new
    db_ships_by_class = db.new_partition "class", &.klass

    Ship.all_ships.each do |ship|
      db << ship
    end

    Ship.all_ships.each do |ship|
      db_ships_by_class.get(ship.klass).should contain(ship)
    end

    # We extract the possible classes to do test on them.
    ship_classes = Ship.all_ships.map(&.klass).uniq
    ship_classes.each do |klass|
      partition = db_ships_by_class.get klass

      # A partition on “class” should contain entries that all
      # share the same value of “class”. The partition cannot be empty
      # here: the previous loop already checked it contains its ships.
      partition.all?(&.klass.==(klass)).should be_true
    end

    db_ships_by_class.get("does-not-exist").should eq [] of Ship
  end

  it "removes select elements from partitions" do
    db = DODB::SpecDataBase.new
    db_ships_by_class = db.new_partition "class", &.klass

    Ship.all_ships.each do |ship|
      db << ship
    end

    # Only the entries of the "Mutsuki" partition matching the block are removed.
    db_ships_by_class.delete "Mutsuki", &.name.==("Kisaragi")

    Ship.all_ships.map(&.klass).uniq.each do |klass|
      partition = db_ships_by_class.get klass
      partition.any?(&.name.==("Kisaragi")).should be_false
    end
  end
end
# Tag specs ("tags" index): lookup by one or several tags, and removal of
# a tag from an entry.
describe "tags" do
  it "do basic tagging" do
    db = DODB::SpecDataBase.new
    db_ships_by_tags = db.new_tags "tags", &.tags

    Ship.all_ships.each do |ship|
      db << ship
    end

    db_ships_by_tags.get("flagship").should eq([Ship.flagship])

    # All returned entries should have the requested tag.
    # The explicit emptiness check keeps the spec failing on an empty
    # result, since `all?` alone is true for an empty collection.
    name_ships = db_ships_by_tags.get("name ship")
    name_ships.should_not be_empty
    name_ships.all?(&.tags.includes?("name ship")).should be_true

    # There should not be one in our data about WWII Japanese warships…
    db_ships_by_tags.get("starship").should eq([] of Ship)
  end

  it "properly removes tags" do
    db = DODB::SpecDataBase.new
    db_ships_by_tags = db.new_tags "tags", &.tags

    Ship.all_ships.each do |ship|
      db << ship
    end

    # Removing the “flagship” tag, brace for impact.
    flagship, index = db_ships_by_tags.get_with_indices("flagship")[0]
    flagship.tags = [] of String
    db[index] = flagship

    # ship, index = db_ships_by_tags.update(tag: "flagship") do |ship, index|
    #   ship.tags = [] of String
    #   db[index] = ship
    # end

    db_ships_by_tags.get("flagship").should eq([] of Ship)
  end

  it "gets items that have multiple tags" do
    db = DODB::SpecDataBase.new
    db_ships_by_tags = db.new_tags "tags", &.tags

    Ship.all_ships.each do |ship|
      db << ship
    end

    # The order of the requested tags must not matter.
    results = db_ships_by_tags.get(["flagship", "name ship"])
    results.should eq([Ship.yamato])

    results = db_ships_by_tags.get(["name ship", "flagship"])
    results.should eq([Ship.yamato])

    results = db_ships_by_tags.get(["flagship"])
    results.should eq([Ship.yamato])
  end
end
# Specs for the block-based `safe_get` and `safe_get?` accessors of an index.
describe "atomic operations" do
it "safe_get and safe_get?" do
db = DODB::SpecDataBase.new
db_ships_by_name = db.new_index "name", &.name
Ship.all_ships.each do |ship|
db << ship
end
# Both accessors must yield the ship stored under the requested name.
Ship.all_ships.each do |ship|
db_ships_by_name.safe_get ship.name do |results|
results.should eq(ship)
end
db_ships_by_name.safe_get? ship.name do |results|
results.should eq(ship)
end
end
end
end
# Maintenance specs: rebuilding every index, and migrating the content of
# a `PrimitiveShip` database into a `Ship` database.
describe "tools" do
it "rebuilds indexes" do
db = DODB::SpecDataBase.new
db_ships_by_name = db.new_index "name", &.name
db_ships_by_class = db.new_partition "class", &.klass
db_ships_by_tags = db.new_tags "tags", &.tags
Ship.all_ships.each do |ship|
db << ship
end
db.reindex_everything!
# NOTE(review): only the index and the partition are checked after the rebuild, not the tags.
Ship.all_ships.each do |ship|
db_ships_by_name.get?(ship.name).should eq(ship)
db_ships_by_class.get(ship.klass).should contain(ship)
end
end
it "migrates properly" do
::FileUtils.rm_rf "test-storage-migration-origin"
old_db = DODB::DataBase(PrimitiveShip).new "test-storage-migration-origin"
old_ships_by_name = old_db.new_index "name", &.name
old_ships_by_class = old_db.new_partition "class", &.class_name
PrimitiveShip.all_ships.each do |ship|
old_db << ship
end
# At this point, the “old” DB is filled. Now we need to convert
# to the new DB.
new_db = DODB::SpecDataBase.new "-migration-target"
new_ships_by_name = new_db.new_index "name", &.name
new_ships_by_class = new_db.new_partition "class", &.klass
new_ships_by_tags = new_db.new_tags "tags", &.tags
old_db.each_with_index do |ship, index|
# A ship named after its own class gets the "name ship" tag.
new_ship = Ship.new ship.name,
klass: ship.class_name,
id: ship.id,
tags: Array(String).new.tap { |tags|
tags << "name ship" if ship.name == ship.class_name
}
new_db[index] = new_ship
end
# At this point, the conversion is done, so… we are making a few
# arbitrary tests on the new data.
old_db.each_with_index do |old_ship, old_index|
ship = new_db[old_index]
ship.id.should eq(old_ship.id)
ship.klass.should eq(old_ship.class_name)
ship.tags.any?(&.==("name ship")).should be_true if ship.name == ship.klass
end
end
end
end

98
spec/db-cars.cr Normal file
View File

@ -0,0 +1,98 @@
# This file contains all the necessary code to perform tests based on the following Car database.
require "json"
require "../src/dodb.cr"
require "./spec-database.cr"
# A car, the value type stored in the car databases used by tests and benchmarks.
#
# `color` and `keywords` may hold `DODB::NoIndex` instead of an actual value
# (see `add_cars`, which assigns `DODB.no_index` to both).
class Car
  include JSON::Serializable

  property name : String                            # unique to each instance (1-1 relations)
  property color : String | DODB::NoIndex           # a simple attribute (1-n relations)
  property keywords : Array(String) | DODB::NoIndex # tags about a car, example: "shiny" (n-n relations)

  def_clone

  def initialize(@name, @color, @keywords)
  end

  # A few ready-made cars.
  class_getter cars = [
    Car.new("Corvet", "red", ["shiny", "impressive", "fast", "elegant"]),
    Car.new("SUV", "red", ["solid", "impressive"]),
    Car.new("Mustang", "red", ["shiny", "impressive", "elegant"]),
    Car.new("Bullet-GT", "red", ["shiny", "impressive", "fast", "elegant"]),
    Car.new("GTI", "blue", ["average"]),
    Car.new("Deudeuch", "violet", ["dirty", "slow", "only French will understand"]),
  ]

  # Equality is true if every property is identical.
  #
  # The argument is restricted to `Car`: an unrestricted `other` makes any
  # comparison with a value lacking `name`, `color` or `keywords` (`nil` for
  # example) a compile-time error. Other types are handled by the inherited `==`.
  def ==(other : Car)
    @name == other.name && @color == other.color && @keywords == other.keywords
  end
end
# Declares on `storage` a "name" index, a "color" partition and a "keyword"
# tag index through the `new_nilable_RAM_*` constructors.
#
# Returns the three of them as a `{name, color, keywords}` tuple.
def ram_indexes(storage : DODB::Storage)
  by_name = storage.new_nilable_RAM_index "name", &.name
  by_color = storage.new_nilable_RAM_partition "color", &.color
  by_keywords = storage.new_nilable_RAM_tags "keyword", &.keywords
  {by_name, by_color, by_keywords}
end
# Declares on `storage` a "name" index, a "color" partition and a "keyword"
# tag index through `new_nilable_index`, `new_nilable_partition` and
# `new_nilable_tags`.
#
# Returns the three of them as a `{name, color, keywords}` tuple.
def cached_indexes(storage : DODB::Storage)
  by_name = storage.new_nilable_index "name", &.name
  by_color = storage.new_nilable_partition "color", &.color
  by_keywords = storage.new_nilable_tags "keyword", &.keywords
  {by_name, by_color, by_keywords}
end
# Declares on `storage` a "name" index, a "color" partition and a "keyword"
# tag index through the `new_nilable_uncached_*` constructors.
#
# Returns the three of them as a `{name, color, keywords}` tuple.
def uncached_indexes(storage : DODB::Storage)
  by_name = storage.new_nilable_uncached_index "name", &.name
  by_color = storage.new_nilable_uncached_partition "color", &.color
  by_keywords = storage.new_nilable_uncached_tags "keyword", &.keywords
  {by_name, by_color, by_keywords}
end
# Adds `nb_iterations` batches of five cars (Corvet, Bullet-GT, Deudeuche,
# Ford-5 and C-MAX) to `storage`. Car names are suffixed with a batch number
# which starts at `(storage.last_index + 1) / 5` (truncated) and grows by one
# after each batch.
#
# `max_indexes` limits the number of indexes (partitions and tags).
# Once the batch number (db last_index/5) is above this value, the following
# cars won't be tagged nor partitioned.
def add_cars(storage : DODB::Storage, nb_iterations : Int32, max_indexes = 5000)
  batch_number = ((storage.last_index + 1) / 5).to_i

  # Base name and template of each car of a batch, in insertion order.
  # The templates are mutated in place; only clones are stored.
  templates = [
    {"Corvet", Car.new("Corvet", "red", ["shiny", "impressive", "fast", "elegant"])},
    {"Bullet-GT", Car.new("Bullet-GT", "blue", ["shiny", "fast", "expensive"])},
    {"Deudeuche", Car.new("Deudeuche", "beige", ["curvy", "sublime"])},
    {"Ford-5", Car.new("Ford-5", "red", ["unknown"])},
    {"C-MAX", Car.new("C-MAX", "gray", ["spacious", "affordable"])},
  ]

  nb_iterations.times do
    templates.each do |base_name, car|
      car.name = "#{base_name}-#{batch_number}"
    end

    batch_number += 1

    # Past the limit: the templates lose their color and keywords for good,
    # so this batch and all the following ones are neither partitioned nor tagged.
    if batch_number > max_indexes
      templates.each do |_, car|
        car.color = DODB.no_index
        car.keywords = DODB.no_index
      end
    end

    templates.each do |_, car|
      storage << car.clone
    end
  end
end

View File

@ -1,6 +1,9 @@
require "uuid"
require "json"
require "../src/dodb.cr"
require "./spec-database.cr"
# FIXME: Split the test data in separate files. We dont care about those here.
class Ship
@ -85,24 +88,3 @@ class PrimitiveShip
@@asakaze
]
end
# A car: a unique name, a color and a list of keywords.
# JSON-serializable and clonable (`def_clone`).
class Car
include JSON::Serializable
property name : String # unique to each instance (1-1 relations)
property color : String # a simple attribute (1-n relations)
property keywords : Array(String) # tags about a car, example: "shiny" (n-n relations)
def_clone
def initialize(@name, @color, @keywords)
end
# A few ready-made cars, exposed through `Car.cars`.
class_getter cars = [
Car.new("Corvet", "red", [ "shiny", "impressive", "fast", "elegant" ]),
Car.new("SUV", "red", [ "solid", "impressive" ]),
Car.new("Mustang", "red", [ "shiny", "impressive", "elegant" ]),
Car.new("Bullet-GT", "red", [ "shiny", "impressive", "fast", "elegant" ]),
Car.new("GTI", "blue", [ "average" ]),
Car.new("Deudeuch", "violet", [ "dirty", "slow", "only French will understand" ])
]
end

50
spec/spec-database.cr Normal file
View File

@ -0,0 +1,50 @@
# Spec helper: a `DODB::DataBase(V)` stored in
# "specdb-storage-uncached<storage_ext>".
class DODB::SpecDataBase(V) < DODB::DataBase(V)
  # Directory holding the data of this database.
  property storage_dir : String

  # Opens the storage directory. Unless `remove_previous_data` is false,
  # the directory is deleted first so the spec starts from scratch.
  def initialize(storage_ext = "", remove_previous_data = true)
    @storage_dir = "specdb-storage-uncached#{storage_ext}"
    ::FileUtils.rm_rf(@storage_dir) if remove_previous_data
    super @storage_dir
  end

  # Deletes the storage directory.
  def rm_storage_dir
    ::FileUtils.rm_rf(@storage_dir)
  end
end
# Spec helper: a `DODB::CachedDataBase(V)` stored in
# "specdb-storage-cached<storage_ext>".
class DODB::CachedSpecDataBase(V) < DODB::CachedDataBase(V)
  # Directory holding the data of this database.
  property storage_dir : String

  # Opens the storage directory. Unless `remove_previous_data` is false,
  # the directory is deleted first so the spec starts from scratch.
  def initialize(storage_ext = "", remove_previous_data = true)
    @storage_dir = "specdb-storage-cached#{storage_ext}"
    ::FileUtils.rm_rf(@storage_dir) if remove_previous_data
    super @storage_dir
  end

  # Deletes the storage directory.
  def rm_storage_dir
    ::FileUtils.rm_rf(@storage_dir)
  end
end
# Spec helper: a `DODB::RAMOnlyDataBase(V)` whose storage directory is
# "specdb-storage-ram<storage_ext>".
class DODB::RAMOnlySpecDataBase(V) < DODB::RAMOnlyDataBase(V)
  # Directory given to the database as its storage location.
  property storage_dir : String

  # Opens the storage directory. Unless `remove_previous_data` is false,
  # the directory is deleted first so the spec starts from scratch.
  def initialize(storage_ext = "", remove_previous_data = true)
    @storage_dir = "specdb-storage-ram#{storage_ext}"
    ::FileUtils.rm_rf(@storage_dir) if remove_previous_data
    super @storage_dir
  end

  # Deletes the storage directory.
  def rm_storage_dir
    ::FileUtils.rm_rf(@storage_dir)
  end
end

41
spec/test-cars.cr Normal file
View File

@ -0,0 +1,41 @@
require "spec"
require "./db-cars.cr"
# Expected value: the first car `add_cars` inserts into an empty database
# (name "Corvet-0", see `add_cars` in db-cars.cr).
corvet0 = Car.new "Corvet-0", "red", [ "shiny", "impressive", "fast", "elegant" ]
# Checks that indexes declared AFTER the data was added only find the data
# once `reindex_everything!` has been called, for each of the three index kinds.
describe "uncached, cached and ram indexes" do
it "RAM DB - add items, add indexes, search, reindex, search" do
# One RAM-only database per index kind; the suffix keeps their storage directories apart.
cars_ram0 = DODB::RAMOnlySpecDataBase(Car).new "-0"
cars_ram1 = DODB::RAMOnlySpecDataBase(Car).new "-1"
cars_ram2 = DODB::RAMOnlySpecDataBase(Car).new "-2"
# The cars are added BEFORE the indexes exist: the statement order matters here.
add_cars cars_ram0, 1
add_cars cars_ram1, 1
add_cars cars_ram2, 1
uncached_searchby_name, uncached_searchby_color, uncached_searchby_keywords = uncached_indexes cars_ram0
cached_searchby_name, cached_searchby_color, cached_searchby_keywords = cached_indexes cars_ram1
ram_searchby_name, ram_searchby_color, ram_searchby_keywords = ram_indexes cars_ram2
# Nothing has been indexed yet, so the searches come back empty.
uncached_searchby_name.get?("Corvet-0").should be_nil
cached_searchby_name.get?("Corvet-0").should be_nil
ram_searchby_name.get?("Corvet-0").should be_nil
cars_ram0.reindex_everything!
cars_ram1.reindex_everything!
cars_ram2.reindex_everything!
# Get the value even if not written on the disk since the index was written on the disk.
# The value is retrieved by the database, the index only reads its key in the database.
uncached_searchby_name.get?("Corvet-0").should eq corvet0
# Both cached and RAM indexes can retrieve the value since they store the key.
cached_searchby_name.get?("Corvet-0").should eq corvet0
ram_searchby_name.get?("Corvet-0").should eq corvet0
# NOTE(review): the cleanup below is disabled, so the three storage
# directories are presumably left behind after the run. Confirm this is intended.
# cars_ram0.rm_storage_dir
# cars_ram1.rm_storage_dir
# cars_ram2.rm_storage_dir
end
end

904
spec/test-ships.cr Normal file
View File

@ -0,0 +1,904 @@
require "spec"
require "file_utils"
require "./db-ships.cr"
# Forks the current process, runs the given block in the fork and returns
# a `Process` built from what `Crystal::System::Process.fork` returned,
# so the caller can `wait` for it.
def fork_process(&)
  forked = Crystal::System::Process.fork { yield }
  Process.new forked
end
describe "DODB::DataBase" do
# Basic storage specs: adding, overwriting, removing, reopening, iterating
# over and slicing (with offsets) the content of the database.
describe "basics" do
it "store and get data" do
db = DODB::SpecDataBase(Ship).new
Ship.all_ships.each do |ship|
db << ship
end
db.to_a.sort.should eq(Ship.all_ships.sort)
end
it "rewrite already stored data" do
db = DODB::SpecDataBase(Ship).new
ship = Ship.all_ships[0]
key = db << ship
# Overwrite the entry twice: the last written value must be the one read back.
db[key] = Ship.new "broken"
db[key] = ship
db[key].should eq(ship)
end
it "properly remove data" do
db = DODB::SpecDataBase(Ship).new
Ship.all_ships.each do |ship|
db << ship
end
# One `pop` per stored ship.
Ship.all_ships.each do |ship|
db.pop
end
Ship.all_ships.each_with_index do |ship, i|
# FIXME: Should it raise a particular exception?
expect_raises DODB::MissingEntry do
db[i]
end
db[i]?.should be_nil
end
end
it "preserves data on reopening" do
db1 = DODB::SpecDataBase(Ship).new
db1 << Ship.kisaragi
db1.to_a.size.should eq(1)
# Second instance on the same directory, previous data kept.
db2 = DODB::SpecDataBase(Ship).new remove_previous_data: false
db2 << Ship.mutsuki
# `db1` is queried on purpose: it must see the entry added through `db2`.
db1.to_a.size.should eq(2)
end
it "iterates in normal and reversed order" do
db = DODB::SpecDataBase(Ship).new
Ship.all_ships.each do |ship|
db << ship
end
# The two #each test iteration.
db.each_with_index do |item, index|
item.should eq Ship.all_ships[index]
end
db.each_with_index(reversed: true) do |item, index|
item.should eq Ship.all_ships[index]
end
# Actual reversal is tested here.
db.to_a(reversed: true).should eq db.to_a.reverse
end
it "respects the provided offsets if any" do
db = DODB::SpecDataBase(Ship).new
Ship.all_ships.each do |ship|
db << ship
end
# The assertions below show that both offsets are inclusive.
db.to_a(start_offset: 0, end_offset: 0)[0]?.should eq Ship.mutsuki
db.to_a(start_offset: 1, end_offset: 1)[0]?.should eq Ship.kisaragi
db.to_a(start_offset: 2, end_offset: 2)[0]?.should eq Ship.yayoi
db.to_a(start_offset: 0, end_offset: 2).should eq [
Ship.mutsuki, Ship.kisaragi, Ship.yayoi
]
end
end
# Index specs (unique "name" index): lookup, duplicate rejection,
# de-indexing on delete, re-indexing on overwrite and update through the index.
describe "indices" do
it "do basic indexing" do
db = DODB::SpecDataBase(Ship).new
db_ships_by_name = db.new_index "name", &.name
Ship.all_ships.each do |ship|
db << ship
end
Ship.all_ships.each_with_index do |ship|
db_ships_by_name.get?(ship.name).should eq(ship)
end
end
it "raise on index overload" do
db = DODB::SpecDataBase(Ship).new
db_ships_by_name = db.new_index "name", &.name
db << Ship.kisaragi
# Should not be allowed to store an entry whose “name” field
# already exists.
expect_raises(DODB::IndexOverload) do
db << Ship.kisaragi
end
end
it "properly deindex" do
db = DODB::SpecDataBase(Ship).new
db_ships_by_name = db.new_index "name", &.name
Ship.all_ships.each do |ship|
db << ship
end
# Entries are deleted by key, using the position in `Ship.all_ships` as the key.
Ship.all_ships.each_with_index do |ship, i|
db.delete i
end
Ship.all_ships.each do |ship|
db_ships_by_name.get?(ship.name).should be_nil
end
end
it "properly reindex" do
db = DODB::SpecDataBase(Ship).new
db_ships_by_name = db.new_index "name", &.name
key = db << Ship.kisaragi
# We give the old id to the new ship, to get it replaced in
# the database.
some_new_ship = Ship.all_ships[2].clone
db[key] = some_new_ship
db[key].should eq(some_new_ship)
db_ships_by_name.get?(some_new_ship.name).should eq(some_new_ship)
end
it "properly updates" do
db = DODB::SpecDataBase(Ship).new
db_ships_by_name = db.new_index "name", &.name
Ship.all_ships.each do |ship|
db << ship
end
new_kisaragi = Ship.kisaragi.clone.tap do |s|
s.name = "Kisaragi Kai" # Do not think about it too much.
end
# We are changing an indexed value on purpose.
db_ships_by_name.update "Kisaragi", new_kisaragi
db_ships_by_name.get?("Kisaragi").should be_nil
db_ships_by_name.get?(new_kisaragi.name).should eq new_kisaragi
end
end
# Partition specs ("class" partition): lookup by class and selective
# removal of entries from a partition.
describe "partitions" do
  it "do basic partitioning" do
    db = DODB::SpecDataBase(Ship).new
    db_ships_by_class = db.new_partition "class", &.klass

    Ship.all_ships.each do |ship|
      db << ship
    end

    Ship.all_ships.each do |ship|
      db_ships_by_class.get(ship.klass).should contain(ship)
    end

    # We extract the possible classes to do test on them.
    ship_classes = Ship.all_ships.map(&.klass).uniq
    ship_classes.each do |klass|
      partition = db_ships_by_class.get klass

      # A partition on “class” should contain entries that all
      # share the same value of “class”. The partition cannot be empty
      # here: the previous loop already checked it contains its ships.
      partition.all?(&.klass.==(klass)).should be_true
    end

    db_ships_by_class.get?("does-not-exist").should be_nil
  end

  it "removes select elements from partitions" do
    db = DODB::SpecDataBase(Ship).new
    db_ships_by_class = db.new_partition "class", &.klass

    Ship.all_ships.each do |ship|
      db << ship
    end

    # Only the entries of the "Mutsuki" partition matching the block are removed.
    db_ships_by_class.delete "Mutsuki", &.name.==("Kisaragi")

    Ship.all_ships.map(&.klass).uniq.each do |klass|
      partition = db_ships_by_class.get klass
      partition.any?(&.name.==("Kisaragi")).should be_false
    end
  end
end
# Tag specs ("tags" index): lookup by one or several tags, and removal of
# a tag from an entry.
describe "tags" do
  it "do basic tagging" do
    db = DODB::SpecDataBase(Ship).new
    db_ships_by_tags = db.new_tags "tags", &.tags

    Ship.all_ships.each do |ship|
      db << ship
    end

    db_ships_by_tags.get("flagship").should eq([Ship.flagship])

    # All returned entries should have the requested tag.
    # The explicit emptiness check keeps the spec failing on an empty
    # result, since `all?` alone is true for an empty collection.
    name_ships = db_ships_by_tags.get("name ship")
    name_ships.should_not be_empty
    name_ships.all?(&.tags.includes?("name ship")).should be_true

    # There should not be one in our data about WWII Japanese warships…
    db_ships_by_tags.get?("starship").should be_nil
  end

  it "properly removes tags" do
    db = DODB::SpecDataBase(Ship).new
    db_ships_by_tags = db.new_tags "tags", &.tags

    Ship.all_ships.each do |ship|
      db << ship
    end

    # Removing the “flagship” tag, brace for impact.
    flagship, index = db_ships_by_tags.get_with_indice("flagship")[0]
    flagship.tags = [] of String
    db[index] = flagship

    # ship, index = db_ships_by_tags.update(tag: "flagship") do |ship, index|
    #   ship.tags = [] of String
    #   db[index] = ship
    # end

    db_ships_by_tags.get("flagship").should eq([] of Ship)
  end

  it "gets items that have multiple tags" do
    db = DODB::SpecDataBase(Ship).new
    db_ships_by_tags = db.new_tags "tags", &.tags

    Ship.all_ships.each do |ship|
      db << ship
    end

    # The order of the requested tags must not matter.
    results = db_ships_by_tags.get(["flagship", "name ship"])
    results.should eq([Ship.yamato])

    results = db_ships_by_tags.get(["name ship", "flagship"])
    results.should eq([Ship.yamato])

    results = db_ships_by_tags.get(["flagship"])
    results.should eq([Ship.yamato])
  end
end
# Specs for the block-based `safe_get` and `safe_get?` accessors of an index.
describe "atomic operations" do
it "safe_get and safe_get?" do
db = DODB::SpecDataBase(Ship).new
db_ships_by_name = db.new_index "name", &.name
Ship.all_ships.each do |ship|
db << ship
end
# Both accessors must yield the ship stored under the requested name.
Ship.all_ships.each do |ship|
db_ships_by_name.safe_get ship.name do |results|
results.should eq(ship)
end
db_ships_by_name.safe_get? ship.name do |results|
results.should eq(ship)
end
end
end
end
# Maintenance specs: rebuilding every index, and migrating the content of
# a `PrimitiveShip` database into a `Ship` database.
describe "tools" do
it "rebuilds indexes" do
db = DODB::SpecDataBase(Ship).new
db_ships_by_name = db.new_index "name", &.name
db_ships_by_class = db.new_partition "class", &.klass
db_ships_by_tags = db.new_tags "tags", &.tags
Ship.all_ships.each do |ship|
db << ship
end
db.reindex_everything!
# NOTE(review): only the index and the partition are checked after the rebuild, not the tags.
Ship.all_ships.each do |ship|
db_ships_by_name.get?(ship.name).should eq(ship)
db_ships_by_class.get(ship.klass).should contain(ship)
end
end
it "migrates properly" do
# NOTE(review): the origin database uses a hard-coded directory and a plain
# `DODB::DataBase`, unlike the cached variant of this spec, which uses the
# spec helper class and removes its directories at the end.
::FileUtils.rm_rf "test-storage-migration-origin"
old_db = DODB::DataBase(PrimitiveShip).new "test-storage-migration-origin"
old_ships_by_name = old_db.new_index "name", &.name
old_ships_by_class = old_db.new_partition "class", &.class_name
PrimitiveShip.all_ships.each do |ship|
old_db << ship
end
# At this point, the “old” DB is filled. Now we need to convert
# to the new DB.
new_db = DODB::SpecDataBase(Ship).new "-migration-target"
new_ships_by_name = new_db.new_index "name", &.name
new_ships_by_class = new_db.new_partition "class", &.klass
new_ships_by_tags = new_db.new_tags "tags", &.tags
old_db.each_with_index do |ship, index|
# A ship named after its own class gets the "name ship" tag.
new_ship = Ship.new ship.name,
klass: ship.class_name,
id: ship.id,
tags: Array(String).new.tap { |tags|
tags << "name ship" if ship.name == ship.class_name
}
new_db[index] = new_ship
end
# At this point, the conversion is done, so… we are making a few
# arbitrary tests on the new data.
old_db.each_with_index do |old_ship, old_index|
ship = new_db[old_index]
ship.id.should eq(old_ship.id)
ship.klass.should eq(old_ship.class_name)
ship.tags.any?(&.==("name ship")).should be_true if ship.name == ship.klass
end
end
end
# Concurrency specs: several forked processes write to, update and
# increment values in the same database at the same time.
describe "parallel support" do
  # Not sure how many forks would be safe in a test like that.
  fork_count = 25
  entries_per_fork = 100

  it "works for pushing values" do
    db = DODB::SpecDataBase(Ship).new

    processes = [] of Process

    fork_count.times do |fork_id|
      processes << fork_process do
        entries_per_fork.times do |entry_id|
          db << Ship.new("entry-#{fork_id}-#{entry_id}", "???")
        end
      end
    end

    processes.each &.wait

    # Every entry written by every fork must be there.
    dump = db.to_a
    dump.size.should eq fork_count * entries_per_fork
  end

  it "works for updating values" do
    db = DODB::SpecDataBase(Ship).new
    db_entries_by_name = db.new_index "name", &.name

    # First pass, creating data.
    processes = [] of Process
    fork_count.times do |fork_id|
      processes << fork_process do
        entries_per_fork.times do |entry_id|
          db << Ship.new("entry-#{fork_id}-#{entry_id}", "???")
        end
      end
    end
    processes.each &.wait

    # Second pass, updating data.
    processes = [] of Process
    fork_count.times do |fork_id|
      processes << fork_process do
        entries_per_fork.times do |entry_id|
          db_entries_by_name.update Ship.new("entry-#{fork_id}-#{entry_id}", "???", tags: ["updated"])
        end
      end
    end
    processes.each &.wait

    # Third pass, testing database content (read back through the index).
    fork_count.times do |fork_id|
      entries_per_fork.times do |entry_id|
        entry = db_entries_by_name.get "entry-#{fork_id}-#{entry_id}"

        entry.tags.should eq ["updated"]
      end
    end
  end

  it "does parallel-safe updates" do
    db = DODB::SpecDataBase(Ship).new
    db_entries_by_name = db.new_index "name", &.name

    # We will be storing an integer in the "klass" field, and incrementing
    # it in forks in a second time.
    db << Ship.new("test", "0")

    processes = [] of Process
    fork_count.times do |fork_id|
      processes << fork_process do
        entries_per_fork.times do |entry_id|
          # The read-increment-write sequence happens inside `safe_get`.
          db_entries_by_name.safe_get "test" do |entry|
            entry.klass = (entry.klass.to_i + 1).to_s

            db_entries_by_name.update "test", entry
          end
        end
      end
    end
    processes.each &.wait

    # No increment may have been lost.
    db_entries_by_name.get("test").klass.should eq((fork_count * entries_per_fork).to_s)
  end
end
end
# Basically the same thing as before, with some slight
# differences based on the fact that changing the on-disk data
# won't change the cached one.
describe "DODB::CachedDataBase" do
# Basic storage specs: adding, overwriting, removing, reopening, iterating
# over and slicing (with offsets) the content of the cached database.
# Each spec removes its storage directory once its assertions have passed.
describe "basics" do
it "store and get data" do
db = DODB::CachedSpecDataBase(Ship).new
Ship.all_ships.each do |ship|
db << ship
end
db.to_a.sort.should eq(Ship.all_ships.sort)
db.rm_storage_dir
end
it "rewrite already stored data" do
db = DODB::CachedSpecDataBase(Ship).new
ship = Ship.all_ships[0]
key = db << ship
# Overwrite the entry twice: the last written value must be the one read back.
db[key] = Ship.new "broken"
db[key] = ship
db[key].should eq(ship)
db.rm_storage_dir
end
it "properly remove data" do
db = DODB::CachedSpecDataBase(Ship).new
Ship.all_ships.each do |ship|
db << ship
end
# One `pop` per stored ship.
Ship.all_ships.each do |ship|
db.pop
end
Ship.all_ships.each_with_index do |ship, i|
# FIXME: Should it raise a particular exception?
expect_raises DODB::MissingEntry do
db[i]
end
db[i]?.should be_nil
end
db.rm_storage_dir
end
it "preserves data on reopening" do
db1 = DODB::CachedSpecDataBase(Ship).new
db1 << Ship.kisaragi
db1.to_a.size.should eq(1)
# Second instance on the same directory, previous data kept.
db2 = DODB::CachedSpecDataBase(Ship).new remove_previous_data: false
db2 << Ship.mutsuki
# Only difference with DODB::DataBase: concurrent DB cannot coexist.
db2.to_a.size.should eq(2)
db1.rm_storage_dir
db2.rm_storage_dir
end
it "iterates in normal and reversed order" do
db = DODB::CachedSpecDataBase(Ship).new
Ship.all_ships.each do |ship|
db << ship
end
# The two #each test iteration.
db.each_with_index do |item, index|
item.should eq Ship.all_ships[index]
end
db.each_with_index(reversed: true) do |item, index|
item.should eq Ship.all_ships[index]
end
# Actual reversal is tested here.
db.to_a(reversed: true).should eq db.to_a.reverse
db.rm_storage_dir
end
it "respects the provided offsets if any" do
db = DODB::CachedSpecDataBase(Ship).new
Ship.all_ships.each do |ship|
db << ship
end
# The assertions below show that both offsets are inclusive.
db.to_a(start_offset: 0, end_offset: 0)[0]?.should eq Ship.mutsuki
db.to_a(start_offset: 1, end_offset: 1)[0]?.should eq Ship.kisaragi
db.to_a(start_offset: 2, end_offset: 2)[0]?.should eq Ship.yayoi
db.to_a(start_offset: 0, end_offset: 2).should eq [
Ship.mutsuki, Ship.kisaragi, Ship.yayoi
]
db.rm_storage_dir
end
end
# Index specs (unique "name" index) on the cached database: lookup, duplicate
# rejection, de-indexing on delete, re-indexing on overwrite and update
# through the index. Each spec removes its storage directory at the end.
describe "indices" do
it "do basic indexing" do
db = DODB::CachedSpecDataBase(Ship).new
db_ships_by_name = db.new_index "name", &.name
Ship.all_ships.each do |ship|
db << ship
end
Ship.all_ships.each_with_index do |ship|
db_ships_by_name.get?(ship.name).should eq(ship)
end
db.rm_storage_dir
end
it "raise on index overload" do
db = DODB::CachedSpecDataBase(Ship).new
db_ships_by_name = db.new_index "name", &.name
db << Ship.kisaragi
# Should not be allowed to store an entry whose “name” field
# already exists.
expect_raises(DODB::IndexOverload) do
db << Ship.kisaragi
end
db.rm_storage_dir
end
it "properly deindex" do
db = DODB::CachedSpecDataBase(Ship).new
db_ships_by_name = db.new_index "name", &.name
Ship.all_ships.each do |ship|
db << ship
end
# Entries are deleted by key, using the position in `Ship.all_ships` as the key.
Ship.all_ships.each_with_index do |ship, i|
db.delete i
end
Ship.all_ships.each do |ship|
db_ships_by_name.get?(ship.name).should be_nil
end
db.rm_storage_dir
end
it "properly reindex" do
db = DODB::CachedSpecDataBase(Ship).new
db_ships_by_name = db.new_index "name", &.name
key = db << Ship.kisaragi
# We give the old id to the new ship, to get it replaced in
# the database.
some_new_ship = Ship.all_ships[2].clone
db[key] = some_new_ship
db[key].should eq(some_new_ship)
db_ships_by_name.get?(some_new_ship.name).should eq(some_new_ship)
db.rm_storage_dir
end
it "properly updates" do
db = DODB::CachedSpecDataBase(Ship).new
db_ships_by_name = db.new_index "name", &.name
Ship.all_ships.each do |ship|
db << ship
end
new_kisaragi = Ship.kisaragi.clone.tap do |s|
s.name = "Kisaragi Kai" # Do not think about it too much.
end
# We are changing an indexed value on purpose.
db_ships_by_name.update "Kisaragi", new_kisaragi
db_ships_by_name.get?("Kisaragi").should be_nil
db_ships_by_name.get?(new_kisaragi.name).should eq new_kisaragi
db.rm_storage_dir
end
end
# Partition specs ("class" partition) on the cached database: lookup by
# class and selective removal of entries from a partition.
# Each spec removes its storage directory at the end.
describe "partitions" do
  it "do basic partitioning" do
    db = DODB::CachedSpecDataBase(Ship).new
    db_ships_by_class = db.new_partition "class", &.klass

    Ship.all_ships.each do |ship|
      db << ship
    end

    Ship.all_ships.each do |ship|
      db_ships_by_class.get(ship.klass).should contain(ship)
    end

    # We extract the possible classes to do test on them.
    ship_classes = Ship.all_ships.map(&.klass).uniq
    ship_classes.each do |klass|
      partition = db_ships_by_class.get klass

      # A partition on “class” should contain entries that all
      # share the same value of “class”. The partition cannot be empty
      # here: the previous loop already checked it contains its ships.
      partition.all?(&.klass.==(klass)).should be_true
    end

    db_ships_by_class.get?("does-not-exist").should be_nil

    db.rm_storage_dir
  end

  it "removes select elements from partitions" do
    db = DODB::CachedSpecDataBase(Ship).new
    db_ships_by_class = db.new_partition "class", &.klass

    Ship.all_ships.each do |ship|
      db << ship
    end

    # Only the entries of the "Mutsuki" partition matching the block are removed.
    db_ships_by_class.delete "Mutsuki", &.name.==("Kisaragi")

    Ship.all_ships.map(&.klass).uniq.each do |klass|
      partition = db_ships_by_class.get klass
      partition.any?(&.name.==("Kisaragi")).should be_false
    end

    db.rm_storage_dir
  end
end
# Tag specs ("tags" index) on the cached database: lookup by one or several
# tags, and removal of a tag from an entry.
# Each spec removes its storage directory at the end.
describe "tags" do
  it "do basic tagging" do
    db = DODB::CachedSpecDataBase(Ship).new
    db_ships_by_tags = db.new_tags "tags", &.tags

    Ship.all_ships.each do |ship|
      db << ship
    end

    db_ships_by_tags.get("flagship").should eq([Ship.flagship])

    # All returned entries should have the requested tag.
    # The explicit emptiness check keeps the spec failing on an empty
    # result, since `all?` alone is true for an empty collection.
    name_ships = db_ships_by_tags.get("name ship")
    name_ships.should_not be_empty
    name_ships.all?(&.tags.includes?("name ship")).should be_true

    # There should not be one in our data about WWII Japanese warships…
    db_ships_by_tags.get?("starship").should be_nil

    db.rm_storage_dir
  end

  it "properly removes tags" do
    db = DODB::CachedSpecDataBase(Ship).new
    db_ships_by_tags = db.new_tags "tags", &.tags

    Ship.all_ships.each do |ship|
      db << ship
    end

    # Removing the “flagship” tag, brace for impact.
    # The retrieved value is cloned before being modified, then written back.
    flagship, index = db_ships_by_tags.get_with_indice("flagship")[0]
    flagship = flagship.clone
    flagship.tags = [] of String
    db[index] = flagship

    # ship, index = db_ships_by_tags.update(tag: "flagship") do |ship, index|
    #   ship.tags = [] of String
    #   db[index] = ship
    # end

    db_ships_by_tags.get("flagship").should eq([] of Ship)

    db.rm_storage_dir
  end

  it "gets items that have multiple tags" do
    db = DODB::CachedSpecDataBase(Ship).new
    db_ships_by_tags = db.new_tags "tags", &.tags

    Ship.all_ships.each do |ship|
      db << ship
    end

    # The order of the requested tags must not matter.
    results = db_ships_by_tags.get(["flagship", "name ship"])
    results.should eq([Ship.yamato])

    results = db_ships_by_tags.get(["name ship", "flagship"])
    results.should eq([Ship.yamato])

    results = db_ships_by_tags.get(["flagship"])
    results.should eq([Ship.yamato])

    db.rm_storage_dir
  end
end
# Specs for the block-based `safe_get` and `safe_get?` accessors of an index
# on the cached database.
describe "atomic operations" do
it "safe_get and safe_get?" do
db = DODB::CachedSpecDataBase(Ship).new
db_ships_by_name = db.new_index "name", &.name
Ship.all_ships.each do |ship|
db << ship
end
# Both accessors must yield the ship stored under the requested name.
Ship.all_ships.each do |ship|
db_ships_by_name.safe_get ship.name do |results|
results.should eq(ship)
end
db_ships_by_name.safe_get? ship.name do |results|
results.should eq(ship)
end
end
db.rm_storage_dir
end
end
# Maintenance specs on the cached database: rebuilding every index, and
# migrating the content of a `PrimitiveShip` database into a `Ship` database.
# Each spec removes its storage directories at the end.
describe "tools" do
it "rebuilds indexes" do
db = DODB::CachedSpecDataBase(Ship).new
db_ships_by_name = db.new_index "name", &.name
db_ships_by_class = db.new_partition "class", &.klass
db_ships_by_tags = db.new_tags "tags", &.tags
Ship.all_ships.each do |ship|
db << ship
end
db.reindex_everything!
# NOTE(review): only the index and the partition are checked after the rebuild, not the tags.
Ship.all_ships.each do |ship|
db_ships_by_name.get?(ship.name).should eq(ship)
db_ships_by_class.get(ship.klass).should contain(ship)
end
db.rm_storage_dir
end
it "migrates properly" do
old_db = DODB::CachedSpecDataBase(PrimitiveShip).new "-migration-origin"
old_ships_by_name = old_db.new_index "name", &.name
old_ships_by_class = old_db.new_partition "class", &.class_name
PrimitiveShip.all_ships.each do |ship|
old_db << ship
end
# At this point, the “old” DB is filled. Now we need to convert
# to the new DB.
new_db = DODB::CachedSpecDataBase(Ship).new "-migration-target"
new_ships_by_name = new_db.new_index "name", &.name
new_ships_by_class = new_db.new_partition "class", &.klass
new_ships_by_tags = new_db.new_tags "tags", &.tags
old_db.each_with_index do |ship, index|
# A ship named after its own class gets the "name ship" tag.
new_ship = Ship.new ship.name,
klass: ship.class_name,
id: ship.id,
tags: Array(String).new.tap { |tags|
tags << "name ship" if ship.name == ship.class_name
}
new_db[index] = new_ship
end
# At this point, the conversion is done, so… we are making a few
# arbitrary tests on the new data.
old_db.each_with_index do |old_ship, old_index|
ship = new_db[old_index]
ship.id.should eq(old_ship.id)
ship.klass.should eq(old_ship.class_name)
ship.tags.any?(&.==("name ship")).should be_true if ship.name == ship.klass
end
old_db.rm_storage_dir
new_db.rm_storage_dir
end
end
end

View File

@ -1,490 +0,0 @@
require "spec"
require "file_utils"
require "../src/dodb.cr"
require "./test-data.cr"
# Database of `Ship` used by the specs, stored in "test-storage<storage_ext>".
class DODB::SpecDataBase < DODB::DataBase(Ship)
  # `storage_ext` is appended to the storage directory name.
  # When `remove_previous_data` is true, the directory is wiped before the
  # database is opened.
  def initialize(storage_ext = "", remove_previous_data = true)
    directory = "test-storage#{storage_ext}"

    ::FileUtils.rm_rf directory if remove_previous_data

    super directory
  end
end
describe "DODB::DataBase" do
describe "basics" do
  # Every stored ship must be read back, whatever the order.
  it "store and get data" do
    db = DODB::SpecDataBase.new

    Ship.all_ships.each { |ship| db << ship }

    db.to_a.sort.should eq(Ship.all_ships.sort)
  end

  # Writing twice at the same key keeps only the last value.
  it "rewrite already stored data" do
    db = DODB::SpecDataBase.new
    ship = Ship.all_ships[0]

    key = db << ship

    db[key] = Ship.new "broken"
    db[key] = ship

    db[key].should eq(ship)
  end

  it "properly remove data" do
    db = DODB::SpecDataBase.new

    Ship.all_ships.each { |ship| db << ship }

    # One `pop` per stored ship.
    Ship.all_ships.size.times { db.pop }

    Ship.all_ships.each_index do |i|
      # FIXME: Should it raise a particular exception?
      expect_raises DODB::MissingEntry do
        db[i]
      end

      db[i]?.should be_nil
    end
  end

  it "preserves data on reopening" do
    db1 = DODB::SpecDataBase.new
    db1 << Ship.kisaragi

    db1.to_a.size.should eq(1)

    # Second handle on the same storage directory, previous data kept.
    db2 = DODB::SpecDataBase.new remove_previous_data: false
    db2 << Ship.mutsuki

    db1.to_a.size.should eq(2)
    # The reopened handle is the one this spec is about: check it as well.
    db2.to_a.size.should eq(2)
  end

  it "iterates in normal and reversed order" do
    db = DODB::SpecDataBase.new

    Ship.all_ships.each { |ship| db << ship }

    # The two #each test iteration.
    db.each_with_index do |item, index|
      item.should eq Ship.all_ships[index]
    end

    db.each_with_index(reversed: true) do |item, index|
      item.should eq Ship.all_ships[index]
    end

    # Actual reversal is tested here.
    db.to_a(reversed: true).should eq db.to_a.reverse
  end

  it "respects the provided offsets if any" do
    db = DODB::SpecDataBase.new

    Ship.all_ships.each { |ship| db << ship }

    db.to_a(start_offset: 0, end_offset: 0)[0]?.should eq Ship.mutsuki
    db.to_a(start_offset: 1, end_offset: 1)[0]?.should eq Ship.kisaragi
    db.to_a(start_offset: 2, end_offset: 2)[0]?.should eq Ship.yayoi

    db.to_a(start_offset: 0, end_offset: 2).should eq [
      Ship.mutsuki, Ship.kisaragi, Ship.yayoi,
    ]
  end
end
describe "indices" do
  it "do basic indexing" do
    db = DODB::SpecDataBase.new

    db_ships_by_name = db.new_index "name", &.name

    Ship.all_ships.each { |ship| db << ship }

    # The position is irrelevant here: a plain `each` is enough.
    Ship.all_ships.each do |ship|
      db_ships_by_name.get?(ship.name).should eq(ship)
    end
  end

  it "raise on index overload" do
    db = DODB::SpecDataBase.new

    db_ships_by_name = db.new_index "name", &.name

    db << Ship.kisaragi

    # Should not be allowed to store an entry whose “name” field
    # already exists.
    expect_raises(DODB::IndexOverload) do
      db << Ship.kisaragi
    end
  end

  it "properly deindex" do
    db = DODB::SpecDataBase.new

    db_ships_by_name = db.new_index "name", &.name

    Ship.all_ships.each { |ship| db << ship }

    # Only the positions are needed to delete the entries.
    Ship.all_ships.each_index { |i| db.delete i }

    Ship.all_ships.each do |ship|
      db_ships_by_name.get?(ship.name).should be_nil
    end
  end

  it "properly reindex" do
    db = DODB::SpecDataBase.new

    db_ships_by_name = db.new_index "name", &.name

    key = db << Ship.kisaragi

    # We store the new ship under the old key, to get the old one
    # replaced in the database.
    some_new_ship = Ship.all_ships[2].clone

    db[key] = some_new_ship

    db[key].should eq(some_new_ship)

    db_ships_by_name.get?(some_new_ship.name).should eq(some_new_ship)
  end

  it "properly updates" do
    db = DODB::SpecDataBase.new

    db_ships_by_name = db.new_index "name", &.name

    Ship.all_ships.each { |ship| db << ship }

    new_kisaragi = Ship.kisaragi.clone.tap do |s|
      s.name = "Kisaragi Kai" # Don’t think about it too much.
    end

    # We’re changing an indexed value on purpose.
    db_ships_by_name.update "Kisaragi", new_kisaragi

    db_ships_by_name.get?("Kisaragi").should be_nil
    db_ships_by_name.get?(new_kisaragi.name).should eq new_kisaragi
  end
end
describe "partitions" do
  it "do basic partitioning" do
    db = DODB::SpecDataBase.new

    db_ships_by_class = db.new_partition "class", &.klass

    Ship.all_ships.each { |ship| db << ship }

    Ship.all_ships.each do |ship|
      db_ships_by_class.get(ship.klass).should contain(ship)
    end

    # We extract the possible classes to do test on them.
    ship_classes = Ship.all_ships.map(&.klass).uniq
    ship_classes.each do |klass|
      partition = db_ships_by_class.get klass

      # A partition on “class” should contain entries that all
      # share the same value of “class”.
      # `all?` states the intent directly and, unlike `reduce` without an
      # initial value, does not raise on an empty collection.
      partition.all?(&.klass.==(klass)).should be_true
    end

    db_ships_by_class.get("does-not-exist").should eq [] of Ship
  end

  it "removes select elements from partitions" do
    db = DODB::SpecDataBase.new

    db_ships_by_class = db.new_partition "class", &.klass

    Ship.all_ships.each { |ship| db << ship }

    db_ships_by_class.delete "Mutsuki", &.name.==("Kisaragi")

    # "Kisaragi" must not be reachable through any partition anymore.
    Ship.all_ships.map(&.klass).uniq.each do |klass|
      partition = db_ships_by_class.get klass

      partition.any?(&.name.==("Kisaragi")).should be_false
    end
  end
end
describe "tags" do
  it "do basic tagging" do
    db = DODB::SpecDataBase.new

    db_ships_by_tags = db.new_tags "tags", &.tags

    Ship.all_ships.each { |ship| db << ship }

    db_ships_by_tags.get("flagship").should eq([Ship.flagship])

    # All returned entries should have the requested tag.
    # `all?` replaces the former `map` + `reduce`, which raised on an
    # empty result instead of reporting a failed expectation.
    db_ships_by_tags.get("name ship")
      .all?(&.tags.includes?("name ship"))
      .should be_true

    # There shouldn’t be one in our data about WWII Japanese warships…
    db_ships_by_tags.get("starship").should eq([] of Ship)
  end

  it "properly removes tags" do
    db = DODB::SpecDataBase.new

    db_ships_by_tags = db.new_tags "tags", &.tags

    Ship.all_ships.each { |ship| db << ship }

    # Removing the “flagship” tag, brace for impact.
    flagship, index = db_ships_by_tags.get_with_indice("flagship")[0]
    flagship.tags = [] of String
    db[index] = flagship

    # ship, index = db_ships_by_tags.update(tag: "flagship") do |ship, index|
    #   ship.tags = [] of String
    #   db[index] = ship
    # end

    db_ships_by_tags.get("flagship").should eq([] of Ship)
  end

  it "gets items that have multiple tags" do
    db = DODB::SpecDataBase.new

    db_ships_by_tags = db.new_tags "tags", &.tags

    Ship.all_ships.each { |ship| db << ship }

    # The order of the requested tags must not matter.
    results = db_ships_by_tags.get(["flagship", "name ship"])
    results.should eq([Ship.yamato])

    results = db_ships_by_tags.get(["name ship", "flagship"])
    results.should eq([Ship.yamato])

    results = db_ships_by_tags.get(["flagship"])
    results.should eq([Ship.yamato])
  end
end
describe "atomic operations" do
  # Both `safe_get` and `safe_get?` must yield the ship stored under its name.
  it "safe_get and safe_get?" do
    db = DODB::SpecDataBase.new

    db_ships_by_name = db.new_index "name", &.name

    Ship.all_ships.each { |ship| db << ship }

    Ship.all_ships.each do |ship|
      db_ships_by_name.safe_get(ship.name) { |results| results.should eq(ship) }

      db_ships_by_name.safe_get?(ship.name) { |results| results.should eq(ship) }
    end
  end
end
describe "tools" do
  # Rebuilding every index must leave lookups by name and by class intact.
  it "rebuilds indexes" do
    db = DODB::SpecDataBase.new

    db_ships_by_name = db.new_index "name", &.name
    db_ships_by_class = db.new_partition "class", &.klass
    db_ships_by_tags = db.new_tags "tags", &.tags

    Ship.all_ships.each { |ship| db << ship }

    db.reindex_everything!

    Ship.all_ships.each do |ship|
      db_ships_by_name.get?(ship.name).should eq(ship)
      db_ships_by_class.get(ship.klass).should contain(ship)
    end
  end

  # Copies a database of `PrimitiveShip` into a database of `Ship`, entry by
  # entry, keeping the same keys.
  it "migrates properly" do
    ::FileUtils.rm_rf "test-storage-migration-origin"
    old_db = DODB::DataBase(PrimitiveShip).new "test-storage-migration-origin"

    old_ships_by_name = old_db.new_index "name", &.name
    old_ships_by_class = old_db.new_partition "class", &.class_name

    PrimitiveShip.all_ships.each { |ship| old_db << ship }

    # At this point, the “old” DB is filled. Now we need to convert
    # to the new DB.

    new_db = DODB::SpecDataBase.new "-migration-target"

    new_ships_by_name = new_db.new_index "name", &.name
    new_ships_by_class = new_db.new_partition "class", &.klass
    new_ships_by_tags = new_db.new_tags "tags", &.tags

    old_db.each_with_index do |ship, index|
      # A ship named after its own class gets the "name ship" tag.
      tags = ship.name == ship.class_name ? ["name ship"] : Array(String).new

      new_db[index] = Ship.new ship.name,
        klass: ship.class_name,
        id: ship.id,
        tags: tags
    end

    # At this point, the conversion is done, so… we’re making a few
    # arbitrary tests on the new data.

    old_db.each_with_index do |old_ship, old_index|
      ship = new_db[old_index]

      ship.id.should eq(old_ship.id)
      ship.klass.should eq(old_ship.class_name)

      if ship.name == ship.klass
        ship.tags.includes?("name ship").should be_true
      end
    end
  end
end
describe "parallel support" do
  # Not sure how many forks would be safe in a test like that.
  fork_count = 25
  entries_per_fork = 100

  it "works for pushing values" do
    db = DODB::SpecDataBase.new

    # Each fork pushes its own batch of uniquely named entries.
    processes = Array.new(fork_count) do |fork_id|
      Process.fork do
        entries_per_fork.times do |entry_id|
          db << Ship.new("entry-#{fork_id}-#{entry_id}", "???")
        end
      end
    end

    processes.each &.wait

    dump = db.to_a

    dump.size.should eq fork_count * entries_per_fork
  end

  it "works for updating values" do
    db = DODB::SpecDataBase.new
    db_entries_by_name = db.new_index "name", &.name

    # First pass, creating data.
    processes = Array.new(fork_count) do |fork_id|
      Process.fork do
        entries_per_fork.times do |entry_id|
          db << Ship.new("entry-#{fork_id}-#{entry_id}", "???")
        end
      end
    end

    processes.each &.wait

    # Second pass, updating data.
    processes = Array.new(fork_count) do |fork_id|
      Process.fork do
        entries_per_fork.times do |entry_id|
          db_entries_by_name.update Ship.new("entry-#{fork_id}-#{entry_id}", "???", tags: ["updated"])
        end
      end
    end

    processes.each &.wait

    # Third pass, testing database content.
    fork_count.times do |fork_id|
      entries_per_fork.times do |entry_id|
        entry = db_entries_by_name.get "entry-#{fork_id}-#{entry_id}"

        entry.tags.should eq ["updated"]
      end
    end
  end

  it "does parallel-safe updates" do
    db = DODB::SpecDataBase.new
    db_entries_by_name = db.new_index "name", &.name

    # We’ll be storing an integer in the "klass" field, and incrementing
    # it in forks in a second time.
    db << Ship.new("test", "0")

    processes = Array.new(fork_count) do
      Process.fork do
        entries_per_fork.times do
          db_entries_by_name.safe_get "test" do |entry|
            entry.klass = (entry.klass.to_i + 1).to_s

            db_entries_by_name.update "test", entry
          end
        end
      end
    end

    processes.each &.wait

    # Every increment must have been applied exactly once.
    db_entries_by_name.get("test").klass.should eq((fork_count * entries_per_fork).to_s)
  end
end
end

View File

@ -12,21 +12,21 @@ def perform_benchmark_average(ntimes : Int32, &block)
sum += elapsed_time
i += 1
end
sum / ntimes
end
def perform_benchmark_average_verbose(title : String, ntimes : Int32, &block)
def run_n_times(ntimes : Int32, &block)
i = 1
sum = Time::Span.zero
puts "Execute '#{title}' × #{ntimes}"
durations = Array(Float64).new
while i <= ntimes
elapsed_time = perform_something &block
sum += elapsed_time
STDOUT.write "\relapsed_time: #{elapsed_time}, average: #{sum/i}".to_slice
durations << elapsed_time.total_nanoseconds
i += 1
end
puts ""
puts "Average: #{sum/ntimes}"
durations
end
# TODO: placeholder, always raises.
# Neither `path` nor `expected_nb_files` is used yet.
def should_nb_files(path : String, expected_nb_files : UInt32)
  raise "should_nb_files: not implemented yet"
end

View File

@ -30,9 +30,9 @@ class DODB::CachedDataBase(V) < DODB::Storage(V)
self.last_index = -1
end
# TODO: load the database in RAM at start-up
# Load the database in RAM at start-up.
DODB::DataBase(V).new(@directory_name).each_with_index do |v, index|
puts "loading value #{v} at index #{index}"
puts "\rloading data from #{@directory_name} at index #{index}"
self[index] = v
end
end
@ -98,8 +98,7 @@ class DODB::CachedDataBase(V) < DODB::Storage(V)
begin
::File.delete file_path key
rescue
# FIXME: Only intercept “no such file" errors
rescue File::NotFoundError
end
remove_indexes key, value
@ -113,3 +112,56 @@ class DODB::CachedDataBase(V) < DODB::Storage(V)
@data = Hash(Int32, V).new
end
end
# `DODB::RAMOnlyDataBase` is a database without a file-system representation,
# enabling the use of DODB to store data which have the same lifetime as the application.
# Indexing (indexes, partitions, tags) will behave the same way.
class DODB::RAMOnlyDataBase(V) < DODB::CachedDataBase(V)
  # Initialization still uses a directory name and creates a few paths.
  # This is an implementation detail to re-use code of `DODB::Storage` and to get the indexers to work.
  def initialize(@directory_name : String)
    Dir.mkdir_p data_path
    Dir.mkdir_p locks_directory
    self.last_index = -1
  end

  # WARNING: takes `[]?` and `[]` implementations from `CachedDataBase`.
  # This will lead to errors in case the implementations change, be aware.

  # Stores `value` at `index` in the in-memory hash, after the collision
  # check and the indexer updates.
  def []=(index : Int32, value : V)
    previous = self[index]?

    check_collisions! index, value, previous

    # Removes any old indices or partitions pointing to a value about
    # to be replaced.
    remove_indexes index, previous if previous

    write_partitions index, value

    self.last_index = index if index > last_index

    @data[index] = value
  end

  # Removes the entry stored at `key` along with its index entries.
  # Returns the removed value, or nil when there was nothing at `key`.
  def delete(key : Int32)
    self[key]?.try do |value|
      remove_indexes key, value
      @data.delete key
      value
    end
  end

  # Runs the inherited clean-up, then starts over with an empty hash.
  private def remove_data!
    super
    @data = Hash(Int32, V).new
  end
end

View File

@ -135,6 +135,18 @@ abstract class DODB::Storage(V)
end
end
# Creates a `RAMOnlyIndex` named `name`, registers it among the indexers and
# returns it. `block` gives the index key of a value.
def new_RAM_index(name : String, &block : Proc(V, String))
  indexer = RAMOnlyIndex(V).new self, @directory_name, name, block
  @indexers << indexer
  indexer
end
# Same as a RAM-only index, but `block` may return `DODB::NoIndex` instead of
# a key. The new indexer is registered and returned.
def new_nilable_RAM_index(name : String, &block : Proc(V, String | DODB::NoIndex))
  indexer = RAMOnlyIndex(V).new self, @directory_name, name, block
  @indexers << indexer
  indexer
end
def get_index(name : String, key)
index = @indexers.find &.name.==(name)
@ -155,6 +167,30 @@ abstract class DODB::Storage(V)
end
end
# Creates a `RAMOnlyPartition` named `name`, registers it among the indexers
# and returns it. `block` gives the partition of a value.
def new_RAM_partition(name : String, &block : Proc(V, String))
  table = RAMOnlyPartition(V).new self, @directory_name, name, block
  @indexers << table
  table
end
# Creates a `CachedPartition` whose `block` may return `DODB::NoIndex`,
# registers it among the indexers and returns it.
def new_nilable_partition(name : String, &block : Proc(V, String | DODB::NoIndex))
  table = CachedPartition(V).new self, @directory_name, name, block
  @indexers << table
  table
end
# Creates a plain `Partition` whose `block` may return `DODB::NoIndex`,
# registers it among the indexers and returns it.
def new_nilable_uncached_partition(name : String, &block : Proc(V, String | DODB::NoIndex))
  table = Partition(V).new self, @directory_name, name, block
  @indexers << table
  table
end
# Creates a `RAMOnlyPartition` whose `block` may return `DODB::NoIndex`,
# registers it among the indexers and returns it.
def new_nilable_RAM_partition(name : String, &block : Proc(V, String | DODB::NoIndex))
  table = RAMOnlyPartition(V).new self, @directory_name, name, block
  @indexers << table
  table
end
def get_partition(table_name : String, partition_name : String)
partition = @indexers.find &.name.==(table_name)
@ -177,6 +213,30 @@ abstract class DODB::Storage(V)
end
end
# Creates a `RAMOnlyTags` indexer named `name`, registers it among the
# indexers and returns it. `block` gives the tags of a value.
def new_RAM_tags(name : String, &block : Proc(V, Array(String)))
  tags = RAMOnlyTags(V).new self, @directory_name, name, block
  @indexers << tags
  tags
end
# Creates a `CachedTags` indexer whose `block` may return `DODB::NoIndex`,
# registers it among the indexers and returns it.
def new_nilable_tags(name : String, &block : Proc(V, Array(String) | DODB::NoIndex))
  tags = CachedTags(V).new self, @directory_name, name, block
  @indexers << tags
  tags
end
# Creates a plain `Tags` indexer whose `block` may return `DODB::NoIndex`,
# registers it among the indexers and returns it.
def new_nilable_uncached_tags(name : String, &block : Proc(V, Array(String) | DODB::NoIndex))
  tags = Tags(V).new self, @directory_name, name, block
  @indexers << tags
  tags
end
# Creates a `RAMOnlyTags` indexer whose `block` may return `DODB::NoIndex`,
# registers it among the indexers and returns it.
def new_nilable_RAM_tags(name : String, &block : Proc(V, Array(String) | DODB::NoIndex))
  tags = RAMOnlyTags(V).new self, @directory_name, name, block
  @indexers << tags
  tags
end
def get_tags(name, key : String)
tag = @indexers.find &.name.==(name)
@ -250,23 +310,17 @@ abstract class DODB::Storage(V)
# Wipes the content of every registered indexer.
#
# `nuke_index` already removes the indexing directory of the indexer (and the
# cached variants also clear their in-memory data), so no separate
# `FileUtils.rm_rf` is needed here.
private def remove_indexing!
  @indexers.each &.nuke_index
end
# A very slow operation that removes all indices and then rewrites
# them all.
# FIXME: Is this really useful in its current form? We should remove the
# index directories, not the indices based on our current (and
# possibly different from what’s stored) data.
def reindex_everything!
old_data = to_h
remove_indexing!
remove_data!
old_data.each do |index, item|
self[index] = item
each_with_index() do |item, index|
write_partitions index, item
end
end

View File

@ -17,13 +17,16 @@ class DODB::Index(V) < DODB::Indexer(V)
def check!(key, value, old_value)
index_key = key_proc.call value
return if index_key.is_a? NoIndex
symlink = file_path_index index_key.to_s
# FIXME: Check its not pointing to “old_value”, if any, before raising.
if ::File.exists? symlink
if ::File.symlink? symlink
# In case both old and new values are pointing to the same key,
# this is not considered a collision.
if old_value
old_key = key_proc.call old_value
return if symlink == file_path_index old_key.to_s
return if index_key == old_key
end
raise IndexOverload.new "index '#{@name}' is overloaded for key '#{key}', file #{symlink} exists"
@ -32,24 +35,17 @@ class DODB::Index(V) < DODB::Indexer(V)
def index(key, value)
index_key = key_proc.call value
return if index_key.is_a? NoIndex
symlink = file_path_index index_key
Dir.mkdir_p ::File.dirname symlink
# FIXME: Now that this is done in check!, can we remove it?
if ::File.exists? symlink
raise Exception.new "symlink already exists: #{symlink}"
end
::File.symlink get_data_symlink_index(key), symlink
end
def deindex(key, value)
index_key = key_proc.call value
return if index_key.is_a? NoIndex
symlink = file_path_index index_key
@ -96,9 +92,7 @@ class DODB::Index(V) < DODB::Indexer(V)
def get_key_on_fs(index : String) : Int32
file_path = file_path_index index
raise MissingEntry.new(@name, index) unless ::File.exists? file_path
raise MissingEntry.new(@name, index) unless ::File.symlink? file_path
::File.readlink(file_path).sub(/^.*\//, "").to_i
end
@ -157,9 +151,11 @@ class DODB::CachedIndex(V) < DODB::Index(V)
def check!(key, value, old_value)
index_key = key_proc.call value
return if index_key.is_a? NoIndex
# FIXME: Check its not pointing to “old_value”, if any, before raising.
if data[index_key]?
# In case both old and new values are pointing to the same key,
# this is not considered a collision.
if old_value
old_key = key_proc.call old_value
return if index_key == old_key
@ -169,20 +165,23 @@ class DODB::CachedIndex(V) < DODB::Index(V)
end
end
def index(key, value)
super(key, value)
def nuke_index
super
data.clear
end
def index(key, value)
index_key = key_proc.call value
return if index_key.is_a? NoIndex
super(key, value)
@data[index_key] = key.to_i
end
def deindex(key, value)
super(key, value)
index_key = key_proc.call value
return if index_key.is_a? NoIndex
super(key, value)
@data.delete index_key
end
@ -200,3 +199,30 @@ class DODB::CachedIndex(V) < DODB::Index(V)
end
end
end
# `DODB::RAMOnlyIndex` enables the flexibility of indexes without a file-system representation.
# Absolute efficiency, exactly as easy to use as the other index implementations.
class DODB::RAMOnlyIndex(V) < DODB::CachedIndex(V)
  # Records `key` under the index key computed from `value`, in memory only.
  # Nothing is recorded when the computed key is a `NoIndex`.
  def index(key, value)
    index_key = key_proc.call value
    @data[index_key] = key.to_i unless index_key.is_a? NoIndex
  end

  # Forgets the index key computed from `value`.
  # Nothing happens when the computed key is a `NoIndex`.
  def deindex(key, value)
    index_key = key_proc.call value
    @data.delete index_key unless index_key.is_a? NoIndex
  end

  # Get the key (ex: 343) for an entry in the DB.
  # With a RAM only index, the key is necessarily stored in the hash.
  # Raises `MissingEntry` when `index` is unknown.
  def get_key(index : String) : Int32
    @data[index]? || raise MissingEntry.new(@name, index)
  end
end

View File

@ -6,5 +6,9 @@ abstract class DODB::Indexer(V)
abstract def name : String
abstract def indexing_directory : String
# Removes the whole indexing directory of this indexer.
def nuke_index
  FileUtils.rm_rf(indexing_directory)
end
end

View File

@ -1,5 +1,11 @@
# Value an indexing proc returns instead of a key; indexers skip the entry
# when they get one.
class DODB::NoIndex
  include JSON::Serializable

  def_clone

  # No state to set up.
  def initialize
  end
end
module DODB

View File

@ -4,7 +4,7 @@ require "./indexer.cr"
class DODB::Partition(V) < DODB::Indexer(V)
property name : String
property key_proc : Proc(V, String)
property key_proc : Proc(V, String | NoIndex) | Proc(V, String)
getter storage_root : String
# Required to remove an entry in the DB.
@ -20,19 +20,18 @@ class DODB::Partition(V) < DODB::Indexer(V)
def index(key, value)
partition = key_proc.call value
return if partition.is_a? NoIndex
symlink = get_partition_symlink(partition, key)
Dir.mkdir_p ::File.dirname symlink
# FIXME: Should not happen anymore. Should we remove this?
::File.delete symlink if ::File.exists? symlink
::File.symlink get_data_symlink(key), symlink
end
def deindex(key, value)
partition = key_proc.call value
return if partition.is_a? NoIndex
symlink = get_partition_symlink(partition, key)
@ -43,11 +42,10 @@ class DODB::Partition(V) < DODB::Indexer(V)
end
def get(partition) : Array(V)
r_value = Array(V).new
partition_directory = indexing_directory partition
raise MissingEntry.new(@name, partition) unless Dir.exists? partition_directory
return r_value unless Dir.exists? partition_directory
r_value = Array(V).new
Dir.each_child partition_directory do |child|
r_value << @storage[get_key child]
@ -66,7 +64,7 @@ class DODB::Partition(V) < DODB::Indexer(V)
delete partition, do true end
end
def delete(partition, &matcher)
def delete(partition, &matcher : Proc(V, Bool))
partition_directory = indexing_directory partition
return unless Dir.exists? partition_directory
@ -106,9 +104,15 @@ class DODB::CachedPartition(V) < DODB::Partition(V)
# This hash contains the relation between the index key and the data keys.
property data = Hash(String, Array(Int32)).new
def nuke_index
super
data.clear
end
def index(key, value)
super(key, value)
partition = key_proc.call value
return if partition.is_a? NoIndex
super(key, value)
array = if v = @data[partition]?
v
@ -121,8 +125,9 @@ class DODB::CachedPartition(V) < DODB::Partition(V)
end
def deindex(key, value)
super(key, value)
partition = key_proc.call value
return if partition.is_a? NoIndex
super(key, value)
if v = @data[partition]?
v.delete key.to_i
@ -130,7 +135,7 @@ class DODB::CachedPartition(V) < DODB::Partition(V)
end
end
def get(partition)
def get_with_indexes(partition) : Array(Tuple(V, Int32))
r_value = Array(Tuple(V, Int32)).new
if keys = @data[partition]?
@ -148,6 +153,72 @@ class DODB::CachedPartition(V) < DODB::Partition(V)
@data[partition] = r_value.map &.[1]
end
r_value.map &.[0]
r_value
end
# Values of `partition`, without their keys.
def get(partition) : Array(V)
  get_with_indexes(partition).map { |entry| entry[0] }
end
# Drops from the cached partition every key whose item satisfies the
# matcher, then lets the parent implementation do its own part.
def delete(partition, &matcher : Proc(V, Bool))
  # Use `get_with_indexes` to retrieve data on-disk, if necessary.
  kept_keys = Array(Int32).new

  get_with_indexes(partition).each do |entry|
    key = entry[1]
    kept_keys << key unless yield @storage[key]
  end

  @data[partition] = kept_keys

  super(partition, &matcher)
end
end
# `DODB::RAMOnlyPartition` enables the flexibility of partitions without a file-system representation.
# Absolute efficiency, exactly as easy to use as the other partition implementations.
class DODB::RAMOnlyPartition(V) < DODB::CachedPartition(V)
  # Appends `key` to the in-memory list of the partition computed from
  # `value`; skipped when the proc returns a `NoIndex`.
  def index(key, value)
    partition = key_proc.call value
    return if partition.is_a? NoIndex

    (@data[partition] ||= Array(Int32).new) << key.to_i
  end

  # Removes `key` from the in-memory list of the partition computed from
  # `value`; skipped when the proc returns a `NoIndex`.
  def deindex(key, value)
    partition = key_proc.call value
    return if partition.is_a? NoIndex

    @data[partition]?.try &.delete(key.to_i)
  end

  # Values of `partition` paired with their keys, read from the in-memory
  # lists only. An unknown partition gives an empty array.
  def get_with_indexes(partition) : Array(Tuple(V, Int32))
    entries = Array(Tuple(V, Int32)).new

    keys = @data[partition]?
    return entries if keys.nil?

    keys.each { |data_key| entries << {@storage[data_key], data_key} }
    entries
  end

  # Keeps in `partition` only the keys whose item does not satisfy the
  # matcher.
  # NOTE(review): only the in-memory list is changed here, `@storage` is not
  # touched. Confirm this is the intended meaning of `delete`.
  def delete(partition, &matcher : Proc(V, Bool))
    keys = @data[partition]?
    return if keys.nil?

    @data[partition] = keys.reject { |key| yield @storage[key] }
  end
end

View File

@ -2,7 +2,7 @@ require "file_utils"
class DODB::Tags(V) < DODB::Indexer(V)
property name : String
property key_proc : Proc(V, Array(String))
property key_proc : Proc(V, Array(String) | NoIndex) | Proc(V, Array(String))
getter storage_root : String
# Required to remove an entry in the DB.
@ -18,6 +18,7 @@ class DODB::Tags(V) < DODB::Indexer(V)
def index(key, value)
indices = key_proc.call(value)
return if indices.is_a? NoIndex
indices.each do |i|
symlink = get_tagged_entry_path(i, key)
@ -30,6 +31,7 @@ class DODB::Tags(V) < DODB::Indexer(V)
def deindex(key, value)
indices = key_proc.call(value)
return if indices.is_a? NoIndex
indices.each do |i|
symlink = get_tagged_entry_path(i, key)
@ -42,11 +44,10 @@ class DODB::Tags(V) < DODB::Indexer(V)
end
def get_with_indice(tag : String) : Array(Tuple(V, Int32))
r_value = Array(Tuple(V, Int32)).new
tag_directory = indexing_directory tag
raise MissingEntry.new(@name, tag) unless Dir.exists? tag_directory
return r_value unless Dir.exists? tag_directory
r_value = Array(Tuple(V, Int32)).new
Dir.each_child tag_directory do |child|
key = get_key child
@ -56,10 +57,19 @@ class DODB::Tags(V) < DODB::Indexer(V)
r_value
end
# `get_with_indices` gets the values carrying *all* the given tags, paired
# with their keys.
#
# Returns an empty array when `keys` is empty, when a lookup fails, or when
# no value has every tag. `keys` itself is left untouched: the previous
# implementation used `keys.pop`, which removed an element from the caller's
# array.
def get_with_indices(keys : Array(String)) : Array(Tuple(V, Int32))
  r_value = Array(Tuple(V, Int32)).new
  return r_value if keys.empty?

  # Start from the last tag, as before, so the result order is unchanged.
  r_value = get_with_indice(keys.last) rescue return r_value

  keys[0...-1].each do |tag|
    values = get_with_indice(tag) rescue return [] of Tuple(V, Int32)
    # Intersection only: appending `values` first would turn the result
    # into the entries of the current tag alone.
    r_value &= values
    return r_value if r_value.empty?
  end
  r_value
end
@ -123,8 +133,9 @@ class DODB::CachedTags(V) < DODB::Tags(V)
property data = Hash(String, Array(Int32)).new
def index(key, value)
super(key, value)
indices = key_proc.call value
return if indices.is_a? NoIndex
super(key, value)
indices.each do |tag|
array = if v = @data[tag]?
@ -139,8 +150,9 @@ class DODB::CachedTags(V) < DODB::Tags(V)
end
def deindex(key, value)
super(key, value)
indices = key_proc.call value
return if indices.is_a? NoIndex
super(key, value)
indices.each do |tag|
if v = @data[tag]?
@ -150,6 +162,11 @@ class DODB::CachedTags(V) < DODB::Tags(V)
end
end
def nuke_index
super
data.clear
end
def get_with_indice(tag : String) : Array(Tuple(V, Int32))
r_value = Array(Tuple(V, Int32)).new
@ -171,4 +188,73 @@ class DODB::CachedTags(V) < DODB::Tags(V)
r_value
end
# Drops from the cached tag every key whose item satisfies the matcher,
# then delegates to the parent implementation.
#
# The lookup goes through `get_with_indice`, the accessor tags provide;
# `get_with_indexes` is the partition accessor and was left over from a copy
# of the partition code. The block is typed like the partition `delete`.
# NOTE(review): relies on the parent class providing `delete(tag, &matcher)`,
# which is not visible here. Confirm.
def delete(tag, &matcher : Proc(V, Bool))
  # Use `get_with_indice` to retrieve data on-disk, if necessary.
  new_tag = get_with_indice(tag).map(&.[1]).select do |key|
    item = @storage[key]
    !yield item
  end

  @data[tag] = new_tag

  super(tag, &matcher)
end
end
# `DODB::RAMOnlyTags` enables the flexibility of tags without a file-system representation.
# Absolute efficiency, exactly as easy to use as the other tag implementations.
class DODB::RAMOnlyTags(V) < DODB::CachedTags(V)
  # Appends `key` to the in-memory list of every tag computed from `value`;
  # skipped when the proc returns a `NoIndex`.
  def index(key, value)
    indices = key_proc.call value
    return if indices.is_a? NoIndex

    indices.each do |tag|
      (@data[tag] ||= Array(Int32).new) << key.to_i
    end
  end

  # Removes `key` from the in-memory list of every tag computed from `value`;
  # skipped when the proc returns a `NoIndex`.
  def deindex(key, value)
    indices = key_proc.call value
    return if indices.is_a? NoIndex

    indices.each do |tag|
      @data[tag]?.try &.delete(key.to_i)
    end
  end

  # Values carrying `tag` paired with their keys, read from the in-memory
  # lists only. An unknown tag gives an empty array.
  def get_with_indice(tag : String) : Array(Tuple(V, Int32))
    entries = Array(Tuple(V, Int32)).new

    keys = @data[tag]?
    return entries if keys.nil?

    keys.each { |data_key| entries << {@storage[data_key], data_key} }
    entries
  end

  # Keeps under `tag` only the keys whose item does not satisfy the matcher.
  # Works on the in-memory lists only.
  # NOTE(review): `@storage` is not touched here. Confirm this is the
  # intended meaning of `delete`.
  def delete(tag, &matcher)
    keys = @data[tag]?
    return if keys.nil?

    @data[tag] = keys.reject { |key| yield @storage[key] }
  end
end

39
stats.sh Executable file
View File

@ -0,0 +1,39 @@
#!/bin/sh
# Exactly one argument is expected: the directory holding the raw results.
# A usage error goes to stderr with a non-zero status so callers can detect it.
if [ $# -ne 1 ]
then
	echo "usage: $0 result-directory" >&2
	exit 1
fi

dir="$1"

# Summarize every result file with R, then flatten the summary to one line.
for i in "$dir"/*
do
	# Skip what a previous run of this script generated, and anything that
	# is not a regular file (including the unexpanded pattern when the
	# directory is empty).
	case "$i" in
		*.r | *.raw | *.final) continue ;;
	esac
	[ -f "$i" ] || continue

	Rscript ~/bin/summary.r "$i" > "$i.r"
	rsum2line.awk "$i.r" > "$i.raw"
done
# List raw files with the number of iterations as a prefix so they can then be sorted.
# Reads the global `dir`; prints one line per `.raw` file, in numeric order.
sort_raw_files() {
	for raw in "$dir"/*.raw
	do
		# Without any match the pattern stays literal: nothing to list.
		[ -e "$raw" ] || continue
		f "$raw"
	done | sort -n
}
# Prints "<4th> <2nd>/<3rd>_<5th> <path>" for a path, the numbered parts being
# the pieces of the path once split on '_', '.' and '/' (the path itself
# counts as the first field).
# Example: "results/ram_1000_index.raw" gives
#          "1000 results/ram_index results/ram_1000_index.raw".
# The arguments are quoted so that the shell does not glob-expand them.
f() {
	printf '%s\n' "$*" | sed "s/[_./]/ /g" | xargs echo "$* " | awk '{ printf "%s %s/%s_%s %s\n", $4, $2, $3, $5, $1 }'
}
# Reads "<nb iterations> <target> <raw file>" lines on stdin and appends
# "<nb iterations>  <content of the raw file>" to "<target>.final".
# `read` splits the three fields itself (no `awk` per field); any extra
# field is ignored, as before.
fill() {
	while read -r nb_it target fname _rest
	do
		# An empty file name would make the command read the pipeline itself.
		[ -n "$fname" ] || continue
		xargs echo "$nb_it " < "$fname" >> "$target.final"
	done
}
sort_raw_files | fill