DODB PDF.

This commit is contained in:
Philippe PITTOLI 2024-05-13 21:46:02 +02:00
parent a986e56264
commit fc52757074
5 changed files with 271 additions and 205 deletions

View File

@ -0,0 +1,69 @@
.G1
copy "legend.grap"
frame invis ht 3 wid 4 left solid bot solid
coord y 0,50
ticks left out from 0 to 50 by 10
ticks bot out at 50000 "50,000", 100000 "100,000", 150000 "150,000", 200000 "200,000", 250000 "250,000"
label left "Request duration with" unaligned "an index (µs)" "(Median)" left 0.8
label bot "Number of cars in the database" down 0.1
obram = obuncache = obcache = obsemi = 0 # old bullets
cbram = cbuncache = cbcache = cbsemi = 0 # current bullets
legendxleft = 100000
legendxright = 250000
legendyup = 15
legendydown = 2
boite(legendxleft,legendxright,legendyup,legendydown)
legend(legendxleft,legendxright,legendyup,legendydown)
copy "../data/index.d" thru X
cx = $1*5
y_scale = 1000
# ram cached semi uncached
line from cx,$2/y_scale to cx,$4/y_scale
line from cx,$5/y_scale to cx,$7/y_scale
line from cx,$8/y_scale to cx,$10/y_scale
line from cx,$11/y_scale to cx,$13/y_scale
#ty = $3
cx = $1*5
cbram = $3/y_scale
cbcache = $6/y_scale
cbsemi = $9/y_scale
cbuncache = $12/y_scale
if (obram > 0) then {line from cx,cbram to ox,obram}
if (obcache > 0) then {line from cx,cbcache to ox,obcache}
.gcolor blue
if (obsemi > 0) then {line from cx,cbsemi to ox,obsemi}
.gcolor
.gcolor green
if (obuncache > 0) then {line from cx,cbuncache to ox,obuncache}
.gcolor
obram = cbram
obcache = cbcache
obsemi = cbsemi
obuncache = cbuncache
ox = cx
# ram cached semi uncached
.gcolor red
bullet at cx,cbram
.gcolor
bullet at cx,cbcache
.gcolor blue
bullet at cx,cbsemi
.gcolor
.gcolor green
bullet at cx,cbuncache
.gcolor
X
.G2

View File

@ -0,0 +1,66 @@
.G1
copy "legend.grap"
frame invis ht 3 wid 4 left solid bot solid
coord x 0,5000*2 y 0,350
ticks left out from 0 to 350 by 50
label left "Request duration" unaligned "for a partition (ms)" "(Median)" left 0.8
label bot "Number of cars matching the partition" down 0.1
obram = obuncache = obcache = obsemi = 0
cbram = cbuncache = cbcache = cbsemi = 0
legendxleft = 1000
legendxright = 6500
legendyup = 330
legendydown = 230
boite(legendxleft,legendxright,legendyup,legendydown)
legend(legendxleft,legendxright,legendyup,legendydown)
copy "../data/partitions.d" thru X
cx = $1*2
y_scale = 1000000
# ram cached semi uncached
line from cx,$2/y_scale to cx,$4/y_scale
line from cx,$5/y_scale to cx,$7/y_scale
line from cx,$8/y_scale to cx,$10/y_scale
line from cx,$11/y_scale to cx,$13/y_scale
#ty = $3
cbram = $3/y_scale
cbcache = $6/y_scale
cbsemi = $9/y_scale
cbuncache = $12/y_scale
if (obram > 0) then {line from cx,cbram to ox,obram}
if (obcache > 0) then {line from cx,cbcache to ox,obcache}
.gcolor blue
if (obsemi > 0) then {line from cx,cbsemi to ox,obsemi}
.gcolor
.gcolor green
if (obuncache > 0) then {line from cx,cbuncache to ox,obuncache}
.gcolor
obram = cbram
obcache = cbcache
obsemi = cbsemi
obuncache = cbuncache
ox = cx
# ram cached semi uncached
.gcolor red
bullet at cx,cbram
.gcolor
bullet at cx,cbcache
.gcolor blue
bullet at cx,cbsemi
.gcolor
.gcolor green
bullet at cx,cbuncache
.gcolor
X
.G2

View File

@ -0,0 +1,65 @@
.G1
copy "legend.grap"
frame invis ht 3 wid 4 left solid bot solid
coord x 0,5000 y 0,170
ticks left out from 0 to 170 by 20
label left "Request duration" unaligned "for a tag (ms)" "(Median)" left 0.8
label bot "Number of cars matching the tag" down 0.1
obram = obuncache = obcache = obsemi = 0
cbram = cbuncache = cbcache = cbsemi = 0
legendxleft = 200
legendxright = 3000
legendyup = 170
legendydown = 120
boite(legendxleft,legendxright,legendyup,legendydown)
legend(legendxleft,legendxright,legendyup,legendydown)
copy "../data/tags.d" thru X
cx = $1
y_scale = 1000000
# ram cached semi uncached
line from cx,$2/y_scale to cx,$4/y_scale
line from cx,$5/y_scale to cx,$7/y_scale
line from cx,$8/y_scale to cx,$10/y_scale
line from cx,$11/y_scale to cx,$13/y_scale
#ty = $3
cbram = $3/y_scale
cbcache = $6/y_scale
cbsemi = $9/y_scale
cbuncache = $12/y_scale
if (obram > 0) then {line from cx,cbram to ox,obram}
if (obcache > 0) then {line from cx,cbcache to ox,obcache}
.gcolor blue
if (obsemi > 0) then {line from cx,cbsemi to ox,obsemi}
.gcolor
.gcolor green
if (obuncache > 0) then {line from cx,cbuncache to ox,obuncache}
.gcolor
obram = cbram
obcache = cbcache
obsemi = cbsemi
obuncache = cbuncache
ox = cx
# ram cached semi uncached
.gcolor red
bullet at cx,cbram
.gcolor
bullet at cx,cbcache
.gcolor blue
bullet at cx,cbsemi
.gcolor
.gcolor green
bullet at cx,cbuncache
.gcolor
X
.G2

View File

@ -1,5 +1,5 @@
.so macros.roff
.TITLE Brief performance analysis of Document Oriented DataBase (DODB)
.TITLE Document Oriented DataBase (DODB)
.AUTHOR Philippe P.
.ABSTRACT1
DODB is a database-as-library, enabling a very simple way to store applications' data: storing serialized
@ -9,10 +9,46 @@ To speed-up searches, attributes of these documents can be used as indexes which
.I symlinks ) (
on the disk.
.br
See the \f[CW]README\f[] for a longer explanation.
This document briefly presents an experiment to understand the performances we can get with this approach.
This document briefly presents DODB and its main differences with other database engines.
An experiment is described and analysed to understand the performance that can be expected from this approach.
.ABSTRACT2
.SECTION Introduction to DODB
A database consists in managing data, enabling queries (preferably fast) to retrieve, to modify, to add and to delete a piece of information.
Anything else is
.UL accessory .
Universities all around the world teach about Structured Query Language (SQL) and relational databases.
The main idea of relational databases is to put data into
.I tables ,
with typed columns so the database can optimize operations and storage.
A database is a list of tables with relations between them.
For example, let's imagine a database of a
.I table
can contain a list of users (their age, height, job, etc.).
When another
The SQL language enables arbitrary operations on databases: add, modify and delete entries.
Furthermore, SQL enables even to manage administrative operations of the databases themselves: managing users with fine-grained authorizations, creating databases and tables, etc.
Many tools were used or even developed over the years specifically to aleviate the inherent complexity and limitations of SQL.
For example, Unified Modeling Language (UML) is used to design databases by providing a graphical overview of the relations between tables.
SQL databases can be scripted to automate operations and provide a massive speed up to the operations (
.I "stored procedures" ,
see
.I "PL/SQL" ),
etc.
Document-oriented databases are key-value stores.
Furthermore, metadata is extracted for further optimization.
Contrary to SQL, DODB has a very narrow scope: to provide
Thus, DODB doesn't provide an interactive shell, no request language to perform arbitrary operations on the database, etc.
.SECTION Basic usage
.SECTION A few more options
.SECTION Limits of DODB
.SECTION Experimental scenario
.LP
The following experiment shows the performance of DODB based on quering durations.
@ -61,7 +97,7 @@ class Car
end
.SOURCE
.
.SECTION Basic indexes (1 to 1 relations)
.SS Basic indexes (1 to 1 relations)
.LP
An index enables to match a single value based on a small string.
In our example, each \f[CW]car\f[] has an unique \fIname\f[] which is used as an index.
@ -69,213 +105,43 @@ In our example, each \f[CW]car\f[] has an unique \fIname\f[] which is used as an
The following graph represents the result of 100 queries of a car based on its name.
The experiment starts with a database containing 1,000 cars and goes up to 250,000 cars.
.so graph_query_index.grap
Since there is only one value to retrieve, the request is quick and time is almost constant.
When the value and the index are kept in memory (see \f[CW]RAM only\f[] and \f[CW]Cached db\f[]), the retrieval is almost instantaneous (about 50 to 120 ns).
In case the value is on the disk, deserialization takes about 15 µs (see \f[CW]Uncached db, cached index\f[]).
The request is a little longer when the index isn't cached, in this case DODB walks the file-system to find the right symlink to follow, thus slowing the process even more, by up to 20%.
.G1
copy "legend.grap"
frame invis ht 3 wid 4 left solid bot solid
coord y 0,50
ticks left out from 0 to 50 by 10
ticks bot out at 50000 "50,000", 100000 "100,000", 150000 "150,000", 200000 "200,000", 250000 "250,000"
The request is a little longer when the index isn't cached (see \f[CW]Uncached db and index\f[]); in this case DODB walks the file-system to find the right symlink to follow, thus slowing the process even more, by up to 20%.
label left "Request duration with" unaligned "an index (us)" "(Median)" left 0.8
label bot "Number of cars in the database" down 0.1
.TS
allbox tab(:);
c | lw(4.0i) | cew(1.4i).
DODB instance:Comment and database usage:T{
compared to RAM only
T}
RAM only:T{
Worst memory footprint (all data must be in memory), best performance.
T}:-
Cached db and index:T{
Performance for retrieving a value is the same as RAM only while
enabling the admin to manually search for data on-disk.
T}:about the same perfs
Uncached db, cached index::300 to 400x slower
Uncached db and index:T{
Best memory footprint, worst performance.
T}:400 to 500x slower
.TE
obram = obuncache = obcache = obsemi = 0 # old bullets
cbram = cbuncache = cbcache = cbsemi = 0 # current bullets
legendxleft = 100000
legendxright = 250000
legendyup = 15
legendydown = 2
boite(legendxleft,legendxright,legendyup,legendydown)
legend(legendxleft,legendxright,legendyup,legendydown)
copy "../data/index.d" thru X
cx = $1*5
y_scale = 1000
# ram cached semi uncached
line from cx,$2/y_scale to cx,$4/y_scale
line from cx,$5/y_scale to cx,$7/y_scale
line from cx,$8/y_scale to cx,$10/y_scale
line from cx,$11/y_scale to cx,$13/y_scale
#ty = $3
cx = $1*5
cbram = $3/y_scale
cbcache = $6/y_scale
cbsemi = $9/y_scale
cbuncache = $12/y_scale
if (obram > 0) then {line from cx,cbram to ox,obram}
if (obcache > 0) then {line from cx,cbcache to ox,obcache}
.gcolor blue
if (obsemi > 0) then {line from cx,cbsemi to ox,obsemi}
.gcolor
.gcolor green
if (obuncache > 0) then {line from cx,cbuncache to ox,obuncache}
.gcolor
obram = cbram
obcache = cbcache
obsemi = cbsemi
obuncache = cbuncache
ox = cx
# ram cached semi uncached
.gcolor red
bullet at cx,cbram
.gcolor
bullet at cx,cbcache
.gcolor blue
bullet at cx,cbsemi
.gcolor
.gcolor green
bullet at cx,cbuncache
.gcolor
X
.G2
.B Conclusion :
as expected, retrieving a single value is fast and the size of the database doesn't matter much.
Each deserialization and, more importantly, each disk access is a pain point.
Caching the value enables a massive performance gain, data can be retrieved several hundred times quicker.
.bp
.SECTION Partitions (1 to n relations)
.SS Partitions (1 to n relations)
.LP
.G1
copy "legend.grap"
frame invis ht 3 wid 4 left solid bot solid
coord x 0,5000*2 y 0,350
ticks left out from 0 to 350 by 50
label left "Request duration" unaligned "for a partition (ms)" "(Median)" left 0.8
label bot "Number of cars matching the partition" down 0.1
.so graph_query_partition.grap
obram = obuncache = obcache = obsemi = 0
cbram = cbuncache = cbcache = cbsemi = 0
legendxleft = 1000
legendxright = 6500
legendyup = 330
legendydown = 230
boite(legendxleft,legendxright,legendyup,legendydown)
legend(legendxleft,legendxright,legendyup,legendydown)
copy "../data/partitions.d" thru X
cx = $1*2
y_scale = 1000000
# ram cached semi uncached
line from cx,$2/y_scale to cx,$4/y_scale
line from cx,$5/y_scale to cx,$7/y_scale
line from cx,$8/y_scale to cx,$10/y_scale
line from cx,$11/y_scale to cx,$13/y_scale
#ty = $3
cbram = $3/y_scale
cbcache = $6/y_scale
cbsemi = $9/y_scale
cbuncache = $12/y_scale
if (obram > 0) then {line from cx,cbram to ox,obram}
if (obcache > 0) then {line from cx,cbcache to ox,obcache}
.gcolor blue
if (obsemi > 0) then {line from cx,cbsemi to ox,obsemi}
.gcolor
.gcolor green
if (obuncache > 0) then {line from cx,cbuncache to ox,obuncache}
.gcolor
obram = cbram
obcache = cbcache
obsemi = cbsemi
obuncache = cbuncache
ox = cx
# ram cached semi uncached
.gcolor red
bullet at cx,cbram
.gcolor
bullet at cx,cbcache
.gcolor blue
bullet at cx,cbsemi
.gcolor
.gcolor green
bullet at cx,cbuncache
.gcolor
X
.G2
.bp
.SECTION Tags (n to n relations)
.SS Tags (n to n relations)
.LP
.G1
copy "legend.grap"
frame invis ht 3 wid 4 left solid bot solid
coord x 0,5000 y 0,170
ticks left out from 0 to 170 by 20
label left "Request duration" unaligned "for a tag (ms)" "(Median)" left 0.8
label bot "Number of cars matching the tag" down 0.1
obram = obuncache = obcache = obsemi = 0
cbram = cbuncache = cbcache = cbsemi = 0
legendxleft = 200
legendxright = 3000
legendyup = 170
legendydown = 120
boite(legendxleft,legendxright,legendyup,legendydown)
legend(legendxleft,legendxright,legendyup,legendydown)
copy "../data/tags.d" thru X
cx = $1
y_scale = 1000000
# ram cached semi uncached
line from cx,$2/y_scale to cx,$4/y_scale
line from cx,$5/y_scale to cx,$7/y_scale
line from cx,$8/y_scale to cx,$10/y_scale
line from cx,$11/y_scale to cx,$13/y_scale
#ty = $3
cbram = $3/y_scale
cbcache = $6/y_scale
cbsemi = $9/y_scale
cbuncache = $12/y_scale
if (obram > 0) then {line from cx,cbram to ox,obram}
if (obcache > 0) then {line from cx,cbcache to ox,obcache}
.gcolor blue
if (obsemi > 0) then {line from cx,cbsemi to ox,obsemi}
.gcolor
.gcolor green
if (obuncache > 0) then {line from cx,cbuncache to ox,obuncache}
.gcolor
obram = cbram
obcache = cbcache
obsemi = cbsemi
obuncache = cbuncache
ox = cx
# ram cached semi uncached
.gcolor red
bullet at cx,cbram
.gcolor
bullet at cx,cbcache
.gcolor blue
bullet at cx,cbsemi
.gcolor
.gcolor green
bullet at cx,cbuncache
.gcolor
X
.G2
.so graph_query_tag.grap

View File

@ -20,7 +20,7 @@ define legend {
diffy = yup - ydown
hdiff = diffy/4.3
cy = yup - (diffy/8)
cy = yup - (diffy/6)
cx = (diffx/20) + xleft
lstartx = cx
@ -33,7 +33,7 @@ define legend {
"RAM only" ljust at tstartx,cy
cy = cy - hdiff
line from lstartx,cy to lendx,cy
"Cached DODB" ljust at tstartx,cy
"Cached db and index" ljust at tstartx,cy
cy = cy - hdiff
.gcolor blue
line from lstartx,cy to lendx,cy