DODB PDF.
parent
a986e56264
commit
fc52757074
|
@ -0,0 +1,69 @@
|
|||
.G1
|
||||
# Plot of the median duration of an index lookup against the number of
# cars in the database, with four series: ram, cached, semi, uncached.
# legend.grap is included first; the boite and legend macros called
# below are presumably defined there (TODO confirm).
copy "legend.grap"
|
||||
# Frame is 3 high and 4 wide; only the left and bottom sides are drawn.
frame invis ht 3 wid 4 left solid bot solid
|
||||
# Only the y range is fixed (0 to 50); the x range is left to grap.
coord y 0,50
|
||||
ticks left out from 0 to 50 by 10
|
||||
# Bottom ticks are placed by hand so labels get thousands separators.
ticks bot out at 50000 "50,000", 100000 "100,000", 150000 "150,000", 200000 "200,000", 250000 "250,000"
|
||||
|
||||
label left "Request duration with" unaligned "an index (µs)" "(Median)" left 0.8
|
||||
label bot "Number of cars in the database" down 0.1
|
||||
|
||||
# ob* hold the values plotted for the previous data row, cb* the values
# for the current row. Zero doubles as the marker for no previous row.
obram = obuncache = obcache = obsemi = 0 # old bullets
|
||||
cbram = cbuncache = cbcache = cbsemi = 0 # current bullets
|
||||
|
||||
# Corners of the legend area, expressed in graph coordinates.
legendxleft = 100000
|
||||
legendxright = 250000
|
||||
legendyup = 15
|
||||
legendydown = 2
|
||||
|
||||
boite(legendxleft,legendxright,legendyup,legendydown)
|
||||
legend(legendxleft,legendxright,legendyup,legendydown)
|
||||
|
||||
# The body below runs once per line of the data file, with the fields
# of that line available as positional arguments.
copy "../data/index.d" thru X
|
||||
# Horizontal position is field 1 times 5; presumably this converts the
# stored value into a number of cars (TODO confirm against the data).
cx = $1*5
|
||||
|
||||
# Divisor applied to every duration. The axis is labelled in
# microseconds, so the raw data is presumably in nanoseconds (confirm).
# The value does not depend on the row; it is simply reassigned each time.
y_scale = 1000
|
||||
|
||||
# Vertical segment per series, from the first to the third field of
# each triple (presumably the spread around the median; confirm).
# ram cached semi uncached
|
||||
line from cx,$2/y_scale to cx,$4/y_scale
|
||||
line from cx,$5/y_scale to cx,$7/y_scale
|
||||
line from cx,$8/y_scale to cx,$10/y_scale
|
||||
line from cx,$11/y_scale to cx,$13/y_scale
|
||||
|
||||
#ty = $3
|
||||
|
||||
# Same expression as the assignment at the top of the body.
cx = $1*5
|
||||
|
||||
# Middle field of each triple is the plotted point of the series.
cbram = $3/y_scale
|
||||
cbcache = $6/y_scale
|
||||
cbsemi = $9/y_scale
|
||||
cbuncache = $12/y_scale
|
||||
|
||||
# Join the current point to the previous one. The test on ob* skips
# the first row, where no previous point exists yet (ob* still 0).
# NOTE(review): the ram segment uses the default color although its
# bullet is drawn in red further down; confirm this is intended.
if (obram > 0) then {line from cx,cbram to ox,obram}
|
||||
if (obcache > 0) then {line from cx,cbcache to ox,obcache}
|
||||
# Lines starting with a dot are troff requests; a bare .gcolor
# presumably restores the previous color (groff behaviour, confirm).
.gcolor blue
|
||||
if (obsemi > 0) then {line from cx,cbsemi to ox,obsemi}
|
||||
.gcolor
|
||||
.gcolor green
|
||||
if (obuncache > 0) then {line from cx,cbuncache to ox,obuncache}
|
||||
.gcolor
|
||||
|
||||
# Remember the current point for the next row.
obram = cbram
|
||||
obcache = cbcache
|
||||
obsemi = cbsemi
|
||||
obuncache = cbuncache
|
||||
ox = cx
|
||||
|
||||
# One bullet per series: ram in red, cached in the default color,
# semi in blue, uncached in green.
# ram cached semi uncached
|
||||
.gcolor red
|
||||
bullet at cx,cbram
|
||||
.gcolor
|
||||
bullet at cx,cbcache
|
||||
.gcolor blue
|
||||
bullet at cx,cbsemi
|
||||
.gcolor
|
||||
.gcolor green
|
||||
bullet at cx,cbuncache
|
||||
.gcolor
|
||||
X
|
||||
.G2
|
|
@ -0,0 +1,66 @@
|
|||
.G1
|
||||
# Plot of the median duration of a partition request against the number
# of cars matching the partition, with four series: ram, cached, semi,
# uncached. legend.grap is included first; the boite and legend macros
# called below are presumably defined there (TODO confirm).
copy "legend.grap"
|
||||
# Frame is 3 high and 4 wide; only the left and bottom sides are drawn.
frame invis ht 3 wid 4 left solid bot solid
|
||||
# Fixed ranges: x from 0 to 5000*2, y from 0 to 350.
coord x 0,5000*2 y 0,350
|
||||
ticks left out from 0 to 350 by 50
|
||||
|
||||
label left "Request duration" unaligned "for a partition (ms)" "(Median)" left 0.8
|
||||
label bot "Number of cars matching the partition" down 0.1
|
||||
|
||||
# ob* hold the values plotted for the previous data row, cb* the values
# for the current row. Zero doubles as the marker for no previous row.
obram = obuncache = obcache = obsemi = 0
|
||||
cbram = cbuncache = cbcache = cbsemi = 0
|
||||
|
||||
# Corners of the legend area, expressed in graph coordinates.
legendxleft = 1000
|
||||
legendxright = 6500
|
||||
legendyup = 330
|
||||
legendydown = 230
|
||||
|
||||
boite(legendxleft,legendxright,legendyup,legendydown)
|
||||
legend(legendxleft,legendxright,legendyup,legendydown)
|
||||
|
||||
# The body below runs once per line of the data file, with the fields
# of that line available as positional arguments.
copy "../data/partitions.d" thru X
|
||||
# Horizontal position is field 1 times 2, matching the 5000*2 range
# above; the reason for the factor is not visible here (TODO confirm).
cx = $1*2
|
||||
|
||||
# Divisor applied to every duration. The axis is labelled in
# milliseconds, so the raw data is presumably in nanoseconds (confirm).
# The value does not depend on the row; it is simply reassigned each time.
y_scale = 1000000
|
||||
|
||||
# Vertical segment per series, from the first to the third field of
# each triple (presumably the spread around the median; confirm).
# ram cached semi uncached
|
||||
line from cx,$2/y_scale to cx,$4/y_scale
|
||||
line from cx,$5/y_scale to cx,$7/y_scale
|
||||
line from cx,$8/y_scale to cx,$10/y_scale
|
||||
line from cx,$11/y_scale to cx,$13/y_scale
|
||||
|
||||
#ty = $3
|
||||
|
||||
# Middle field of each triple is the plotted point of the series.
cbram = $3/y_scale
|
||||
cbcache = $6/y_scale
|
||||
cbsemi = $9/y_scale
|
||||
cbuncache = $12/y_scale
|
||||
|
||||
# Join the current point to the previous one. The test on ob* skips
# the first row, where no previous point exists yet (ob* still 0).
# NOTE(review): the ram segment uses the default color although its
# bullet is drawn in red further down; confirm this is intended.
if (obram > 0) then {line from cx,cbram to ox,obram}
|
||||
if (obcache > 0) then {line from cx,cbcache to ox,obcache}
|
||||
# Lines starting with a dot are troff requests; a bare .gcolor
# presumably restores the previous color (groff behaviour, confirm).
.gcolor blue
|
||||
if (obsemi > 0) then {line from cx,cbsemi to ox,obsemi}
|
||||
.gcolor
|
||||
.gcolor green
|
||||
if (obuncache > 0) then {line from cx,cbuncache to ox,obuncache}
|
||||
.gcolor
|
||||
|
||||
# Remember the current point for the next row.
obram = cbram
|
||||
obcache = cbcache
|
||||
obsemi = cbsemi
|
||||
obuncache = cbuncache
|
||||
ox = cx
|
||||
|
||||
# One bullet per series: ram in red, cached in the default color,
# semi in blue, uncached in green.
# ram cached semi uncached
|
||||
.gcolor red
|
||||
bullet at cx,cbram
|
||||
.gcolor
|
||||
bullet at cx,cbcache
|
||||
.gcolor blue
|
||||
bullet at cx,cbsemi
|
||||
.gcolor
|
||||
.gcolor green
|
||||
bullet at cx,cbuncache
|
||||
.gcolor
|
||||
X
|
||||
.G2
|
|
@ -0,0 +1,65 @@
|
|||
.G1
|
||||
# Plot of the median duration of a tag request against the number of
# cars matching the tag, with four series: ram, cached, semi, uncached.
# legend.grap is included first; the boite and legend macros called
# below are presumably defined there (TODO confirm).
copy "legend.grap"
|
||||
# Frame is 3 high and 4 wide; only the left and bottom sides are drawn.
frame invis ht 3 wid 4 left solid bot solid
|
||||
# Fixed ranges: x from 0 to 5000, y from 0 to 170.
coord x 0,5000 y 0,170
|
||||
ticks left out from 0 to 170 by 20
|
||||
label left "Request duration" unaligned "for a tag (ms)" "(Median)" left 0.8
|
||||
label bot "Number of cars matching the tag" down 0.1
|
||||
|
||||
# ob* hold the values plotted for the previous data row, cb* the values
# for the current row. Zero doubles as the marker for no previous row.
obram = obuncache = obcache = obsemi = 0
|
||||
cbram = cbuncache = cbcache = cbsemi = 0
|
||||
|
||||
# Corners of the legend area, expressed in graph coordinates. The top
# edge sits at the upper bound of the y range set above.
legendxleft = 200
|
||||
legendxright = 3000
|
||||
legendyup = 170
|
||||
legendydown = 120
|
||||
|
||||
boite(legendxleft,legendxright,legendyup,legendydown)
|
||||
legend(legendxleft,legendxright,legendyup,legendydown)
|
||||
|
||||
# The body below runs once per line of the data file, with the fields
# of that line available as positional arguments.
copy "../data/tags.d" thru X
|
||||
# Horizontal position is field 1, used unscaled.
cx = $1
|
||||
|
||||
# Divisor applied to every duration. The axis is labelled in
# milliseconds, so the raw data is presumably in nanoseconds (confirm).
# The value does not depend on the row; it is simply reassigned each time.
y_scale = 1000000
|
||||
|
||||
# Vertical segment per series, from the first to the third field of
# each triple (presumably the spread around the median; confirm).
# ram cached semi uncached
|
||||
line from cx,$2/y_scale to cx,$4/y_scale
|
||||
line from cx,$5/y_scale to cx,$7/y_scale
|
||||
line from cx,$8/y_scale to cx,$10/y_scale
|
||||
line from cx,$11/y_scale to cx,$13/y_scale
|
||||
|
||||
#ty = $3
|
||||
|
||||
# Middle field of each triple is the plotted point of the series.
cbram = $3/y_scale
|
||||
cbcache = $6/y_scale
|
||||
cbsemi = $9/y_scale
|
||||
cbuncache = $12/y_scale
|
||||
|
||||
# Join the current point to the previous one. The test on ob* skips
# the first row, where no previous point exists yet (ob* still 0).
# NOTE(review): the ram segment uses the default color although its
# bullet is drawn in red further down; confirm this is intended.
if (obram > 0) then {line from cx,cbram to ox,obram}
|
||||
if (obcache > 0) then {line from cx,cbcache to ox,obcache}
|
||||
# Lines starting with a dot are troff requests; a bare .gcolor
# presumably restores the previous color (groff behaviour, confirm).
.gcolor blue
|
||||
if (obsemi > 0) then {line from cx,cbsemi to ox,obsemi}
|
||||
.gcolor
|
||||
.gcolor green
|
||||
if (obuncache > 0) then {line from cx,cbuncache to ox,obuncache}
|
||||
.gcolor
|
||||
|
||||
# Remember the current point for the next row.
obram = cbram
|
||||
obcache = cbcache
|
||||
obsemi = cbsemi
|
||||
obuncache = cbuncache
|
||||
ox = cx
|
||||
|
||||
# One bullet per series: ram in red, cached in the default color,
# semi in blue, uncached in green.
# ram cached semi uncached
|
||||
.gcolor red
|
||||
bullet at cx,cbram
|
||||
.gcolor
|
||||
bullet at cx,cbcache
|
||||
.gcolor blue
|
||||
bullet at cx,cbsemi
|
||||
.gcolor
|
||||
.gcolor green
|
||||
bullet at cx,cbuncache
|
||||
.gcolor
|
||||
X
|
||||
.G2
|
272
graphs/graphs.ms
272
graphs/graphs.ms
|
@ -1,5 +1,5 @@
|
|||
.so macros.roff
|
||||
.TITLE Brief performance analysis of Document Oriented DataBase (DODB)
|
||||
.TITLE Document Oriented DataBase (DODB)
|
||||
.AUTHOR Philippe P.
|
||||
.ABSTRACT1
|
||||
DODB is a database-as-library, enabling a very simple way to store applications' data: storing serialized
|
||||
|
@ -9,10 +9,46 @@ To speed-up searches, attributes of these documents can be used as indexes which
|
|||
.I symlinks ) (
|
||||
on the disk.
|
||||
.br
|
||||
See the \f[CW]README\f[] for a longer explanation.
|
||||
|
||||
This document briefly presents an experiment to understand the performances we can get with this approach.
|
||||
This document briefly presents DODB and its main differences with other database engines.
|
||||
An experiment is described and analysed to understand the performance that can be expected from this approach.
|
||||
.ABSTRACT2
|
||||
.SECTION Introduction to DODB
|
||||
A database manages data and enables queries (preferably fast ones) to retrieve, modify, add and delete pieces of information.
|
||||
Anything else is
|
||||
.UL accessory .
|
||||
|
||||
Universities all around the world teach about Structured Query Language (SQL) and relational databases.
|
||||
|
||||
The main idea of relational databases is to put data into
|
||||
.I tables ,
|
||||
with typed columns so the database can optimize operations and storage.
|
||||
A database is a list of tables with relations between them.
|
||||
For example, let's imagine a database of a
|
||||
.I table
|
||||
can contain a list of users (their age, height, job, etc.).
|
||||
When another
|
||||
|
||||
The SQL language enables arbitrary operations on databases: add, modify and delete entries.
|
||||
Furthermore, SQL even enables managing administrative operations of the databases themselves: managing users with fine-grained authorizations, creating databases and tables, etc.
|
||||
|
||||
Many tools were used or even developed over the years specifically to alleviate the inherent complexity and limitations of SQL.
|
||||
For example, Unified Modeling Language (UML) is used to design databases by providing a graphical overview of the relations between tables.
|
||||
SQL databases can be scripted to automate operations and provide a massive speed up to the operations (
|
||||
.I "stored procedures" ,
|
||||
see
|
||||
.I "PL/SQL" ),
|
||||
etc.
|
||||
|
||||
Document-oriented databases are key-value stores.
|
||||
Furthermore, metadata is extracted for further optimization.
|
||||
|
||||
Contrary to SQL, DODB has a very narrow scope: to provide
|
||||
Thus, DODB doesn't provide an interactive shell, no request language to perform arbitrary operations on the database, etc.
|
||||
|
||||
.SECTION Basic usage
|
||||
.SECTION A few more options
|
||||
.SECTION Limits of DODB
|
||||
.SECTION Experimental scenario
|
||||
.LP
|
||||
The following experiment shows the performance of DODB based on query durations.
|
||||
|
@ -61,7 +97,7 @@ class Car
|
|||
end
|
||||
.SOURCE
|
||||
.
|
||||
.SECTION Basic indexes (1 to 1 relations)
|
||||
.SS Basic indexes (1 to 1 relations)
|
||||
.LP
|
||||
An index makes it possible to match a single value based on a small string.
|
||||
In our example, each \f[CW]car\f[] has a unique \fIname\f[] which is used as an index.
|
||||
|
@ -69,213 +105,43 @@ In our example, each \f[CW]car\f[] has an unique \fIname\f[] which is used as an
|
|||
The following graph represents the result of 100 queries of a car based on its name.
|
||||
The experiment starts with a database containing 1,000 cars and goes up to 250,000 cars.
|
||||
|
||||
.so graph_query_index.grap
|
||||
|
||||
Since there is only one value to retrieve, the request is quick and time is almost constant.
|
||||
When the value and the index are kept in memory (see \f[CW]RAM only\f[] and \f[CW]Cached db\f[]), the retrieval is almost instantaneous (about 50 to 120 ns).
|
||||
In case the value is on the disk, deserialization takes about 15 µs (see \f[CW]Uncached db, cached index\f[]).
|
||||
The request is a little longer when the index isn't cached, in this case DODB walks the file-system to find the right symlink to follow, thus slowing the process even more, by up to 20%.
|
||||
.G1
|
||||
copy "legend.grap"
|
||||
frame invis ht 3 wid 4 left solid bot solid
|
||||
coord y 0,50
|
||||
ticks left out from 0 to 50 by 10
|
||||
ticks bot out at 50000 "50,000", 100000 "100,000", 150000 "150,000", 200000 "200,000", 250000 "250,000"
|
||||
The request is a little longer when the index isn't cached (see \f[CW]Uncached db and index\f[]); in this case DODB walks the file-system to find the right symlink to follow, thus slowing the process even more, by up to 20%.
|
||||
|
||||
label left "Request duration with" unaligned "an index (us)" "(Median)" left 0.8
|
||||
label bot "Number of cars in the database" down 0.1
|
||||
.TS
|
||||
allbox tab(:);
|
||||
c | lw(4.0i) | cew(1.4i).
|
||||
DODB instance:Comment and database usage:T{
|
||||
compared to RAM only
|
||||
T}
|
||||
RAM only:T{
|
||||
Worst memory footprint (all data must be in memory), best performance.
|
||||
T}:-
|
||||
Cached db and index:T{
|
||||
Performance for retrieving a value is the same as RAM only while
|
||||
enabling the admin to manually search for data on-disk.
|
||||
T}:about the same perfs
|
||||
Uncached db, cached index::300 to 400x slower
|
||||
Uncached db and index:T{
|
||||
Best memory footprint, worst performance.
|
||||
T}:400 to 500x slower
|
||||
.TE
|
||||
|
||||
obram = obuncache = obcache = obsemi = 0 # old bullets
|
||||
cbram = cbuncache = cbcache = cbsemi = 0 # current bullets
|
||||
|
||||
legendxleft = 100000
|
||||
legendxright = 250000
|
||||
legendyup = 15
|
||||
legendydown = 2
|
||||
|
||||
boite(legendxleft,legendxright,legendyup,legendydown)
|
||||
legend(legendxleft,legendxright,legendyup,legendydown)
|
||||
|
||||
copy "../data/index.d" thru X
|
||||
cx = $1*5
|
||||
|
||||
y_scale = 1000
|
||||
|
||||
# ram cached semi uncached
|
||||
line from cx,$2/y_scale to cx,$4/y_scale
|
||||
line from cx,$5/y_scale to cx,$7/y_scale
|
||||
line from cx,$8/y_scale to cx,$10/y_scale
|
||||
line from cx,$11/y_scale to cx,$13/y_scale
|
||||
|
||||
#ty = $3
|
||||
|
||||
cx = $1*5
|
||||
|
||||
cbram = $3/y_scale
|
||||
cbcache = $6/y_scale
|
||||
cbsemi = $9/y_scale
|
||||
cbuncache = $12/y_scale
|
||||
|
||||
if (obram > 0) then {line from cx,cbram to ox,obram}
|
||||
if (obcache > 0) then {line from cx,cbcache to ox,obcache}
|
||||
.gcolor blue
|
||||
if (obsemi > 0) then {line from cx,cbsemi to ox,obsemi}
|
||||
.gcolor
|
||||
.gcolor green
|
||||
if (obuncache > 0) then {line from cx,cbuncache to ox,obuncache}
|
||||
.gcolor
|
||||
|
||||
obram = cbram
|
||||
obcache = cbcache
|
||||
obsemi = cbsemi
|
||||
obuncache = cbuncache
|
||||
ox = cx
|
||||
|
||||
# ram cached semi uncached
|
||||
.gcolor red
|
||||
bullet at cx,cbram
|
||||
.gcolor
|
||||
bullet at cx,cbcache
|
||||
.gcolor blue
|
||||
bullet at cx,cbsemi
|
||||
.gcolor
|
||||
.gcolor green
|
||||
bullet at cx,cbuncache
|
||||
.gcolor
|
||||
X
|
||||
.G2
|
||||
.B Conclusion :
|
||||
as expected, retrieving a single value is fast and the size of the database doesn't matter much.
|
||||
Each deserialization and, more importantly, each disk access is a pain point.
|
||||
Caching the value enables a massive performance gain: data can be retrieved several hundred times quicker.
|
||||
.bp
|
||||
.SECTION Partitions (1 to n relations)
|
||||
.SS Partitions (1 to n relations)
|
||||
.LP
|
||||
.G1
|
||||
copy "legend.grap"
|
||||
frame invis ht 3 wid 4 left solid bot solid
|
||||
coord x 0,5000*2 y 0,350
|
||||
ticks left out from 0 to 350 by 50
|
||||
|
||||
label left "Request duration" unaligned "for a partition (ms)" "(Median)" left 0.8
|
||||
label bot "Number of cars matching the partition" down 0.1
|
||||
.so graph_query_partition.grap
|
||||
|
||||
obram = obuncache = obcache = obsemi = 0
|
||||
cbram = cbuncache = cbcache = cbsemi = 0
|
||||
|
||||
legendxleft = 1000
|
||||
legendxright = 6500
|
||||
legendyup = 330
|
||||
legendydown = 230
|
||||
|
||||
boite(legendxleft,legendxright,legendyup,legendydown)
|
||||
legend(legendxleft,legendxright,legendyup,legendydown)
|
||||
|
||||
copy "../data/partitions.d" thru X
|
||||
cx = $1*2
|
||||
|
||||
y_scale = 1000000
|
||||
|
||||
# ram cached semi uncached
|
||||
line from cx,$2/y_scale to cx,$4/y_scale
|
||||
line from cx,$5/y_scale to cx,$7/y_scale
|
||||
line from cx,$8/y_scale to cx,$10/y_scale
|
||||
line from cx,$11/y_scale to cx,$13/y_scale
|
||||
|
||||
#ty = $3
|
||||
|
||||
cbram = $3/y_scale
|
||||
cbcache = $6/y_scale
|
||||
cbsemi = $9/y_scale
|
||||
cbuncache = $12/y_scale
|
||||
|
||||
if (obram > 0) then {line from cx,cbram to ox,obram}
|
||||
if (obcache > 0) then {line from cx,cbcache to ox,obcache}
|
||||
.gcolor blue
|
||||
if (obsemi > 0) then {line from cx,cbsemi to ox,obsemi}
|
||||
.gcolor
|
||||
.gcolor green
|
||||
if (obuncache > 0) then {line from cx,cbuncache to ox,obuncache}
|
||||
.gcolor
|
||||
|
||||
obram = cbram
|
||||
obcache = cbcache
|
||||
obsemi = cbsemi
|
||||
obuncache = cbuncache
|
||||
ox = cx
|
||||
|
||||
# ram cached semi uncached
|
||||
.gcolor red
|
||||
bullet at cx,cbram
|
||||
.gcolor
|
||||
bullet at cx,cbcache
|
||||
.gcolor blue
|
||||
bullet at cx,cbsemi
|
||||
.gcolor
|
||||
.gcolor green
|
||||
bullet at cx,cbuncache
|
||||
.gcolor
|
||||
X
|
||||
.G2
|
||||
.bp
|
||||
.SECTION Tags (n to n relations)
|
||||
.SS Tags (n to n relations)
|
||||
.LP
|
||||
.G1
|
||||
copy "legend.grap"
|
||||
frame invis ht 3 wid 4 left solid bot solid
|
||||
coord x 0,5000 y 0,170
|
||||
ticks left out from 0 to 170 by 20
|
||||
label left "Request duration" unaligned "for a tag (ms)" "(Median)" left 0.8
|
||||
label bot "Number of cars matching the tag" down 0.1
|
||||
|
||||
obram = obuncache = obcache = obsemi = 0
|
||||
cbram = cbuncache = cbcache = cbsemi = 0
|
||||
|
||||
legendxleft = 200
|
||||
legendxright = 3000
|
||||
legendyup = 170
|
||||
legendydown = 120
|
||||
|
||||
boite(legendxleft,legendxright,legendyup,legendydown)
|
||||
legend(legendxleft,legendxright,legendyup,legendydown)
|
||||
|
||||
copy "../data/tags.d" thru X
|
||||
cx = $1
|
||||
|
||||
y_scale = 1000000
|
||||
|
||||
# ram cached semi uncached
|
||||
line from cx,$2/y_scale to cx,$4/y_scale
|
||||
line from cx,$5/y_scale to cx,$7/y_scale
|
||||
line from cx,$8/y_scale to cx,$10/y_scale
|
||||
line from cx,$11/y_scale to cx,$13/y_scale
|
||||
|
||||
#ty = $3
|
||||
|
||||
cbram = $3/y_scale
|
||||
cbcache = $6/y_scale
|
||||
cbsemi = $9/y_scale
|
||||
cbuncache = $12/y_scale
|
||||
|
||||
if (obram > 0) then {line from cx,cbram to ox,obram}
|
||||
if (obcache > 0) then {line from cx,cbcache to ox,obcache}
|
||||
.gcolor blue
|
||||
if (obsemi > 0) then {line from cx,cbsemi to ox,obsemi}
|
||||
.gcolor
|
||||
.gcolor green
|
||||
if (obuncache > 0) then {line from cx,cbuncache to ox,obuncache}
|
||||
.gcolor
|
||||
|
||||
obram = cbram
|
||||
obcache = cbcache
|
||||
obsemi = cbsemi
|
||||
obuncache = cbuncache
|
||||
ox = cx
|
||||
|
||||
# ram cached semi uncached
|
||||
.gcolor red
|
||||
bullet at cx,cbram
|
||||
.gcolor
|
||||
bullet at cx,cbcache
|
||||
.gcolor blue
|
||||
bullet at cx,cbsemi
|
||||
.gcolor
|
||||
.gcolor green
|
||||
bullet at cx,cbuncache
|
||||
.gcolor
|
||||
X
|
||||
.G2
|
||||
.so graph_query_tag.grap
|
||||
|
|
|
@ -20,7 +20,7 @@ define legend {
|
|||
diffy = yup - ydown
|
||||
|
||||
hdiff = diffy/4.3
|
||||
cy = yup - (diffy/8)
|
||||
cy = yup - (diffy/6)
|
||||
cx = (diffx/20) + xleft
|
||||
|
||||
lstartx = cx
|
||||
|
@ -33,7 +33,7 @@ define legend {
|
|||
"RAM only" ljust at tstartx,cy
|
||||
cy = cy - hdiff
|
||||
line from lstartx,cy to lendx,cy
|
||||
"Cached DODB" ljust at tstartx,cy
|
||||
"Cached db and index" ljust at tstartx,cy
|
||||
cy = cy - hdiff
|
||||
.gcolor blue
|
||||
line from lstartx,cy to lendx,cy
|
||||
|
|
Loading…
Reference in New Issue