diff --git a/graphs/graph_query_index.grap b/graphs/graph_query_index.grap
new file mode 100644
index 0000000..7c127a2
--- /dev/null
+++ b/graphs/graph_query_index.grap
@@ -0,0 +1,69 @@
+.G1
+copy "legend.grap"
+frame invis ht 3 wid 4 left solid bot solid
+coord y 0,50 # only the y range is fixed here; the x range is left to grap
+ticks left out from 0 to 50 by 10
+ticks bot out at 50000 "50,000", 100000 "100,000", 150000 "150,000", 200000 "200,000", 250000 "250,000"
+
+label left "Request duration with" unaligned "an index (µs)" "(Median)" left 0.8
+label bot "Number of cars in the database" down 0.1
+
+obram = obuncache = obcache = obsemi = 0 # old bullets: medians of the previous data row; they stay 0 until a first row is read, which disables the connecting segments
+cbram = cbuncache = cbcache = cbsemi = 0 # current bullets: medians of the data row being drawn
+
+legendxleft = 100000 # legend placement, in graph coordinates
+legendxright = 250000
+legendyup = 15
+legendydown = 2 # the four legend* values are handed to boite and legend below (legend is defined in legend.grap; boite presumably too)
+
+boite(legendxleft,legendxright,legendyup,legendydown)
+legend(legendxleft,legendxright,legendyup,legendydown)
+
+copy "../data/index.d" thru X
+	cx = $1*5 # x position: column 1 times 5 (presumably column 1 counts batches of 5 cars - TODO confirm)
+
+	y_scale = 1000 # raw durations are divided by this; the axis label says microseconds, so the data is presumably in ns - TODO confirm
+
+	# vertical bars, one per setup (ram, cached, semi, uncached): columns 2 to 4, 5 to 7, 8 to 10, 11 to 13 - presumably low/high bounds around the median, TODO confirm
+	line from cx,$2/y_scale to cx,$4/y_scale
+	line from cx,$5/y_scale to cx,$7/y_scale
+	line from cx,$8/y_scale to cx,$10/y_scale
+	line from cx,$11/y_scale to cx,$13/y_scale
+
+	#ty = $3
+
+	cx = $1*5 # NOTE(review): repeats the assignment made at the top of this body; redundant
+
+	cbram = $3/y_scale # plotted values (the axis label calls them medians): columns 3, 6, 9 and 12
+	cbcache = $6/y_scale
+	cbsemi = $9/y_scale
+	cbuncache = $12/y_scale
+
+	if (obram > 0) then {line from cx,cbram to ox,obram}
+	if (obcache > 0) then {line from cx,cbcache to ox,obcache}
+.gcolor blue
+	if (obsemi > 0) then {line from cx,cbsemi to ox,obsemi}
+.gcolor
+.gcolor green
+	if (obuncache > 0) then {line from cx,cbuncache to ox,obuncache}
+.gcolor
+
+	obram = cbram # remember this row so the next one can draw a segment back to it
+	obcache = cbcache
+	obsemi = cbsemi
+	obuncache = cbuncache
+	ox = cx # abscissa of the row just drawn
+
+	# markers: ram in red, cached in the default color, semi in blue, uncached in green; NOTE(review): the ram segment above uses the default color, not red - confirm intended
+.gcolor red
+	bullet at cx,cbram
+.gcolor
+	bullet at cx,cbcache
+.gcolor blue
+	bullet at cx,cbsemi
+.gcolor
+.gcolor green
+	bullet at cx,cbuncache
+.gcolor
+X
+.G2
diff --git a/graphs/graph_query_partition.grap b/graphs/graph_query_partition.grap
new file mode 100644
index 0000000..f7d9a65
--- /dev/null
+++ b/graphs/graph_query_partition.grap
@@ -0,0 +1,66 @@
+.G1
+copy "legend.grap"
+frame invis ht 3 wid 4 left solid bot solid
+coord x 0,5000*2 y 0,350 # x range is doubled, matching cx = column 1 times 2 below
+ticks left out from 0 to 350 by 50
+
+label left "Request duration" unaligned "for a partition (ms)" "(Median)" left 0.8
+label bot "Number of cars matching the partition" down 0.1
+
+obram = obuncache = obcache = obsemi = 0 # medians of the previous data row; they stay 0 until a first row is read, which disables the connecting segments
+cbram = cbuncache = cbcache = cbsemi = 0 # medians of the data row being drawn
+
+legendxleft = 1000 # legend placement, in graph coordinates
+legendxright = 6500
+legendyup = 330
+legendydown = 230 # the four legend* values are handed to boite and legend below (legend is defined in legend.grap; boite presumably too)
+
+boite(legendxleft,legendxright,legendyup,legendydown)
+legend(legendxleft,legendxright,legendyup,legendydown)
+
+copy "../data/partitions.d" thru X
+	cx = $1*2 # x position: column 1 times 2
+
+	y_scale = 1000000 # raw durations are divided by this; the axis label says ms, so the data is presumably in ns - TODO confirm
+
+	# vertical bars, one per setup (ram, cached, semi, uncached): columns 2 to 4, 5 to 7, 8 to 10, 11 to 13 - presumably low/high bounds around the median, TODO confirm
+	line from cx,$2/y_scale to cx,$4/y_scale
+	line from cx,$5/y_scale to cx,$7/y_scale
+	line from cx,$8/y_scale to cx,$10/y_scale
+	line from cx,$11/y_scale to cx,$13/y_scale
+
+	#ty = $3
+
+	cbram = $3/y_scale # plotted values (the axis label calls them medians): columns 3, 6, 9 and 12
+	cbcache = $6/y_scale
+	cbsemi = $9/y_scale
+	cbuncache = $12/y_scale
+
+	if (obram > 0) then {line from cx,cbram to ox,obram}
+	if (obcache > 0) then {line from cx,cbcache to ox,obcache}
+.gcolor blue
+	if (obsemi > 0) then {line from cx,cbsemi to ox,obsemi}
+.gcolor
+.gcolor green
+	if (obuncache > 0) then {line from cx,cbuncache to ox,obuncache}
+.gcolor
+
+	obram = cbram # remember this row so the next one can draw a segment back to it
+	obcache = cbcache
+	obsemi = cbsemi
+	obuncache = cbuncache
+	ox = cx # abscissa of the row just drawn
+
+	# markers: ram in red, cached in the default color, semi in blue, uncached in green; NOTE(review): the ram segment above uses the default color, not red - confirm intended
+.gcolor red
+	bullet at cx,cbram
+.gcolor
+	bullet at cx,cbcache
+.gcolor blue
+	bullet at cx,cbsemi
+.gcolor
+.gcolor green
+	bullet at cx,cbuncache
+.gcolor
+X
+.G2
diff --git a/graphs/graph_query_tag.grap b/graphs/graph_query_tag.grap
new file mode 100644
index 0000000..84a91a0
--- /dev/null
+++ b/graphs/graph_query_tag.grap
@@ -0,0 +1,65 @@
+.G1
+copy "legend.grap"
+frame invis ht 3 wid 4 left solid bot solid
+coord x 0,5000 y 0,170
+ticks left out from 0 to 170 by 20
+label left "Request duration" unaligned "for a tag (ms)" "(Median)" left 0.8
+label bot "Number of cars matching the tag" down 0.1
+
+obram = obuncache = obcache = obsemi = 0 # medians of the previous data row; they stay 0 until a first row is read, which disables the connecting segments
+cbram = cbuncache = cbcache = cbsemi = 0 # medians of the data row being drawn
+
+legendxleft = 200 # legend placement, in graph coordinates
+legendxright = 3000
+legendyup = 170
+legendydown = 120 # the four legend* values are handed to boite and legend below (legend is defined in legend.grap; boite presumably too)
+
+boite(legendxleft,legendxright,legendyup,legendydown)
+legend(legendxleft,legendxright,legendyup,legendydown)
+
+copy "../data/tags.d" thru X
+	cx = $1 # x position: column 1, used as is
+
+	y_scale = 1000000 # raw durations are divided by this; the axis label says ms, so the data is presumably in ns - TODO confirm
+
+	# vertical bars, one per setup (ram, cached, semi, uncached): columns 2 to 4, 5 to 7, 8 to 10, 11 to 13 - presumably low/high bounds around the median, TODO confirm
+	line from cx,$2/y_scale to cx,$4/y_scale
+	line from cx,$5/y_scale to cx,$7/y_scale
+	line from cx,$8/y_scale to cx,$10/y_scale
+	line from cx,$11/y_scale to cx,$13/y_scale
+
+	#ty = $3
+
+	cbram = $3/y_scale # plotted values (the axis label calls them medians): columns 3, 6, 9 and 12
+	cbcache = $6/y_scale
+	cbsemi = $9/y_scale
+	cbuncache = $12/y_scale
+
+	if (obram > 0) then {line from cx,cbram to ox,obram}
+	if (obcache > 0) then {line from cx,cbcache to ox,obcache}
+.gcolor blue
+	if (obsemi > 0) then {line from cx,cbsemi to ox,obsemi}
+.gcolor
+.gcolor green
+	if (obuncache > 0) then {line from cx,cbuncache to ox,obuncache}
+.gcolor
+
+	obram = cbram # remember this row so the next one can draw a segment back to it
+	obcache = cbcache
+	obsemi = cbsemi
+	obuncache = cbuncache
+	ox = cx # abscissa of the row just drawn
+
+	# markers: ram in red, cached in the default color, semi in blue, uncached in green; NOTE(review): the ram segment above uses the default color, not red - confirm intended
+.gcolor red
+	bullet at cx,cbram
+.gcolor
+	bullet at cx,cbcache
+.gcolor blue
+	bullet at cx,cbsemi
+.gcolor
+.gcolor green
+	bullet at cx,cbuncache
+.gcolor
+X
+.G2
diff --git a/graphs/graphs.ms b/graphs/graphs.ms index 5b2282c..c0071b6 100644 --- a/graphs/graphs.ms +++ b/graphs/graphs.ms @@ -1,5 +1,5 @@ .so macros.roff -.TITLE Brief performance analysis of Document Oriented DataBase (DODB) +.TITLE Document Oriented DataBase (DODB) .AUTHOR Philippe P. .ABSTRACT1 DODB is a database-as-library, enabling a very simple way to store applications' data: storing serialized @@ -9,10 +9,46 @@ To speed-up searches, attributes of these documents can be used as indexes which .I symlinks ) ( on the disk. .br -See the \f[CW]README\f[] for a longer explanation. -This document briefly presents an experiment to understand the performances we can get with this approach. 
+This document briefly presents DODB and its main differences from other database engines. +An experiment is described and analysed to understand the performance that can be expected from this approach. .ABSTRACT2 +.SECTION Introduction to DODB +A database consists in managing data, enabling queries (preferably fast) to retrieve, to modify, to add and to delete a piece of information. +Anything else is +.UL accessory . + +Universities all around the world teach about Structured Query Language (SQL) and relational databases. + +The main idea of relational databases is to put data into +.I tables , +with typed columns so the database can optimize operations and storage. +A database is a list of tables with relations between them. +For example, let's imagine a database in which a +.I table +can contain a list of users (their age, height, job, etc.). +When another + +The SQL language enables arbitrary operations on databases: add, modify and delete entries. +Furthermore, SQL even enables managing administrative operations of the databases themselves: managing users with fine-grained authorizations, creating databases and tables, etc. + +Many tools were used or even developed over the years specifically to alleviate the inherent complexity and limitations of SQL. +For example, Unified Modeling Language (UML) is used to design databases by providing a graphical overview of the relations between tables. +SQL databases can be scripted to automate operations and provide a massive speed up to the operations ( +.I "stored procedures" , +see +.I "PL/SQL" ), +etc. + +Document-oriented databases are key-value stores. +Furthermore, metadata is extracted for further optimization. + +Contrary to SQL, DODB has a very narrow scope: to provide +Thus, DODB doesn't provide an interactive shell, no request language to perform arbitrary operations on the database, etc. 
+ +.SECTION Basic usage +.SECTION A few more options +.SECTION Limits of DODB .SECTION Experimental scenario .LP The following experiment shows the performance of DODB based on quering durations. @@ -61,7 +97,7 @@ class Car end .SOURCE . -.SECTION Basic indexes (1 to 1 relations) +.SS Basic indexes (1 to 1 relations) .LP An index enables to match a single value based on a small string. In our example, each \f[CW]car\f[] has an unique \fIname\f[] which is used as an index. @@ -69,213 +105,43 @@ In our example, each \f[CW]car\f[] has an unique \fIname\f[] which is used as an The following graph represents the result of 100 queries of a car based on its name. The experiment starts with a database containing 1,000 cars and goes up to 250,000 cars. +.so graph_query_index.grap + Since there is only one value to retrieve, the request is quick and time is almost constant. When the value and the index are kept in memory (see \f[CW]RAM only\f[] and \f[CW]Cached db\f[]), the retrieval is almost instantaneous (about 50 to 120 ns). In case the value is on the disk, deserialization takes about 15 µs (see \f[CW]Uncached db, cached index\f[]). -The request is a little longer when the index isn't cached, in this case DODB walks the file-system to find the right symlink to follow, thus slowing the process even more, by up to 20%. -.G1 -copy "legend.grap" -frame invis ht 3 wid 4 left solid bot solid -coord y 0,50 -ticks left out from 0 to 50 by 10 -ticks bot out at 50000 "50,000", 100000 "100,000", 150000 "150,000", 200000 "200,000", 250000 "250,000" +The request is a little longer when the index isn't cached (see \f[CW]Uncached db and index\f[]); in this case DODB walks the file-system to find the right symlink to follow, thus slowing the process even more, by up to 20%. -label left "Request duration with" unaligned "an index (us)" "(Median)" left 0.8 -label bot "Number of cars in the database" down 0.1 +.TS +allbox tab(:); +c | lw(4.0i) | cew(1.4i). 
+DODB instance:Comment and database usage:T{ +compared to RAM only +T} +RAM only:T{ +Worst memory footprint (all data must be in memory), best performance. +T}:- +Cached db and index:T{ +Performance for retrieving a value is the same as RAM only while +enabling the admin to manually search for data on-disk. +T}:about the same perfs +Uncached db, cached index::300 to 400x slower +Uncached db and index:T{ +Best memory footprint, worst performance. +T}:400 to 500x slower +.TE -obram = obuncache = obcache = obsemi = 0 # old bullets -cbram = cbuncache = cbcache = cbsemi = 0 # current bullets - -legendxleft = 100000 -legendxright = 250000 -legendyup = 15 -legendydown = 2 - -boite(legendxleft,legendxright,legendyup,legendydown) -legend(legendxleft,legendxright,legendyup,legendydown) - -copy "../data/index.d" thru X - cx = $1*5 - - y_scale = 1000 - - # ram cached semi uncached - line from cx,$2/y_scale to cx,$4/y_scale - line from cx,$5/y_scale to cx,$7/y_scale - line from cx,$8/y_scale to cx,$10/y_scale - line from cx,$11/y_scale to cx,$13/y_scale - - #ty = $3 - - cx = $1*5 - - cbram = $3/y_scale - cbcache = $6/y_scale - cbsemi = $9/y_scale - cbuncache = $12/y_scale - - if (obram > 0) then {line from cx,cbram to ox,obram} - if (obcache > 0) then {line from cx,cbcache to ox,obcache} -.gcolor blue - if (obsemi > 0) then {line from cx,cbsemi to ox,obsemi} -.gcolor -.gcolor green - if (obuncache > 0) then {line from cx,cbuncache to ox,obuncache} -.gcolor - - obram = cbram - obcache = cbcache - obsemi = cbsemi - obuncache = cbuncache - ox = cx - - # ram cached semi uncached -.gcolor red - bullet at cx,cbram -.gcolor - bullet at cx,cbcache -.gcolor blue - bullet at cx,cbsemi -.gcolor -.gcolor green - bullet at cx,cbuncache -.gcolor -X -.G2 +.B Conclusion : +as expected, retrieving a single value is fast and the size of the database doesn't matter much. +Each deserialization and, more importantly, each disk access is a pain point. 
+Caching the value enables a massive performance gain, data can be retrieved several hundred times quicker. .bp -.SECTION Partitions (1 to n relations) +.SS Partitions (1 to n relations) .LP -.G1 -copy "legend.grap" -frame invis ht 3 wid 4 left solid bot solid -coord x 0,5000*2 y 0,350 -ticks left out from 0 to 350 by 50 -label left "Request duration" unaligned "for a partition (ms)" "(Median)" left 0.8 -label bot "Number of cars matching the partition" down 0.1 +.so graph_query_partition.grap -obram = obuncache = obcache = obsemi = 0 -cbram = cbuncache = cbcache = cbsemi = 0 - -legendxleft = 1000 -legendxright = 6500 -legendyup = 330 -legendydown = 230 - -boite(legendxleft,legendxright,legendyup,legendydown) -legend(legendxleft,legendxright,legendyup,legendydown) - -copy "../data/partitions.d" thru X - cx = $1*2 - - y_scale = 1000000 - - # ram cached semi uncached - line from cx,$2/y_scale to cx,$4/y_scale - line from cx,$5/y_scale to cx,$7/y_scale - line from cx,$8/y_scale to cx,$10/y_scale - line from cx,$11/y_scale to cx,$13/y_scale - - #ty = $3 - - cbram = $3/y_scale - cbcache = $6/y_scale - cbsemi = $9/y_scale - cbuncache = $12/y_scale - - if (obram > 0) then {line from cx,cbram to ox,obram} - if (obcache > 0) then {line from cx,cbcache to ox,obcache} -.gcolor blue - if (obsemi > 0) then {line from cx,cbsemi to ox,obsemi} -.gcolor -.gcolor green - if (obuncache > 0) then {line from cx,cbuncache to ox,obuncache} -.gcolor - - obram = cbram - obcache = cbcache - obsemi = cbsemi - obuncache = cbuncache - ox = cx - - # ram cached semi uncached -.gcolor red - bullet at cx,cbram -.gcolor - bullet at cx,cbcache -.gcolor blue - bullet at cx,cbsemi -.gcolor -.gcolor green - bullet at cx,cbuncache -.gcolor -X -.G2 .bp -.SECTION Tags (n to n relations) +.SS Tags (n to n relations) .LP -.G1 -copy "legend.grap" -frame invis ht 3 wid 4 left solid bot solid -coord x 0,5000 y 0,170 -ticks left out from 0 to 170 by 20 -label left "Request duration" unaligned "for a tag (ms)" 
"(Median)" left 0.8 -label bot "Number of cars matching the tag" down 0.1 - -obram = obuncache = obcache = obsemi = 0 -cbram = cbuncache = cbcache = cbsemi = 0 - -legendxleft = 200 -legendxright = 3000 -legendyup = 170 -legendydown = 120 - -boite(legendxleft,legendxright,legendyup,legendydown) -legend(legendxleft,legendxright,legendyup,legendydown) - -copy "../data/tags.d" thru X - cx = $1 - - y_scale = 1000000 - - # ram cached semi uncached - line from cx,$2/y_scale to cx,$4/y_scale - line from cx,$5/y_scale to cx,$7/y_scale - line from cx,$8/y_scale to cx,$10/y_scale - line from cx,$11/y_scale to cx,$13/y_scale - - #ty = $3 - - cbram = $3/y_scale - cbcache = $6/y_scale - cbsemi = $9/y_scale - cbuncache = $12/y_scale - - if (obram > 0) then {line from cx,cbram to ox,obram} - if (obcache > 0) then {line from cx,cbcache to ox,obcache} -.gcolor blue - if (obsemi > 0) then {line from cx,cbsemi to ox,obsemi} -.gcolor -.gcolor green - if (obuncache > 0) then {line from cx,cbuncache to ox,obuncache} -.gcolor - - obram = cbram - obcache = cbcache - obsemi = cbsemi - obuncache = cbuncache - ox = cx - - # ram cached semi uncached -.gcolor red - bullet at cx,cbram -.gcolor - bullet at cx,cbcache -.gcolor blue - bullet at cx,cbsemi -.gcolor -.gcolor green - bullet at cx,cbuncache -.gcolor -X -.G2 +.so graph_query_tag.grap diff --git a/graphs/legend.grap b/graphs/legend.grap index 5181cb6..af31174 100644 --- a/graphs/legend.grap +++ b/graphs/legend.grap @@ -20,7 +20,7 @@ define legend { diffy = yup - ydown hdiff = diffy/4.3 - cy = yup - (diffy/8) + cy = yup - (diffy/6) cx = (diffx/20) + xleft lstartx = cx @@ -33,7 +33,7 @@ define legend { "RAM only" ljust at tstartx,cy cy = cy - hdiff line from lstartx,cy to lendx,cy - "Cached DODB" ljust at tstartx,cy + "Cached db and index" ljust at tstartx,cy cy = cy - hdiff .gcolor blue line from lstartx,cy to lendx,cy