Work on lexer and diagnostic representation

Alberto Restifo 2020-04-20 14:57:20 +02:00
parent dd1288089f
commit 2e2edd1908
9 changed files with 220 additions and 136 deletions

View File

@ -12,4 +12,4 @@ tasks:
crystal tool format --check
- test: |
cd crystal-cbor
crystal spec
crystal spec --error-on-warnings

View File

@ -24,7 +24,7 @@ describe CBOR::Lexer do
it "reads #{tt[:bytes].hexstring} as #{tt[:value].to_s}" do
lexer =[:bytes])
token = lexer.read_token
token = lexer.next_token
token.should be_a(CBOR::Token::IntT) eq(tt[:value])

spec/ Normal file
View File

@ -0,0 +1,103 @@
require "./spec_helper"
tests = [
{ %(0), "00" },
{ %(1), "01" },
{ %(10), "0a" },
{ %(23), "17" },
{ %(24), "18 18" },
{ %(25), "18 19" },
{ %(100), "18 64" },
{ %(1000), "19 03 e8" },
{ %(1000000), "1a 00 0f 42 40" },
{ %(1000000000000), "1b 00 00 00 e8 d4 a5 10 00" },
{ %(18446744073709551615), "1b ff ff ff ff ff ff ff ff" },
# { %(18446744073709551616), "c2 49 01 00 00 00 00 00 00 00 00" },
{ %(-18446744073709551616), "3b ff ff ff ff ff ff ff ff" },
# { %(-18446744073709551617), "c3 49 01 00 00 00 00 00 00 00 00" },
{ %(-1), "20" },
{ %(-10), "29" },
{ %(-100), "38 63" },
{ %(-1000), "39 03 e7" },
# { %(0.0), "f9 00 00" },
# { %(-0.0), "f9 80 00" },
# { %(1.0), "f9 3c 00" },
# { %(1.1), "fb 3f f1 99 99 99 99 99 9a" },
# { %(1.5), "f9 3e 00" },
# { %(65504.0), "f9 7b ff" },
# { %(100000.0), "fa 47 c3 50 00" },
# { %(3.4028234663852886e+38), "fa 7f 7f ff ff" },
# { %(1.0e+300), "fb 7e 37 e4 3c 88 00 75 9c" },
# { %(5.960464477539063e-8), "f9 00 01" },
# { %(0.00006103515625), "f9 04 00" },
# { %(-4.0), "f9 c4 00" },
# { %(-4.1), "fb c0 10 66 66 66 66 66 66" },
# { %(Infinity), "f9 7c 00" },
# { %(NaN), "f9 7e 00" },
# { %(-Infinity), "f9 fc 00" },
# { %(Infinity), "fa 7f 80 00 00" },
# { %(NaN), "fa 7f c0 00 00" },
# { %(-Infinity), "fa ff 80 00 00" },
# { %(Infinity), "fb 7f f0 00 00 00 00 00 00" },
# { %(NaN), "fb 7f f8 00 00 00 00 00 00" },
# { %(-Infinity), "fb ff f0 00 00 00 00 00 00" },
# { %(false), "f4" },
# { %(true), "f5" },
# { %(null), "f6" },
# { %(undefined), "f7" },
# { %(simple(16)), "f0" },
# { %(simple(24)), "f8 18" },
# { %(simple(255)), "f8 ff" },
# { %(0("2013-03-21T20:04:00Z")), "c0 74 32 30 31 33 2d 30 33 2d 32 31 54 32 30 3a 30 34 3a 30 30 5a" },
# { %(1(1363896240)), "c1 1a 51 4b 67 b0" },
# { %(1(1363896240.5)), "c1 fb 41 d4 52 d9 ec 20 00 00" },
# { %(23(h'01020304')), "d7 44 01 02 03 04" },
# { %(24(h'6449455446')), "d8 18 45 64 49 45 54 46" },
# { %(32("")), "d8 20 76 68 74 74 70 3a 2f 2f 77 77 77 2e 65 78 61 6d 70 6c 65 2e 63 6f 6d" },
{ %(h''), "40" },
{ %(h'01020304'), "44 01 02 03 04" },
# { %(""), "60" },
# { %("a"), "61 61" },
# { %("IETF"), "64 49 45 54 46" },
# { %(""\\"), "62 22 5c" },
# { %("\u00fc"), "62 c3 bc" },
# { %("\u6c34"), "63 e6 b0 b4" },
# { %("\ud800\udd51"), "64 f0 90 85 91" },
# { %([]), "80" },
# { %([1, 2, 3]), "83 01 02 03" },
# { %([1, [2, 3], [4, 5]]), "83 01 82 02 03 82 04 05" },
# { %([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]), "98 19 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f 10 11 12 13 14 15 16 17 18 18 18 19" },
# { %({}), "a0" },
# { %({1: 2, 3: 4}), "a2 01 02 03 04" },
# { %({"a": 1, "b": [2, 3]}), "a2 61 61 01 61 62 82 02 03" },
# { %(["a", {"b": "c"}]), "82 61 61 a1 61 62 61 63" },
# { %({"a": "A", "b": "B", "c": "C", "d": "D", "e": "E"}), "a5 61 61 61 41 61 62 61 42 61 63 61 43 61 64 61 44 61 65 61 45" },
{ %((_ h'0102', h'030405')), "5f 42 01 02 43 03 04 05 ff" },
# { %((_ "strea", "ming")), "7f 65 73 74 72 65 61 64 6d 69 6e 67 ff" },
# { %([_ ]), "9f ff" },
# { %([_ 1, [2, 3], [_ 4, 5]]), "9f 01 82 02 03 9f 04 05 ff ff" },
# { %([_ 1, [2, 3], [4, 5]]), "9f 01 82 02 03 82 04 05 ff" },
# { %([1, [2, 3], [_ 4, 5]]), "83 01 82 02 03 9f 04 05 ff" },
# { %([1, [_ 2, 3], [4, 5]]), "83 01 9f 02 03 ff 82 04 05" },
# { %([_ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]), "9f 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f 10 11 12 13 14 15 16 17 18 18 18 19 ff" },
# { %({_ "a": 1, "b": [_ 2, 3]}), "bf 61 61 01 61 62 9f 02 03 ff ff" },
# { %(["a", {_ "b": "c"}]), "82 61 61 bf 61 62 61 63 ff" },
# { %({_ "Fun": true, "Amt": -2}), "bf 63 46 75 6e f5 63 41 6d 74 21 ff" },
describe "Examples from RFC7049 Appendix A" do
tests.each_with_index do |tt, index|
describe "test ##{index}" do
diagnostic, hex_string = tt
bytes_arr =
bytes =, bytes_arr.size)
it "reads #{bytes.hexstring} as #{diagnostic}" do
result =
result.should eq(diagnostic)

View File

@ -1,84 +0,0 @@
tests = [
{ %(0), "00" },
{ %(1), "01" },
{ %(10), "0a" },
{ %(23), "17" },
{ %(24), "18 18" },
{ %(25), "18 19" },
{ %(100), "18 64" },
{ %(1000), "19 03 e8" },
{ %(1000000), "1a 00 0f 42 40" },
{ %(1000000000000), "1b 00 00 00 e8 d4 a5 10 00" },
{ %(18446744073709551615), "1b ff ff ff ff ff ff ff ff" },
{ %(18446744073709551616), "c2 49 01 00 00 00 00 00 00 00 00" },
{ %(-18446744073709551616), "3b ff ff ff ff ff ff ff ff" },
{ %(-18446744073709551617), "c3 49 01 00 00 00 00 00 00 00 00" },
{ %(-1), "20" },
{ %(-10), "29" },
{ %(-100), "38 63" },
{ %(-1000), "39 03 e7" },
{ %(0.0), "f9 00 00" },
{ %(-0.0), "f9 80 00" },
{ %(1.0), "f9 3c 00" },
{ %(1.1), "fb 3f f1 99 99 99 99 99 9a" },
{ %(1.5), "f9 3e 00" },
{ %(65504.0), "f9 7b ff" },
{ %(100000.0), "fa 47 c3 50 00" },
{ %(3.4028234663852886e+38), "fa 7f 7f ff ff" },
{ %(1.0e+300), "fb 7e 37 e4 3c 88 00 75 9c" },
{ %(5.960464477539063e-8), "f9 00 01" },
{ %(0.00006103515625), "f9 04 00" },
{ %(-4.0), "f9 c4 00" },
{ %(-4.1), "fb c0 10 66 66 66 66 66 66" },
{ %(Infinity), "f9 7c 00" },
{ %(NaN), "f9 7e 00" },
{ %(-Infinity), "f9 fc 00" },
{ %(Infinity), "fa 7f 80 00 00" },
{ %(NaN), "fa 7f c0 00 00" },
{ %(-Infinity), "fa ff 80 00 00" },
{ %(Infinity), "fb 7f f0 00 00 00 00 00 00" },
{ %(NaN), "fb 7f f8 00 00 00 00 00 00" },
{ %(-Infinity), "fb ff f0 00 00 00 00 00 00" },
{ %(false), "f4" },
{ %(true), "f5" },
{ %(null), "f6" },
{ %(undefined), "f7" },
{ %(simple(16)), "f0" },
{ %(simple(24)), "f8 18" },
{ %(simple(255)), "f8 ff" },
{ %(0("2013-03-21T20:04:00Z")), "c0 74 32 30 31 33 2d 30 33 2d 32 31 54 32 30 3a 30 34 3a 30 30 5a" },
{ %(1(1363896240)), "c1 1a 51 4b 67 b0" },
{ %(1(1363896240.5)), "c1 fb 41 d4 52 d9 ec 20 00 00" },
{ %(23(h'01020304')), "d7 44 01 02 03 04" },
{ %(24(h'6449455446')), "d8 18 45 64 49 45 54 46" },
{ %(32("")), "d8 20 76 68 74 74 70 3a 2f 2f 77 77 77 2e 65 78 61 6d 70 6c 65 2e 63 6f 6d" },
{ %(h''), "40" },
{ %(h'01020304'), "44 01 02 03 04" },
{ %(""), "60" },
{ %("a"), "61 61" },
{ %("IETF"), "64 49 45 54 46" },
{ %(""\\"), "62225c" },
{ %("\u00fc"), "62 c3 bc" },
{ %("\u6c34"), "63 e6 b0 b4" },
{ %("\ud800\udd51"), "64 f0 90 85 91" },
{ %([]), "80" },
{ %([1, 2, 3]), "83 01 02 03" },
{ %([1, [2, 3], [4, 5]]), "83 01 82 02 03 82 04 05" },
{ %([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]), "98 19 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f 10 11 12 13 14 15 16 17 18 18 18 19" },
{ %({}), "a0" },
{ %({1: 2, 3: 4}), "a2 01 02 03 04" },
{ %({"a": 1, "b": [2, 3]}), "a2 61 61 01 61 62 82 02 03" },
{ %(["a", {"b": "c"}]), "82 61 61 a1 61 62 61 63" },
{ %({"a": "A", "b": "B", "c": "C", "d": "D", "e": "E"}), "a5 61 61 61 41 61 62 61 42 61 63 61 43 61 64 61 44 61 65 61 45" },
{ %((_ h'0102', h'030405')), "5f 42 01 02 43 03 04 05 ff" },
{ %((_ "strea", "ming")), "7f 65 73 74 72 65 61 64 6d 69 6e 67 ff" },
{ %([_ ]), "9f ff" },
{ %([_ 1, [2, 3], [_ 4, 5]]), "9f 01 82 02 03 9f 04 05 ff ff" },
{ %([_ 1, [2, 3], [4, 5]]), "9f 01 82 02 03 82 04 05 ff" },
{ %([1, [2, 3], [_ 4, 5]]), "83 01 82 02 03 9f 04 05 ff" },
{ %([1, [_ 2, 3], [4, 5]]), "83 01 9f 02 03 ff 82 04 05" },
{ %([_ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]), "9f 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f 10 11 12 13 14 15 16 17 18 18 18 19 ff" },
{ %({_ "a": 1, "b": [_ 2, 3]}), "bf 61 61 01 61 62 9f 02 03 ff ff" },
{ %(["a", {_ "b": "c"}]), "82 61 61 bf 61 62 61 63 ff" },
{ %({_ "Fun": true, "Amt": -2}), "bf 63 46 75 6e f5 63 41 6d 74 21 ff" },

View File

@ -22,4 +22,25 @@ abstract class CBOR::Decoder
# Consume the array :)
private def read_bytes_array_body
read_type(Token::ByteArrayT) do |token|
private macro read_type(type, finish_token = true, &block)
case token = current_token
when {{type}}
{% if finish_token %}finish_token!{% end %}
{{ block.body }}
unexpected_token(token, {{type.stringify.split("::").last}})
private def unexpected_token(token, expected = nil)
message = "Unexpected token #{Token.to_s(token)}"
message += " expected #{expected}" if expected
raise, token.byte_number)

View File

@ -1,15 +1,45 @@
module CBOR::Diagnostic
def to_s(value : CBOR::ByteArray) : String
require "./lexer"
require "./token"
# Reads a CBOR input into a diagnostic string.
# This consumes the IO and is mostly useful for testing against the examples
# provided in the RFC and for ensuring that the `CBOR::Lexer` functions correctly.
class CBOR::Diagnostic
@lexer : Lexer
@is_array : Bool = false
def initialize(input)
@lexer =
{% for type in [UInt8, Int8, UInt16, Int16, UInt32, Int32, UInt64, Int64, Int128] %}
def to_s(value : {{type}}) : String
{% end %}
# Reads the content of the IO and prints out a diagnostic string
# representation of the input.
def to_s : String
result = ""
def to_s(value : String)
while val = next_value
result += val
private def next_value : String?
token = @lexer.next_token
return nil unless token
case token
when Token::BytesArray
@is_array = true
when Token::BreakT
@is_array = flase
separator + Token.to_diagnostic(token)
private def separator : String
return ", " if @is_array

View File

@ -11,44 +11,19 @@ class CBOR::Lexer
@token : Token::T
@current_pos : Int64
@token_finished : Bool
@eof : Bool = false
def initialize(@io : IO)
@current_pos = 0
@token =
@token_finished = true
def current_token : Token::T
if @token_finished
@token_finished = false
@token = next_token
def next_token
return nil if @eof
def finish_token!
@token_finished = true
def read_token : Token::T
if @token_finished
@token = next_token
private def next_token
@current_pos = @io.pos.to_i64
current_byte = next_byte
current_byte = @io.read_byte
return nil unless current_byte
case current_byte
when 0x00..0x17
@ -85,16 +60,23 @@ class CBOR::Lexer
when 0x5b
when 0x5f
when 0xff
# TODO: Define which segment it's breaking
raise"Unexpected first byte #{current_byte}")
raise"Unexpected first byte 0x#{current_byte.to_s(16)}")
private def next_byte : UInt8
private def next_byte : UInt8?
byte = @io.read_byte
raise"Unexpected EOF at byte #{@io.pos}") unless byte
if byte
@eof = true
private def consume_int(value)

View File

@ -1,5 +1,6 @@
class CBOR::Token
record NullT, byte_number : Int64
record UndefinedT, byte_number : Int64
record BoolT, byte_number : Int64, value : Bool
record ArrayT, byte_number : Int64, size : UInt32?
record MapT, byte_number : Int64, size : UInt32?
@ -7,12 +8,26 @@ class CBOR::Token
record FloatT, byte_number : Int64, value : Float64
record StringT, byte_number : Int64, value : String
record BytesT, byte_number : Int64, value : Bytes
record StringArrayT, byte_number : Int64
record BytesArrayT, byte_number : Int64
record StringArrayStartT, byte_number : Int64
record StringArrayEndT, byte_number : Int64
record BytesArrayStartT, byte_number : Int64
record BytesArrayEndT, byte_number : Int64
alias T = NullT | BoolT | ArrayT | MapT | IntT | FloatT | StringT | BytesT | StringArrayT | BytesArrayT
alias T = NullT |
UndefinedT |
BoolT |
ArrayT |
MapT |
IntT |
FloatT |
StringT |
BytesT |
StringArrayStartT |
StringArrayEndT |
BytesArrayStartT |
def self.to_s(token : T)
def self.to_diagnostic(token : T) : String
case token
when IntT
@ -23,8 +38,24 @@ class CBOR::Token
when UndefinedT
when BoolT
when BytesArrayStartT
"(_ "
when BytesArrayEndT
when FloatT
when StringT
when StringArrayT
when MapT
when ArrayT
raise "Diagnostic notation for type #{token.class} not implemented"
raise "Uknown diagnostics representation for #{token.class}"

View File

@ -1,5 +1,6 @@
class CBOR::BytesArray < Array(UInt8)
def to_a : Array(UInt8)
def to_bytes : Bytes