Work on lexer and diagnostic representation

dev
Alberto Restifo 2020-04-20 14:57:20 +02:00
parent dd1288089f
commit 2e2edd1908
9 changed files with 220 additions and 136 deletions

View File

@ -12,4 +12,4 @@ tasks:
crystal tool format --check
- test: |
cd crystal-cbor
crystal spec
crystal spec --error-on-warnings

View File

@ -24,7 +24,7 @@ describe CBOR::Lexer do
it "reads #{tt[:bytes].hexstring} as #{tt[:value].to_s}" do
lexer = CBOR::Lexer.new(tt[:bytes])
token = lexer.read_token
token = lexer.next_token
token.should be_a(CBOR::Token::IntT)
token.as(CBOR::Token::IntT).value.should eq(tt[:value])
end

103
spec/rfc_spec.cr Normal file
View File

@ -0,0 +1,103 @@
require "./spec_helper"
# Fixture table for the examples in RFC 7049 Appendix A.
#
# Each entry is a two-element tuple: first the expected diagnostic notation,
# then the CBOR encoding written as space-separated hexadecimal octets.
# Entries that are commented out are not exercised by the spec below.
tests = [
{ %(0), "00" },
{ %(1), "01" },
{ %(10), "0a" },
{ %(23), "17" },
{ %(24), "18 18" },
{ %(25), "18 19" },
{ %(100), "18 64" },
{ %(1000), "19 03 e8" },
{ %(1000000), "1a 00 0f 42 40" },
{ %(1000000000000), "1b 00 00 00 e8 d4 a5 10 00" },
{ %(18446744073709551615), "1b ff ff ff ff ff ff ff ff" },
# { %(18446744073709551616), "c2 49 01 00 00 00 00 00 00 00 00" },
{ %(-18446744073709551616), "3b ff ff ff ff ff ff ff ff" },
# { %(-18446744073709551617), "c3 49 01 00 00 00 00 00 00 00 00" },
{ %(-1), "20" },
{ %(-10), "29" },
{ %(-100), "38 63" },
{ %(-1000), "39 03 e7" },
# { %(0.0), "f9 00 00" },
# { %(-0.0), "f9 80 00" },
# { %(1.0), "f9 3c 00" },
# { %(1.1), "fb 3f f1 99 99 99 99 99 9a" },
# { %(1.5), "f9 3e 00" },
# { %(65504.0), "f9 7b ff" },
# { %(100000.0), "fa 47 c3 50 00" },
# { %(3.4028234663852886e+38), "fa 7f 7f ff ff" },
# { %(1.0e+300), "fb 7e 37 e4 3c 88 00 75 9c" },
# { %(5.960464477539063e-8), "f9 00 01" },
# { %(0.00006103515625), "f9 04 00" },
# { %(-4.0), "f9 c4 00" },
# { %(-4.1), "fb c0 10 66 66 66 66 66 66" },
# { %(Infinity), "f9 7c 00" },
# { %(NaN), "f9 7e 00" },
# { %(-Infinity), "f9 fc 00" },
# { %(Infinity), "fa 7f 80 00 00" },
# { %(NaN), "fa 7f c0 00 00" },
# { %(-Infinity), "fa ff 80 00 00" },
# { %(Infinity), "fb 7f f0 00 00 00 00 00 00" },
# { %(NaN), "fb 7f f8 00 00 00 00 00 00" },
# { %(-Infinity), "fb ff f0 00 00 00 00 00 00" },
# { %(false), "f4" },
# { %(true), "f5" },
# { %(null), "f6" },
# { %(undefined), "f7" },
# { %(simple(16)), "f0" },
# { %(simple(24)), "f8 18" },
# { %(simple(255)), "f8 ff" },
# { %(0("2013-03-21T20:04:00Z")), "c0 74 32 30 31 33 2d 30 33 2d 32 31 54 32 30 3a 30 34 3a 30 30 5a" },
# { %(1(1363896240)), "c1 1a 51 4b 67 b0" },
# { %(1(1363896240.5)), "c1 fb 41 d4 52 d9 ec 20 00 00" },
# { %(23(h'01020304')), "d7 44 01 02 03 04" },
# { %(24(h'6449455446')), "d8 18 45 64 49 45 54 46" },
# { %(32("http://www.example.com")), "d8 20 76 68 74 74 70 3a 2f 2f 77 77 77 2e 65 78 61 6d 70 6c 65 2e 63 6f 6d" },
{ %(h''), "40" },
{ %(h'01020304'), "44 01 02 03 04" },
# { %(""), "60" },
# { %("a"), "61 61" },
# { %("IETF"), "64 49 45 54 46" },
# { %(""\\"), "62 22 5c" },
# { %("\u00fc"), "62 c3 bc" },
# { %("\u6c34"), "63 e6 b0 b4" },
# { %("\ud800\udd51"), "64 f0 90 85 91" },
# { %([]), "80" },
# { %([1, 2, 3]), "83 01 02 03" },
# { %([1, [2, 3], [4, 5]]), "83 01 82 02 03 82 04 05" },
# { %([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]), "98 19 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f 10 11 12 13 14 15 16 17 18 18 18 19" },
# { %({}), "a0" },
# { %({1: 2, 3: 4}), "a2 01 02 03 04" },
# { %({"a": 1, "b": [2, 3]}), "a2 61 61 01 61 62 82 02 03" },
# { %(["a", {"b": "c"}]), "82 61 61 a1 61 62 61 63" },
# { %({"a": "A", "b": "B", "c": "C", "d": "D", "e": "E"}), "a5 61 61 61 41 61 62 61 42 61 63 61 43 61 64 61 44 61 65 61 45" },
{ %((_ h'0102', h'030405')), "5f 42 01 02 43 03 04 05 ff" },
# { %((_ "strea", "ming")), "7f 65 73 74 72 65 61 64 6d 69 6e 67 ff" },
# { %([_ ]), "9f ff" },
# { %([_ 1, [2, 3], [_ 4, 5]]), "9f 01 82 02 03 9f 04 05 ff ff" },
# { %([_ 1, [2, 3], [4, 5]]), "9f 01 82 02 03 82 04 05 ff" },
# { %([1, [2, 3], [_ 4, 5]]), "83 01 82 02 03 9f 04 05 ff" },
# { %([1, [_ 2, 3], [4, 5]]), "83 01 9f 02 03 ff 82 04 05" },
# { %([_ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]), "9f 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f 10 11 12 13 14 15 16 17 18 18 18 19 ff" },
# { %({_ "a": 1, "b": [_ 2, 3]}), "bf 61 61 01 61 62 9f 02 03 ff ff" },
# { %(["a", {_ "b": "c"}]), "82 61 61 bf 61 62 61 63 ff" },
# { %({_ "Fun": true, "Amt": -2}), "bf 63 46 75 6e f5 63 41 6d 74 21 ff" },
]
# Feeds every enabled fixture through `CBOR::Diagnostic` and checks that the
# rendered diagnostic string matches the expected notation.
describe "Examples from RFC7049 Appendix A" do
  tests.each_with_index do |(diagnostic, hex_string), index|
    describe "test ##{index}" do
      # Turn the space-separated hex octets into a byte slice.
      octets = hex_string.split.map(&.to_u8(16))
      bytes = Bytes.new(octets.size) { |i| octets[i] }

      it "reads #{bytes.hexstring} as #{diagnostic}" do
        CBOR::Diagnostic.new(bytes).to_s.should eq(diagnostic)
      end
    end
  end
end

View File

@ -1,84 +0,0 @@
tests = [
{ %(0), "00" },
{ %(1), "01" },
{ %(10), "0a" },
{ %(23), "17" },
{ %(24), "18 18" },
{ %(25), "18 19" },
{ %(100), "18 64" },
{ %(1000), "19 03 e8" },
{ %(1000000), "1a 00 0f 42 40" },
{ %(1000000000000), "1b 00 00 00 e8 d4 a5 10 00" },
{ %(18446744073709551615), "1b ff ff ff ff ff ff ff ff" },
{ %(18446744073709551616), "c2 49 01 00 00 00 00 00 00 00 00" },
{ %(-18446744073709551616), "3b ff ff ff ff ff ff ff ff" },
{ %(-18446744073709551617), "c3 49 01 00 00 00 00 00 00 00 00" },
{ %(-1), "20" },
{ %(-10), "29" },
{ %(-100), "38 63" },
{ %(-1000), "39 03 e7" },
{ %(0.0), "f9 00 00" },
{ %(-0.0), "f9 80 00" },
{ %(1.0), "f9 3c 00" },
{ %(1.1), "fb 3f f1 99 99 99 99 99 9a" },
{ %(1.5), "f9 3e 00" },
{ %(65504.0), "f9 7b ff" },
{ %(100000.0), "fa 47 c3 50 00" },
{ %(3.4028234663852886e+38), "fa 7f 7f ff ff" },
{ %(1.0e+300), "fb 7e 37 e4 3c 88 00 75 9c" },
{ %(5.960464477539063e-8), "f9 00 01" },
{ %(0.00006103515625), "f9 04 00" },
{ %(-4.0), "f9 c4 00" },
{ %(-4.1), "fb c0 10 66 66 66 66 66 66" },
{ %(Infinity), "f9 7c 00" },
{ %(NaN), "f9 7e 00" },
{ %(-Infinity), "f9 fc 00" },
{ %(Infinity), "fa 7f 80 00 00" },
{ %(NaN), "fa 7f c0 00 00" },
{ %(-Infinity), "fa ff 80 00 00" },
{ %(Infinity), "fb 7f f0 00 00 00 00 00 00" },
{ %(NaN), "fb 7f f8 00 00 00 00 00 00" },
{ %(-Infinity), "fb ff f0 00 00 00 00 00 00" },
{ %(false), "f4" },
{ %(true), "f5" },
{ %(null), "f6" },
{ %(undefined), "f7" },
{ %(simple(16)), "f0" },
{ %(simple(24)), "f8 18" },
{ %(simple(255)), "f8 ff" },
{ %(0("2013-03-21T20:04:00Z")), "c0 74 32 30 31 33 2d 30 33 2d 32 31 54 32 30 3a 30 34 3a 30 30 5a" },
{ %(1(1363896240)), "c1 1a 51 4b 67 b0" },
{ %(1(1363896240.5)), "c1 fb 41 d4 52 d9 ec 20 00 00" },
{ %(23(h'01020304')), "d7 44 01 02 03 04" },
{ %(24(h'6449455446')), "d8 18 45 64 49 45 54 46" },
{ %(32("http://www.example.com")), "d8 20 76 68 74 74 70 3a 2f 2f 77 77 77 2e 65 78 61 6d 70 6c 65 2e 63 6f 6d" },
{ %(h''), "40" },
{ %(h'01020304'), "44 01 02 03 04" },
{ %(""), "60" },
{ %("a"), "61 61" },
{ %("IETF"), "64 49 45 54 46" },
{ %(""\\"), "62225c" },
{ %("\u00fc"), "62 c3 bc" },
{ %("\u6c34"), "63 e6 b0 b4" },
{ %("\ud800\udd51"), "64 f0 90 85 91" },
{ %([]), "80" },
{ %([1, 2, 3]), "83 01 02 03" },
{ %([1, [2, 3], [4, 5]]), "83 01 82 02 03 82 04 05" },
{ %([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]), "98 19 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f 10 11 12 13 14 15 16 17 18 18 18 19" },
{ %({}), "a0" },
{ %({1: 2, 3: 4}), "a2 01 02 03 04" },
{ %({"a": 1, "b": [2, 3]}), "a2 61 61 01 61 62 82 02 03" },
{ %(["a", {"b": "c"}]), "82 61 61 a1 61 62 61 63" },
{ %({"a": "A", "b": "B", "c": "C", "d": "D", "e": "E"}), "a5 61 61 61 41 61 62 61 42 61 63 61 43 61 64 61 44 61 65 61 45" },
{ %((_ h'0102', h'030405')), "5f 42 01 02 43 03 04 05 ff" },
{ %((_ "strea", "ming")), "7f 65 73 74 72 65 61 64 6d 69 6e 67 ff" },
{ %([_ ]), "9f ff" },
{ %([_ 1, [2, 3], [_ 4, 5]]), "9f 01 82 02 03 9f 04 05 ff ff" },
{ %([_ 1, [2, 3], [4, 5]]), "9f 01 82 02 03 82 04 05 ff" },
{ %([1, [2, 3], [_ 4, 5]]), "83 01 82 02 03 9f 04 05 ff" },
{ %([1, [_ 2, 3], [4, 5]]), "83 01 9f 02 03 ff 82 04 05" },
{ %([_ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]), "9f 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f 10 11 12 13 14 15 16 17 18 18 18 19 ff" },
{ %({_ "a": 1, "b": [_ 2, 3]}), "bf 61 61 01 61 62 9f 02 03 ff ff" },
{ %(["a", {_ "b": "c"}]), "82 61 61 bf 61 62 61 63 ff" },
{ %({_ "Fun": true, "Amt": -2}), "bf 63 46 75 6e f5 63 41 6d 74 21 ff" },
]

View File

@ -22,4 +22,25 @@ abstract class CBOR::Decoder
# Consume the array :)
end
end
# Reads the body of a byte array by matching the current token through
# `read_type`.
#
# NOTE(review): the block handed to `read_type` is empty, so nothing is done
# with the matched token yet.
# NOTE(review): `Token::ByteArrayT` does not match the spelling of the byte
# token records (`BytesT`, `BytesArrayStartT`, ...) — confirm the intended type.
private def read_bytes_array_body
read_type(Token::ByteArrayT) do |token|
end
end
# Matches the decoder's current token against *type*.
#
# Expands to a `case` over `current_token`. When the token is of *type*,
# `finish_token!` is called (unless *finish_token* is false) and the body of
# the given block is pasted in, with the token available as `token`. Any other
# token is handed to `unexpected_token`, together with the last
# `::`-separated segment of *type* as the expected name.
private macro read_type(type, finish_token = true, &block)
case token = current_token
when {{type}}
{% if finish_token %}finish_token!{% end %}
{{ block.body }}
else
unexpected_token(token, {{type.stringify.split("::").last}})
end
end
# Raises a `TypeCastError` for a token the decoder did not expect.
#
# *token* is the offending token; it is rendered into the message with
# `Token.to_diagnostic`, the class-level renderer defined on `CBOR::Token`.
# *expected*, when given, names the token type that was wanted and is
# appended to the message. The token's `byte_number` is passed to the error
# alongside the message.
private def unexpected_token(token, expected = nil)
  message = "Unexpected token #{Token.to_diagnostic(token)}"
  message += " expected #{expected}" if expected
  raise TypeCastError.new(message, token.byte_number)
end
end

View File

@ -1,15 +1,45 @@
module CBOR::Diagnostic
def to_s(value : CBOR::ByteArray) : String
value.to_diagnostic
require "./lexer"
require "./token"
# Reads a CBOR input into a diagnostic string.
# This consumes the IO and is mostly useful to test against the examples
# provided in the RFC and to ensure the correct functioning of the `CBOR::Lexer`.
class CBOR::Diagnostic
@lexer : Lexer
@is_array : Bool = false
def initialize(input)
@lexer = Lexer.new(input)
end
{% for type in [UInt8, Int8, UInt16, Int16, UInt32, Int32, UInt64, Int64, Int128] %}
def to_s(value : {{type}}) : String
{{type}}.to_s
end
{% end %}
# Reads the content of the IO and returns a diagnostic string
# representation of the input.
def to_s : String
result = ""
def to_s(value : String)
%("#{value}")
while val = next_value
result += val
end
result
end
# Pulls the next token from the lexer and renders it in diagnostic notation.
#
# Returns `nil` once the lexer yields no further token. A
# `Token::BytesArrayStartT` switches `@is_array` on and a `Token::BreakT`
# switches it off again. The separator is evaluated after that update, so it
# already reflects the current token.
private def next_value : String?
  token = @lexer.next_token
  return nil unless token

  case token
  when Token::BytesArrayStartT
    # This is the token type the lexer emits for the 0x5f start byte.
    @is_array = true
  when Token::BreakT
    @is_array = false
  end

  separator + Token.to_diagnostic(token)
end
# Text placed in front of each rendered token: `", "` while `@is_array` is
# set, otherwise the empty string.
private def separator : String
  @is_array ? ", " : ""
end
end

View File

@ -11,44 +11,19 @@ class CBOR::Lexer
new IO::Memory.new(slice)
end
@token : Token::T
@current_pos : Int64
@token_finished : Bool
@eof : Bool = false
def initialize(@io : IO)
@current_pos = 0
@token = Token::NullT.new(0)
@token_finished = true
end
@[AlwaysInline]
def current_token : Token::T
if @token_finished
@token_finished = false
@token = next_token
else
@token
end
end
def next_token
return nil if @eof
@[AlwaysInline]
def finish_token!
@token_finished = true
end
@[AlwaysInline]
def read_token : Token::T
if @token_finished
@token = next_token
else
finish_token!
end
@token
end
private def next_token
@current_pos = @io.pos.to_i64
current_byte = next_byte
current_byte = @io.read_byte
return nil unless current_byte
case current_byte
when 0x00..0x17
@ -85,16 +60,23 @@ class CBOR::Lexer
when 0x5b
consume_binary(read(UInt64))
when 0x5f
Token::BytesArrayT.new(@current_pos)
Token::BytesArrayStartT.new(@current_pos)
when 0xff
# TODO: Define which segment it's breaking
Token::BreakT.new(@current_pos)
else
raise ParseError.new("Unexpected first byte #{current_byte}")
raise ParseError.new("Unexpected first byte 0x#{current_byte.to_s(16)}")
end
end
private def next_byte : UInt8
private def next_byte : UInt8?
byte = @io.read_byte
raise ParseError.new("Unexpected EOF at byte #{@io.pos}") unless byte
byte
if byte
byte
else
@eof = true
nil
end
end
private def consume_int(value)

View File

@ -1,5 +1,6 @@
class CBOR::Token
record NullT, byte_number : Int64
record UndefinedT, byte_number : Int64
record BoolT, byte_number : Int64, value : Bool
record ArrayT, byte_number : Int64, size : UInt32?
record MapT, byte_number : Int64, size : UInt32?
@ -7,12 +8,26 @@ class CBOR::Token
record FloatT, byte_number : Int64, value : Float64
record StringT, byte_number : Int64, value : String
record BytesT, byte_number : Int64, value : Bytes
record StringArrayT, byte_number : Int64
record BytesArrayT, byte_number : Int64
record StringArrayStartT, byte_number : Int64
record StringArrayEndT, byte_number : Int64
record BytesArrayStartT, byte_number : Int64
record BytesArrayEndT, byte_number : Int64
alias T = NullT | BoolT | ArrayT | MapT | IntT | FloatT | StringT | BytesT | StringArrayT | BytesArrayT
alias T = NullT |
UndefinedT |
BoolT |
ArrayT |
MapT |
IntT |
FloatT |
StringT |
BytesT |
StringArrayStartT |
StringArrayEndT |
BytesArrayStartT |
BytesArrayEndT
def self.to_s(token : T)
def self.to_diagnostic(token : T) : String
case token
when IntT
token.value.to_s
@ -23,8 +38,24 @@ class CBOR::Token
"null"
when UndefinedT
"undefined"
when BoolT
token.value.to_s
when BytesArrayStartT
"(_ "
when BytesArrayEndT
")"
when FloatT
"TODO"
when StringT
"TODO"
when StringArrayT
"TODO"
when MapT
"TODO"
when ArrayT
"TODO"
else
raise "Diagnostic notation for type #{token.class} not implemented"
raise "Uknown diagnostics representation for #{token.class}"
end
end
end

View File

@ -1,5 +1,6 @@
class CBOR::BytesArray < Array(UInt8)
def to_a : Array(UInt8)
self.as(Array(UInt8))
end
def to_bytes : Bytes