Work on lexer and diagnostic representation
parent
dd1288089f
commit
2e2edd1908
|
@ -12,4 +12,4 @@ tasks:
|
|||
crystal tool format --check
|
||||
- test: |
|
||||
cd crystal-cbor
|
||||
crystal spec
|
||||
crystal spec --error-on-warnings
|
||||
|
|
|
@ -24,7 +24,7 @@ describe CBOR::Lexer do
|
|||
it "reads #{tt[:bytes].hexstring} as #{tt[:value].to_s}" do
|
||||
lexer = CBOR::Lexer.new(tt[:bytes])
|
||||
|
||||
token = lexer.read_token
|
||||
token = lexer.next_token
|
||||
token.should be_a(CBOR::Token::IntT)
|
||||
token.as(CBOR::Token::IntT).value.should eq(tt[:value])
|
||||
end
|
||||
|
|
|
@ -0,0 +1,103 @@
|
|||
require "./spec_helper"
|
||||
|
||||
tests = [
|
||||
{ %(0), "00" },
|
||||
{ %(1), "01" },
|
||||
{ %(10), "0a" },
|
||||
{ %(23), "17" },
|
||||
{ %(24), "18 18" },
|
||||
{ %(25), "18 19" },
|
||||
{ %(100), "18 64" },
|
||||
{ %(1000), "19 03 e8" },
|
||||
{ %(1000000), "1a 00 0f 42 40" },
|
||||
{ %(1000000000000), "1b 00 00 00 e8 d4 a5 10 00" },
|
||||
{ %(18446744073709551615), "1b ff ff ff ff ff ff ff ff" },
|
||||
# { %(18446744073709551616), "c2 49 01 00 00 00 00 00 00 00 00" },
|
||||
{ %(-18446744073709551616), "3b ff ff ff ff ff ff ff ff" },
|
||||
# { %(-18446744073709551617), "c3 49 01 00 00 00 00 00 00 00 00" },
|
||||
{ %(-1), "20" },
|
||||
{ %(-10), "29" },
|
||||
{ %(-100), "38 63" },
|
||||
{ %(-1000), "39 03 e7" },
|
||||
# { %(0.0), "f9 00 00" },
|
||||
# { %(-0.0), "f9 80 00" },
|
||||
# { %(1.0), "f9 3c 00" },
|
||||
# { %(1.1), "fb 3f f1 99 99 99 99 99 9a" },
|
||||
# { %(1.5), "f9 3e 00" },
|
||||
# { %(65504.0), "f9 7b ff" },
|
||||
# { %(100000.0), "fa 47 c3 50 00" },
|
||||
# { %(3.4028234663852886e+38), "fa 7f 7f ff ff" },
|
||||
# { %(1.0e+300), "fb 7e 37 e4 3c 88 00 75 9c" },
|
||||
# { %(5.960464477539063e-8), "f9 00 01" },
|
||||
# { %(0.00006103515625), "f9 04 00" },
|
||||
# { %(-4.0), "f9 c4 00" },
|
||||
# { %(-4.1), "fb c0 10 66 66 66 66 66 66" },
|
||||
# { %(Infinity), "f9 7c 00" },
|
||||
# { %(NaN), "f9 7e 00" },
|
||||
# { %(-Infinity), "f9 fc 00" },
|
||||
# { %(Infinity), "fa 7f 80 00 00" },
|
||||
# { %(NaN), "fa 7f c0 00 00" },
|
||||
# { %(-Infinity), "fa ff 80 00 00" },
|
||||
# { %(Infinity), "fb 7f f0 00 00 00 00 00 00" },
|
||||
# { %(NaN), "fb 7f f8 00 00 00 00 00 00" },
|
||||
# { %(-Infinity), "fb ff f0 00 00 00 00 00 00" },
|
||||
# { %(false), "f4" },
|
||||
# { %(true), "f5" },
|
||||
# { %(null), "f6" },
|
||||
# { %(undefined), "f7" },
|
||||
# { %(simple(16)), "f0" },
|
||||
# { %(simple(24)), "f8 18" },
|
||||
# { %(simple(255)), "f8 ff" },
|
||||
# { %(0("2013-03-21T20:04:00Z")), "c0 74 32 30 31 33 2d 30 33 2d 32 31 54 32 30 3a 30 34 3a 30 30 5a" },
|
||||
# { %(1(1363896240)), "c1 1a 51 4b 67 b0" },
|
||||
# { %(1(1363896240.5)), "c1 fb 41 d4 52 d9 ec 20 00 00" },
|
||||
# { %(23(h'01020304')), "d7 44 01 02 03 04" },
|
||||
# { %(24(h'6449455446')), "d8 18 45 64 49 45 54 46" },
|
||||
# { %(32("http://www.example.com")), "d8 20 76 68 74 74 70 3a 2f 2f 77 77 77 2e 65 78 61 6d 70 6c 65 2e 63 6f 6d" },
|
||||
{ %(h''), "40" },
|
||||
{ %(h'01020304'), "44 01 02 03 04" },
|
||||
# { %(""), "60" },
|
||||
# { %("a"), "61 61" },
|
||||
# { %("IETF"), "64 49 45 54 46" },
|
||||
# { %(""\\"), "62 22 5c" },
|
||||
# { %("\u00fc"), "62 c3 bc" },
|
||||
# { %("\u6c34"), "63 e6 b0 b4" },
|
||||
# { %("\ud800\udd51"), "64 f0 90 85 91" },
|
||||
# { %([]), "80" },
|
||||
# { %([1, 2, 3]), "83 01 02 03" },
|
||||
# { %([1, [2, 3], [4, 5]]), "83 01 82 02 03 82 04 05" },
|
||||
# { %([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]), "98 19 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f 10 11 12 13 14 15 16 17 18 18 18 19" },
|
||||
# { %({}), "a0" },
|
||||
# { %({1: 2, 3: 4}), "a2 01 02 03 04" },
|
||||
# { %({"a": 1, "b": [2, 3]}), "a2 61 61 01 61 62 82 02 03" },
|
||||
# { %(["a", {"b": "c"}]), "82 61 61 a1 61 62 61 63" },
|
||||
# { %({"a": "A", "b": "B", "c": "C", "d": "D", "e": "E"}), "a5 61 61 61 41 61 62 61 42 61 63 61 43 61 64 61 44 61 65 61 45" },
|
||||
{ %((_ h'0102', h'030405')), "5f 42 01 02 43 03 04 05 ff" },
|
||||
# { %((_ "strea", "ming")), "7f 65 73 74 72 65 61 64 6d 69 6e 67 ff" },
|
||||
# { %([_ ]), "9f ff" },
|
||||
# { %([_ 1, [2, 3], [_ 4, 5]]), "9f 01 82 02 03 9f 04 05 ff ff" },
|
||||
# { %([_ 1, [2, 3], [4, 5]]), "9f 01 82 02 03 82 04 05 ff" },
|
||||
# { %([1, [2, 3], [_ 4, 5]]), "83 01 82 02 03 9f 04 05 ff" },
|
||||
# { %([1, [_ 2, 3], [4, 5]]), "83 01 9f 02 03 ff 82 04 05" },
|
||||
# { %([_ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]), "9f 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f 10 11 12 13 14 15 16 17 18 18 18 19 ff" },
|
||||
# { %({_ "a": 1, "b": [_ 2, 3]}), "bf 61 61 01 61 62 9f 02 03 ff ff" },
|
||||
# { %(["a", {_ "b": "c"}]), "82 61 61 bf 61 62 61 63 ff" },
|
||||
# { %({_ "Fun": true, "Amt": -2}), "bf 63 46 75 6e f5 63 41 6d 74 21 ff" },
|
||||
]
|
||||
|
||||
describe "Examples from RFC7049 Appendix A" do
|
||||
tests.each_with_index do |tt, index|
|
||||
describe "test ##{index}" do
|
||||
diagnostic, hex_string = tt
|
||||
|
||||
bytes_arr = hex_string.split.map(&.to_u8(16))
|
||||
bytes = Bytes.new(bytes_arr.to_unsafe, bytes_arr.size)
|
||||
|
||||
it "reads #{bytes.hexstring} as #{diagnostic}" do
|
||||
result = CBOR::Diagnostic.new(bytes).to_s
|
||||
|
||||
result.should eq(diagnostic)
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
|
@ -1,84 +0,0 @@
|
|||
tests = [
|
||||
{ %(0), "00" },
|
||||
{ %(1), "01" },
|
||||
{ %(10), "0a" },
|
||||
{ %(23), "17" },
|
||||
{ %(24), "18 18" },
|
||||
{ %(25), "18 19" },
|
||||
{ %(100), "18 64" },
|
||||
{ %(1000), "19 03 e8" },
|
||||
{ %(1000000), "1a 00 0f 42 40" },
|
||||
{ %(1000000000000), "1b 00 00 00 e8 d4 a5 10 00" },
|
||||
{ %(18446744073709551615), "1b ff ff ff ff ff ff ff ff" },
|
||||
{ %(18446744073709551616), "c2 49 01 00 00 00 00 00 00 00 00" },
|
||||
{ %(-18446744073709551616), "3b ff ff ff ff ff ff ff ff" },
|
||||
{ %(-18446744073709551617), "c3 49 01 00 00 00 00 00 00 00 00" },
|
||||
{ %(-1), "20" },
|
||||
{ %(-10), "29" },
|
||||
{ %(-100), "38 63" },
|
||||
{ %(-1000), "39 03 e7" },
|
||||
{ %(0.0), "f9 00 00" },
|
||||
{ %(-0.0), "f9 80 00" },
|
||||
{ %(1.0), "f9 3c 00" },
|
||||
{ %(1.1), "fb 3f f1 99 99 99 99 99 9a" },
|
||||
{ %(1.5), "f9 3e 00" },
|
||||
{ %(65504.0), "f9 7b ff" },
|
||||
{ %(100000.0), "fa 47 c3 50 00" },
|
||||
{ %(3.4028234663852886e+38), "fa 7f 7f ff ff" },
|
||||
{ %(1.0e+300), "fb 7e 37 e4 3c 88 00 75 9c" },
|
||||
{ %(5.960464477539063e-8), "f9 00 01" },
|
||||
{ %(0.00006103515625), "f9 04 00" },
|
||||
{ %(-4.0), "f9 c4 00" },
|
||||
{ %(-4.1), "fb c0 10 66 66 66 66 66 66" },
|
||||
{ %(Infinity), "f9 7c 00" },
|
||||
{ %(NaN), "f9 7e 00" },
|
||||
{ %(-Infinity), "f9 fc 00" },
|
||||
{ %(Infinity), "fa 7f 80 00 00" },
|
||||
{ %(NaN), "fa 7f c0 00 00" },
|
||||
{ %(-Infinity), "fa ff 80 00 00" },
|
||||
{ %(Infinity), "fb 7f f0 00 00 00 00 00 00" },
|
||||
{ %(NaN), "fb 7f f8 00 00 00 00 00 00" },
|
||||
{ %(-Infinity), "fb ff f0 00 00 00 00 00 00" },
|
||||
{ %(false), "f4" },
|
||||
{ %(true), "f5" },
|
||||
{ %(null), "f6" },
|
||||
{ %(undefined), "f7" },
|
||||
{ %(simple(16)), "f0" },
|
||||
{ %(simple(24)), "f8 18" },
|
||||
{ %(simple(255)), "f8 ff" },
|
||||
{ %(0("2013-03-21T20:04:00Z")), "c0 74 32 30 31 33 2d 30 33 2d 32 31 54 32 30 3a 30 34 3a 30 30 5a" },
|
||||
{ %(1(1363896240)), "c1 1a 51 4b 67 b0" },
|
||||
{ %(1(1363896240.5)), "c1 fb 41 d4 52 d9 ec 20 00 00" },
|
||||
{ %(23(h'01020304')), "d7 44 01 02 03 04" },
|
||||
{ %(24(h'6449455446')), "d8 18 45 64 49 45 54 46" },
|
||||
{ %(32("http://www.example.com")), "d8 20 76 68 74 74 70 3a 2f 2f 77 77 77 2e 65 78 61 6d 70 6c 65 2e 63 6f 6d" },
|
||||
{ %(h''), "40" },
|
||||
{ %(h'01020304'), "44 01 02 03 04" },
|
||||
{ %(""), "60" },
|
||||
{ %("a"), "61 61" },
|
||||
{ %("IETF"), "64 49 45 54 46" },
|
||||
{ %(""\\"), "62225c" },
|
||||
{ %("\u00fc"), "62 c3 bc" },
|
||||
{ %("\u6c34"), "63 e6 b0 b4" },
|
||||
{ %("\ud800\udd51"), "64 f0 90 85 91" },
|
||||
{ %([]), "80" },
|
||||
{ %([1, 2, 3]), "83 01 02 03" },
|
||||
{ %([1, [2, 3], [4, 5]]), "83 01 82 02 03 82 04 05" },
|
||||
{ %([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]), "98 19 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f 10 11 12 13 14 15 16 17 18 18 18 19" },
|
||||
{ %({}), "a0" },
|
||||
{ %({1: 2, 3: 4}), "a2 01 02 03 04" },
|
||||
{ %({"a": 1, "b": [2, 3]}), "a2 61 61 01 61 62 82 02 03" },
|
||||
{ %(["a", {"b": "c"}]), "82 61 61 a1 61 62 61 63" },
|
||||
{ %({"a": "A", "b": "B", "c": "C", "d": "D", "e": "E"}), "a5 61 61 61 41 61 62 61 42 61 63 61 43 61 64 61 44 61 65 61 45" },
|
||||
{ %((_ h'0102', h'030405')), "5f 42 01 02 43 03 04 05 ff" },
|
||||
{ %((_ "strea", "ming")), "7f 65 73 74 72 65 61 64 6d 69 6e 67 ff" },
|
||||
{ %([_ ]), "9f ff" },
|
||||
{ %([_ 1, [2, 3], [_ 4, 5]]), "9f 01 82 02 03 9f 04 05 ff ff" },
|
||||
{ %([_ 1, [2, 3], [4, 5]]), "9f 01 82 02 03 82 04 05 ff" },
|
||||
{ %([1, [2, 3], [_ 4, 5]]), "83 01 82 02 03 9f 04 05 ff" },
|
||||
{ %([1, [_ 2, 3], [4, 5]]), "83 01 9f 02 03 ff 82 04 05" },
|
||||
{ %([_ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]), "9f 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f 10 11 12 13 14 15 16 17 18 18 18 19 ff" },
|
||||
{ %({_ "a": 1, "b": [_ 2, 3]}), "bf 61 61 01 61 62 9f 02 03 ff ff" },
|
||||
{ %(["a", {_ "b": "c"}]), "82 61 61 bf 61 62 61 63 ff" },
|
||||
{ %({_ "Fun": true, "Amt": -2}), "bf 63 46 75 6e f5 63 41 6d 74 21 ff" },
|
||||
]
|
|
@ -22,4 +22,25 @@ abstract class CBOR::Decoder
|
|||
# Consume the array :)
|
||||
end
|
||||
end
|
||||
|
||||
# Placeholder for reading the body of a byte array: dispatches through the
# `read_type` macro and currently does nothing with the matched token (the
# block body is empty).
# NOTE(review): no `ByteArrayT` record is visible among the `CBOR::Token`
# records in this view (only `BytesT`, `BytesArrayStartT`, `BytesArrayEndT`) —
# confirm which token type is intended here.
private def read_bytes_array_body
|
||||
read_type(Token::ByteArrayT) do |token|
|
||||
end
|
||||
end
|
||||
|
||||
# Expands to a `case` over `current_token` that runs the given block only when
# the token matches *type*.
#
# * *type* - the token type to match in the `when` branch.
# * *finish_token* - when true (the default) the expansion calls
#   `finish_token!` before the block body.
# * *block* - its body is pasted inline into the matching branch, so it can
#   refer to the local `token` assigned by the `case`.
#
# Any other token is passed to `unexpected_token` together with the last
# `::`-separated segment of the stringified *type* as the expected name.
private macro read_type(type, finish_token = true, &block)
|
||||
case token = current_token
|
||||
when {{type}}
|
||||
{% if finish_token %}finish_token!{% end %}
|
||||
{{ block.body }}
|
||||
else
|
||||
unexpected_token(token, {{type.stringify.split("::").last}})
|
||||
end
|
||||
end
|
||||
|
||||
# Raises a `TypeCastError` reporting a token the decoder did not expect.
#
# * *token* - the offending token; it is rendered with
#   `Token.to_diagnostic` and its `byte_number` is passed to the error.
# * *expected* - optional name of the token type that was expected; when
#   given it is appended to the message.
private def unexpected_token(token, expected = nil)
  # `Token.to_s(token)` was renamed to `Token.to_diagnostic(token)`.
  message = "Unexpected token #{Token.to_diagnostic(token)}"
  message += " expected #{expected}" if expected
  raise TypeCastError.new(message, token.byte_number)
end
|
||||
end
|
||||
|
|
|
@ -1,15 +1,45 @@
|
|||
module CBOR::Diagnostic
|
||||
def to_s(value : CBOR::ByteArray) : String
|
||||
value.to_diagnostic
|
||||
require "./lexer"
|
||||
require "./token"
|
||||
|
||||
# Reads a CBOR input into a diagnostic string.
|
||||
# This consumes the IO and is mostly useful for testing against the examples
|
||||
# provided in the RFC and ensuring a correct functioning of the `CBOR::Lexer`.
|
||||
class CBOR::Diagnostic
|
||||
@lexer : Lexer
|
||||
@is_array : Bool = false
|
||||
|
||||
def initialize(input)
|
||||
@lexer = Lexer.new(input)
|
||||
end
|
||||
|
||||
{% for type in [UInt8, Int8, UInt16, Int16, UInt32, Int32, UInt64, Int64, Int128] %}
|
||||
def to_s(value : {{type}}) : String
|
||||
{{type}}.to_s
|
||||
end
|
||||
{% end %}
|
||||
# Reads the content of the IO and prints out a diagnostic string
|
||||
# representation of the input.
|
||||
def to_s : String
|
||||
result = ""
|
||||
|
||||
def to_s(value : String)
|
||||
%("#{value}")
|
||||
while val = next_value
|
||||
result += val
|
||||
end
|
||||
|
||||
result
|
||||
end
|
||||
|
||||
# Reads the next token from the lexer and renders it in diagnostic notation.
#
# Returns `nil` when the lexer yields no token. Otherwise returns the current
# separator followed by `Token.to_diagnostic(token)`.
private def next_value : String?
  token = @lexer.next_token
  return nil unless token

  # Remember whether we are between the start marker of an indefinite-length
  # byte string and its break, so `separator` knows when to emit ", ".
  case token
  when Token::BytesArrayStartT
    @is_array = true
  when Token::BreakT
    @is_array = false
  end

  # NOTE(review): the flag is updated before `separator` is evaluated, so the
  # start token itself is prefixed with ", " and the break token is not —
  # confirm this matches the intended diagnostic output.
  separator + Token.to_diagnostic(token)
end
|
||||
|
||||
# Text placed in front of a rendered token: ", " while `@is_array` is set,
# an empty string otherwise.
private def separator : String
  @is_array ? ", " : ""
end
|
||||
end
|
||||
|
|
|
@ -11,44 +11,19 @@ class CBOR::Lexer
|
|||
new IO::Memory.new(slice)
|
||||
end
|
||||
|
||||
@token : Token::T
|
||||
@current_pos : Int64
|
||||
@token_finished : Bool
|
||||
@eof : Bool = false
|
||||
|
||||
def initialize(@io : IO)
|
||||
@current_pos = 0
|
||||
@token = Token::NullT.new(0)
|
||||
@token_finished = true
|
||||
end
|
||||
|
||||
@[AlwaysInline]
|
||||
def current_token : Token::T
|
||||
if @token_finished
|
||||
@token_finished = false
|
||||
@token = next_token
|
||||
else
|
||||
@token
|
||||
end
|
||||
end
|
||||
def next_token
|
||||
return nil if @eof
|
||||
|
||||
@[AlwaysInline]
|
||||
def finish_token!
|
||||
@token_finished = true
|
||||
end
|
||||
|
||||
@[AlwaysInline]
|
||||
def read_token : Token::T
|
||||
if @token_finished
|
||||
@token = next_token
|
||||
else
|
||||
finish_token!
|
||||
end
|
||||
@token
|
||||
end
|
||||
|
||||
private def next_token
|
||||
@current_pos = @io.pos.to_i64
|
||||
current_byte = next_byte
|
||||
current_byte = @io.read_byte
|
||||
return nil unless current_byte
|
||||
|
||||
case current_byte
|
||||
when 0x00..0x17
|
||||
|
@ -85,16 +60,23 @@ class CBOR::Lexer
|
|||
when 0x5b
|
||||
consume_binary(read(UInt64))
|
||||
when 0x5f
|
||||
Token::BytesArrayT.new(@current_pos)
|
||||
Token::BytesArrayStartT.new(@current_pos)
|
||||
when 0xff
|
||||
# TODO: Define which segment it's breaking
|
||||
Token::BreakT.new(@current_pos)
|
||||
else
|
||||
raise ParseError.new("Unexpected first byte #{current_byte}")
|
||||
raise ParseError.new("Unexpected first byte 0x#{current_byte.to_s(16)}")
|
||||
end
|
||||
end
|
||||
|
||||
private def next_byte : UInt8
|
||||
private def next_byte : UInt8?
|
||||
byte = @io.read_byte
|
||||
raise ParseError.new("Unexpected EOF at byte #{@io.pos}") unless byte
|
||||
if byte
|
||||
byte
|
||||
else
|
||||
@eof = true
|
||||
nil
|
||||
end
|
||||
end
|
||||
|
||||
private def consume_int(value)
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
class CBOR::Token
|
||||
record NullT, byte_number : Int64
|
||||
record UndefinedT, byte_number : Int64
|
||||
record BoolT, byte_number : Int64, value : Bool
|
||||
record ArrayT, byte_number : Int64, size : UInt32?
|
||||
record MapT, byte_number : Int64, size : UInt32?
|
||||
|
@ -7,12 +8,26 @@ class CBOR::Token
|
|||
record FloatT, byte_number : Int64, value : Float64
|
||||
record StringT, byte_number : Int64, value : String
|
||||
record BytesT, byte_number : Int64, value : Bytes
|
||||
record StringArrayT, byte_number : Int64
|
||||
record BytesArrayT, byte_number : Int64
|
||||
record StringArrayStartT, byte_number : Int64
|
||||
record StringArrayEndT, byte_number : Int64
|
||||
record BytesArrayStartT, byte_number : Int64
|
||||
record BytesArrayEndT, byte_number : Int64
|
||||
|
||||
alias T = NullT | BoolT | ArrayT | MapT | IntT | FloatT | StringT | BytesT | StringArrayT | BytesArrayT
|
||||
alias T = NullT |
|
||||
UndefinedT |
|
||||
BoolT |
|
||||
ArrayT |
|
||||
MapT |
|
||||
IntT |
|
||||
FloatT |
|
||||
StringT |
|
||||
BytesT |
|
||||
StringArrayStartT |
|
||||
StringArrayEndT |
|
||||
BytesArrayStartT |
|
||||
BytesArrayEndT
|
||||
|
||||
def self.to_s(token : T)
|
||||
def self.to_diagnostic(token : T) : String
|
||||
case token
|
||||
when IntT
|
||||
token.value.to_s
|
||||
|
@ -23,8 +38,24 @@ class CBOR::Token
|
|||
"null"
|
||||
when UndefinedT
|
||||
"undefined"
|
||||
when BoolT
|
||||
token.value.to_s
|
||||
when BytesArrayStartT
|
||||
"(_ "
|
||||
when BytesArrayEndT
|
||||
")"
|
||||
when FloatT
|
||||
"TODO"
|
||||
when StringT
|
||||
"TODO"
|
||||
when StringArrayT
|
||||
"TODO"
|
||||
when MapT
|
||||
"TODO"
|
||||
when ArrayT
|
||||
"TODO"
|
||||
else
|
||||
raise "Diagnostic notation for type #{token.class} not implemented"
|
||||
raise "Uknown diagnostics representation for #{token.class}"
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
class CBOR::BytesArray < Array(UInt8)
|
||||
def to_a : Array(UInt8)
|
||||
self.as(Array(UInt8))
|
||||
end
|
||||
|
||||
def to_bytes : Bytes
|
||||
|
|
Loading…
Reference in New Issue