diff --git a/spec/cbor/from_cbor_spec.cr b/spec/cbor/from_cbor_spec.cr index b9b4629..413d68f 100644 --- a/spec/cbor/from_cbor_spec.cr +++ b/spec/cbor/from_cbor_spec.cr @@ -27,13 +27,17 @@ describe "CBOR helpers on basic types" do {Float64, Bytes[0xfa, 0x47, 0xc3, 0x50, 0x00], 100000.0_f64}, {Set(Int8), Bytes[0x83, 0x01, 0x02, 0x03], Set(Int8){1, 2, 3}}, {Array(Int8), Bytes[0x83, 0x01, 0x02, 0x03], [1_i8, 2_i8, 3_i8]}, - # {Array(Array(Int8) | Int8), - # Bytes[0x83, 0x01, 0x82, 0x02, 0x03, 0x82, 0x04, 0x05], - # [1_i8, [2_i8, 3_i8], [4_i8, 5_i8]]}, + {Array(Array(Int8) | Int8), + Bytes[0x83, 0x01, 0x82, 0x02, 0x03, 0x82, 0x04, 0x05], + [1_i8, [2_i8, 3_i8], [4_i8, 5_i8]]}, {Array(UInt8), Bytes[0x9f, 0xff], [] of UInt8}, - # {Array(Array(Int8) | Int8), - # Bytes[0x9f, 0x01, 0x82, 0x02, 0x03, 0x9f, 0x04, 0x05, 0xff, 0xff], - # [1_i8, [2_i8, 3_i8], [4_i8, 5_i8]]}, + {Array(UInt8), Bytes[0x9f, 0x01, 0xff], [1_u8] of UInt8}, + {Array(Int32), + Bytes[0x9f, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x18, 0x18, 0x19, 0xff], + [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]}, + {Array(Array(Int8) | Int8), + Bytes[0x9f, 0x01, 0x82, 0x02, 0x03, 0x9f, 0x04, 0x05, 0xff, 0xff], + [1_i8, [2_i8, 3_i8], [4_i8, 5_i8]]}, {Hash(UInt8, UInt8), Bytes[0xa0], {} of UInt8 => UInt8}, {Hash(UInt8, UInt8), Bytes[0xa2, 0x01, 0x02, 0x03, 0x04], Hash(UInt8, UInt8){1 => 2, 3 => 4}}, {TestEnum, Bytes[0x1a, 0x00, 0x00, 0x00, 0x01], TestEnum::Foo}, diff --git a/src/cbor/decoder.cr b/src/cbor/decoder.cr index 03a24fd..bdc5edc 100644 --- a/src/cbor/decoder.cr +++ b/src/cbor/decoder.cr @@ -71,14 +71,14 @@ class CBOR::Decoder end def consume_array(&block) - read_type(Token::ArrayT, finish_token: false) do |token| - read(token.size) { yield } + read_type(Token::ArrayT) do |token| + consume_sequence(token.size) { yield } end end def consume_hash(&block) - 
read_type(Token::MapT, finish_token: false) do |token| - read(token.size) { yield } + read_type(Token::MapT) do |token| + consume_sequence(token.size) { yield } end end @@ -86,32 +86,36 @@ class CBOR::Decoder @current_token = @lexer.next_token end - private def read(size : Int32?, &block) + private def consume_sequence(size : Int32?, &block) if size - finish_token! size.times { yield } else - @lexer.until_break do |token| - @current_token = token + until @current_token.is_a?(Token::BreakT) yield end end end private macro read_type(type, finish_token = true, ignore_tag = true, &block) - # Skip the tag unless the token we want to read is a tag - {% if ignore_tag %} - if @current_token.is_a?(Token::TagT) - finish_token! - end - {% end %} + begin + # Skip the tag unless the token we want to read is a tag + {% if ignore_tag %} + if @current_token.is_a?(Token::TagT) + finish_token! + end + {% end %} - case token = @current_token - when {{type}} - {% if finish_token %}finish_token!{% end %} - {{ block.body }} - else - unexpected_token(token, {{type.stringify.split("::").last}}) + case token = @current_token + when {{type}} + {% if finish_token %} + finish_token! + {% end %} + {{ block.body }} + else + unexpected_token(token, {{type.stringify.split("::").last}}) + end + rescue err + raise CBOR::ParseError.new("{{type}} -> #{err}") end end diff --git a/src/cbor/diagnostic.cr b/src/cbor/diagnostic.cr index 6d58652..00fac46 100644 --- a/src/cbor/diagnostic.cr +++ b/src/cbor/diagnostic.cr @@ -83,14 +83,8 @@ class CBOR::Diagnostic private def read_array(size : Int32?) : Array(String) arr = size ? 
Array(String).new(size) : Array(String).new - if size - size.times do - val = next_value - raise ParseError.new("Unexpected EOF while reading array body") unless val - arr << val - end - else - @lexer.until_break { |token| arr << to_diagnostic(token) } + consume_array_body(size) do |token| + arr << to_diagnostic(token) end arr @@ -100,16 +94,44 @@ class CBOR::Diagnostic # correctly formatted in the diagnostic notation private def read_hash(size : Int32?) : Array(String) key_pairs = Array(String).new - - if size - size.times { key_pairs << key_value(*@lexer.next_pair) } - else - @lexer.pairs_until_break { |pairs| key_pairs << key_value(*pairs) } - end - + consume_map_body(size) { |pairs| key_pairs << key_value(*pairs) } key_pairs end + private def consume_array_body(size : Int32?, &block : Token::T ->) + if size + size.times do + token = @lexer.next_token + raise ParseError.new("Unexpected EOF while reading array body") unless token + yield token + end + else + loop do + token = @lexer.next_token + raise ParseError.new("Unexpected EOF while reading array body") unless token + break if token.is_a?(Token::BreakT) + yield token + end + end + end + + private def consume_map_body(size : Int32?, &block : Tuple(Token::T, Token::T) ->) + if size + size.times { yield @lexer.next_pair } + else + loop do + key = @lexer.next_token + raise ParseError.new("Unexpected EOF while reading map key") unless key + break if key.is_a?(Token::BreakT) + + value = @lexer.next_token + raise ParseError.new("Unexpected EOF while reading map value") unless value + + yield Tuple.new(key, value) + end + end + end + private def read_big_int(negative : Bool = false) : String token = @lexer.next_token raise ParseError.new("Unexpected EOF after tag") unless token diff --git a/src/cbor/from_cbor.cr b/src/cbor/from_cbor.cr index 439d440..f54dbb1 100644 --- a/src/cbor/from_cbor.cr +++ b/src/cbor/from_cbor.cr @@ -230,6 +230,8 @@ def Union.new(decoder : CBOR::Decoder) {% for type in type_order.select { |t| 
T.includes? t } %} return {{type}}.new(decoder) {% end %} + else + # This case check is non-exhaustive on purpose end {% end %} end diff --git a/src/cbor/lexer.cr b/src/cbor/lexer.cr index 49e734d..ac58d5e 100644 --- a/src/cbor/lexer.cr +++ b/src/cbor/lexer.cr @@ -1,12 +1,9 @@ class CBOR::Lexer - BREAK = 0xff - def self.new(slice : Bytes) new IO::Memory.new(slice) end @eof : Bool = false - @current_byte : UInt8 = 0 def initialize(@io : IO) end @@ -31,32 +28,6 @@ class CBOR::Lexer Tuple.new(pairs[0], pairs[1]) end - def until_break(&block : Token::T ->) - loop do - byte = next_byte - raise ParseError.new("unexpected EOF while searching for break") unless byte - break if byte == BREAK - yield decode(byte) - end - end - - # Read a pair of values until a break is reached - def pairs_until_break(&block : Tuple(Token::T, Token::T) ->) - loop do - key_byte = next_byte - raise ParseError.new("Unexpected EOF while searching for break") unless key_byte - break if key_byte == BREAK - - key = decode(key_byte) - raise ParseError.new("Unexpected EOF while reading key in pairs") unless key - - value = next_token - raise ParseError.new("Unexpected EOF while reading value in pairs") unless value - - yield Tuple.new(key, value) - end - end - private def decode(byte : UInt8) : Token::T case byte when 0x00..0x1b @@ -89,8 +60,10 @@ class CBOR::Lexer Token::FloatT.new(value: read(Float32)) when 0xfb Token::FloatT.new(value: read(Float64)) + when 0xff + Token::BreakT.new else - raise ParseError.new("Unexpected first byte 0x#{byte.to_s(16)}") + raise ParseError.new("Unexpected byte 0x#{byte.to_s(16)}") end end @@ -126,6 +99,16 @@ class CBOR::Lexer Token::StringT.new(value: value, chunks: chunks) end + private def until_break(&block : Token::T ->) + loop do + token = next_token + raise ParseError.new("Unexpected EOF while searching for 0xff (break)") unless token + break if token.is_a?(Token::BreakT) + + yield token + end + end + # Reads the size for the next token type private def 
read_size(current_byte : UInt8) : Int case current_byte diff --git a/src/cbor/token.cr b/src/cbor/token.cr index ed8551a..39e499a 100644 --- a/src/cbor/token.cr +++ b/src/cbor/token.cr @@ -7,6 +7,7 @@ module CBOR::Token record MapT, size : Int32? = nil record TagT, value : Tag record SimpleValueT, value : SimpleValue + record BreakT alias T = IntT | FloatT | @@ -15,5 +16,6 @@ module CBOR::Token ArrayT | MapT | TagT | - SimpleValueT + SimpleValueT | + BreakT end