From 28f9cfa1f505cdc5d44c63e3fdd8335218caf397 Mon Sep 17 00:00:00 2001
From: Alberto Restifo
Date: Wed, 22 Apr 2020 09:33:46 +0200
Subject: [PATCH] Add support for string streaming

---
Reviewer notes (git am ignores the text between the separator above and the
first "diff --git" line, so none of this becomes part of the commit message):

* Byte 0x7f now opens a Kind::StringArray token; its chunks are collected by
  Lexer#read_string_array into the new CBOR::StringArray class.
* The chunk loop that used to be inlined in read_bytes_array is extracted into
  the private read_until helper and shared by both array readers.
* CBOR::Token values are now typed as the new CBOR::Lexer::Type alias, which
  includes BytesArray and StringArray; BytesArray is removed from the union
  edited in src/cbor.cr.
* CBOR::Diagnostic prints a StringArray through StringArray#to_diagnostic, and
  the (_ "strea", "ming") case in spec/rfc_spec.cr is enabled.
* NOTE(review): read_until raises a bare "Unexpected EOF", whereas the loop it
  replaces said "Unexpected EOF while reading bytes array".
* NOTE(review): the unchanged context line "when 0x7b" reads its length as
  UInt16 right after a UInt32 branch; RFC 7049 assigns 0x7b an 8-byte length.
  This looks pre-existing - confirm and fix in a separate change.
* NOTE(review): this copy of the patch carries no author e-mail address and
  its indentation was reconstructed; check both before applying.

 spec/rfc_spec.cr              |  2 +-
 src/cbor.cr                   |  3 +-
 src/cbor/diagnostic.cr        |  2 ++
 src/cbor/lexer.cr             | 58 +++++++++++++++++++++++++++++++----
 src/cbor/token.cr             |  2 +-
 src/cbor/type/string_array.cr | 13 ++++++++
 6 files changed, 70 insertions(+), 10 deletions(-)
 create mode 100644 src/cbor/type/string_array.cr

diff --git a/spec/rfc_spec.cr b/spec/rfc_spec.cr
index 3dccb55..0755e24 100644
--- a/spec/rfc_spec.cr
+++ b/spec/rfc_spec.cr
@@ -73,7 +73,7 @@ tests = [
   # { %(["a", {"b": "c"}]), "82 61 61 a1 61 62 61 63" },
   # { %({"a": "A", "b": "B", "c": "C", "d": "D", "e": "E"}), "a5 61 61 61 41 61 62 61 42 61 63 61 43 61 64 61 44 61 65 61 45" },
   { %((_ h'0102', h'030405')), "5f 42 01 02 43 03 04 05 ff" },
-  # { %((_ "strea", "ming")), "7f 65 73 74 72 65 61 64 6d 69 6e 67 ff" },
+  { %((_ "strea", "ming")), "7f 65 73 74 72 65 61 64 6d 69 6e 67 ff" },
   # { %([_ ]), "9f ff" },
   # { %([_ 1, [2, 3], [_ 4, 5]]), "9f 01 82 02 03 9f 04 05 ff ff" },
   # { %([_ 1, [2, 3], [4, 5]]), "9f 01 82 02 03 82 04 05 ff" },
diff --git a/src/cbor.cr b/src/cbor.cr
index d6bc043..8418855 100644
--- a/src/cbor.cr
+++ b/src/cbor.cr
@@ -19,6 +19,5 @@ module CBOR
                UInt32 |
                Int64 |
                UInt64 |
-               Int128 |
-               BytesArray
+               Int128
 end
diff --git a/src/cbor/diagnostic.cr b/src/cbor/diagnostic.cr
index 9774374..63eb6ea 100644
--- a/src/cbor/diagnostic.cr
+++ b/src/cbor/diagnostic.cr
@@ -34,6 +34,8 @@ class CBOR::Diagnostic
       "h'#{token[:value].as(Bytes).hexstring}'"
     when Kind::BytesArray
       token[:value].as(BytesArray).to_diagnostic
+    when Kind::StringArray
+      token[:value].as(StringArray).to_diagnostic
     else
       token[:kind].to_s
     end
diff --git a/src/cbor/lexer.cr b/src/cbor/lexer.cr
index 850cc0d..2b69efe 100644
--- a/src/cbor/lexer.cr
+++ b/src/cbor/lexer.cr
@@ -1,6 +1,25 @@
 require "./token"
 
 class CBOR::Lexer
+  # Types returned by the lexer
+  alias Type = Nil |
+               Bool |
+               String |
+               Bytes |
+               Array(Type) |
+               Hash(Type, Type) |
+               Int8 |
+               UInt8 |
+               Int16 |
+               UInt16 |
+               Int32 |
+               UInt32 |
+               Int64 |
+               UInt64 |
+               Int128 |
+               BytesArray |
+               StringArray
+
   def self.new(string : String)
     new IO::Memory.new(string)
   end
@@ -47,6 +66,8 @@ class CBOR::Lexer
       {kind: token[:kind], value: nil}
     when Kind::BytesArray
       {kind: token[:kind], value: read_bytes_array}
+    when Kind::StringArray
+      {kind: token[:kind], value: read_string_array}
     end
   end
 
@@ -54,17 +75,24 @@ class CBOR::Lexer
   def read_bytes_array : CBOR::BytesArray
     bytes = BytesArray.new
 
-    loop do
-      token = next_token
-      raise ParseError.new("Unexpected EOF while reading bytes array") unless token
-      break if token[:kind] == Kind::BytesArrayEnd
-      raise ParseError.new("Illegal token #{token.class} while reading bytes array") unless token[:kind] == Kind::Bytes
-      bytes << token[:value].as(Bytes)
+    read_until(Kind::BytesArrayEnd, only: Kind::Bytes) do |chunk|
+      bytes << chunk.as(Bytes)
     end
 
     bytes
   end
 
+  # Reads until break for chunks of strings
+  def read_string_array : CBOR::StringArray
+    strings = StringArray.new
+
+    read_until(Kind::StringArrayEnd, only: Kind::String) do |chunk|
+      strings << chunk.as(String)
+    end
+
+    strings
+  end
+
   private def next_token : Token?
     return nil if @eof
 
@@ -118,6 +146,8 @@ class CBOR::Lexer
       consume_string(read(UInt32))
     when 0x7b
       consume_string(read(UInt16))
+    when 0x7f
+      {kind: open_token(Kind::StringArray), value: nil}
     when 0xff
       {kind: finish_token, value: nil}
     else
@@ -125,6 +155,22 @@ class CBOR::Lexer
     end
   end
 
+  # Reads tokens until it meets the stop kind.
+  # Optionally it can fail when the read token is not of the passed kind.
+  private def read_until(stop : Kind, only : Kind?, &block)
+    loop do
+      token = next_token
+      raise ParseError.new("Unexpected EOF") unless token
+      break if token[:kind] == stop
+
+      if only && token[:kind] != only
+        raise ParseError.new("Illegal token #{token[:kind].to_s} while reading #{only.to_s} array")
+      end
+
+      yield token[:value]
+    end
+  end
+
   private def next_byte : UInt8?
     byte = @io.read_byte
     if byte
diff --git a/src/cbor/token.cr b/src/cbor/token.cr
index 5a5b44d..38da57f 100644
--- a/src/cbor/token.cr
+++ b/src/cbor/token.cr
@@ -15,4 +15,4 @@ enum CBOR::Kind
   Map
 end
 
-alias CBOR::Token = NamedTuple(kind: Kind, value: Type)
+alias CBOR::Token = NamedTuple(kind: Kind, value: Lexer::Type)
diff --git a/src/cbor/type/string_array.cr b/src/cbor/type/string_array.cr
new file mode 100644
index 0000000..10d09dd
--- /dev/null
+++ b/src/cbor/type/string_array.cr
@@ -0,0 +1,13 @@
+class CBOR::StringArray < Array(String)
+  def to_s : String
+    join
+  end
+
+  def to_diagnostic : String
+    "(_ #{map { |s| quote(s) }.join(", ")})"
+  end
+
+  private def quote(chunk : String) : String
+    %("#{chunk}")
+  end
+end