diff --git a/spec/cbor/lexer_spec.cr b/spec/cbor/lexer_spec.cr
index 5772685..ad9eb00 100644
--- a/spec/cbor/lexer_spec.cr
+++ b/spec/cbor/lexer_spec.cr
@@ -1,100 +1,33 @@
 require "../spec_helper"
 describe CBOR::Lexer do
-  describe "examples from the RFC7049 Appendix A" do
-    tests = [
-      { %(0), "00" },
-      { %(1), "01" },
-      { %(10), "0a" },
-      { %(23), "17" },
-      { %(24), "18 18" },
-      { %(25), "18 19" },
-      { %(100), "18 64" },
-      { %(1000), "19 03 e8" },
-      { %(1000000), "1a 00 0f 42 40" },
-      { %(1000000000000), "1b 00 00 00 e8 d4 a5 10 00" },
-      { %(18446744073709551615), "1b ff ff ff ff ff ff ff ff" },
-      # { %(18446744073709551616), "c2 49 01 00 00 00 00 00 00 00 00"},
-      { %(-18446744073709551616), "3b ff ff ff ff ff ff ff ff" },
-      # { %(-18446744073709551617), "c3 49 01 00 00 00 00 00 00 00 00"},
-      { %(-1), "20" },
-      { %(-10), "29" },
-      { %(-100), "38 63" },
-      { %(-1000), "39 03 e7" },
-      # { %(0.0), "f9 00 00"},
-      # { %(-0.0), "f9 80 00"},
-      # { %(1.0), "f9 3c 00"},
-      # { %(1.1), "fb 3f f1 99 99 99 99 99 9a"},
-      # { %(1.5), "f9 3e 00"},
-      # { %(65504.0), "f9 7b ff"},
-      # { %(100000.0), "fa 47 c3 50 00"},
-      # { %(3.4028234663852886e+38), "fa 7f 7f ff ff"},
-      # { %(1.0e+300), "fb 7e 37 e4 3c 88 00 75 9c"},
-      # { %(5.960464477539063e-8), "f9 00 01"},
-      # { %(0.00006103515625), "f9 04 00"},
-      # { %(-4.0), "f9 c4 00"},
-      # { %(-4.1), "fb c0 10 66 66 66 66 66 66"},
-      # { %(Infinity), "f9 7c 00"},
-      # { %(NaN), "f9 7e 00"},
-      # { %(-Infinity), "f9 fc 00"},
-      # { %(Infinity), "fa 7f 80 00 00"},
-      # { %(NaN), "fa 7f c0 00 00"},
-      # { %(-Infinity), "fa ff 80 00 00"},
-      # { %(Infinity), "fb 7f f0 00 00 00 00 00 00"},
-      # { %(NaN), "fb 7f f8 00 00 00 00 00 00"},
-      # { %(-Infinity), "fb ff f0 00 00 00 00 00 00"},
-      # { %(false), "f4"},
-      # { %(true), "f5"},
-      # { %(null), "f6"},
-      # { %(undefined), "f7"},
-      # { %(simple(16)), "f0"},
-      # { %(simple(24)), "f8 18"},
-      # { %(simple(255)), "f8 ff"},
-      # { %(0(\"2013-03-21T20:04:00Z\")), "c0 74 32 30 31 33 2d 30 33 2d 32 31 54 32 30 3a 30 34 3a 30 30 5a"},
-      # { %(1(1363896240)), "c1 1a 51 4b 67 b0"},
-      # { %(1(1363896240.5)), "c1 fb 41 d4 52 d9 ec 20 00 00"},
-      # { %(23(h'01020304')), "d7 44 01 02 03 04"},
-      # { %(24(h'6449455446')), "d8 18 45 64 49 45 54 46"},
-      # { %(32("http://www.example.com")), "d8 20 76 68 74 74 70 3a 2f 2f 77 77 77 2e 65 78 61 6d 70 6c 65 2e 63 6f 6d" },
-      { %(h''), "40" },
-      { %(h'01020304'), "44 01 02 03 04" },
-      # { %(""), "60" },
-      # { %("a"), "61 61" },
-      # { %("IETF"), "64 49 45 54 46" },
-      # { %(""\\"), "62225c" },
-      # { %("\u00fc"), "62 c3 bc" },
-      # { %("\u6c34"), "63 e6 b0 b4" },
-      # { %("\ud800\udd51"), "64 f0 90 85 91" },
-      # { %([]), "80" },
-      # { %([1, 2, 3]), "83 01 02 03" },
-      # { %([1, [2, 3], [4, 5]]), "83 01 82 02 03 82 04 05" },
-      # { %([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]), "98 19 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f 10 11 12 13 14 15 16 17 18 18 18 19" },
-      # { %({}), "a0" },
-      # { %({1: 2, 3: 4}), "a2 01 02 03 04" },
-      # { %({"a": 1, "b": [2, 3]}), "a2 61 61 01 61 62 82 02 03" },
-      # { %(["a", {"b": "c"}]), "82 61 61 a1 61 62 61 63" },
-      # { %({"a": "A", "b": "B", "c": "C", "d": "D", "e": "E"}), "a5 61 61 61 41 61 62 61 42 61 63 61 43 61 64 61 44 61 65 61 45" },
-      # { %((_ h'0102', h'030405')), "5f 42 01 02 43 03 04 05 ff" },
-      # { %((_ "strea", "ming")), "7f 65 73 74 72 65 61 64 6d 69 6e 67 ff" },
-      # { %([_ ]), "9f ff" },
-      # { %([_ 1, [2, 3], [_ 4, 5]]), "9f 01 82 02 03 9f 04 05 ff ff" },
-      # { %([_ 1, [2, 3], [4, 5]]), "9f 01 82 02 03 82 04 05 ff" },
-      # { %([1, [2, 3], [_ 4, 5]]), "83 01 82 02 03 9f 04 05 ff" },
-      # { %([1, [_ 2, 3], [4, 5]]), "83 01 9f 02 03 ff 82 04 05" },
-      # { %([_ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]), "9f 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f 10 11 12 13 14 15 16 17 18 18 18 19 ff" },
-      # { %({_ "a": 1, "b": [_ 2, 3]}), "bf 61 61 01 61 62 9f 02 03 ff ff" },
-      # { %(["a", {_ "b": "c"}]), "82 61 61 bf 61 62 61 63 ff" },
-      # { %({_ "Fun": true, "Amt": -2}), "bf 63 46 75 6e f5 63 41 6d 74 21 ff" },
-    ]
+  describe "read_token" do
+    describe "reads an int" do
+      tests = [
+        {value: 0, bytes: Bytes[0x00]},
+        {value: 1, bytes: Bytes[0x01]},
+        {value: 10, bytes: Bytes[0x0a]},
+        {value: 23, bytes: Bytes[0x17]},
+        {value: 24, bytes: Bytes[0x18, 0x18]},
+        {value: 25, bytes: Bytes[0x18, 0x19]},
+        {value: 100, bytes: Bytes[0x18, 0x64]},
+        {value: 1000, bytes: Bytes[0x19, 0x03, 0xe8]},
+        {value: 1000000, bytes: Bytes[0x1a, 0x00, 0x0f, 0x42, 0x40]},
+        {value: 1000000000000, bytes: Bytes[0x1b, 0x00, 0x00, 0x00, 0xe8, 0xd4, 0xa5, 0x10, 0x00]},
+        {value: -1, bytes: Bytes[0x20]},
+        {value: -10, bytes: Bytes[0x29]},
+        {value: -100, bytes: Bytes[0x38, 0x63]},
+        {value: -1000, bytes: Bytes[0x39, 0x03, 0xe7]},
+      ]
 
-    tests.each do |tt|
-      debug, hex = tt
-      it "Reads #{hex} as #{debug}" do
-        bytes = hex.split.map(&.to_u8(16))
-        lexer = CBOR::Lexer.new(Slice.new(bytes.to_unsafe, bytes.size))
+      tests.each do |tt|
+        it "reads #{tt[:bytes].hexstring} as #{tt[:value].to_s}" do
+          lexer = CBOR::Lexer.new(tt[:bytes])
 
-        token = lexer.read_token
-        CBOR::Token.to_s(token).should eq(debug)
+          token = lexer.read_token
+          token.should be_a(CBOR::Token::IntT)
+          token.as(CBOR::Token::IntT).value.should eq(tt[:value])
+        end
       end
     end
   end
 end
diff --git a/spec/rfc_tests.cr b/spec/rfc_tests.cr
new file mode 100644
index 0000000..919f256
--- /dev/null
+++ b/spec/rfc_tests.cr
@@ -0,0 +1,84 @@
+tests = [
+  { %(0), "00" },
+  { %(1), "01" },
+  { %(10), "0a" },
+  { %(23), "17" },
+  { %(24), "18 18" },
+  { %(25), "18 19" },
+  { %(100), "18 64" },
+  { %(1000), "19 03 e8" },
+  { %(1000000), "1a 00 0f 42 40" },
+  { %(1000000000000), "1b 00 00 00 e8 d4 a5 10 00" },
+  { %(18446744073709551615), "1b ff ff ff ff ff ff ff ff" },
+  { %(18446744073709551616), "c2 49 01 00 00 00 00 00 00 00 00" },
+  { %(-18446744073709551616), "3b ff ff ff ff ff ff ff ff" },
+  { %(-18446744073709551617), "c3 49 01 00 00 00 00 00 00 00 00" },
+  { %(-1), "20" },
+  { %(-10), "29" },
+  { %(-100), "38 63" },
+  { %(-1000), "39 03 e7" },
+  { %(0.0), "f9 00 00" },
+  { %(-0.0), "f9 80 00" },
+  { %(1.0), "f9 3c 00" },
+  { %(1.1), "fb 3f f1 99 99 99 99 99 9a" },
+  { %(1.5), "f9 3e 00" },
+  { %(65504.0), "f9 7b ff" },
+  { %(100000.0), "fa 47 c3 50 00" },
+  { %(3.4028234663852886e+38), "fa 7f 7f ff ff" },
+  { %(1.0e+300), "fb 7e 37 e4 3c 88 00 75 9c" },
+  { %(5.960464477539063e-8), "f9 00 01" },
+  { %(0.00006103515625), "f9 04 00" },
+  { %(-4.0), "f9 c4 00" },
+  { %(-4.1), "fb c0 10 66 66 66 66 66 66" },
+  { %(Infinity), "f9 7c 00" },
+  { %(NaN), "f9 7e 00" },
+  { %(-Infinity), "f9 fc 00" },
+  { %(Infinity), "fa 7f 80 00 00" },
+  { %(NaN), "fa 7f c0 00 00" },
+  { %(-Infinity), "fa ff 80 00 00" },
+  { %(Infinity), "fb 7f f0 00 00 00 00 00 00" },
+  { %(NaN), "fb 7f f8 00 00 00 00 00 00" },
+  { %(-Infinity), "fb ff f0 00 00 00 00 00 00" },
+  { %(false), "f4" },
+  { %(true), "f5" },
+  { %(null), "f6" },
+  { %(undefined), "f7" },
+  { %(simple(16)), "f0" },
+  { %(simple(24)), "f8 18" },
+  { %(simple(255)), "f8 ff" },
+  { %(0("2013-03-21T20:04:00Z")), "c0 74 32 30 31 33 2d 30 33 2d 32 31 54 32 30 3a 30 34 3a 30 30 5a" },
+  { %(1(1363896240)), "c1 1a 51 4b 67 b0" },
+  { %(1(1363896240.5)), "c1 fb 41 d4 52 d9 ec 20 00 00" },
+  { %(23(h'01020304')), "d7 44 01 02 03 04" },
+  { %(24(h'6449455446')), "d8 18 45 64 49 45 54 46" },
+  { %(32("http://www.example.com")), "d8 20 76 68 74 74 70 3a 2f 2f 77 77 77 2e 65 78 61 6d 70 6c 65 2e 63 6f 6d" },
+  { %(h''), "40" },
+  { %(h'01020304'), "44 01 02 03 04" },
+  { %(""), "60" },
+  { %("a"), "61 61" },
+  { %("IETF"), "64 49 45 54 46" },
+  { %(""\\"), "62 22 5c" },
+  { %("\u00fc"), "62 c3 bc" },
+  { %("\u6c34"), "63 e6 b0 b4" },
+  { %("\u{10151}"), "64 f0 90 85 91" },
+  { %([]), "80" },
+  { %([1, 2, 3]), "83 01 02 03" },
+  { %([1, [2, 3], [4, 5]]), "83 01 82 02 03 82 04 05" },
+  { %([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]), "98 19 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f 10 11 12 13 14 15 16 17 18 18 18 19" },
+  { %({}), "a0" },
+  { %({1: 2, 3: 4}), "a2 01 02 03 04" },
+  { %({"a": 1, "b": [2, 3]}), "a2 61 61 01 61 62 82 02 03" },
+  { %(["a", {"b": "c"}]), "82 61 61 a1 61 62 61 63" },
+  { %({"a": "A", "b": "B", "c": "C", "d": "D", "e": "E"}), "a5 61 61 61 41 61 62 61 42 61 63 61 43 61 64 61 44 61 65 61 45" },
+  { %((_ h'0102', h'030405')), "5f 42 01 02 43 03 04 05 ff" },
+  { %((_ "strea", "ming")), "7f 65 73 74 72 65 61 64 6d 69 6e 67 ff" },
+  { %([_ ]), "9f ff" },
+  { %([_ 1, [2, 3], [_ 4, 5]]), "9f 01 82 02 03 9f 04 05 ff ff" },
+  { %([_ 1, [2, 3], [4, 5]]), "9f 01 82 02 03 82 04 05 ff" },
+  { %([1, [2, 3], [_ 4, 5]]), "83 01 82 02 03 9f 04 05 ff" },
+  { %([1, [_ 2, 3], [4, 5]]), "83 01 9f 02 03 ff 82 04 05" },
+  { %([_ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]), "9f 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f 10 11 12 13 14 15 16 17 18 18 18 19 ff" },
+  { %({_ "a": 1, "b": [_ 2, 3]}), "bf 61 61 01 61 62 9f 02 03 ff ff" },
+  { %(["a", {_ "b": "c"}]), "82 61 61 bf 61 62 61 63 ff" },
+  { %({_ "Fun": true, "Amt": -2}), "bf 63 46 75 6e f5 63 41 6d 74 21 ff" },
+]
diff --git a/src/cbor.cr b/src/cbor.cr
index 5ca810a..b7ca491 100644
--- a/src/cbor.cr
+++ b/src/cbor.cr
@@ -4,5 +4,6 @@ require "./cbor/**"
 module CBOR
   VERSION = "0.1.0"
 
-  # TODO: Put your code here
+  # Represents CBOR types (Int128 is included because `Token::IntT#value` may hold one)
+  alias Type = Nil | Bool | String | Bytes | Array(Type) | Hash(Type, Type) | Int8 | UInt8 | Int16 | UInt16 | Int32 | UInt32 | Int64 | UInt64 | Int128
 end
diff --git a/src/cbor/decoder.cr b/src/cbor/decoder.cr
new file mode 100644
index 0000000..b9c7641
--- /dev/null
+++ b/src/cbor/decoder.cr
@@ -0,0 +1,27 @@
+abstract class CBOR::Decoder
+  abstract def current_token : Token::T
+  abstract def read_token : Token::T
+  abstract def finish_token!
+
+  # Reads a single value; delegates to `read_value`.
+  def read : Type
+    read_value
+  end
+
+  # Returns the value carried by the current token and marks it as consumed.
+  #
+  # Token kinds that are not handled raise `ParseError` rather than falling
+  # through the `case` and returning `nil` with the token left unconsumed.
+  def read_value : Type
+    case token = current_token
+    when Token::IntT, Token::BytesT, Token::StringT
+      finish_token!
+      token.value
+    when Token::NullT
+      finish_token!
+      nil
+    else
+      raise ParseError.new("Unexpected token #{token.class} at byte #{token.byte_number}")
+    end
+  end
+end
diff --git a/src/cbor/decoder/io_decoder.cr b/src/cbor/decoder/io_decoder.cr
new file mode 100644
index 0000000..5cd3515
--- /dev/null
+++ b/src/cbor/decoder/io_decoder.cr
@@ -0,0 +1,31 @@
+class CBOR::IODecoder < CBOR::Decoder
+  # Creates a decoder whose lexer reads from the given string or IO.
+  def initialize(string_or_io : String | IO)
+    @lexer = Lexer.new(string_or_io)
+  end
+
+  # Creates a decoder from an array of bytes.
+  #
+  # The bytes are copied into a fresh slice and wrapped in an `IO::Memory`:
+  # `initialize` only accepts `String | IO`, so a bare `Bytes` cannot be
+  # forwarded to it.
+  def self.new(array : Array(UInt8))
+    bytes = Bytes.new(array.size) { |i| array[i] }
+    new(IO::Memory.new(bytes))
+  end
+
+  @[AlwaysInline]
+  def current_token : Token::T
+    @lexer.current_token
+  end
+
+  @[AlwaysInline]
+  def read_token : Token::T
+    @lexer.read_token
+  end
+
+  @[AlwaysInline]
+  def finish_token!
+    @lexer.finish_token!
+  end
+end
diff --git a/src/cbor/diagnostic.cr b/src/cbor/diagnostic.cr
new file mode 100644
index 0000000..5129130
--- /dev/null
+++ b/src/cbor/diagnostic.cr
@@ -0,0 +1,18 @@
+module CBOR::Diagnostic
+  # Delegates to `CBOR::BytesArray#to_diagnostic`.
+  def to_s(value : CBOR::BytesArray) : String
+    value.to_diagnostic
+  end
+
+  # Integers are rendered from the value itself, not from the type name.
+  {% for type in [UInt8, Int8, UInt16, Int16, UInt32, Int32, UInt64, Int64, Int128] %}
+  def to_s(value : {{type}}) : String
+    value.to_s
+  end
+  {% end %}
+
+  # Wraps the string in double quotes (no escaping is applied).
+  def to_s(value : String)
+    %("#{value}")
+  end
+end
diff --git a/src/cbor/lexer.cr b/src/cbor/lexer.cr
index f182910..ff1b89b 100644
--- a/src/cbor/lexer.cr
+++ b/src/cbor/lexer.cr
@@ -1,6 +1,8 @@
 require "./token"
 
 class CBOR::Lexer
+  BREAK = 0xff
+
   def self.new(string : String)
     new IO::Memory.new(string)
   end
@@ -10,11 +12,12 @@ class CBOR::Lexer
   end
 
   @token : Token::T
+  @current_pos : Int64
+  @token_finished : Bool
 
   def initialize(@io : IO)
-    @byte_number = 0
-    @current_byte_number = 0
-    @token = Token::NullT.new(0)
+    @current_pos = 0
+    @token = Token::UndefinedT.new(0)
     @token_finished = true
   end
 
@@ -44,7 +47,7 @@ class CBOR::Lexer
   end
 
   private def next_token
-    @current_byte_number = @byte_number
+    @current_pos = @io.pos.to_i64
     current_byte = next_byte
 
     case current_byte
@@ -81,27 +84,27 @@ class CBOR::Lexer
       consume_binary(read(UInt32))
     when 0x5b
       consume_binary(read(UInt64))
+    when 0x5f
+      Token::BytesArrayT.new(@current_pos)
     else
-      fail
+      raise ParseError.new("Unexpected first byte #{current_byte}")
     end
   end
 
   private def next_byte : UInt8
     byte = @io.read_byte
-    @byte_number += 1
-    fail unless byte
+    raise ParseError.new("Unexpected EOF at byte #{@io.pos}") unless byte
     byte
   end
 
   private def consume_int(value)
-    Token::IntT.new(@current_byte_number, value)
+    Token::IntT.new(@current_pos, value)
   end
 
   private def consume_binary(size)
     bytes = Bytes.new(size)
     @io.read_fully(bytes)
-    @byte_number += size
-    Token::BytesT.new(@current_byte_number, bytes)
+    Token::BytesT.new(@current_pos, bytes)
   end
 
   # Creates a method overloaded for each UInt sizes to convert the UInt into
@@ -127,11 +130,6 @@ class CBOR::Lexer
   {% end %}
 
   private def read(type : T.class) forall T
-    @byte_number += sizeof(T)
     @io.read_bytes(T, IO::ByteFormat::NetworkEndian)
   end
-
-  private def fail
-    raise "Pase error"
-  end
 end
diff --git a/src/cbor/parse_error.cr b/src/cbor/parse_error.cr
new file mode 100644
index 0000000..e7d6558
--- /dev/null
+++ b/src/cbor/parse_error.cr
@@ -0,0 +1,2 @@
+class CBOR::ParseError < Exception
+end
diff --git a/src/cbor/token.cr b/src/cbor/token.cr
index 3a41caa..51d04a3 100644
--- a/src/cbor/token.cr
+++ b/src/cbor/token.cr
@@ -1,14 +1,17 @@
 class CBOR::Token
-  record NullT, byte_number : Int32
-  record BoolT, byte_number : Int32, value : Bool
-  record ArrayT, byte_number : Int32, size : UInt32
-  record MapT, byte_number : Int32, size : UInt32
-  record IntT, byte_number : Int32, value : Int8 | UInt8 | Int16 | UInt16 | Int32 | UInt32 | Int64 | UInt64 | Int128
-  record FloatT, byte_number : Int32, value : Float64
-  record StringT, byte_number : Int32, value : String
-  record BytesT, byte_number : Int32, value : Bytes
+  record NullT, byte_number : Int64
+  record UndefinedT, byte_number : Int64
+  record BoolT, byte_number : Int64, value : Bool
+  record ArrayT, byte_number : Int64, size : UInt32?
+  record MapT, byte_number : Int64, size : UInt32?
+  record IntT, byte_number : Int64, value : Int8 | UInt8 | Int16 | UInt16 | Int32 | UInt32 | Int64 | UInt64 | Int128
+  record FloatT, byte_number : Int64, value : Float64
+  record StringT, byte_number : Int64, value : String
+  record BytesT, byte_number : Int64, value : Bytes
+  record StringArrayT, byte_number : Int64
+  record BytesArrayT, byte_number : Int64
 
-  alias T = NullT | BoolT | ArrayT | MapT | IntT | FloatT | StringT | BytesT
+  alias T = NullT | UndefinedT | BoolT | ArrayT | MapT | IntT | FloatT | StringT | BytesT | StringArrayT | BytesArrayT
 
   def self.to_s(token : T)
     case token
@@ -17,6 +20,10 @@ class CBOR::Token
     when BytesT
       return %(h'') if token.value.empty?
       "h'#{token.value.hexstring}'"
+    when NullT
+      "null"
+    when UndefinedT
+      "undefined"
     else
       raise "Diagnostic notation for type #{token.class} not implemented"
     end
diff --git a/src/cbor/type/bytes_array.cr b/src/cbor/type/bytes_array.cr
new file mode 100644
index 0000000..1cdfdcf
--- /dev/null
+++ b/src/cbor/type/bytes_array.cr
@@ -0,0 +1,20 @@
+class CBOR::BytesArray < Array(UInt8)
+  # Returns the bytes as a plain `Array(UInt8)` copy.
+  def to_a : Array(UInt8)
+    Array(UInt8).new(size) { |i| self[i] }
+  end
+
+  def to_bytes : Bytes
+    Bytes.new(self.to_unsafe, self.size)
+  end
+
+  # Renders every byte as its own chunk, e.g. `(_ h'01', h'02')`.
+  def to_diagnostic : String
+    "(_ #{map { |byte| to_byte_diagnostic(byte) }.join(", ")})"
+  end
+
+  # Two-digit lowercase hex; `UInt8` has no `hexstring`, only `Bytes` does.
+  private def to_byte_diagnostic(i : UInt8) : String
+    "h'#{i.to_s(16).rjust(2, '0')}'"
+  end
+end