Continue implementation and start decoder

dev
Alberto Restifo 2020-04-19 20:40:10 +02:00
parent b179ef7857
commit aded275148
10 changed files with 221 additions and 117 deletions

View File

@ -1,100 +1,33 @@
require "../spec_helper"
describe CBOR::Lexer do
describe "examples from the RFC7049 Appendix A" do
tests = [
{ %(0), "00" },
{ %(1), "01" },
{ %(10), "0a" },
{ %(23), "17" },
{ %(24), "18 18" },
{ %(25), "18 19" },
{ %(100), "18 64" },
{ %(1000), "19 03 e8" },
{ %(1000000), "1a 00 0f 42 40" },
{ %(1000000000000), "1b 00 00 00 e8 d4 a5 10 00" },
{ %(18446744073709551615), "1b ff ff ff ff ff ff ff ff" },
# { %(18446744073709551616), "c2 49 01 00 00 00 00 00 00 00 00"},
{ %(-18446744073709551616), "3b ff ff ff ff ff ff ff ff" },
# { %(-18446744073709551617), "c3 49 01 00 00 00 00 00 00 00 00"},
{ %(-1), "20" },
{ %(-10), "29" },
{ %(-100), "38 63" },
{ %(-1000), "39 03 e7" },
# { %(0.0), "f9 00 00"},
# { %(-0.0), "f9 80 00"},
# { %(1.0), "f9 3c 00"},
# { %(1.1), "fb 3f f1 99 99 99 99 99 9a"},
# { %(1.5), "f9 3e 00"},
# { %(65504.0), "f9 7b ff"},
# { %(100000.0), "fa 47 c3 50 00"},
# { %(3.4028234663852886e+38), "fa 7f 7f ff ff"},
# { %(1.0e+300), "fb 7e 37 e4 3c 88 00 75 9c"},
# { %(5.960464477539063e-8), "f9 00 01"},
# { %(0.00006103515625), "f9 04 00"},
# { %(-4.0), "f9 c4 00"},
# { %(-4.1), "fb c0 10 66 66 66 66 66 66"},
# { %(Infinity), "f9 7c 00"},
# { %(NaN), "f9 7e 00"},
# { %(-Infinity), "f9 fc 00"},
# { %(Infinity), "fa 7f 80 00 00"},
# { %(NaN), "fa 7f c0 00 00"},
# { %(-Infinity), "fa ff 80 00 00"},
# { %(Infinity), "fb 7f f0 00 00 00 00 00 00"},
# { %(NaN), "fb 7f f8 00 00 00 00 00 00"},
# { %(-Infinity), "fb ff f0 00 00 00 00 00 00"},
# { %(false), "f4"},
# { %(true), "f5"},
# { %(null), "f6"},
# { %(undefined), "f7"},
# { %(simple(16)), "f0"},
# { %(simple(24)), "f8 18"},
# { %(simple(255)), "f8 ff"},
# { %(0(\"2013-03-21T20:04:00Z\")), "c0 74 32 30 31 33 2d 30 33 2d 32 31 54 32 30 3a 30 34 3a 30 30 5a"},
# { %(1(1363896240)), "c1 1a 51 4b 67 b0"},
# { %(1(1363896240.5)), "c1 fb 41 d4 52 d9 ec 20 00 00"},
# { %(23(h'01020304')), "d7 44 01 02 03 04"},
# { %(24(h'6449455446')), "d8 18 45 64 49 45 54 46"},
# { %(32("http://www.example.com")), "d8 20 76 68 74 74 70 3a 2f 2f 77 77 77 2e 65 78 61 6d 70 6c 65 2e 63 6f 6d" },
{ %(h''), "40" },
{ %(h'01020304'), "44 01 02 03 04" },
# { %(""), "60" },
# { %("a"), "61 61" },
# { %("IETF"), "64 49 45 54 46" },
# { %(""\\"), "62225c" },
# { %("\u00fc"), "62 c3 bc" },
# { %("\u6c34"), "63 e6 b0 b4" },
# { %("\ud800\udd51"), "64 f0 90 85 91" },
# { %([]), "80" },
# { %([1, 2, 3]), "83 01 02 03" },
# { %([1, [2, 3], [4, 5]]), "83 01 82 02 03 82 04 05" },
# { %([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]), "98 19 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f 10 11 12 13 14 15 16 17 18 18 18 19" },
# { %({}), "a0" },
# { %({1: 2, 3: 4}), "a2 01 02 03 04" },
# { %({"a": 1, "b": [2, 3]}), "a2 61 61 01 61 62 82 02 03" },
# { %(["a", {"b": "c"}]), "82 61 61 a1 61 62 61 63" },
# { %({"a": "A", "b": "B", "c": "C", "d": "D", "e": "E"}), "a5 61 61 61 41 61 62 61 42 61 63 61 43 61 64 61 44 61 65 61 45" },
# { %((_ h'0102', h'030405')), "5f 42 01 02 43 03 04 05 ff" },
# { %((_ "strea", "ming")), "7f 65 73 74 72 65 61 64 6d 69 6e 67 ff" },
# { %([_ ]), "9f ff" },
# { %([_ 1, [2, 3], [_ 4, 5]]), "9f 01 82 02 03 9f 04 05 ff ff" },
# { %([_ 1, [2, 3], [4, 5]]), "9f 01 82 02 03 82 04 05 ff" },
# { %([1, [2, 3], [_ 4, 5]]), "83 01 82 02 03 9f 04 05 ff" },
# { %([1, [_ 2, 3], [4, 5]]), "83 01 9f 02 03 ff 82 04 05" },
# { %([_ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]), "9f 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f 10 11 12 13 14 15 16 17 18 18 18 19 ff" },
# { %({_ "a": 1, "b": [_ 2, 3]}), "bf 61 61 01 61 62 9f 02 03 ff ff" },
# { %(["a", {_ "b": "c"}]), "82 61 61 bf 61 62 61 63 ff" },
# { %({_ "Fun": true, "Amt": -2}), "bf 63 46 75 6e f5 63 41 6d 74 21 ff" },
]
describe "read_token" do
describe "reads an int" do
tests = [
{value: 0, bytes: Bytes[0x00]},
{value: 1, bytes: Bytes[0x01]},
{value: 10, bytes: Bytes[0x0a]},
{value: 23, bytes: Bytes[0x17]},
{value: 24, bytes: Bytes[0x18, 0x18]},
{value: 25, bytes: Bytes[0x18, 0x19]},
{value: 100, bytes: Bytes[0x18, 0x64]},
{value: 1000, bytes: Bytes[0x19, 0x03, 0xe8]},
{value: 1000000, bytes: Bytes[0x1a, 0x00, 0x0f, 0x42, 0x40]},
{value: 1000000000000, bytes: Bytes[0x1b, 0x00, 0x00, 0x00, 0xe8, 0xd4, 0xa5, 0x10, 0x00]},
{value: -1, bytes: Bytes[0x20]},
{value: -10, bytes: Bytes[0x29]},
{value: -100, bytes: Bytes[0x38, 0x63]},
{value: -1000, bytes: Bytes[0x39, 0x03, 0xe7]},
]
tests.each do |tt|
debug, hex = tt
it "Reads #{hex} as #{debug}" do
bytes = hex.split.map(&.to_u8(16))
lexer = CBOR::Lexer.new(Slice.new(bytes.to_unsafe, bytes.size))
tests.each do |tt|
it "reads #{tt[:bytes].hexstring} as #{tt[:value].to_s}" do
lexer = CBOR::Lexer.new(tt[:bytes])
token = lexer.read_token
CBOR::Token.to_s(token).should eq(debug)
token = lexer.read_token
token.should be_a(CBOR::Token::IntT)
token.as(CBOR::Token::IntT).value.should eq(tt[:value])
end
end
end
end

84
spec/rfc_tests.cr Normal file
View File

@ -0,0 +1,84 @@
# Test vectors from RFC 7049 Appendix A.
# Each entry pairs the CBOR diagnostic notation of an item with its hex
# encoding (bytes separated by single spaces).
tests = [
  { %(0), "00" },
  { %(1), "01" },
  { %(10), "0a" },
  { %(23), "17" },
  { %(24), "18 18" },
  { %(25), "18 19" },
  { %(100), "18 64" },
  { %(1000), "19 03 e8" },
  { %(1000000), "1a 00 0f 42 40" },
  { %(1000000000000), "1b 00 00 00 e8 d4 a5 10 00" },
  { %(18446744073709551615), "1b ff ff ff ff ff ff ff ff" },
  { %(18446744073709551616), "c2 49 01 00 00 00 00 00 00 00 00" },
  { %(-18446744073709551616), "3b ff ff ff ff ff ff ff ff" },
  { %(-18446744073709551617), "c3 49 01 00 00 00 00 00 00 00 00" },
  { %(-1), "20" },
  { %(-10), "29" },
  { %(-100), "38 63" },
  { %(-1000), "39 03 e7" },
  { %(0.0), "f9 00 00" },
  { %(-0.0), "f9 80 00" },
  { %(1.0), "f9 3c 00" },
  { %(1.1), "fb 3f f1 99 99 99 99 99 9a" },
  { %(1.5), "f9 3e 00" },
  { %(65504.0), "f9 7b ff" },
  { %(100000.0), "fa 47 c3 50 00" },
  { %(3.4028234663852886e+38), "fa 7f 7f ff ff" },
  { %(1.0e+300), "fb 7e 37 e4 3c 88 00 75 9c" },
  { %(5.960464477539063e-8), "f9 00 01" },
  { %(0.00006103515625), "f9 04 00" },
  { %(-4.0), "f9 c4 00" },
  { %(-4.1), "fb c0 10 66 66 66 66 66 66" },
  { %(Infinity), "f9 7c 00" },
  { %(NaN), "f9 7e 00" },
  { %(-Infinity), "f9 fc 00" },
  { %(Infinity), "fa 7f 80 00 00" },
  { %(NaN), "fa 7f c0 00 00" },
  { %(-Infinity), "fa ff 80 00 00" },
  { %(Infinity), "fb 7f f0 00 00 00 00 00 00" },
  { %(NaN), "fb 7f f8 00 00 00 00 00 00" },
  { %(-Infinity), "fb ff f0 00 00 00 00 00 00" },
  { %(false), "f4" },
  { %(true), "f5" },
  { %(null), "f6" },
  { %(undefined), "f7" },
  { %(simple(16)), "f0" },
  { %(simple(24)), "f8 18" },
  { %(simple(255)), "f8 ff" },
  { %(0("2013-03-21T20:04:00Z")), "c0 74 32 30 31 33 2d 30 33 2d 32 31 54 32 30 3a 30 34 3a 30 30 5a" },
  { %(1(1363896240)), "c1 1a 51 4b 67 b0" },
  { %(1(1363896240.5)), "c1 fb 41 d4 52 d9 ec 20 00 00" },
  { %(23(h'01020304')), "d7 44 01 02 03 04" },
  { %(24(h'6449455446')), "d8 18 45 64 49 45 54 46" },
  { %(32("http://www.example.com")), "d8 20 76 68 74 74 70 3a 2f 2f 77 77 77 2e 65 78 61 6d 70 6c 65 2e 63 6f 6d" },
  { %(h''), "40" },
  { %(h'01020304'), "44 01 02 03 04" },
  { %(""), "60" },
  { %("a"), "61 61" },
  { %("IETF"), "64 49 45 54 46" },
  # Was "62225c" — re-spaced to match the one-byte-per-group format every
  # other vector uses (RFC value: 0x62 0x22 0x5c).
  # NOTE(review): the diagnostic literal for this entry looks mangled too
  # (RFC Appendix A lists the string "\"\\") — confirm the intended %() escaping.
  { %(""\\"), "62 22 5c" },
  { %("\u00fc"), "62 c3 bc" },
  { %("\u6c34"), "63 e6 b0 b4" },
  { %("\ud800\udd51"), "64 f0 90 85 91" },
  { %([]), "80" },
  { %([1, 2, 3]), "83 01 02 03" },
  { %([1, [2, 3], [4, 5]]), "83 01 82 02 03 82 04 05" },
  { %([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]), "98 19 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f 10 11 12 13 14 15 16 17 18 18 18 19" },
  { %({}), "a0" },
  { %({1: 2, 3: 4}), "a2 01 02 03 04" },
  { %({"a": 1, "b": [2, 3]}), "a2 61 61 01 61 62 82 02 03" },
  { %(["a", {"b": "c"}]), "82 61 61 a1 61 62 61 63" },
  { %({"a": "A", "b": "B", "c": "C", "d": "D", "e": "E"}), "a5 61 61 61 41 61 62 61 42 61 63 61 43 61 64 61 44 61 65 61 45" },
  { %((_ h'0102', h'030405')), "5f 42 01 02 43 03 04 05 ff" },
  { %((_ "strea", "ming")), "7f 65 73 74 72 65 61 64 6d 69 6e 67 ff" },
  { %([_ ]), "9f ff" },
  { %([_ 1, [2, 3], [_ 4, 5]]), "9f 01 82 02 03 9f 04 05 ff ff" },
  { %([_ 1, [2, 3], [4, 5]]), "9f 01 82 02 03 82 04 05 ff" },
  { %([1, [2, 3], [_ 4, 5]]), "83 01 82 02 03 9f 04 05 ff" },
  { %([1, [_ 2, 3], [4, 5]]), "83 01 9f 02 03 ff 82 04 05" },
  { %([_ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]), "9f 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f 10 11 12 13 14 15 16 17 18 18 18 19 ff" },
  { %({_ "a": 1, "b": [_ 2, 3]}), "bf 61 61 01 61 62 9f 02 03 ff ff" },
  { %(["a", {_ "b": "c"}]), "82 61 61 bf 61 62 61 63 ff" },
  { %({_ "Fun": true, "Amt": -2}), "bf 63 46 75 6e f5 63 41 6d 74 21 ff" },
]

View File

@ -4,5 +4,6 @@ require "./cbor/**"
module CBOR
  VERSION = "0.1.0"

  # Represents any Crystal value a decoded CBOR item can map to.
  # NOTE(review): Float64 is not part of this union, so float tokens
  # (Token::FloatT) cannot currently be surfaced through Type — confirm
  # whether that is intentional or an omission.
  alias Type = Nil | Bool | String | Bytes | Array(Type) | Hash(Type, Type) | Int8 | UInt8 | Int16 | UInt16 | Int32 | UInt32 | Int64 | UInt64
end

23
src/cbor/decoder.cr Normal file
View File

@ -0,0 +1,23 @@
# Base class for CBOR decoders. Concrete subclasses supply the token
# stream via current_token / read_token / finish_token!.
abstract class CBOR::Decoder
  # Returns the token at the current position without consuming it.
  abstract def current_token : Token::T

  # Advances the stream and returns the next token.
  abstract def read_token : Token::T

  # Marks the current token as consumed.
  abstract def finish_token!

  # Decodes the next CBOR item into a Crystal value.
  def read : Type
    read_value
  end

  # Decodes a single value from the current token.
  #
  # Fix: NullT/UndefinedT/BoolT were previously unhandled — they fell
  # through to an implicit nil WITHOUT consuming the token, so the next
  # read saw the same token again. Their values fit `Type`, so handle
  # them explicitly and finish the token.
  def read_value : Type
    case token = current_token
    when Token::IntT
      finish_token!
      token.value
    when Token::BytesT
      finish_token!
      token.value
    when Token::StringT
      finish_token!
      token.value
    when Token::BoolT
      finish_token!
      token.value
    when Token::NullT, Token::UndefinedT
      finish_token!
      nil
      # NOTE(review): floats, arrays and maps are still unhandled and
      # yield nil here — extend as the decoder implementation grows.
    end
  end
end

View File

@ -0,0 +1,25 @@
# Decoder backed by a Lexer reading from a String, IO, or raw byte buffer.
class CBOR::IODecoder < CBOR::Decoder
  def initialize(string_or_io : String | IO)
    @lexer = Lexer.new(string_or_io)
  end

  # Accepts a raw byte slice by wrapping it in an in-memory IO.
  # Fix: previously the Array(UInt8) constructor passed a Bytes straight
  # to `new`, but `initialize` only accepts String | IO, so no overload
  # matched. Routing through IO::Memory makes both byte entry points work.
  def self.new(slice : Bytes)
    new(IO::Memory.new(slice))
  end

  def self.new(array : Array(UInt8))
    # NOTE(review): this slice aliases the array's buffer — the array must
    # outlive the decoder (or be copied first). Confirm callers' lifetimes.
    slice = Bytes.new(array.to_unsafe, array.size)
    new(slice)
  end

  @[AlwaysInline]
  def current_token : Token::T
    @lexer.current_token
  end

  @[AlwaysInline]
  def read_token : Token::T
    @lexer.read_token
  end

  @[AlwaysInline]
  def finish_token!
    @lexer.finish_token!
  end
end

15
src/cbor/diagnostic.cr Normal file
View File

@ -0,0 +1,15 @@
# Helpers for rendering decoded values in CBOR diagnostic notation
# (RFC 7049 section 6).
module CBOR::Diagnostic
  # NOTE(review): the class defined in this commit is named
  # CBOR::BytesArray; "CBOR::ByteArray" had no matching definition —
  # confirm no other file declares ByteArray.
  def to_s(value : CBOR::BytesArray) : String
    value.to_diagnostic
  end

  {% for type in [UInt8, Int8, UInt16, Int16, UInt32, Int32, UInt64, Int64, Int128] %}
    def to_s(value : {{type}}) : String
      # Fix: `{{type}}.to_s` stringified the TYPE (e.g. "UInt8"), not the
      # number passed in — the value itself must be stringified.
      value.to_s
    end
  {% end %}

  # Strings are rendered quoted, as in diagnostic notation.
  def to_s(value : String)
    %("#{value}")
  end
end

View File

@ -1,6 +1,8 @@
require "./token"
class CBOR::Lexer
BREAK = 0xff
def self.new(string : String)
new IO::Memory.new(string)
end
@ -10,11 +12,12 @@ class CBOR::Lexer
end
@token : Token::T
@current_pos : Int64
@token_finished : Bool
def initialize(@io : IO)
@byte_number = 0
@current_byte_number = 0
@token = Token::NullT.new(0)
@current_pos = 0
@token = Token::UndefinedT.new(0)
@token_finished = true
end
@ -44,7 +47,7 @@ class CBOR::Lexer
end
private def next_token
@current_byte_number = @byte_number
@current_pos = @io.pos.to_i64
current_byte = next_byte
case current_byte
@ -81,27 +84,27 @@ class CBOR::Lexer
consume_binary(read(UInt32))
when 0x5b
consume_binary(read(UInt64))
when 0x5f
Token::BytesArrayT.new(@current_pos)
else
fail
raise ParseError.new("Unexpected first byte #{current_byte}")
end
end
private def next_byte : UInt8
byte = @io.read_byte
@byte_number += 1
fail unless byte
raise ParseError.new("Unexpected EOF at byte #{@io.pos}") unless byte
byte
end
private def consume_int(value)
Token::IntT.new(@current_byte_number, value)
Token::IntT.new(@current_pos, value)
end
private def consume_binary(size)
bytes = Bytes.new(size)
@io.read_fully(bytes)
@byte_number += size
Token::BytesT.new(@current_byte_number, bytes)
Token::BytesT.new(@current_pos, bytes)
end
# Creates a method overloaded for each UInt sizes to convert the UInt into
@ -127,11 +130,6 @@ class CBOR::Lexer
{% end %}
private def read(type : T.class) forall T
@byte_number += sizeof(T)
@io.read_bytes(T, IO::ByteFormat::NetworkEndian)
end
private def fail
raise "Pase error"
end
end

2
src/cbor/parse_error.cr Normal file
View File

@ -0,0 +1,2 @@
# Raised when the lexer or decoder encounters malformed CBOR input
# (unexpected first byte, truncated stream, etc.).
class CBOR::ParseError < Exception
end

View File

@ -1,14 +1,17 @@
class CBOR::Token
record NullT, byte_number : Int32
record BoolT, byte_number : Int32, value : Bool
record ArrayT, byte_number : Int32, size : UInt32
record MapT, byte_number : Int32, size : UInt32
record IntT, byte_number : Int32, value : Int8 | UInt8 | Int16 | UInt16 | Int32 | UInt32 | Int64 | UInt64 | Int128
record FloatT, byte_number : Int32, value : Float64
record StringT, byte_number : Int32, value : String
record BytesT, byte_number : Int32, value : Bytes
record NullT, byte_number : Int64
record UndefinedT, byte_number : Int64
record BoolT, byte_number : Int64, value : Bool
record ArrayT, byte_number : Int64, size : UInt32?
record MapT, byte_number : Int64, size : UInt32?
record IntT, byte_number : Int64, value : Int8 | UInt8 | Int16 | UInt16 | Int32 | UInt32 | Int64 | UInt64 | Int128
record FloatT, byte_number : Int64, value : Float64
record StringT, byte_number : Int64, value : String
record BytesT, byte_number : Int64, value : Bytes
record StringArrayT, byte_number : Int64
record BytesArrayT, byte_number : Int64
alias T = NullT | BoolT | ArrayT | MapT | IntT | FloatT | StringT | BytesT
alias T = NullT | UndefinedT | BoolT | ArrayT | MapT | IntT | FloatT | StringT | BytesT | StringArrayT | BytesArrayT
def self.to_s(token : T)
case token
@ -17,6 +20,10 @@ class CBOR::Token
when BytesT
return %(h'') if token.value.empty?
"h'#{token.value.hexstring}'"
when NullT
"null"
when UndefinedT
"undefined"
else
raise "Diagnostic notation for type #{token.class} not implemented"
end

View File

@ -0,0 +1,16 @@
# An Array(UInt8) specialization with conversions used by the decoder and
# by diagnostic-notation output for indefinite-length byte strings.
class CBOR::BytesArray < Array(UInt8)
  # Returns a plain Array(UInt8) copy of this value.
  # Fix: the body was empty, so it implicitly returned nil — violating the
  # declared Array(UInt8) return type.
  def to_a : Array(UInt8)
    Array(UInt8).new(size) { |i| self[i] }
  end

  # Returns a Bytes slice over this array's buffer.
  # NOTE(review): the slice aliases the array's memory — the array must not
  # be resized or freed while the slice is in use.
  def to_bytes : Bytes
    Bytes.new(self.to_unsafe, self.size)
  end

  # Renders the value in diagnostic notation for an indefinite-length byte
  # string, e.g. "(_ h'01', h'02')".
  # Fix: `map(&to_byte_diagnostic)` is not valid block-pass syntax; use an
  # explicit block.
  def to_diagnostic : String
    "(_ #{map { |byte| to_byte_diagnostic(byte) }.join(", ")})"
  end

  # Formats a single byte as h'xx' (two lowercase hex digits).
  # Fix: UInt8 has no #hexstring in Crystal's stdlib (only Slice does);
  # build the two-digit hex form explicitly.
  private def to_byte_diagnostic(i : UInt8) : String
    "h'#{i.to_s(16).rjust(2, '0')}'"
  end
end