Implement array tokenization

dev
Alberto Restifo 2020-04-23 09:40:51 +02:00
parent 547e8af2bd
commit 3c91037cf3
3 changed files with 42 additions and 30 deletions

View File

@ -73,23 +73,23 @@ tests = [
{ %("\u00fc"), "62 c3 bc" },
{ %("\u6c34"), "63 e6 b0 b4" },
# { %("\ud800\udd51"), "64 f0 90 85 91" }, TODO: Maybe there is a problem with unicode escaping? Or maybe it's just the diagnostics
# { %([]), "80" },
# { %([1, 2, 3]), "83 01 02 03" },
# { %([1, [2, 3], [4, 5]]), "83 01 82 02 03 82 04 05" },
# { %([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]), "98 19 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f 10 11 12 13 14 15 16 17 18 18 18 19" },
{ %([]), "80" },
{ %([1, 2, 3]), "83 01 02 03" },
{ %([1, [2, 3], [4, 5]]), "83 01 82 02 03 82 04 05" },
{ %([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]), "98 19 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f 10 11 12 13 14 15 16 17 18 18 18 19" },
# { %({}), "a0" },
# { %({1: 2, 3: 4}), "a2 01 02 03 04" },
# { %({"a": 1, "b": [2, 3]}), "a2 61 61 01 61 62 82 02 03" },
# { %(["a", {"b": "c"}]), "82 61 61 a1 61 62 61 63" },
# { %({"a": "A", "b": "B", "c": "C", "d": "D", "e": "E"}), "a5 61 61 61 41 61 62 61 42 61 63 61 43 61 64 61 44 61 65 61 45" },
# { %((_ h'0102', h'030405')), "5f 42 01 02 43 03 04 05 ff" },
# { %((_ "strea", "ming")), "7f 65 73 74 72 65 61 64 6d 69 6e 67 ff" },
# { %([_ ]), "9f ff" },
# { %([_ 1, [2, 3], [_ 4, 5]]), "9f 01 82 02 03 9f 04 05 ff ff" },
# { %([_ 1, [2, 3], [4, 5]]), "9f 01 82 02 03 82 04 05 ff" },
# { %([1, [2, 3], [_ 4, 5]]), "83 01 82 02 03 9f 04 05 ff" },
# { %([1, [_ 2, 3], [4, 5]]), "83 01 9f 02 03 ff 82 04 05" },
# { %([_ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]), "9f 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f 10 11 12 13 14 15 16 17 18 18 18 19 ff" },
{ %((_ h'0102', h'030405')), "5f 42 01 02 43 03 04 05 ff" },
{ %((_ "strea", "ming")), "7f 65 73 74 72 65 61 64 6d 69 6e 67 ff" },
{ %([_ ]), "9f ff" },
{ %([_ 1, [2, 3], [_ 4, 5]]), "9f 01 82 02 03 9f 04 05 ff ff" },
{ %([_ 1, [2, 3], [4, 5]]), "9f 01 82 02 03 82 04 05 ff" },
{ %([1, [2, 3], [_ 4, 5]]), "83 01 82 02 03 9f 04 05 ff" },
{ %([1, [_ 2, 3], [4, 5]]), "83 01 9f 02 03 ff 82 04 05" },
{ %([_ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]), "9f 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f 10 11 12 13 14 15 16 17 18 18 18 19 ff" },
# { %({_ "a": 1, "b": [_ 2, 3]}), "bf 61 61 01 61 62 9f 02 03 ff ff" },
# { %(["a", {_ "b": "c"}]), "82 61 61 bf 61 62 61 63 ff" },
# { %({_ "Fun": true, "Amt": -2}), "bf 63 46 75 6e f5 63 41 6d 74 21 ff" },

View File

@ -42,19 +42,31 @@ class CBOR::Diagnostic
else
bytes(token.value)
end
# when Kind::Array
# value = token.value.as(Array(Type))
# return "[]" unless value.size > 0
# content = value.map { |token| to_diagnostic(token) }.join(", ")
# return "[#{content}]" if token.size
# "[_ #{content}]"
when Token::ArrayT
arr = read_array(token.size)
return "[#{arr.join(", ")}]" if token.size
"[_ #{arr.join(", ")}]"
else
token.inspect
end
end
# Gathers the diagnostic strings that make up the body of an array.
#
# When `size` is nil (presumably an indefinite-length array — confirm against
# the lexer), every token yielded by `@lexer.until_break` is rendered with
# `to_diagnostic` and collected.
#
# Otherwise exactly `size` values are pulled with `next_value`; a
# `ParseError` is raised as soon as one of them is missing.
private def read_array(size : Int32?) : Array(String)
  unless size
    items = Array(String).new
    @lexer.until_break { |tok| items << to_diagnostic(tok) }
    return items
  end

  # Size is known up front, so reserve the capacity once.
  items = Array(String).new(size)
  size.times do
    item = next_value
    raise ParseError.new("Unexpected EOF while reading array body") unless item
    items << item
  end
  items
end
private def chunks(value : Bytes, chunks : Array(Int32)) : Array(Bytes)
res = Array(Bytes).new
bytes = value.to_a

View File

@ -20,6 +20,15 @@ class CBOR::Lexer
decode(byte)
end
# Yields decoded tokens one at a time until a byte equal to `BREAK` is read.
#
# Each iteration reads one byte with `next_byte`. If no byte is available
# before `BREAK` is seen, a `ParseError` is raised. The `BREAK` byte itself
# is consumed but never decoded or yielded.
def until_break(&block : Token::T ->)
  while true
    current = next_byte
    if current
      break if current == BREAK
      yield decode(current)
    else
      raise ParseError.new("unexpected EOF while searching for break")
    end
  end
end
private def decode(byte : UInt8) : Token::T
case byte
when 0x00..0x1b
@ -34,7 +43,7 @@ class CBOR::Lexer
consume_string(read_size(byte - 0x60))
when 0x7f
read_string_array
when 0x80..0x97
when 0x80..0x9b
array_start(read_size(byte - 0x80))
when 0x9f
Token::ArrayT.new
@ -75,15 +84,6 @@ class CBOR::Lexer
Token::StringT.new(value: value, chunks: chunks)
end
# Yields decoded tokens one at a time until a byte equal to `BREAK` is read.
#
# Each iteration reads one byte with `next_byte`. If no byte is available
# before `BREAK` is seen, a `ParseError` is raised. The `BREAK` byte itself
# is consumed but never decoded or yielded.
private def until_break(&block : Token::T ->)
  while true
    current = next_byte
    if current
      break if current == BREAK
      yield decode(current)
    else
      raise ParseError.new("unexpected EOF while searching for break")
    end
  end
end
# Reads the size for the next token type
private def read_size(current_byte : UInt8) : Int
case current_byte