Implement array tokenization
parent
547e8af2bd
commit
3c91037cf3
|
@ -73,23 +73,23 @@ tests = [
|
|||
{ %("\u00fc"), "62 c3 bc" },
|
||||
{ %("\u6c34"), "63 e6 b0 b4" },
|
||||
# { %("\ud800\udd51"), "64 f0 90 85 91" }, TODO: Maybe there is a problem with unicode escaping? Or maybe it's just the diagnostics
|
||||
# { %([]), "80" },
|
||||
# { %([1, 2, 3]), "83 01 02 03" },
|
||||
# { %([1, [2, 3], [4, 5]]), "83 01 82 02 03 82 04 05" },
|
||||
# { %([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]), "98 19 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f 10 11 12 13 14 15 16 17 18 18 18 19" },
|
||||
{ %([]), "80" },
|
||||
{ %([1, 2, 3]), "83 01 02 03" },
|
||||
{ %([1, [2, 3], [4, 5]]), "83 01 82 02 03 82 04 05" },
|
||||
{ %([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]), "98 19 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f 10 11 12 13 14 15 16 17 18 18 18 19" },
|
||||
# { %({}), "a0" },
|
||||
# { %({1: 2, 3: 4}), "a2 01 02 03 04" },
|
||||
# { %({"a": 1, "b": [2, 3]}), "a2 61 61 01 61 62 82 02 03" },
|
||||
# { %(["a", {"b": "c"}]), "82 61 61 a1 61 62 61 63" },
|
||||
# { %({"a": "A", "b": "B", "c": "C", "d": "D", "e": "E"}), "a5 61 61 61 41 61 62 61 42 61 63 61 43 61 64 61 44 61 65 61 45" },
|
||||
# { %((_ h'0102', h'030405')), "5f 42 01 02 43 03 04 05 ff" },
|
||||
# { %((_ "strea", "ming")), "7f 65 73 74 72 65 61 64 6d 69 6e 67 ff" },
|
||||
# { %([_ ]), "9f ff" },
|
||||
# { %([_ 1, [2, 3], [_ 4, 5]]), "9f 01 82 02 03 9f 04 05 ff ff" },
|
||||
# { %([_ 1, [2, 3], [4, 5]]), "9f 01 82 02 03 82 04 05 ff" },
|
||||
# { %([1, [2, 3], [_ 4, 5]]), "83 01 82 02 03 9f 04 05 ff" },
|
||||
# { %([1, [_ 2, 3], [4, 5]]), "83 01 9f 02 03 ff 82 04 05" },
|
||||
# { %([_ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]), "9f 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f 10 11 12 13 14 15 16 17 18 18 18 19 ff" },
|
||||
{ %((_ h'0102', h'030405')), "5f 42 01 02 43 03 04 05 ff" },
|
||||
{ %((_ "strea", "ming")), "7f 65 73 74 72 65 61 64 6d 69 6e 67 ff" },
|
||||
{ %([_ ]), "9f ff" },
|
||||
{ %([_ 1, [2, 3], [_ 4, 5]]), "9f 01 82 02 03 9f 04 05 ff ff" },
|
||||
{ %([_ 1, [2, 3], [4, 5]]), "9f 01 82 02 03 82 04 05 ff" },
|
||||
{ %([1, [2, 3], [_ 4, 5]]), "83 01 82 02 03 9f 04 05 ff" },
|
||||
{ %([1, [_ 2, 3], [4, 5]]), "83 01 9f 02 03 ff 82 04 05" },
|
||||
{ %([_ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]), "9f 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f 10 11 12 13 14 15 16 17 18 18 18 19 ff" },
|
||||
# { %({_ "a": 1, "b": [_ 2, 3]}), "bf 61 61 01 61 62 9f 02 03 ff ff" },
|
||||
# { %(["a", {_ "b": "c"}]), "82 61 61 bf 61 62 61 63 ff" },
|
||||
# { %({_ "Fun": true, "Amt": -2}), "bf 63 46 75 6e f5 63 41 6d 74 21 ff" },
|
||||
|
|
|
@ -42,19 +42,31 @@ class CBOR::Diagnostic
|
|||
else
|
||||
bytes(token.value)
|
||||
end
|
||||
# when Kind::Array
|
||||
# value = token.value.as(Array(Type))
|
||||
# return "[]" unless value.size > 0
|
||||
|
||||
# content = value.map { |token| to_diagnostic(token) }.join(", ")
|
||||
|
||||
# return "[#{content}]" if token.size
|
||||
# "[_ #{content}]"
|
||||
when Token::ArrayT
|
||||
arr = read_array(token.size)
|
||||
return "[#{arr.join(", ")}]" if token.size
|
||||
"[_ #{arr.join(", ")}]"
|
||||
else
|
||||
token.inspect
|
||||
end
|
||||
end
|
||||
|
||||
# Collects the diagnostic strings for the elements of an array.
#
# When *size* is given, exactly that many values are pulled with
# `next_value`; a nil value raises `ParseError`. When *size* is nil, tokens
# are consumed through `@lexer.until_break` and each one is rendered with
# `to_diagnostic`.
private def read_array(size : Int32?) : Array(String)
  if size
    # Known length: build the array in one go, one `next_value` per slot.
    Array(String).new(size) do
      next_value || raise ParseError.new("Unexpected EOF while reading array body")
    end
  else
    # No length supplied: keep appending until the lexer reports the break.
    items = Array(String).new
    @lexer.until_break { |token| items << to_diagnostic(token) }
    items
  end
end
|
||||
|
||||
private def chunks(value : Bytes, chunks : Array(Int32)) : Array(Bytes)
|
||||
res = Array(Bytes).new
|
||||
bytes = value.to_a
|
||||
|
|
|
@ -20,6 +20,15 @@ class CBOR::Lexer
|
|||
decode(byte)
|
||||
end
|
||||
|
||||
# Yields each decoded token to the block until the `BREAK` byte is read.
#
# Bytes are fetched one at a time with `next_byte`. A nil byte raises
# `ParseError`; the `BREAK` byte ends the loop without being yielded.
def until_break(&block : Token::T ->)
  loop do
    current = next_byte

    unless current
      raise ParseError.new("unexpected EOF while searching for break")
    end

    # The break marker terminates the sequence and is not decoded.
    break if current == BREAK

    yield decode(current)
  end
end
|
||||
|
||||
private def decode(byte : UInt8) : Token::T
|
||||
case byte
|
||||
when 0x00..0x1b
|
||||
|
@ -34,7 +43,7 @@ class CBOR::Lexer
|
|||
consume_string(read_size(byte - 0x60))
|
||||
when 0x7f
|
||||
read_string_array
|
||||
when 0x80..0x97
|
||||
when 0x80..0x9b
|
||||
array_start(read_size(byte - 0x80))
|
||||
when 0x9f
|
||||
Token::ArrayT.new
|
||||
|
@ -75,15 +84,6 @@ class CBOR::Lexer
|
|||
Token::StringT.new(value: value, chunks: chunks)
|
||||
end
|
||||
|
||||
# Yields each decoded token to the block until the `BREAK` byte is read.
#
# Bytes are fetched one at a time with `next_byte`. A nil byte raises
# `ParseError`; the `BREAK` byte ends the loop without being yielded.
private def until_break(&block : Token::T ->)
  loop do
    current = next_byte

    unless current
      raise ParseError.new("unexpected EOF while searching for break")
    end

    # The break marker terminates the sequence and is not decoded.
    break if current == BREAK

    yield decode(current)
  end
end
|
||||
|
||||
# Reads the size for the next token type
|
||||
private def read_size(current_byte : UInt8) : Int
|
||||
case current_byte
|
||||
|
|
Loading…
Reference in New Issue