Implement array tokenization

2020-04-23 09:40:51 +02:00 · 2020-04-23 09:40:51 +02:00 · 3c91037cf3
commit 3c91037cf3
parent 547e8af2bd
3 changed files with 42 additions and 30 deletions
--- a/spec/rfc_spec.cr
+++ b/spec/rfc_spec.cr
@@ -73,23 +73,23 @@ tests = [
  { %("\u00fc"), "62 c3 bc" },
  { %("\u6c34"), "63 e6 b0 b4" },
  # { %("\ud800\udd51"), "64 f0 90 85 91" }, TODO: Maybe there is a problem with unicode escaping? Or maybe it's just the diagnostics
-  # { %([]), "80" },
-  # { %([1, 2, 3]), "83 01 02 03" },
-  # { %([1, [2, 3], [4, 5]]), "83 01 82 02 03 82 04 05" },
-  # { %([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]), "98 19 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f 10 11 12 13 14 15 16 17 18 18 18 19" },
+  { %([]), "80" },
+  { %([1, 2, 3]), "83 01 02 03" },
+  { %([1, [2, 3], [4, 5]]), "83 01 82 02 03 82 04 05" },
+  { %([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]), "98 19 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f 10 11 12 13 14 15 16 17 18 18 18 19" },
  # { %({}), "a0" },
  # { %({1: 2, 3: 4}), "a2 01 02 03 04" },
  # { %({"a": 1, "b": [2, 3]}), "a2 61 61 01 61 62 82 02 03" },
  # { %(["a", {"b": "c"}]), "82 61 61 a1 61 62 61 63" },
  # { %({"a": "A", "b": "B", "c": "C", "d": "D", "e": "E"}), "a5 61 61 61 41 61 62 61 42 61 63 61 43 61 64 61 44 61 65 61 45" },
-  # { %((_ h'0102', h'030405')), "5f 42 01 02 43 03 04 05 ff" },
-  # { %((_ "strea", "ming")), "7f 65 73 74 72 65 61 64 6d 69 6e 67 ff" },
-  # { %([_ ]), "9f ff" },
-  # { %([_ 1, [2, 3], [_ 4, 5]]), "9f 01 82 02 03 9f 04 05 ff ff" },
-  # { %([_ 1, [2, 3], [4, 5]]), "9f 01 82 02 03 82 04 05 ff" },
-  # { %([1, [2, 3], [_ 4, 5]]), "83 01 82 02 03 9f 04 05 ff" },
-  # { %([1, [_ 2, 3], [4, 5]]), "83 01 9f 02 03 ff 82 04 05" },
-  # { %([_ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]), "9f 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f 10 11 12 13 14 15 16 17 18 18 18 19 ff" },
+  { %((_ h'0102', h'030405')), "5f 42 01 02 43 03 04 05 ff" },
+  { %((_ "strea", "ming")), "7f 65 73 74 72 65 61 64 6d 69 6e 67 ff" },
+  { %([_ ]), "9f ff" },
+  { %([_ 1, [2, 3], [_ 4, 5]]), "9f 01 82 02 03 9f 04 05 ff ff" },
+  { %([_ 1, [2, 3], [4, 5]]), "9f 01 82 02 03 82 04 05 ff" },
+  { %([1, [2, 3], [_ 4, 5]]), "83 01 82 02 03 9f 04 05 ff" },
+  { %([1, [_ 2, 3], [4, 5]]), "83 01 9f 02 03 ff 82 04 05" },
+  { %([_ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]), "9f 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f 10 11 12 13 14 15 16 17 18 18 18 19 ff" },
  # { %({_ "a": 1, "b": [_ 2, 3]}), "bf 61 61 01 61 62 9f 02 03 ff ff" },
  # { %(["a", {_ "b": "c"}]), "82 61 61 bf 61 62 61 63 ff" },
  # { %({_ "Fun": true, "Amt": -2}), "bf 63 46 75 6e f5 63 41 6d 74 21 ff" },
--- a/src/cbor/diagnostic.cr
+++ b/src/cbor/diagnostic.cr
@@ -42,19 +42,31 @@ class CBOR::Diagnostic
      else
        bytes(token.value)
      end
-      # when Kind::Array
-      #   value = token.value.as(Array(Type))
-      #   return "[]" unless value.size > 0
-
-      #   content = value.map { |token| to_diagnostic(token) }.join(", ")
-
-      #   return "[#{content}]" if token.size
-      #   "[_ #{content}]"
+    when Token::ArrayT
+      arr = read_array(token.size)
+      return "[#{arr.join(", ")}]" if token.size
+      "[_ #{arr.join(", ")}]"
    else
      token.inspect
    end
  end

+  private def read_array(size : Int32?) : Array(String)
+    arr = size ? Array(String).new(size) : Array(String).new
+
+    if size
+      size.times do
+        val = next_value
+        raise ParseError.new("Unexpected EOF while reading array body") unless val
+        arr << val
+      end
+    else
+      @lexer.until_break { |token| arr << to_diagnostic(token) }
+    end
+
+    arr
+  end
+
  private def chunks(value : Bytes, chunks : Array(Int32)) : Array(Bytes)
    res = Array(Bytes).new
    bytes = value.to_a
--- a/src/cbor/lexer.cr
+++ b/src/cbor/lexer.cr
@@ -20,6 +20,15 @@ class CBOR::Lexer
    decode(byte)
  end

+  def until_break(&block : Token::T ->)
+    loop do
+      byte = next_byte
+      raise ParseError.new("unexpected EOF while searching for break") unless byte
+      break if byte == BREAK
+      yield decode(byte)
+    end
+  end
+
  private def decode(byte : UInt8) : Token::T
    case byte
    when 0x00..0x1b
@@ -34,7 +43,7 @@ class CBOR::Lexer
      consume_string(read_size(byte - 0x60))
    when 0x7f
      read_string_array
-    when 0x80..0x97
+    when 0x80..0x9b
      array_start(read_size(byte - 0x80))
    when 0x9f
      Token::ArrayT.new
@@ -75,15 +84,6 @@ class CBOR::Lexer
    Token::StringT.new(value: value, chunks: chunks)
  end

-  private def until_break(&block : Token::T ->)
-    loop do
-      byte = next_byte
-      raise ParseError.new("unexpected EOF while searching for break") unless byte
-      break if byte == BREAK
-      yield decode(byte)
-    end
-  end
-
  # Reads the size for the next token type
  private def read_size(current_byte : UInt8) : Int
    case current_byte