From 9d1c255c2c3b61cb068424ae8fa770480c792d7b Mon Sep 17 00:00:00 2001 From: Alberto Restifo Date: Sun, 19 Apr 2020 00:18:54 +0200 Subject: [PATCH] Start lexer and tests --- spec/cbor/lexer_spec.cr | 35 ++++++++++++ src/cbor/lexer.cr | 122 +++++++++++++++++++++++++--------------- src/cbor/token.cr | 29 ++++++---- 3 files changed, 129 insertions(+), 57 deletions(-) create mode 100644 spec/cbor/lexer_spec.cr diff --git a/spec/cbor/lexer_spec.cr b/spec/cbor/lexer_spec.cr new file mode 100644 index 0000000..07a5d8a --- /dev/null +++ b/spec/cbor/lexer_spec.cr @@ -0,0 +1,35 @@ +require "../spec_helper" + +describe CBOR::Lexer do + describe "examples from the RFC7049 Appendix A" do + tests : Array(Tuple(String, Bytes)) = [ + {"0", [0x00]}, + {"1", [0x01]}, + # {"10", "0a"}, + # {"23", "17"}, + # {"24", "18 18"}, + # {"25", "18 19"}, + # {"100", "18 64"}, + # {"1000", "19 03 e8"}, + # {"1000000", "1a 00 0f 42 40"}, + # {"1000000000000", "1b 00 00 00 e8 d4 a5 10 00"}, + # {"18446744073709551615", "1b ff ff ff ff ff ff ff ff"}, + # {"18446744073709551616", "c2 49 01 00 00 00 00 00 00 0000"}, + # {"-18446744073709551616", "3b ff ff ff ff ff ff ff ff"}, + # {"-18446744073709551617", "c3 49 01 00 00 00 00 00 00 00 00"}, + # {"-1", "20"}, + # {"-10", "29"}, + # {"-100", "38 63"}, + # {"-1000", "39 03 e7"}, + ] + + tests.each do |tt| + it "Reads #{tt[1].inspect} as #{tt[0]}" do + lexer = CBOR::Lexer.new(tt[1]) + + token = lexer.read_token + CBOR::Token.to_s(token).should eq(tt[0]) + end + end + end +end diff --git a/src/cbor/lexer.cr b/src/cbor/lexer.cr index 34b6906..8da4221 100644 --- a/src/cbor/lexer.cr +++ b/src/cbor/lexer.cr @@ -1,67 +1,97 @@ require "./token" class CBOR::Lexer - @current_byte : UInt8 - - def initialize(@io : IO) - @current_byte = 0x0 + def self.new(string : String) + new IO::Memory.new(string) end - def next_token : Token? - byte = @io.read_byte - return nil if byte.nil? 
+ def self.new(slice : Bytes) + new IO::Memory.new(slice) + end - @current_byte = byte + @token : Token::T - # See: RFC7049 Appedix B - case @current_byte - when .<= 0x17 - Token.new(kind: Token::Kind::UInt, value: @curren_byte) - when 0x18 - read_uint8 - when 0x19 - read_uint16 - when 0x1a - read_uint32 - when 0x1b - read_uint64 - when .<= 0x37 - Token.new(kind: Token::Kind::NInt, value: Int8(@curren_byte)) + def initialize(@io : IO) + @byte_number = 0 + @current_byte_number = 0 + @token = Token::NullT.new(0) + @token_finished = true + end + + @[AlwaysInline] + def current_token : Token::T + if @token_finished + @token_finished = false + @token = next_token + else + @token end end - private def read_uint8 + @[AlwaysInline] + def finish_token! + @token_finished = true + end + + @[AlwaysInline] + def read_token : Token::T + if @token_finished + @token = next_token + else + finish_token! + end + @token + end + + private def next_token + @current_byte_number = @byte_number + current_byte = next_byte + + case current_byte + when 0x00..0x17 + consume_int(current_byte) + when 0x18 + consume_int(read(UInt8)) + when 0x19 + consume_int(read(UInt16)) + when 0x1a + consume_int(read(UInt32)) + when 0x1b + consume_int(read(UInt64)) + when 0x20..0x37 + consume_int(flip(current_byte.to_i8)) + when 0x38 + consume_int(flip(read(UInt8).to_i8)) + when 0x39 + consume_int(flip(read(UInt16).to_i16)) + when 0x3a + consume_int(flip(read(UInt32).to_i32)) + when 0x3b + consume_int(flip(read(UInt64).to_i64)) + end + end + + private def next_byte : UInt8 byte = @io.read_byte - return unexpect_eof if byte.nil? 
- - Token.new(kind: Token::Kind::UInt, value: byte) + @byte_number += 1 + fail unless byte + byte end - private def read_uint16 - value = UInt16.from_io(read_next(2), IO::ByteFormat::BigEndian) - Token.new(kind: Token::Kind::UInt, value: value) + private def consume_int(value) + Token::IntT.new(@current_byte_number, value) end - private def read_uint32 - value = UInt32.from_io(read_next(4), IO::ByteFormat::BigEndian) - Token.new(kind: Token::Kind::UInt, value: value) + private def flip(value) + -1 - value end - private def read_uint64 - value = UInt64.from_io(read_next(8), IO::ByteFormat::BigEndian) - Token.new(kind: Token::Kind::UInt, value: value) + private def read(type : T.class) forall T + @byte_number += sizeof(T) + @io.read_bytes(T, IO::ByteFormat::NetworkEndian) end - private def read_next(n : Int) - slice = Bytes.new(n) - - read = @io.read(slice) - return unexpect_eof if read == 0 - - slice - end - - private def unexpected_eof - raise "Unexpected EOF" + private def fail + raise "Parse error" end end diff --git a/src/cbor/token.cr b/src/cbor/token.cr index a5079cd..118c840 100644 --- a/src/cbor/token.cr +++ b/src/cbor/token.cr @@ -1,14 +1,21 @@ -class CBOR::Token(T) - enum Kind - UInt8 - NInt - Byte - Text - Array - Map - Float - end +class CBOR::Token + record NullT, byte_number : Int32 + record BoolT, byte_number : Int32, value : Bool + record ArrayT, byte_number : Int32, size : UInt32 + record MapT, byte_number : Int32, size : UInt32 + record IntT, byte_number : Int32, value : Int8 | UInt8 | Int16 | UInt16 | Int32 | UInt32 | Int64 | UInt64 + record FloatT, byte_number : Int32, value : Float64 + record StringT, byte_number : Int32, value : String + record BytesT, byte_number : Int32, value : Bytes - def initialize(@value : T, @kind : Kind) + alias T = NullT | BoolT | ArrayT | MapT | IntT | FloatT | StringT | BytesT + + def self.to_s(token : T) + case token + when IntT + token.value.to_s + else + "NOT IMPLEMENTED YET!" + end end end