Refactor lexer in preparation for decoder

dev
Alberto Restifo 2020-04-22 23:48:58 +02:00
parent ddb2615031
commit 9739c4971b
4 changed files with 111 additions and 222 deletions

View File

@ -24,12 +24,12 @@ describe CBOR::Lexer do
it "reads #{tt[:bytes].hexstring} as #{tt[:value].to_s}" do
lexer = CBOR::Lexer.new(tt[:bytes])
token = lexer.read_next
token = lexer.next_token
token.should_not be_nil
next unless token
token.kind.should eq(CBOR::Kind::Int)
token.value.as(Int).should eq(tt[:value])
token.should be_a(CBOR::Token::IntT)
token.as(CBOR::Token::IntT).value.should eq(tt[:value])
end
end
end

View File

@ -19,28 +19,28 @@ class CBOR::Diagnostic
end
private def next_value : String?
token = @lexer.read_next
token = @lexer.next_token
return nil unless token
to_diagnostic(token)
end
private def to_diagnostic(token : Token) : String
case token.kind
when Kind::Int
private def to_diagnostic(token : Token::T) : String
case token
when Token::IntT
token.value.to_s
when Kind::String
when Token::StringT
if token.chunks
chunks = chunks(token.value.as(String), token.chunks.as(Array(Int32)))
chunks = chunks(token.value, token.chunks.not_nil!)
"(_ #{chunks.map { |s| string(s) }.join(", ")})"
else
string(token.value.as(String))
string(token.value)
end
when Kind::Bytes
when Token::BytesT
if token.chunks
chunks = chunks(token.value.as(Bytes), token.chunks.as(Array(Int32)))
chunks = chunks(token.value, token.chunks.not_nil!)
"(_ #{chunks.map { |b| bytes(b) }.join(", ")})"
else
bytes(token.value.as(Bytes))
bytes(token.value)
end
# when Kind::Array
# value = token.value.as(Array(Type))
@ -51,7 +51,7 @@ class CBOR::Diagnostic
# return "[#{content}]" if token.size
# "[_ #{content}]"
else
token.kind.to_s
token.inspect
end
end

View File

@ -1,4 +1,6 @@
class CBOR::Lexer
BREAK = 0xff
def self.new(string : String)
new IO::Memory.new(string)
end
@ -7,235 +9,127 @@ class CBOR::Lexer
new IO::Memory.new(slice)
end
@current_pos : Int64
@eof : Bool = false
# Holds a list of previously opened tokens.
# When a break in reached, the last entry in the array is
# the token to close.
@open_tokens = [] of Kind
@current_byte : UInt8 = 0
def initialize(@io : IO)
@current_pos = 0
end
# Reads the next concrete value
def read_value : Type?
res = read_next
return nil unless res
res.value
end
# Reads the next concrete value, returning the token kind.
# Useful when you need to differentiate between Null and Undefined.
def read_next : Token?
def next_token : Token::T?
return nil if @eof
token = next_token
return nil unless token
byte = next_byte
return nil unless byte
case token.kind
when Kind::Int,
Kind::String,
Kind::Bool,
Kind::Float,
Kind::Bytes
token
when Kind::Null,
Kind::Undefined
Token.new(kind: token.kind, value: nil)
when Kind::BytesArray
decode(byte)
end
private def decode(byte : UInt8) : Token::T
case byte
when 0x00..0x1b
consume_int(read_size(byte))
when 0x20..0x3b
consume_int(to_negative_int(read_size(byte - 0x20)))
when 0x40..0x5b
consume_binary(read_size(byte - 0x40))
when 0x5f
read_bytes_array
when Kind::StringArray
when 0x60..0x7b
consume_string(read_size(byte - 0x60))
when 0x7f
read_string_array
when Kind::Array
read_array(token)
when 0x80..0x97
array_start(read_size(byte - 0x80))
when 0x9f
Token::ArrayT.new
else
raise ParseError.new("Unexpected first byte 0x#{byte.to_s(16)}")
end
end
# Consumes the bytes array until it reaches a break
def read_bytes_array : Token
private def read_bytes_array : Token::BytesT
bytes = BytesArray.new
chunks = Array(Int32).new
read_until(Kind::BytesArrayEnd, only: Kind::Bytes) do |c|
chunk = c.as(Bytes)
chunks << chunk.size
bytes << chunk
until_break do |token|
unless token.is_a?(Token::BytesT)
raise ParseError.new("Invalid token #{token.class} while parsing a bytes array")
end
chunks << token.value.size
bytes << token.value
end
Token.new(
kind: Kind::Bytes,
value: bytes.to_bytes,
chunks: chunks,
)
Token::BytesT.new(value: bytes.to_bytes, chunks: chunks)
end
# Reads until break for chunks of strings
def read_string_array : Token
private def read_string_array : Token::StringT
value = ""
chunks = Array(Int32).new
read_until(Kind::StringArrayEnd, only: Kind::String) do |c|
chunk = c.as(String)
chunks << chunk.size
value += chunk
end
Token.new(
kind: Kind::String,
value: value,
chunks: chunks,
)
end
def read_array(token : Token) : Token
if token.size.nil?
read_until(Kind::ArrayEnd) { |element| token.value.as(Array(Type)) << element }
else
token.size.not_nil!.times { token.value.as(Array(Type)) << read_value }
end
token
end
private def next_token : Token?
return nil if @eof
@current_pos = @io.pos.to_i64
current_byte = next_byte
return nil unless current_byte
case current_byte
when 0x00..0x17
consume_int(current_byte)
when 0x18
consume_int(read(UInt8))
when 0x19
consume_int(read(UInt16))
when 0x1a
consume_int(read(UInt32))
when 0x1b
consume_int(read(UInt64))
when 0x20..0x37
# This range represents values from -1..-24 so we subtract 0x20
# from the uint8 value to
consume_int(to_negative_int(current_byte.to_u8 - 0x20))
when 0x38
consume_int(to_negative_int(read(UInt8)))
when 0x39
consume_int(to_negative_int(read(UInt16)))
when 0x3a
consume_int(to_negative_int(read(UInt32)))
when 0x3b
consume_int(to_negative_int(read(UInt64)))
when 0x40..0x57
# read current_byte - 0x40 bytes ahead
consume_binary(current_byte - 0x40)
when 0x58
consume_binary(read(UInt8))
when 0x59
consume_binary(read(UInt16))
when 0x5a
consume_binary(read(UInt32))
when 0x5b
consume_binary(read(UInt64))
when 0x5f
Token.new(kind: open_token(Kind::BytesArray), value: nil)
when 0x60..0x77
consume_string(current_byte - 0x60)
when 0x78
consume_string(read(UInt8))
when 0x79
consume_string(read(UInt16))
when 0x7a
consume_string(read(UInt32))
when 0x7b
consume_string(read(UInt16))
when 0x7f
Token.new(kind: open_token(Kind::StringArray), value: nil)
when 0x80..0x97
array_start(current_byte - 0x80)
when 0x98
array_start(read(UInt8))
when 0x99
array_start(read(UInt16))
when 0x9a
array_start(read(UInt32))
when 0x9b
array_start(read(UInt64))
when 0x9f
Token.new(kind: open_token(Kind::Array), value: nil)
when 0xff
Token.new(kind: finish_token, value: nil)
else
raise ParseError.new("Unexpected first byte 0x#{current_byte.to_s(16)}")
end
end
# Reads tokens until it meets the stop kind.
# Optionally it can fail when the read token is not of the passed kind.
private def read_until(stop : Kind, only : Kind? = nil, &block)
loop do
token = next_token
raise ParseError.new("Unexpected EOF") unless token
break if token.kind == stop
if only && token.kind != only
raise ParseError.new("Illegal token #{token.kind.to_s} while reading #{only.to_s} array")
until_break do |token|
unless token.is_a?(Token::StringT)
raise ParseError.new("Invalid token #{token.class} while parsing a string array")
end
yield token.value
chunks << token.value.size
value += token.value
end
Token::StringT.new(value: value, chunks: chunks)
end
private def until_break(&block : Token::T ->)
loop do
byte = next_byte
raise ParseError.new("unexpected EOF while searching for break") unless byte
break if byte == BREAK
yield decode(byte)
end
end
# Reads the size for the next token type
private def read_size(current_byte : UInt8) : Int
case current_byte
when 0x00..0x17
current_byte
when 0x18
read(UInt8)
when 0x19
read(UInt16)
when 0x1a
read(UInt32)
when 0x1b
read(UInt64)
else
raise ParseError.new("Unexpected byte 0x#{current_byte.to_s(16)} while reading size")
end
end
private def next_byte : UInt8?
byte = @io.read_byte
if byte
byte
else
@eof = true
nil
end
return byte if byte
@eof = true
nil
end
private def consume_int(value)
Token.new(kind: Kind::Int, value: value)
Token::IntT.new(value: value)
end
private def consume_binary(size : Int)
raise ParseError.new("Maximum size for binary array exeeded") if size > Int32::MAX
bytes = read_bytes(size)
Token.new(kind: Kind::Bytes, value: bytes)
Token::BytesT.new(value: bytes)
end
private def consume_string(size)
Token.new(kind: Kind::String, value: @io.read_string(size))
Token::StringT.new(value: @io.read_string(size))
end
private def array_start(size)
raise ParseError.new("Maximum size for array exeeded") if size > Int32::MAX
s = size.to_i32
Token.new(kind: Kind::Array, value: Array(Type).new(s), size: s)
end
private def open_token(kind : Kind) : Kind
@open_tokens << kind
kind
end
private def finish_token : Kind
opened_token = @open_tokens.pop
case opened_token
when Kind::Array
Kind::ArrayEnd
when Kind::BytesArray
Kind::BytesArrayEnd
when Kind::StringArray
Kind::StringArrayEnd
else
raise ParseError.new("Unexpected token termination #{opened_token.to_s}")
end
Token::ArrayT.new(size: size.to_i32)
end
# Creates a method overloaded for each UInt sizes to convert the UInt into

View File

@ -1,24 +1,19 @@
enum CBOR::Kind
Null
Undefined
Bool
Int
Float
Bytes
BytesArray
BytesArrayEnd
String
StringArray
StringArrayEnd
Array
ArrayEnd
Map
end
module CBOR::Token
record NullT, undefined : Bool = false
record BoolT, value : Bool
record IntT, value : Int8 | UInt8 | Int16 | UInt16 | Int32 | UInt32 | Int64 | UInt64 | Int128
record FloatT, value : Float32 | Float64
record BytesT, value : Bytes, chunks : Array(Int32)? = nil
record StringT, value : String, chunks : Array(Int32)? = nil
record ArrayT, size : Int32? = nil
record MapT, size : Int32? = nil
record CBOR::Token,
kind : Kind,
value : Type,
size : Int32? = nil,
# Used only for BytesArray and StringArray: it contains the size of each
# chunks composing the type
chunks : Array(Int32)? = nil
alias T = NullT |
BoolT |
IntT |
FloatT |
BytesT |
StringT |
ArrayT |
MapT
end