Refactor lexer and simplify token

dev
Alberto Restifo 2020-04-21 15:02:31 +02:00
parent 56eed66541
commit 154876403e
6 changed files with 126 additions and 88 deletions

View File

@ -24,9 +24,12 @@ describe CBOR::Lexer do
it "reads #{tt[:bytes].hexstring} as #{tt[:value].to_s}" do it "reads #{tt[:bytes].hexstring} as #{tt[:value].to_s}" do
lexer = CBOR::Lexer.new(tt[:bytes]) lexer = CBOR::Lexer.new(tt[:bytes])
token = lexer.next_token token = lexer.read_next
token.should be_a(CBOR::Token::IntT) token.should_not be_nil
token.as(CBOR::Token::IntT).value.should eq(tt[:value]) next unless token
token[:kind].should eq(CBOR::Kind::Int)
token[:value].as(Int).should eq(tt[:value])
end end
end end
end end

View File

@ -5,5 +5,19 @@ module CBOR
VERSION = "0.1.0" VERSION = "0.1.0"
# Represents CBOR types # Represents CBOR types
alias Type = Nil | Bool | String | Bytes | Array(Type) | Hash(Type, Type) | Int8 | UInt8 | Int16 | UInt16 | Int32 | UInt32 | Int64 | UInt64 alias Type = Nil |
Bool |
String |
Bytes |
Array(Type) |
Hash(Type, Type) |
Int8 |
UInt8 |
Int16 |
UInt16 |
Int32 |
UInt32 |
Int64 |
UInt64 |
Int128
end end

View File

@ -15,36 +15,21 @@ class CBOR::Diagnostic
# representation of the input. # representation of the input.
def to_s : String def to_s : String
result = "" result = ""
while value = next_value
while val = next_value result << value
result += val
end end
result result
end end
private def next_value : String? private def next_value : String?
token = @lexer.next_token token = @lexer.read_next
return nil unless token return nil unless token
case token case token[:kind]
when Token::BytesArrayT when Kind::BytesArray
consume_bytes_array BytesArray.new(token[:value]).to_diagnostics
else else
Token.to_diagnostic(token) Token.do_diagnostics(token)
end end
end end
private def consume_bytes_array : String
elements = [] of String
loop do
token = @lexer.next_token
raise "Unexpected EOF" unless token
break if token.is_a?(Token::BytesArrayEndT)
elements << Token.to_diagnostic(token)
end
"(_ #{elements.join(", ")})"
end
end end

View File

@ -14,17 +14,62 @@ class CBOR::Lexer
# Holds a list of previously opened tokens. # Holds a list of previously opened tokens.
# When a break is reached, the last entry in the array is # When a break is reached, the last entry in the array is
# the token to close. # the token to close.
@open_tokens = [] of Token::T @open_tokens = [] of Kind
def initialize(@io : IO) def initialize(@io : IO)
@current_pos = 0 @current_pos = 0
end end
def next_token # Reads the next concrete value
# Returns only the value of the next token produced by `read_next`,
# discarding its kind. Yields `nil` when `read_next` returns no token.
def read_value : Type?
  if token = read_next
    token[:value]
  end
end
# Reads the next concrete value, returning it together with its token kind.
# Useful when you need to differentiate between Null and Undefined, which
# both carry a `nil` value.
#
# Returns `nil` once the lexer is at EOF, when no further token is available,
# or when the token kind is not one of those handled below.
def read_next : Token?
  return nil if @eof

  token = next_token
  return nil if token.nil?

  kind = token[:kind]
  case kind
  when Kind::BytesArray
    # The opening token has no payload: gather the chunks up to the break.
    {kind: kind, value: read_bytes_array}
  when Kind::Null, Kind::Undefined
    {kind: kind, value: nil}
  when Kind::Int, Kind::String, Kind::Bool, Kind::Float, Kind::Bytes
    # These tokens already hold their final value.
    token
  end
end
# Consumes the bytes array until it reaches a break and returns all the
# collected chunks joined into a single `Bytes`.
#
# Raises `ParseError` when the input ends before the break is found, or when
# a token other than `Kind::Bytes` appears inside the array.
def read_bytes_array : Bytes
  bytes = BytesArray.new

  loop do
    token = next_token
    raise ParseError.new("Unexpected EOF while reading bytes array") unless token

    kind = token[:kind]
    break if kind == Kind::BytesArrayEnd

    # Report the offending kind; `token.class` would only print the
    # NamedTuple type and say nothing about what was actually read.
    raise ParseError.new("Illegal token #{kind} while reading bytes array") unless kind == Kind::Bytes

    # A `Kind::Bytes` token carries a whole `Bytes` chunk rather than a
    # single byte, so every byte of the chunk is appended.
    bytes.concat(token[:value].as(Bytes))
  end

  bytes.to_bytes
end
private def next_token : Token?
return nil if @eof return nil if @eof
@current_pos = @io.pos.to_i64 @current_pos = @io.pos.to_i64
current_byte = @io.read_byte current_byte = next_byte
return nil unless current_byte return nil unless current_byte
case current_byte case current_byte
@ -62,9 +107,9 @@ class CBOR::Lexer
when 0x5b when 0x5b
consume_binary(read(UInt64)) consume_binary(read(UInt64))
when 0x5f when 0x5f
open_token(Token::BytesArrayT.new(@current_pos)) {kind: open_token(Kind::BytesArray), value: nil}
when 0xff when 0xff
finish_token {kind: finish_token, value: nil}
else else
raise ParseError.new("Unexpected first byte 0x#{current_byte.to_s(16)}") raise ParseError.new("Unexpected first byte 0x#{current_byte.to_s(16)}")
end end
@ -81,32 +126,32 @@ class CBOR::Lexer
end end
private def consume_int(value) private def consume_int(value)
Token::IntT.new(@current_pos, value) {kind: Kind::Int, value: value}
end end
private def consume_binary(size) private def consume_binary(size)
bytes = Bytes.new(size) bytes = Bytes.new(size)
@io.read_fully(bytes) @io.read_fully(bytes)
Token::BytesT.new(@current_pos, bytes) {kind: Kind::Bytes, value: bytes}
end end
private def open_token(token : Token::T) : Token::T private def open_token(kind : Kind) : Kind
@open_tokens.push(token) @open_tokens << kind
token kind
end end
private def finish_token : Token::T private def finish_token : Kind
opened_token = @open_tokens.pop opened_token = @open_tokens.pop
case opened_token case opened_token
when Token::ArrayT when Kind::Array
Token::ArrayEndT.new(@current_pos) Kind::ArrayEnd
when Token::BytesArrayT when Kind::BytesArray
Token::BytesArrayEndT.new(@current_pos) Kind::BytesArrayEnd
when Token::StringArrayT when Kind::StringArray
Token::StringArrayEndT.new(@current_pos) Kind::StringArrayEnd
else else
raise ParseError.new("Unexpected token termination #{opened_token.class}") raise ParseError.new("Unexpected token termination #{opened_token.to_s}")
end end
end end

View File

@ -1,63 +1,50 @@
class CBOR::Token enum CBOR::Kind
record NullT, byte_number : Int64 Null
record UndefinedT, byte_number : Int64 Undefined
record BoolT, byte_number : Int64, value : Bool Bool
record ArrayT, byte_number : Int64, size : UInt32? Int
record ArrayEndT, byte_number : Int64 Float
record MapT, byte_number : Int64, size : UInt32? Bytes
record IntT, byte_number : Int64, value : Int8 | UInt8 | Int16 | UInt16 | Int32 | UInt32 | Int64 | UInt64 | Int128 BytesArray
record FloatT, byte_number : Int64, value : Float64 BytesArrayEnd
record StringT, byte_number : Int64, value : String String
record BytesT, byte_number : Int64, value : Bytes StringArray
record StringArrayT, byte_number : Int64 StringArrayEnd
record StringArrayEndT, byte_number : Int64 Array
record BytesArrayT, byte_number : Int64 ArrayEnd
record BytesArrayEndT, byte_number : Int64 Map
alias T = NullT | def to_diagnostic : String
UndefinedT | case self
BoolT | when Int
ArrayT |
ArrayEndT |
MapT |
IntT |
FloatT |
StringT |
BytesT |
StringArrayT |
StringArrayEndT |
BytesArrayT |
BytesArrayEndT
def self.to_diagnostic(token : T) : String
case token
when IntT
token.value.to_s token.value.to_s
when BytesT when Bytes
return %(h'') if token.value.empty? return %(h'') if token.value.empty?
"h'#{token.value.hexstring}'" "h'#{token.value.hexstring}'"
when NullT when Null
"null" "null"
when UndefinedT when Undefined
"undefined" "undefined"
when BoolT when BoolT
token.value.to_s token.value.to_s
when BytesArrayT when BytesArray
"(_ " "(_ "
when BytesArrayEndT when BytesArrayEnd
")" ")"
when FloatT when Float
"TODO" "TODO"
when StringT when String
"TODO" "TODO"
when StringArrayT when StringArray
"TODO" "TODO"
when MapT when Map
"TODO" "TODO"
when ArrayT when Array
"TODO" "TODO"
else else
raise "Uknown diagnostics representation for #{token.class}" raise "Uknown diagnostics representation for #{self.to_s}"
end end
end end
end end
alias CBOR::Token = NamedTuple(kind: Kind, value: Type)

View File

@ -1,4 +1,8 @@
class CBOR::BytesArray < Array(UInt8) class CBOR::BytesArray < Array(UInt8)
# Builds a BytesArray from a `Bytes` slice: the slice is converted with
# `to_a` and the resulting array is handed to another `new` overload.
# NOTE(review): assumes a `new` overload accepting an Array(UInt8) is
# reachable from this class; none is visible here, so confirm.
def self.new(bytes : Bytes)
new(bytes.to_a)
end
def to_a : Array(UInt8) def to_a : Array(UInt8)
self.as(Array(UInt8)) self.as(Array(UInt8))
end end