Refactor lexer and simplify token

dev
Alberto Restifo 2020-04-21 15:02:31 +02:00
parent 56eed66541
commit 154876403e
6 changed files with 126 additions and 88 deletions

View File

@ -24,9 +24,12 @@ describe CBOR::Lexer do
it "reads #{tt[:bytes].hexstring} as #{tt[:value].to_s}" do
lexer = CBOR::Lexer.new(tt[:bytes])
token = lexer.next_token
token.should be_a(CBOR::Token::IntT)
token.as(CBOR::Token::IntT).value.should eq(tt[:value])
token = lexer.read_next
token.should_not be_nil
next unless token
token[:kind].should eq(CBOR::Kind::Int)
token[:value].as(Int).should eq(tt[:value])
end
end
end

View File

@ -5,5 +5,19 @@ module CBOR
VERSION = "0.1.0"
# Represents CBOR types
alias Type = Nil | Bool | String | Bytes | Array(Type) | Hash(Type, Type) | Int8 | UInt8 | Int16 | UInt16 | Int32 | UInt32 | Int64 | UInt64
alias Type = Nil |
Bool |
String |
Bytes |
Array(Type) |
Hash(Type, Type) |
Int8 |
UInt8 |
Int16 |
UInt16 |
Int32 |
UInt32 |
Int64 |
UInt64 |
Int128
end

View File

@ -15,36 +15,21 @@ class CBOR::Diagnostic
# representation of the input.
def to_s : String
  # Crystal strings are immutable and do not respond to `<<`, so the pieces
  # are appended to a String::Builder supplied by String.build. This also
  # avoids allocating a fresh String for every value, as `+=` would.
  String.build do |io|
    # next_value returns nil once the lexer has no more tokens.
    while value = next_value
      io << value
    end
  end
end
private def next_value : String?
token = @lexer.next_token
token = @lexer.read_next
return nil unless token
case token
when Token::BytesArrayT
consume_bytes_array
case token[:kind]
when Kind::BytesArray
BytesArray.new(token[:value]).to_diagnostics
else
Token.to_diagnostic(token)
Token.do_diagnostics(token)
end
end
private def consume_bytes_array : String
elements = [] of String
loop do
token = @lexer.next_token
raise "Unexpected EOF" unless token
break if token.is_a?(Token::BytesArrayEndT)
elements << Token.to_diagnostic(token)
end
"(_ #{elements.join(", ")})"
end
end

View File

@ -14,17 +14,62 @@ class CBOR::Lexer
# Holds a list of previously opened tokens.
# When a break is reached, the last entry in the array is
# the token to close.
@open_tokens = [] of Token::T
@open_tokens = [] of Kind
# Creates a lexer reading from *io*; the recorded position starts at 0.
def initialize(io : IO)
  @io = io
  @current_pos = 0
end
def next_token
# Reads the next concrete value, discarding its kind.
# Returns nil when `read_next` yields no token.
def read_value : Type?
  if token = read_next
    token[:value]
  end
end
# Reads the next concrete value, returning it together with its kind.
# Useful when you need to differentiate between Null and Undefined.
#
# Returns nil once the input is exhausted, and also for any kind that is
# not handled below.
def read_next : Token?
  return nil if @eof

  current = next_token
  return nil unless current

  case kind = current[:kind]
  when Kind::BytesArray
    # Indefinite-length byte string: gather every chunk up to the break.
    {kind: kind, value: read_bytes_array}
  when Kind::Null, Kind::Undefined
    {kind: kind, value: nil}
  when Kind::Int, Kind::String, Kind::Bool, Kind::Float, Kind::Bytes
    # Already a complete value; hand the token back untouched.
    current
  end
end
# Consumes the chunks of an indefinite-length bytes array until it reaches
# a break, returning all chunk contents joined together.
#
# Raises ParseError if the input ends before the break, or if anything
# other than a Bytes chunk appears inside the array.
def read_bytes_array : Bytes
  bytes = BytesArray.new

  loop do
    token = next_token
    raise ParseError.new("Unexpected EOF while reading bytes array") unless token

    kind = token[:kind]
    break if kind == Kind::BytesArrayEnd
    # Report the offending kind; the token's class is just the NamedTuple type.
    raise ParseError.new("Illegal token #{kind} while reading bytes array") unless kind == Kind::Bytes

    # A Bytes token carries a whole chunk (a Bytes slice), not a single
    # byte, so append every byte of the chunk.
    bytes.concat(token[:value].as(Bytes))
  end

  bytes.to_bytes
end
private def next_token : Token?
return nil if @eof
@current_pos = @io.pos.to_i64
current_byte = @io.read_byte
current_byte = next_byte
return nil unless current_byte
case current_byte
@ -62,9 +107,9 @@ class CBOR::Lexer
when 0x5b
consume_binary(read(UInt64))
when 0x5f
open_token(Token::BytesArrayT.new(@current_pos))
{kind: open_token(Kind::BytesArray), value: nil}
when 0xff
finish_token
{kind: finish_token, value: nil}
else
raise ParseError.new("Unexpected first byte 0x#{current_byte.to_s(16)}")
end
@ -81,32 +126,32 @@ class CBOR::Lexer
end
private def consume_int(value)
Token::IntT.new(@current_pos, value)
{kind: Kind::Int, value: value}
end
private def consume_binary(size)
bytes = Bytes.new(size)
@io.read_fully(bytes)
Token::BytesT.new(@current_pos, bytes)
{kind: Kind::Bytes, value: bytes}
end
private def open_token(token : Token::T) : Token::T
@open_tokens.push(token)
token
private def open_token(kind : Kind) : Kind
@open_tokens << kind
kind
end
# Closes the most recently opened token and returns the kind that marks
# its end.
#
# Raises ParseError when a break is read while no token is open, or when
# the open token's kind has no matching end kind.
private def finish_token : Kind
  # `pop?` returns nil on an empty stack instead of raising IndexError, so a
  # stray break byte in the input is reported as a parse error.
  opened_token = @open_tokens.pop?
  raise ParseError.new("Unexpected break: no open token to terminate") unless opened_token

  case opened_token
  when Kind::Array
    Kind::ArrayEnd
  when Kind::BytesArray
    Kind::BytesArrayEnd
  when Kind::StringArray
    Kind::StringArrayEnd
  else
    raise ParseError.new("Unexpected token termination #{opened_token.to_s}")
  end
end

View File

@ -1,63 +1,50 @@
class CBOR::Token
record NullT, byte_number : Int64
record UndefinedT, byte_number : Int64
record BoolT, byte_number : Int64, value : Bool
record ArrayT, byte_number : Int64, size : UInt32?
record ArrayEndT, byte_number : Int64
record MapT, byte_number : Int64, size : UInt32?
record IntT, byte_number : Int64, value : Int8 | UInt8 | Int16 | UInt16 | Int32 | UInt32 | Int64 | UInt64 | Int128
record FloatT, byte_number : Int64, value : Float64
record StringT, byte_number : Int64, value : String
record BytesT, byte_number : Int64, value : Bytes
record StringArrayT, byte_number : Int64
record StringArrayEndT, byte_number : Int64
record BytesArrayT, byte_number : Int64
record BytesArrayEndT, byte_number : Int64
enum CBOR::Kind
Null
Undefined
Bool
Int
Float
Bytes
BytesArray
BytesArrayEnd
String
StringArray
StringArrayEnd
Array
ArrayEnd
Map
alias T = NullT |
UndefinedT |
BoolT |
ArrayT |
ArrayEndT |
MapT |
IntT |
FloatT |
StringT |
BytesT |
StringArrayT |
StringArrayEndT |
BytesArrayT |
BytesArrayEndT
def self.to_diagnostic(token : T) : String
case token
when IntT
def to_diagnostic : String
case self
when Int
token.value.to_s
when BytesT
when Bytes
return %(h'') if token.value.empty?
"h'#{token.value.hexstring}'"
when NullT
when Null
"null"
when UndefinedT
when Undefined
"undefined"
when BoolT
token.value.to_s
when BytesArrayT
when BytesArray
"(_ "
when BytesArrayEndT
when BytesArrayEnd
")"
when FloatT
when Float
"TODO"
when StringT
when String
"TODO"
when StringArrayT
when StringArray
"TODO"
when MapT
when Map
"TODO"
when ArrayT
when Array
"TODO"
else
raise "Uknown diagnostics representation for #{token.class}"
raise "Uknown diagnostics representation for #{self.to_s}"
end
end
end
alias CBOR::Token = NamedTuple(kind: Kind, value: Type)

View File

@ -1,4 +1,8 @@
class CBOR::BytesArray < Array(UInt8)
# Builds an instance from a Bytes slice by converting it to an array and
# forwarding that array to `new`.
# NOTE(review): no `new` overload accepting an Array(UInt8) is visible in
# this view — confirm one exists, otherwise this call cannot resolve.
def self.new(bytes : Bytes)
new(bytes.to_a)
end
# Returns this object typed as a plain Array(UInt8).
# `as` only changes the static type here; the elements are not copied.
def to_a : Array(UInt8)
self.as(Array(UInt8))
end