From 8baacbb606c321dbaaf3dfd4d323bc9726ecd4cf Mon Sep 17 00:00:00 2001
From: Alberto Restifo
Date: Sun, 24 May 2020 14:36:34 +0200
Subject: [PATCH] Add encoding capabilities

* Create CBOR::Encoder, capable of translating Crystal values
* Add `#to_cbor` method to all major types
* Add `#to_cbor` in the CBOR::Serializable macro
---
 spec/cbor/encoder_spec.cr      |  85 ++++++++++++++++
 spec/cbor/serializable_spec.cr |   2 +-
 src/cbor/encoder.cr            | 180 +++++++++++++++++++++++++++++++++
 src/cbor/serializable.cr       | 128 +++++++++++------------
 src/cbor/to_cbor.cr            |  98 ++++++++++++++++++
 5 files changed, 421 insertions(+), 72 deletions(-)
 create mode 100644 spec/cbor/encoder_spec.cr
 create mode 100644 src/cbor/encoder.cr
 create mode 100644 src/cbor/to_cbor.cr

diff --git a/spec/cbor/encoder_spec.cr b/spec/cbor/encoder_spec.cr
new file mode 100644
index 0000000..c7e5dca
--- /dev/null
+++ b/spec/cbor/encoder_spec.cr
@@ -0,0 +1,85 @@
+require "../spec_helper"
+
+# Selection of tests from the RFC
+tests = [
+  # Float16 is not supported for the encoding, so those tests will never work
+  # {0.0, "f9 00 00"},
+  # {-0.0, "f9 80 00"},
+  # {1.0, "f9 3c 00"},
+  # {1.5, "f9 3e 00"},
+  # {65504.0, "f9 7b ff"},
+  # {6.1035156e-5, "f9 04 00"},
+  # {-4.0, "f9 c4 00"},
+  # {5.9604645e-8, "f9 00 01"},
+
+  {0, "00"},
+  {1, "01"},
+  {10, "0a"},
+  {23, "17"},
+  {24, "18 18"},
+  {25, "18 19"},
+  {100, "18 64"},
+  {1000, "19 03 e8"},
+  {1000000, "1a 00 0f 42 40"},
+  {1000000000000, "1b 00 00 00 e8 d4 a5 10 00"},
+  {18446744073709551615, "1b ff ff ff ff ff ff ff ff"},
+  {-1, "20"},
+  {-10, "29"},
+  {-100, "38 63"},
+  {-1000, "39 03 e7"},
+  {1.1, "fb 3f f1 99 99 99 99 99 9a"},
+  {100000.0_f32, "fa 47 c3 50 00"},
+  {3.4028235e+38_f32, "fa 7f 7f ff ff"},
+  {1.0e+300, "fb 7e 37 e4 3c 88 00 75 9c"},
+  {-4.1, "fb c0 10 66 66 66 66 66 66"},
+  {Float32::INFINITY, "fa 7f 80 00 00"},
+  {Float32::NAN, "fa 7f c0 00 00"},
+  {-Float32::INFINITY, "fa ff 80 00 00"},
+  {Float64::INFINITY, "fb 7f f0 00 00 00 00 00 00"},
+  {Float64::NAN, "fb 7f f8 00 00 00 00 00 00"},
+  {-Float64::INFINITY, "fb ff f0 00 00 00 00 00 00"},
+  {false, "f4"},
+  {true, "f5"},
+  {Nil, "f6"},
+  {CBOR::SimpleValue::Undefined, "f7"},
+  {CBOR::SimpleValue.new(16), "f0"},
+  {CBOR::SimpleValue.new(24), "f8 18"},
+  {CBOR::SimpleValue.new(255), "f8 ff"},
+  {Bytes[0x01, 0x02, 0x03, 0x04], "44 01 02 03 04"},
+  {"", "60"},
+  {"a", "61 61"},
+  {"IETF", "64 49 45 54 46"},
+  {"\u00fc", "62 c3 bc"},
+  {"\u6c34", "63 e6 b0 b4"},
+  {[] of UInt8, "80"},
+  {[1, 2, 3], "83 01 02 03"},
+  {[1, [2, 3], [4, 5]], "83 01 82 02 03 82 04 05"},
+  {[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25], "98 19 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f 10 11 12 13 14 15 16 17 18 18 18 19"},
+  {Hash(UInt8, UInt8).new, "a0"},
+  { {1 => 2, 3 => 4}, "a2 01 02 03 04" },
+  { {"a" => 1, "b" => [2, 3]}, "a2 61 61 01 61 62 82 02 03" },
+  {["a", {"b" => "c"}], "82 61 61 a1 61 62 61 63"},
+  { {"a" => "A", "b" => "B", "c" => "C", "d" => "D", "e" => "E"}, "a5 61 61 61 41 61 62 61 42 61 63 61 43 61 64 61 44 61 65 61 45" },
+]
+
+describe CBOR::Encoder do
+  describe "with the RFC examples" do
+    tests.each_with_index do |tt, index|
+      describe "test ##{index}" do
+        value, hex_string = tt
+
+        bytes_arr = hex_string.split.map(&.to_u8(16))
+        want_bytes = Bytes.new(bytes_arr.to_unsafe, bytes_arr.size)
+
+        it "encodes #{value.to_s} to #{want_bytes.hexstring}" do
+          res = IO::Memory.new
+
+          encoder = CBOR::Encoder.new(res)
+          encoder.write(value)
+
+          res.to_slice.hexstring.should eq(want_bytes.hexstring)
+        end
+      end
+    end
+  end
+end
diff --git a/spec/cbor/serializable_spec.cr b/spec/cbor/serializable_spec.cr
index dc26c35..1019af0 100644
--- a/spec/cbor/serializable_spec.cr
+++ b/spec/cbor/serializable_spec.cr
@@ -68,7 +68,7 @@ describe CBOR::Serializable do
     end
   end
 
-  describe "by default it's strict" do
+  describe "strict by default" do
     it "errors on missing fields" do
       expect_raises(CBOR::ParseError) do
ExampleStrict.from_cbor(Bytes[0xbf, 0x61, 0x61, 0x01, 0x61, 0x62, 0x9f, 0x02, 0x03, 0xff, 0xff]) diff --git a/src/cbor/encoder.cr b/src/cbor/encoder.cr new file mode 100644 index 0000000..4d654ab --- /dev/null +++ b/src/cbor/encoder.cr @@ -0,0 +1,180 @@ +class CBOR::Encoder + def self.new(io : IO = IO::Memory.new) + packer = new(io) + yield packer + packer + end + + def initialize(@io : IO = IO::Memory.new) + end + + def write(value : Nil.class, use_undefined : Bool = false) + write(use_undefined ? SimpleValue::Undefined : SimpleValue::Null) + end + + def write(value : Bool) + write(value ? SimpleValue::True : SimpleValue::False) + end + + def write(value : SimpleValue) + write_size(0xe0, value.value) + end + + def write(value : String) + write_size(0x60, value.bytesize) + write_slice(value.to_slice) + end + + def write(value : Bytes) + write_size(0x40, value.bytesize) + write_slice(value) + end + + def write(value : Symbol) + write(value.to_s) + end + + def write(value : Float32 | Float64) + case value + when Float32 + write_byte(0xfa) + when Float64 + write_byte(0xfb) + end + write_value(value) + end + + def write(value : Int8 | Int16 | Int32 | Int64) + return write(value.to_u64) if value >= 0 + + # When it's negative, transform it into a positive value and write the + # resulting unsigled int with an offset + positive_value = -(value + 1) + write(positive_value.to_u64, 0x20) + end + + # The Int128 can't be bigger than an UInt64 if positive or when inverted + def write(value : Int128) + if value > 0 && value <= UInt64::MAX + return write(value.to_u64, 0x20) + end + + # Flip the value + positive_value = -(value + 1) + + # TODO: Use custom error + raise ParseError.new("Negative Int128 too big, it must fit in a UInt64") if positive_value > UInt64::MAX + + write(positive_value.to_u, 0x20) + end + + def write(value : UInt8 | UInt16 | UInt32 | UInt64, offset : UInt8 = 0x00) + compressed = compress(value) + + # No need to write the value as the "size" contains the 
number + write_size(offset, compressed) + end + + def write(value : Hash) + write_object_start(value.size) + value.each do |key, val| + write(key) + write(val) + end + end + + def write(value : Array) + write_array_start(value.size) + value.each { |item| write(item) } + end + + def write(value : Tuple) + write_array_start(value.size) + value.each { |item| write(item) } + end + + def write(value : Tag) + write_size(0xc0, value) + write_value(value) + end + + def write_array_start(size) + write_size(0x80, size) + end + + def write_object_start(size) + write_size(0xa0, size) + end + + def object(&block) + write_map_start + yield + write_break + end + + private def write_map_start + write_byte(0xbf) + end + + private def write_break + write_byte(0xff) + end + + # Find the smallest UInt capable of containing the value + private def compress(value : UInt8 | UInt16 | UInt32 | UInt64) + case value + when .<= UInt8::MAX + value.to_u8 + when .<= UInt16::MAX + value.to_u16 + when .<= UInt32::MAX + value.to_u32 + else + value + end + end + + # Write the size flag for the se + private def write_size(offset : UInt8, bytesize) + case bytesize + when 0x00..0x17 + write_byte(offset + bytesize) + when 0x18..0xff + write_byte(offset + 0x18) + write_byte(bytesize.to_u8) + when 0x0000..0xffff + write_byte(offset + 0x19) + write_value(bytesize.to_u16) + when 0x0000_0000..0xffff_ffff + write_byte(offset + 0x1a) + write_value(bytesize.to_u32) + when 0x0000_0000_0000_0000..0xffff_ffff_ffff_ffff + write_byte(offset + 0x1b) + write_value(bytesize.to_u64) + else + # TODO: Use a encoding error instead + raise ParseError.new("invalid length") + end + end + + private def write_byte(byte : UInt8) + @io.write_byte(byte) + end + + private def write_slice(slice : Bytes) + @io.write(slice) + end + + private def write_value(value) + @io.write_bytes(value, IO::ByteFormat::BigEndian) + end + + def to_slice : Bytes + raise Error.new("to slice not implemented for io type: #{typeof(io)}") unless 
io.responds_to?(:to_slice) + @io.to_slice + end + + def to_s : String + @io.to_s + end +end diff --git a/src/cbor/serializable.cr b/src/cbor/serializable.cr index fc24610..8057471 100644 --- a/src/cbor/serializable.cr +++ b/src/cbor/serializable.cr @@ -64,6 +64,7 @@ module CBOR # * **key**: the value of the key in the json object (by default the name of the instance variable) # * **converter**: specify an alternate type for parsing and generation. The converter must define `from_cbor(CBOR::Decoder)` and `to_cbor(value, CBOR::Builder)` as class methods. Examples of converters are `Time::Format` and `Time::EpochConverter` for `Time`. # * **presence**: if `true`, a `@{{key}}_present` instance variable will be generated when the key was present (even if it has a `null` value), `false` by default + # * **emit_null**: if `true`, emits a `null` value for nilable property (by default nulls are not emitted) # * **nil_as_undefined**: if `true`, when the value is `nil`, it is emitted as `undefined` (by default `nil` are encoded as `null`) # # Deserialization also respects default values of variables: @@ -98,15 +99,16 @@ module CBOR # ``` # # - # ### Class annotation `JSON::Serializable::Options` + # ### Class annotation `CBOR::Serializable::Options` # # supported properties: # * **emit_nulls**: if `true`, emits a `null` value for all nilable properties (by default nulls are not emitted) + # * **nil_as_undefined**: if `true`, emits a `nil` value as undefined (by default nil emits `null`) # # ``` # require "json" # - # @[JSON::Serializable::Options(emit_nulls: true)] + # @[CBOR::Serializable::Options(emit_nulls: true)] # class A # include JSON::Serializable # @a : Int32? 
@@ -241,77 +243,65 @@ module CBOR
       raise ::CBOR::SerializationError.new("Unknown CBOR attribute: #{key}", self.class.to_s, nil)
     end
 
-    # protected def on_to_cbor(cbor : ::CBOR::Builder)
-    # end
-
-    # def to_json(json : ::JSON::Builder)
-    #   {% begin %}
-    #     {% options = @type.annotation(::JSON::Serializable::Options) %}
-    #     {% emit_nulls = options && options[:emit_nulls] %}
-
-    #     {% properties = {} of Nil => Nil %}
-    #     {% for ivar in @type.instance_vars %}
-    #       {% ann = ivar.annotation(::JSON::Field) %}
-    #       {% unless ann && ann[:ignore] %}
-    #         {%
-    #           properties[ivar.id] = {
-    #             type: ivar.type,
-    #             key: ((ann && ann[:key]) || ivar).id.stringify,
-    #             root: ann && ann[:root],
-    #             converter: ann && ann[:converter],
-    #             emit_null: (ann && (ann[:emit_null] != nil) ? ann[:emit_null] : emit_nulls),
-    #           }
-    #         %}
-    #       {% end %}
-    #     {% end %}
-
-    #     json.object do
-    #       {% for name, value in properties %}
-    #         _{{name}} = @{{name}}
-
-    #         {% unless value[:emit_null] %}
-    #           unless _{{name}}.nil?
-    #         {% end %}
-
-    #           json.field({{value[:key]}}) do
-    #             {% if value[:root] %}
-    #               {% if value[:emit_null] %}
-    #                 if _{{name}}.nil?
-    #                   nil.to_json(json)
-    #                 else
-    #               {% end %}
-
-    #               json.object do
-    #                 json.field({{value[:root]}}) do
-    #             {% end %}
-
-    #             {% if value[:converter] %}
-    #               if _{{name}}
-    #                 {{ value[:converter] }}.to_json(_{{name}}, json)
-    #               else
-    #                 nil.to_json(json)
-    #               end
-    #             {% else %}
-    #               _{{name}}.to_json(json)
-    #             {% end %}
-
-    #             {% if value[:root] %}
-    #               {% if value[:emit_null] %}
-    #                 end
-    #               {% end %}
-    #                 end
-    #               end
-    #             {% end %}
-    #           end
-
-    #         {% unless value[:emit_null] %}
-    #           end
-    #         {% end %}
-    #       {% end %}
-    #       on_to_json(json)
-    #     end
-    #   {% end %}
-    # end
+    # Hook invoked by `#to_cbor` after all the properties have been written,
+    # while the map is still open. It writes nothing by default.
+    protected def on_to_cbor(cbor : ::CBOR::Encoder)
+    end
+
+    # Writes the instance variables to *cbor* as an indefinite-length map, using
+    # the `CBOR::Field` and `CBOR::Serializable::Options` annotations to pick the
+    # keys, the converters and the way `nil` values are emitted.
+    def to_cbor(cbor : ::CBOR::Encoder)
+      {% begin %}
+        {% options = @type.annotation(::CBOR::Serializable::Options) %}
+        {% emit_nulls = options && options[:emit_nulls] %}
+        {% nil_as_undefined = options && options[:nil_as_undefined] %}
+
+        {% properties = {} of Nil => Nil %}
+        {% for ivar in @type.instance_vars %}
+          {% ann = ivar.annotation(::CBOR::Field) %}
+          {% unless ann && ann[:ignore] %}
+            {%
+              properties[ivar.id] = {
+                type: ivar.type,
+                key: ((ann && ann[:key]) || ivar).id.stringify,
+                converter: ann && ann[:converter],
+                emit_null: (ann && (ann[:emit_null] != nil) ? ann[:emit_null] : emit_nulls),
+                nil_as_undefined: (ann && (ann[:nil_as_undefined] != nil) ? ann[:nil_as_undefined] : nil_as_undefined),
+              }
+            %}
+          {% end %}
+        {% end %}
+
+        cbor.object do
+          {% for name, value in properties %}
+            _{{name}} = @{{name}}
+
+            {% unless value[:emit_null] %}
+              unless _{{name}}.nil?
+            {% end %}
+
+              # Write the key of the map
+              cbor.write({{value[:key]}})
+
+              if _{{name}}.nil?
+                cbor.write(Nil, use_undefined: {{ value[:nil_as_undefined] == true }})
+              else
+                {% if value[:converter] %}
+                  {{ value[:converter] }}.to_cbor(_{{name}}, cbor)
+                {% else %}
+                  _{{name}}.to_cbor(cbor)
+                {% end %}
+              end
+
+            {% unless value[:emit_null] %}
+              end
+            {% end %}
+          {% end %}
+          on_to_cbor(cbor)
+        end
+      {% end %}
+    end
 
     module Unmapped
       @[CBOR::Field(ignore: true)]
@@ -325,11 +315,14 @@ module CBOR
         end
       end
 
-      # protected def on_to_json(json)
-      #   json_unmapped.each do |key, value|
-      #     json.field(key) { value.to_json(json) }
-      #   end
-      # end
+      # Writes the unmapped properties, each key followed by its value, into the
+      # map written by `#to_cbor`.
+      protected def on_to_cbor(cbor : ::CBOR::Encoder)
+        cbor_unmapped.each do |key, value|
+          cbor.write(key)
+          value.to_cbor(cbor)
+        end
+      end
     end
 
     # Tells this class to decode CBOR by using a field as a discriminator.
diff --git a/src/cbor/to_cbor.cr b/src/cbor/to_cbor.cr
new file mode 100644
index 0000000..40b78d0
--- /dev/null
+++ b/src/cbor/to_cbor.cr
@@ -0,0 +1,108 @@
+class Object
+  # Returns the CBOR representation of this object.
+  def to_cbor : Bytes
+    encoder = CBOR::Encoder.new
+    to_cbor(encoder)
+    encoder.to_slice
+  end
+
+  # Writes the CBOR representation of this object to *io* and returns `self`.
+  def to_cbor(io : IO)
+    encoder = CBOR::Encoder.new(io)
+    to_cbor(encoder)
+    self
+  end
+
+  # Writes this object using *encoder*.
+  def to_cbor(encoder : CBOR::Encoder)
+    encoder.write(self)
+  end
+end
+
+struct Set
+  # Writes the set as an array of known size.
+  def to_cbor(encoder : CBOR::Encoder)
+    encoder.write_array_start(self.size)
+    each { |elem| elem.to_cbor(encoder) }
+  end
+end
+
+class Array
+  # Writes the array header followed by each element.
+  def to_cbor(encoder : CBOR::Encoder)
+    encoder.write_array_start(self.size)
+    each { |elem| elem.to_cbor(encoder) }
+  end
+end
+
+class Hash
+  # Writes the map header followed by each key and its value.
+  def to_cbor(encoder : CBOR::Encoder)
+    encoder.write_object_start(self.size)
+    each do |key, value|
+      key.to_cbor(encoder)
+      value.to_cbor(encoder)
+    end
+  end
+end
+
+struct Tuple
+  # Writes the tuple as an array of known size.
+  def to_cbor(encoder : CBOR::Encoder)
+    encoder.write_array_start(self.size)
+    each { |elem| elem.to_cbor(encoder) }
+  end
+end
+
+struct NamedTuple
+  # Writes the named tuple as a map with the key names as text strings.
+  def to_cbor(encoder : CBOR::Encoder)
+    encoder.write_object_start(self.size)
+    {% for key in T.keys %}
+      {{key.stringify}}.to_cbor(encoder)
+      self[{{key.symbolize}}].to_cbor(encoder)
+    {% end %}
+  end
+end
+
+struct Enum
+  # Writes the enum as its underlying integer value.
+  def to_cbor(encoder : CBOR::Encoder)
+    value.to_cbor(encoder)
+  end
+end
+
+struct Time::Format
+  # Writes *value*, formatted with this format, as a text string.
+  def to_cbor(value : Time, encoder : CBOR::Encoder)
+    format(value).to_cbor(encoder)
+  end
+end
+
+module Time::Format::RFC_3339
+  # Encodes the time as a properly tagged CBOR string as specified by
+  # [RFC 7049 section 2.4.1](https://tools.ietf.org/html/rfc7049#section-2.4.1).
+  def self.to_cbor(value : Time, encoder : CBOR::Encoder)
+    encoder.write(CBOR::Tag::RFC3339Time)
+    format(value, fraction_digits: 0).to_cbor(encoder)
+  end
+end
+
+struct Time
+  # Emits the time as a tagged unix timestamp, as specified by
+  # [RFC 7049 section 2.4.1](https://tools.ietf.org/html/rfc7049#section-2.4.1).
+  #
+  # If you would like to encode the time as a tagged RFC 3339 string instead,
+  # you can tag the field with the `Time::Format::RFC_3339` converter:
+  #
+  # ```
+  # class Foo
+  #   @[CBOR::Field(converter: Time::Format::RFC_3339)]
+  #   property created_at : Time
+  # end
+  # ```
+  def to_cbor(encoder : CBOR::Encoder)
+    encoder.write(CBOR::Tag::EpochTime)
+    self.to_unix.to_cbor(encoder)
+  end
+end