From 7e5f11b67007cfce29f641f14b8fff30d6152810 Mon Sep 17 00:00:00 2001 From: Karchnu Date: Fri, 13 Nov 2020 16:13:23 +0100 Subject: [PATCH] Moving get_buffer --- float16.cr | 48 +++++++++++++++++------------------------------- 1 file changed, 17 insertions(+), 31 deletions(-) diff --git a/float16.cr b/float16.cr index 8179356..878da13 100644 --- a/float16.cr +++ b/float16.cr @@ -50,21 +50,6 @@ def get_buffer(value : UInt16) [ ((value >> 8) & 0xFF).to_u8, (value & 0xFF).to_u8 ] end -def get_buffer(value : Float32) - # TODO: is there a simpler way to perform binary operations over a float? - # Extract IEEE754 components - io = IO::Memory.new - io.write_bytes(value, IO::ByteFormat::NetworkEndian) - io.rewind - v = io.gets(4) - - if v.nil? - raise "cannot perform f32 to f16 on value #{value}" - end - - v.to_slice -end - def get_summary(value : Float32) buffer = get_buffer value @@ -102,6 +87,21 @@ def get_summary(value : UInt16) "16-bit: #{str_value} => #{str_sign} #{str_exp} #{str_man}" end +def get_buffer(value : Float32) + # TODO: is there a simpler way to perform binary operations over a float? + # Extract IEEE754 components + io = IO::Memory.new + io.write_bytes(value, IO::ByteFormat::NetworkEndian) + io.rewind + v = io.gets(4) + + if v.nil? + raise "cannot perform f32 to f16 on value #{value}" + end + + v.to_slice +end + enum ConversionInfo OK NotANumber @@ -128,7 +128,6 @@ def f32_to_f16(value : Float32) # Set mantissa MSB for NaN (and also keep shifted mantissa bits) nan_bit = man == 0 ? 0 : 0x0200 final_value = (((sign << 15) | 0x7C00 | nan_bit | man) & 0xFFFF).to_u16 - # puts "#{get_summary final_value} => inf or nan" conversion_info = if nan_bit != 0 ConversionInfo::NotANumber elsif sign == 0 @@ -144,13 +143,10 @@ def f32_to_f16(value : Float32) # Unbias the exponent, then bias for half precision half_exp = (exp.to_i64 - 127 + 15).to_i16 - # puts " exp: #{typeof(exp)} -> #{exp}" - # puts "half_exp: #{typeof(half_exp)} -> #{half_exp}" # Check for exponent overflow, return +infinity if half_exp >= 0x1F final_value = (half_sign | 0x7C00).to_u16 - # puts "#{get_summary final_value} => overflow, return ± inf" return final_value, ConversionInfo::Overflow end @@ -158,8 +154,7 @@ def f32_to_f16(value : Float32) if half_exp <= 0 # Check mantissa for what we can do if 14 - half_exp > 24 - # No rounding possibility, so this is a full underflow, return signed zero - # puts "#{get_summary half_sign.to_u16} => full underflow" + # No rounding possibility, so this is a full underflow, return signed zero return half_sign.to_u16, ConversionInfo::FullUnderflow end @@ -167,37 +162,28 @@ def f32_to_f16(value : Float32) man = man | 0x0080_0000 half_man = man >> (14 - half_exp) - pp! binary_mantisse_f32(man), binary_mantisse_f16(half_man) - # Check for rounding (see comment above functions) round_bit = 1 << (13 - half_exp) if (man & round_bit) != 0 && (man & (3 * round_bit - 1)) != 0 - half_man += 1 + half_man += 1 end # No exponent for subnormals final_value = (half_sign | half_man).to_u16 - # puts "#{get_summary final_value} => underflow" - return final_value, ConversionInfo::Underflow end # Rebias the exponent half_exp = (half_exp) << 10 half_man = (man >> 13) & 0x03FF - # puts " man: #{binary_mantisse_f32(man)}" - # puts "half_man: #{binary_mantisse_f16(half_man)}" # Check for rounding (see comment above functions) round_bit = 0x0000_1000u32 final_value = if (man & round_bit) != 0 && (man & (3 * round_bit - 1)) != 0 - # puts "round it" # Round it ((half_sign | half_exp | half_man) + 1).to_u16 else v = (half_sign | half_exp | half_man) - # puts "NOT round it #{binary_32(v)}" - # puts "NOT round it #{binary_16(v.to_u16)}" (half_sign | half_exp | half_man).to_u16 end