# float16.cr
#
# Experiments with converting IEEE 754 single-precision values (Float32)
# into half-precision (binary16) bit patterns, plus a few helpers that
# render integers as space-separated groups of binary digits.

# Renders the low 8 bits of *v* as 8 binary digits.
def binary_8(v)
  sprintf "%08b", v & 0xFF
end

# Renders the low 16 bits of *v* as two space-separated 8-digit groups.
def binary_16(v)
  sprintf "%08b %08b",
    (v >> 8) & 0xFF,
    v & 0xFF
end

# Renders the low 24 bits of *v* as three space-separated 8-digit groups.
def binary_24(v)
  sprintf "%08b %08b %08b",
    (v >> 16) & 0xFF,
    (v >> 8) & 0xFF,
    v & 0xFF
end

# Renders a 23-bit mantissa: the 24-bit rendering of *v* with its first
# digit dropped (that digit is bit 23, which is not part of the mantissa).
def binary_mantisse(v)
  binary_24(v)[1..]
end

# Renders the low 32 bits of *v* as four space-separated 8-digit groups.
def binary_32(v)
  sprintf "%08b %08b %08b %08b",
    (v >> 24) & 0xFF,
    (v >> 16) & 0xFF,
    (v >> 8) & 0xFF,
    v & 0xFF
end

# Prints one line showing *value* followed by its IEEE 754 single-precision
# fields in binary: sign bit, 8-bit biased exponent, 23-bit mantissa.
def print_summary(value : Float32)
  # Reinterpret the 32 bits of the float as an unsigned integer so the
  # fields can be extracted with plain shifts and masks.
  bits = value.unsafe_as(UInt32)

  # 0 or 1
  sign = bits >> 31
  # 8-bit value
  exp = (bits >> 23) & 0xFF
  # 23-bit value (masking keeps the exponent's lowest bit out of it)
  man = bits & 0x007F_FFFF

  str_value = "%10.6f" % value
  str_sign = binary_8(sign)[-1]
  str_exp = binary_8(exp)
  str_man = binary_mantisse(man)

  puts "#{str_value} => #{str_sign} #{str_exp} #{str_man}"
end

# Converts *value* to its IEEE 754 half-precision (binary16) bit pattern.
#
# Returns the 16 bits as a `UInt16`: 1 sign bit, 5 exponent bits (bias 15)
# and 10 mantissa bits.
#
# * Infinity and NaN stay Infinity and NaN; a NaN always gets the mantissa
#   MSB set so that it cannot collapse into Infinity once the low mantissa
#   bits are shifted out.
# * Magnitudes too large for half precision become signed Infinity.
# * Magnitudes too small for a half-precision subnormal become signed zero.
# * Everything else is rounded to nearest, ties to even.
#
# As a side effect, a summary line for *value* is printed via `print_summary`.
def f32_to_f16(value : Float32) : UInt16
  print_summary value

  # Reinterpret the float's bits as an integer; no byte buffer is needed.
  bits = value.unsafe_as(UInt32)

  # Sign bit, already moved to its half-precision position (bit 15).
  half_sign = (bits >> 16) & 0x8000
  # 8-bit biased exponent
  exp = (bits >> 23) & 0xFF
  # 23-bit mantissa
  man = bits & 0x007F_FFFF

  # All exponent bits set means Infinity (mantissa == 0) or NaN.
  if exp == 0xFF
    # Set mantissa MSB for NaN (and also keep the shifted mantissa bits).
    nan_bit = man == 0 ? 0_u32 : 0x0200_u32
    return (half_sign | 0x7C00_u32 | nan_bit | (man >> 13)).to_u16
  end

  # Unbias the exponent (single-precision bias 127), then rebias for half
  # precision (bias 15).
  half_exp = exp.to_i32 - 127 + 15

  # Exponent overflow: return signed Infinity.
  if half_exp >= 0x1F
    return (half_sign | 0x7C00_u32).to_u16
  end

  # Exponent underflow: the result is a subnormal or zero.
  if half_exp <= 0
    # Too small for even the rounding bit to matter: signed zero.
    # Zero and single-precision subnormals (exp == 0) end up here too.
    if 14 - half_exp > 24
      return half_sign.to_u16
    end

    # Restore the hidden leading mantissa bit before shifting.
    full_man = man | 0x0080_0000_u32
    half_man = full_man >> (14 - half_exp)

    # Round to nearest, ties to even: round up when the first dropped bit is
    # set and either a lower dropped bit or the kept LSB is set as well.
    round_bit = 1_u32 << (13 - half_exp)
    if (full_man & round_bit) != 0 && (full_man & (3_u32 * round_bit - 1)) != 0
      half_man += 1
    end

    # Subnormals have no exponent bits.
    return (half_sign | half_man).to_u16
  end

  # Normalized result: rebiased exponent plus the top 10 mantissa bits.
  half = half_sign | (half_exp.to_u32 << 10) | (man >> 13)

  # Same round-to-nearest-even rule; a carry out of the mantissa correctly
  # bumps the exponent (up to Infinity at the very top of the range).
  round_bit = 0x0000_1000_u32
  if (man & round_bit) != 0 && (man & (3_u32 * round_bit - 1)) != 0
    half += 1
  end

  half.to_u16
end

# Sample conversions; each call prints the single-precision breakdown of
# its argument.
f32_to_f16(1.5_f32)
f32_to_f16(50.5_f32)
f32_to_f16(0.0_f32)
f32_to_f16(-1.0_f32)
f32_to_f16(0.15625_f32)
f32_to_f16(1_f32 / 0_f32)