float16 tests

float16
Karchnu 2020-11-12 11:09:05 +01:00
parent b8ebc57bb7
commit 16a839666a
1 changed files with 135 additions and 0 deletions

135
float16.cr Normal file
View File

@ -0,0 +1,135 @@
# Smoke-test driver: run the float32 -> float16 conversion on a few sample
# values; the return values are discarded here.
# NOTE(review): the unsuffixed literals presumably rely on Crystal's number
# literal autocasting to satisfy the `Float32` restriction -- confirm.
f32_to_f16 1.5
f32_to_f16 50.5
f32_to_f16 0.0
f32_to_f16 -1.0
f32_to_f16 0.15625
# Float32 division by zero; intended to exercise the `exp == 0xFF` branch.
f32_to_f16 (1_f32 / 0_f32).as(Float32)
# Renders the low 8 bits of *v* as an 8-character, zero-padded binary string.
# Any bits above bit 7 are masked away.
def binary_8(v)
  low_byte = v & 0xFF
  "%08b" % low_byte
end
# Renders the low 16 bits of *v* as two zero-padded binary octets separated
# by a space, most significant octet first.
def binary_16(v)
  high_octet = (v >> 8) & 0xFF
  low_octet = v & 0xFF
  "%08b %08b" % [high_octet, low_octet]
end
# Renders the low 24 bits of *v* as three zero-padded binary octets separated
# by spaces, most significant octet first.
def binary_24(v)
  octets = [16, 8, 0].map do |shift|
    "%08b" % ((v >> shift) & 0xFF)
  end
  octets.join(" ")
end
# Renders *v* as a 23-bit mantissa: the 24-bit rendering from `binary_24`
# with its first character (the most significant bit) dropped.
def binary_mantisse(v)
  rendered = binary_24(v)
  rendered[1..-1]
end
# Renders the low 32 bits of *v* as four zero-padded binary octets separated
# by spaces, most significant octet first.
def binary_32(v)
  [24, 16, 8, 0].map { |shift| "%08b" % ((v >> shift) & 0xFF) }.join(" ")
end
# Prints a one-line IEEE 754 breakdown of *value* in the form
# `<value as %10.6f> => <sign bit> <8 exponent bits> <23 mantissa bits>`.
#
# *buffer* must hold the 4 bytes of *value*, most significant byte first
# (network byte order).
def print_summary(value : Float32, buffer : Bytes)
  # 0 or 1
  sign = buffer[0].to_u32 >> 7
  # 8-bit biased exponent: low 7 bits of byte 0, then the top bit of byte 1
  exp = ((buffer[0].to_u32 & 0x7F) << 1) | (buffer[1].to_u32 >> 7)
  # 23-bit mantissa: the top bit of byte 1 belongs to the exponent, so it is
  # masked off rather than carried into the mantissa.
  man = ((buffer[1].to_u32 & 0x7F) << 16) | (buffer[2].to_u32 << 8) | buffer[3].to_u32

  str_value = "%10.6f" % value
  # binary_8 yields 8 characters; only the last one is the sign bit.
  str_sign = binary_8(sign)[-1]
  str_exp = binary_8(exp)
  str_man = binary_mantisse(man)
  puts "#{str_value} => #{str_sign} #{str_exp} #{str_man}"
end

# Convenience overload: encodes *value* into its 4 network-order bytes and
# prints the same summary as the two-argument form.
def print_summary(value : Float32)
  buffer = Bytes.new(4)
  IO::ByteFormat::NetworkEndian.encode(value, buffer)
  print_summary value, buffer
end
# Converts *value* to its IEEE 754 half-precision (binary16) bit pattern.
#
# Returns the 16 result bits as a `UInt32` in the range 0..0xFFFF.
# Rounding is round-to-nearest, ties-to-even. Values too large for half
# precision become signed infinity; values too small become subnormals or
# signed zero. NaN stays NaN (the quiet bit is forced so the mantissa cannot
# collapse to the infinity pattern).
#
# Side effects: prints a bit summary of the input through `print_summary`,
# plus extra debug lines when the input is infinity or NaN.
def f32_to_f16(value : Float32)
  # Reinterpret the float's 32 bits as an integer; no IO round trip needed.
  bits = value.unsafe_as(UInt32)

  # Extract IEEE754 components
  # 0 or 1
  sign = bits >> 31
  # 8-bit biased exponent
  exp = (bits >> 23) & 0xFF_u32
  # 23-bit mantissa (exponent bits excluded)
  man = bits & 0x007F_FFFF_u32

  print_summary value

  # Sign moved to its half-precision position (bit 15)
  half_sign = sign << 15

  # Check for all exponent bits being set, which is Infinity or NaN
  if exp == 0xFF
    puts "exp == 0xFF"
    # Set mantissa MSB for NaN (and also keep shifted mantissa bits)
    nan_bit = man == 0 ? 0_u32 : 0x0200_u32
    pp! binary_24(nan_bit)
    # Only the top 10 mantissa bits fit, hence the shift by 23 - 10 = 13
    float16_value = half_sign | 0x7C00 | nan_bit | (man >> 13)
    f16_value = sprintf "%08b %08b", float16_value >> 8, float16_value & 0xFF
    pp! f16_value
    return float16_value
  end

  # Unbias the exponent (bias 127), then bias for half precision (bias 15)
  half_exp = exp.to_i32 - 127 + 15

  # Exponent overflow: return signed infinity
  return half_sign | 0x7C00_u32 if half_exp >= 0x1F

  # Underflow: the result is a half-precision subnormal or zero
  if half_exp <= 0
    # Shifted past every mantissa bit: no rounding possible, signed zero
    return half_sign if 14 - half_exp > 24

    # Don't forget about the hidden leading mantissa bit
    full_man = man | 0x0080_0000_u32
    half_man = full_man >> (14 - half_exp)
    # Round up when the first dropped bit is set and either a lower dropped
    # bit or the kept LSB is set (round half to even).
    round_bit = 1_u32 << (13 - half_exp)
    if (full_man & round_bit) != 0 && (full_man & (3_u32 * round_bit - 1)) != 0
      half_man += 1
    end
    # No exponent for subnormals
    return half_sign | half_man
  end

  # Normal number: rebias the exponent and keep the top 10 mantissa bits
  half = half_sign | (half_exp.to_u32 << 10) | (man >> 13)
  # Same round-half-to-even rule; a carry out of the mantissa correctly
  # bumps the exponent (up to infinity).
  round_bit = 0x0000_1000_u32
  if (man & round_bit) != 0 && (man & (3_u32 * round_bit - 1)) != 0
    half += 1
  end
  half
end