Compare commits
5 Commits
Author | SHA1 | Date |
---|---|---|
Karchnu | 7e5f11b670 | |
Karchnu | decfbd5f81 | |
Karchnu | da16890f2e | |
Karchnu | cd44c4a724 | |
Karchnu | 16a839666a |
|
@ -0,0 +1,191 @@
|
|||
# Sample inputs pushed through the Float32 -> Float16 conversion, in order:
# signed zeros, simple fractions, infinities, NaNs, then larger magnitudes.
[
  0.0_f32,
  -0.0_f32,
  1.5_f32,
  -1.5_f32,
  (1_f32 / 0_f32).as(Float32),
  -(1_f32 / 0_f32).as(Float32),
  (0_f32 / 0_f32).as(Float32),
  -(0_f32 / 0_f32).as(Float32),
  65504.0_f32,
  10_000.0_f32,
  -10_000.0_f32,
  153.0_f32,
  -3992.0_f32,
  # Disabled samples:
  # 0.15625_f32,
  # 10.539187_f32,
  # -10.539187_f32,
].each do |sample|
  print_result sample
end
|
||||
|
||||
# Prints two lines for *value*: the bit layout of the original Float32, then
# the bit layout of its half-precision conversion followed by the status
# returned by `f32_to_f16`.
def print_result(value : Float32)
  half_bits, info = f32_to_f16(value)
  puts get_summary(value)
  puts "#{get_summary(half_bits)} => status: #{info}"
end
|
||||
|
||||
# Formats the lowest 8 bits of *v* as a zero-padded binary string.
def binary_8(v)
  low_byte = v & 0xFF
  "%08b" % low_byte
end
|
||||
|
||||
# Formats the lowest 16 bits of *v* as two space-separated binary bytes,
# most significant byte first.
def binary_16(v)
  [8, 0].map { |shift| "%08b" % ((v >> shift) & 0xFF) }.join(" ")
end
|
||||
|
||||
# Formats the lowest 24 bits of *v* as three space-separated binary bytes,
# most significant byte first.
def binary_24(v)
  [16, 8, 0].map { |shift| "%08b" % ((v >> shift) & 0xFF) }.join(" ")
end
|
||||
|
||||
# Formats the lowest 32 bits of *v* as four space-separated binary bytes,
# most significant byte first.
def binary_32(v)
  [24, 16, 8, 0].map { |shift| "%08b" % ((v >> shift) & 0xFF) }.join(" ")
end
|
||||
|
||||
# Renders the 23-bit Float32 mantissa held in *v*.
#
# The value is first printed as 24 bits; the leading character is then
# dropped so that only the 23 mantissa bits remain.
def binary_mantisse_f32(v)
  rendered = binary_24(v)
  rendered[1..]
end
|
||||
|
||||
# Renders the 10-bit Float16 mantissa held in *v*.
#
# The value is first printed as 16 bits; the six leading characters are then
# dropped so that only the 10 mantissa bits (and the byte separator) remain.
def binary_mantisse_f16(v)
  rendered = binary_16(v)
  rendered[6..]
end
|
||||
|
||||
|
||||
# Splits a 16-bit value into its two bytes, most significant byte first.
def get_buffer(value : UInt16)
  high = (value >> 8) & 0xFF
  low = value & 0xFF
  [high.to_u8, low.to_u8]
end
|
||||
|
||||
# Builds a one-line description of a Float32: its decimal value followed by
# the sign bit, the 8 exponent bits and the 23 mantissa bits.
def get_summary(value : Float32)
  bytes = get_buffer value
  b0 = bytes[0].to_u32
  b1 = bytes[1].to_u32
  b2 = bytes[2].to_u32
  b3 = bytes[3].to_u32

  # Top bit of the first byte: 0 or 1.
  sign_bit = b0 >> 7
  # 8 bits: the low 7 bits of the first byte plus the top bit of the second.
  exponent = ((b0 & 0x7F) << 1) | (b1 >> 7)
  # Lower 24 bits; bit 23 still holds the exponent's lowest bit, which
  # binary_mantisse_f32 drops when rendering the 23 mantissa bits.
  mantissa = (b1 << 16) | (b2 << 8) | b3

  value_column = "%15.6f" % value
  sign_column = binary_8(sign_bit)[-1]
  exponent_column = "%10s" % binary_8(exponent)
  mantissa_column = "%28s" % binary_mantisse_f32(mantissa)

  "32-bit: #{value_column} => #{sign_column} #{exponent_column} #{mantissa_column}"
end
|
||||
|
||||
# Builds a one-line description of a Float16 stored in a UInt16: the raw
# integer value followed by the sign bit, the 5 exponent bits and the
# 10 mantissa bits.
def get_summary(value : UInt16)
  bytes = get_buffer value
  high = bytes[0].to_u32
  low = bytes[1].to_u32

  # 1 bit: top bit of the high byte.
  sign_bit = high >> 7
  # 5 bits: the bits directly below the sign bit.
  exponent = (high & 0x7F) >> 2
  # 10 bits: the two lowest bits of the high byte plus the whole low byte.
  mantissa = ((high & 0x03) << 8) | low

  value_column = "%15d" % value
  sign_column = binary_8(sign_bit)[-1]                    # 1 bit
  exponent_column = "%10s" % binary_8(exponent)[3..7]     # 5 bits
  mantissa_column = "%28s" % binary_mantisse_f16(mantissa) # 10 bits

  "16-bit: #{value_column} => #{sign_column} #{exponent_column} #{mantissa_column}"
end
|
||||
|
||||
# Returns the four bytes of the IEEE 754 encoding of *value*, most
# significant byte first (network byte order).
#
# The bytes are taken directly from the memory IO. A line-oriented read such
# as `gets` must not be used here: it stops at the first 0x0A byte, so any
# float whose encoding contains that byte (for example 2.15625, encoded as
# 0x400A0000) would produce fewer than four bytes.
def get_buffer(value : Float32)
  io = IO::Memory.new
  io.write_bytes(value, IO::ByteFormat::NetworkEndian)
  io.to_slice
end
|
||||
|
||||
# Status reported by `f32_to_f16` alongside the converted value.
enum ConversionInfo
  # The value was converted as a normalized half-precision number.
  OK

  # The input had all exponent bits set and a non-zero mantissa.
  NotANumber

  # The exponent is too large for half precision; the result is a signed infinity.
  Overflow

  # The exponent is too small for a normalized half; the result is a subnormal.
  Underflow

  # The value is too small even for a subnormal; the result is a signed zero.
  FullUnderflow

  # The input was positive infinity.
  Infinite

  # The input was negative infinity.
  NegativeInfinite
end
|
||||
|
||||
# Converts a Float32 to an IEEE 754 half-precision value.
#
# Returns two values: the half-precision bit pattern stored in a `UInt16`,
# and a `ConversionInfo` describing the outcome.
#
# Rounding is round-to-nearest, ties-to-even: the result is incremented when
# the highest discarded bit (the "round bit") is set and either a lower
# discarded bit or the lowest kept bit is set as well. The mask
# `3 * round_bit - 1` selects exactly those bits: the lowest kept bit plus
# everything below the round bit.
#
# Inputs with a zero exponent field (zeros and Float32 subnormals) take the
# full-underflow path and yield a signed zero with `FullUnderflow`.
def f32_to_f16(value : Float32)
  buffer = get_buffer value

  # 0 or 1
  sign = (buffer[0].to_u32 >> 7)
  # 8-bit value
  exp = ((buffer[0].to_u32 & 0x7F) << 1) | (buffer[1].to_u32 >> 7)
  # 23-bit value
  man = ((buffer[1].to_u32 & 0x7F) << 16) | (buffer[2].to_u32 << 8) | buffer[3].to_u32

  # All exponent bits set: Infinity (mantissa == 0) or NaN (mantissa != 0).
  if exp == 0xFF
    # Force the mantissa MSB for NaN so the result stays a NaN even when the
    # surviving payload bits are all zero.
    nan_bit = man == 0 ? 0 : 0x0200
    # Keep the top 10 mantissa bits: the 23-bit mantissa must be shifted down
    # by 13, otherwise its bits would land on the sign and exponent fields.
    final_value = ((sign << 15) | 0x7C00 | nan_bit | (man >> 13)).to_u16
    conversion_info = if nan_bit != 0
                        ConversionInfo::NotANumber
                      elsif sign == 0
                        ConversionInfo::Infinite
                      else
                        ConversionInfo::NegativeInfinite
                      end
    return final_value, conversion_info
  end

  # Start assembling the half-precision version.
  half_sign = sign << 15

  # Unbias the exponent (Float32 bias 127), then bias for half precision (15).
  half_exp = (exp.to_i64 - 127 + 15).to_i16

  # Exponent overflow: return a signed infinity.
  if half_exp >= 0x1F
    final_value = (half_sign | 0x7C00).to_u16
    return final_value, ConversionInfo::Overflow
  end

  # Exponent underflow: the result is a subnormal or a signed zero.
  if half_exp <= 0
    if 14 - half_exp > 24
      # Every significant bit is shifted out and no rounding is possible:
      # full underflow, return a signed zero.
      return half_sign.to_u16, ConversionInfo::FullUnderflow
    end

    # Restore the hidden leading mantissa bit before shifting.
    man = man | 0x0080_0000
    half_man = man >> (14 - half_exp)

    # Round to nearest, ties to even.
    round_bit = 1 << (13 - half_exp)
    if (man & round_bit) != 0 && (man & (3 * round_bit - 1)) != 0
      half_man += 1
    end

    # Subnormals carry no exponent bits.
    final_value = (half_sign | half_man).to_u16

    return final_value, ConversionInfo::Underflow
  end

  # Normalized result: move the exponent into place and keep the top 10
  # mantissa bits.
  half_exp_bits = half_exp << 10
  half_man = (man >> 13) & 0x03FF

  # Round to nearest, ties to even. A carry out of the mantissa correctly
  # propagates into the exponent field.
  round_bit = 0x0000_1000u32
  packed = half_sign | half_exp_bits | half_man
  if (man & round_bit) != 0 && (man & (3 * round_bit - 1)) != 0
    packed += 1
  end

  return packed.to_u16, ConversionInfo::OK
end
|
Loading…
Reference in New Issue