Adding conversion info and better display
parent
cd44c4a724
commit
da16890f2e
87
float16.cr
87
float16.cr
|
@ -1,14 +1,24 @@
|
||||||
f32_to_f16 1.5
|
print_result 0.0
|
||||||
f32_to_f16 50.5
|
print_result -0.0
|
||||||
f32_to_f16 0.0
|
print_result 1.5
|
||||||
f32_to_f16 -1.0
|
print_result -1.5
|
||||||
f32_to_f16 0.15625
|
print_result (1_f32 / 0_f32).as(Float32)
|
||||||
f32_to_f16 (1_f32 / 0_f32).as(Float32)
|
print_result -(1_f32 / 0_f32).as(Float32)
|
||||||
f32_to_f16 -(1_f32 / 0_f32).as(Float32)
|
print_result (0_f32 / 0_f32).as(Float32)
|
||||||
f32_to_f16 100_000.0
|
print_result -(0_f32 / 0_f32).as(Float32)
|
||||||
f32_to_f16 -100_000.0
|
print_result 65504.0
|
||||||
f32_to_f16 10.539187535151395835581398159855
|
print_result 10_000.0
|
||||||
f32_to_f16 -10.539187535151395835581398159855
|
print_result -10_000.0
|
||||||
|
# print_result 50.5
|
||||||
|
# print_result 0.15625
|
||||||
|
# print_result 10.539187
|
||||||
|
# print_result -10.539187
|
||||||
|
|
||||||
|
def print_result(value : Float32)
|
||||||
|
final_value, status = f32_to_f16 value
|
||||||
|
puts "#{get_summary value}"
|
||||||
|
puts "#{get_summary final_value} => status: #{status}"
|
||||||
|
end
|
||||||
|
|
||||||
def binary_8(v)
|
def binary_8(v)
|
||||||
sprintf "%08b", v & 0xFF
|
sprintf "%08b", v & 0xFF
|
||||||
|
@ -66,8 +76,8 @@ def get_summary(value : Float32)
|
||||||
|
|
||||||
str_value = "%15.6f" % value
|
str_value = "%15.6f" % value
|
||||||
str_sign = binary_8(sign)[-1]
|
str_sign = binary_8(sign)[-1]
|
||||||
str_exp = "%15s" % binary_8(exp)
|
str_exp = "%10s" % binary_8(exp)
|
||||||
str_man = "%30s" % binary_mantisse_f32(man)
|
str_man = "%28s" % binary_mantisse_f32(man)
|
||||||
|
|
||||||
"32-bit: #{str_value} => #{str_sign} #{str_exp} #{str_man}"
|
"32-bit: #{str_value} => #{str_sign} #{str_exp} #{str_man}"
|
||||||
end
|
end
|
||||||
|
@ -81,19 +91,28 @@ def get_summary(value : UInt16)
|
||||||
# 5-bit value
|
# 5-bit value
|
||||||
exp = (buffer[0].to_u32 & 0x7F) >> 2
|
exp = (buffer[0].to_u32 & 0x7F) >> 2
|
||||||
# 10-bit value
|
# 10-bit value
|
||||||
man = ((buffer[0].to_u32 & 0x03) << 6) | (buffer[1].to_u32 << 8)
|
man = ((buffer[0].to_u32 & 0x03) << 8) | buffer[1].to_u32
|
||||||
|
|
||||||
str_value = "%15d" % value
|
str_value = "%15d" % value
|
||||||
str_sign = binary_8(sign)[-1] # 1-bit value
|
str_sign = binary_8(sign)[-1] # 1-bit value
|
||||||
str_exp = "%15s" % binary_8(exp)[3..7] # 5-bit value
|
str_exp = "%10s" % binary_8(exp)[3..7] # 5-bit value
|
||||||
str_man = "%30s" % binary_mantisse_f16(man) # 10-bit value
|
str_man = "%28s" % binary_mantisse_f16(man) # 10-bit value
|
||||||
|
|
||||||
"16-bit: #{str_value} => #{str_sign} #{str_exp} #{str_man}"
|
"16-bit: #{str_value} => #{str_sign} #{str_exp} #{str_man}"
|
||||||
end
|
end
|
||||||
|
|
||||||
def f32_to_f16(value : Float32)
|
enum ConversionInfo
|
||||||
|
OK
|
||||||
|
NotANumber
|
||||||
|
Overflow
|
||||||
|
Underflow
|
||||||
|
FullUnderflow
|
||||||
|
Infinite
|
||||||
|
NegativeInfinite
|
||||||
|
end
|
||||||
|
|
||||||
|
def f32_to_f16(value : Float32)
|
||||||
|
|
||||||
puts get_summary value
|
|
||||||
buffer = get_buffer value
|
buffer = get_buffer value
|
||||||
|
|
||||||
# 0 or 1
|
# 0 or 1
|
||||||
|
@ -101,15 +120,22 @@ def f32_to_f16(value : Float32)
|
||||||
# 8-bit value
|
# 8-bit value
|
||||||
exp = ((buffer[0].to_u32 & 0x7F) << 1) | (buffer[1].to_u32 >> 7)
|
exp = ((buffer[0].to_u32 & 0x7F) << 1) | (buffer[1].to_u32 >> 7)
|
||||||
# 23-bit value
|
# 23-bit value
|
||||||
man = (buffer[1].to_u32 << 16) | (buffer[2].to_u32 << 8) | buffer[3].to_u32
|
man = ((buffer[1].to_u32 & 0x7F) << 16) | (buffer[2].to_u32 << 8) | buffer[3].to_u32
|
||||||
|
|
||||||
# Check for all exponent bits being set, which is Infinity or NaN
|
# Check for all exponent bits being set, which is Infinity or NaN
|
||||||
if exp == 0xFF
|
if exp == 0xFF
|
||||||
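# Set mantissa MSB for NaN. NOTE(review): `man` is OR-ed in unshifted below, so the "shifted mantissa bits" are not actually shifted — a `>> 13` looks missing; confirm.
|
# Set mantissa MSB for NaN. NOTE(review): `man` is OR-ed in unshifted below, so the "shifted mantissa bits" are not actually shifted — a `>> 13` looks missing; confirm.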
|
||||||
nan_bit = man == 0 ? 0 : 0x0200
|
nan_bit = man == 0 ? 0 : 0x0200
|
||||||
float16_value = (((sign << 15) | 0x7C00 | nan_bit | man) & 0xFFFF).to_u16
|
final_value = (((sign << 15) | 0x7C00 | nan_bit | man) & 0xFFFF).to_u16
|
||||||
puts "#{get_summary float16_value} => inf or nan"
|
# puts "#{get_summary final_value} => inf or nan"
|
||||||
return float16_value
|
conversion_info = if nan_bit != 0
|
||||||
|
ConversionInfo::NotANumber
|
||||||
|
elsif sign == 0
|
||||||
|
ConversionInfo::Infinite
|
||||||
|
else
|
||||||
|
ConversionInfo::NegativeInfinite
|
||||||
|
end
|
||||||
|
return final_value, conversion_info
|
||||||
end
|
end
|
||||||
|
|
||||||
# The number is normalized, start assembling half precision version
|
# The number is normalized, start assembling half precision version
|
||||||
|
@ -123,8 +149,8 @@ def f32_to_f16(value : Float32)
|
||||||
# Check for exponent overflow, return signed infinity (sign bit preserved)
|
# Check for exponent overflow, return signed infinity (sign bit preserved)
|
||||||
if half_exp >= 0x1F
|
if half_exp >= 0x1F
|
||||||
final_value = (half_sign | 0x7C00).to_u16
|
final_value = (half_sign | 0x7C00).to_u16
|
||||||
puts "#{get_summary final_value} => overflow, return ± inf"
|
# puts "#{get_summary final_value} => overflow, return ± inf"
|
||||||
return final_value
|
return final_value, ConversionInfo::Overflow
|
||||||
end
|
end
|
||||||
|
|
||||||
# Check for underflow
|
# Check for underflow
|
||||||
|
@ -132,8 +158,8 @@ def f32_to_f16(value : Float32)
|
||||||
# Check mantissa for what we can do
|
# Check mantissa for what we can do
|
||||||
if 14 - half_exp > 24
|
if 14 - half_exp > 24
|
||||||
# No rounding possibility, so this is a full underflow, return signed zero
|
# No rounding possibility, so this is a full underflow, return signed zero
|
||||||
puts "#{get_summary half_sign.to_u16} => full underflow"
|
# puts "#{get_summary half_sign.to_u16} => full underflow"
|
||||||
return half_sign.to_u16
|
return half_sign.to_u16, ConversionInfo::FullUnderflow
|
||||||
end
|
end
|
||||||
|
|
||||||
# Don't forget about hidden leading mantissa bit when assembling mantissa
|
# Don't forget about hidden leading mantissa bit when assembling mantissa
|
||||||
|
@ -153,7 +179,7 @@ def f32_to_f16(value : Float32)
|
||||||
|
|
||||||
puts "#{get_summary final_value} => underflow"
|
puts "#{get_summary final_value} => underflow"
|
||||||
|
|
||||||
return final_value
|
return final_value, ConversionInfo::Underflow
|
||||||
end
|
end
|
||||||
|
|
||||||
# Rebias the exponent
|
# Rebias the exponent
|
||||||
|
@ -164,12 +190,15 @@ def f32_to_f16(value : Float32)
|
||||||
# Check for rounding (see comment above functions)
|
# Check for rounding (see comment above functions)
|
||||||
round_bit = 0x0000_1000u32
|
round_bit = 0x0000_1000u32
|
||||||
final_value = if (man & round_bit) != 0 && (man & (3 * round_bit - 1)) != 0
|
final_value = if (man & round_bit) != 0 && (man & (3 * round_bit - 1)) != 0
|
||||||
|
puts "round it"
|
||||||
# Round it
|
# Round it
|
||||||
((half_sign | half_exp | half_man) + 1).to_u16
|
((half_sign | half_exp | half_man) + 1).to_u16
|
||||||
else
|
else
|
||||||
|
v = (half_sign | half_exp | half_man)
|
||||||
|
# puts "NOT round it #{binary_32(v)}"
|
||||||
|
# puts "NOT round it #{binary_16(v.to_u16)}"
|
||||||
(half_sign | half_exp | half_man).to_u16
|
(half_sign | half_exp | half_man).to_u16
|
||||||
end
|
end
|
||||||
|
|
||||||
puts "#{get_summary final_value}"
|
return final_value, ConversionInfo::OK
|
||||||
final_value
|
|
||||||
end
|
end
|
||||||
|
|
Loading…
Reference in New Issue