// Copyright 2022 The Dawn & Tint Authors
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice, this
//    list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright notice,
//    this list of conditions and the following disclaimer in the documentation
//    and/or other materials provided with the distribution.
//
// 3. Neither the name of the copyright holder nor the names of its
//    contributors may be used to endorse or promote products derived from
//    this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

Ben Claytoncd52f382023-08-07 13:11:08 +000028#include "src/tint/lang/core/number.h"
Ben Claytonc2eccfc2022-05-25 15:04:24 +000029
30#include <algorithm>
Zhaoming Jiang0fb4e2c2022-06-10 18:18:35 +000031#include <cmath>
Ben Claytonc2eccfc2022-05-25 15:04:24 +000032#include <cstring>
Ben Claytonc2eccfc2022-05-25 15:04:24 +000033
Ben Claytonf848af22023-07-28 16:37:32 +000034#include "src/tint/utils/ice/ice.h"
dan sinclair22b4dd22023-07-21 00:40:07 +000035#include "src/tint/utils/memory/bitcast.h"
36#include "src/tint/utils/text/string_stream.h"
Zhaoming Jiang0fb4e2c2022-06-10 18:18:35 +000037
dan sinclairce6dffe2023-08-14 21:01:40 +000038namespace tint::core {
namespace {

// Canonical binary16 (f16) bit patterns for the non-finite values.
constexpr uint16_t kF16Nan = 0x7e00u;     // exponent all 1's, top mantissa bit set
constexpr uint16_t kF16PosInf = 0x7c00u;  // exponent all 1's, mantissa zero, sign clear
constexpr uint16_t kF16NegInf = 0xfc00u;  // exponent all 1's, mantissa zero, sign set

// Field masks of a binary16 bit pattern, laid out as s_eeeee_mmmmmmmmmm.
constexpr uint16_t kF16SignMask = 0x8000u;
constexpr uint16_t kF16ExponentMask = 0x7c00u;
constexpr uint16_t kF16MantissaMask = 0x03ffu;

// Number of mantissa bits and the exponent bias of binary16.
constexpr uint32_t kF16MantissaBits = 10;
constexpr uint32_t kF16ExponentBias = 15;

// Field masks of a binary32 bit pattern, laid out as s_eeeeeeee_mmmmmmmmmmmmmmmmmmmmmmm.
constexpr uint32_t kF32SignMask = 0x80000000u;
constexpr uint32_t kF32ExponentMask = 0x7f800000u;
constexpr uint32_t kF32MantissaMask = 0x007fffffu;

// Number of mantissa bits and the exponent bias of binary32.
constexpr uint32_t kF32MantissaBits = 23;
constexpr uint32_t kF32ExponentBias = 127;

// Inclusive ranges of the binary32 biased exponent for values that are exactly representable as
// a normal f16 (unbiased exponent in [-14, 15]) or as a subnormal f16 (unbiased exponent in
// [-24, -15]).
constexpr uint32_t kMaxF32BiasedExpForF16NormalNumber = 142;
constexpr uint32_t kMinF32BiasedExpForF16NormalNumber = 113;
constexpr uint32_t kMaxF32BiasedExpForF16SubnormalNumber = 112;
constexpr uint32_t kMinF32BiasedExpForF16SubnormalNumber = 103;

// Tie the literal exponent bounds above to the format parameters they are derived from.
// Normal f16 values have biased exponents in [1, 30].
static_assert(kMinF32BiasedExpForF16NormalNumber == kF32ExponentBias - kF16ExponentBias + 1);
static_assert(kMaxF32BiasedExpForF16NormalNumber == kF32ExponentBias - kF16ExponentBias + 30);
// Subnormal f16 values sit directly below the normal range and span one f32 exponent per
// possible position of the leading set mantissa bit.
static_assert(kMaxF32BiasedExpForF16SubnormalNumber == kMinF32BiasedExpForF16NormalNumber - 1);
static_assert(kMinF32BiasedExpForF16SubnormalNumber ==
              kMaxF32BiasedExpForF16SubnormalNumber - (kF16MantissaBits - 1));

}  // namespace
Ben Claytonc2eccfc2022-05-25 15:04:24 +000065
Ben Claytonc2eccfc2022-05-25 15:04:24 +000066f16::type f16::Quantize(f16::type value) {
Antonio Maioranod060f362022-07-29 17:12:01 +000067 if (value > kHighestValue) {
Ben Claytonc2eccfc2022-05-25 15:04:24 +000068 return std::numeric_limits<f16::type>::infinity();
69 }
Antonio Maioranod060f362022-07-29 17:12:01 +000070 if (value < kLowestValue) {
Ben Claytonc2eccfc2022-05-25 15:04:24 +000071 return -std::numeric_limits<f16::type>::infinity();
72 }
dan sinclair00d0fd52022-11-09 20:03:09 +000073
Ben Claytonc2eccfc2022-05-25 15:04:24 +000074 // Below value must be within the finite range of a f16.
Zhaoming Jiang0fb4e2c2022-06-10 18:18:35 +000075 // Assert we use binary32 (i.e. float) as underlying type, which has 4 bytes.
76 static_assert(std::is_same<f16::type, float>());
Zhaoming Jiang0fb4e2c2022-06-10 18:18:35 +000077
dan sinclairbae54e72023-07-28 15:01:54 +000078 uint32_t u32 = tint::Bitcast<uint32_t>(value);
dan sinclair00d0fd52022-11-09 20:03:09 +000079 if ((u32 & ~kF32SignMask) == 0) {
Ben Clayton0d2aedf2022-11-18 11:57:37 +000080 return value; // +/- zero
Ben Claytonc2eccfc2022-05-25 15:04:24 +000081 }
dan sinclair00d0fd52022-11-09 20:03:09 +000082 if ((u32 & kF32ExponentMask) == kF32ExponentMask) { // exponent all 1's
Ben Clayton0d2aedf2022-11-18 11:57:37 +000083 return value; // inf or nan
Ben Claytonc2eccfc2022-05-25 15:04:24 +000084 }
Zhaoming Jiang0fb4e2c2022-06-10 18:18:35 +000085
86 // We are now going to quantize a f32 number into subnormal f16 and store the result value back
87 // into a f32 variable. Notice that all subnormal f16 values are just normal f32 values. Below
88 // will show that we can do this quantization by just masking out 13 or more lowest mantissa
89 // bits of the original f32 number.
90 //
91 // Note:
dan sinclair00d0fd52022-11-09 20:03:09 +000092 // * f32 has 1 sign bit, 8 exponent bits for biased exponent (i.e. unbiased exponent + 127), and
93 // 23 mantissa bits. Binary form: s_eeeeeeee_mmmmmmmmmmmmmmmmmmmmmmm
94 //
95 // * f16 has 1 sign bit, 5 exponent bits for biased exponent (i.e. unbiased exponent + 15), and
96 // 10 mantissa bits. Binary form: s_eeeee_mmmmmmmmmm
97 //
Zhaoming Jiang0fb4e2c2022-06-10 18:18:35 +000098 // The largest finite f16 number has a biased exponent of 11110 in binary, or 30 decimal, and so
dan sinclair00d0fd52022-11-09 20:03:09 +000099 // an unbiased exponent of 30 - 15 = 15.
100 //
Zhaoming Jiang0fb4e2c2022-06-10 18:18:35 +0000101 // The smallest finite f16 number has a biased exponent of 00001 in binary, or 1 decimal, and so
102 // a unbiased exponent of 1 - 15 = -14.
103 //
104 // We may follow the argument below:
105 // 1. All normal or subnormal f16 values, range from 0x1.p-24 to 0x1.ffcp15, are exactly
dan sinclair00d0fd52022-11-09 20:03:09 +0000106 // representable by a normal f32 number.
Zhaoming Jiang0fb4e2c2022-06-10 18:18:35 +0000107 // 1.1. We can denote the set of all f32 number that are exact representation of finite f16
108 // values by `R`.
109 // 1.2. We can do the quantization by mapping a normal f32 value v (in the f16 finite range)
110 // to a certain f32 number v' in the set R, which is the largest (by the meaning of absolute
111 // value) one among all values in R that are no larger than v.
dan sinclair00d0fd52022-11-09 20:03:09 +0000112 //
Zhaoming Jiang0fb4e2c2022-06-10 18:18:35 +0000113 // 2. We can decide whether a given normal f32 number v is in the set R, by looking at its
114 // mantissa bits and biased exponent `e`. Recall that biased exponent e is unbiased exponent +
115 // 127, and in the range of 1 to 254 for normal f32 number.
Antonio Maioranod060f362022-07-29 17:12:01 +0000116 // 2.1. If e >= 143, i.e. abs(v) >= 2^16 > f16::kHighestValue = 0x1.ffcp15, v is larger than
117 // any finite f16 value and can not be in set R. 2.2. If 142 >= e >= 113, or
118 // f16::kHighestValue >= abs(v) >= f16::kSmallestValue = 2^-14, v falls in the range of normal
119 // f16 values. In this case, v is in the set R iff the lowest 13 mantissa bits are all 0. (See
120 // below for proof)
Zhaoming Jiang0fb4e2c2022-06-10 18:18:35 +0000121 // 2.2.1. If we let v' be v with lowest 13 mantissa bits masked to 0, v' will be in set R
122 // and the largest one in set R that no larger than v. Such v' is the quantized value of v.
Antonio Maioranod060f362022-07-29 17:12:01 +0000123 // 2.3. If 112 >= e >= 103, i.e. 2^-14 > abs(v) >= f16::kSmallestSubnormalValue = 2^-24, v
124 // falls in the range of subnormal f16 values. In this case, v is in the set R iff the lowest
125 // 126-e mantissa bits are all 0. Notice that 126-e is in range 14 to 23, inclusive. (See
126 // below for proof)
Zhaoming Jiang0fb4e2c2022-06-10 18:18:35 +0000127 // 2.3.1. If we let v' be v with lowest 126-e mantissa bits masked to 0, v' will be in set R
128 // and the largest on in set R that no larger than v. Such v' is the quantized value of v.
129 // 2.4. If 2^-24 > abs(v) > 0, i.e. 103 > e, v is smaller than any finite f16 value and not
130 // equal to 0.0, thus can not be in set R.
131 // 2.5. If abs(v) = 0, v is in set R and is just +-0.0.
132 //
dan sinclair00d0fd52022-11-09 20:03:09 +0000133 // Proof for 2.2
134 // -------------
Zhaoming Jiang0fb4e2c2022-06-10 18:18:35 +0000135 // Any normal f16 number, in binary form, s_eeeee_mmmmmmmmmm, has value
dan sinclair00d0fd52022-11-09 20:03:09 +0000136 //
137 // (s == 0 ? 1 : -1) * (1 + uint(mmmmm_mmmmm) * (2^-10)) * 2^(uint(eeeee) - 15)
138 //
Zhaoming Jiang0fb4e2c2022-06-10 18:18:35 +0000139 // in which unit(bbbbb) means interprete binary pattern "bbbbb" as unsigned binary number,
140 // and we have 1 <= uint(eeeee) <= 30.
dan sinclair00d0fd52022-11-09 20:03:09 +0000141 //
Zhaoming Jiang0fb4e2c2022-06-10 18:18:35 +0000142 // This value is equal to a normal f32 number with binary
143 // s_EEEEEEEE_mmmmmmmmmm0000000000000
dan sinclair00d0fd52022-11-09 20:03:09 +0000144 //
145 // where uint(EEEEEEEE) = uint(eeeee) + 112, so that unbiased exponent is kept unchanged
146 //
Zhaoming Jiang0fb4e2c2022-06-10 18:18:35 +0000147 // uint(EEEEEEEE) - 127 = uint(eeeee) - 15
dan sinclair00d0fd52022-11-09 20:03:09 +0000148 //
Zhaoming Jiang0fb4e2c2022-06-10 18:18:35 +0000149 // and its value is
dan sinclair00d0fd52022-11-09 20:03:09 +0000150 // (s == 0 ? 1 : -1) *
151 // (1 + uint(mmmmm_mmmmm_00000_00000_000) * (2^-23)) * 2^(uint(EEEEEEEE) - 127)
152 // == (s == 0 ? 1 : -1) *
153 // (1 + uint(mmmmm_mmmmm) * (2^-10)) * 2^(uint(eeeee) - 15)
154 //
Zhaoming Jiang0fb4e2c2022-06-10 18:18:35 +0000155 // Notice that uint(EEEEEEEE) is in range [113, 142], showing that it is a normal f32 number.
dan sinclair00d0fd52022-11-09 20:03:09 +0000156 // So we proved that any normal f16 number can be exactly representd by a normal f32 number
157 // with biased exponent in range [113, 142] and the lowest 13 mantissa bits 0.
158 //
Zhaoming Jiang0fb4e2c2022-06-10 18:18:35 +0000159 // On the other hand, since mantissa bits mmmmmmmmmm are arbitrary, the value of any f32
160 // that has a biased exponent in range [113, 142] and lowest 13 mantissa bits zero is equal
dan sinclair00d0fd52022-11-09 20:03:09 +0000161 // to a normal f16 value. Hence we prove 2.2.
Zhaoming Jiang0fb4e2c2022-06-10 18:18:35 +0000162 //
dan sinclair00d0fd52022-11-09 20:03:09 +0000163 // Proof for 2.3
164 // -------------
Zhaoming Jiang0fb4e2c2022-06-10 18:18:35 +0000165 // Any subnormal f16 number has a binary form of s_00000_mmmmmmmmmm, and its value is
dan sinclair00d0fd52022-11-09 20:03:09 +0000166 //
167 // (s == 0 ? 1 : -1) * uint(mmmmmmmmmm) * (2^-10) * (2^-14)
168 // == (s == 0 ? 1 : -1) * uint(mmmmmmmmmm) * (2^-24).
169 //
170 // We discuss the bit pattern of mantissa bits mmmmmmmmmm.
171 // Case 1: mantissa bits have no leading zero bit, s_00000_1mmmmmmmmm
Zhaoming Jiang0fb4e2c2022-06-10 18:18:35 +0000172 // In this case the value is
dan sinclair00d0fd52022-11-09 20:03:09 +0000173 // (s == 0 ? 1 : -1) * uint(1mmmm_mmmmm) * (2^-10) * (2^-14)
174 // == (s == 0 ? 1 : -1) * ( uint(1_mmmmm_mmmm) * (2^-9)) * (2^-15)
175 // == (s == 0 ? 1 : -1) * (1 + uint(mmmmm_mmmm) * (2^-9)) * (2^-15)
176 // == (s == 0 ? 1 : -1) * (1 + uint(mmmmm_mmmm0_00000_00000_000) * (2^-23)) * (2^-15)
177 //
178 // which is equal to the value of the normal f32 number
179 //
Zhaoming Jiang0fb4e2c2022-06-10 18:18:35 +0000180 // s_EEEEEEEE_mmmmm_mmmm0_00000_00000_000
dan sinclair00d0fd52022-11-09 20:03:09 +0000181 //
182 // where uint(EEEEEEEE) == -15 + 127 = 112. Hence we proved that any subnormal f16 number
Zhaoming Jiang0fb4e2c2022-06-10 18:18:35 +0000183 // with no leading zero mantissa bit can be exactly represented by a f32 number with
184 // biased exponent 112 and the lowest 14 mantissa bits zero, and the value of any f32
dan sinclair00d0fd52022-11-09 20:03:09 +0000185 // number with biased exponent 112 and the lowest 14 mantissa bits zero is equal to a
Zhaoming Jiang0fb4e2c2022-06-10 18:18:35 +0000186 // subnormal f16 number with no leading zero mantissa bit.
dan sinclair00d0fd52022-11-09 20:03:09 +0000187 //
Zhaoming Jiang0fb4e2c2022-06-10 18:18:35 +0000188 // Case 2: mantissa bits has 1 leading zero bit, s_00000_01mmmmmmmm
189 // In this case the value is
dan sinclair00d0fd52022-11-09 20:03:09 +0000190 // (s == 0 ? 1 : -1) * uint(01mmm_mmmmm) * (2^-10) * (2^-14)
191 // == (s == 0 ? 1 : -1) * ( uint(01_mmmmm_mmm) * (2^-8)) * (2^-16)
192 // == (s == 0 ? 1 : -1) * (1 + uint(mmmmm_mmm) * (2^-8)) * (2^-16)
193 // == (s == 0 ? 1 : -1) * (1 + uint(mmmmm_mmm00_00000_00000_000) * (2^-23)) * (2^-16)
194 //
Zhaoming Jiang0fb4e2c2022-06-10 18:18:35 +0000195 // which is equal to the value of normal f32 number
dan sinclair00d0fd52022-11-09 20:03:09 +0000196 //
Zhaoming Jiang0fb4e2c2022-06-10 18:18:35 +0000197 // s_EEEEEEEE_mmmmm_mmm00_00000_00000_000
dan sinclair00d0fd52022-11-09 20:03:09 +0000198 //
199 // where uint(EEEEEEEE) = -16 + 127 = 111. Hence we proved that any subnormal f16 number
Zhaoming Jiang0fb4e2c2022-06-10 18:18:35 +0000200 // with 1 leading zero mantissa bit can be exactly represented by a f32 number with
201 // biased exponent 111 and the lowest 15 mantissa bits zero, and the value of any f32
dan sinclair00d0fd52022-11-09 20:03:09 +0000202 // number with biased exponent 111 and the lowest 15 mantissa bits zero is equal to a
Zhaoming Jiang0fb4e2c2022-06-10 18:18:35 +0000203 // subnormal f16 number with 1 leading zero mantissa bit.
dan sinclair00d0fd52022-11-09 20:03:09 +0000204 //
Zhaoming Jiang0fb4e2c2022-06-10 18:18:35 +0000205 // Case 3 to case 8: ......
dan sinclair00d0fd52022-11-09 20:03:09 +0000206 //
207 // Case 9: mantissa bits has 8 leading zero bits, s_00000_000000001m
Zhaoming Jiang0fb4e2c2022-06-10 18:18:35 +0000208 // In this case the value is
dan sinclair00d0fd52022-11-09 20:03:09 +0000209 // (s == 0 ? 1 : -1) * uint(00000_0001m) * (2^-10) * (2^-14)
210 // == (s == 0 ? 1 : -1) * ( uint(000000001_m) * (2^-1)) * (2^-23)
211 // == (s == 0 ? 1 : -1) * (1 + uint(m) * (2^-1)) * (2^-23)
212 // == (s == 0 ? 1 : -1) * (1 + uint(m0000_00000_00000_00000_000) * (2^-23)) * (2^-23)
213 //
Zhaoming Jiang0fb4e2c2022-06-10 18:18:35 +0000214 // which is equal to the value of normal f32 number
dan sinclair00d0fd52022-11-09 20:03:09 +0000215 //
Zhaoming Jiang0fb4e2c2022-06-10 18:18:35 +0000216 // s_EEEEEEEE_m0000_00000_00000_00000_000
dan sinclair00d0fd52022-11-09 20:03:09 +0000217 //
218 // where uint(EEEEEEEE) = -23 + 127 = 104. Hence we proved that any subnormal f16 number
Zhaoming Jiang0fb4e2c2022-06-10 18:18:35 +0000219 // with 8 leading zero mantissa bit can be exactly represented by a f32 number with
220 // biased exponent 104 and the lowest 22 mantissa bits zero, and the value of any f32
221 // number with biased exponent 104 and the lowest 22 mantissa bits zero are equal to a
222 // subnormal f16 number with 8 leading zero mantissa bit.
dan sinclair00d0fd52022-11-09 20:03:09 +0000223 //
224 // Case 10: mantissa bits has 9 leading zero bits, s_00000_0000000001
225 // In this case the value is just +-2^-24 == +-0x1.0p-24,
Zhaoming Jiang0fb4e2c2022-06-10 18:18:35 +0000226 // the f32 number has biased exponent 103 and all 23 mantissa bits zero.
dan sinclair00d0fd52022-11-09 20:03:09 +0000227 //
228 // Case 11: mantissa bits has 10 leading zero bits, s_00000_0000000000, just 0.0
229 //
230 // Concluding all these case, we proved that any subnormal f16 number with N leading zero
231 // mantissa bit can be exactly represented by a f32 number with biased exponent 112 - N and the
232 // lowest 14 + N mantissa bits zero, and the value of any f32 number with biased exponent
233 // 112 - N (= e) and the lowest 14 + N (= 126 - e) mantissa bits zero are equal to a subnormal
234 // f16 number with N leading zero mantissa bits. N is in range [0, 9], so the f32 number's
235 // biased exponent e is in range [103, 112], or unbiased exponent in [-24, -15].
Zhaoming Jiang0fb4e2c2022-06-10 18:18:35 +0000236
237 float abs_value = std::fabs(value);
Antonio Maioranod060f362022-07-29 17:12:01 +0000238 if (abs_value >= kSmallestValue) {
Zhaoming Jiang0fb4e2c2022-06-10 18:18:35 +0000239 // Value falls in the normal f16 range, quantize it to a normal f16 value by masking out
240 // lowest 13 mantissa bits.
dan sinclair00d0fd52022-11-09 20:03:09 +0000241 u32 = u32 & ~((1u << (kF32MantissaBits - kF16MantissaBits)) - 1);
Antonio Maioranod060f362022-07-29 17:12:01 +0000242 } else if (abs_value >= kSmallestSubnormalValue) {
Zhaoming Jiang0fb4e2c2022-06-10 18:18:35 +0000243 // Value should be quantized to a subnormal f16 value.
244
245 // Get the biased exponent `e` of f32 value, e.g. value 127 representing exponent 2^0.
dan sinclair00d0fd52022-11-09 20:03:09 +0000246 uint32_t biased_exponent_original = (u32 & kF32ExponentMask) >> kF32MantissaBits;
Antonio Maioranod060f362022-07-29 17:12:01 +0000247 // Since we ensure that kSmallestValue = 0x1f-14 > abs(value) >= kSmallestSubnormalValue =
248 // 0x1f-24, value will have a unbiased exponent in range -24 to -15 (inclusive), and the
Zhaoming Jiang0fb4e2c2022-06-10 18:18:35 +0000249 // corresponding biased exponent in f32 is in range 103 to 112 (inclusive).
Ben Claytonf848af22023-07-28 16:37:32 +0000250 TINT_ASSERT((kMinF32BiasedExpForF16SubnormalNumber <= biased_exponent_original) &&
251 (biased_exponent_original <= kMaxF32BiasedExpForF16SubnormalNumber));
Zhaoming Jiang0fb4e2c2022-06-10 18:18:35 +0000252
253 // As we have proved, masking out the lowest 126-e mantissa bits of input value will result
254 // in a valid subnormal f16 value, which is exactly the required quantization result.
255 uint32_t discard_bits = 126 - biased_exponent_original; // In range 14 to 23 (inclusive)
Ben Claytonf848af22023-07-28 16:37:32 +0000256 TINT_ASSERT((14 <= discard_bits) && (discard_bits <= kF32MantissaBits));
Zhaoming Jiang0fb4e2c2022-06-10 18:18:35 +0000257 uint32_t discard_mask = (1u << discard_bits) - 1;
258 u32 = u32 & ~discard_mask;
259 } else {
260 // value is too small that it can't even be represented as subnormal f16 number. Quantize
261 // to zero.
262 return value > 0 ? 0.0 : -0.0;
263 }
dan sinclair00d0fd52022-11-09 20:03:09 +0000264
dan sinclairbae54e72023-07-28 15:01:54 +0000265 return tint::Bitcast<f16::type>(u32);
Ben Claytonc2eccfc2022-05-25 15:04:24 +0000266}
267
Zhaoming Jiang2c7440a2022-07-07 03:29:11 +0000268uint16_t f16::BitsRepresentation() const {
Zhaoming Jiang2c7440a2022-07-07 03:29:11 +0000269 // Assert we use binary32 (i.e. float) as underlying type, which has 4 bytes.
270 static_assert(std::is_same<f16::type, float>());
271
272 // The stored value in f16 object must be already quantized, so it should be either NaN, +/-
273 // Inf, or exactly representable by normal or subnormal f16.
274
275 if (std::isnan(value)) {
dan sinclair00d0fd52022-11-09 20:03:09 +0000276 return kF16Nan;
Zhaoming Jiang2c7440a2022-07-07 03:29:11 +0000277 }
278
279 if (std::isinf(value)) {
dan sinclair00d0fd52022-11-09 20:03:09 +0000280 return value > 0 ? kF16PosInf : kF16NegInf;
Zhaoming Jiang2c7440a2022-07-07 03:29:11 +0000281 }
282
283 // Now quantized_value must be a finite f16 exactly-representable value.
284 // The following table shows exponent cases for all finite f16 exactly-representable value.
285 // ---------------------------------------------------------------------------
286 // | Value category | Unbiased exp | F16 biased exp | F32 biased exp |
287 // |------------------|----------------|------------------|------------------|
288 // | +/- zero | \ | 0 | 0 |
289 // | Subnormal f16 | [-24, -15] | 0 | [103, 112] |
dan sinclair00d0fd52022-11-09 20:03:09 +0000290 // | Normal f16 | [-14, 15] | [1, 30] | [113, 142] |
Zhaoming Jiang2c7440a2022-07-07 03:29:11 +0000291 // ---------------------------------------------------------------------------
292
dan sinclairbae54e72023-07-28 15:01:54 +0000293 uint32_t f32_bit_pattern = tint::Bitcast<uint32_t>(value);
dan sinclair00d0fd52022-11-09 20:03:09 +0000294 uint32_t f32_biased_exponent = (f32_bit_pattern & kF32ExponentMask) >> kF32MantissaBits;
295 uint32_t f32_mantissa = f32_bit_pattern & kF32MantissaMask;
Zhaoming Jiang2c7440a2022-07-07 03:29:11 +0000296
dan sinclair00d0fd52022-11-09 20:03:09 +0000297 uint16_t f16_sign_part = static_cast<uint16_t>((f32_bit_pattern & kF32SignMask) >> 16);
Ben Claytonf848af22023-07-28 16:37:32 +0000298 TINT_ASSERT((f16_sign_part & ~kF16SignMask) == 0);
Zhaoming Jiang2c7440a2022-07-07 03:29:11 +0000299
dan sinclair00d0fd52022-11-09 20:03:09 +0000300 if ((f32_bit_pattern & ~kF32SignMask) == 0) {
Zhaoming Jiang2c7440a2022-07-07 03:29:11 +0000301 // +/- zero
302 return f16_sign_part;
303 }
304
dan sinclair00d0fd52022-11-09 20:03:09 +0000305 if ((kMinF32BiasedExpForF16NormalNumber <= f32_biased_exponent) &&
306 (f32_biased_exponent <= kMaxF32BiasedExpForF16NormalNumber)) {
Zhaoming Jiang2c7440a2022-07-07 03:29:11 +0000307 // Normal f16
dan sinclair00d0fd52022-11-09 20:03:09 +0000308 uint32_t f16_biased_exponent = f32_biased_exponent - kF32ExponentBias + kF16ExponentBias;
309 uint16_t f16_exp_part = static_cast<uint16_t>(f16_biased_exponent << kF16MantissaBits);
310 uint16_t f16_mantissa_part =
311 static_cast<uint16_t>(f32_mantissa >> (kF32MantissaBits - kF16MantissaBits));
Zhaoming Jiang2c7440a2022-07-07 03:29:11 +0000312
Ben Claytonf848af22023-07-28 16:37:32 +0000313 TINT_ASSERT((f16_exp_part & ~kF16ExponentMask) == 0);
314 TINT_ASSERT((f16_mantissa_part & ~kF16MantissaMask) == 0);
Zhaoming Jiang2c7440a2022-07-07 03:29:11 +0000315
316 return f16_sign_part | f16_exp_part | f16_mantissa_part;
317 }
318
dan sinclair00d0fd52022-11-09 20:03:09 +0000319 if ((kMinF32BiasedExpForF16SubnormalNumber <= f32_biased_exponent) &&
320 (f32_biased_exponent <= kMaxF32BiasedExpForF16SubnormalNumber)) {
Zhaoming Jiang2c7440a2022-07-07 03:29:11 +0000321 // Subnormal f16
322 // The resulting exp bits are always 0, and the mantissa bits should be handled specially.
323 uint16_t f16_exp_part = 0;
324 // The resulting subnormal f16 will have only 1 valid mantissa bit if the unbiased exponent
325 // of value is of the minimum, i.e. -24; and have all 10 mantissa bits valid if the unbiased
326 // exponent of value is of the maximum, i.e. -15.
327 uint32_t f16_valid_mantissa_bits =
dan sinclair00d0fd52022-11-09 20:03:09 +0000328 f32_biased_exponent - kMinF32BiasedExpForF16SubnormalNumber + 1;
Zhaoming Jiang2c7440a2022-07-07 03:29:11 +0000329 // The resulting f16 mantissa part comes from right-shifting the f32 mantissa bits with
330 // leading 1 added.
331 uint16_t f16_mantissa_part =
dan sinclair00d0fd52022-11-09 20:03:09 +0000332 static_cast<uint16_t>((f32_mantissa | (kF32MantissaMask + 1)) >>
333 (kF32MantissaBits + 1 - f16_valid_mantissa_bits));
Zhaoming Jiang2c7440a2022-07-07 03:29:11 +0000334
Ben Claytonf848af22023-07-28 16:37:32 +0000335 TINT_ASSERT((1 <= f16_valid_mantissa_bits) &&
336 (f16_valid_mantissa_bits <= kF16MantissaBits));
337 TINT_ASSERT((f16_mantissa_part & ~((1u << f16_valid_mantissa_bits) - 1)) == 0);
338 TINT_ASSERT((f16_mantissa_part != 0));
Zhaoming Jiang2c7440a2022-07-07 03:29:11 +0000339
340 return f16_sign_part | f16_exp_part | f16_mantissa_part;
341 }
342
343 // Neither zero, subnormal f16 or normal f16, shall never hit.
Ben Claytonf848af22023-07-28 16:37:32 +0000344 TINT_UNREACHABLE();
dan sinclair00d0fd52022-11-09 20:03:09 +0000345}
346
347// static
dan sinclairce6dffe2023-08-14 21:01:40 +0000348core::Number<core::detail::NumberKindF16> f16::FromBits(uint16_t bits) {
dan sinclair00d0fd52022-11-09 20:03:09 +0000349 // Assert we use binary32 (i.e. float) as underlying type, which has 4 bytes.
350 static_assert(std::is_same<f16::type, float>());
351
352 if (bits == kF16PosInf) {
353 return f16(std::numeric_limits<f16::type>::infinity());
354 }
355 if (bits == kF16NegInf) {
356 return f16(-std::numeric_limits<f16::type>::infinity());
357 }
358
359 auto f16_sign_bit = uint32_t(bits & kF16SignMask);
360 // If none of the other bits are set we have a 0. If only the sign bit is set we have a -0.
361 if ((bits & ~kF16SignMask) == 0) {
362 return f16(f16_sign_bit > 0 ? -0.f : 0.f);
363 }
364
365 auto f16_mantissa = uint32_t(bits & kF16MantissaMask);
366 auto f16_biased_exponent = uint32_t(bits & kF16ExponentMask);
367
368 // F16 NaN has all expoennt bits set and at least one mantissa bit set
369 if (((f16_biased_exponent & kF16ExponentMask) == kF16ExponentMask) && f16_mantissa != 0) {
370 return f16(std::numeric_limits<f16::type>::quiet_NaN());
371 }
372
373 // Shift the exponent over to be a regular number.
374 f16_biased_exponent >>= kF16MantissaBits;
375
376 // Add the F32 bias and remove the F16 bias.
377 uint32_t f32_biased_exponent = f16_biased_exponent + kF32ExponentBias - kF16ExponentBias;
378
379 if (f16_biased_exponent == 0) {
380 // Subnormal number
381 //
382 // All subnormal F16 values can be represented as normal F32 values. Shift the mantissa and
383 // set the exponent as if this was a normal f16 value.
384
385 // While the first F16 exponent bit is not set
386 constexpr uint32_t kF16FirstExponentBit = 0x0400;
387 while ((f16_mantissa & kF16FirstExponentBit) == 0) {
388 // Shift the mantissa to the left
389 f16_mantissa <<= 1;
390 // Decrease the biased exponent to counter the shift
391 f32_biased_exponent -= 1;
392 }
393
394 // Remove the first exponent bit from the mantissa value
395 f16_mantissa &= ~kF16FirstExponentBit;
396 // Increase the exponent to deal with the masked off value.
397 f32_biased_exponent += 1;
398 }
399
400 // The mantissa bits are shifted over the difference in mantissa size to be in the F32 location.
401 uint32_t f32_mantissa = f16_mantissa << (kF32MantissaBits - kF16MantissaBits);
402
403 // Shift the exponent to the F32 exponent position before the mantissa.
404 f32_biased_exponent <<= kF32MantissaBits;
405
406 // Shift the sign bit over to the f32 sign bit position
407 uint32_t f32_sign_bit = f16_sign_bit << 16;
408
409 // Combine values together into the F32 value as a uint32_t.
410 uint32_t val = f32_sign_bit | f32_biased_exponent | f32_mantissa;
411
412 // Bitcast to a F32 and then store into the F16 Number
dan sinclairbae54e72023-07-28 15:01:54 +0000413 return f16(tint::Bitcast<f16::type>(val));
Zhaoming Jiang2c7440a2022-07-07 03:29:11 +0000414}
415
dan sinclairce6dffe2023-08-14 21:01:40 +0000416} // namespace tint::core