Austin Eng | cc2516a | 2023-10-17 20:57:54 +0000 | [diff] [blame] | 1 | // Copyright 2022 The Dawn & Tint Authors |
Ben Clayton | c2eccfc | 2022-05-25 15:04:24 +0000 | [diff] [blame] | 2 | // |
Austin Eng | cc2516a | 2023-10-17 20:57:54 +0000 | [diff] [blame] | 3 | // Redistribution and use in source and binary forms, with or without |
| 4 | // modification, are permitted provided that the following conditions are met: |
Ben Clayton | c2eccfc | 2022-05-25 15:04:24 +0000 | [diff] [blame] | 5 | // |
Austin Eng | cc2516a | 2023-10-17 20:57:54 +0000 | [diff] [blame] | 6 | // 1. Redistributions of source code must retain the above copyright notice, this |
| 7 | // list of conditions and the following disclaimer. |
Ben Clayton | c2eccfc | 2022-05-25 15:04:24 +0000 | [diff] [blame] | 8 | // |
Austin Eng | cc2516a | 2023-10-17 20:57:54 +0000 | [diff] [blame] | 9 | // 2. Redistributions in binary form must reproduce the above copyright notice, |
| 10 | // this list of conditions and the following disclaimer in the documentation |
| 11 | // and/or other materials provided with the distribution. |
| 12 | // |
| 13 | // 3. Neither the name of the copyright holder nor the names of its |
| 14 | // contributors may be used to endorse or promote products derived from |
| 15 | // this software without specific prior written permission. |
| 16 | // |
| 17 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| 18 | // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 19 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
| 20 | // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE |
| 21 | // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
| 22 | // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
| 23 | // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
| 24 | // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
| 25 | // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 26 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
Ben Clayton | c2eccfc | 2022-05-25 15:04:24 +0000 | [diff] [blame] | 27 | |
Ben Clayton | cd52f38 | 2023-08-07 13:11:08 +0000 | [diff] [blame] | 28 | #include "src/tint/lang/core/number.h" |
Ben Clayton | c2eccfc | 2022-05-25 15:04:24 +0000 | [diff] [blame] | 29 | |
| 30 | #include <algorithm> |
Zhaoming Jiang | 0fb4e2c | 2022-06-10 18:18:35 +0000 | [diff] [blame] | 31 | #include <cmath> |
Ben Clayton | c2eccfc | 2022-05-25 15:04:24 +0000 | [diff] [blame] | 32 | #include <cstring> |
Ben Clayton | c2eccfc | 2022-05-25 15:04:24 +0000 | [diff] [blame] | 33 | |
Ben Clayton | f848af2 | 2023-07-28 16:37:32 +0000 | [diff] [blame] | 34 | #include "src/tint/utils/ice/ice.h" |
dan sinclair | 22b4dd2 | 2023-07-21 00:40:07 +0000 | [diff] [blame] | 35 | #include "src/tint/utils/memory/bitcast.h" |
| 36 | #include "src/tint/utils/text/string_stream.h" |
Zhaoming Jiang | 0fb4e2c | 2022-06-10 18:18:35 +0000 | [diff] [blame] | 37 | |
dan sinclair | ce6dffe | 2023-08-14 21:01:40 +0000 | [diff] [blame] | 38 | namespace tint::core { |
namespace {

// ---- IEEE 754 binary16 (f16) layout: s_eeeee_mmmmmmmmmm --------------------------------------

// Number of explicit mantissa bits and the exponent bias of an f16.
constexpr uint32_t kF16MantissaBits = 10;
constexpr uint32_t kF16ExponentBias = 15;

// Masks selecting the sign, exponent and mantissa fields of an f16 bit pattern.
constexpr uint16_t kF16SignMask = 0x8000u;
constexpr uint16_t kF16ExponentMask = 0x7c00u;
constexpr uint16_t kF16MantissaMask = 0x03ffu;

// f16 bit patterns used for the non-finite values.
constexpr uint16_t kF16PosInf = 0x7c00u;
constexpr uint16_t kF16NegInf = 0xfc00u;
constexpr uint16_t kF16Nan = 0x7e00u;

// ---- IEEE 754 binary32 (f32) layout: s_eeeeeeee_mmmmmmmmmmmmmmmmmmmmmmm ----------------------

// Number of explicit mantissa bits and the exponent bias of an f32.
constexpr uint32_t kF32MantissaBits = 23;
constexpr uint32_t kF32ExponentBias = 127;

// Masks selecting the sign, exponent and mantissa fields of an f32 bit pattern.
constexpr uint32_t kF32SignMask = 0x80000000u;
constexpr uint32_t kF32ExponentMask = 0x7f800000u;
constexpr uint32_t kF32MantissaMask = 0x007fffffu;

// ---- f32 biased-exponent windows of the values that are exactly representable as f16 ---------

// Normal f16 values have unbiased exponents in [-14, 15].
constexpr uint32_t kMinF32BiasedExpForF16NormalNumber = kF32ExponentBias - 14;  // 113
constexpr uint32_t kMaxF32BiasedExpForF16NormalNumber = kF32ExponentBias + 15;  // 142
// Subnormal f16 values, seen as normal f32 values, have unbiased exponents in [-24, -15].
constexpr uint32_t kMinF32BiasedExpForF16SubnormalNumber = kF32ExponentBias - 24;  // 103
constexpr uint32_t kMaxF32BiasedExpForF16SubnormalNumber = kF32ExponentBias - 15;  // 112

// Sanity checks tying the hand-written literals above to each other.
static_assert((kF16SignMask | kF16ExponentMask | kF16MantissaMask) == 0xffffu);
static_assert(kF16MantissaMask == (1u << kF16MantissaBits) - 1u);
static_assert(kF32MantissaMask == (1u << kF32MantissaBits) - 1u);
static_assert(kF16PosInf == kF16ExponentMask);
static_assert(kF16NegInf == (kF16SignMask | kF16ExponentMask));
static_assert(kMaxF32BiasedExpForF16SubnormalNumber + 1 == kMinF32BiasedExpForF16NormalNumber);

}  // namespace
Ben Clayton | c2eccfc | 2022-05-25 15:04:24 +0000 | [diff] [blame] | 65 | |
Ben Clayton | c2eccfc | 2022-05-25 15:04:24 +0000 | [diff] [blame] | 66 | f16::type f16::Quantize(f16::type value) { |
Antonio Maiorano | d060f36 | 2022-07-29 17:12:01 +0000 | [diff] [blame] | 67 | if (value > kHighestValue) { |
Ben Clayton | c2eccfc | 2022-05-25 15:04:24 +0000 | [diff] [blame] | 68 | return std::numeric_limits<f16::type>::infinity(); |
| 69 | } |
Antonio Maiorano | d060f36 | 2022-07-29 17:12:01 +0000 | [diff] [blame] | 70 | if (value < kLowestValue) { |
Ben Clayton | c2eccfc | 2022-05-25 15:04:24 +0000 | [diff] [blame] | 71 | return -std::numeric_limits<f16::type>::infinity(); |
| 72 | } |
dan sinclair | 00d0fd5 | 2022-11-09 20:03:09 +0000 | [diff] [blame] | 73 | |
Ben Clayton | c2eccfc | 2022-05-25 15:04:24 +0000 | [diff] [blame] | 74 | // Below value must be within the finite range of a f16. |
Zhaoming Jiang | 0fb4e2c | 2022-06-10 18:18:35 +0000 | [diff] [blame] | 75 | // Assert we use binary32 (i.e. float) as underlying type, which has 4 bytes. |
| 76 | static_assert(std::is_same<f16::type, float>()); |
Zhaoming Jiang | 0fb4e2c | 2022-06-10 18:18:35 +0000 | [diff] [blame] | 77 | |
dan sinclair | bae54e7 | 2023-07-28 15:01:54 +0000 | [diff] [blame] | 78 | uint32_t u32 = tint::Bitcast<uint32_t>(value); |
dan sinclair | 00d0fd5 | 2022-11-09 20:03:09 +0000 | [diff] [blame] | 79 | if ((u32 & ~kF32SignMask) == 0) { |
Ben Clayton | 0d2aedf | 2022-11-18 11:57:37 +0000 | [diff] [blame] | 80 | return value; // +/- zero |
Ben Clayton | c2eccfc | 2022-05-25 15:04:24 +0000 | [diff] [blame] | 81 | } |
dan sinclair | 00d0fd5 | 2022-11-09 20:03:09 +0000 | [diff] [blame] | 82 | if ((u32 & kF32ExponentMask) == kF32ExponentMask) { // exponent all 1's |
Ben Clayton | 0d2aedf | 2022-11-18 11:57:37 +0000 | [diff] [blame] | 83 | return value; // inf or nan |
Ben Clayton | c2eccfc | 2022-05-25 15:04:24 +0000 | [diff] [blame] | 84 | } |
Zhaoming Jiang | 0fb4e2c | 2022-06-10 18:18:35 +0000 | [diff] [blame] | 85 | |
| 86 | // We are now going to quantize a f32 number into subnormal f16 and store the result value back |
| 87 | // into a f32 variable. Notice that all subnormal f16 values are just normal f32 values. Below |
| 88 | // will show that we can do this quantization by just masking out 13 or more lowest mantissa |
| 89 | // bits of the original f32 number. |
| 90 | // |
| 91 | // Note: |
dan sinclair | 00d0fd5 | 2022-11-09 20:03:09 +0000 | [diff] [blame] | 92 | // * f32 has 1 sign bit, 8 exponent bits for biased exponent (i.e. unbiased exponent + 127), and |
| 93 | // 23 mantissa bits. Binary form: s_eeeeeeee_mmmmmmmmmmmmmmmmmmmmmmm |
| 94 | // |
| 95 | // * f16 has 1 sign bit, 5 exponent bits for biased exponent (i.e. unbiased exponent + 15), and |
| 96 | // 10 mantissa bits. Binary form: s_eeeee_mmmmmmmmmm |
| 97 | // |
Zhaoming Jiang | 0fb4e2c | 2022-06-10 18:18:35 +0000 | [diff] [blame] | 98 | // The largest finite f16 number has a biased exponent of 11110 in binary, or 30 decimal, and so |
dan sinclair | 00d0fd5 | 2022-11-09 20:03:09 +0000 | [diff] [blame] | 99 | // an unbiased exponent of 30 - 15 = 15. |
| 100 | // |
Zhaoming Jiang | 0fb4e2c | 2022-06-10 18:18:35 +0000 | [diff] [blame] | 101 | // The smallest finite f16 number has a biased exponent of 00001 in binary, or 1 decimal, and so |
| 102 | // a unbiased exponent of 1 - 15 = -14. |
| 103 | // |
| 104 | // We may follow the argument below: |
| 105 | // 1. All normal or subnormal f16 values, range from 0x1.p-24 to 0x1.ffcp15, are exactly |
dan sinclair | 00d0fd5 | 2022-11-09 20:03:09 +0000 | [diff] [blame] | 106 | // representable by a normal f32 number. |
Zhaoming Jiang | 0fb4e2c | 2022-06-10 18:18:35 +0000 | [diff] [blame] | 107 | // 1.1. We can denote the set of all f32 number that are exact representation of finite f16 |
| 108 | // values by `R`. |
| 109 | // 1.2. We can do the quantization by mapping a normal f32 value v (in the f16 finite range) |
| 110 | // to a certain f32 number v' in the set R, which is the largest (by the meaning of absolute |
| 111 | // value) one among all values in R that are no larger than v. |
dan sinclair | 00d0fd5 | 2022-11-09 20:03:09 +0000 | [diff] [blame] | 112 | // |
Zhaoming Jiang | 0fb4e2c | 2022-06-10 18:18:35 +0000 | [diff] [blame] | 113 | // 2. We can decide whether a given normal f32 number v is in the set R, by looking at its |
| 114 | // mantissa bits and biased exponent `e`. Recall that biased exponent e is unbiased exponent + |
| 115 | // 127, and in the range of 1 to 254 for normal f32 number. |
Antonio Maiorano | d060f36 | 2022-07-29 17:12:01 +0000 | [diff] [blame] | 116 | // 2.1. If e >= 143, i.e. abs(v) >= 2^16 > f16::kHighestValue = 0x1.ffcp15, v is larger than |
| 117 | // any finite f16 value and can not be in set R. 2.2. If 142 >= e >= 113, or |
| 118 | // f16::kHighestValue >= abs(v) >= f16::kSmallestValue = 2^-14, v falls in the range of normal |
| 119 | // f16 values. In this case, v is in the set R iff the lowest 13 mantissa bits are all 0. (See |
| 120 | // below for proof) |
Zhaoming Jiang | 0fb4e2c | 2022-06-10 18:18:35 +0000 | [diff] [blame] | 121 | // 2.2.1. If we let v' be v with lowest 13 mantissa bits masked to 0, v' will be in set R |
| 122 | // and the largest one in set R that no larger than v. Such v' is the quantized value of v. |
Antonio Maiorano | d060f36 | 2022-07-29 17:12:01 +0000 | [diff] [blame] | 123 | // 2.3. If 112 >= e >= 103, i.e. 2^-14 > abs(v) >= f16::kSmallestSubnormalValue = 2^-24, v |
| 124 | // falls in the range of subnormal f16 values. In this case, v is in the set R iff the lowest |
| 125 | // 126-e mantissa bits are all 0. Notice that 126-e is in range 14 to 23, inclusive. (See |
| 126 | // below for proof) |
Zhaoming Jiang | 0fb4e2c | 2022-06-10 18:18:35 +0000 | [diff] [blame] | 127 | // 2.3.1. If we let v' be v with lowest 126-e mantissa bits masked to 0, v' will be in set R |
| 128 | // and the largest on in set R that no larger than v. Such v' is the quantized value of v. |
| 129 | // 2.4. If 2^-24 > abs(v) > 0, i.e. 103 > e, v is smaller than any finite f16 value and not |
| 130 | // equal to 0.0, thus can not be in set R. |
| 131 | // 2.5. If abs(v) = 0, v is in set R and is just +-0.0. |
| 132 | // |
dan sinclair | 00d0fd5 | 2022-11-09 20:03:09 +0000 | [diff] [blame] | 133 | // Proof for 2.2 |
| 134 | // ------------- |
Zhaoming Jiang | 0fb4e2c | 2022-06-10 18:18:35 +0000 | [diff] [blame] | 135 | // Any normal f16 number, in binary form, s_eeeee_mmmmmmmmmm, has value |
dan sinclair | 00d0fd5 | 2022-11-09 20:03:09 +0000 | [diff] [blame] | 136 | // |
| 137 | // (s == 0 ? 1 : -1) * (1 + uint(mmmmm_mmmmm) * (2^-10)) * 2^(uint(eeeee) - 15) |
| 138 | // |
Zhaoming Jiang | 0fb4e2c | 2022-06-10 18:18:35 +0000 | [diff] [blame] | 139 | // in which unit(bbbbb) means interprete binary pattern "bbbbb" as unsigned binary number, |
| 140 | // and we have 1 <= uint(eeeee) <= 30. |
dan sinclair | 00d0fd5 | 2022-11-09 20:03:09 +0000 | [diff] [blame] | 141 | // |
Zhaoming Jiang | 0fb4e2c | 2022-06-10 18:18:35 +0000 | [diff] [blame] | 142 | // This value is equal to a normal f32 number with binary |
| 143 | // s_EEEEEEEE_mmmmmmmmmm0000000000000 |
dan sinclair | 00d0fd5 | 2022-11-09 20:03:09 +0000 | [diff] [blame] | 144 | // |
| 145 | // where uint(EEEEEEEE) = uint(eeeee) + 112, so that unbiased exponent is kept unchanged |
| 146 | // |
Zhaoming Jiang | 0fb4e2c | 2022-06-10 18:18:35 +0000 | [diff] [blame] | 147 | // uint(EEEEEEEE) - 127 = uint(eeeee) - 15 |
dan sinclair | 00d0fd5 | 2022-11-09 20:03:09 +0000 | [diff] [blame] | 148 | // |
Zhaoming Jiang | 0fb4e2c | 2022-06-10 18:18:35 +0000 | [diff] [blame] | 149 | // and its value is |
dan sinclair | 00d0fd5 | 2022-11-09 20:03:09 +0000 | [diff] [blame] | 150 | // (s == 0 ? 1 : -1) * |
| 151 | // (1 + uint(mmmmm_mmmmm_00000_00000_000) * (2^-23)) * 2^(uint(EEEEEEEE) - 127) |
| 152 | // == (s == 0 ? 1 : -1) * |
| 153 | // (1 + uint(mmmmm_mmmmm) * (2^-10)) * 2^(uint(eeeee) - 15) |
| 154 | // |
Zhaoming Jiang | 0fb4e2c | 2022-06-10 18:18:35 +0000 | [diff] [blame] | 155 | // Notice that uint(EEEEEEEE) is in range [113, 142], showing that it is a normal f32 number. |
dan sinclair | 00d0fd5 | 2022-11-09 20:03:09 +0000 | [diff] [blame] | 156 | // So we proved that any normal f16 number can be exactly representd by a normal f32 number |
| 157 | // with biased exponent in range [113, 142] and the lowest 13 mantissa bits 0. |
| 158 | // |
Zhaoming Jiang | 0fb4e2c | 2022-06-10 18:18:35 +0000 | [diff] [blame] | 159 | // On the other hand, since mantissa bits mmmmmmmmmm are arbitrary, the value of any f32 |
| 160 | // that has a biased exponent in range [113, 142] and lowest 13 mantissa bits zero is equal |
dan sinclair | 00d0fd5 | 2022-11-09 20:03:09 +0000 | [diff] [blame] | 161 | // to a normal f16 value. Hence we prove 2.2. |
Zhaoming Jiang | 0fb4e2c | 2022-06-10 18:18:35 +0000 | [diff] [blame] | 162 | // |
dan sinclair | 00d0fd5 | 2022-11-09 20:03:09 +0000 | [diff] [blame] | 163 | // Proof for 2.3 |
| 164 | // ------------- |
Zhaoming Jiang | 0fb4e2c | 2022-06-10 18:18:35 +0000 | [diff] [blame] | 165 | // Any subnormal f16 number has a binary form of s_00000_mmmmmmmmmm, and its value is |
dan sinclair | 00d0fd5 | 2022-11-09 20:03:09 +0000 | [diff] [blame] | 166 | // |
| 167 | // (s == 0 ? 1 : -1) * uint(mmmmmmmmmm) * (2^-10) * (2^-14) |
| 168 | // == (s == 0 ? 1 : -1) * uint(mmmmmmmmmm) * (2^-24). |
| 169 | // |
| 170 | // We discuss the bit pattern of mantissa bits mmmmmmmmmm. |
| 171 | // Case 1: mantissa bits have no leading zero bit, s_00000_1mmmmmmmmm |
Zhaoming Jiang | 0fb4e2c | 2022-06-10 18:18:35 +0000 | [diff] [blame] | 172 | // In this case the value is |
dan sinclair | 00d0fd5 | 2022-11-09 20:03:09 +0000 | [diff] [blame] | 173 | // (s == 0 ? 1 : -1) * uint(1mmmm_mmmmm) * (2^-10) * (2^-14) |
| 174 | // == (s == 0 ? 1 : -1) * ( uint(1_mmmmm_mmmm) * (2^-9)) * (2^-15) |
| 175 | // == (s == 0 ? 1 : -1) * (1 + uint(mmmmm_mmmm) * (2^-9)) * (2^-15) |
| 176 | // == (s == 0 ? 1 : -1) * (1 + uint(mmmmm_mmmm0_00000_00000_000) * (2^-23)) * (2^-15) |
| 177 | // |
| 178 | // which is equal to the value of the normal f32 number |
| 179 | // |
Zhaoming Jiang | 0fb4e2c | 2022-06-10 18:18:35 +0000 | [diff] [blame] | 180 | // s_EEEEEEEE_mmmmm_mmmm0_00000_00000_000 |
dan sinclair | 00d0fd5 | 2022-11-09 20:03:09 +0000 | [diff] [blame] | 181 | // |
| 182 | // where uint(EEEEEEEE) == -15 + 127 = 112. Hence we proved that any subnormal f16 number |
Zhaoming Jiang | 0fb4e2c | 2022-06-10 18:18:35 +0000 | [diff] [blame] | 183 | // with no leading zero mantissa bit can be exactly represented by a f32 number with |
| 184 | // biased exponent 112 and the lowest 14 mantissa bits zero, and the value of any f32 |
dan sinclair | 00d0fd5 | 2022-11-09 20:03:09 +0000 | [diff] [blame] | 185 | // number with biased exponent 112 and the lowest 14 mantissa bits zero is equal to a |
Zhaoming Jiang | 0fb4e2c | 2022-06-10 18:18:35 +0000 | [diff] [blame] | 186 | // subnormal f16 number with no leading zero mantissa bit. |
dan sinclair | 00d0fd5 | 2022-11-09 20:03:09 +0000 | [diff] [blame] | 187 | // |
Zhaoming Jiang | 0fb4e2c | 2022-06-10 18:18:35 +0000 | [diff] [blame] | 188 | // Case 2: mantissa bits has 1 leading zero bit, s_00000_01mmmmmmmm |
| 189 | // In this case the value is |
dan sinclair | 00d0fd5 | 2022-11-09 20:03:09 +0000 | [diff] [blame] | 190 | // (s == 0 ? 1 : -1) * uint(01mmm_mmmmm) * (2^-10) * (2^-14) |
| 191 | // == (s == 0 ? 1 : -1) * ( uint(01_mmmmm_mmm) * (2^-8)) * (2^-16) |
| 192 | // == (s == 0 ? 1 : -1) * (1 + uint(mmmmm_mmm) * (2^-8)) * (2^-16) |
| 193 | // == (s == 0 ? 1 : -1) * (1 + uint(mmmmm_mmm00_00000_00000_000) * (2^-23)) * (2^-16) |
| 194 | // |
Zhaoming Jiang | 0fb4e2c | 2022-06-10 18:18:35 +0000 | [diff] [blame] | 195 | // which is equal to the value of normal f32 number |
dan sinclair | 00d0fd5 | 2022-11-09 20:03:09 +0000 | [diff] [blame] | 196 | // |
Zhaoming Jiang | 0fb4e2c | 2022-06-10 18:18:35 +0000 | [diff] [blame] | 197 | // s_EEEEEEEE_mmmmm_mmm00_00000_00000_000 |
dan sinclair | 00d0fd5 | 2022-11-09 20:03:09 +0000 | [diff] [blame] | 198 | // |
| 199 | // where uint(EEEEEEEE) = -16 + 127 = 111. Hence we proved that any subnormal f16 number |
Zhaoming Jiang | 0fb4e2c | 2022-06-10 18:18:35 +0000 | [diff] [blame] | 200 | // with 1 leading zero mantissa bit can be exactly represented by a f32 number with |
| 201 | // biased exponent 111 and the lowest 15 mantissa bits zero, and the value of any f32 |
dan sinclair | 00d0fd5 | 2022-11-09 20:03:09 +0000 | [diff] [blame] | 202 | // number with biased exponent 111 and the lowest 15 mantissa bits zero is equal to a |
Zhaoming Jiang | 0fb4e2c | 2022-06-10 18:18:35 +0000 | [diff] [blame] | 203 | // subnormal f16 number with 1 leading zero mantissa bit. |
dan sinclair | 00d0fd5 | 2022-11-09 20:03:09 +0000 | [diff] [blame] | 204 | // |
Zhaoming Jiang | 0fb4e2c | 2022-06-10 18:18:35 +0000 | [diff] [blame] | 205 | // Case 3 to case 8: ...... |
dan sinclair | 00d0fd5 | 2022-11-09 20:03:09 +0000 | [diff] [blame] | 206 | // |
| 207 | // Case 9: mantissa bits has 8 leading zero bits, s_00000_000000001m |
Zhaoming Jiang | 0fb4e2c | 2022-06-10 18:18:35 +0000 | [diff] [blame] | 208 | // In this case the value is |
dan sinclair | 00d0fd5 | 2022-11-09 20:03:09 +0000 | [diff] [blame] | 209 | // (s == 0 ? 1 : -1) * uint(00000_0001m) * (2^-10) * (2^-14) |
| 210 | // == (s == 0 ? 1 : -1) * ( uint(000000001_m) * (2^-1)) * (2^-23) |
| 211 | // == (s == 0 ? 1 : -1) * (1 + uint(m) * (2^-1)) * (2^-23) |
| 212 | // == (s == 0 ? 1 : -1) * (1 + uint(m0000_00000_00000_00000_000) * (2^-23)) * (2^-23) |
| 213 | // |
Zhaoming Jiang | 0fb4e2c | 2022-06-10 18:18:35 +0000 | [diff] [blame] | 214 | // which is equal to the value of normal f32 number |
dan sinclair | 00d0fd5 | 2022-11-09 20:03:09 +0000 | [diff] [blame] | 215 | // |
Zhaoming Jiang | 0fb4e2c | 2022-06-10 18:18:35 +0000 | [diff] [blame] | 216 | // s_EEEEEEEE_m0000_00000_00000_00000_000 |
dan sinclair | 00d0fd5 | 2022-11-09 20:03:09 +0000 | [diff] [blame] | 217 | // |
| 218 | // where uint(EEEEEEEE) = -23 + 127 = 104. Hence we proved that any subnormal f16 number |
Zhaoming Jiang | 0fb4e2c | 2022-06-10 18:18:35 +0000 | [diff] [blame] | 219 | // with 8 leading zero mantissa bit can be exactly represented by a f32 number with |
| 220 | // biased exponent 104 and the lowest 22 mantissa bits zero, and the value of any f32 |
| 221 | // number with biased exponent 104 and the lowest 22 mantissa bits zero are equal to a |
| 222 | // subnormal f16 number with 8 leading zero mantissa bit. |
dan sinclair | 00d0fd5 | 2022-11-09 20:03:09 +0000 | [diff] [blame] | 223 | // |
| 224 | // Case 10: mantissa bits has 9 leading zero bits, s_00000_0000000001 |
| 225 | // In this case the value is just +-2^-24 == +-0x1.0p-24, |
Zhaoming Jiang | 0fb4e2c | 2022-06-10 18:18:35 +0000 | [diff] [blame] | 226 | // the f32 number has biased exponent 103 and all 23 mantissa bits zero. |
dan sinclair | 00d0fd5 | 2022-11-09 20:03:09 +0000 | [diff] [blame] | 227 | // |
| 228 | // Case 11: mantissa bits has 10 leading zero bits, s_00000_0000000000, just 0.0 |
| 229 | // |
| 230 | // Concluding all these case, we proved that any subnormal f16 number with N leading zero |
| 231 | // mantissa bit can be exactly represented by a f32 number with biased exponent 112 - N and the |
| 232 | // lowest 14 + N mantissa bits zero, and the value of any f32 number with biased exponent |
| 233 | // 112 - N (= e) and the lowest 14 + N (= 126 - e) mantissa bits zero are equal to a subnormal |
| 234 | // f16 number with N leading zero mantissa bits. N is in range [0, 9], so the f32 number's |
| 235 | // biased exponent e is in range [103, 112], or unbiased exponent in [-24, -15]. |
Zhaoming Jiang | 0fb4e2c | 2022-06-10 18:18:35 +0000 | [diff] [blame] | 236 | |
| 237 | float abs_value = std::fabs(value); |
Antonio Maiorano | d060f36 | 2022-07-29 17:12:01 +0000 | [diff] [blame] | 238 | if (abs_value >= kSmallestValue) { |
Zhaoming Jiang | 0fb4e2c | 2022-06-10 18:18:35 +0000 | [diff] [blame] | 239 | // Value falls in the normal f16 range, quantize it to a normal f16 value by masking out |
| 240 | // lowest 13 mantissa bits. |
dan sinclair | 00d0fd5 | 2022-11-09 20:03:09 +0000 | [diff] [blame] | 241 | u32 = u32 & ~((1u << (kF32MantissaBits - kF16MantissaBits)) - 1); |
Antonio Maiorano | d060f36 | 2022-07-29 17:12:01 +0000 | [diff] [blame] | 242 | } else if (abs_value >= kSmallestSubnormalValue) { |
Zhaoming Jiang | 0fb4e2c | 2022-06-10 18:18:35 +0000 | [diff] [blame] | 243 | // Value should be quantized to a subnormal f16 value. |
| 244 | |
| 245 | // Get the biased exponent `e` of f32 value, e.g. value 127 representing exponent 2^0. |
dan sinclair | 00d0fd5 | 2022-11-09 20:03:09 +0000 | [diff] [blame] | 246 | uint32_t biased_exponent_original = (u32 & kF32ExponentMask) >> kF32MantissaBits; |
Antonio Maiorano | d060f36 | 2022-07-29 17:12:01 +0000 | [diff] [blame] | 247 | // Since we ensure that kSmallestValue = 0x1f-14 > abs(value) >= kSmallestSubnormalValue = |
| 248 | // 0x1f-24, value will have a unbiased exponent in range -24 to -15 (inclusive), and the |
Zhaoming Jiang | 0fb4e2c | 2022-06-10 18:18:35 +0000 | [diff] [blame] | 249 | // corresponding biased exponent in f32 is in range 103 to 112 (inclusive). |
Ben Clayton | f848af2 | 2023-07-28 16:37:32 +0000 | [diff] [blame] | 250 | TINT_ASSERT((kMinF32BiasedExpForF16SubnormalNumber <= biased_exponent_original) && |
| 251 | (biased_exponent_original <= kMaxF32BiasedExpForF16SubnormalNumber)); |
Zhaoming Jiang | 0fb4e2c | 2022-06-10 18:18:35 +0000 | [diff] [blame] | 252 | |
| 253 | // As we have proved, masking out the lowest 126-e mantissa bits of input value will result |
| 254 | // in a valid subnormal f16 value, which is exactly the required quantization result. |
| 255 | uint32_t discard_bits = 126 - biased_exponent_original; // In range 14 to 23 (inclusive) |
Ben Clayton | f848af2 | 2023-07-28 16:37:32 +0000 | [diff] [blame] | 256 | TINT_ASSERT((14 <= discard_bits) && (discard_bits <= kF32MantissaBits)); |
Zhaoming Jiang | 0fb4e2c | 2022-06-10 18:18:35 +0000 | [diff] [blame] | 257 | uint32_t discard_mask = (1u << discard_bits) - 1; |
| 258 | u32 = u32 & ~discard_mask; |
| 259 | } else { |
| 260 | // value is too small that it can't even be represented as subnormal f16 number. Quantize |
| 261 | // to zero. |
| 262 | return value > 0 ? 0.0 : -0.0; |
| 263 | } |
dan sinclair | 00d0fd5 | 2022-11-09 20:03:09 +0000 | [diff] [blame] | 264 | |
dan sinclair | bae54e7 | 2023-07-28 15:01:54 +0000 | [diff] [blame] | 265 | return tint::Bitcast<f16::type>(u32); |
Ben Clayton | c2eccfc | 2022-05-25 15:04:24 +0000 | [diff] [blame] | 266 | } |
| 267 | |
Zhaoming Jiang | 2c7440a | 2022-07-07 03:29:11 +0000 | [diff] [blame] | 268 | uint16_t f16::BitsRepresentation() const { |
Zhaoming Jiang | 2c7440a | 2022-07-07 03:29:11 +0000 | [diff] [blame] | 269 | // Assert we use binary32 (i.e. float) as underlying type, which has 4 bytes. |
| 270 | static_assert(std::is_same<f16::type, float>()); |
| 271 | |
| 272 | // The stored value in f16 object must be already quantized, so it should be either NaN, +/- |
| 273 | // Inf, or exactly representable by normal or subnormal f16. |
| 274 | |
| 275 | if (std::isnan(value)) { |
dan sinclair | 00d0fd5 | 2022-11-09 20:03:09 +0000 | [diff] [blame] | 276 | return kF16Nan; |
Zhaoming Jiang | 2c7440a | 2022-07-07 03:29:11 +0000 | [diff] [blame] | 277 | } |
| 278 | |
| 279 | if (std::isinf(value)) { |
dan sinclair | 00d0fd5 | 2022-11-09 20:03:09 +0000 | [diff] [blame] | 280 | return value > 0 ? kF16PosInf : kF16NegInf; |
Zhaoming Jiang | 2c7440a | 2022-07-07 03:29:11 +0000 | [diff] [blame] | 281 | } |
| 282 | |
| 283 | // Now quantized_value must be a finite f16 exactly-representable value. |
| 284 | // The following table shows exponent cases for all finite f16 exactly-representable value. |
| 285 | // --------------------------------------------------------------------------- |
| 286 | // | Value category | Unbiased exp | F16 biased exp | F32 biased exp | |
| 287 | // |------------------|----------------|------------------|------------------| |
| 288 | // | +/- zero | \ | 0 | 0 | |
| 289 | // | Subnormal f16 | [-24, -15] | 0 | [103, 112] | |
dan sinclair | 00d0fd5 | 2022-11-09 20:03:09 +0000 | [diff] [blame] | 290 | // | Normal f16 | [-14, 15] | [1, 30] | [113, 142] | |
Zhaoming Jiang | 2c7440a | 2022-07-07 03:29:11 +0000 | [diff] [blame] | 291 | // --------------------------------------------------------------------------- |
| 292 | |
dan sinclair | bae54e7 | 2023-07-28 15:01:54 +0000 | [diff] [blame] | 293 | uint32_t f32_bit_pattern = tint::Bitcast<uint32_t>(value); |
dan sinclair | 00d0fd5 | 2022-11-09 20:03:09 +0000 | [diff] [blame] | 294 | uint32_t f32_biased_exponent = (f32_bit_pattern & kF32ExponentMask) >> kF32MantissaBits; |
| 295 | uint32_t f32_mantissa = f32_bit_pattern & kF32MantissaMask; |
Zhaoming Jiang | 2c7440a | 2022-07-07 03:29:11 +0000 | [diff] [blame] | 296 | |
dan sinclair | 00d0fd5 | 2022-11-09 20:03:09 +0000 | [diff] [blame] | 297 | uint16_t f16_sign_part = static_cast<uint16_t>((f32_bit_pattern & kF32SignMask) >> 16); |
Ben Clayton | f848af2 | 2023-07-28 16:37:32 +0000 | [diff] [blame] | 298 | TINT_ASSERT((f16_sign_part & ~kF16SignMask) == 0); |
Zhaoming Jiang | 2c7440a | 2022-07-07 03:29:11 +0000 | [diff] [blame] | 299 | |
dan sinclair | 00d0fd5 | 2022-11-09 20:03:09 +0000 | [diff] [blame] | 300 | if ((f32_bit_pattern & ~kF32SignMask) == 0) { |
Zhaoming Jiang | 2c7440a | 2022-07-07 03:29:11 +0000 | [diff] [blame] | 301 | // +/- zero |
| 302 | return f16_sign_part; |
| 303 | } |
| 304 | |
dan sinclair | 00d0fd5 | 2022-11-09 20:03:09 +0000 | [diff] [blame] | 305 | if ((kMinF32BiasedExpForF16NormalNumber <= f32_biased_exponent) && |
| 306 | (f32_biased_exponent <= kMaxF32BiasedExpForF16NormalNumber)) { |
Zhaoming Jiang | 2c7440a | 2022-07-07 03:29:11 +0000 | [diff] [blame] | 307 | // Normal f16 |
dan sinclair | 00d0fd5 | 2022-11-09 20:03:09 +0000 | [diff] [blame] | 308 | uint32_t f16_biased_exponent = f32_biased_exponent - kF32ExponentBias + kF16ExponentBias; |
| 309 | uint16_t f16_exp_part = static_cast<uint16_t>(f16_biased_exponent << kF16MantissaBits); |
| 310 | uint16_t f16_mantissa_part = |
| 311 | static_cast<uint16_t>(f32_mantissa >> (kF32MantissaBits - kF16MantissaBits)); |
Zhaoming Jiang | 2c7440a | 2022-07-07 03:29:11 +0000 | [diff] [blame] | 312 | |
Ben Clayton | f848af2 | 2023-07-28 16:37:32 +0000 | [diff] [blame] | 313 | TINT_ASSERT((f16_exp_part & ~kF16ExponentMask) == 0); |
| 314 | TINT_ASSERT((f16_mantissa_part & ~kF16MantissaMask) == 0); |
Zhaoming Jiang | 2c7440a | 2022-07-07 03:29:11 +0000 | [diff] [blame] | 315 | |
| 316 | return f16_sign_part | f16_exp_part | f16_mantissa_part; |
| 317 | } |
| 318 | |
dan sinclair | 00d0fd5 | 2022-11-09 20:03:09 +0000 | [diff] [blame] | 319 | if ((kMinF32BiasedExpForF16SubnormalNumber <= f32_biased_exponent) && |
| 320 | (f32_biased_exponent <= kMaxF32BiasedExpForF16SubnormalNumber)) { |
Zhaoming Jiang | 2c7440a | 2022-07-07 03:29:11 +0000 | [diff] [blame] | 321 | // Subnormal f16 |
| 322 | // The resulting exp bits are always 0, and the mantissa bits should be handled specially. |
| 323 | uint16_t f16_exp_part = 0; |
| 324 | // The resulting subnormal f16 will have only 1 valid mantissa bit if the unbiased exponent |
| 325 | // of value is of the minimum, i.e. -24; and have all 10 mantissa bits valid if the unbiased |
| 326 | // exponent of value is of the maximum, i.e. -15. |
| 327 | uint32_t f16_valid_mantissa_bits = |
dan sinclair | 00d0fd5 | 2022-11-09 20:03:09 +0000 | [diff] [blame] | 328 | f32_biased_exponent - kMinF32BiasedExpForF16SubnormalNumber + 1; |
Zhaoming Jiang | 2c7440a | 2022-07-07 03:29:11 +0000 | [diff] [blame] | 329 | // The resulting f16 mantissa part comes from right-shifting the f32 mantissa bits with |
| 330 | // leading 1 added. |
| 331 | uint16_t f16_mantissa_part = |
dan sinclair | 00d0fd5 | 2022-11-09 20:03:09 +0000 | [diff] [blame] | 332 | static_cast<uint16_t>((f32_mantissa | (kF32MantissaMask + 1)) >> |
| 333 | (kF32MantissaBits + 1 - f16_valid_mantissa_bits)); |
Zhaoming Jiang | 2c7440a | 2022-07-07 03:29:11 +0000 | [diff] [blame] | 334 | |
Ben Clayton | f848af2 | 2023-07-28 16:37:32 +0000 | [diff] [blame] | 335 | TINT_ASSERT((1 <= f16_valid_mantissa_bits) && |
| 336 | (f16_valid_mantissa_bits <= kF16MantissaBits)); |
| 337 | TINT_ASSERT((f16_mantissa_part & ~((1u << f16_valid_mantissa_bits) - 1)) == 0); |
| 338 | TINT_ASSERT((f16_mantissa_part != 0)); |
Zhaoming Jiang | 2c7440a | 2022-07-07 03:29:11 +0000 | [diff] [blame] | 339 | |
| 340 | return f16_sign_part | f16_exp_part | f16_mantissa_part; |
| 341 | } |
| 342 | |
| 343 | // Neither zero, subnormal f16 or normal f16, shall never hit. |
Ben Clayton | f848af2 | 2023-07-28 16:37:32 +0000 | [diff] [blame] | 344 | TINT_UNREACHABLE(); |
dan sinclair | 00d0fd5 | 2022-11-09 20:03:09 +0000 | [diff] [blame] | 345 | } |
| 346 | |
| 347 | // static |
dan sinclair | ce6dffe | 2023-08-14 21:01:40 +0000 | [diff] [blame] | 348 | core::Number<core::detail::NumberKindF16> f16::FromBits(uint16_t bits) { |
dan sinclair | 00d0fd5 | 2022-11-09 20:03:09 +0000 | [diff] [blame] | 349 | // Assert we use binary32 (i.e. float) as underlying type, which has 4 bytes. |
| 350 | static_assert(std::is_same<f16::type, float>()); |
| 351 | |
| 352 | if (bits == kF16PosInf) { |
| 353 | return f16(std::numeric_limits<f16::type>::infinity()); |
| 354 | } |
| 355 | if (bits == kF16NegInf) { |
| 356 | return f16(-std::numeric_limits<f16::type>::infinity()); |
| 357 | } |
| 358 | |
| 359 | auto f16_sign_bit = uint32_t(bits & kF16SignMask); |
| 360 | // If none of the other bits are set we have a 0. If only the sign bit is set we have a -0. |
| 361 | if ((bits & ~kF16SignMask) == 0) { |
| 362 | return f16(f16_sign_bit > 0 ? -0.f : 0.f); |
| 363 | } |
| 364 | |
| 365 | auto f16_mantissa = uint32_t(bits & kF16MantissaMask); |
| 366 | auto f16_biased_exponent = uint32_t(bits & kF16ExponentMask); |
| 367 | |
| 368 | // F16 NaN has all expoennt bits set and at least one mantissa bit set |
| 369 | if (((f16_biased_exponent & kF16ExponentMask) == kF16ExponentMask) && f16_mantissa != 0) { |
| 370 | return f16(std::numeric_limits<f16::type>::quiet_NaN()); |
| 371 | } |
| 372 | |
| 373 | // Shift the exponent over to be a regular number. |
| 374 | f16_biased_exponent >>= kF16MantissaBits; |
| 375 | |
| 376 | // Add the F32 bias and remove the F16 bias. |
| 377 | uint32_t f32_biased_exponent = f16_biased_exponent + kF32ExponentBias - kF16ExponentBias; |
| 378 | |
| 379 | if (f16_biased_exponent == 0) { |
| 380 | // Subnormal number |
| 381 | // |
| 382 | // All subnormal F16 values can be represented as normal F32 values. Shift the mantissa and |
| 383 | // set the exponent as if this was a normal f16 value. |
| 384 | |
| 385 | // While the first F16 exponent bit is not set |
| 386 | constexpr uint32_t kF16FirstExponentBit = 0x0400; |
| 387 | while ((f16_mantissa & kF16FirstExponentBit) == 0) { |
| 388 | // Shift the mantissa to the left |
| 389 | f16_mantissa <<= 1; |
| 390 | // Decrease the biased exponent to counter the shift |
| 391 | f32_biased_exponent -= 1; |
| 392 | } |
| 393 | |
| 394 | // Remove the first exponent bit from the mantissa value |
| 395 | f16_mantissa &= ~kF16FirstExponentBit; |
| 396 | // Increase the exponent to deal with the masked off value. |
| 397 | f32_biased_exponent += 1; |
| 398 | } |
| 399 | |
| 400 | // The mantissa bits are shifted over the difference in mantissa size to be in the F32 location. |
| 401 | uint32_t f32_mantissa = f16_mantissa << (kF32MantissaBits - kF16MantissaBits); |
| 402 | |
| 403 | // Shift the exponent to the F32 exponent position before the mantissa. |
| 404 | f32_biased_exponent <<= kF32MantissaBits; |
| 405 | |
| 406 | // Shift the sign bit over to the f32 sign bit position |
| 407 | uint32_t f32_sign_bit = f16_sign_bit << 16; |
| 408 | |
| 409 | // Combine values together into the F32 value as a uint32_t. |
| 410 | uint32_t val = f32_sign_bit | f32_biased_exponent | f32_mantissa; |
| 411 | |
| 412 | // Bitcast to a F32 and then store into the F16 Number |
dan sinclair | bae54e7 | 2023-07-28 15:01:54 +0000 | [diff] [blame] | 413 | return f16(tint::Bitcast<f16::type>(val)); |
Zhaoming Jiang | 2c7440a | 2022-07-07 03:29:11 +0000 | [diff] [blame] | 414 | } |
| 415 | |
dan sinclair | ce6dffe | 2023-08-14 21:01:40 +0000 | [diff] [blame] | 416 | } // namespace tint::core |