Blame - src/tint/lang/core/number.cc - dawn

blob: e54098a1e566d8c794b44aeb549ca4f0b68d32c5 [file] [log] [blame]

Austin Eng	cc2516a	2023-10-17 20:57:54 +0000	[diff] [blame]	1	// Copyright 2022 The Dawn & Tint Authors
Ben Clayton	c2eccfc	2022-05-25 15:04:24 +0000	[diff] [blame]	2	//
Austin Eng	cc2516a	2023-10-17 20:57:54 +0000	[diff] [blame]	3	// Redistribution and use in source and binary forms, with or without
				4	// modification, are permitted provided that the following conditions are met:
Ben Clayton	c2eccfc	2022-05-25 15:04:24 +0000	[diff] [blame]	5	//
Austin Eng	cc2516a	2023-10-17 20:57:54 +0000	[diff] [blame]	6	// 1. Redistributions of source code must retain the above copyright notice, this
				7	// list of conditions and the following disclaimer.
Ben Clayton	c2eccfc	2022-05-25 15:04:24 +0000	[diff] [blame]	8	//
Austin Eng	cc2516a	2023-10-17 20:57:54 +0000	[diff] [blame]	9	// 2. Redistributions in binary form must reproduce the above copyright notice,
				10	// this list of conditions and the following disclaimer in the documentation
				11	// and/or other materials provided with the distribution.
				12	//
				13	// 3. Neither the name of the copyright holder nor the names of its
				14	// contributors may be used to endorse or promote products derived from
				15	// this software without specific prior written permission.
				16	//
				17	// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
				18	// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
				19	// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
				20	// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
				21	// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
				22	// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
				23	// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
				24	// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
				25	// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
				26	// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Ben Clayton	c2eccfc	2022-05-25 15:04:24 +0000	[diff] [blame]	27
Ben Clayton	cd52f38	2023-08-07 13:11:08 +0000	[diff] [blame]	28	#include "src/tint/lang/core/number.h"
Ben Clayton	c2eccfc	2022-05-25 15:04:24 +0000	[diff] [blame]	29
				30	#include <algorithm>
Zhaoming Jiang	0fb4e2c	2022-06-10 18:18:35 +0000	[diff] [blame]	31	#include <cmath>
Ben Clayton	c2eccfc	2022-05-25 15:04:24 +0000	[diff] [blame]	32	#include <cstring>
Ben Clayton	c2eccfc	2022-05-25 15:04:24 +0000	[diff] [blame]	33
Ben Clayton	f848af2	2023-07-28 16:37:32 +0000	[diff] [blame]	34	#include "src/tint/utils/ice/ice.h"
dan sinclair	22b4dd2	2023-07-21 00:40:07 +0000	[diff] [blame]	35	#include "src/tint/utils/memory/bitcast.h"
				36	#include "src/tint/utils/text/string_stream.h"
Zhaoming Jiang	0fb4e2c	2022-06-10 18:18:35 +0000	[diff] [blame]	37
dan sinclair	ce6dffe	2023-08-14 21:01:40 +0000	[diff] [blame]	38	namespace tint::core {
namespace {

// IEEE 754 binary16 (f16) layout: 1 sign bit | 5 exponent bits | 10 mantissa bits.
constexpr uint32_t kF16MantissaBits = 10;
constexpr uint32_t kF16ExponentBias = 15;

constexpr uint16_t kF16SignMask = 0x8000u;
constexpr uint16_t kF16ExponentMask = 0x7c00u;
constexpr uint16_t kF16MantissaMask = 0x03ffu;

// Canonical f16 bit patterns for the special values.
constexpr uint16_t kF16PosInf = 0x7c00u;  // exponent all 1's, mantissa 0
constexpr uint16_t kF16NegInf = 0xfc00u;  // kF16PosInf with the sign bit set
constexpr uint16_t kF16Nan = 0x7e00u;     // exponent all 1's, top mantissa bit set

// IEEE 754 binary32 (f32) layout: 1 sign bit | 8 exponent bits | 23 mantissa bits.
constexpr uint32_t kF32MantissaBits = 23;
constexpr uint32_t kF32ExponentBias = 127;

constexpr uint32_t kF32SignMask = 0x80000000u;
constexpr uint32_t kF32ExponentMask = 0x7f800000u;
constexpr uint32_t kF32MantissaMask = 0x007fffffu;

// Biased f32 exponents of the values that are exactly representable as a *normal* f16.
// Normal f16 values have a biased f16 exponent in [1, 30]; re-biasing for f32 adds
// (127 - 15) = 112.
constexpr uint32_t kMinF32BiasedExpForF16NormalNumber =
    1 + (kF32ExponentBias - kF16ExponentBias);
constexpr uint32_t kMaxF32BiasedExpForF16NormalNumber =
    30 + (kF32ExponentBias - kF16ExponentBias);

// Biased f32 exponents of the values that are exactly representable as a *subnormal* f16.
// The range sits directly below the normal range and spans one exponent per possible position
// of the highest set bit in the 10-bit f16 mantissa.
constexpr uint32_t kMaxF32BiasedExpForF16SubnormalNumber = kMinF32BiasedExpForF16NormalNumber - 1;
constexpr uint32_t kMinF32BiasedExpForF16SubnormalNumber =
    kMaxF32BiasedExpForF16SubnormalNumber - (kF16MantissaBits - 1);

static_assert(kMaxF32BiasedExpForF16NormalNumber == 142);
static_assert(kMinF32BiasedExpForF16NormalNumber == 113);
static_assert(kMaxF32BiasedExpForF16SubnormalNumber == 112);
static_assert(kMinF32BiasedExpForF16SubnormalNumber == 103);

}  // namespace
Ben Clayton	c2eccfc	2022-05-25 15:04:24 +0000	[diff] [blame]	65
Ben Clayton	c2eccfc	2022-05-25 15:04:24 +0000	[diff] [blame]	66	f16::type f16::Quantize(f16::type value) {
Antonio Maiorano	d060f36	2022-07-29 17:12:01 +0000	[diff] [blame]	67	if (value > kHighestValue) {
Ben Clayton	c2eccfc	2022-05-25 15:04:24 +0000	[diff] [blame]	68	return std::numeric_limits<f16::type>::infinity();
				69	}
Antonio Maiorano	d060f36	2022-07-29 17:12:01 +0000	[diff] [blame]	70	if (value < kLowestValue) {
Ben Clayton	c2eccfc	2022-05-25 15:04:24 +0000	[diff] [blame]	71	return -std::numeric_limits<f16::type>::infinity();
				72	}
dan sinclair	00d0fd5	2022-11-09 20:03:09 +0000	[diff] [blame]	73
Ben Clayton	c2eccfc	2022-05-25 15:04:24 +0000	[diff] [blame]	74	// Below value must be within the finite range of a f16.
Zhaoming Jiang	0fb4e2c	2022-06-10 18:18:35 +0000	[diff] [blame]	75	// Assert we use binary32 (i.e. float) as underlying type, which has 4 bytes.
				76	static_assert(std::is_same<f16::type, float>());
Zhaoming Jiang	0fb4e2c	2022-06-10 18:18:35 +0000	[diff] [blame]	77
dan sinclair	bae54e7	2023-07-28 15:01:54 +0000	[diff] [blame]	78	uint32_t u32 = tint::Bitcast<uint32_t>(value);
dan sinclair	00d0fd5	2022-11-09 20:03:09 +0000	[diff] [blame]	79	if ((u32 & ~kF32SignMask) == 0) {
Ben Clayton	0d2aedf	2022-11-18 11:57:37 +0000	[diff] [blame]	80	return value; // +/- zero
Ben Clayton	c2eccfc	2022-05-25 15:04:24 +0000	[diff] [blame]	81	}
dan sinclair	00d0fd5	2022-11-09 20:03:09 +0000	[diff] [blame]	82	if ((u32 & kF32ExponentMask) == kF32ExponentMask) { // exponent all 1's
Ben Clayton	0d2aedf	2022-11-18 11:57:37 +0000	[diff] [blame]	83	return value; // inf or nan
Ben Clayton	c2eccfc	2022-05-25 15:04:24 +0000	[diff] [blame]	84	}
Zhaoming Jiang	0fb4e2c	2022-06-10 18:18:35 +0000	[diff] [blame]	85
				86	// We are now going to quantize a f32 number into subnormal f16 and store the result value back
				87	// into a f32 variable. Notice that all subnormal f16 values are just normal f32 values. Below
				88	// will show that we can do this quantization by just masking out 13 or more lowest mantissa
				89	// bits of the original f32 number.
				90	//
				91	// Note:
dan sinclair	00d0fd5	2022-11-09 20:03:09 +0000	[diff] [blame]	92	// * f32 has 1 sign bit, 8 exponent bits for biased exponent (i.e. unbiased exponent + 127), and
				93	// 23 mantissa bits. Binary form: s_eeeeeeee_mmmmmmmmmmmmmmmmmmmmmmm
				94	//
				95	// * f16 has 1 sign bit, 5 exponent bits for biased exponent (i.e. unbiased exponent + 15), and
				96	// 10 mantissa bits. Binary form: s_eeeee_mmmmmmmmmm
				97	//
Zhaoming Jiang	0fb4e2c	2022-06-10 18:18:35 +0000	[diff] [blame]	98	// The largest finite f16 number has a biased exponent of 11110 in binary, or 30 decimal, and so
dan sinclair	00d0fd5	2022-11-09 20:03:09 +0000	[diff] [blame]	99	// an unbiased exponent of 30 - 15 = 15.
				100	//
Zhaoming Jiang	0fb4e2c	2022-06-10 18:18:35 +0000	[diff] [blame]	101	// The smallest finite f16 number has a biased exponent of 00001 in binary, or 1 decimal, and so
				102	// a unbiased exponent of 1 - 15 = -14.
				103	//
				104	// We may follow the argument below:
				105	// 1. All normal or subnormal f16 values, range from 0x1.p-24 to 0x1.ffcp15, are exactly
dan sinclair	00d0fd5	2022-11-09 20:03:09 +0000	[diff] [blame]	106	// representable by a normal f32 number.
Zhaoming Jiang	0fb4e2c	2022-06-10 18:18:35 +0000	[diff] [blame]	107	// 1.1. We can denote the set of all f32 number that are exact representation of finite f16
				108	// values by `R`.
				109	// 1.2. We can do the quantization by mapping a normal f32 value v (in the f16 finite range)
				110	// to a certain f32 number v' in the set R, which is the largest (by the meaning of absolute
				111	// value) one among all values in R that are no larger than v.
dan sinclair	00d0fd5	2022-11-09 20:03:09 +0000	[diff] [blame]	112	//
Zhaoming Jiang	0fb4e2c	2022-06-10 18:18:35 +0000	[diff] [blame]	113	// 2. We can decide whether a given normal f32 number v is in the set R, by looking at its
				114	// mantissa bits and biased exponent `e`. Recall that biased exponent e is unbiased exponent +
				115	// 127, and in the range of 1 to 254 for normal f32 number.
Antonio Maiorano	d060f36	2022-07-29 17:12:01 +0000	[diff] [blame]	116	// 2.1. If e >= 143, i.e. abs(v) >= 2^16 > f16::kHighestValue = 0x1.ffcp15, v is larger than
				117	// any finite f16 value and can not be in set R. 2.2. If 142 >= e >= 113, or
				118	// f16::kHighestValue >= abs(v) >= f16::kSmallestValue = 2^-14, v falls in the range of normal
				119	// f16 values. In this case, v is in the set R iff the lowest 13 mantissa bits are all 0. (See
				120	// below for proof)
Zhaoming Jiang	0fb4e2c	2022-06-10 18:18:35 +0000	[diff] [blame]	121	// 2.2.1. If we let v' be v with lowest 13 mantissa bits masked to 0, v' will be in set R
				122	// and the largest one in set R that no larger than v. Such v' is the quantized value of v.
Antonio Maiorano	d060f36	2022-07-29 17:12:01 +0000	[diff] [blame]	123	// 2.3. If 112 >= e >= 103, i.e. 2^-14 > abs(v) >= f16::kSmallestSubnormalValue = 2^-24, v
				124	// falls in the range of subnormal f16 values. In this case, v is in the set R iff the lowest
				125	// 126-e mantissa bits are all 0. Notice that 126-e is in range 14 to 23, inclusive. (See
				126	// below for proof)
Zhaoming Jiang	0fb4e2c	2022-06-10 18:18:35 +0000	[diff] [blame]	127	// 2.3.1. If we let v' be v with lowest 126-e mantissa bits masked to 0, v' will be in set R
				128	// and the largest on in set R that no larger than v. Such v' is the quantized value of v.
				129	// 2.4. If 2^-24 > abs(v) > 0, i.e. 103 > e, v is smaller than any finite f16 value and not
				130	// equal to 0.0, thus can not be in set R.
				131	// 2.5. If abs(v) = 0, v is in set R and is just +-0.0.
				132	//
dan sinclair	00d0fd5	2022-11-09 20:03:09 +0000	[diff] [blame]	133	// Proof for 2.2
				134	// -------------
Zhaoming Jiang	0fb4e2c	2022-06-10 18:18:35 +0000	[diff] [blame]	135	// Any normal f16 number, in binary form, s_eeeee_mmmmmmmmmm, has value
dan sinclair	00d0fd5	2022-11-09 20:03:09 +0000	[diff] [blame]	136	//
				137	// (s == 0 ? 1 : -1) * (1 + uint(mmmmm_mmmmm) * (2^-10)) * 2^(uint(eeeee) - 15)
				138	//
Zhaoming Jiang	0fb4e2c	2022-06-10 18:18:35 +0000	[diff] [blame]	139	// in which unit(bbbbb) means interprete binary pattern "bbbbb" as unsigned binary number,
				140	// and we have 1 <= uint(eeeee) <= 30.
dan sinclair	00d0fd5	2022-11-09 20:03:09 +0000	[diff] [blame]	141	//
Zhaoming Jiang	0fb4e2c	2022-06-10 18:18:35 +0000	[diff] [blame]	142	// This value is equal to a normal f32 number with binary
				143	// s_EEEEEEEE_mmmmmmmmmm0000000000000
dan sinclair	00d0fd5	2022-11-09 20:03:09 +0000	[diff] [blame]	144	//
				145	// where uint(EEEEEEEE) = uint(eeeee) + 112, so that unbiased exponent is kept unchanged
				146	//
Zhaoming Jiang	0fb4e2c	2022-06-10 18:18:35 +0000	[diff] [blame]	147	// uint(EEEEEEEE) - 127 = uint(eeeee) - 15
dan sinclair	00d0fd5	2022-11-09 20:03:09 +0000	[diff] [blame]	148	//
Zhaoming Jiang	0fb4e2c	2022-06-10 18:18:35 +0000	[diff] [blame]	149	// and its value is
dan sinclair	00d0fd5	2022-11-09 20:03:09 +0000	[diff] [blame]	150	// (s == 0 ? 1 : -1) *
				151	// (1 + uint(mmmmm_mmmmm_00000_00000_000) * (2^-23)) * 2^(uint(EEEEEEEE) - 127)
				152	// == (s == 0 ? 1 : -1) *
				153	// (1 + uint(mmmmm_mmmmm) * (2^-10)) * 2^(uint(eeeee) - 15)
				154	//
Zhaoming Jiang	0fb4e2c	2022-06-10 18:18:35 +0000	[diff] [blame]	155	// Notice that uint(EEEEEEEE) is in range [113, 142], showing that it is a normal f32 number.
dan sinclair	00d0fd5	2022-11-09 20:03:09 +0000	[diff] [blame]	156	// So we proved that any normal f16 number can be exactly representd by a normal f32 number
				157	// with biased exponent in range [113, 142] and the lowest 13 mantissa bits 0.
				158	//
Zhaoming Jiang	0fb4e2c	2022-06-10 18:18:35 +0000	[diff] [blame]	159	// On the other hand, since mantissa bits mmmmmmmmmm are arbitrary, the value of any f32
				160	// that has a biased exponent in range [113, 142] and lowest 13 mantissa bits zero is equal
dan sinclair	00d0fd5	2022-11-09 20:03:09 +0000	[diff] [blame]	161	// to a normal f16 value. Hence we prove 2.2.
Zhaoming Jiang	0fb4e2c	2022-06-10 18:18:35 +0000	[diff] [blame]	162	//
dan sinclair	00d0fd5	2022-11-09 20:03:09 +0000	[diff] [blame]	163	// Proof for 2.3
				164	// -------------
Zhaoming Jiang	0fb4e2c	2022-06-10 18:18:35 +0000	[diff] [blame]	165	// Any subnormal f16 number has a binary form of s_00000_mmmmmmmmmm, and its value is
dan sinclair	00d0fd5	2022-11-09 20:03:09 +0000	[diff] [blame]	166	//
				167	// (s == 0 ? 1 : -1) * uint(mmmmmmmmmm) * (2^-10) * (2^-14)
				168	// == (s == 0 ? 1 : -1) * uint(mmmmmmmmmm) * (2^-24).
				169	//
				170	// We discuss the bit pattern of mantissa bits mmmmmmmmmm.
				171	// Case 1: mantissa bits have no leading zero bit, s_00000_1mmmmmmmmm
Zhaoming Jiang	0fb4e2c	2022-06-10 18:18:35 +0000	[diff] [blame]	172	// In this case the value is
dan sinclair	00d0fd5	2022-11-09 20:03:09 +0000	[diff] [blame]	173	// (s == 0 ? 1 : -1) * uint(1mmmm_mmmmm) * (2^-10) * (2^-14)
				174	// == (s == 0 ? 1 : -1) * ( uint(1_mmmmm_mmmm) * (2^-9)) * (2^-15)
				175	// == (s == 0 ? 1 : -1) * (1 + uint(mmmmm_mmmm) * (2^-9)) * (2^-15)
				176	// == (s == 0 ? 1 : -1) * (1 + uint(mmmmm_mmmm0_00000_00000_000) * (2^-23)) * (2^-15)
				177	//
				178	// which is equal to the value of the normal f32 number
				179	//
Zhaoming Jiang	0fb4e2c	2022-06-10 18:18:35 +0000	[diff] [blame]	180	// s_EEEEEEEE_mmmmm_mmmm0_00000_00000_000
dan sinclair	00d0fd5	2022-11-09 20:03:09 +0000	[diff] [blame]	181	//
				182	// where uint(EEEEEEEE) == -15 + 127 = 112. Hence we proved that any subnormal f16 number
Zhaoming Jiang	0fb4e2c	2022-06-10 18:18:35 +0000	[diff] [blame]	183	// with no leading zero mantissa bit can be exactly represented by a f32 number with
				184	// biased exponent 112 and the lowest 14 mantissa bits zero, and the value of any f32
dan sinclair	00d0fd5	2022-11-09 20:03:09 +0000	[diff] [blame]	185	// number with biased exponent 112 and the lowest 14 mantissa bits zero is equal to a
Zhaoming Jiang	0fb4e2c	2022-06-10 18:18:35 +0000	[diff] [blame]	186	// subnormal f16 number with no leading zero mantissa bit.
dan sinclair	00d0fd5	2022-11-09 20:03:09 +0000	[diff] [blame]	187	//
Zhaoming Jiang	0fb4e2c	2022-06-10 18:18:35 +0000	[diff] [blame]	188	// Case 2: mantissa bits has 1 leading zero bit, s_00000_01mmmmmmmm
				189	// In this case the value is
dan sinclair	00d0fd5	2022-11-09 20:03:09 +0000	[diff] [blame]	190	// (s == 0 ? 1 : -1) * uint(01mmm_mmmmm) * (2^-10) * (2^-14)
				191	// == (s == 0 ? 1 : -1) * ( uint(01_mmmmm_mmm) * (2^-8)) * (2^-16)
				192	// == (s == 0 ? 1 : -1) * (1 + uint(mmmmm_mmm) * (2^-8)) * (2^-16)
				193	// == (s == 0 ? 1 : -1) * (1 + uint(mmmmm_mmm00_00000_00000_000) * (2^-23)) * (2^-16)
				194	//
Zhaoming Jiang	0fb4e2c	2022-06-10 18:18:35 +0000	[diff] [blame]	195	// which is equal to the value of normal f32 number
dan sinclair	00d0fd5	2022-11-09 20:03:09 +0000	[diff] [blame]	196	//
Zhaoming Jiang	0fb4e2c	2022-06-10 18:18:35 +0000	[diff] [blame]	197	// s_EEEEEEEE_mmmmm_mmm00_00000_00000_000
dan sinclair	00d0fd5	2022-11-09 20:03:09 +0000	[diff] [blame]	198	//
				199	// where uint(EEEEEEEE) = -16 + 127 = 111. Hence we proved that any subnormal f16 number
Zhaoming Jiang	0fb4e2c	2022-06-10 18:18:35 +0000	[diff] [blame]	200	// with 1 leading zero mantissa bit can be exactly represented by a f32 number with
				201	// biased exponent 111 and the lowest 15 mantissa bits zero, and the value of any f32
dan sinclair	00d0fd5	2022-11-09 20:03:09 +0000	[diff] [blame]	202	// number with biased exponent 111 and the lowest 15 mantissa bits zero is equal to a
Zhaoming Jiang	0fb4e2c	2022-06-10 18:18:35 +0000	[diff] [blame]	203	// subnormal f16 number with 1 leading zero mantissa bit.
dan sinclair	00d0fd5	2022-11-09 20:03:09 +0000	[diff] [blame]	204	//
Zhaoming Jiang	0fb4e2c	2022-06-10 18:18:35 +0000	[diff] [blame]	205	// Case 3 to case 8: ......
dan sinclair	00d0fd5	2022-11-09 20:03:09 +0000	[diff] [blame]	206	//
				207	// Case 9: mantissa bits has 8 leading zero bits, s_00000_000000001m
Zhaoming Jiang	0fb4e2c	2022-06-10 18:18:35 +0000	[diff] [blame]	208	// In this case the value is
dan sinclair	00d0fd5	2022-11-09 20:03:09 +0000	[diff] [blame]	209	// (s == 0 ? 1 : -1) * uint(00000_0001m) * (2^-10) * (2^-14)
				210	// == (s == 0 ? 1 : -1) * ( uint(000000001_m) * (2^-1)) * (2^-23)
				211	// == (s == 0 ? 1 : -1) * (1 + uint(m) * (2^-1)) * (2^-23)
				212	// == (s == 0 ? 1 : -1) * (1 + uint(m0000_00000_00000_00000_000) * (2^-23)) * (2^-23)
				213	//
Zhaoming Jiang	0fb4e2c	2022-06-10 18:18:35 +0000	[diff] [blame]	214	// which is equal to the value of normal f32 number
dan sinclair	00d0fd5	2022-11-09 20:03:09 +0000	[diff] [blame]	215	//
Zhaoming Jiang	0fb4e2c	2022-06-10 18:18:35 +0000	[diff] [blame]	216	// s_EEEEEEEE_m0000_00000_00000_00000_000
dan sinclair	00d0fd5	2022-11-09 20:03:09 +0000	[diff] [blame]	217	//
				218	// where uint(EEEEEEEE) = -23 + 127 = 104. Hence we proved that any subnormal f16 number
Zhaoming Jiang	0fb4e2c	2022-06-10 18:18:35 +0000	[diff] [blame]	219	// with 8 leading zero mantissa bit can be exactly represented by a f32 number with
				220	// biased exponent 104 and the lowest 22 mantissa bits zero, and the value of any f32
				221	// number with biased exponent 104 and the lowest 22 mantissa bits zero are equal to a
				222	// subnormal f16 number with 8 leading zero mantissa bit.
dan sinclair	00d0fd5	2022-11-09 20:03:09 +0000	[diff] [blame]	223	//
				224	// Case 10: mantissa bits has 9 leading zero bits, s_00000_0000000001
				225	// In this case the value is just +-2^-24 == +-0x1.0p-24,
Zhaoming Jiang	0fb4e2c	2022-06-10 18:18:35 +0000	[diff] [blame]	226	// the f32 number has biased exponent 103 and all 23 mantissa bits zero.
dan sinclair	00d0fd5	2022-11-09 20:03:09 +0000	[diff] [blame]	227	//
				228	// Case 11: mantissa bits has 10 leading zero bits, s_00000_0000000000, just 0.0
				229	//
				230	// Concluding all these case, we proved that any subnormal f16 number with N leading zero
				231	// mantissa bit can be exactly represented by a f32 number with biased exponent 112 - N and the
				232	// lowest 14 + N mantissa bits zero, and the value of any f32 number with biased exponent
				233	// 112 - N (= e) and the lowest 14 + N (= 126 - e) mantissa bits zero are equal to a subnormal
				234	// f16 number with N leading zero mantissa bits. N is in range [0, 9], so the f32 number's
				235	// biased exponent e is in range [103, 112], or unbiased exponent in [-24, -15].
Zhaoming Jiang	0fb4e2c	2022-06-10 18:18:35 +0000	[diff] [blame]	236
				237	float abs_value = std::fabs(value);
Antonio Maiorano	d060f36	2022-07-29 17:12:01 +0000	[diff] [blame]	238	if (abs_value >= kSmallestValue) {
Zhaoming Jiang	0fb4e2c	2022-06-10 18:18:35 +0000	[diff] [blame]	239	// Value falls in the normal f16 range, quantize it to a normal f16 value by masking out
				240	// lowest 13 mantissa bits.
dan sinclair	00d0fd5	2022-11-09 20:03:09 +0000	[diff] [blame]	241	u32 = u32 & ~((1u << (kF32MantissaBits - kF16MantissaBits)) - 1);
Antonio Maiorano	d060f36	2022-07-29 17:12:01 +0000	[diff] [blame]	242	} else if (abs_value >= kSmallestSubnormalValue) {
Zhaoming Jiang	0fb4e2c	2022-06-10 18:18:35 +0000	[diff] [blame]	243	// Value should be quantized to a subnormal f16 value.
				244
				245	// Get the biased exponent `e` of f32 value, e.g. value 127 representing exponent 2^0.
dan sinclair	00d0fd5	2022-11-09 20:03:09 +0000	[diff] [blame]	246	uint32_t biased_exponent_original = (u32 & kF32ExponentMask) >> kF32MantissaBits;
Antonio Maiorano	d060f36	2022-07-29 17:12:01 +0000	[diff] [blame]	247	// Since we ensure that kSmallestValue = 0x1f-14 > abs(value) >= kSmallestSubnormalValue =
				248	// 0x1f-24, value will have a unbiased exponent in range -24 to -15 (inclusive), and the
Zhaoming Jiang	0fb4e2c	2022-06-10 18:18:35 +0000	[diff] [blame]	249	// corresponding biased exponent in f32 is in range 103 to 112 (inclusive).
Ben Clayton	f848af2	2023-07-28 16:37:32 +0000	[diff] [blame]	250	TINT_ASSERT((kMinF32BiasedExpForF16SubnormalNumber <= biased_exponent_original) &&
				251	(biased_exponent_original <= kMaxF32BiasedExpForF16SubnormalNumber));
Zhaoming Jiang	0fb4e2c	2022-06-10 18:18:35 +0000	[diff] [blame]	252
				253	// As we have proved, masking out the lowest 126-e mantissa bits of input value will result
				254	// in a valid subnormal f16 value, which is exactly the required quantization result.
				255	uint32_t discard_bits = 126 - biased_exponent_original; // In range 14 to 23 (inclusive)
Ben Clayton	f848af2	2023-07-28 16:37:32 +0000	[diff] [blame]	256	TINT_ASSERT((14 <= discard_bits) && (discard_bits <= kF32MantissaBits));
Zhaoming Jiang	0fb4e2c	2022-06-10 18:18:35 +0000	[diff] [blame]	257	uint32_t discard_mask = (1u << discard_bits) - 1;
				258	u32 = u32 & ~discard_mask;
				259	} else {
				260	// value is too small that it can't even be represented as subnormal f16 number. Quantize
				261	// to zero.
				262	return value > 0 ? 0.0 : -0.0;
				263	}
dan sinclair	00d0fd5	2022-11-09 20:03:09 +0000	[diff] [blame]	264
dan sinclair	bae54e7	2023-07-28 15:01:54 +0000	[diff] [blame]	265	return tint::Bitcast<f16::type>(u32);
Ben Clayton	c2eccfc	2022-05-25 15:04:24 +0000	[diff] [blame]	266	}
				267
Zhaoming Jiang	2c7440a	2022-07-07 03:29:11 +0000	[diff] [blame]	268	uint16_t f16::BitsRepresentation() const {
Zhaoming Jiang	2c7440a	2022-07-07 03:29:11 +0000	[diff] [blame]	269	// Assert we use binary32 (i.e. float) as underlying type, which has 4 bytes.
				270	static_assert(std::is_same<f16::type, float>());
				271
				272	// The stored value in f16 object must be already quantized, so it should be either NaN, +/-
				273	// Inf, or exactly representable by normal or subnormal f16.
				274
				275	if (std::isnan(value)) {
dan sinclair	00d0fd5	2022-11-09 20:03:09 +0000	[diff] [blame]	276	return kF16Nan;
Zhaoming Jiang	2c7440a	2022-07-07 03:29:11 +0000	[diff] [blame]	277	}
				278
				279	if (std::isinf(value)) {
dan sinclair	00d0fd5	2022-11-09 20:03:09 +0000	[diff] [blame]	280	return value > 0 ? kF16PosInf : kF16NegInf;
Zhaoming Jiang	2c7440a	2022-07-07 03:29:11 +0000	[diff] [blame]	281	}
				282
				283	// Now quantized_value must be a finite f16 exactly-representable value.
				284	// The following table shows exponent cases for all finite f16 exactly-representable value.
				285	// ---------------------------------------------------------------------------
				286	// \| Value category \| Unbiased exp \| F16 biased exp \| F32 biased exp \|
				287	// \|------------------\|----------------\|------------------\|------------------\|
				288	// \| +/- zero \| \ \| 0 \| 0 \|
				289	// \| Subnormal f16 \| [-24, -15] \| 0 \| [103, 112] \|
dan sinclair	00d0fd5	2022-11-09 20:03:09 +0000	[diff] [blame]	290	// \| Normal f16 \| [-14, 15] \| [1, 30] \| [113, 142] \|
Zhaoming Jiang	2c7440a	2022-07-07 03:29:11 +0000	[diff] [blame]	291	// ---------------------------------------------------------------------------
				292
dan sinclair	bae54e7	2023-07-28 15:01:54 +0000	[diff] [blame]	293	uint32_t f32_bit_pattern = tint::Bitcast<uint32_t>(value);
dan sinclair	00d0fd5	2022-11-09 20:03:09 +0000	[diff] [blame]	294	uint32_t f32_biased_exponent = (f32_bit_pattern & kF32ExponentMask) >> kF32MantissaBits;
				295	uint32_t f32_mantissa = f32_bit_pattern & kF32MantissaMask;
Zhaoming Jiang	2c7440a	2022-07-07 03:29:11 +0000	[diff] [blame]	296
dan sinclair	00d0fd5	2022-11-09 20:03:09 +0000	[diff] [blame]	297	uint16_t f16_sign_part = static_cast<uint16_t>((f32_bit_pattern & kF32SignMask) >> 16);
Ben Clayton	f848af2	2023-07-28 16:37:32 +0000	[diff] [blame]	298	TINT_ASSERT((f16_sign_part & ~kF16SignMask) == 0);
Zhaoming Jiang	2c7440a	2022-07-07 03:29:11 +0000	[diff] [blame]	299
dan sinclair	00d0fd5	2022-11-09 20:03:09 +0000	[diff] [blame]	300	if ((f32_bit_pattern & ~kF32SignMask) == 0) {
Zhaoming Jiang	2c7440a	2022-07-07 03:29:11 +0000	[diff] [blame]	301	// +/- zero
				302	return f16_sign_part;
				303	}
				304
dan sinclair	00d0fd5	2022-11-09 20:03:09 +0000	[diff] [blame]	305	if ((kMinF32BiasedExpForF16NormalNumber <= f32_biased_exponent) &&
				306	(f32_biased_exponent <= kMaxF32BiasedExpForF16NormalNumber)) {
Zhaoming Jiang	2c7440a	2022-07-07 03:29:11 +0000	[diff] [blame]	307	// Normal f16
dan sinclair	00d0fd5	2022-11-09 20:03:09 +0000	[diff] [blame]	308	uint32_t f16_biased_exponent = f32_biased_exponent - kF32ExponentBias + kF16ExponentBias;
				309	uint16_t f16_exp_part = static_cast<uint16_t>(f16_biased_exponent << kF16MantissaBits);
				310	uint16_t f16_mantissa_part =
				311	static_cast<uint16_t>(f32_mantissa >> (kF32MantissaBits - kF16MantissaBits));
Zhaoming Jiang	2c7440a	2022-07-07 03:29:11 +0000	[diff] [blame]	312
Ben Clayton	f848af2	2023-07-28 16:37:32 +0000	[diff] [blame]	313	TINT_ASSERT((f16_exp_part & ~kF16ExponentMask) == 0);
				314	TINT_ASSERT((f16_mantissa_part & ~kF16MantissaMask) == 0);
Zhaoming Jiang	2c7440a	2022-07-07 03:29:11 +0000	[diff] [blame]	315
				316	return f16_sign_part \| f16_exp_part \| f16_mantissa_part;
				317	}
				318
dan sinclair	00d0fd5	2022-11-09 20:03:09 +0000	[diff] [blame]	319	if ((kMinF32BiasedExpForF16SubnormalNumber <= f32_biased_exponent) &&
				320	(f32_biased_exponent <= kMaxF32BiasedExpForF16SubnormalNumber)) {
Zhaoming Jiang	2c7440a	2022-07-07 03:29:11 +0000	[diff] [blame]	321	// Subnormal f16
				322	// The resulting exp bits are always 0, and the mantissa bits should be handled specially.
				323	uint16_t f16_exp_part = 0;
				324	// The resulting subnormal f16 will have only 1 valid mantissa bit if the unbiased exponent
				325	// of value is of the minimum, i.e. -24; and have all 10 mantissa bits valid if the unbiased
				326	// exponent of value is of the maximum, i.e. -15.
				327	uint32_t f16_valid_mantissa_bits =
dan sinclair	00d0fd5	2022-11-09 20:03:09 +0000	[diff] [blame]	328	f32_biased_exponent - kMinF32BiasedExpForF16SubnormalNumber + 1;
Zhaoming Jiang	2c7440a	2022-07-07 03:29:11 +0000	[diff] [blame]	329	// The resulting f16 mantissa part comes from right-shifting the f32 mantissa bits with
				330	// leading 1 added.
				331	uint16_t f16_mantissa_part =
dan sinclair	00d0fd5	2022-11-09 20:03:09 +0000	[diff] [blame]	332	static_cast<uint16_t>((f32_mantissa \| (kF32MantissaMask + 1)) >>
				333	(kF32MantissaBits + 1 - f16_valid_mantissa_bits));
Zhaoming Jiang	2c7440a	2022-07-07 03:29:11 +0000	[diff] [blame]	334
Ben Clayton	f848af2	2023-07-28 16:37:32 +0000	[diff] [blame]	335	TINT_ASSERT((1 <= f16_valid_mantissa_bits) &&
				336	(f16_valid_mantissa_bits <= kF16MantissaBits));
				337	TINT_ASSERT((f16_mantissa_part & ~((1u << f16_valid_mantissa_bits) - 1)) == 0);
				338	TINT_ASSERT((f16_mantissa_part != 0));
Zhaoming Jiang	2c7440a	2022-07-07 03:29:11 +0000	[diff] [blame]	339
				340	return f16_sign_part \| f16_exp_part \| f16_mantissa_part;
				341	}
				342
				343	// Neither zero, subnormal f16 or normal f16, shall never hit.
Ben Clayton	f848af2	2023-07-28 16:37:32 +0000	[diff] [blame]	344	TINT_UNREACHABLE();
dan sinclair	00d0fd5	2022-11-09 20:03:09 +0000	[diff] [blame]	345	}
				346
				347	// static
dan sinclair	ce6dffe	2023-08-14 21:01:40 +0000	[diff] [blame]	348	core::Number<core::detail::NumberKindF16> f16::FromBits(uint16_t bits) {
dan sinclair	00d0fd5	2022-11-09 20:03:09 +0000	[diff] [blame]	349	// Assert we use binary32 (i.e. float) as underlying type, which has 4 bytes.
				350	static_assert(std::is_same<f16::type, float>());
				351
				352	if (bits == kF16PosInf) {
				353	return f16(std::numeric_limits<f16::type>::infinity());
				354	}
				355	if (bits == kF16NegInf) {
				356	return f16(-std::numeric_limits<f16::type>::infinity());
				357	}
				358
				359	auto f16_sign_bit = uint32_t(bits & kF16SignMask);
				360	// If none of the other bits are set we have a 0. If only the sign bit is set we have a -0.
				361	if ((bits & ~kF16SignMask) == 0) {
				362	return f16(f16_sign_bit > 0 ? -0.f : 0.f);
				363	}
				364
				365	auto f16_mantissa = uint32_t(bits & kF16MantissaMask);
				366	auto f16_biased_exponent = uint32_t(bits & kF16ExponentMask);
				367
				368	// F16 NaN has all expoennt bits set and at least one mantissa bit set
				369	if (((f16_biased_exponent & kF16ExponentMask) == kF16ExponentMask) && f16_mantissa != 0) {
				370	return f16(std::numeric_limits<f16::type>::quiet_NaN());
				371	}
				372
				373	// Shift the exponent over to be a regular number.
				374	f16_biased_exponent >>= kF16MantissaBits;
				375
				376	// Add the F32 bias and remove the F16 bias.
				377	uint32_t f32_biased_exponent = f16_biased_exponent + kF32ExponentBias - kF16ExponentBias;
				378
				379	if (f16_biased_exponent == 0) {
				380	// Subnormal number
				381	//
				382	// All subnormal F16 values can be represented as normal F32 values. Shift the mantissa and
				383	// set the exponent as if this was a normal f16 value.
				384
				385	// While the first F16 exponent bit is not set
				386	constexpr uint32_t kF16FirstExponentBit = 0x0400;
				387	while ((f16_mantissa & kF16FirstExponentBit) == 0) {
				388	// Shift the mantissa to the left
				389	f16_mantissa <<= 1;
				390	// Decrease the biased exponent to counter the shift
				391	f32_biased_exponent -= 1;
				392	}
				393
				394	// Remove the first exponent bit from the mantissa value
				395	f16_mantissa &= ~kF16FirstExponentBit;
				396	// Increase the exponent to deal with the masked off value.
				397	f32_biased_exponent += 1;
				398	}
				399
				400	// The mantissa bits are shifted over the difference in mantissa size to be in the F32 location.
				401	uint32_t f32_mantissa = f16_mantissa << (kF32MantissaBits - kF16MantissaBits);
				402
				403	// Shift the exponent to the F32 exponent position before the mantissa.
				404	f32_biased_exponent <<= kF32MantissaBits;
				405
				406	// Shift the sign bit over to the f32 sign bit position
				407	uint32_t f32_sign_bit = f16_sign_bit << 16;
				408
				409	// Combine values together into the F32 value as a uint32_t.
				410	uint32_t val = f32_sign_bit \| f32_biased_exponent \| f32_mantissa;
				411
				412	// Bitcast to a F32 and then store into the F16 Number
dan sinclair	bae54e7	2023-07-28 15:01:54 +0000	[diff] [blame]	413	return f16(tint::Bitcast<f16::type>(val));
Zhaoming Jiang	2c7440a	2022-07-07 03:29:11 +0000	[diff] [blame]	414	}
				415
dan sinclair	ce6dffe	2023-08-14 21:01:40 +0000	[diff] [blame]	416	} // namespace tint::core