src/dawn/common/Sha3.cpp - dawn.git - Git at Google

 // Copyright 2025 The Dawn & Tint Authors
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are met:
 //
 // 1. Redistributions of source code must retain the above copyright notice, this
 //    list of conditions and the following disclaimer.
 //
 // 2. Redistributions in binary form must reproduce the above copyright notice,
 //    this list of conditions and the following disclaimer in the documentation
 //    and/or other materials provided with the distribution.
 //
 // 3. Neither the name of the copyright holder nor the names of its
 //    contributors may be used to endorse or promote products derived from
 //    this software without specific prior written permission.
 //
 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 #ifdef UNSAFE_BUFFERS_BUILD
 // TODO(crbug.com/439062058): Remove this and convert code to safer constructs.
 #pragma allow_unsafe_buffers
 #endif

 #include "src/dawn/common/Sha3.h"

 #include <algorithm>
 #include <bitset>
 #include <cstring>

 #include "src/dawn/common/Assert.h"

 namespace dawn {

 // An implementation of FIPS 202 from scratch, checked against some of the official test vectors.
 // All references to algorithms, variables or sections are from
 // https://nvlpubs.nist.gov/nistpubs/FIPS/NIST.FIPS.202.pdf
 //
 // Because this is an implementation of only SHA3 and not the general family of Keccak functions, we
 // hardcode b=1600 (the hash works on 64bit "lanes") and nr=24 (there are 24 rounds or state
 // mixing). For efficiency the function of the state modify it directly instead of making a copy.

 namespace {

 constexpr uint32_t kLaneBitWidth = sizeof(Sha3Lane) * 8;  // aka "w"
 constexpr uint32_t kLog2LaneBitWidth = 6;                 // aka "l"
 static_assert(1 << kLog2LaneBitWidth == kLaneBitWidth);
 constexpr uint32_t kRoundCount = 24;  // aka "nr"

 // The rotation of a lane by `offset` bits.
 Sha3Lane Rotl(Sha3Lane l, size_t offset) {
     DAWN_ASSERT(offset < kLaneBitWidth);
     // Offset should not be 0, as the expected result should be just identical and
     // right-shifting (kLaneBitWidth - offset) == kLaneBitWidth bits on Sha3Lane (having
     // kLaneBitWidth bits) results in undefined behavior.
     DAWN_ASSERT(offset > 0);
     return (l << offset) | (l >> (kLaneBitWidth - offset));
 }

 // Section 3.2.1: Specification of Theta.
 void Theta(Sha3State& a) {
     // Step 1, compute C, the parity of each column.
     std::array<Sha3Lane, 5> c;
     for (size_t x = 0; x < 5; x++) {
         c[x] = a[x + 0] ^ a[x + 5] ^ a[x + 10] ^ a[x + 15] ^ a[x + 20];
     }

     // Combined Step 2 and 3, so as to only store a single lane of D at a time.
     for (size_t x = 0; x < 5; x++) {
         // Step 2, compute D[X] as the parity of two neighboring columns
         Sha3Lane d_x = c[(x + 4) % 5] ^ Rotl(c[(x + 1) % 5], 1);

         // Step 3, mix it in each sheet of A.
         for (size_t y = 0; y < 5; y++) {
             a[x + 5 * y] ^= d_x;
         }
     }
 }

 // Section 3.2.2: Specification of Rho.
 // Rho is a per-lane rotation. Instead of iteratively walking the lanes in a non-linear order and
 // computing the offset based on the loop index, we precompute all the offsets at compile time.
 static constexpr std::array<uint8_t, 25> kRhoOffsets = []() {
     std::array<uint8_t, 25> offsets = {};

     // Step 1, lane 0 is unmodified.
     offsets[0] = 0;

     // Step 2, the start of the iteration of lines.
     uint32_t x = 1;
     uint32_t y = 0;

     // Step 3, iterate through all but the (0, 0) lane.
     for (uint32_t t = 0; t < 24; t++) {
         // Step 3a, compute the offset for the lane, pre-mod it to make them valid rotation offsets.
         offsets[x + 5 * y] = (((t + 1) * (t + 2)) / 2) % kLaneBitWidth;

         // Step 3b, update the current lane.
         uint32_t previousX = x;
         uint32_t previousY = y;
         x = previousY;
         y = (2 * previousX + 3 * previousY) % 5;
     }

     return offsets;
 }();

 void Rho(Sha3State& a) {
     // Rotating starts from i = 1, as kRhoOffsets[0] = 0 and the lane 0 is not rotated.
     static_assert(kRhoOffsets[0] == 0);
     for (uint32_t i = 1; i < 25; i++) {
         a[i] = Rotl(a[i], kRhoOffsets[i]);
     }
 }

 // Section 3.2.3: Specification of Pi
 // Pi is a permutation of all the lanes with (0, 0) staying in place and a 24 lane cycle. This
 // means that we can efficiently update the state if we know the order of the cycle.
 static constexpr std::array<uint8_t, 24> kPiCycleIndices = []() {
     // Compute the index of the next lane we copy from in the permutation.
     std::array<uint32_t, 25> copyFromX = {};
     std::array<uint32_t, 25> copyFromY = {};

     // Step 1
     for (uint32_t x = 0; x < 5; x++) {
         for (uint32_t y = 0; y < 5; y++) {
             copyFromX[x + 5 * y] = (x + 3 * y) % 5;
             copyFromY[x + 5 * y] = x;
         }
     }

     // Invert the copy* to instead get the coordinates of the next lane in the permutation.
     std::array<uint32_t, 25> nextX = {};
     std::array<uint32_t, 25> nextY = {};
     for (uint32_t x = 0; x < 5; x++) {
         for (uint32_t y = 0; y < 5; y++) {
             uint32_t copyX = copyFromX[x + y * 5];
             uint32_t copyY = copyFromY[x + y * 5];

             nextX[copyX + 5 * copyY] = x;
             nextY[copyX + 5 * copyY] = y;
         }
     }

     // Walk the cycle starting anywhere on it, but not (0, 0) as it is fixed point.
     std::array<uint8_t, 24> cycleIndices = {};
     uint32_t x = 1;
     uint32_t y = 0;
     for (size_t i = 0; i < cycleIndices.size(); i++) {
         cycleIndices[i] = x + 5 * y;
         uint32_t previousX = x;
         uint32_t previousY = y;
         x = nextX[previousX + 5 * previousY];
         y = nextY[previousX + 5 * previousY];
     }

     return cycleIndices;
 }();

 void Pi(Sha3State& a) {
     // The first lane of the cycle is set to the value of the last lane of the cycle.
     Sha3Lane previousLane = a[kPiCycleIndices.back()];

     for (size_t i = 0; i < kPiCycleIndices.size(); i++) {
         Sha3Lane currentLane = a[kPiCycleIndices[i]];
         a[kPiCycleIndices[i]] = previousLane;
         previousLane = currentLane;
     }
 }

 // Section 3.2.4: Specification of Chi
 void Chi(Sha3State& a) {
     // Step 1, Xi mixes the bits of each row so we need to copy each plane out before mixing.
     for (uint32_t y = 0; y < 5; y++) {
         std::array<Sha3Lane, 5> a_y;
         for (uint32_t x = 0; x < 5; x++) {
             a_y[x] = a[x + 5 * y];
         }

         for (uint32_t x = 0; x < 5; x++) {
             a[x + 5 * y] ^= ~a_y[(x + 1) % 5] & a_y[(x + 2) % 5];
         }
     }
 }

 // Section 3.2.3: Specification of Iota

 // Algorithm 5 returns a bit given a variable "t". All computations are done modulo 255 so we
 // precompute all possible values (even if we don't end up using all of them). We cannot use
 // std::bitset::set as it isn't constexpr until C++23.
 static constexpr std::array<bool, 256> kRoundConstantsBits = []() {
     std::array<bool, 256> bits = {};

     // Step 1, return 1 when t is 0.
     bits[0] = true;

     // Step 2
     uint8_t R = 1;

     // Step 3
     for (int i = 1; i < 256; i++) {
         bool R8 = R & (0x80);
         // Step 3a, 3f
         R <<= 1;

         // Step 3b, 3c, 3d, 3e
         if (R8) {
             R ^= 0b01110001;
         }
         bits[i] = R & 1;
     }

     return bits;
 }();

 // Algorithm 6 computes a round constant based on the round number. Precompute all of them.
 static constexpr std::array<Sha3Lane, kRoundCount> kRoundConstants = []() {
     std::array<Sha3Lane, kRoundCount> RCs = {};

     for (uint32_t ir = 0; ir < kRoundCount; ir++) {
         // Step 2
         Sha3Lane RC = 0;

         // Step 3
         for (uint32_t j = 0; j < kLog2LaneBitWidth + 1; j++) {
             if (kRoundConstantsBits[j + 7 * ir]) {
                 RC |= uint64_t(1) << ((1 << j) - 1);
             }
         }

         RCs[ir] = RC;
     }

     return RCs;
 }();

 void Iota(Sha3State& a, uint32_t ir) {
     // Step 4, combine the round constant in the first lane.
     a[0] ^= kRoundConstants[ir];
 }

 // Section 3.3, Keccak-b[b, nr]
 void Keccak(Sha3State& a) {
     for (uint32_t ir = 0; ir < kRoundCount; ir++) {
         Theta(a);
         // TODO(402772741): For efficiency combine Rho and Pi together.
         Rho(a);
         Pi(a);
         Chi(a);
         Iota(a, ir);
     }
 }

 // TODO(402772741): This could be made more efficient by xoring whole 64bits at a time.
 void memxorpy(void* dst, const void* src, size_t n) {
     char* dstChars = static_cast<char*>(dst);
     const char* srcChars = static_cast<const char*>(src);

     while (n > 0) {
         *dstChars ^= *srcChars;
         n--;
         dstChars++;
         srcChars++;
     }
 }

 }  // anonymous namespace

 // Section 4: Sponge Construction
 // The spec only defines hashing a full message but for convenience we use an Update/Finalize API.
 // This means that Algorithm 8 is spread over multiple function calls. For each chunk of "rate"
 // bits we xor them at the start of the state and perform Keccak on the state. SHA3 also adds a
 // 01 suffix at the end of the message and pads the remaining bits with 10...0...01. (with 11
 // being a valid padding, but not 1).
 template <size_t OutputLength>
 void Sha3<OutputLength>::Update(const void* data, size_t size) {
     uint8_t* stateAsString = reinterpret_cast<uint8_t*>(&mState);
     const uint8_t* dataAsBytes = static_cast<const uint8_t*>(data);

     while (size > 0) {
         DAWN_ASSERT(mOffsetInState < kByteRate);
         size_t toProcess = std::min(size, kByteRate - mOffsetInState);

         memxorpy(stateAsString + mOffsetInState, dataAsBytes, toProcess);
         size -= toProcess;
         dataAsBytes += toProcess;
         mOffsetInState += toProcess;

         if (mOffsetInState == kByteRate) {
             Keccak(mState);
             mOffsetInState = 0;
         }
     }
 }

 template <size_t OutputLength>
 typename Sha3<OutputLength>::Output Sha3<OutputLength>::Finalize() {
     uint8_t* stateAsString = reinterpret_cast<uint8_t*>(&mState);
     DAWN_ASSERT(mOffsetInState < kByteRate);

     // Add in the 01 suffix for SHA3, as well as the first 1 for the padding.
     uint8_t* suffixByte = stateAsString + mOffsetInState;
     *suffixByte ^= 0b110;

     // Add in the last 1 of the multi-rate padding. The byte may be the same byte as suffixByte.
     uint8_t* endByte = stateAsString + (kByteRate - 1);
     *endByte ^= 0b1000'0000;

     // Do the final Keccak for the absorption in the sponge.
     Keccak(mState);

     // Garble the offset to trigger an ASSERT in Update or Finalize if they are called again.
     mOffsetInState = 0xC0FFEE;

     // The squeeze of the hash value can be done in one step.
     static_assert(sizeof(Output) <= kByteRate);
     Output output;
     memcpy(&output, &mState, sizeof(output));
     return output;
 }

 // static
 template <size_t OutputLength>
 typename Sha3<OutputLength>::Output Sha3<OutputLength>::Hash(const void* data, size_t size) {
     Sha3 sha;
     sha.Update(data, size);
     return sha.Finalize();
 }

 template class Sha3<224>;
 template class Sha3<256>;
 template class Sha3<384>;
 template class Sha3<512>;

 Sha3State KeccakForTesting(Sha3State s) {
     Sha3State res = s;
     Keccak(res);
     return res;
 }

 }  // namespace dawn
	// Copyright 2025 The Dawn & Tint Authors
	//
	// Redistribution and use in source and binary forms, with or without
	// modification, are permitted provided that the following conditions are met:
	//
	// 1. Redistributions of source code must retain the above copyright notice, this
	// list of conditions and the following disclaimer.
	//
	// 2. Redistributions in binary form must reproduce the above copyright notice,
	// this list of conditions and the following disclaimer in the documentation
	// and/or other materials provided with the distribution.
	//
	// 3. Neither the name of the copyright holder nor the names of its
	// contributors may be used to endorse or promote products derived from
	// this software without specific prior written permission.
	//
	// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
	// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
	// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
	// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
	// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
	// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
	// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
	// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
	// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

	#ifdef UNSAFE_BUFFERS_BUILD
	// TODO(crbug.com/439062058): Remove this and convert code to safer constructs.
	#pragma allow_unsafe_buffers
	#endif

	#include "src/dawn/common/Sha3.h"

	#include <algorithm>
	#include <bitset>
	#include <cstring>

	#include "src/dawn/common/Assert.h"

	namespace dawn {

	// An implementation of FIPS 202 from scratch, checked against some of the official test vectors.
	// All references to algorithms, variables or sections are from
	// https://nvlpubs.nist.gov/nistpubs/FIPS/NIST.FIPS.202.pdf
	//
	// Because this is an implementation of only SHA3 and not the general family of Keccak functions, we
	// hardcode b=1600 (the hash works on 64bit "lanes") and nr=24 (there are 24 rounds or state
	// mixing). For efficiency the function of the state modify it directly instead of making a copy.

	namespace {

	constexpr uint32_t kLaneBitWidth = sizeof(Sha3Lane) * 8; // aka "w"
	constexpr uint32_t kLog2LaneBitWidth = 6; // aka "l"
	static_assert(1 << kLog2LaneBitWidth == kLaneBitWidth);
	constexpr uint32_t kRoundCount = 24; // aka "nr"

	// The rotation of a lane by `offset` bits.
	Sha3Lane Rotl(Sha3Lane l, size_t offset) {
	DAWN_ASSERT(offset < kLaneBitWidth);
	// Offset should not be 0, as the expected result should be just identical and
	// right-shifting (kLaneBitWidth - offset) == kLaneBitWidth bits on Sha3Lane (having
	// kLaneBitWidth bits) results in undefined behavior.
	DAWN_ASSERT(offset > 0);
	return (l << offset) \| (l >> (kLaneBitWidth - offset));
	}

	// Section 3.2.1: Specification of Theta.
	void Theta(Sha3State& a) {
	// Step 1, compute C, the parity of each column.
	std::array<Sha3Lane, 5> c;
	for (size_t x = 0; x < 5; x++) {
	c[x] = a[x + 0] ^ a[x + 5] ^ a[x + 10] ^ a[x + 15] ^ a[x + 20];
	}

	// Combined Step 2 and 3, so as to only store a single lane of D at a time.
	for (size_t x = 0; x < 5; x++) {
	// Step 2, compute D[X] as the parity of two neighboring columns
	Sha3Lane d_x = c[(x + 4) % 5] ^ Rotl(c[(x + 1) % 5], 1);

	// Step 3, mix it in each sheet of A.
	for (size_t y = 0; y < 5; y++) {
	a[x + 5 * y] ^= d_x;
	}
	}
	}

	// Section 3.2.2: Specification of Rho.
	// Rho is a per-lane rotation. Instead of iteratively walking the lanes in a non-linear order and
	// computing the offset based on the loop index, we precompute all the offsets at compile time.
	static constexpr std::array<uint8_t, 25> kRhoOffsets = []() {
	std::array<uint8_t, 25> offsets = {};

	// Step 1, lane 0 is unmodified.
	offsets[0] = 0;

	// Step 2, the start of the iteration of lines.
	uint32_t x = 1;
	uint32_t y = 0;

	// Step 3, iterate through all but the (0, 0) lane.
	for (uint32_t t = 0; t < 24; t++) {
	// Step 3a, compute the offset for the lane, pre-mod it to make them valid rotation offsets.
	offsets[x + 5 * y] = (((t + 1) * (t + 2)) / 2) % kLaneBitWidth;

	// Step 3b, update the current lane.
	uint32_t previousX = x;
	uint32_t previousY = y;
	x = previousY;
	y = (2 * previousX + 3 * previousY) % 5;
	}

	return offsets;
	}();

	void Rho(Sha3State& a) {
	// Rotating starts from i = 1, as kRhoOffsets[0] = 0 and the lane 0 is not rotated.
	static_assert(kRhoOffsets[0] == 0);
	for (uint32_t i = 1; i < 25; i++) {
	a[i] = Rotl(a[i], kRhoOffsets[i]);
	}
	}

	// Section 3.2.3: Specification of Pi
	// Pi is a permutation of all the lanes with (0, 0) staying in place and a 24 lane cycle. This
	// means that we can efficiently update the state if we know the order of the cycle.
	static constexpr std::array<uint8_t, 24> kPiCycleIndices = []() {
	// Compute the index of the next lane we copy from in the permutation.
	std::array<uint32_t, 25> copyFromX = {};
	std::array<uint32_t, 25> copyFromY = {};

	// Step 1
	for (uint32_t x = 0; x < 5; x++) {
	for (uint32_t y = 0; y < 5; y++) {
	copyFromX[x + 5 * y] = (x + 3 * y) % 5;
	copyFromY[x + 5 * y] = x;
	}
	}

	// Invert the copy* to instead get the coordinates of the next lane in the permutation.
	std::array<uint32_t, 25> nextX = {};
	std::array<uint32_t, 25> nextY = {};
	for (uint32_t x = 0; x < 5; x++) {
	for (uint32_t y = 0; y < 5; y++) {
	uint32_t copyX = copyFromX[x + y * 5];
	uint32_t copyY = copyFromY[x + y * 5];

	nextX[copyX + 5 * copyY] = x;
	nextY[copyX + 5 * copyY] = y;
	}
	}

	// Walk the cycle starting anywhere on it, but not (0, 0) as it is fixed point.
	std::array<uint8_t, 24> cycleIndices = {};
	uint32_t x = 1;
	uint32_t y = 0;
	for (size_t i = 0; i < cycleIndices.size(); i++) {
	cycleIndices[i] = x + 5 * y;
	uint32_t previousX = x;
	uint32_t previousY = y;
	x = nextX[previousX + 5 * previousY];
	y = nextY[previousX + 5 * previousY];
	}

	return cycleIndices;
	}();

	void Pi(Sha3State& a) {
	// The first lane of the cycle is set to the value of the last lane of the cycle.
	Sha3Lane previousLane = a[kPiCycleIndices.back()];

	for (size_t i = 0; i < kPiCycleIndices.size(); i++) {
	Sha3Lane currentLane = a[kPiCycleIndices[i]];
	a[kPiCycleIndices[i]] = previousLane;
	previousLane = currentLane;
	}
	}

	// Section 3.2.4: Specification of Chi
	void Chi(Sha3State& a) {
	// Step 1, Xi mixes the bits of each row so we need to copy each plane out before mixing.
	for (uint32_t y = 0; y < 5; y++) {
	std::array<Sha3Lane, 5> a_y;
	for (uint32_t x = 0; x < 5; x++) {
	a_y[x] = a[x + 5 * y];
	}

	for (uint32_t x = 0; x < 5; x++) {
	a[x + 5 * y] ^= ~a_y[(x + 1) % 5] & a_y[(x + 2) % 5];
	}
	}
	}

	// Section 3.2.3: Specification of Iota

	// Algorithm 5 returns a bit given a variable "t". All computations are done modulo 255 so we
	// precompute all possible values (even if we don't end up using all of them). We cannot use
	// std::bitset::set as it isn't constexpr until C++23.
	static constexpr std::array<bool, 256> kRoundConstantsBits = []() {
	std::array<bool, 256> bits = {};

	// Step 1, return 1 when t is 0.
	bits[0] = true;

	// Step 2
	uint8_t R = 1;

	// Step 3
	for (int i = 1; i < 256; i++) {
	bool R8 = R & (0x80);
	// Step 3a, 3f
	R <<= 1;

	// Step 3b, 3c, 3d, 3e
	if (R8) {
	R ^= 0b01110001;
	}
	bits[i] = R & 1;
	}

	return bits;
	}();

	// Algorithm 6 computes a round constant based on the round number. Precompute all of them.
	static constexpr std::array<Sha3Lane, kRoundCount> kRoundConstants = []() {
	std::array<Sha3Lane, kRoundCount> RCs = {};

	for (uint32_t ir = 0; ir < kRoundCount; ir++) {
	// Step 2
	Sha3Lane RC = 0;

	// Step 3
	for (uint32_t j = 0; j < kLog2LaneBitWidth + 1; j++) {
	if (kRoundConstantsBits[j + 7 * ir]) {
	RC \|= uint64_t(1) << ((1 << j) - 1);
	}
	}

	RCs[ir] = RC;
	}

	return RCs;
	}();

	void Iota(Sha3State& a, uint32_t ir) {
	// Step 4, combine the round constant in the first lane.
	a[0] ^= kRoundConstants[ir];
	}

	// Section 3.3, Keccak-b[b, nr]
	void Keccak(Sha3State& a) {
	for (uint32_t ir = 0; ir < kRoundCount; ir++) {
	Theta(a);
	// TODO(402772741): For efficiency combine Rho and Pi together.
	Rho(a);
	Pi(a);
	Chi(a);
	Iota(a, ir);
	}
	}

	// TODO(402772741): This could be made more efficient by xoring whole 64bits at a time.
	void memxorpy(void* dst, const void* src, size_t n) {
	char* dstChars = static_cast<char*>(dst);
	const char* srcChars = static_cast<const char*>(src);

	while (n > 0) {
	dstChars ^= srcChars;
	n--;
	dstChars++;
	srcChars++;
	}
	}

	} // anonymous namespace

	// Section 4: Sponge Construction
	// The spec only defines hashing a full message but for convenience we use an Update/Finalize API.
	// This means that Algorithm 8 is spread over multiple function calls. For each chunk of "rate"
	// bits we xor them at the start of the state and perform Keccak on the state. SHA3 also adds a
	// 01 suffix at the end of the message and pads the remaining bits with 10...0...01. (with 11
	// being a valid padding, but not 1).
	template <size_t OutputLength>
	void Sha3<OutputLength>::Update(const void* data, size_t size) {
	uint8_t* stateAsString = reinterpret_cast<uint8_t*>(&mState);
	const uint8_t* dataAsBytes = static_cast<const uint8_t*>(data);

	while (size > 0) {
	DAWN_ASSERT(mOffsetInState < kByteRate);
	size_t toProcess = std::min(size, kByteRate - mOffsetInState);

	memxorpy(stateAsString + mOffsetInState, dataAsBytes, toProcess);
	size -= toProcess;
	dataAsBytes += toProcess;
	mOffsetInState += toProcess;

	if (mOffsetInState == kByteRate) {
	Keccak(mState);
	mOffsetInState = 0;
	}
	}
	}

	template <size_t OutputLength>
	typename Sha3<OutputLength>::Output Sha3<OutputLength>::Finalize() {
	uint8_t* stateAsString = reinterpret_cast<uint8_t*>(&mState);
	DAWN_ASSERT(mOffsetInState < kByteRate);

	// Add in the 01 suffix for SHA3, as well as the first 1 for the padding.
	uint8_t* suffixByte = stateAsString + mOffsetInState;
	*suffixByte ^= 0b110;

	// Add in the last 1 of the multi-rate padding. The byte may be the same byte as suffixByte.
	uint8_t* endByte = stateAsString + (kByteRate - 1);
	*endByte ^= 0b1000'0000;

	// Do the final Keccak for the absorption in the sponge.
	Keccak(mState);

	// Garble the offset to trigger an ASSERT in Update or Finalize if they are called again.
	mOffsetInState = 0xC0FFEE;

	// The squeeze of the hash value can be done in one step.
	static_assert(sizeof(Output) <= kByteRate);
	Output output;
	memcpy(&output, &mState, sizeof(output));
	return output;
	}

	// static
	template <size_t OutputLength>
	typename Sha3<OutputLength>::Output Sha3<OutputLength>::Hash(const void* data, size_t size) {
	Sha3 sha;
	sha.Update(data, size);
	return sha.Finalize();
	}

	template class Sha3<224>;
	template class Sha3<256>;
	template class Sha3<384>;
	template class Sha3<512>;

	Sha3State KeccakForTesting(Sha3State s) {
	Sha3State res = s;
	Keccak(res);
	return res;
	}

	} // namespace dawn