[tint][utils] Add DecodeBase64FromComments()
A rather bespoke utility for decoding base64 encoded data in the
WGSL-style comments of a string.
Will be used by the WGSL fuzzers.
Change-Id: I242af8521877314ebf5ca5d82a2cc4c7ca492336
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/162304
Reviewed-by: dan sinclair <dsinclair@chromium.org>
Kokoro: Kokoro <noreply+kokoro@google.com>
diff --git a/src/tint/utils/text/BUILD.bazel b/src/tint/utils/text/BUILD.bazel
index 6e4f90d..21ae073 100644
--- a/src/tint/utils/text/BUILD.bazel
+++ b/src/tint/utils/text/BUILD.bazel
@@ -39,6 +39,7 @@
cc_library(
name = "text",
srcs = [
+ "base64.cc",
"string.cc",
"string_stream.cc",
"unicode.cc",
diff --git a/src/tint/utils/text/BUILD.cmake b/src/tint/utils/text/BUILD.cmake
index 72a1258..f5f8c10 100644
--- a/src/tint/utils/text/BUILD.cmake
+++ b/src/tint/utils/text/BUILD.cmake
@@ -39,6 +39,7 @@
# Kind: lib
################################################################################
tint_add_target(tint_utils_text lib
+ utils/text/base64.cc
utils/text/base64.h
utils/text/string.cc
utils/text/string.h
diff --git a/src/tint/utils/text/BUILD.gn b/src/tint/utils/text/BUILD.gn
index de1a835..c13c8c0 100644
--- a/src/tint/utils/text/BUILD.gn
+++ b/src/tint/utils/text/BUILD.gn
@@ -44,6 +44,7 @@
libtint_source_set("text") {
sources = [
+ "base64.cc",
"base64.h",
"string.cc",
"string.h",
diff --git a/src/tint/utils/text/base64.cc b/src/tint/utils/text/base64.cc
new file mode 100644
index 0000000..7ae85cc
--- /dev/null
+++ b/src/tint/utils/text/base64.cc
@@ -0,0 +1,71 @@
+// Copyright 2023 The Dawn & Tint Authors
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// 1. Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the copyright holder nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include <string>
+
+#include "src/tint/utils/text/base64.h"
+
+namespace tint {
+
+// Scans wgsl once, tracking comment state; emits one byte per base64 character inside a comment.
+Vector<std::byte, 0> DecodeBase64FromComments(std::string_view wgsl) {
+    Vector<std::byte, 0> out;
+    size_t block_nesting = 0;   // number of currently open (nestable) block comments
+    bool line_comment = false;  // inside a line comment: between its "//" and the next '\n'
+    for (size_t i = 0, n = wgsl.length(); i < n; i++) {
+        char curr = wgsl[i];
+        if (curr == '\n') {
+            line_comment = false;  // a newline always ends a line comment
+            continue;
+        }
+        char next = (i + 1) < n ? wgsl[i + 1] : 0;  // one-character lookahead, 0 at end of input
+        if (!line_comment && curr == '/' && next == '*') {  // not an opener inside a line comment
+            block_nesting++;
+            i++;  // skip '*'
+            continue;
+        }
+        if (block_nesting > 0 && curr == '*' && next == '/') {
+            block_nesting--;
+            i++;  // skip '/'
+            continue;
+        }
+        if (block_nesting == 0 && !line_comment && curr == '/' && next == '/') {
+            line_comment = true;  // any later '/' on this line is base64 data, not a delimiter
+            i++;                  // skip '/'
+            continue;
+        }
+        // Inside a comment: keep base64 characters (one output byte each), drop everything else.
+        if (block_nesting > 0 || line_comment) {
+            if (auto v = DecodeBase64(curr)) {
+                out.Push(std::byte{*v});
+            }
+        }
+    }
+    return out;
+}
+
+} // namespace tint
diff --git a/src/tint/utils/text/base64.h b/src/tint/utils/text/base64.h
index df5a2d9..1047c33 100644
--- a/src/tint/utils/text/base64.h
+++ b/src/tint/utils/text/base64.h
@@ -31,6 +31,8 @@
#include <cstdint>
#include <optional>
+#include "src/tint/utils/containers/vector.h"
+
namespace tint {
/// Decodes a byte from a base64 encoded character
@@ -55,6 +57,11 @@
}
return std::nullopt;
}
+/// DecodeBase64FromComments treats the text inside the comments of the WGSL source string as base64
+/// data: both line comments and (nestable) block comments are scanned, in source order, and each
+/// base64 character found there is decoded on its own. Non-base64 characters are skipped.
+/// @param wgsl the WGSL source
+/// @return one byte per base64 character found inside a comment, holding that character's value
+/// as returned by DecodeBase64()
+Vector<std::byte, 0> DecodeBase64FromComments(std::string_view wgsl);
} // namespace tint
diff --git a/src/tint/utils/text/base64_test.cc b/src/tint/utils/text/base64_test.cc
index b3dc404..3eb282f 100644
--- a/src/tint/utils/text/base64_test.cc
+++ b/src/tint/utils/text/base64_test.cc
@@ -28,18 +28,22 @@
#include "src/tint/utils/text/base64.h"
#include <optional>
+#include <vector>
#include "gtest/gtest.h"
+#include "src/tint/utils/containers/transform.h"
+#include "src/tint/utils/text/string.h"
+
namespace tint::utils {
namespace {
-struct Case {
+struct DecodeBase64Case {
char in;
std::optional<uint8_t> out;
};
-using DecodeBase64Test = testing::TestWithParam<Case>;
+using DecodeBase64Test = testing::TestWithParam<DecodeBase64Case>;
TEST_P(DecodeBase64Test, Char) {
EXPECT_EQ(DecodeBase64(GetParam().in), GetParam().out);
@@ -47,87 +51,172 @@
INSTANTIATE_TEST_SUITE_P(Valid,
DecodeBase64Test,
- testing::Values(Case{'A', 0},
- Case{'B', 1},
- Case{'C', 2},
- Case{'D', 3},
- Case{'E', 4},
- Case{'F', 5},
- Case{'G', 6},
- Case{'H', 7},
- Case{'I', 8},
- Case{'J', 9},
- Case{'K', 10},
- Case{'L', 11},
- Case{'M', 12},
- Case{'N', 13},
- Case{'O', 14},
- Case{'P', 15},
- Case{'Q', 16},
- Case{'R', 17},
- Case{'S', 18},
- Case{'T', 19},
- Case{'U', 20},
- Case{'V', 21},
- Case{'W', 22},
- Case{'X', 23},
- Case{'Y', 24},
- Case{'Z', 25},
- Case{'a', 26},
- Case{'b', 27},
- Case{'c', 28},
- Case{'d', 29},
- Case{'e', 30},
- Case{'f', 31},
- Case{'g', 32},
- Case{'h', 33},
- Case{'i', 34},
- Case{'j', 35},
- Case{'k', 36},
- Case{'l', 37},
- Case{'m', 38},
- Case{'n', 39},
- Case{'o', 40},
- Case{'p', 41},
- Case{'q', 42},
- Case{'r', 43},
- Case{'s', 44},
- Case{'t', 45},
- Case{'u', 46},
- Case{'v', 47},
- Case{'w', 48},
- Case{'x', 49},
- Case{'y', 50},
- Case{'z', 51},
- Case{'0', 52},
- Case{'1', 53},
- Case{'2', 54},
- Case{'3', 55},
- Case{'4', 56},
- Case{'5', 57},
- Case{'6', 58},
- Case{'7', 59},
- Case{'8', 60},
- Case{'9', 61},
- Case{'+', 62},
- Case{'/', 63}));
+ testing::Values(DecodeBase64Case{'A', 0},
+ DecodeBase64Case{'B', 1},
+ DecodeBase64Case{'C', 2},
+ DecodeBase64Case{'D', 3},
+ DecodeBase64Case{'E', 4},
+ DecodeBase64Case{'F', 5},
+ DecodeBase64Case{'G', 6},
+ DecodeBase64Case{'H', 7},
+ DecodeBase64Case{'I', 8},
+ DecodeBase64Case{'J', 9},
+ DecodeBase64Case{'K', 10},
+ DecodeBase64Case{'L', 11},
+ DecodeBase64Case{'M', 12},
+ DecodeBase64Case{'N', 13},
+ DecodeBase64Case{'O', 14},
+ DecodeBase64Case{'P', 15},
+ DecodeBase64Case{'Q', 16},
+ DecodeBase64Case{'R', 17},
+ DecodeBase64Case{'S', 18},
+ DecodeBase64Case{'T', 19},
+ DecodeBase64Case{'U', 20},
+ DecodeBase64Case{'V', 21},
+ DecodeBase64Case{'W', 22},
+ DecodeBase64Case{'X', 23},
+ DecodeBase64Case{'Y', 24},
+ DecodeBase64Case{'Z', 25},
+ DecodeBase64Case{'a', 26},
+ DecodeBase64Case{'b', 27},
+ DecodeBase64Case{'c', 28},
+ DecodeBase64Case{'d', 29},
+ DecodeBase64Case{'e', 30},
+ DecodeBase64Case{'f', 31},
+ DecodeBase64Case{'g', 32},
+ DecodeBase64Case{'h', 33},
+ DecodeBase64Case{'i', 34},
+ DecodeBase64Case{'j', 35},
+ DecodeBase64Case{'k', 36},
+ DecodeBase64Case{'l', 37},
+ DecodeBase64Case{'m', 38},
+ DecodeBase64Case{'n', 39},
+ DecodeBase64Case{'o', 40},
+ DecodeBase64Case{'p', 41},
+ DecodeBase64Case{'q', 42},
+ DecodeBase64Case{'r', 43},
+ DecodeBase64Case{'s', 44},
+ DecodeBase64Case{'t', 45},
+ DecodeBase64Case{'u', 46},
+ DecodeBase64Case{'v', 47},
+ DecodeBase64Case{'w', 48},
+ DecodeBase64Case{'x', 49},
+ DecodeBase64Case{'y', 50},
+ DecodeBase64Case{'z', 51},
+ DecodeBase64Case{'0', 52},
+ DecodeBase64Case{'1', 53},
+ DecodeBase64Case{'2', 54},
+ DecodeBase64Case{'3', 55},
+ DecodeBase64Case{'4', 56},
+ DecodeBase64Case{'5', 57},
+ DecodeBase64Case{'6', 58},
+ DecodeBase64Case{'7', 59},
+ DecodeBase64Case{'8', 60},
+ DecodeBase64Case{'9', 61},
+ DecodeBase64Case{'+', 62},
+ DecodeBase64Case{'/', 63}));
INSTANTIATE_TEST_SUITE_P(Invalid,
DecodeBase64Test,
- testing::Values(Case{'@', std::nullopt},
- Case{'#', std::nullopt},
- Case{'^', std::nullopt},
- Case{'&', std::nullopt},
- Case{'!', std::nullopt},
- Case{'*', std::nullopt},
- Case{'(', std::nullopt},
- Case{')', std::nullopt},
- Case{'-', std::nullopt},
- Case{'.', std::nullopt},
- Case{'\0', std::nullopt},
- Case{'\n', std::nullopt}));
+ testing::Values(DecodeBase64Case{'@', std::nullopt},
+ DecodeBase64Case{'#', std::nullopt},
+ DecodeBase64Case{'^', std::nullopt},
+ DecodeBase64Case{'&', std::nullopt},
+ DecodeBase64Case{'!', std::nullopt},
+ DecodeBase64Case{'*', std::nullopt},
+ DecodeBase64Case{'(', std::nullopt},
+ DecodeBase64Case{')', std::nullopt},
+ DecodeBase64Case{'-', std::nullopt},
+ DecodeBase64Case{'.', std::nullopt},
+ DecodeBase64Case{'\0', std::nullopt},
+ DecodeBase64Case{'\n', std::nullopt}));
-INSTANTIATE_TEST_SUITE_P(Padding, DecodeBase64Test, testing::Values(Case{'=', std::nullopt}));
+INSTANTIATE_TEST_SUITE_P(Padding,
+ DecodeBase64Test,
+ testing::Values(DecodeBase64Case{'=', std::nullopt}));
+
+/// A single DecodeBase64FromComments() test case.
+struct DecodeBase64FromCommentsCase {
+    /// The source text handed to DecodeBase64FromComments()
+    std::string_view wgsl;
+    /// The expected decoded bytes, expressed as ints
+    Vector<int, 0> expected;
+};
+
+/// Streams a single-quoted form of the case's WGSL, after passing it through
+/// ReplaceAll(wgsl, "\n", ""), so that a case can be written to an output stream.
+/// NOTE(review): presumably picked up by GoogleTest when printing a failing parameter - confirm.
+/// @param o the stream to write to
+/// @param c the test case to print
+/// @return the stream @p o
+static std::ostream& operator<<(std::ostream& o, const DecodeBase64FromCommentsCase& c) {
+    return o << "'" << ReplaceAll(c.wgsl, "\n", "") << "'";
+}
+
+using DecodeBase64FromCommentsTest = ::testing::TestWithParam<DecodeBase64FromCommentsCase>;
+
+// Decodes the case's WGSL and compares the resulting byte values, widened to int, with the
+// expected values of the case.
+TEST_P(DecodeBase64FromCommentsTest, None) {
+    const auto& test_case = GetParam();
+    auto decoded_bytes = DecodeBase64FromComments(test_case.wgsl);
+    auto decoded = Transform(decoded_bytes, [](std::byte b) { return static_cast<int>(b); });
+    EXPECT_EQ(decoded, test_case.expected);
+}
+
+// Cases cover: no comments, line comments, block comments (including nested ones) and an
+// unterminated block comment. 'a', 'b' and 'c' decode to 26, 27 and 28.
+INSTANTIATE_TEST_SUITE_P(,
+                         DecodeBase64FromCommentsTest,
+                         testing::ValuesIn(std::vector<DecodeBase64FromCommentsCase>{
+                             {"", Empty},
+                             {"//", Empty},
+                             {"abc", Empty},
+                             {"abc//", Empty},
+                             {
+                                 R"(a
+b
+c)",
+                                 Empty,
+                             },
+                             {"// abc", {26, 27, 28}},
+                             {"a // bc", {27, 28}},
+                             {"ab // c", {28}},
+                             {"// a.b.c", {26, 27, 28}},
+                             {
+                                 // Unterminated block comment: the rest of the input is comment text.
+                                 R"(a /*
+b
+c)",
+                                 {27, 28},
+                             },
+                             {
+                                 R"(a
+// b
+c)",
+                                 {27},
+                             },
+                             {
+                                 R"(// a
+// b
+// c)",
+                                 {26, 27, 28},
+                             },
+                             {
+                                 R"(/* a
+b
+c
+*/)",
+                                 {26, 27, 28},
+                             },
+                             {
+                                 R"(/* a
+b
+*/
+c)",
+                                 {26, 27},
+                             },
+                             {
+                                 R"(a/*
+b
+*/
+c)",
+                                 {27},
+                             },
+                             {
+                                 "x/*a*/b/*c*/y",
+                                 {26, 28},
+                             },
+                             {
+                                 "x/*a/*b*/c*/z",
+                                 {26, 27, 28},
+                             },
+                         }));
} // namespace
} // namespace tint::utils