[tint][utils] Add DecodeBase64FromComments()

A rather bespoke utility that decodes the base64 characters found in the
WGSL-style comments of a string, producing one byte per character.
It will be used by the WGSL fuzzers.

Change-Id: I242af8521877314ebf5ca5d82a2cc4c7ca492336
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/162304
Reviewed-by: dan sinclair <dsinclair@chromium.org>
Kokoro: Kokoro <noreply+kokoro@google.com>
diff --git a/src/tint/utils/text/BUILD.bazel b/src/tint/utils/text/BUILD.bazel
index 6e4f90d..21ae073 100644
--- a/src/tint/utils/text/BUILD.bazel
+++ b/src/tint/utils/text/BUILD.bazel
@@ -39,6 +39,7 @@
 cc_library(
   name = "text",
   srcs = [
+    "base64.cc",
     "string.cc",
     "string_stream.cc",
     "unicode.cc",
diff --git a/src/tint/utils/text/BUILD.cmake b/src/tint/utils/text/BUILD.cmake
index 72a1258..f5f8c10 100644
--- a/src/tint/utils/text/BUILD.cmake
+++ b/src/tint/utils/text/BUILD.cmake
@@ -39,6 +39,7 @@
 # Kind:      lib
 ################################################################################
 tint_add_target(tint_utils_text lib
+  utils/text/base64.cc
   utils/text/base64.h
   utils/text/string.cc
   utils/text/string.h
diff --git a/src/tint/utils/text/BUILD.gn b/src/tint/utils/text/BUILD.gn
index de1a835..c13c8c0 100644
--- a/src/tint/utils/text/BUILD.gn
+++ b/src/tint/utils/text/BUILD.gn
@@ -44,6 +44,7 @@
 
 libtint_source_set("text") {
   sources = [
+    "base64.cc",
     "base64.h",
     "string.cc",
     "string.h",
diff --git a/src/tint/utils/text/base64.cc b/src/tint/utils/text/base64.cc
new file mode 100644
index 0000000..7ae85cc
--- /dev/null
+++ b/src/tint/utils/text/base64.cc
@@ -0,0 +1,71 @@
+// Copyright 2023 The Dawn & Tint Authors
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// 1. Redistributions of source code must retain the above copyright notice, this
+//    list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright notice,
+//    this list of conditions and the following disclaimer in the documentation
+//    and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the copyright holder nor the names of its
+//    contributors may be used to endorse or promote products derived from
+//    this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include <string>
+
+#include "src/tint/utils/text/base64.h"
+
+namespace tint {
+
+Vector<std::byte, 0> DecodeBase64FromComments(std::string_view wgsl) {
+    Vector<std::byte, 0> out;
+    size_t block_nesting = 0;   // number of currently open (possibly nested) block comments
+    bool line_comment = false;  // true from a '//' up to the next '\n'
+    for (size_t i = 0, n = wgsl.length(); i < n; i++) {
+        char curr = wgsl[i];
+        if (curr == '\n') {
+            line_comment = false;
+            continue;
+        }
+        // Peek one character ahead (0 at the end of input) to spot two-character tokens.
+        char next = (i + 1) < n ? wgsl[i + 1] : 0;
+        if (!line_comment && curr == '/' && next == '*') {  // no block opens after '//'
+            block_nesting++;
+            i++;  // skip '*'
+            continue;
+        }
+        if (block_nesting > 0 && curr == '*' && next == '/') {
+            block_nesting--;
+            i++;  // skip '/'
+            continue;
+        }
+        if (block_nesting == 0 && curr == '/' && next == '/') {
+            line_comment = true;
+            i++;  // skip '/'
+            continue;
+        }
+        // Any other character inside a comment yields one byte if it is a base64 character.
+        if (block_nesting > 0 || line_comment) {
+            if (auto v = DecodeBase64(curr)) {
+                out.Push(std::byte{*v});
+            }
+        }
+    }
+    return out;
+}
+
+}  // namespace tint
diff --git a/src/tint/utils/text/base64.h b/src/tint/utils/text/base64.h
index df5a2d9..1047c33 100644
--- a/src/tint/utils/text/base64.h
+++ b/src/tint/utils/text/base64.h
@@ -31,6 +31,8 @@
 #include <cstdint>
 #include <optional>
 
+#include "src/tint/utils/containers/vector.h"
+
 namespace tint {
 
 /// Decodes a byte from a base64 encoded character
@@ -55,6 +57,11 @@
     }
     return std::nullopt;
 }
+/// DecodeBase64FromComments scans the line and block comments of the WGSL source string and
+/// emits one byte (value 0-63) per base64 character found. Non-base64 characters are skipped.
+/// @param wgsl the WGSL source
+/// @return the decoded values, one byte per base64 character, in source order
+Vector<std::byte, 0> DecodeBase64FromComments(std::string_view wgsl);
 
 }  // namespace tint
 
diff --git a/src/tint/utils/text/base64_test.cc b/src/tint/utils/text/base64_test.cc
index b3dc404..3eb282f 100644
--- a/src/tint/utils/text/base64_test.cc
+++ b/src/tint/utils/text/base64_test.cc
@@ -28,18 +28,22 @@
 #include "src/tint/utils/text/base64.h"
 
 #include <optional>
+#include <vector>
 
 #include "gtest/gtest.h"
 
+#include "src/tint/utils/containers/transform.h"
+#include "src/tint/utils/text/string.h"
+
 namespace tint::utils {
 namespace {
 
-struct Case {
+struct DecodeBase64Case {
     char in;
     std::optional<uint8_t> out;
 };
 
-using DecodeBase64Test = testing::TestWithParam<Case>;
+using DecodeBase64Test = testing::TestWithParam<DecodeBase64Case>;
 
 TEST_P(DecodeBase64Test, Char) {
     EXPECT_EQ(DecodeBase64(GetParam().in), GetParam().out);
@@ -47,87 +51,172 @@
 
 INSTANTIATE_TEST_SUITE_P(Valid,
                          DecodeBase64Test,
-                         testing::Values(Case{'A', 0},
-                                         Case{'B', 1},
-                                         Case{'C', 2},
-                                         Case{'D', 3},
-                                         Case{'E', 4},
-                                         Case{'F', 5},
-                                         Case{'G', 6},
-                                         Case{'H', 7},
-                                         Case{'I', 8},
-                                         Case{'J', 9},
-                                         Case{'K', 10},
-                                         Case{'L', 11},
-                                         Case{'M', 12},
-                                         Case{'N', 13},
-                                         Case{'O', 14},
-                                         Case{'P', 15},
-                                         Case{'Q', 16},
-                                         Case{'R', 17},
-                                         Case{'S', 18},
-                                         Case{'T', 19},
-                                         Case{'U', 20},
-                                         Case{'V', 21},
-                                         Case{'W', 22},
-                                         Case{'X', 23},
-                                         Case{'Y', 24},
-                                         Case{'Z', 25},
-                                         Case{'a', 26},
-                                         Case{'b', 27},
-                                         Case{'c', 28},
-                                         Case{'d', 29},
-                                         Case{'e', 30},
-                                         Case{'f', 31},
-                                         Case{'g', 32},
-                                         Case{'h', 33},
-                                         Case{'i', 34},
-                                         Case{'j', 35},
-                                         Case{'k', 36},
-                                         Case{'l', 37},
-                                         Case{'m', 38},
-                                         Case{'n', 39},
-                                         Case{'o', 40},
-                                         Case{'p', 41},
-                                         Case{'q', 42},
-                                         Case{'r', 43},
-                                         Case{'s', 44},
-                                         Case{'t', 45},
-                                         Case{'u', 46},
-                                         Case{'v', 47},
-                                         Case{'w', 48},
-                                         Case{'x', 49},
-                                         Case{'y', 50},
-                                         Case{'z', 51},
-                                         Case{'0', 52},
-                                         Case{'1', 53},
-                                         Case{'2', 54},
-                                         Case{'3', 55},
-                                         Case{'4', 56},
-                                         Case{'5', 57},
-                                         Case{'6', 58},
-                                         Case{'7', 59},
-                                         Case{'8', 60},
-                                         Case{'9', 61},
-                                         Case{'+', 62},
-                                         Case{'/', 63}));
+                         testing::Values(DecodeBase64Case{'A', 0},
+                                         DecodeBase64Case{'B', 1},
+                                         DecodeBase64Case{'C', 2},
+                                         DecodeBase64Case{'D', 3},
+                                         DecodeBase64Case{'E', 4},
+                                         DecodeBase64Case{'F', 5},
+                                         DecodeBase64Case{'G', 6},
+                                         DecodeBase64Case{'H', 7},
+                                         DecodeBase64Case{'I', 8},
+                                         DecodeBase64Case{'J', 9},
+                                         DecodeBase64Case{'K', 10},
+                                         DecodeBase64Case{'L', 11},
+                                         DecodeBase64Case{'M', 12},
+                                         DecodeBase64Case{'N', 13},
+                                         DecodeBase64Case{'O', 14},
+                                         DecodeBase64Case{'P', 15},
+                                         DecodeBase64Case{'Q', 16},
+                                         DecodeBase64Case{'R', 17},
+                                         DecodeBase64Case{'S', 18},
+                                         DecodeBase64Case{'T', 19},
+                                         DecodeBase64Case{'U', 20},
+                                         DecodeBase64Case{'V', 21},
+                                         DecodeBase64Case{'W', 22},
+                                         DecodeBase64Case{'X', 23},
+                                         DecodeBase64Case{'Y', 24},
+                                         DecodeBase64Case{'Z', 25},
+                                         DecodeBase64Case{'a', 26},
+                                         DecodeBase64Case{'b', 27},
+                                         DecodeBase64Case{'c', 28},
+                                         DecodeBase64Case{'d', 29},
+                                         DecodeBase64Case{'e', 30},
+                                         DecodeBase64Case{'f', 31},
+                                         DecodeBase64Case{'g', 32},
+                                         DecodeBase64Case{'h', 33},
+                                         DecodeBase64Case{'i', 34},
+                                         DecodeBase64Case{'j', 35},
+                                         DecodeBase64Case{'k', 36},
+                                         DecodeBase64Case{'l', 37},
+                                         DecodeBase64Case{'m', 38},
+                                         DecodeBase64Case{'n', 39},
+                                         DecodeBase64Case{'o', 40},
+                                         DecodeBase64Case{'p', 41},
+                                         DecodeBase64Case{'q', 42},
+                                         DecodeBase64Case{'r', 43},
+                                         DecodeBase64Case{'s', 44},
+                                         DecodeBase64Case{'t', 45},
+                                         DecodeBase64Case{'u', 46},
+                                         DecodeBase64Case{'v', 47},
+                                         DecodeBase64Case{'w', 48},
+                                         DecodeBase64Case{'x', 49},
+                                         DecodeBase64Case{'y', 50},
+                                         DecodeBase64Case{'z', 51},
+                                         DecodeBase64Case{'0', 52},
+                                         DecodeBase64Case{'1', 53},
+                                         DecodeBase64Case{'2', 54},
+                                         DecodeBase64Case{'3', 55},
+                                         DecodeBase64Case{'4', 56},
+                                         DecodeBase64Case{'5', 57},
+                                         DecodeBase64Case{'6', 58},
+                                         DecodeBase64Case{'7', 59},
+                                         DecodeBase64Case{'8', 60},
+                                         DecodeBase64Case{'9', 61},
+                                         DecodeBase64Case{'+', 62},
+                                         DecodeBase64Case{'/', 63}));
 
 INSTANTIATE_TEST_SUITE_P(Invalid,
                          DecodeBase64Test,
-                         testing::Values(Case{'@', std::nullopt},
-                                         Case{'#', std::nullopt},
-                                         Case{'^', std::nullopt},
-                                         Case{'&', std::nullopt},
-                                         Case{'!', std::nullopt},
-                                         Case{'*', std::nullopt},
-                                         Case{'(', std::nullopt},
-                                         Case{')', std::nullopt},
-                                         Case{'-', std::nullopt},
-                                         Case{'.', std::nullopt},
-                                         Case{'\0', std::nullopt},
-                                         Case{'\n', std::nullopt}));
+                         testing::Values(DecodeBase64Case{'@', std::nullopt},
+                                         DecodeBase64Case{'#', std::nullopt},
+                                         DecodeBase64Case{'^', std::nullopt},
+                                         DecodeBase64Case{'&', std::nullopt},
+                                         DecodeBase64Case{'!', std::nullopt},
+                                         DecodeBase64Case{'*', std::nullopt},
+                                         DecodeBase64Case{'(', std::nullopt},
+                                         DecodeBase64Case{')', std::nullopt},
+                                         DecodeBase64Case{'-', std::nullopt},
+                                         DecodeBase64Case{'.', std::nullopt},
+                                         DecodeBase64Case{'\0', std::nullopt},
+                                         DecodeBase64Case{'\n', std::nullopt}));
 
-INSTANTIATE_TEST_SUITE_P(Padding, DecodeBase64Test, testing::Values(Case{'=', std::nullopt}));
+INSTANTIATE_TEST_SUITE_P(Padding,
+                         DecodeBase64Test,
+                         testing::Values(DecodeBase64Case{'=', std::nullopt}));
+
+struct DecodeBase64FromCommentsCase {
+    std::string_view wgsl;    // source text handed to DecodeBase64FromComments()
+    Vector<int, 0> expected;  // values the decoded bytes must equal once converted to int
+};
+
+static std::ostream& operator<<(std::ostream& o, const DecodeBase64FromCommentsCase& c) {
+    return o << "'" << ReplaceAll(c.wgsl, "\n", "␤") << "'";  // newlines are shown as '␤'
+}
+
+using DecodeBase64FromCommentsTest = ::testing::TestWithParam<DecodeBase64FromCommentsCase>;
+
+TEST_P(DecodeBase64FromCommentsTest, None) {
+    auto got_bytes = DecodeBase64FromComments(GetParam().wgsl);  // decode the case input
+    auto got = Transform(got_bytes, [](std::byte byte) { return static_cast<int>(byte); });
+    EXPECT_EQ(got, GetParam().expected);  // `got` holds each decoded byte converted to int
+}
+
+INSTANTIATE_TEST_SUITE_P(,
+                         DecodeBase64FromCommentsTest,
+                         testing::ValuesIn(std::vector<DecodeBase64FromCommentsCase>{
+                             {"", Empty},
+                             {"//", Empty},
+                             {"abc", Empty},
+                             {"abc//", Empty},
+                             {  // no comments at all: nothing is decoded
+                                 R"(a
+b
+c)",
+                                 Empty,
+                             },
+                             {"// abc", {26, 27, 28}},
+                             {"a // bc", {27, 28}},
+                             {"ab // c", {28}},
+                             {"// a.b.c", {26, 27, 28}},  // non-base64 '.' is skipped
+                             {  // a line comment ends at the newline; later lines are ignored
+                                 R"(// a
+b
+c)",
+                                 {26},
+                             },
+                             {
+                                 R"(a
+// b
+c)",
+                                 {27},
+                             },
+                             {
+                                 R"(// a
+// b
+// c)",
+                                 {26, 27, 28},
+                             },
+                             {  // a block comment spans newlines
+                                 R"(/* a
+b
+c
+*/)",
+                                 {26, 27, 28},
+                             },
+                             {
+                                 R"(/* a
+b
+*/
+c)",
+                                 {26, 27},
+                             },
+                             {
+                                 R"(a/*
+b
+*/
+c)",
+                                 {27},
+                             },
+                             {  // two separate block comments; text between them is ignored
+                                 "x/*a*/b/*c*/y",
+                                 {26, 28},
+                             },
+                             {  // nested block comments
+                                 "x/*a/*b*/c*/z",
+                                 {26, 27, 28},
+                             },
+                         }));
 
 }  // namespace
 }  // namespace tint::utils