Regex fuzzer: replace function calls with builtins

Replaces random identifiers used as function names in calls with the
names of builtin functions.

Fixes: tint:1617.
Change-Id: I4e70276c9023bcb35b860c98fca6a95dc284f60a
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/96580
Kokoro: Kokoro <noreply+kokoro@google.com>
Commit-Queue: Alastair Donaldson <allydonaldson@googlemail.com>
Reviewed-by: Ryan Harrison <rharrison@chromium.org>
diff --git a/src/tint/fuzzers/tint_regex_fuzzer/fuzzer.cc b/src/tint/fuzzers/tint_regex_fuzzer/fuzzer.cc
index c5d2aba..ea1aea1 100644
--- a/src/tint/fuzzers/tint_regex_fuzzer/fuzzer.cc
+++ b/src/tint/fuzzers/tint_regex_fuzzer/fuzzer.cc
@@ -39,6 +39,7 @@
     kInsertReturnStatement,
     kReplaceOperator,
     kInsertBreakOrContinue,
+    kReplaceFunctionCallWithBuiltin,
     kNumMutationKinds
 };
 
@@ -115,6 +116,11 @@
                 return 0;
             }
             break;
+        case MutationKind::kReplaceFunctionCallWithBuiltin:
+            if (!mutator.ReplaceFunctionCallWithBuiltin(wgsl_code)) {
+                return 0;
+            }
+            break;
         default:
             assert(false && "Unreachable");
             return 0;
diff --git a/src/tint/fuzzers/tint_regex_fuzzer/regex_fuzzer_tests.cc b/src/tint/fuzzers/tint_regex_fuzzer/regex_fuzzer_tests.cc
index 55ec028..9e0b1a0 100644
--- a/src/tint/fuzzers/tint_regex_fuzzer/regex_fuzzer_tests.cc
+++ b/src/tint/fuzzers/tint_regex_fuzzer/regex_fuzzer_tests.cc
@@ -31,6 +31,7 @@
     using WgslMutator::FindClosingBrace;
     using WgslMutator::FindOperatorOccurrence;
     using WgslMutator::GetFunctionBodyPositions;
+    using WgslMutator::GetFunctionCallIdentifiers;
     using WgslMutator::GetIdentifiers;
     using WgslMutator::GetIntLiterals;
     using WgslMutator::GetLoopBodyPositions;
@@ -236,18 +237,12 @@
       })";
 
     std::vector<std::pair<size_t, size_t>> identifiers_pos = mutator.GetIdentifiers(wgsl_code);
-
     std::vector<std::pair<size_t, size_t>> ground_truth = {
-        std::make_pair(3, 12),   std::make_pair(28, 3),  std::make_pair(37, 4),
-        std::make_pair(49, 5),   std::make_pair(60, 3),  std::make_pair(68, 4),
-        std::make_pair(81, 4),   std::make_pair(110, 6), std::make_pair(123, 2),
-        std::make_pair(133, 4),  std::make_pair(144, 7), std::make_pair(162, 4),
-        std::make_pair(183, 12), std::make_pair(209, 6), std::make_pair(221, 3),
-        std::make_pair(244, 8),  std::make_pair(259, 2), std::make_pair(271, 4),
-        std::make_pair(288, 12), std::make_pair(319, 7), std::make_pair(328, 14),
-        std::make_pair(352, 2),  std::make_pair(363, 4), std::make_pair(381, 3),
-        std::make_pair(394, 3),  std::make_pair(399, 3), std::make_pair(418, 12)};
-
+        {0, 2},   {3, 12},  {28, 3},   {32, 3},   {37, 4},   {42, 3},   {49, 5},  {55, 4},
+        {60, 3},  {68, 4},  {73, 3},   {81, 4},   {86, 3},   {110, 6},  {123, 2}, {126, 11},
+        {144, 7}, {152, 8}, {162, 4},  {167, 3},  {183, 12}, {209, 6},  {216, 4}, {221, 3},
+        {244, 8}, {259, 2}, {262, 13}, {288, 12}, {319, 7},  {328, 14}, {352, 2}, {355, 12},
+        {381, 3}, {385, 7}, {394, 3},  {399, 3},  {418, 12}};
     ASSERT_EQ(ground_truth, identifiers_pos);
 }
 
@@ -603,13 +598,46 @@
 TEST(TestInsertBreakOrContinue, TestLoopPositions4) {
     RandomGenerator generator(0);
     WgslMutatorTest mutator(generator);
-    // This WGSL-like code is not valid, but it suffices to test regex-based matching (which is
-    // intended to work well on semi-valid code).
-    std::string wgsl_code = R"(unifor { } uniform { } sloop { } _loop { } _while { } awhile { } )";
+    std::string wgsl_code =
+        R"(fn clamp_0acf8f() {
+        var res: vec2<f32> = clamp(vec2<f32>(), vec2<f32>(), vec2<f32>());
+      }
+      @vertex
+      fn vertex_main() -> @builtin(position) vec4<f32> {
+         clamp_0acf8f();"
+         return vec4<f32>();
+      }
+      @fragment
+      fn fragment_main() {
+        clamp_0acf8f();
+      }
+      @compute @workgroup_size(1)
+      fn compute_main() {"
+        var<private> foo: f32 = 0.0;
+        clamp_0acf8f    ();
+      })";
 
     std::vector<size_t> loop_positions = mutator.GetLoopBodyPositions(wgsl_code);
     ASSERT_TRUE(loop_positions.empty());
 }
 
+TEST(TestReplaceFunctionCallWithBuiltin, FindFunctionCalls) {
+    // Only the three names that are followed by a '(' should be reported; offsets are in bytes.
+    const std::string function_body = R"({
+          var<private> foo: f32 = 0.0;
+          var foo_2: i32 = 10;
+          clamp_0acf8f  ();
+          _0acf8f();
+          f
+();
+          j = (i * 30);
+        })";
+    const std::vector<std::pair<size_t, size_t>> expected{{82, 12}, {110, 7}, {131, 1}};
+    RandomGenerator generator(0);
+    WgslMutatorTest mutator(generator);
+    const auto found = mutator.GetFunctionCallIdentifiers(function_body);
+    ASSERT_EQ(expected, found);
+}
+
 }  // namespace
 }  // namespace tint::fuzzers::regex_fuzzer
diff --git a/src/tint/fuzzers/tint_regex_fuzzer/wgsl_mutator.cc b/src/tint/fuzzers/tint_regex_fuzzer/wgsl_mutator.cc
index 0d20831..a965613 100644
--- a/src/tint/fuzzers/tint_regex_fuzzer/wgsl_mutator.cc
+++ b/src/tint/fuzzers/tint_regex_fuzzer/wgsl_mutator.cc
@@ -47,18 +47,32 @@
     // by a character which cannot be part of a WGSL identifer. The regex
     // for the WGSL identifier is obtained from:
     // https://www.w3.org/TR/WGSL/#identifiers.
-    std::regex wgsl_identifier_regex("[^a-zA-Z]([a-zA-Z][0-9a-zA-Z_]*)[^0-9a-zA-Z_]");
+    std::regex identifier_regex("[_a-zA-Z][0-9a-zA-Z_]*");
 
-    std::smatch match;
+    auto identifiers_begin =
+        std::sregex_iterator(wgsl_code.begin(), wgsl_code.end(), identifier_regex);
+    auto identifiers_end = std::sregex_iterator();
 
-    std::string::const_iterator search_start(wgsl_code.cbegin());
-    std::string prefix;
+    for (std::sregex_iterator i = identifiers_begin; i != identifiers_end; ++i) {
+        result.push_back(
+            {static_cast<size_t>(i->prefix().second - wgsl_code.cbegin()), i->str().size()});
+    }
+    return result;
+}
 
-    while (regex_search(search_start, wgsl_code.cend(), match, wgsl_identifier_regex) == true) {
-        prefix += match.prefix();
-        result.push_back(std::make_pair(prefix.size() + 1, match.str(1).size()));
-        prefix += match.str(0);
-        search_start = match.suffix().first;
+std::vector<std::pair<size_t, size_t>> WgslMutator::GetFunctionCallIdentifiers(
+    const std::string& wgsl_code) {
+    std::vector<std::pair<size_t, size_t>> result;
+    // Group 1 captures the name; `\s*` allows any blank space (incl. tab, CR) before the '('.
+    std::regex call_regex("([_a-zA-Z][0-9a-zA-Z_]*)\\s*\\(");
+
+    auto identifiers_begin = std::sregex_iterator(wgsl_code.begin(), wgsl_code.end(), call_regex);
+    auto identifiers_end = std::sregex_iterator();
+
+    for (std::sregex_iterator i = identifiers_begin; i != identifiers_end; ++i) {
+        auto submatch = (*i)[1];  // Offset and length of the name only, not of the whole match.
+        result.push_back(
+            {static_cast<size_t>(submatch.first - wgsl_code.cbegin()), submatch.str().size()});
     }
     return result;
 }
@@ -489,4 +503,153 @@
     return {};
 }
 
+bool WgslMutator::ReplaceFunctionCallWithBuiltin(std::string& wgsl_code) {
+    // Mutation: overwrite one apparent callee name with a random built-in name.
+
+    // Choose one of the function bodies found in the code as the region to mutate; without
+    // any function body there is nothing to do.
+    auto body_starts = GetFunctionBodyPositions(wgsl_code);
+    if (body_starts.empty()) {
+        return false;
+    }
+    const size_t body_begin = generator_.GetRandomElement(body_starts).first;
+
+    // The region to search runs from the chosen position up to, but not including, the
+    // position reported by FindClosingBrace. A result of 0 is treated as failure.
+    const size_t body_end = FindClosingBrace(body_begin, wgsl_code);
+    if (body_end == 0) {
+        return false;
+    }
+    const std::string body = wgsl_code.substr(body_begin, body_end - body_begin);
+
+    // Choose one identifier in that region that looks like the name in a function call.
+    auto callees = GetFunctionCallIdentifiers(body);
+    if (callees.empty()) {
+        return false;
+    }
+    const auto callee = generator_.GetRandomElement(callees);
+
+    // Names of built-in functions to substitute. Note that "abs", "clamp", "max" and "min"
+    // each appear twice in this list.
+    std::vector<std::string> builtin_names{
+        "all",
+        "any",
+        "select",
+        "arrayLength",
+        "abs",
+        "acos",
+        "acosh",
+        "asin",
+        "asinh",
+        "atan",
+        "atanh",
+        "atan2",
+        "ceil",
+        "clamp",
+        "cos",
+        "cosh",
+        "cross",
+        "degrees",
+        "distance",
+        "exp",
+        "exp2",
+        "faceForward",
+        "floor",
+        "fma",
+        "fract",
+        "frexp",
+        "inverseSqrt",
+        "ldexp",
+        "length",
+        "log",
+        "log2",
+        "max",
+        "min",
+        "mix",
+        "modf",
+        "normalize",
+        "pow",
+        "quantizeToF16",
+        "radians",
+        "reflect",
+        "refract",
+        "round",
+        "saturate",
+        "sign",
+        "sin",
+        "sinh",
+        "smoothstep",
+        "sqrt",
+        "step",
+        "tan",
+        "tanh",
+        "trunc",
+        "abs",
+        "clamp",
+        "countLeadingZeros",
+        "countOneBits",
+        "countTrailingZeros",
+        "extractBits",
+        "firstLeadingBit",
+        "firstTrailingBit",
+        "insertBits",
+        "max",
+        "min",
+        "reverseBits",
+        "determinant",
+        "transpose",
+        "dot",
+        "dpdx",
+        "dpdxCoarse",
+        "dpdxFine",
+        "dpdy",
+        "dpdyCoarse",
+        "dpdyFine",
+        "fwidth",
+        "fwidthCoarse",
+        "fwidthFine",
+        "textureDimensions",
+        "textureGather",
+        "textureGatherCompare",
+        "textureLoad",
+        "textureNumLayers",
+        "textureNumLevels",
+        "textureNumSamples",
+        "textureSample",
+        "textureSampleBias",
+        "textureSampleCompare",
+        "textureSampleCompareLevel",
+        "textureSampleGrad",
+        "textureSampleLevel",
+        "textureStore",
+        "atomicLoad",
+        "atomicStore",
+        "atomicAdd",
+        "atomicSub",
+        "atomicMax",
+        "atomicMin",
+        "atomicAnd",
+        "atomicOr",
+        "atomicXor",
+        "pack4x8snorm",
+        "pack4x8unorm",
+        "pack2x16snorm",
+        "pack2x16unorm",
+        "pack2x16float",
+        "unpack4x8snorm",
+        "unpack4x8unorm",
+        "unpack2x16snorm",
+        "unpack2x16unorm",
+        "unpack2x16float",
+        "storageBarrier",
+        "workgroupBarrier",
+    };
+
+    // The chosen identifier's offset is relative to the start of the searched region, so it
+    // is shifted by that start before overwriting the identifier in the full string.
+    wgsl_code.replace(body_begin + callee.first, callee.second,
+                      generator_.GetRandomElement(builtin_names));
+    return true;
+}
+
 }  // namespace tint::fuzzers::regex_fuzzer
diff --git a/src/tint/fuzzers/tint_regex_fuzzer/wgsl_mutator.h b/src/tint/fuzzers/tint_regex_fuzzer/wgsl_mutator.h
index 5308bf8..fde4611 100644
--- a/src/tint/fuzzers/tint_regex_fuzzer/wgsl_mutator.h
+++ b/src/tint/fuzzers/tint_regex_fuzzer/wgsl_mutator.h
@@ -84,6 +84,14 @@
     /// @return true if an operator replacement happened or false otherwise.
     bool ReplaceRandomOperator(std::string& wgsl_code);
 
+    /// Picks a random function body in a WGSL-like string and replaces a random identifier in it
+    /// that appears to name a called function with the name of a built-in function. The result is
+    /// often an invalid module, as argument counts and types are not checked for compatibility.
+    /// @param wgsl_code - the initial WGSL-like string that will be mutated.
+    /// @return true if a replacement happened; false if no function body, matching closing brace
+    /// or apparent function call was found.
+    bool ReplaceFunctionCallWithBuiltin(std::string& wgsl_code);
+
   protected:
     /// Given index idx1 it delets the region of length interval_len
     /// starting at index idx1;
@@ -128,6 +136,13 @@
     /// identifiers in wgsl_code.
     std::vector<std::pair<size_t, size_t>> GetIdentifiers(const std::string& wgsl_code);
 
+    /// A function that finds the identifiers in a WGSL-like string that appear to be used as
+    /// function names in function call expressions, i.e. names followed by a '('.
+    /// @param wgsl_code - the WGSL-like string where the identifiers will be found.
+    /// @return a vector with the starting position and the length of each such identifier,
+    /// in order of appearance in wgsl_code; blank space and the '(' are not included.
+    std::vector<std::pair<size_t, size_t>> GetFunctionCallIdentifiers(const std::string& wgsl_code);
+
     /// A function that returns returns the starting position
     /// and the length of all the integer literals in a WGSL-like string.
     /// @param wgsl_code - the WGSL-like string where the int literals