Regex fuzzer: replace operators

A mutation that replaces operators in a WGSL-like string at random.

Fixes: tint:1092.

Change-Id: I912825365f338266d34a1bffb5c8a96cecaba179
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/96404
Kokoro: Kokoro <noreply+kokoro@google.com>
Commit-Queue: Alastair Donaldson <allydonaldson@googlemail.com>
Reviewed-by: Ryan Harrison <rharrison@chromium.org>
diff --git a/src/tint/fuzzers/tint_regex_fuzzer/fuzzer.cc b/src/tint/fuzzers/tint_regex_fuzzer/fuzzer.cc
index 9fe2a6f..83aac1e 100644
--- a/src/tint/fuzzers/tint_regex_fuzzer/fuzzer.cc
+++ b/src/tint/fuzzers/tint_regex_fuzzer/fuzzer.cc
@@ -37,6 +37,7 @@
     kReplaceIdentifier,
     kReplaceLiteral,
     kInsertReturnStatement,
+    kReplaceOperator,
     kNumMutationKinds
 };
 
@@ -102,6 +103,11 @@
             }
             break;
 
+        case MutationKind::kReplaceOperator:
+            if (!mutator.ReplaceRandomOperator(wgsl_code)) {
+                return 0;
+            }
+            break;
         default:
             assert(false && "Unreachable");
             return 0;
diff --git a/src/tint/fuzzers/tint_regex_fuzzer/regex_fuzzer_tests.cc b/src/tint/fuzzers/tint_regex_fuzzer/regex_fuzzer_tests.cc
index 76856a9..32a6619 100644
--- a/src/tint/fuzzers/tint_regex_fuzzer/regex_fuzzer_tests.cc
+++ b/src/tint/fuzzers/tint_regex_fuzzer/regex_fuzzer_tests.cc
@@ -12,6 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+#include <optional>
 #include <string>
 
 #include "gtest/gtest.h"
@@ -28,6 +29,7 @@
     using WgslMutator::DeleteInterval;
     using WgslMutator::DuplicateInterval;
     using WgslMutator::FindClosingBrace;
+    using WgslMutator::FindOperatorOccurrence;
     using WgslMutator::GetFunctionBodyPositions;
     using WgslMutator::GetIdentifiers;
     using WgslMutator::GetIntLiterals;
@@ -543,5 +545,46 @@
     ASSERT_EQ(expected_wgsl_code, wgsl_code);
 }
 
+TEST(TestReplaceOperator, TestIdentifyOperators) {
+    RandomGenerator generator(0);
+    WgslMutatorTest mutator(generator);
+    std::string code =
+        R"(
+x += 2;
+y = a + b;
+z = -a;
+x *= b / c;
+t = t && t | t || t;
+b = b > c ^ c <= d;
+a >>= b;
+b <<= a;
+a = a << 2;
+b = b >> 3;
+c = a % 3;
+d %= e;
+)";
+    // These are the operator occurrences that will be observed by going through the file character
+    // by character. This includes, for example, identifying the ">" operator if search starts after
+    // the first character of ">>".
+    std::vector<std::pair<uint32_t, uint32_t>> operator_occurrences = {
+        {3, 2},   {4, 1},   {11, 1},  {15, 1},  {22, 1},  {24, 1},  {30, 2},  {31, 1},
+        {35, 1},  {42, 1},  {46, 2},  {47, 1},  {51, 1},  {55, 2},  {56, 1},  {63, 1},
+        {67, 1},  {71, 1},  {75, 2},  {76, 1},  {83, 3},  {84, 2},  {85, 1},  {92, 3},
+        {93, 2},  {94, 1},  {101, 1}, {105, 2}, {106, 1}, {113, 1}, {117, 2}, {118, 1},
+        {125, 1}, {129, 1}, {136, 2}, {137, 1}, {3, 2}};
+    uint32_t operator_occurrence_index = 0;
+    for (size_t i = 0; i < code.length(); i++) {
+        // Move on to the next operator occurrence if the current index into the code string exceeds
+        // the index associated with that operator occurrence. Exception: stay with the last
+        // operator occurrence if search has already passed the last operator in the file.
+        if (i < code.length() - 2 && i > operator_occurrences[operator_occurrence_index].first) {
+            operator_occurrence_index =
+                (operator_occurrence_index + 1) % operator_occurrences.size();
+        }
+        ASSERT_EQ(operator_occurrences[operator_occurrence_index],
+                  mutator.FindOperatorOccurrence(code, static_cast<uint32_t>(i)).value());
+    }
+}
+
 }  // namespace
 }  // namespace tint::fuzzers::regex_fuzzer
diff --git a/src/tint/fuzzers/tint_regex_fuzzer/wgsl_mutator.cc b/src/tint/fuzzers/tint_regex_fuzzer/wgsl_mutator.cc
index f525caa..773eea6 100644
--- a/src/tint/fuzzers/tint_regex_fuzzer/wgsl_mutator.cc
+++ b/src/tint/fuzzers/tint_regex_fuzzer/wgsl_mutator.cc
@@ -318,4 +318,119 @@
     return true;
 }
 
+std::string WgslMutator::ChooseRandomReplacementForOperator(const std::string& existing_operator) {
+    // Operators are partitioned into three classes: assignment, expression and increment. The regex
+    // mutator will swap operators in the same class. The hypothesis is that this should exercise a
+    // number of type-safe swaps (e.g. changing += to *=), as well as some badly-typed yet
+    // interesting swaps (e.g. changing + to ^ when the operators are matrices), while avoiding
+    // making totally nonsensical replacements (such as changing ++ too /).
+    std::vector<std::string> assignment_operators{
+        "=", "+=", "-=", "*=", "/=", "%=", "&=", "|=", "^=", "<<=", ">>="};
+    std::vector<std::string> expression_operators{"+",  "-",  "*", "/",  "%",  "&&", "||",
+                                                  "&",  "|",  "^", "<<", ">>", "<",  ">",
+                                                  "<=", ">=", "!", "!=", "~"};
+    std::vector<std::string> increment_operators{"++", "--"};
+    for (auto operators : {assignment_operators, expression_operators, increment_operators}) {
+        auto it = std::find(operators.begin(), operators.end(), existing_operator);
+        if (it != operators.end()) {
+            // The operator falls into this category, so select another operator from the category.
+            operators.erase(it);
+            return generator_.GetRandomElement(operators);
+        }
+    }
+    assert(false && "Unknown operator");
+    return "";
+}
+
+bool WgslMutator::ReplaceRandomOperator(std::string& wgsl_code) {
+    // Choose an index into the code at random.
+    const uint32_t start_index = generator_.GetUInt32(static_cast<uint32_t>(wgsl_code.size()));
+    // Find the first operator occurrence from the chosen point, wrapping back to the start of the
+    // file if needed.
+    auto maybe_operator_occurrence = FindOperatorOccurrence(wgsl_code, start_index);
+    if (!maybe_operator_occurrence.has_value()) {
+        // It is unlikely that there will be *no* operators in the file, but if this is the case
+        // then this mutation cannot be applied.
+        return false;
+    }
+    std::string existing_operator =
+        wgsl_code.substr(maybe_operator_occurrence->first, maybe_operator_occurrence->second);
+    // Replace the identified operator with a randomly-chosen alternative.
+    wgsl_code.replace(maybe_operator_occurrence->first, maybe_operator_occurrence->second,
+                      ChooseRandomReplacementForOperator(existing_operator));
+    return true;
+}
+
+std::optional<std::pair<uint32_t, uint32_t>> WgslMutator::FindOperatorOccurrence(
+    const std::string& wgsl_code,
+    uint32_t start_index) {
+    // Loops through the characters of the code in a wrap-around fashion, looking for the first
+    // encountered token that is a WGSL operator.
+
+    for (size_t i = 0; i < wgsl_code.size(); i++) {
+        uint32_t current_index = static_cast<uint32_t>((start_index + i) % wgsl_code.size());
+
+        // To cater for multi-character operator tokens, get the three consecutive characters from
+        // the code string starting at the current index. Use null characters to account for the
+        // case where search has reached the end of the code string.
+        char first_character = wgsl_code[current_index];
+        char second_character =
+            current_index == wgsl_code.size() - 1 ? '\0' : wgsl_code[current_index + 1];
+        char third_character =
+            current_index >= wgsl_code.size() - 2 ? '\0' : wgsl_code[current_index + 2];
+
+        // This uses the extracted characters to match for the various WGSL operators.
+        switch (first_character) {
+            case '!':
+            case '^':
+                switch (second_character) {
+                    case '=':
+                        return {{current_index, 2}};
+                    default:
+                        return {{current_index, 1}};
+                }
+            case '|':
+            case '&':
+            case '+':
+            case '-':
+                if (second_character == first_character || second_character == '=') {
+                    return {{current_index, 2}};
+                }
+                return {{current_index, 1}};
+            case '*':
+            case '/':
+            case '%':
+                switch (second_character) {
+                    case '=':
+                        return {{current_index, 2}};
+                    default:
+                        return {{current_index, 1}};
+                }
+            case '=':
+                if (second_character == '=') {
+                    return {{current_index, 2}};
+                }
+                return {{current_index, 1}};
+            case '<':
+            case '>':
+                if (second_character == '=') {
+                    return {{current_index, 2}};
+                }
+                if (second_character == first_character) {
+                    if (third_character == '=') {
+                        return {{current_index, 3}};
+                    }
+                    return {{current_index, 2}};
+                }
+                return {{current_index, 1}};
+            case '~':
+                return {{current_index, 1}};
+            default:
+                break;
+        }
+    }
+    // No operator was found, so empty is returned.
+    return {};
+}
+
 }  // namespace tint::fuzzers::regex_fuzzer
diff --git a/src/tint/fuzzers/tint_regex_fuzzer/wgsl_mutator.h b/src/tint/fuzzers/tint_regex_fuzzer/wgsl_mutator.h
index f04aea1..c5ae900 100644
--- a/src/tint/fuzzers/tint_regex_fuzzer/wgsl_mutator.h
+++ b/src/tint/fuzzers/tint_regex_fuzzer/wgsl_mutator.h
@@ -15,6 +15,7 @@
 #ifndef SRC_TINT_FUZZERS_TINT_REGEX_FUZZER_WGSL_MUTATOR_H_
 #define SRC_TINT_FUZZERS_TINT_REGEX_FUZZER_WGSL_MUTATOR_H_
 
+#include <optional>
 #include <string>
 #include <utility>
 #include <vector>
@@ -72,6 +73,12 @@
     /// @return true if the mutation was succesful or false otherwise.
     bool InsertReturnStatement(std::string& wgsl_code);
 
+    /// A function that, given WGSL-like string, generates a new WGSL-like string by replacing one
+    /// randomly-chosen operator in the original string with another operator.
+    /// @param wgsl_code - the initial WGSL-like string that will be mutated.
+    /// @return true if an operator replacement happened or false otherwise.
+    bool ReplaceRandomOperator(std::string& wgsl_code);
+
   protected:
     /// Given index idx1 it delets the region of length interval_len
     /// starting at index idx1;
@@ -147,6 +154,22 @@
                        size_t reg2_len,
                        std::string& wgsl_code);
 
+    /// Finds the next occurrence of an operator in a WGSL-like string from a given starting
+    /// position, wrapping around to the start of the string if no operator is found before reaching
+    /// the end, and returning empty of no operator is found at all. There is no guarantee that the
+    /// result will correspond to a WGSL operator token, e.g. the identified characters could be
+    /// part of a comment, or e.g. the file might contain >>=, in which case the operator
+    /// >= will be identified should it happen that the starting index corresponds to the second >
+    /// character of this operator. Given that the regex mutator does not aim to guarantee
+    /// well-formed programs, these issues are acceptable.
+    /// @param wgsl_code - the WGSL-like string in which operator occurrences will be found.
+    /// @param start_index - the index at which search should start
+    /// @return empty if no operator is found, otherwise a pair comprising the index at which the
+    /// operator starts and the character length of the operator.
+    std::optional<std::pair<uint32_t, uint32_t>> FindOperatorOccurrence(
+        const std::string& wgsl_code,
+        uint32_t start_index);
+
   private:
     /// A function that given a delimiter, returns a vector that contains
     /// all the positions of the delimiter in the WGSL code.
@@ -167,6 +190,13 @@
                          std::string replacement_text,
                          std::string& wgsl_code);
 
+    /// Given a string representing a WGSL operator, randomly returns a different WGSL operator in
+    /// the same category as the original, where the three categories are assignment operators (such
+    /// as = and +=), expression operators (such as + and ^) and increment operators (++ and --).
+    /// @param existing_operator - the characters comprising some WGSL operator
+    /// @return another WGSL operator falling into the same category.
+    std::string ChooseRandomReplacementForOperator(const std::string& existing_operator);
+
     RandomGenerator& generator_;
 };