Regex fuzzer: replace operators
A mutation that replaces operators in a WGSL-like string at random.
Fixes: tint:1092.
Change-Id: I912825365f338266d34a1bffb5c8a96cecaba179
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/96404
Kokoro: Kokoro <noreply+kokoro@google.com>
Commit-Queue: Alastair Donaldson <allydonaldson@googlemail.com>
Reviewed-by: Ryan Harrison <rharrison@chromium.org>
diff --git a/src/tint/fuzzers/tint_regex_fuzzer/fuzzer.cc b/src/tint/fuzzers/tint_regex_fuzzer/fuzzer.cc
index 9fe2a6f..83aac1e 100644
--- a/src/tint/fuzzers/tint_regex_fuzzer/fuzzer.cc
+++ b/src/tint/fuzzers/tint_regex_fuzzer/fuzzer.cc
@@ -37,6 +37,7 @@
kReplaceIdentifier,
kReplaceLiteral,
kInsertReturnStatement,
+ kReplaceOperator,
kNumMutationKinds
};
@@ -102,6 +103,11 @@
}
break;
+ case MutationKind::kReplaceOperator:
+ if (!mutator.ReplaceRandomOperator(wgsl_code)) {
+ return 0;
+ }
+ break;
default:
assert(false && "Unreachable");
return 0;
diff --git a/src/tint/fuzzers/tint_regex_fuzzer/regex_fuzzer_tests.cc b/src/tint/fuzzers/tint_regex_fuzzer/regex_fuzzer_tests.cc
index 76856a9..32a6619 100644
--- a/src/tint/fuzzers/tint_regex_fuzzer/regex_fuzzer_tests.cc
+++ b/src/tint/fuzzers/tint_regex_fuzzer/regex_fuzzer_tests.cc
@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+#include <optional>
#include <string>
#include "gtest/gtest.h"
@@ -28,6 +29,7 @@
using WgslMutator::DeleteInterval;
using WgslMutator::DuplicateInterval;
using WgslMutator::FindClosingBrace;
+ using WgslMutator::FindOperatorOccurrence;
using WgslMutator::GetFunctionBodyPositions;
using WgslMutator::GetIdentifiers;
using WgslMutator::GetIntLiterals;
@@ -543,5 +545,46 @@
ASSERT_EQ(expected_wgsl_code, wgsl_code);
}
+TEST(TestReplaceOperator, TestIdentifyOperators) {
+ RandomGenerator generator(0);
+ WgslMutatorTest mutator(generator);
+ std::string code =
+ R"(
+x += 2;
+y = a + b;
+z = -a;
+x *= b / c;
+t = t && t | t || t;
+b = b > c ^ c <= d;
+a >>= b;
+b <<= a;
+a = a << 2;
+b = b >> 3;
+c = a % 3;
+d %= e;
+)";
+ // These are the operator occurrences that will be observed by going through the file character
+ // by character. This includes, for example, identifying the ">" operator if search starts after
+ // the first character of ">>".
+ std::vector<std::pair<uint32_t, uint32_t>> operator_occurrences = {
+ {3, 2}, {4, 1}, {11, 1}, {15, 1}, {22, 1}, {24, 1}, {30, 2}, {31, 1},
+ {35, 1}, {42, 1}, {46, 2}, {47, 1}, {51, 1}, {55, 2}, {56, 1}, {63, 1},
+ {67, 1}, {71, 1}, {75, 2}, {76, 1}, {83, 3}, {84, 2}, {85, 1}, {92, 3},
+ {93, 2}, {94, 1}, {101, 1}, {105, 2}, {106, 1}, {113, 1}, {117, 2}, {118, 1},
+ {125, 1}, {129, 1}, {136, 2}, {137, 1}, {3, 2}};
+ uint32_t operator_occurrence_index = 0;
+ for (size_t i = 0; i < code.length(); i++) {
+ // Move on to the next operator occurrence if the current index into the code string exceeds
+ // the index associated with that operator occurrence. Exception: stay with the last
+ // operator occurrence if search has already passed the last operator in the file.
+ if (i < code.length() - 2 && i > operator_occurrences[operator_occurrence_index].first) {
+ operator_occurrence_index =
+ (operator_occurrence_index + 1) % operator_occurrences.size();
+ }
+ ASSERT_EQ(operator_occurrences[operator_occurrence_index],
+ mutator.FindOperatorOccurrence(code, static_cast<uint32_t>(i)).value());
+ }
+}
+
} // namespace
} // namespace tint::fuzzers::regex_fuzzer
diff --git a/src/tint/fuzzers/tint_regex_fuzzer/wgsl_mutator.cc b/src/tint/fuzzers/tint_regex_fuzzer/wgsl_mutator.cc
index f525caa..773eea6 100644
--- a/src/tint/fuzzers/tint_regex_fuzzer/wgsl_mutator.cc
+++ b/src/tint/fuzzers/tint_regex_fuzzer/wgsl_mutator.cc
@@ -318,4 +318,119 @@
return true;
}
+std::string WgslMutator::ChooseRandomReplacementForOperator(const std::string& existing_operator) {
+ // Operators are partitioned into three classes: assignment, expression and increment. The regex
+ // mutator will swap operators in the same class. The hypothesis is that this should exercise a
+ // number of type-safe swaps (e.g. changing += to *=), as well as some badly-typed yet
+ // interesting swaps (e.g. changing + to ^ when the operators are matrices), while avoiding
+ // making totally nonsensical replacements (such as changing ++ too /).
+ std::vector<std::string> assignment_operators{
+ "=", "+=", "-=", "*=", "/=", "%=", "&=", "|=", "^=", "<<=", ">>="};
+ std::vector<std::string> expression_operators{"+", "-", "*", "/", "%", "&&", "||",
+ "&", "|", "^", "<<", ">>", "<", ">",
+ "<=", ">=", "!", "!=", "~"};
+ std::vector<std::string> increment_operators{"++", "--"};
+ for (auto operators : {assignment_operators, expression_operators, increment_operators}) {
+ auto it = std::find(operators.begin(), operators.end(), existing_operator);
+ if (it != operators.end()) {
+ // The operator falls into this category, so select another operator from the category.
+ operators.erase(it);
+ return generator_.GetRandomElement(operators);
+ }
+ }
+ assert(false && "Unknown operator");
+ return "";
+}
+
+bool WgslMutator::ReplaceRandomOperator(std::string& wgsl_code) {
+ // Choose an index into the code at random.
+ const uint32_t start_index = generator_.GetUInt32(static_cast<uint32_t>(wgsl_code.size()));
+ // Find the first operator occurrence from the chosen point, wrapping back to the start of the
+ // file if needed.
+ auto maybe_operator_occurrence = FindOperatorOccurrence(wgsl_code, start_index);
+ if (!maybe_operator_occurrence.has_value()) {
+ // It is unlikely that there will be *no* operators in the file, but if this is the case
+ // then this mutation cannot be applied.
+ return false;
+ }
+ std::string existing_operator =
+ wgsl_code.substr(maybe_operator_occurrence->first, maybe_operator_occurrence->second);
+ // Replace the identified operator with a randomly-chosen alternative.
+ wgsl_code.replace(maybe_operator_occurrence->first, maybe_operator_occurrence->second,
+ ChooseRandomReplacementForOperator(existing_operator));
+ return true;
+}
+
+std::optional<std::pair<uint32_t, uint32_t>> WgslMutator::FindOperatorOccurrence(
+ const std::string& wgsl_code,
+ uint32_t start_index) {
+ // Loops through the characters of the code in a wrap-around fashion, looking for the first
+ // encountered token that is a WGSL operator.
+
+ for (size_t i = 0; i < wgsl_code.size(); i++) {
+ uint32_t current_index = static_cast<uint32_t>((start_index + i) % wgsl_code.size());
+
+ // To cater for multi-character operator tokens, get the three consecutive characters from
+ // the code string starting at the current index. Use null characters to account for the
+ // case where search has reached the end of the code string.
+ char first_character = wgsl_code[current_index];
+ char second_character =
+ current_index == wgsl_code.size() - 1 ? '\0' : wgsl_code[current_index + 1];
+ char third_character =
+ current_index >= wgsl_code.size() - 2 ? '\0' : wgsl_code[current_index + 2];
+
+ // This uses the extracted characters to match for the various WGSL operators.
+ switch (first_character) {
+ case '!':
+ case '^':
+ switch (second_character) {
+ case '=':
+ return {{current_index, 2}};
+ default:
+ return {{current_index, 1}};
+ }
+ case '|':
+ case '&':
+ case '+':
+ case '-':
+ if (second_character == first_character || second_character == '=') {
+ return {{current_index, 2}};
+ }
+ return {{current_index, 1}};
+ case '*':
+ case '/':
+ case '%':
+ switch (second_character) {
+ case '=':
+ return {{current_index, 2}};
+ default:
+ return {{current_index, 1}};
+ }
+ case '=':
+ if (second_character == '=') {
+ return {{current_index, 2}};
+ }
+ return {{current_index, 1}};
+ case '<':
+ case '>':
+ if (second_character == '=') {
+ return {{current_index, 2}};
+ }
+ if (second_character == first_character) {
+ if (third_character == '=') {
+ return {{current_index, 3}};
+ }
+ return {{current_index, 2}};
+ }
+ return {{current_index, 1}};
+ case '~':
+ return {{current_index, 1}};
+ default:
+ break;
+ }
+ }
+ // No operator was found, so empty is returned.
+ return {};
+}
+
} // namespace tint::fuzzers::regex_fuzzer
diff --git a/src/tint/fuzzers/tint_regex_fuzzer/wgsl_mutator.h b/src/tint/fuzzers/tint_regex_fuzzer/wgsl_mutator.h
index f04aea1..c5ae900 100644
--- a/src/tint/fuzzers/tint_regex_fuzzer/wgsl_mutator.h
+++ b/src/tint/fuzzers/tint_regex_fuzzer/wgsl_mutator.h
@@ -15,6 +15,7 @@
#ifndef SRC_TINT_FUZZERS_TINT_REGEX_FUZZER_WGSL_MUTATOR_H_
#define SRC_TINT_FUZZERS_TINT_REGEX_FUZZER_WGSL_MUTATOR_H_
+#include <optional>
#include <string>
#include <utility>
#include <vector>
@@ -72,6 +73,12 @@
/// @return true if the mutation was succesful or false otherwise.
bool InsertReturnStatement(std::string& wgsl_code);
+ /// A function that, given WGSL-like string, generates a new WGSL-like string by replacing one
+ /// randomly-chosen operator in the original string with another operator.
+ /// @param wgsl_code - the initial WGSL-like string that will be mutated.
+ /// @return true if an operator replacement happened or false otherwise.
+ bool ReplaceRandomOperator(std::string& wgsl_code);
+
protected:
/// Given index idx1 it delets the region of length interval_len
/// starting at index idx1;
@@ -147,6 +154,22 @@
size_t reg2_len,
std::string& wgsl_code);
+ /// Finds the next occurrence of an operator in a WGSL-like string from a given starting
+ /// position, wrapping around to the start of the string if no operator is found before reaching
+ /// the end, and returning empty of no operator is found at all. There is no guarantee that the
+ /// result will correspond to a WGSL operator token, e.g. the identified characters could be
+ /// part of a comment, or e.g. the file might contain >>=, in which case the operator
+ /// >= will be identified should it happen that the starting index corresponds to the second >
+ /// character of this operator. Given that the regex mutator does not aim to guarantee
+ /// well-formed programs, these issues are acceptable.
+ /// @param wgsl_code - the WGSL-like string in which operator occurrences will be found.
+ /// @param start_index - the index at which search should start
+ /// @return empty if no operator is found, otherwise a pair comprising the index at which the
+ /// operator starts and the character length of the operator.
+ std::optional<std::pair<uint32_t, uint32_t>> FindOperatorOccurrence(
+ const std::string& wgsl_code,
+ uint32_t start_index);
+
private:
/// A function that given a delimiter, returns a vector that contains
/// all the positions of the delimiter in the WGSL code.
@@ -167,6 +190,13 @@
std::string replacement_text,
std::string& wgsl_code);
+ /// Given a string representing a WGSL operator, randomly returns a different WGSL operator in
+ /// the same category as the original, where the three categories are assignment operators (such
+ /// as = and +=), expression operators (such as + and ^) and increment operators (++ and --).
+ /// @param existing_operator - the characters comprising some WGSL operator
+ /// @return another WGSL operator falling into the same category.
+ std::string ChooseRandomReplacementForOperator(const std::string& existing_operator);
+
RandomGenerator& generator_;
};