Regex fuzzer: Add break and continue statements
Adds a break and continue statements to randomly-chosen loops. Also
overhauls support for adding return statements to functions.
Fixes: tint:1125.
Change-Id: Ib1a82b49e3fbb0b5520c725c8b8459d68383bed2
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/96543
Reviewed-by: Ryan Harrison <rharrison@chromium.org>
Commit-Queue: Alastair Donaldson <allydonaldson@googlemail.com>
Kokoro: Kokoro <noreply+kokoro@google.com>
diff --git a/src/tint/fuzzers/tint_regex_fuzzer/fuzzer.cc b/src/tint/fuzzers/tint_regex_fuzzer/fuzzer.cc
index 83aac1e..c5d2aba 100644
--- a/src/tint/fuzzers/tint_regex_fuzzer/fuzzer.cc
+++ b/src/tint/fuzzers/tint_regex_fuzzer/fuzzer.cc
@@ -38,6 +38,7 @@
kReplaceLiteral,
kInsertReturnStatement,
kReplaceOperator,
+ kInsertBreakOrContinue,
kNumMutationKinds
};
@@ -108,6 +109,12 @@
return 0;
}
break;
+
+ case MutationKind::kInsertBreakOrContinue:
+ if (!mutator.InsertBreakOrContinue(wgsl_code)) {
+ return 0;
+ }
+ break;
default:
assert(false && "Unreachable");
return 0;
diff --git a/src/tint/fuzzers/tint_regex_fuzzer/regex_fuzzer_tests.cc b/src/tint/fuzzers/tint_regex_fuzzer/regex_fuzzer_tests.cc
index 32a6619..55ec028 100644
--- a/src/tint/fuzzers/tint_regex_fuzzer/regex_fuzzer_tests.cc
+++ b/src/tint/fuzzers/tint_regex_fuzzer/regex_fuzzer_tests.cc
@@ -33,6 +33,7 @@
using WgslMutator::GetFunctionBodyPositions;
using WgslMutator::GetIdentifiers;
using WgslMutator::GetIntLiterals;
+ using WgslMutator::GetLoopBodyPositions;
using WgslMutator::ReplaceRegion;
using WgslMutator::SwapIntervals;
};
@@ -382,69 +383,7 @@
ASSERT_NE(expected, function_body);
}
-TEST(TestInsertReturn, TestInsertReturn1) {
- RandomGenerator generator(0);
- WgslMutatorTest mutator(generator);
- std::string wgsl_code =
- R"(fn clamp_0acf8f() {
- var res: vec2<f32> = clamp(vec2<f32>(), vec2<f32>(), vec2<f32>());
- }
- @vertex
- fn vertex_main() -> @builtin(position) vec4<f32> {
- clamp_0acf8f();
- var foo_1: i32 = 3;
- return vec4<f32>();
- }
- @fragment
- fn fragment_main() {
- clamp_0acf8f();
- }
- @compute @workgroup_size(1)
- fn compute_main() {
- var<private> foo: f32 = 0.0;
- var foo_2: i32 = 10;
- clamp_0acf8f();
- }
- foo_1 = 5 + 7;
- var foo_3 : i32 = -20;)";
-
- std::vector<size_t> semicolon_pos;
- for (size_t pos = wgsl_code.find(";", 0); pos != std::string::npos;
- pos = wgsl_code.find(";", pos + 1)) {
- semicolon_pos.push_back(pos);
- }
-
- // should insert a return true statement after the first semicolon of the
- // first function the the WGSL-like string above.
- wgsl_code.insert(semicolon_pos[0] + 1, "return true;");
-
- std::string expected_wgsl_code =
- R"(fn clamp_0acf8f() {
- var res: vec2<f32> = clamp(vec2<f32>(), vec2<f32>(), vec2<f32>());return true;
- }
- @vertex
- fn vertex_main() -> @builtin(position) vec4<f32> {
- clamp_0acf8f();
- var foo_1: i32 = 3;
- return vec4<f32>();
- }
- @fragment
- fn fragment_main() {
- clamp_0acf8f();
- }
- @compute @workgroup_size(1)
- fn compute_main() {
- var<private> foo: f32 = 0.0;
- var foo_2: i32 = 10;
- clamp_0acf8f();
- }
- foo_1 = 5 + 7;
- var foo_3 : i32 = -20;)";
-
- ASSERT_EQ(expected_wgsl_code, wgsl_code);
-}
-
-TEST(TestInsertReturn, TestFunctionPositions) {
+TEST(TestInsertReturn, TestFunctionPositions1) {
RandomGenerator generator(0);
WgslMutatorTest mutator(generator);
std::string wgsl_code =
@@ -475,8 +414,32 @@
foo_1 = 5 + 7;
var foo_3 : i32 = -20;)";
- std::vector<size_t> function_positions = mutator.GetFunctionBodyPositions(wgsl_code);
- std::vector<size_t> expected_positions = {180, 586};
+ std::vector<std::pair<size_t, bool>> function_positions =
+ mutator.GetFunctionBodyPositions(wgsl_code);
+ std::vector<std::pair<size_t, bool>> expected_positions = {
+ {18, false}, {180, true}, {323, false}, {423, false}, {586, true}};
+ ASSERT_EQ(expected_positions, function_positions);
+}
+
+TEST(TestInsertReturn, TestFunctionPositions2) {
+ RandomGenerator generator(0);
+ WgslMutatorTest mutator(generator);
+ std::string wgsl_code =
+ R"(fn some_loop_body() {
+}
+
+fn f() {
+ var j : i32; i = (i + 1)) {
+ some_loop_body(); ((i < 5) && (j < 10));
+ for(var i : i32 = 0;
+ j = (i * 30);
+ }
+}
+)";
+
+ std::vector<std::pair<size_t, bool>> function_positions =
+ mutator.GetFunctionBodyPositions(wgsl_code);
+ std::vector<std::pair<size_t, bool>> expected_positions = {{20, false}, {32, false}};
ASSERT_EQ(expected_positions, function_positions);
}
@@ -586,5 +549,67 @@
}
}
+TEST(TestInsertBreakOrContinue, TestLoopPositions1) {
+ RandomGenerator generator(0);
+ WgslMutatorTest mutator(generator);
+ std::string wgsl_code = " loop { } loop { } loop { }";
+ std::vector<size_t> loop_positions = mutator.GetLoopBodyPositions(wgsl_code);
+ std::vector<size_t> expected_positions = {6, 15, 24};
+ ASSERT_EQ(expected_positions, loop_positions);
+}
+
+TEST(TestInsertBreakOrContinue, TestLoopPositions2) {
+ RandomGenerator generator(0);
+ WgslMutatorTest mutator(generator);
+ std::string wgsl_code = R"( loop { } loop
+{ } loop { })";
+ std::vector<size_t> loop_positions = mutator.GetLoopBodyPositions(wgsl_code);
+ std::vector<size_t> expected_positions = {6, 15, 24};
+ ASSERT_EQ(expected_positions, loop_positions);
+}
+
+TEST(TestInsertBreakOrContinue, TestLoopPositions3) {
+ RandomGenerator generator(0);
+ WgslMutatorTest mutator(generator);
+ // This WGSL-like code is not valid, but it suffices to test regex-based matching (which is
+ // intended to work well on semi-valid code).
+ std::string wgsl_code =
+ R"(fn compute_main() {
+ loop {
+ var twice: i32 = 2 * i;
+ i++;
+ if i == 5 { break; }
+ loop
+ {
+ var twice: i32 = 2 * i;
+ i++;
+ while (i < 100) { i++; }
+ if i == 5 { break; }
+ }
+ }
+ for (a = 0; a < 100; a++) {
+ if (a > 50) {
+ break;
+ }
+ while (i < 100) { i++; }
+ }
+})";
+
+ std::vector<size_t> loop_positions = mutator.GetLoopBodyPositions(wgsl_code);
+ std::vector<size_t> expected_positions = {27, 108, 173, 249, 310};
+ ASSERT_EQ(expected_positions, loop_positions);
+}
+
+TEST(TestInsertBreakOrContinue, TestLoopPositions4) {
+ RandomGenerator generator(0);
+ WgslMutatorTest mutator(generator);
+ // This WGSL-like code is not valid, but it suffices to test regex-based matching (which is
+ // intended to work well on semi-valid code).
+ std::string wgsl_code = R"(unifor { } uniform { } sloop { } _loop { } _while { } awhile { } )";
+
+ std::vector<size_t> loop_positions = mutator.GetLoopBodyPositions(wgsl_code);
+ ASSERT_TRUE(loop_positions.empty());
+}
+
} // namespace
} // namespace tint::fuzzers::regex_fuzzer
diff --git a/src/tint/fuzzers/tint_regex_fuzzer/wgsl_mutator.cc b/src/tint/fuzzers/tint_regex_fuzzer/wgsl_mutator.cc
index 773eea6..0d20831 100644
--- a/src/tint/fuzzers/tint_regex_fuzzer/wgsl_mutator.cc
+++ b/src/tint/fuzzers/tint_regex_fuzzer/wgsl_mutator.cc
@@ -99,33 +99,98 @@
return (pos == wgsl_code.size() && open_bracket_count >= 1) ? 0 : pos - 1;
}
-std::vector<size_t> WgslMutator::GetFunctionBodyPositions(const std::string& wgsl_code) {
+std::vector<std::pair<size_t, bool>> WgslMutator::GetFunctionBodyPositions(
+ const std::string& wgsl_code) {
// Finds all the functions with a non-void return value.
- std::regex function_regex("fn.*?->.*?\\{");
- std::smatch match;
+ std::regex function_regex("fn[^a-zA-Z_0-9][^\\{]*\\{");
+ std::vector<std::pair<size_t, bool>> result;
+
+ auto functions_begin = std::sregex_iterator(wgsl_code.begin(), wgsl_code.end(), function_regex);
+ auto functions_end = std::sregex_iterator();
+
+ for (std::sregex_iterator i = functions_begin; i != functions_end; ++i) {
+ bool returns_value = i->str().find("->") != std::string::npos;
+ result.push_back(
+ {static_cast<size_t>(i->suffix().first - wgsl_code.cbegin() - 1), returns_value});
+ }
+ return result;
+}
+
+std::vector<size_t> WgslMutator::GetLoopBodyPositions(const std::string& wgsl_code) {
+ // Finds all loops.
+ std::regex loop_regex("[^a-zA-Z_0-9](for|while|loop)[^\\{]*\\{");
std::vector<size_t> result;
- auto search_start(wgsl_code.cbegin());
- std::string prefix = "";
+ auto loops_begin = std::sregex_iterator(wgsl_code.begin(), wgsl_code.end(), loop_regex);
+ auto loops_end = std::sregex_iterator();
- while (std::regex_search(search_start, wgsl_code.cend(), match, function_regex)) {
- result.push_back(static_cast<size_t>(match.suffix().first - wgsl_code.cbegin() - 1L));
- search_start = match.suffix().first;
+ for (std::sregex_iterator i = loops_begin; i != loops_end; ++i) {
+ result.push_back(static_cast<size_t>(i->suffix().first - wgsl_code.cbegin() - 1));
}
return result;
}
bool WgslMutator::InsertReturnStatement(std::string& wgsl_code) {
- std::vector<size_t> function_body_positions = GetFunctionBodyPositions(wgsl_code);
+ std::vector<std::pair<size_t, bool>> function_body_positions =
+ GetFunctionBodyPositions(wgsl_code);
// No function was found in wgsl_code.
if (function_body_positions.empty()) {
return false;
}
- // Pick a random function's opening bracket, find the corresponding closing
- // bracket, and find a semi-colon within the function body.
- size_t left_bracket_pos = generator_.GetRandomElement(function_body_positions);
+ // Pick a random function
+ auto function = generator_.GetRandomElement(function_body_positions);
+
+ // Find the corresponding closing bracket for the function, and find a semi-colon within the
+ // function body.
+ size_t left_bracket_pos = function.first;
+
+ size_t right_bracket_pos = FindClosingBrace(left_bracket_pos, wgsl_code);
+
+ if (right_bracket_pos == 0) {
+ return false;
+ }
+
+ std::vector<size_t> semicolon_positions;
+ for (size_t pos = wgsl_code.find(";", left_bracket_pos + 1); pos < right_bracket_pos;
+ pos = wgsl_code.find(";", pos + 1)) {
+ semicolon_positions.push_back(pos);
+ }
+
+ if (semicolon_positions.empty()) {
+ return false;
+ }
+
+ std::string return_statement = "return";
+ if (function.second) {
+ // The function returns a value. Get all identifiers and integer literals to use as
+ // potential return values.
+ std::vector<std::pair<size_t, size_t>> identifiers = GetIdentifiers(wgsl_code);
+ auto return_values = identifiers;
+ std::vector<std::pair<size_t, size_t>> int_literals = GetIntLiterals(wgsl_code);
+ return_values.insert(return_values.end(), int_literals.begin(), int_literals.end());
+ std::pair<size_t, size_t> return_value = generator_.GetRandomElement(return_values);
+ return_statement += " " + wgsl_code.substr(return_value.first, return_value.second);
+ }
+ return_statement += ";";
+
+ // Insert the return statement immediately after the semicolon.
+ wgsl_code.insert(generator_.GetRandomElement(semicolon_positions) + 1, return_statement);
+ return true;
+}
+
+bool WgslMutator::InsertBreakOrContinue(std::string& wgsl_code) {
+ std::vector<size_t> loop_body_positions = GetLoopBodyPositions(wgsl_code);
+
+ // No loop was found in wgsl_code.
+ if (loop_body_positions.empty()) {
+ return false;
+ }
+
+ // Pick a random loop's opening bracket, find the corresponding closing
+ // bracket, and find a semi-colon within the loop body.
+ size_t left_bracket_pos = generator_.GetRandomElement(loop_body_positions);
size_t right_bracket_pos = FindClosingBrace(left_bracket_pos, wgsl_code);
@@ -145,17 +210,8 @@
size_t semicolon_position = generator_.GetRandomElement(semicolon_positions);
- // Get all identifiers and integer literals to use as potential return values.
- std::vector<std::pair<size_t, size_t>> identifiers = GetIdentifiers(wgsl_code);
- auto return_values = identifiers;
- std::vector<std::pair<size_t, size_t>> int_literals = GetIntLiterals(wgsl_code);
- return_values.insert(return_values.end(), int_literals.begin(), int_literals.end());
- std::pair<size_t, size_t> return_value = generator_.GetRandomElement(return_values);
- std::string return_statement =
- "return " + wgsl_code.substr(return_value.first, return_value.second) + ";";
-
- // Insert the return statement immediately after the semicolon.
- wgsl_code.insert(semicolon_position + 1, return_statement);
+ // Insert a break or continue immediately after the semicolon.
+ wgsl_code.insert(semicolon_position + 1, generator_.GetBool() ? "break;" : "continue;");
return true;
}
diff --git a/src/tint/fuzzers/tint_regex_fuzzer/wgsl_mutator.h b/src/tint/fuzzers/tint_regex_fuzzer/wgsl_mutator.h
index c5ae900..5308bf8 100644
--- a/src/tint/fuzzers/tint_regex_fuzzer/wgsl_mutator.h
+++ b/src/tint/fuzzers/tint_regex_fuzzer/wgsl_mutator.h
@@ -73,6 +73,11 @@
/// @return true if the mutation was succesful or false otherwise.
bool InsertReturnStatement(std::string& wgsl_code);
+ /// Inserts a break or continue statement in a randomly chosen loop of a WGSL-like string.
+ /// @param wgsl_code - WGSL-like string that will be mutated.
+ /// @return true if the mutation was succesful or false otherwise.
+ bool InsertBreakOrContinue(std::string& wgsl_code);
+
/// A function that, given WGSL-like string, generates a new WGSL-like string by replacing one
/// randomly-chosen operator in the original string with another operator.
/// @param wgsl_code - the initial WGSL-like string that will be mutated.
@@ -103,14 +108,19 @@
/// brace.
size_t FindClosingBrace(size_t opening_bracket_pos, const std::string& wgsl_code);
- /// Returns the starting_position of the bodies of the functions
- /// that follow the regular expression: fn.*?->.*?\\{, which searches for the
- /// keyword fn followed by the function name, its return type and opening brace.
+ /// Returns the starting position of the bodies of the functions identified by an appropriate
+ /// function, together with a boolean indicating whether the function returns a value or not.
/// @param wgsl_code - the WGSL-like string where the functions will be
/// searched.
- /// @return a vector with the starting position of the function bodies in
- /// wgsl_code.
- std::vector<size_t> GetFunctionBodyPositions(const std::string& wgsl_code);
+ /// @return a vector of pairs, where each pair provides the starting position of the function
+ /// body, and the value true if and only if the function returns a value.
+ std::vector<std::pair<size_t, bool>> GetFunctionBodyPositions(const std::string& wgsl_code);
+
+ /// Returns the starting position of the bodies of the loops identified by an appropriate
+ /// regular expressions.
+ /// @param wgsl_code - the WGSL-like string in which loops will be searched for.
+ /// @return a vector with the starting position of the loop bodies in wgsl_code.
+ std::vector<size_t> GetLoopBodyPositions(const std::string& wgsl_code);
/// A function that finds all the identifiers in a WGSL-like string.
/// @param wgsl_code - the WGSL-like string where the identifiers will be found.