Regex fuzzer: Add break and continue statements

Adds a break and continue statements to randomly-chosen loops. Also
overhauls support for adding return statements to functions.

Fixes: tint:1125.
Change-Id: Ib1a82b49e3fbb0b5520c725c8b8459d68383bed2
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/96543
Reviewed-by: Ryan Harrison <rharrison@chromium.org>
Commit-Queue: Alastair Donaldson <allydonaldson@googlemail.com>
Kokoro: Kokoro <noreply+kokoro@google.com>
diff --git a/src/tint/fuzzers/tint_regex_fuzzer/fuzzer.cc b/src/tint/fuzzers/tint_regex_fuzzer/fuzzer.cc
index 83aac1e..c5d2aba 100644
--- a/src/tint/fuzzers/tint_regex_fuzzer/fuzzer.cc
+++ b/src/tint/fuzzers/tint_regex_fuzzer/fuzzer.cc
@@ -38,6 +38,7 @@
     kReplaceLiteral,
     kInsertReturnStatement,
     kReplaceOperator,
+    kInsertBreakOrContinue,
     kNumMutationKinds
 };
 
@@ -108,6 +109,12 @@
                 return 0;
             }
             break;
+
+        case MutationKind::kInsertBreakOrContinue:
+            if (!mutator.InsertBreakOrContinue(wgsl_code)) {
+                return 0;
+            }
+            break;
         default:
             assert(false && "Unreachable");
             return 0;
diff --git a/src/tint/fuzzers/tint_regex_fuzzer/regex_fuzzer_tests.cc b/src/tint/fuzzers/tint_regex_fuzzer/regex_fuzzer_tests.cc
index 32a6619..55ec028 100644
--- a/src/tint/fuzzers/tint_regex_fuzzer/regex_fuzzer_tests.cc
+++ b/src/tint/fuzzers/tint_regex_fuzzer/regex_fuzzer_tests.cc
@@ -33,6 +33,7 @@
     using WgslMutator::GetFunctionBodyPositions;
     using WgslMutator::GetIdentifiers;
     using WgslMutator::GetIntLiterals;
+    using WgslMutator::GetLoopBodyPositions;
     using WgslMutator::ReplaceRegion;
     using WgslMutator::SwapIntervals;
 };
@@ -382,69 +383,7 @@
     ASSERT_NE(expected, function_body);
 }
 
-TEST(TestInsertReturn, TestInsertReturn1) {
-    RandomGenerator generator(0);
-    WgslMutatorTest mutator(generator);
-    std::string wgsl_code =
-        R"(fn clamp_0acf8f() {
-        var res: vec2<f32> = clamp(vec2<f32>(), vec2<f32>(), vec2<f32>());
-      }
-      @vertex
-      fn vertex_main() -> @builtin(position) vec4<f32> {
-        clamp_0acf8f();
-        var foo_1: i32 = 3;
-        return vec4<f32>();
-      }
-      @fragment
-      fn fragment_main() {
-        clamp_0acf8f();
-      }
-      @compute @workgroup_size(1)
-      fn compute_main() {
-        var<private> foo: f32 = 0.0;
-        var foo_2: i32 = 10;
-        clamp_0acf8f();
-      }
-      foo_1 = 5 + 7;
-      var foo_3 : i32 = -20;)";
-
-    std::vector<size_t> semicolon_pos;
-    for (size_t pos = wgsl_code.find(";", 0); pos != std::string::npos;
-         pos = wgsl_code.find(";", pos + 1)) {
-        semicolon_pos.push_back(pos);
-    }
-
-    // should insert a return true statement after the first semicolon of the
-    // first function the the WGSL-like string above.
-    wgsl_code.insert(semicolon_pos[0] + 1, "return true;");
-
-    std::string expected_wgsl_code =
-        R"(fn clamp_0acf8f() {
-        var res: vec2<f32> = clamp(vec2<f32>(), vec2<f32>(), vec2<f32>());return true;
-      }
-      @vertex
-      fn vertex_main() -> @builtin(position) vec4<f32> {
-        clamp_0acf8f();
-        var foo_1: i32 = 3;
-        return vec4<f32>();
-      }
-      @fragment
-      fn fragment_main() {
-        clamp_0acf8f();
-      }
-      @compute @workgroup_size(1)
-      fn compute_main() {
-        var<private> foo: f32 = 0.0;
-        var foo_2: i32 = 10;
-        clamp_0acf8f();
-      }
-      foo_1 = 5 + 7;
-      var foo_3 : i32 = -20;)";
-
-    ASSERT_EQ(expected_wgsl_code, wgsl_code);
-}
-
-TEST(TestInsertReturn, TestFunctionPositions) {
+TEST(TestInsertReturn, TestFunctionPositions1) {
     RandomGenerator generator(0);
     WgslMutatorTest mutator(generator);
     std::string wgsl_code =
@@ -475,8 +414,32 @@
         foo_1 = 5 + 7;
         var foo_3 : i32 = -20;)";
 
-    std::vector<size_t> function_positions = mutator.GetFunctionBodyPositions(wgsl_code);
-    std::vector<size_t> expected_positions = {180, 586};
+    std::vector<std::pair<size_t, bool>> function_positions =
+        mutator.GetFunctionBodyPositions(wgsl_code);
+    std::vector<std::pair<size_t, bool>> expected_positions = {
+        {18, false}, {180, true}, {323, false}, {423, false}, {586, true}};
+    ASSERT_EQ(expected_positions, function_positions);
+}
+
+TEST(TestInsertReturn, TestFunctionPositions2) {
+    RandomGenerator generator(0);
+    WgslMutatorTest mutator(generator);
+    std::string wgsl_code =
+        R"(fn some_loop_body() {
+}
+
+fn f() {
+  var j : i32; i = (i + 1)) {
+    some_loop_body(); ((i < 5) && (j < 10));
+  for(var i : i32 = 0;
+    j = (i * 30);
+  }
+}
+)";
+
+    std::vector<std::pair<size_t, bool>> function_positions =
+        mutator.GetFunctionBodyPositions(wgsl_code);
+    std::vector<std::pair<size_t, bool>> expected_positions = {{20, false}, {32, false}};
     ASSERT_EQ(expected_positions, function_positions);
 }
 
@@ -586,5 +549,67 @@
     }
 }
 
+TEST(TestInsertBreakOrContinue, TestLoopPositions1) {
+    RandomGenerator generator(0);
+    WgslMutatorTest mutator(generator);
+    std::string wgsl_code = " loop { } loop { } loop { }";
+    std::vector<size_t> loop_positions = mutator.GetLoopBodyPositions(wgsl_code);
+    std::vector<size_t> expected_positions = {6, 15, 24};
+    ASSERT_EQ(expected_positions, loop_positions);
+}
+
+TEST(TestInsertBreakOrContinue, TestLoopPositions2) {
+    RandomGenerator generator(0);
+    WgslMutatorTest mutator(generator);
+    std::string wgsl_code = R"( loop { } loop
+{ } loop { })";
+    std::vector<size_t> loop_positions = mutator.GetLoopBodyPositions(wgsl_code);
+    std::vector<size_t> expected_positions = {6, 15, 24};
+    ASSERT_EQ(expected_positions, loop_positions);
+}
+
+TEST(TestInsertBreakOrContinue, TestLoopPositions3) {
+    RandomGenerator generator(0);
+    WgslMutatorTest mutator(generator);
+    // This WGSL-like code is not valid, but it suffices to test regex-based matching (which is
+    // intended to work well on semi-valid code).
+    std::string wgsl_code =
+        R"(fn compute_main() {
+  loop {
+    var twice: i32 = 2 * i;
+    i++;
+    if i == 5 { break; }
+      loop
+      {
+      var twice: i32 = 2 * i;
+      i++;
+      while (i < 100) { i++; }
+      if i == 5 { break; }
+    }
+  }
+  for (a = 0; a < 100; a++)   {
+    if (a > 50) {
+      break;
+    }
+      while (i < 100) { i++; }
+  }
+})";
+
+    std::vector<size_t> loop_positions = mutator.GetLoopBodyPositions(wgsl_code);
+    std::vector<size_t> expected_positions = {27, 108, 173, 249, 310};
+    ASSERT_EQ(expected_positions, loop_positions);
+}
+
+TEST(TestInsertBreakOrContinue, TestLoopPositions4) {
+    RandomGenerator generator(0);
+    WgslMutatorTest mutator(generator);
+    // This WGSL-like code is not valid, but it suffices to test regex-based matching (which is
+    // intended to work well on semi-valid code).
+    std::string wgsl_code = R"(unifor { } uniform { } sloop { } _loop { } _while { } awhile { } )";
+
+    std::vector<size_t> loop_positions = mutator.GetLoopBodyPositions(wgsl_code);
+    ASSERT_TRUE(loop_positions.empty());
+}
+
 }  // namespace
 }  // namespace tint::fuzzers::regex_fuzzer
diff --git a/src/tint/fuzzers/tint_regex_fuzzer/wgsl_mutator.cc b/src/tint/fuzzers/tint_regex_fuzzer/wgsl_mutator.cc
index 773eea6..0d20831 100644
--- a/src/tint/fuzzers/tint_regex_fuzzer/wgsl_mutator.cc
+++ b/src/tint/fuzzers/tint_regex_fuzzer/wgsl_mutator.cc
@@ -99,33 +99,98 @@
     return (pos == wgsl_code.size() && open_bracket_count >= 1) ? 0 : pos - 1;
 }
 
-std::vector<size_t> WgslMutator::GetFunctionBodyPositions(const std::string& wgsl_code) {
+std::vector<std::pair<size_t, bool>> WgslMutator::GetFunctionBodyPositions(
+    const std::string& wgsl_code) {
     // Finds all the functions with a non-void return value.
-    std::regex function_regex("fn.*?->.*?\\{");
-    std::smatch match;
+    std::regex function_regex("fn[^a-zA-Z_0-9][^\\{]*\\{");
+    std::vector<std::pair<size_t, bool>> result;
+
+    auto functions_begin = std::sregex_iterator(wgsl_code.begin(), wgsl_code.end(), function_regex);
+    auto functions_end = std::sregex_iterator();
+
+    for (std::sregex_iterator i = functions_begin; i != functions_end; ++i) {
+        bool returns_value = i->str().find("->") != std::string::npos;
+        result.push_back(
+            {static_cast<size_t>(i->suffix().first - wgsl_code.cbegin() - 1), returns_value});
+    }
+    return result;
+}
+
+std::vector<size_t> WgslMutator::GetLoopBodyPositions(const std::string& wgsl_code) {
+    // Finds all loops.
+    std::regex loop_regex("[^a-zA-Z_0-9](for|while|loop)[^\\{]*\\{");
     std::vector<size_t> result;
 
-    auto search_start(wgsl_code.cbegin());
-    std::string prefix = "";
+    auto loops_begin = std::sregex_iterator(wgsl_code.begin(), wgsl_code.end(), loop_regex);
+    auto loops_end = std::sregex_iterator();
 
-    while (std::regex_search(search_start, wgsl_code.cend(), match, function_regex)) {
-        result.push_back(static_cast<size_t>(match.suffix().first - wgsl_code.cbegin() - 1L));
-        search_start = match.suffix().first;
+    for (std::sregex_iterator i = loops_begin; i != loops_end; ++i) {
+        result.push_back(static_cast<size_t>(i->suffix().first - wgsl_code.cbegin() - 1));
     }
     return result;
 }
 
 bool WgslMutator::InsertReturnStatement(std::string& wgsl_code) {
-    std::vector<size_t> function_body_positions = GetFunctionBodyPositions(wgsl_code);
+    std::vector<std::pair<size_t, bool>> function_body_positions =
+        GetFunctionBodyPositions(wgsl_code);
 
     // No function was found in wgsl_code.
     if (function_body_positions.empty()) {
         return false;
     }
 
-    // Pick a random function's opening bracket, find the corresponding closing
-    // bracket, and find a semi-colon within the function body.
-    size_t left_bracket_pos = generator_.GetRandomElement(function_body_positions);
+    // Pick a random function
+    auto function = generator_.GetRandomElement(function_body_positions);
+
+    // Find the corresponding closing bracket for the function, and find a semi-colon within the
+    // function body.
+    size_t left_bracket_pos = function.first;
+
+    size_t right_bracket_pos = FindClosingBrace(left_bracket_pos, wgsl_code);
+
+    if (right_bracket_pos == 0) {
+        return false;
+    }
+
+    std::vector<size_t> semicolon_positions;
+    for (size_t pos = wgsl_code.find(";", left_bracket_pos + 1); pos < right_bracket_pos;
+         pos = wgsl_code.find(";", pos + 1)) {
+        semicolon_positions.push_back(pos);
+    }
+
+    if (semicolon_positions.empty()) {
+        return false;
+    }
+
+    std::string return_statement = "return";
+    if (function.second) {
+        // The function returns a value. Get all identifiers and integer literals to use as
+        // potential return values.
+        std::vector<std::pair<size_t, size_t>> identifiers = GetIdentifiers(wgsl_code);
+        auto return_values = identifiers;
+        std::vector<std::pair<size_t, size_t>> int_literals = GetIntLiterals(wgsl_code);
+        return_values.insert(return_values.end(), int_literals.begin(), int_literals.end());
+        std::pair<size_t, size_t> return_value = generator_.GetRandomElement(return_values);
+        return_statement += " " + wgsl_code.substr(return_value.first, return_value.second);
+    }
+    return_statement += ";";
+
+    // Insert the return statement immediately after the semicolon.
+    wgsl_code.insert(generator_.GetRandomElement(semicolon_positions) + 1, return_statement);
+    return true;
+}
+
+bool WgslMutator::InsertBreakOrContinue(std::string& wgsl_code) {
+    std::vector<size_t> loop_body_positions = GetLoopBodyPositions(wgsl_code);
+
+    // No loop was found in wgsl_code.
+    if (loop_body_positions.empty()) {
+        return false;
+    }
+
+    // Pick a random loop's opening bracket, find the corresponding closing
+    // bracket, and find a semi-colon within the loop body.
+    size_t left_bracket_pos = generator_.GetRandomElement(loop_body_positions);
 
     size_t right_bracket_pos = FindClosingBrace(left_bracket_pos, wgsl_code);
 
@@ -145,17 +210,8 @@
 
     size_t semicolon_position = generator_.GetRandomElement(semicolon_positions);
 
-    // Get all identifiers and integer literals to use as potential return values.
-    std::vector<std::pair<size_t, size_t>> identifiers = GetIdentifiers(wgsl_code);
-    auto return_values = identifiers;
-    std::vector<std::pair<size_t, size_t>> int_literals = GetIntLiterals(wgsl_code);
-    return_values.insert(return_values.end(), int_literals.begin(), int_literals.end());
-    std::pair<size_t, size_t> return_value = generator_.GetRandomElement(return_values);
-    std::string return_statement =
-        "return " + wgsl_code.substr(return_value.first, return_value.second) + ";";
-
-    // Insert the return statement immediately after the semicolon.
-    wgsl_code.insert(semicolon_position + 1, return_statement);
+    // Insert a break or continue immediately after the semicolon.
+    wgsl_code.insert(semicolon_position + 1, generator_.GetBool() ? "break;" : "continue;");
     return true;
 }
 
diff --git a/src/tint/fuzzers/tint_regex_fuzzer/wgsl_mutator.h b/src/tint/fuzzers/tint_regex_fuzzer/wgsl_mutator.h
index c5ae900..5308bf8 100644
--- a/src/tint/fuzzers/tint_regex_fuzzer/wgsl_mutator.h
+++ b/src/tint/fuzzers/tint_regex_fuzzer/wgsl_mutator.h
@@ -73,6 +73,11 @@
     /// @return true if the mutation was succesful or false otherwise.
     bool InsertReturnStatement(std::string& wgsl_code);
 
+    /// Inserts a break or continue statement in a randomly chosen loop of a WGSL-like string.
+    /// @param wgsl_code - WGSL-like string that will be mutated.
+    /// @return true if the mutation was succesful or false otherwise.
+    bool InsertBreakOrContinue(std::string& wgsl_code);
+
     /// A function that, given WGSL-like string, generates a new WGSL-like string by replacing one
     /// randomly-chosen operator in the original string with another operator.
     /// @param wgsl_code - the initial WGSL-like string that will be mutated.
@@ -103,14 +108,19 @@
     /// brace.
     size_t FindClosingBrace(size_t opening_bracket_pos, const std::string& wgsl_code);
 
-    /// Returns the starting_position of the bodies of the functions
-    /// that follow the regular expression: fn.*?->.*?\\{, which searches for the
-    /// keyword fn followed by the function name, its return type and opening brace.
+    /// Returns the starting position of the bodies of the functions identified by an appropriate
+    /// function, together with a boolean indicating whether the function returns a value or not.
     /// @param wgsl_code - the WGSL-like string where the functions will be
     /// searched.
-    /// @return a vector with the starting position of the function bodies in
-    /// wgsl_code.
-    std::vector<size_t> GetFunctionBodyPositions(const std::string& wgsl_code);
+    /// @return a vector of pairs, where each pair provides the starting position of the function
+    /// body, and the value true if and only if the function returns a value.
+    std::vector<std::pair<size_t, bool>> GetFunctionBodyPositions(const std::string& wgsl_code);
+
+    /// Returns the starting position of the bodies of the loops identified by an appropriate
+    /// regular expressions.
+    /// @param wgsl_code - the WGSL-like string in which loops will be searched for.
+    /// @return a vector with the starting position of the loop bodies in wgsl_code.
+    std::vector<size_t> GetLoopBodyPositions(const std::string& wgsl_code);
 
     /// A function that finds all the identifiers in a WGSL-like string.
     /// @param wgsl_code - the WGSL-like string where the identifiers will be found.