Regex fuzzer: region deletion and duplication

Adds two transformations: one deletes a random region enclosed by a
given delimiter, and the other duplicates such a region by inserting a
copy of it into the WGSL code immediately after a delimiter.

Fixes: tint:1072.
Fixes: tint:1073.

Change-Id: Icb10a7f16a783d5eb8f75a48c4015eb87ea1d174
Reviewed-on: https://dawn-review.googlesource.com/c/tint/+/60200
Reviewed-by: Alastair Donaldson <afdx@google.com>
Kokoro: Kokoro <noreply+kokoro@google.com>
Commit-Queue: Alastair Donaldson <afdx@google.com>
diff --git a/fuzzers/tint_regex_fuzzer/fuzzer.cc b/fuzzers/tint_regex_fuzzer/fuzzer.cc
index fabf90c..e659cdc 100644
--- a/fuzzers/tint_regex_fuzzer/fuzzer.cc
+++ b/fuzzers/tint_regex_fuzzer/fuzzer.cc
@@ -12,6 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+#include <cassert>
 #include <cstddef>
 #include <cstdint>
 
@@ -30,6 +31,13 @@
 
 CliParams cli_params{};
 
+enum class MutationKind {
+  kSwapIntervals,
+  kDeleteInterval,
+  kDuplicateInterval,
+  kNumMutationKinds
+};
+
 extern "C" int LLVMFuzzerInitialize(int* argc, char*** argv) {
   // Parse CLI parameters. `ParseCliParams` will call `exit` if some parameter
   // is invalid.
@@ -41,12 +49,47 @@
                                           size_t size,
                                           size_t max_size,
                                           unsigned seed) {
+  std::string wgsl_code(data, data + size);
   const std::vector<std::string> delimiters{";"};
   std::mt19937 generator(seed);
-  std::uniform_int_distribution<size_t> distribution(0, delimiters.size() - 1);
-  size_t ind = distribution(generator);
+  std::string delimiter = delimiters[std::uniform_int_distribution<size_t>(
+      0, delimiters.size() - 1)(generator)];
 
-  return FuzzEnclosedRegions(size, max_size, delimiters[ind], data, &generator);
+  MutationKind mutation_kind =
+      static_cast<MutationKind>(std::uniform_int_distribution<size_t>(
+          0,
+          static_cast<size_t>(MutationKind::kNumMutationKinds) - 1)(generator));
+
+  switch (mutation_kind) {
+    case MutationKind::kSwapIntervals:
+      if (!SwapRandomIntervals(delimiter, wgsl_code, generator)) {
+        return 0;
+      }
+      break;
+
+    case MutationKind::kDeleteInterval:
+      if (!DeleteRandomInterval(delimiter, wgsl_code, generator)) {
+        return 0;
+      }
+      break;
+
+    case MutationKind::kDuplicateInterval:
+      if (!DuplicateRandomInterval(delimiter, wgsl_code, generator)) {
+        return 0;
+      }
+      break;
+
+    default:
+      assert(false && "Unreachable");
+      return 0;
+  }
+
+  if (wgsl_code.size() > max_size) {
+    return 0;
+  }
+
+  memcpy(data, wgsl_code.c_str(), wgsl_code.size());
+  return wgsl_code.size();
 }
 
 extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
diff --git a/fuzzers/tint_regex_fuzzer/regex_fuzzer_tests.cc b/fuzzers/tint_regex_fuzzer/regex_fuzzer_tests.cc
index 8ed2eb4..a8963a3 100644
--- a/fuzzers/tint_regex_fuzzer/regex_fuzzer_tests.cc
+++ b/fuzzers/tint_regex_fuzzer/regex_fuzzer_tests.cc
@@ -31,7 +31,7 @@
 
   // this call should swap R1 with R3.
   SwapIntervals(0, R1.length() - 1, R1.length() + R2.length(),
-                all_regions.length() - 1, &all_regions);
+                all_regions.length() - 1, all_regions);
 
   ASSERT_EQ(R3 + R2 + R1, all_regions);
 }
@@ -47,7 +47,7 @@
   SwapIntervals(R1.length(), R1.length() + R2.length() - 1,
                 R1.length() + R2.length() + R3.length(),
                 R1.length() + R2.length() + R3.length() + R4.length() - 1,
-                &all_regions);
+                all_regions);
 
   ASSERT_EQ(R1 + R4 + R3 + R2 + R5, all_regions);
 }
@@ -61,7 +61,7 @@
   // this call should swap R2 with R3.
   SwapIntervals(R1.length(), R1.length() + R2.length() - 1,
                 R1.length() + R2.length(),
-                R1.length() + R2.length() + R3.length() - 1, &all_regions);
+                R1.length() + R2.length() + R3.length() - 1, all_regions);
 
   ASSERT_EQ(R1 + R3 + R2 + R4, all_regions);
 }
@@ -80,11 +80,93 @@
       R1.length() + R2.length() + R3.length() + R4.length() - 1,
       R1.length() + R2.length() + R3.length() + R4.length(),
       R1.length() + R2.length() + R3.length() + R4.length() + R5.length() - 1,
-      &all_regions);
+      all_regions);
 
   ASSERT_EQ(R1 + R2 + R3 + R5 + R4, all_regions);
 }
 
+// Deletes the first region.
+TEST(DeleteRegionTest, DeleteFirstRegion) {
+  std::string R1 = "|region1|", R2 = "; region2;",
+              R3 = "---------region3---------", R4 = "++region4++",
+              R5 = "***region5***";
+  std::string all_regions = R1 + R2 + R3 + R4 + R5;
+
+  // This call should delete R1.
+  DeleteInterval(0, R1.length() - 1, all_regions);
+
+  ASSERT_EQ(R2 + R3 + R4 + R5, all_regions);
+}
+
+// Deletes the last region.
+TEST(DeleteRegionTest, DeleteLastRegion) {
+  std::string R1 = "|region1|", R2 = "; region2;",
+              R3 = "---------region3---------", R4 = "++region4++",
+              R5 = "***region5***";
+  std::string all_regions = R1 + R2 + R3 + R4 + R5;
+
+  // This call should delete R5.
+  DeleteInterval(R1.length() + R2.length() + R3.length() + R4.length(),
+                 all_regions.length() - 1, all_regions);
+
+  ASSERT_EQ(R1 + R2 + R3 + R4, all_regions);
+}
+
+// Deletes the middle region.
+TEST(DeleteRegionTest, DeleteMiddleRegion) {
+  std::string R1 = "|region1|", R2 = "; region2;",
+              R3 = "---------region3---------", R4 = "++region4++",
+              R5 = "***region5***";
+  std::string all_regions = R1 + R2 + R3 + R4 + R5;
+
+  // This call should delete R3.
+  DeleteInterval(R1.length() + R2.length(),
+                 R1.length() + R2.length() + R3.length() - 1, all_regions);
+
+  ASSERT_EQ(R1 + R2 + R4 + R5, all_regions);
+}
+
+TEST(InsertRegionTest, InsertRegionTest1) {
+  std::string R1 = "|region1|", R2 = "; region2;",
+              R3 = "---------region3---------", R4 = "++region4++",
+              R5 = "***region5***";
+  std::string all_regions = R1 + R2 + R3 + R4 + R5;
+
+  // This call should insert R2 after R4.
+  DuplicateInterval(R1.length(), R1.length() + R2.length() - 1,
+                    R1.length() + R2.length() + R3.length() + R4.length() - 1,
+                    all_regions);
+
+  ASSERT_EQ(R1 + R2 + R3 + R4 + R2 + R5, all_regions);
+}
+
+TEST(InsertRegionTest, InsertRegionTest2) {
+  std::string R1 = "|region1|", R2 = "; region2;",
+              R3 = "---------region3---------", R4 = "++region4++",
+              R5 = "***region5***";
+  std::string all_regions = R1 + R2 + R3 + R4 + R5;
+
+  // This call should insert R3 after R1.
+  DuplicateInterval(R1.length() + R2.length(),
+                    R1.length() + R2.length() + R3.length() - 1,
+                    R1.length() - 1, all_regions);
+
+  ASSERT_EQ(R1 + R3 + R2 + R3 + R4 + R5, all_regions);
+}
+
+TEST(InsertRegionTest, InsertRegionTest3) {
+  std::string R1 = "|region1|", R2 = "; region2;",
+              R3 = "---------region3---------", R4 = "++region4++",
+              R5 = "***region5***";
+  std::string all_regions = R1 + R2 + R3 + R4 + R5;
+
+  // This call should insert R2 after R5.
+  DuplicateInterval(R1.length(), R1.length() + R2.length() - 1,
+                    all_regions.length() - 1, all_regions);
+
+  ASSERT_EQ(R1 + R2 + R3 + R4 + R5 + R2, all_regions);
+}
+
 }  // namespace
 }  // namespace regex_fuzzer
 }  // namespace fuzzers
diff --git a/fuzzers/tint_regex_fuzzer/wgsl_mutator.cc b/fuzzers/tint_regex_fuzzer/wgsl_mutator.cc
index ca199fa..23356a5 100644
--- a/fuzzers/tint_regex_fuzzer/wgsl_mutator.cc
+++ b/fuzzers/tint_regex_fuzzer/wgsl_mutator.cc
@@ -32,9 +32,9 @@
 
 size_t GetRandomIntFromRange(size_t lower_bound,
                              size_t upper_bound,
-                             std::mt19937* generator) {
+                             std::mt19937& generator) {
   std::uniform_int_distribution<size_t> dist(lower_bound, upper_bound);
-  return dist(*generator);
+  return dist(generator);
 }
 
 }  //  namespace
@@ -54,30 +54,38 @@
                    size_t idx2,
                    size_t idx3,
                    size_t idx4,
-                   std::string* wgsl_code) {
-  std::string region_1 = wgsl_code->substr(idx1, idx2 - idx1 + 1);
+                   std::string& wgsl_code) {
+  std::string region_1 = wgsl_code.substr(idx1, idx2 - idx1 + 1);
 
-  std::string region_2 = wgsl_code->substr(idx3, idx4 - idx3 + 1);
+  std::string region_2 = wgsl_code.substr(idx3, idx4 - idx3 + 1);
 
   // The second transformation is done first as it doesn't affect ind1 and ind2
-  wgsl_code->replace(idx3, region_2.size(), region_1);
+  wgsl_code.replace(idx3, region_2.size(), region_1);
 
-  wgsl_code->replace(idx1, region_1.size(), region_2);
+  wgsl_code.replace(idx1, region_1.size(), region_2);
 }
 
-size_t FuzzEnclosedRegions(size_t size,
-                           size_t max_size,
-                           const std::string& delimiter,
-                           uint8_t* wgsl_code,
-                           std::mt19937* generator) {
-  std::string init_program(wgsl_code, wgsl_code + size);
+void DeleteInterval(size_t idx1, size_t idx2, std::string& wgsl_code) {
+  wgsl_code.erase(idx1, idx2 - idx1 + 1);
+}
 
+void DuplicateInterval(size_t idx1,
+                       size_t idx2,
+                       size_t idx3,
+                       std::string& wgsl_code) {
+  std::string region = wgsl_code.substr(idx1, idx2 - idx1 + 1);
+  wgsl_code.insert(idx3 + 1, region);
+}
+
+bool SwapRandomIntervals(const std::string& delimiter,
+                         std::string& wgsl_code,
+                         std::mt19937& generator) {
   std::vector<size_t> delimiter_positions =
-      FindDelimiterIndices(delimiter, init_program);
+      FindDelimiterIndices(delimiter, wgsl_code);
 
   // Need to have at least 3 indices
   if (delimiter_positions.size() < 3) {
-    return 0;
+    return false;
   }
 
   // When generating the i-th random number, we should make sure that there are
@@ -93,13 +101,56 @@
 
   SwapIntervals(delimiter_positions[ind1], delimiter_positions[ind2],
                 delimiter_positions[ind3], delimiter_positions[ind4],
-                &init_program);
+                wgsl_code);
 
-  if (init_program.size() > max_size) {
-    return 0;
+  return true;
+}
+
+bool DeleteRandomInterval(const std::string& delimiter,
+                          std::string& wgsl_code,
+                          std::mt19937& generator) {
+  std::vector<size_t> delimiter_positions =
+      FindDelimiterIndices(delimiter, wgsl_code);
+
+  // Need to have at least 2 indices
+  if (delimiter_positions.size() < 2) {
+    return false;
   }
-  memcpy(wgsl_code, init_program.c_str(), init_program.size());
-  return init_program.size();
+
+  size_t ind1 =
+      GetRandomIntFromRange(0, delimiter_positions.size() - 2U, generator);
+  size_t ind2 = GetRandomIntFromRange(
+      ind1 + 1U, delimiter_positions.size() - 1U, generator);
+
+  DeleteInterval(delimiter_positions[ind1], delimiter_positions[ind2],
+                 wgsl_code);
+
+  return true;
+}
+
+bool DuplicateRandomInterval(const std::string& delimiter,
+                             std::string& wgsl_code,
+                             std::mt19937& generator) {
+  std::vector<size_t> delimiter_positions =
+      FindDelimiterIndices(delimiter, wgsl_code);
+
+  // Need at least 2 delimiter positions to enclose a region.
+  if (delimiter_positions.size() < 2) {
+    return false;
+  }
+
+  size_t ind1 =
+      GetRandomIntFromRange(0, delimiter_positions.size() - 2U, generator);
+  size_t ind2 = GetRandomIntFromRange(
+      ind1 + 1U, delimiter_positions.size() - 1U, generator);
+  // The copy is inserted right after a randomly chosen delimiter.
+  size_t ind3 =
+      GetRandomIntFromRange(0, delimiter_positions.size() - 1U, generator);
+  // DuplicateInterval inserts after idx3 itself, so no extra +1 here.
+  DuplicateInterval(delimiter_positions[ind1], delimiter_positions[ind2],
+                    delimiter_positions[ind3], wgsl_code);
+
+  return true;
 }
 
 }  // namespace regex_fuzzer
diff --git a/fuzzers/tint_regex_fuzzer/wgsl_mutator.h b/fuzzers/tint_regex_fuzzer/wgsl_mutator.h
index 2c760bf..12a7b69 100644
--- a/fuzzers/tint_regex_fuzzer/wgsl_mutator.h
+++ b/fuzzers/tint_regex_fuzzer/wgsl_mutator.h
@@ -44,22 +44,58 @@
                    size_t idx2,
                    size_t idx3,
                    size_t idx4,
-                   std::string* wgsl_code);
+                   std::string& wgsl_code);
 
-/// A function that, given an initial string (valid WGSL code) and a delimiter,
-/// generates a new set of strings (valid or invalid WGSL code) by
-/// picking two random regions and swapping them.
-/// @param wgsl_code - the initial string (WGSL code) that will be mutated.
-/// @param size - size of the string that will be mutated.
-/// @param max_size - maximal allowed mutation size.
+/// Given 2 indices, idx1 and idx2, it deletes the region in the interval
+/// [idx1, idx2].
+/// @param idx1 - starting index of the region to delete.
+/// @param idx2 - terminating index of the region to delete.
+/// @param wgsl_code - the string where the deletion will occur.
+void DeleteInterval(size_t idx1, size_t idx2, std::string& wgsl_code);
+
+/// Given 3 indices, idx1, idx2, and idx3 it inserts the
+/// region in [idx1, idx2] after idx3.
+/// @param idx1 - starting index of region.
+/// @param idx2 - terminating index of the region.
+/// @param idx3 - the index after which the region will be inserted.
+/// @param wgsl_code - the string where the duplication will occur.
+void DuplicateInterval(size_t idx1,
+                       size_t idx2,
+                       size_t idx3,
+                       std::string& wgsl_code);
+
+/// A function that, given a WGSL-like string and a delimiter,
+/// generates another WGSL-like string by picking two random regions
+/// enclosed by the delimiter and swapping them.
 /// @param delimiter - the delimiter that will be used to find enclosed regions.
+/// @param wgsl_code - the initial string (WGSL code) that will be mutated.
 /// @param generator - the random number generator.
-/// @return size of the mutated string.
-size_t FuzzEnclosedRegions(size_t size,
-                           size_t max_size,
-                           const std::string& delimiter,
-                           uint8_t* wgsl_code,
-                           std::mt19937* generator);
+/// @return true if a swap happened or false otherwise.
+bool SwapRandomIntervals(const std::string& delimiter,
+                         std::string& wgsl_code,
+                         std::mt19937& generator);
+
+/// A function that, given a WGSL-like string and a delimiter,
+/// generates another WGSL-like string by deleting a random
+/// region enclosed by the delimiter.
+/// @param delimiter - the delimiter that will be used to find enclosed regions.
+/// @param wgsl_code - the initial string (WGSL code) that will be mutated.
+/// @param generator - the random number generator.
+/// @return true if a deletion happened or false otherwise.
+bool DeleteRandomInterval(const std::string& delimiter,
+                          std::string& wgsl_code,
+                          std::mt19937& generator);
+
+/// A function that, given a WGSL-like string and a delimiter,
+/// generates another WGSL-like string by duplicating a random
+/// region enclosed by the delimiter.
+/// @param delimiter - the delimiter that will be used to find enclosed regions.
+/// @param wgsl_code - the initial string (WGSL code) that will be mutated.
+/// @param generator - the random number generator.
+/// @return true if a duplication happened or false otherwise.
+bool DuplicateRandomInterval(const std::string& delimiter,
+                             std::string& wgsl_code,
+                             std::mt19937& generator);
 
 }  // namespace regex_fuzzer
 }  // namespace fuzzers