Regex fuzzer: Change the region boundaries

Changes the interval boundaries from [idx1, idx2] to (idx1, idx2], so
that the first delimiter that encloses a region is excluded from it.

Change-Id: Ia9186e584d9038b4220cad11d418fa9881e51e8d
Reviewed-on: https://dawn-review.googlesource.com/c/tint/+/60346
Kokoro: Kokoro <noreply+kokoro@google.com>
Reviewed-by: Alastair Donaldson <afdx@google.com>
Commit-Queue: Alastair Donaldson <afdx@google.com>
diff --git a/fuzzers/tint_regex_fuzzer/fuzzer.cc b/fuzzers/tint_regex_fuzzer/fuzzer.cc
index e659cdc..cc87cf5 100644
--- a/fuzzers/tint_regex_fuzzer/fuzzer.cc
+++ b/fuzzers/tint_regex_fuzzer/fuzzer.cc
@@ -18,7 +18,7 @@
 
 #include "fuzzers/tint_common_fuzzer.h"
 #include "fuzzers/tint_regex_fuzzer/cli.h"
-
+#include "fuzzers/tint_regex_fuzzer/util.h"
 #include "fuzzers/tint_regex_fuzzer/wgsl_mutator.h"
 
 #include "src/reader/wgsl/parser.h"
@@ -52,13 +52,12 @@
   std::string wgsl_code(data, data + size);
   const std::vector<std::string> delimiters{";"};
   std::mt19937 generator(seed);
-  std::string delimiter = delimiters[std::uniform_int_distribution<size_t>(
-      0, delimiters.size() - 1)(generator)];
 
-  MutationKind mutation_kind =
-      static_cast<MutationKind>(std::uniform_int_distribution<size_t>(
-          0,
-          static_cast<size_t>(MutationKind::kNumMutationKinds) - 1)(generator));
+  std::string delimiter =
+      delimiters[GetRandomIntFromRange(0, delimiters.size() - 1, generator)];
+
+  MutationKind mutation_kind = static_cast<MutationKind>(GetRandomIntFromRange(
+      0, static_cast<size_t>(MutationKind::kNumMutationKinds) - 1, generator));
 
   switch (mutation_kind) {
     case MutationKind::kSwapIntervals:
diff --git a/fuzzers/tint_regex_fuzzer/regex_fuzzer_tests.cc b/fuzzers/tint_regex_fuzzer/regex_fuzzer_tests.cc
index a8963a3..f0f7f12 100644
--- a/fuzzers/tint_regex_fuzzer/regex_fuzzer_tests.cc
+++ b/fuzzers/tint_regex_fuzzer/regex_fuzzer_tests.cc
@@ -25,8 +25,8 @@
 
 // Swaps two non-consecutive regions in the edge
 TEST(SwapRegionsTest, SwapIntervalsEdgeNonConsecutive) {
-  std::string R1 = "|region1|", R2 = "; region2;",
-              R3 = "---------region3---------";
+  std::string R1 = ";region1;", R2 = ";regionregion2",
+              R3 = ";regionregionregion3;";
   std::string all_regions = R1 + R2 + R3;
 
   // this call should swap R1 with R3.
@@ -38,9 +38,9 @@
 
 // Swaps two non-consecutive regions not in the edge
 TEST(SwapRegionsTest, SwapIntervalsNonConsecutiveNonEdge) {
-  std::string R1 = "|region1|", R2 = "; region2;",
-              R3 = "---------region3---------", R4 = "++region4++",
-              R5 = "***region5***";
+  std::string R1 = ";region1;", R2 = ";regionregion2;",
+              R3 = ";regionregionregion3;", R4 = ";regionregionregionregion4;",
+              R5 = ";regionregionregionregionregion5;";
   std::string all_regions = R1 + R2 + R3 + R4 + R5;
 
   // this call should swap R2 with R4.
@@ -54,8 +54,9 @@
 
 // Swaps two consecutive regions not in the edge (sorrounded by other regions)
 TEST(SwapRegionsTest, SwapIntervalsConsecutiveEdge) {
-  std::string R1 = "|region1|", R2 = "; region2;", R3 = "++++region3++++",
-              R4 = "---------region4---------";
+  std::string R1 = ";region1;", R2 = ";regionregion2;",
+              R3 = ";regionregionregion3;", R4 = ";regionregionregionregion4;",
+              R5 = ";regionregionregionregionregion5;";
   std::string all_regions = R1 + R2 + R3 + R4;
 
   // this call should swap R2 with R3.
@@ -69,9 +70,9 @@
 // Swaps two consecutive regions not in the edge (not sorrounded by other
 // regions)
 TEST(SwapRegionsTest, SwapIntervalsConsecutiveNonEdge) {
-  std::string R1 = "|region1|", R2 = "; region2;",
-              R3 = "---------region3---------", R4 = "++region4++",
-              R5 = "***region5***";
+  std::string R1 = ";region1;", R2 = ";regionregion2;",
+              R3 = ";regionregionregion3;", R4 = ";regionregionregionregion4;",
+              R5 = ";regionregionregionregionregion5;";
   std::string all_regions = R1 + R2 + R3 + R4 + R5;
 
   // this call should swap R4 with R5.
@@ -87,49 +88,49 @@
 
 // Deletes the first region.
 TEST(DeleteRegionTest, DeleteFirstRegion) {
-  std::string R1 = "|region1|", R2 = "; region2;",
-              R3 = "---------region3---------", R4 = "++region4++",
-              R5 = "***region5***";
+  std::string R1 = ";region1;", R2 = ";regionregion2;",
+              R3 = ";regionregionregion3;", R4 = ";regionregionregionregion4;",
+              R5 = ";regionregionregionregionregion5;";
   std::string all_regions = R1 + R2 + R3 + R4 + R5;
 
   // This call should delete R1.
   DeleteInterval(0, R1.length() - 1, all_regions);
 
-  ASSERT_EQ(R2 + R3 + R4 + R5, all_regions);
+  ASSERT_EQ(";" + R2 + R3 + R4 + R5, all_regions);
 }
 
 // Deletes the last region.
 TEST(DeleteRegionTest, DeleteLastRegion) {
-  std::string R1 = "|region1|", R2 = "; region2;",
-              R3 = "---------region3---------", R4 = "++region4++",
-              R5 = "***region5***";
+  std::string R1 = ";region1;", R2 = ";regionregion2;",
+              R3 = ";regionregionregion3;", R4 = ";regionregionregionregion4;",
+              R5 = ";regionregionregionregionregion5;";
   std::string all_regions = R1 + R2 + R3 + R4 + R5;
 
   // This call should delete R5.
   DeleteInterval(R1.length() + R2.length() + R3.length() + R4.length(),
                  all_regions.length() - 1, all_regions);
 
-  ASSERT_EQ(R1 + R2 + R3 + R4, all_regions);
+  ASSERT_EQ(R1 + R2 + R3 + R4 + ";", all_regions);
 }
 
 // Deletes the middle region.
 TEST(DeleteRegionTest, DeleteMiddleRegion) {
-  std::string R1 = "|region1|", R2 = "; region2;",
-              R3 = "---------region3---------", R4 = "++region4++",
-              R5 = "***region5***";
+  std::string R1 = ";region1;", R2 = ";regionregion2;",
+              R3 = ";regionregionregion3;", R4 = ";regionregionregionregion4;",
+              R5 = ";regionregionregionregionregion5;";
   std::string all_regions = R1 + R2 + R3 + R4 + R5;
 
   // This call should delete R3.
   DeleteInterval(R1.length() + R2.length(),
                  R1.length() + R2.length() + R3.length() - 1, all_regions);
 
-  ASSERT_EQ(R1 + R2 + R4 + R5, all_regions);
+  ASSERT_EQ(R1 + R2 + ";" + R4 + R5, all_regions);
 }
 
 TEST(InsertRegionTest, InsertRegionTest1) {
-  std::string R1 = "|region1|", R2 = "; region2;",
-              R3 = "---------region3---------", R4 = "++region4++",
-              R5 = "***region5***";
+  std::string R1 = ";region1;", R2 = ";regionregion2;",
+              R3 = ";regionregionregion3;", R4 = ";regionregionregionregion4;",
+              R5 = ";regionregionregionregionregion5;";
   std::string all_regions = R1 + R2 + R3 + R4 + R5;
 
   // This call should insert R2 after R4.
@@ -137,13 +138,14 @@
                     R1.length() + R2.length() + R3.length() + R4.length() - 1,
                     all_regions);
 
-  ASSERT_EQ(R1 + R2 + R3 + R4 + R2 + R5, all_regions);
+  ASSERT_EQ(R1 + R2 + R3 + R4 + R2.substr(1, R2.size() - 1) + R5, all_regions);
 }
 
 TEST(InsertRegionTest, InsertRegionTest2) {
-  std::string R1 = "|region1|", R2 = "; region2;",
-              R3 = "---------region3---------", R4 = "++region4++",
-              R5 = "***region5***";
+  std::string R1 = ";region1;", R2 = ";regionregion2;",
+              R3 = ";regionregionregion3;", R4 = ";regionregionregionregion4;",
+              R5 = ";regionregionregionregionregion5;";
+
   std::string all_regions = R1 + R2 + R3 + R4 + R5;
 
   // This call should insert R3 after R1.
@@ -151,20 +153,23 @@
                     R1.length() + R2.length() + R3.length() - 1,
                     R1.length() - 1, all_regions);
 
-  ASSERT_EQ(R1 + R3 + R2 + R3 + R4 + R5, all_regions);
+  ASSERT_EQ(R1 + R3.substr(1, R3.length() - 1) + R2 + R3 + R4 + R5,
+            all_regions);
 }
 
 TEST(InsertRegionTest, InsertRegionTest3) {
-  std::string R1 = "|region1|", R2 = "; region2;",
-              R3 = "---------region3---------", R4 = "++region4++",
-              R5 = "***region5***";
+  std::string R1 = ";region1;", R2 = ";regionregion2;",
+              R3 = ";regionregionregion3;", R4 = ";regionregionregionregion4;",
+              R5 = ";regionregionregionregionregion5;";
+
   std::string all_regions = R1 + R2 + R3 + R4 + R5;
 
   // This call should insert R2 after R5.
   DuplicateInterval(R1.length(), R1.length() + R2.length() - 1,
                     all_regions.length() - 1, all_regions);
 
-  ASSERT_EQ(R1 + R2 + R3 + R4 + R5 + R2, all_regions);
+  ASSERT_EQ(R1 + R2 + R3 + R4 + R5 + R2.substr(1, R2.length() - 1),
+            all_regions);
 }
 
 }  // namespace
diff --git a/fuzzers/tint_regex_fuzzer/util.h b/fuzzers/tint_regex_fuzzer/util.h
new file mode 100644
index 0000000..dfdc052
--- /dev/null
+++ b/fuzzers/tint_regex_fuzzer/util.h
@@ -0,0 +1,33 @@
+// Copyright 2021 The Tint Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef FUZZERS_TINT_REGEX_FUZZER_UTIL_H_
+#define FUZZERS_TINT_REGEX_FUZZER_UTIL_H_
+
+#include <random>
+
+namespace tint {
+namespace fuzzers {
+namespace regex_fuzzer {
+
+inline size_t GetRandomIntFromRange(size_t lower_bound,
+                                    size_t upper_bound,
+                                    std::mt19937& generator) {
+  std::uniform_int_distribution<size_t> dist(lower_bound, upper_bound);
+  return dist(generator);
+}
+}  // namespace regex_fuzzer
+}  // namespace fuzzers
+}  // namespace tint
+#endif  // FUZZERS_TINT_REGEX_FUZZER_UTIL_H_
diff --git a/fuzzers/tint_regex_fuzzer/wgsl_mutator.cc b/fuzzers/tint_regex_fuzzer/wgsl_mutator.cc
index 23356a5..580e3df 100644
--- a/fuzzers/tint_regex_fuzzer/wgsl_mutator.cc
+++ b/fuzzers/tint_regex_fuzzer/wgsl_mutator.cc
@@ -16,7 +16,6 @@
 
 #include <cassert>
 #include <cstring>
-#include <iostream>
 #include <map>
 #include <random>
 #include <regex>
@@ -24,21 +23,12 @@
 #include <utility>
 #include <vector>
 
+#include "fuzzers/tint_regex_fuzzer/util.h"
+
 namespace tint {
 namespace fuzzers {
 namespace regex_fuzzer {
 
-namespace {
-
-size_t GetRandomIntFromRange(size_t lower_bound,
-                             size_t upper_bound,
-                             std::mt19937& generator) {
-  std::uniform_int_distribution<size_t> dist(lower_bound, upper_bound);
-  return dist(generator);
-}
-
-}  //  namespace
-
 std::vector<size_t> FindDelimiterIndices(const std::string& delimiter,
                                          const std::string& wgsl_code) {
   std::vector<size_t> result;
@@ -55,25 +45,25 @@
                    size_t idx3,
                    size_t idx4,
                    std::string& wgsl_code) {
-  std::string region_1 = wgsl_code.substr(idx1, idx2 - idx1 + 1);
+  std::string region_1 = wgsl_code.substr(idx1 + 1, idx2 - idx1);
 
-  std::string region_2 = wgsl_code.substr(idx3, idx4 - idx3 + 1);
+  std::string region_2 = wgsl_code.substr(idx3 + 1, idx4 - idx3);
 
   // The second transformation is done first as it doesn't affect ind1 and ind2
-  wgsl_code.replace(idx3, region_2.size(), region_1);
+  wgsl_code.replace(idx3 + 1, region_2.size(), region_1);
 
-  wgsl_code.replace(idx1, region_1.size(), region_2);
+  wgsl_code.replace(idx1 + 1, region_1.size(), region_2);
 }
 
 void DeleteInterval(size_t idx1, size_t idx2, std::string& wgsl_code) {
-  wgsl_code.erase(idx1, idx2 - idx1 + 1);
+  wgsl_code.erase(idx1 + 1, idx2 - idx1);
 }
 
 void DuplicateInterval(size_t idx1,
                        size_t idx2,
                        size_t idx3,
                        std::string& wgsl_code) {
-  std::string region = wgsl_code.substr(idx1, idx2 - idx1 + 1);
+  std::string region = wgsl_code.substr(idx1 + 1, idx2 - idx1);
   wgsl_code.insert(idx3 + 1, region);
 }
 
@@ -148,7 +138,7 @@
       GetRandomIntFromRange(0, delimiter_positions.size() - 1U, generator);
 
   DuplicateInterval(delimiter_positions[ind1], delimiter_positions[ind2],
-                    delimiter_positions[ind3] + 1, wgsl_code);
+                    delimiter_positions[ind3], wgsl_code);
 
   return true;
 }
diff --git a/fuzzers/tint_regex_fuzzer/wgsl_mutator.h b/fuzzers/tint_regex_fuzzer/wgsl_mutator.h
index 12a7b69..7f1a468 100644
--- a/fuzzers/tint_regex_fuzzer/wgsl_mutator.h
+++ b/fuzzers/tint_regex_fuzzer/wgsl_mutator.h
@@ -33,7 +33,7 @@
                                          const std::string& wgsl_code);
 
 /// Given 4 indices, idx1, idx2, idx3 and idx4 it swaps the regions
-/// in the interval [idx1, idx2] with the region in the interval [idx3, idx4]
+/// in the interval (idx1, idx2] with the region in the interval (idx3, idx4]
 /// in wgsl_text.
 /// @param idx1 - starting index of the first region.
 /// @param idx2 - terminating index of the second region.
@@ -46,7 +46,7 @@
                    size_t idx4,
                    std::string& wgsl_code);
 
-/// Given 2 indices, idx1, idx2, it delets the region in the interval [idx1,
+/// Given 2 indices, idx1, idx2, it deletes the region in the interval (idx1,
 /// idx2].
 /// @param idx1 - starting index of the first region.
 /// @param idx2 - terminating index of the second region.
@@ -54,7 +54,7 @@
 void DeleteInterval(size_t idx1, size_t idx2, std::string& wgsl_code);
 
 /// Given 3 indices, idx1, idx2, and idx3 it inserts the
-/// region in [idx1, idx2] after idx3.
+/// region in (idx1, idx2] after idx3.
 /// @param idx1 - starting index of region.
 /// @param idx2 - terminating index of the region.
 /// @param idx3 - the position where the region will be inserted.