fix-tests: Attempt to fix tests that use HasSubstr()

By comparing the old substring to the new full body, we can do a pretty
good job at creating a new substring.
This is not an exact science. Always carefully examine the newly
generated strings for correctness.

Change-Id: I8bdf591539a32ec42d0444aa054d88a933e4d250
Reviewed-on: https://dawn-review.googlesource.com/c/tint/+/47765
Commit-Queue: Ben Clayton <bclayton@chromium.org>
Kokoro: Kokoro <noreply+kokoro@google.com>
Reviewed-by: Antonio Maiorano <amaiorano@google.com>
diff --git a/tools/fix-tests/fix-tests.go b/tools/fix-tests/fix-tests.go
index 6b8b860..50f60e9 100644
--- a/tools/fix-tests/fix-tests.go
+++ b/tools/fix-tests/fix-tests.go
@@ -25,6 +25,8 @@
 	"path/filepath"
 	"regexp"
 	"strings"
+
+	"dawn.googlesource.com/tint/tools/fix-tests/substr"
 )
 
 func main() {
@@ -38,6 +40,12 @@
 	fmt.Println(`
 fix-tests is a tool to update tests with new expected output.
 
+fix-tests performs string matching and heuristics to fix up expected results of
+tests that use EXPECT_EQ(a, b) and EXPECT_THAT(a, HasSubstr(b))
+
+WARNING: Always thoroughly check the generated output for mistakes.
+This may produce incorrect output
+
 Usage:
   fix-tests <executable>
 
@@ -89,57 +97,120 @@
 	}
 
 	// For each failing test...
-	var errs []error
-	numFixes := 0
+	seen := map[string]bool{}
+	numFixed, numFailed := 0, 0
 	for _, group := range testResults.Groups {
 		for _, suite := range group.Testsuites {
 			for _, failure := range suite.Failures {
 				// .. attempt to fix the problem
-				test := group.Name + "." + suite.Name
+				test := testName(group, suite)
+				if seen[test] {
+					continue
+				}
+				seen[test] = true
+
 				if err := processFailure(test, wd, failure.Failure); err != nil {
-					errs = append(errs, fmt.Errorf("%v: %w", test, err))
+					fmt.Println(fmt.Errorf("%v: %w", test, err))
+					numFailed++
 				} else {
-					numFixes++
+					numFixed++
 				}
 			}
 		}
 	}
 
-	if numFixes > 0 {
-		fmt.Printf("%v tests fixed\n", numFixes)
+	fmt.Println()
+
+	if numFailed > 0 {
+		fmt.Println(numFailed, "tests could not be fixed")
 	}
-	if n := len(errs); n > 0 {
-		fmt.Printf("%v tests could not be fixed:\n", n)
-		for _, err := range errs {
-			fmt.Println(err)
-		}
+	if numFixed > 0 {
+		fmt.Println(numFixed, "tests fixed")
 	}
 	return nil
 }
 
+func testName(group TestsuiteGroup, suite Testsuite) string {
+	groupParts := strings.Split(group.Name, "/")
+	suiteParts := strings.Split(suite.Name, "/")
+	return groupParts[len(groupParts)-1] + "." + suiteParts[0]
+}
+
 var (
 	// Regular expression to match a test declaration
-	reTests = regexp.MustCompile(`TEST(?:_[FP])?\((\w+),[ \n]*(\w+)\)`)
-	// Regular expression to match a EXPECT_EQ failure for strings
-	reExpectEq = regexp.MustCompile(`^([./\\a-z_-]*):(\d+).*\nExpected equality of these values:\n(?:.|\n)*?(?:Which is: |  )"((?:.|\n)*?)[^\\]"\n(?:.|\n)*?(?:Which is: |  )"((?:.|\n)*?)[^\\]"`)
+	reTests = regexp.MustCompile(`TEST(?:_[FP])?\([ \n]*(\w+),[ \n]*(\w+)\)`)
+	// Regular expression to match a `EXPECT_EQ(a, b)` failure for strings
+	reExpectEq = regexp.MustCompile(`([./\\a-z_-]*):(\d+).*\nExpected equality of these values:\n(?:.|\n)*?(?:Which is: |  )"((?:.|\n)*?[^\\])"\n(?:.|\n)*?(?:Which is: |  )"((?:.|\n)*?[^\\])"`)
+	// Regular expression to match a `EXPECT_THAT(a, HasSubstr(b))` failure for strings
+	reExpectHasSubstr = regexp.MustCompile(`([./\\a-z_-]*):(\d+).*\nValue of: .*\nExpected: has substring "((?:.|\n)*?[^\\])"\n  Actual: "((?:.|\n)*?[^\\])"`)
 )
 
 func processFailure(test, wd, failure string) error {
 	// Start by un-escaping newlines in the failure message
 	failure = strings.ReplaceAll(failure, "\\n", "\n")
+	// Matched regex strings will also need to be un-escaped, but do this after
+	// the match, as unescaped quotes may upset the regex patterns
+	unescape := func(s string) string {
+		return strings.ReplaceAll(s, `\"`, `"`)
+	}
+	escape := func(s string) string {
+		s = strings.ReplaceAll(s, "\n", `\n`)
+		s = strings.ReplaceAll(s, "\"", `\"`)
+		return s
+	}
 
 	// Look for a EXPECT_EQ failure pattern
-	var file, a, b string
+	var file string
+	var fix func(testSource string) (string, error)
 	if parts := reExpectEq.FindStringSubmatch(failure); len(parts) == 5 {
-		file, a, b = parts[1], parts[3], parts[4]
+		// EXPECT_EQ(a, b)
+		a, b := unescape(parts[3]), unescape(parts[4])
+		file = parts[1]
+		fix = func(testSource string) (string, error) {
+			// We don't know if a or b is the expected, so just try flipping the string
+			// to the other form.
+			switch {
+			case strings.Contains(testSource, a):
+				testSource = strings.Replace(testSource, a, b, -1)
+			case strings.Contains(testSource, b):
+				testSource = strings.Replace(testSource, b, a, -1)
+			default:
+				// Try escaping for R"(...)" strings
+				a, b = escape(a), escape(b)
+				switch {
+				case strings.Contains(testSource, a):
+					testSource = strings.Replace(testSource, a, b, -1)
+				case strings.Contains(testSource, b):
+					testSource = strings.Replace(testSource, b, a, -1)
+				default:
+					return "", fmt.Errorf("Could not fix 'EXPECT_EQ' pattern in '%v'", file)
+				}
+			}
+			return testSource, nil
+		}
+	} else if parts := reExpectHasSubstr.FindStringSubmatch(failure); len(parts) == 5 {
+		// EXPECT_THAT(a, HasSubstr(b))
+		a, b := unescape(parts[4]), unescape(parts[3])
+		file = parts[1]
+		fix = func(testSource string) (string, error) {
+			if fix := substr.Fix(a, b); fix != "" {
+				if !strings.Contains(testSource, b) {
+					// Try escaping for R"(...)" strings
+					b, fix = escape(b), escape(fix)
+				}
+				if strings.Contains(testSource, b) {
+					testSource = strings.Replace(testSource, b, fix, -1)
+					return testSource, nil
+				}
+				return "", fmt.Errorf("Could apply fix for 'HasSubstr' pattern in '%v'", file)
+			}
+
+			return "", fmt.Errorf("Could find fix for 'HasSubstr' pattern in '%v'", file)
+		}
 	} else {
 		return fmt.Errorf("Cannot fix this type of failure")
 	}
 
-	// Now un-escape any quotes (the regex is sensitive to these)
-	a = strings.ReplaceAll(a, `\"`, `"`)
-	b = strings.ReplaceAll(b, `\"`, `"`)
-
 	// Get the path to the source file containing the test failure
 	sourcePath := filepath.Join(wd, file)
 
@@ -152,33 +223,14 @@
 	// Find the test
 	testIdx, ok := sourceFile.tests[test]
 	if !ok {
-		return fmt.Errorf("Test '%v' not found in '%v'", test, file)
+		return fmt.Errorf("Test not found in '%v'", file)
 	}
 
 	// Grab the source for the particular test
 	testSource := sourceFile.parts[testIdx]
 
-	// We don't know if a or b is the expected, so just try flipping the string
-	// to the other form.
-	switch {
-	case strings.Contains(testSource, a):
-		testSource = strings.Replace(testSource, a, b, -1)
-	case strings.Contains(testSource, b):
-		testSource = strings.Replace(testSource, b, a, -1)
-	default:
-		// Try escaping for R"(...)" strings
-		a = strings.ReplaceAll(a, "\n", `\n`)
-		b = strings.ReplaceAll(b, "\n", `\n`)
-		a = strings.ReplaceAll(a, "\"", `\"`)
-		b = strings.ReplaceAll(b, "\"", `\"`)
-		switch {
-		case strings.Contains(testSource, a):
-			testSource = strings.Replace(testSource, a, b, -1)
-		case strings.Contains(testSource, b):
-			testSource = strings.Replace(testSource, b, a, -1)
-		default:
-			return fmt.Errorf("Could not fix test '%v' in '%v'", test, file)
-		}
+	if testSource, err = fix(testSource); err != nil {
+		return err
 	}
 
 	// Replace the part of the source file
diff --git a/tools/fix-tests/go.mod b/tools/fix-tests/go.mod
new file mode 100644
index 0000000..099add3
--- /dev/null
+++ b/tools/fix-tests/go.mod
@@ -0,0 +1,5 @@
+module dawn.googlesource.com/tint/tools/fix-tests
+
+go 1.16
+
+require github.com/sergi/go-diff v1.2.0
diff --git a/tools/fix-tests/go.sum b/tools/fix-tests/go.sum
new file mode 100644
index 0000000..930b8b0
--- /dev/null
+++ b/tools/fix-tests/go.sum
@@ -0,0 +1,18 @@
+github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
+github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
+github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
+github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
+github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
+github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/sergi/go-diff v1.2.0 h1:XU+rvMAioB0UC3q1MFrIQy4Vo5/4VsRDQQXHsEya6xQ=
+github.com/sergi/go-diff v1.2.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM=
+github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
+github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk=
+github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
+gopkg.in/yaml.v2 v2.2.4 h1:/eiJrUcujPVeJ3xlSWaiNi3uSVmDGBK1pDHUHAnao1I=
+gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
diff --git a/tools/fix-tests/substr/substr.go b/tools/fix-tests/substr/substr.go
new file mode 100644
index 0000000..d8ed99d
--- /dev/null
+++ b/tools/fix-tests/substr/substr.go
@@ -0,0 +1,52 @@
+// Copyright 2021 The Tint Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package substr
+
+import (
+	diff "github.com/sergi/go-diff/diffmatchpatch"
+)
+
+// Fix attempts to reconstruct substr by comparing it to body.
+// substr is a fuzzy substring of body.
+// Fix returns a new exact substring of body, by calculating a diff of the text.
+// If no match could be made, Fix() returns an empty string.
+func Fix(body, substr string) string {
+	dmp := diff.New()
+
+	diffs := dmp.DiffMain(body, substr, false)
+	if len(diffs) == 0 {
+		return ""
+	}
+
+	front := func() diff.Diff { return diffs[0] }
+	back := func() diff.Diff { return diffs[len(diffs)-1] }
+
+	start, end := 0, len(body)
+
+	// Trim edits that remove text from body start
+	for len(diffs) > 0 && front().Type == diff.DiffDelete {
+		start += len(front().Text)
+		diffs = diffs[1:]
+	}
+
+	// Trim edits that remove text from body end
+	for len(diffs) > 0 && back().Type == diff.DiffDelete {
+		end -= len(back().Text)
+		diffs = diffs[:len(diffs)-1]
+	}
+
+	// New substring is the span for the remainder of the edits
+	return body[start:end]
+}
diff --git a/tools/fix-tests/substr/substr_test.go b/tools/fix-tests/substr/substr_test.go
new file mode 100644
index 0000000..1d13eff
--- /dev/null
+++ b/tools/fix-tests/substr/substr_test.go
@@ -0,0 +1,275 @@
+// Copyright 2021 The Tint Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package substr
+
+import (
+	"strings"
+	"testing"
+)
+
+func TestFixSubstr(t *testing.T) {
+	type test struct {
+		body   string
+		substr string
+		expect string
+	}
+
+	for _, test := range []test{
+		{
+			body:   "abc_def_ghi_jkl_mno",
+			substr: "def_XXX_jkl",
+			expect: "def_ghi_jkl",
+		},
+		{
+			body:   "abc\ndef\nghi\njkl\nmno",
+			substr: "def\nXXX\njkl",
+			expect: "def\nghi\njkl",
+		},
+		{
+			body:   "aaaaa12345ccccc",
+			substr: "1x345",
+			expect: "12345",
+		},
+		{
+			body:   "aaaaa12345ccccc",
+			substr: "12x45",
+			expect: "12345",
+		},
+		{
+			body:   "aaaaa12345ccccc",
+			substr: "123x5",
+			expect: "12345",
+		},
+		{
+			body:   "aaaaaaaaaaaaa",
+			substr: "bbbbbbbbbbbbb",
+			expect: "", // cannot produce a sensible diff
+		}, { ///////////////////////////////////////////////////////////////////
+			body: `Return{
+  {
+    ScalarConstructor[not set]{42u}
+  }
+}
+`,
+			substr: `Return{
+  {
+    ScalarConstructor[not set]{42}
+  }
+}`,
+			expect: `Return{
+  {
+    ScalarConstructor[not set]{42u}
+  }
+}`,
+		}, { ///////////////////////////////////////////////////////////////////
+			body: `VariableDeclStatement{
+  Variable{
+    x_1
+    function
+    __u32
+  }
+}
+Assignment{
+  Identifier[not set]{x_1}
+  ScalarConstructor[not set]{42u}
+}
+Assignment{
+  Identifier[not set]{x_1}
+  ScalarConstructor[not set]{0u}
+}
+Return{}
+`,
+			substr: `Assignment{
+  Identifier[not set]{x_1}
+  ScalarConstructor[not set]{42}
+}
+Assignment{
+  Identifier[not set]{x_1}
+  ScalarConstructor[not set]{0}
+}`,
+			expect: `Assignment{
+  Identifier[not set]{x_1}
+  ScalarConstructor[not set]{42u}
+}
+Assignment{
+  Identifier[not set]{x_1}
+  ScalarConstructor[not set]{0u}
+}`,
+		}, { ///////////////////////////////////////////////////////////////////
+			body: `VariableDeclStatement{
+  Variable{
+    a
+    function
+    __bool
+    {
+      ScalarConstructor[not set]{true}
+    }
+  }
+}
+VariableDeclStatement{
+  Variable{
+    b
+    function
+    __bool
+    {
+      ScalarConstructor[not set]{false}
+    }
+  }
+}
+VariableDeclStatement{
+  Variable{
+    c
+    function
+    __i32
+    {
+      ScalarConstructor[not set]{-1}
+    }
+  }
+}
+VariableDeclStatement{
+  Variable{
+    d
+    function
+    __u32
+    {
+      ScalarConstructor[not set]{1u}
+    }
+  }
+}
+VariableDeclStatement{
+  Variable{
+    e
+    function
+    __f32
+    {
+      ScalarConstructor[not set]{1.500000}
+    }
+  }
+}
+`,
+			substr: `VariableDeclStatement{
+  Variable{
+    a
+    function
+    __bool
+    {
+      ScalarConstructor[not set]{true}
+    }
+  }
+}
+VariableDeclStatement{
+  Variable{
+    b
+    function
+    __bool
+    {
+      ScalarConstructor[not set]{false}
+    }
+  }
+}
+VariableDeclStatement{
+  Variable{
+    c
+    function
+    __i32
+    {
+      ScalarConstructor[not set]{-1}
+    }
+  }
+}
+VariableDeclStatement{
+  Variable{
+    d
+    function
+    __u32
+    {
+      ScalarConstructor[not set]{1}
+    }
+  }
+}
+VariableDeclStatement{
+  Variable{
+    e
+    function
+    __f32
+    {
+      ScalarConstructor[not set]{1.500000}
+    }
+  }
+}
+`,
+			expect: `VariableDeclStatement{
+  Variable{
+    a
+    function
+    __bool
+    {
+      ScalarConstructor[not set]{true}
+    }
+  }
+}
+VariableDeclStatement{
+  Variable{
+    b
+    function
+    __bool
+    {
+      ScalarConstructor[not set]{false}
+    }
+  }
+}
+VariableDeclStatement{
+  Variable{
+    c
+    function
+    __i32
+    {
+      ScalarConstructor[not set]{-1}
+    }
+  }
+}
+VariableDeclStatement{
+  Variable{
+    d
+    function
+    __u32
+    {
+      ScalarConstructor[not set]{1u}
+    }
+  }
+}
+VariableDeclStatement{
+  Variable{
+    e
+    function
+    __f32
+    {
+      ScalarConstructor[not set]{1.500000}
+    }
+  }
+}
+`,
+		},
+	} {
+		body := strings.ReplaceAll(test.body, "\n", "␤")
+		substr := strings.ReplaceAll(test.substr, "\n", "␤")
+		expect := strings.ReplaceAll(test.expect, "\n", `␤`)
+		got := strings.ReplaceAll(Fix(test.body, test.substr), "\n", "␤")
+		if got != expect {
+			t.Errorf("Test failure:\nbody:   '%v'\nsubstr: '%v'\nexpect: '%v'\ngot:    '%v'\n\n", body, substr, expect, got)
+		}
+
+	}
+}