Add tools/trim-includes: Script to clean up includes

Change-Id: I37e98f7bf6cc4f6a10eaae6b87b2c8e46dcdee18
Reviewed-on: https://dawn-review.googlesource.com/c/tint/+/43885
Commit-Queue: Ben Clayton <bclayton@google.com>
Reviewed-by: Antonio Maiorano <amaiorano@google.com>
diff --git a/tools/trim-includes/build.sh b/tools/trim-includes/build.sh
new file mode 100755
index 0000000..3b6faeb
--- /dev/null
+++ b/tools/trim-includes/build.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+
+set -e # Fail on any error.
+
+SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd )"
+ROOT_DIR="$( cd "${SCRIPT_DIR}/../.." >/dev/null 2>&1 && pwd )"
+
+cd $ROOT_DIR
+autoninja -C out/Debug
+cd $ROOT_DIR/build
+ninja
diff --git a/tools/trim-includes/config.cfg b/tools/trim-includes/config.cfg
new file mode 100644
index 0000000..e6b69f6
--- /dev/null
+++ b/tools/trim-includes/config.cfg
@@ -0,0 +1,6 @@
+{
+  "paths": [
+    { "include": [ "src/**.h", "src/**.cc" ] },
+    { "exclude": [ "src/**_windows.*", "src/**_other.*" ] }
+  ]
+}
diff --git a/tools/trim-includes/glob/glob.go b/tools/trim-includes/glob/glob.go
new file mode 100644
index 0000000..7234007
--- /dev/null
+++ b/tools/trim-includes/glob/glob.go
@@ -0,0 +1,185 @@
+// Copyright 2020 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//   https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package glob provides file globbing utilities
+package glob
+
+import (
+	"bytes"
+	"encoding/json"
+	"fmt"
+	"io/ioutil"
+	"os"
+	"path/filepath"
+
+	"dawn.googlesource.com/tint/tools/trim-includes/match"
+)
+
+// Scan walks all files and subdirectories from root, returning those
+// that Config.shouldExamine() returns true for.
+func Scan(root string, cfg Config) ([]string, error) {
+	files := []string{}
+	err := filepath.Walk(root, func(path string, info os.FileInfo, err error) error {
+		rel, err := filepath.Rel(root, path)
+		if err != nil {
+			rel = path
+		}
+
+		if rel == ".git" {
+			return filepath.SkipDir
+		}
+
+		if !cfg.shouldExamine(root, path) {
+			return nil
+		}
+
+		if !info.IsDir() {
+			files = append(files, rel)
+		}
+
+		return nil
+	})
+	if err != nil {
+		return nil, err
+	}
+	return files, nil
+}
+
+// Configs is a slice of Config.
+type Configs []Config
+
+// Config is used to parse the JSON configuration file.
+type Config struct {
+	// Paths holds a number of JSON objects that contain either a "includes" or
+	// "excludes" key to an array of path patterns.
+	// Each path pattern is considered in turn to either include or exclude the
+	// file path for license scanning. Pattern use forward-slashes '/' for
+	// directory separators, and may use the following wildcards:
+	//  ?  - matches any single non-separator character
+	//  *  - matches any sequence of non-separator characters
+	//  ** - matches any sequence of characters including separators
+	//
+	// Rules are processed in the order in which they are declared, with later
+	// rules taking precedence over earlier rules.
+	//
+	// All files are excluded before the first rule is evaluated.
+	//
+	// Example:
+	//
+	// {
+	//   "paths": [
+	// 	  { "exclude": [ "out/*", "build/*" ] },
+	// 	  { "include": [ "out/foo.txt" ] }
+	//   ],
+	// }
+	Paths searchRules
+}
+
+// LoadConfig loads a config file at path.
+func LoadConfig(path string) (Config, error) {
+	cfgBody, err := ioutil.ReadFile(path)
+	if err != nil {
+		return Config{}, err
+	}
+	d := json.NewDecoder(bytes.NewReader(cfgBody))
+	cfg := Config{}
+	if err := d.Decode(&cfg); err != nil {
+		return Config{}, err
+	}
+	return cfg, nil
+}
+
+// rule is a search path predicate.
+// root is the project relative path.
+// cond is the value to return if the rule doesn't either include or exclude.
+type rule func(path string, cond bool) bool
+
+// searchRules is a ordered list of search rules.
+// searchRules is its own type as it has to perform custom JSON unmarshalling.
+type searchRules []rule
+
+// UnmarshalJSON unmarshals the array of rules in the form:
+// { "include": [ ... ] } or { "exclude": [ ... ] }
+func (l *searchRules) UnmarshalJSON(body []byte) error {
+	type parsed struct {
+		Include []string
+		Exclude []string
+	}
+
+	p := []parsed{}
+	if err := json.NewDecoder(bytes.NewReader(body)).Decode(&p); err != nil {
+		return err
+	}
+
+	*l = searchRules{}
+	for _, rule := range p {
+		rule := rule
+		switch {
+		case len(rule.Include) > 0 && len(rule.Exclude) > 0:
+			return fmt.Errorf("Rule cannot contain both include and exclude")
+		case len(rule.Include) > 0:
+			tests := make([]match.Test, len(rule.Include))
+			for i, pattern := range rule.Include {
+				test, err := match.New(pattern)
+				if err != nil {
+					return err
+				}
+				tests[i] = test
+			}
+			*l = append(*l, func(path string, cond bool) bool {
+				for _, test := range tests {
+					if test(path) {
+						return true
+					}
+				}
+				return cond
+			})
+		case len(rule.Exclude) > 0:
+			tests := make([]match.Test, len(rule.Exclude))
+			for i, pattern := range rule.Exclude {
+				test, err := match.New(pattern)
+				if err != nil {
+					return err
+				}
+				tests[i] = test
+			}
+			*l = append(*l, func(path string, cond bool) bool {
+				for _, test := range tests {
+					if test(path) {
+						return false
+					}
+				}
+				return cond
+			})
+		}
+	}
+	return nil
+}
+
+// shouldExamine returns true if the file at absPath should be scanned.
+func (c Config) shouldExamine(root, absPath string) bool {
+	root = filepath.ToSlash(root)       // Canonicalize
+	absPath = filepath.ToSlash(absPath) // Canonicalize
+	relPath, err := filepath.Rel(root, absPath)
+	if err != nil {
+		return false
+	}
+
+	res := false
+	for _, rule := range c.Paths {
+		res = rule(relPath, res)
+	}
+
+	return res
+}
diff --git a/tools/trim-includes/go.mod b/tools/trim-includes/go.mod
new file mode 100644
index 0000000..19aab32
--- /dev/null
+++ b/tools/trim-includes/go.mod
@@ -0,0 +1,3 @@
+module dawn.googlesource.com/tint/tools/trim-includes
+
+go 1.16
diff --git a/tools/trim-includes/main.go b/tools/trim-includes/main.go
new file mode 100644
index 0000000..3e049b9
--- /dev/null
+++ b/tools/trim-includes/main.go
@@ -0,0 +1,295 @@
+// Copyright 2021 The Tint Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// trim-includes is a tool to try removing unnecessary include statements from
+// all .cc and .h files in the tint project.
+//
+// trim-includes removes each #include from each file, then runs the provided
+// build script to determine whether the line was necessary. If the include is
+// required, it is restored, otherwise it is left deleted.
+// After all the #include statements have been tested, the file is
+// clang-formatted and git staged.
+package main
+
+import (
+	"flag"
+	"fmt"
+	"io/ioutil"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"regexp"
+	"runtime"
+	"strings"
+	"sync"
+	"time"
+
+	"dawn.googlesource.com/tint/tools/trim-includes/glob"
+)
+
+var (
+	// Directory to this .go file
+	toolRoot = getToolRoot()
+	// Root directory of the Tint project
+	projectRoot = getProjectRoot(toolRoot)
+
+	// Path to the build script to run after each attempting to remove each
+	// #include
+	buildScript = ""
+)
+
+func main() {
+	if err := run(); err != nil {
+		fmt.Println(err)
+		os.Exit(1)
+	}
+}
+
+func showUsage() {
+	fmt.Println(`
+trim-includes is a tool to try removing unnecessary include statements from all
+.cc and .h files in the tint project.
+
+trim-includes removes each #include from each file, then runs the provided build
+script to determine whether the line was necessary. If the include is required,
+it is restored, otherwise it is left deleted.
+After all the #include statements have been tested, the file is clang-formatted
+and git staged.
+
+Usage:
+  trim-includes <path-to-build-script>`)
+	os.Exit(1)
+}
+
+func run() error {
+	flag.Parse()
+	args := flag.Args()
+	if len(args) < 1 {
+		showUsage()
+	}
+
+	var err error
+	buildScript, err = exec.LookPath(args[0])
+	if err != nil {
+		return err
+	}
+	buildScript, err = filepath.Abs(buildScript)
+	if err != nil {
+		return err
+	}
+
+	cfg, err := glob.LoadConfig("config.cfg")
+	if err != nil {
+		return err
+	}
+
+	fmt.Println("Checking the project builds with no changes...")
+	ok, err := tryBuild()
+	if err != nil {
+		return err
+	}
+	if !ok {
+		return fmt.Errorf("Project does not build without edits")
+	}
+
+	fmt.Println("Scanning for files...")
+	paths, err := glob.Scan(projectRoot, cfg)
+	if err != nil {
+		return err
+	}
+
+	fmt.Printf("Loading %v source files...\n", len(paths))
+	files, err := loadFiles(paths)
+	if err != nil {
+		return err
+	}
+
+	for fileIdx, file := range files {
+		fmt.Printf("[%d/%d]: %v\n", fileIdx+1, len(files), file.path)
+		includeLines := file.includesLineNumbers()
+		enabled := make(map[int]bool, len(file.lines))
+		for i := range file.lines {
+			enabled[i] = true
+		}
+		for includeIdx, line := range includeLines {
+			fmt.Printf("    [%d/%d]: %v", includeIdx+1, len(includeLines), file.lines[line])
+			enabled[line] = false
+			if err := file.save(enabled); err != nil {
+				return err
+			}
+			ok, err := tryBuild()
+			if err != nil {
+				return err
+			}
+			if ok {
+				fmt.Printf(" removed\n")
+				// Wait a bit so file timestamps get an opportunity to change.
+				// Attempting to save too soon after a successful build may
+				// result in a false-positive build.
+				time.Sleep(time.Second)
+			} else {
+				fmt.Printf(" required\n")
+				enabled[line] = true
+			}
+		}
+		if err := file.save(enabled); err != nil {
+			return err
+		}
+		if err := file.format(); err != nil {
+			return err
+		}
+		if err := file.stage(); err != nil {
+			return err
+		}
+	}
+	fmt.Println("Done")
+	return nil
+}
+
+// Attempt to build the project. Returns true on successful build, false if
+// there was a build failure.
+func tryBuild() (bool, error) {
+	cmd := exec.Command("sh", "-c", buildScript)
+	out, err := cmd.CombinedOutput()
+	switch err := err.(type) {
+	case nil:
+		return cmd.ProcessState.Success(), nil
+	case *exec.ExitError:
+		return false, nil
+	default:
+		return false, fmt.Errorf("Test failed with error: %v\n%v", err, string(out))
+	}
+}
+
+type file struct {
+	path  string
+	lines []string
+}
+
+var includeRE = regexp.MustCompile(`^\s*#include (?:\"([^"]*)\"|:?\<([^"]*)\>)`)
+
+// Returns the file path with the extension stripped
+func stripExtension(path string) string {
+	if dot := strings.IndexRune(path, '.'); dot > 0 {
+		return path[:dot]
+	}
+	return path
+}
+
+// Returns the zero-based line numbers of all #include statements in the file
+func (f *file) includesLineNumbers() []int {
+	out := []int{}
+	for i, l := range f.lines {
+		matches := includeRE.FindStringSubmatch(l)
+		if len(matches) == 0 {
+			continue
+		}
+
+		include := matches[1]
+		if include == "" {
+			include = matches[2]
+		}
+
+		if strings.HasSuffix(stripExtension(f.path), stripExtension(include)) {
+			// Don't remove #include for header of cc
+			continue
+		}
+
+		out = append(out, i)
+	}
+	return out
+}
+
+// Saves the file, omitting the lines with the zero-based line number that are
+// either not in `lines` or have a `false` value.
+func (f *file) save(lines map[int]bool) error {
+	content := []string{}
+	for i, l := range f.lines {
+		if lines[i] {
+			content = append(content, l)
+		}
+	}
+	data := []byte(strings.Join(content, "\n"))
+	return ioutil.WriteFile(f.path, data, 0666)
+}
+
+// Runs clang-format on the file
+func (f *file) format() error {
+	err := exec.Command("clang-format", "-i", f.path).Run()
+	if err != nil {
+		return fmt.Errorf("Couldn't format file '%v': %w", f.path, err)
+	}
+	return nil
+}
+
+// Runs git add on the file
+func (f *file) stage() error {
+	err := exec.Command("git", "-C", projectRoot, "add", f.path).Run()
+	if err != nil {
+		return fmt.Errorf("Couldn't stage file '%v': %w", f.path, err)
+	}
+	return nil
+}
+
+// Loads all the files with the given file paths, splitting their content into
+// into lines.
+func loadFiles(paths []string) ([]file, error) {
+	wg := sync.WaitGroup{}
+	wg.Add(len(paths))
+	files := make([]file, len(paths))
+	errs := make([]error, len(paths))
+	for i, path := range paths {
+		i, path := i, filepath.Join(projectRoot, path)
+		go func() {
+			defer wg.Done()
+			body, err := ioutil.ReadFile(path)
+			if err != nil {
+				errs[i] = fmt.Errorf("Failed to open %v: %w", path, err)
+			} else {
+				content := string(body)
+				lines := strings.Split(content, "\n")
+				files[i] = file{path: path, lines: lines}
+			}
+		}()
+	}
+	wg.Wait()
+	for _, err := range errs {
+		if err != nil {
+			return nil, err
+		}
+	}
+	return files, nil
+}
+
+// Returns the path to the directory holding this .go file
+func getToolRoot() string {
+	_, filename, _, ok := runtime.Caller(0)
+	if !ok {
+		panic("No caller information")
+	}
+	mainPath, err := filepath.Abs(filename)
+	if err != nil {
+		panic(err)
+	}
+	return filepath.Dir(mainPath)
+}
+
+// Returns the path to the project root
+func getProjectRoot(toolRoot string) string {
+	root, err := filepath.Abs(filepath.Join(toolRoot, "../.."))
+	if err != nil {
+		panic(err)
+	}
+	return root
+}
diff --git a/tools/trim-includes/match/match.go b/tools/trim-includes/match/match.go
new file mode 100644
index 0000000..24f463d
--- /dev/null
+++ b/tools/trim-includes/match/match.go
@@ -0,0 +1,76 @@
+// Copyright 2020 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//   https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package match provides functions for performing filepath [?,*,**] wildcard
+// matching.
+package match
+
+import (
+	"fmt"
+	"regexp"
+	"strings"
+)
+
+// Test is the match predicate returned by New.
+type Test func(path string) bool
+
+// New returns a Test function that returns true iff the path matches the
+// provided pattern.
+//
+// pattern uses forward-slashes for directory separators '/', and may use the
+// following wildcards:
+//  ?  - matches any single non-separator character
+//  *  - matches any sequence of non-separator characters
+//  ** - matches any sequence of characters including separators
+func New(pattern string) (Test, error) {
+	// Transform pattern into a regex by replacing the uses of `?`, `*`, `**`
+	// with corresponding regex patterns.
+	// As the pattern may contain other regex sequences, the string has to be
+	// escaped. So:
+	// a) Replace the patterns of `?`, `*`, `**` with unique placeholder tokens.
+	// b) Escape the expression so that other sequences don't confuse the regex
+	//    parser.
+	// c) Replace the placeholder tokens with the corresponding regex tokens.
+
+	// Temporary placeholder tokens
+	const (
+		starstar     = "••"
+		star         = "•"
+		questionmark = "¿"
+	)
+	// Check pattern doesn't contain any of our placeholder tokens
+	for _, r := range []rune{'•', '¿'} {
+		if strings.ContainsRune(pattern, r) {
+			return nil, fmt.Errorf("Pattern must not contain '%c'", r)
+		}
+	}
+	// Replace **, * and ? with placeholder tokens
+	subbed := pattern
+	subbed = strings.ReplaceAll(subbed, "**", starstar)
+	subbed = strings.ReplaceAll(subbed, "*", star)
+	subbed = strings.ReplaceAll(subbed, "?", questionmark)
+	// Escape any remaining regex characters
+	escaped := regexp.QuoteMeta(subbed)
+	// Insert regex matchers for the substituted tokens
+	regex := "^" + escaped + "$"
+	regex = strings.ReplaceAll(regex, starstar, ".*")
+	regex = strings.ReplaceAll(regex, star, "[^/]*")
+	regex = strings.ReplaceAll(regex, questionmark, "[^/]")
+
+	re, err := regexp.Compile(regex)
+	if err != nil {
+		return nil, fmt.Errorf(`Failed to compile regex "%v" for pattern "%v": %w`, regex, pattern, err)
+	}
+	return re.MatchString, nil
+}
diff --git a/tools/trim-includes/match/match_test.go b/tools/trim-includes/match/match_test.go
new file mode 100644
index 0000000..a2d926b
--- /dev/null
+++ b/tools/trim-includes/match/match_test.go
@@ -0,0 +1,106 @@
+// Copyright 2020 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//   https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package match_test
+
+import (
+	"strings"
+	"testing"
+
+	match "."
+)
+
+func TestMatch(t *testing.T) {
+	for _, test := range []struct {
+		pattern string
+		path    string
+		expect  bool
+	}{
+		{"a", "a", true},
+		{"b", "a", false},
+
+		{"?", "a", true},
+		{"a/?/c", "a/x/c", true},
+		{"a/??/c", "a/x/c", false},
+		{"a/??/c", "a/xx/c", true},
+		{"a/???/c", "a/x z/c", true},
+		{"a/?/c", "a/xx/c", false},
+		{"a/?/?/c", "a/x/y/c", true},
+		{"a/?/?/?/c", "a/x/y/z/c", true},
+		{"a/???/c", "a/x/y/c", false},
+		{"a/?????", "a/x/y/c", false},
+
+		{"*", "a", true},
+		{"*", "abc", true},
+		{"*", "abc 123", true},
+		{"*", "xxx/yyy", false},
+		{"*/*", "xxx/yyy", true},
+		{"*/*", "xxx/yyy/zzz", false},
+		{"*/*/c", "xxx/yyy/c", true},
+		{"a/*/*", "a/xxx/yyy", true},
+		{"a/*/c", "a/xxx/c", true},
+		{"a/*/c", "a/xxx/c", true},
+		{"a/*/*/c", "a/b/c", false},
+
+		{"**", "a", true},
+		{"**", "abc", true},
+		{"**", "abc 123", true},
+		{"**", "xxx/yyy", true},
+		{"**", "xxx/yyy/zzz", true},
+		{"**/**", "xxx", false},
+		{"**/**", "xxx/yyy", true},
+		{"**/**", "xxx/yyy/zzz", true},
+		{"**/**/**", "xxx/yyy/zzz", true},
+		{"**/**/c", "xxx/yyy/c", true},
+		{"**/**/c", "xxx/yyy/c/d", false},
+		{"a/**/**", "a/xxx/yyy", true},
+		{"a/**/c", "a/xxx/c", true},
+		{"a/**/c", "a/xxx/yyy/c", true},
+		{"a/**/c", "a/xxx/y y/zzz/c", true},
+
+		{"a/**/c", "a/c", false},
+		{"a/**c", "a/c", true},
+
+		{"xxx/**.foo", "xxx/aaa.foo", true},
+		{"xxx/**.foo", "xxx/yyy/zzz/.foo", true},
+		{"xxx/**.foo", "xxx/yyy/zzz/bar.foo", true},
+	} {
+		f, err := match.New(test.pattern)
+		if err != nil {
+			t.Errorf(`match.New("%v")`, test.pattern)
+			continue
+		}
+		matched := f(test.path)
+		switch {
+		case matched && !test.expect:
+			t.Errorf(`Path "%v" matched against pattern "%v"`, test.path, test.pattern)
+		case !matched && test.expect:
+			t.Errorf(`Path "%v" did not match against pattern "%v"`, test.path, test.pattern)
+		}
+	}
+}
+
+func TestErrOnPlaceholder(t *testing.T) {
+	for _, pattern := range []string{"a/b••c", "a/b•c", "a/b/¿c"} {
+		_, err := match.New(pattern)
+		if err == nil {
+			t.Errorf(`match.New("%v") did not return an expected error`, pattern)
+			continue
+		}
+		if !strings.Contains(err.Error(), "Pattern must not contain") {
+			t.Errorf(`match.New("%v") returned unrecognised error: %v`, pattern, err)
+			continue
+		}
+	}
+}