Add tools/trim-includes: Script to clean up includes
Change-Id: I37e98f7bf6cc4f6a10eaae6b87b2c8e46dcdee18
Reviewed-on: https://dawn-review.googlesource.com/c/tint/+/43885
Commit-Queue: Ben Clayton <bclayton@google.com>
Reviewed-by: Antonio Maiorano <amaiorano@google.com>
diff --git a/tools/trim-includes/build.sh b/tools/trim-includes/build.sh
new file mode 100755
index 0000000..3b6faeb
--- /dev/null
+++ b/tools/trim-includes/build.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+
+set -e # Fail on any error.
+
+SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd )"
+ROOT_DIR="$( cd "${SCRIPT_DIR}/../.." >/dev/null 2>&1 && pwd )"
+
+cd $ROOT_DIR
+autoninja -C out/Debug
+cd $ROOT_DIR/build
+ninja
diff --git a/tools/trim-includes/config.cfg b/tools/trim-includes/config.cfg
new file mode 100644
index 0000000..e6b69f6
--- /dev/null
+++ b/tools/trim-includes/config.cfg
@@ -0,0 +1,6 @@
+{
+ "paths": [
+ { "include": [ "src/**.h", "src/**.cc" ] },
+ { "exclude": [ "src/**_windows.*", "src/**_other.*" ] }
+ ]
+}
diff --git a/tools/trim-includes/glob/glob.go b/tools/trim-includes/glob/glob.go
new file mode 100644
index 0000000..7234007
--- /dev/null
+++ b/tools/trim-includes/glob/glob.go
@@ -0,0 +1,185 @@
+// Copyright 2020 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package glob provides file globbing utilities
+package glob
+
+import (
+ "bytes"
+ "encoding/json"
+ "fmt"
+ "io/ioutil"
+ "os"
+ "path/filepath"
+
+ "dawn.googlesource.com/tint/tools/trim-includes/match"
+)
+
+// Scan walks all files and subdirectories from root, returning those
+// that Config.shouldExamine() returns true for.
+func Scan(root string, cfg Config) ([]string, error) {
+ files := []string{}
+ err := filepath.Walk(root, func(path string, info os.FileInfo, err error) error {
+ rel, err := filepath.Rel(root, path)
+ if err != nil {
+ rel = path
+ }
+
+ if rel == ".git" {
+ return filepath.SkipDir
+ }
+
+ if !cfg.shouldExamine(root, path) {
+ return nil
+ }
+
+ if !info.IsDir() {
+ files = append(files, rel)
+ }
+
+ return nil
+ })
+ if err != nil {
+ return nil, err
+ }
+ return files, nil
+}
+
+// Configs is a slice of Config.
+type Configs []Config
+
+// Config is used to parse the JSON configuration file.
+type Config struct {
+ // Paths holds a number of JSON objects that contain either a "includes" or
+ // "excludes" key to an array of path patterns.
+ // Each path pattern is considered in turn to either include or exclude the
+ // file path for license scanning. Pattern use forward-slashes '/' for
+ // directory separators, and may use the following wildcards:
+ // ? - matches any single non-separator character
+ // * - matches any sequence of non-separator characters
+ // ** - matches any sequence of characters including separators
+ //
+ // Rules are processed in the order in which they are declared, with later
+ // rules taking precedence over earlier rules.
+ //
+ // All files are excluded before the first rule is evaluated.
+ //
+ // Example:
+ //
+ // {
+ // "paths": [
+ // { "exclude": [ "out/*", "build/*" ] },
+ // { "include": [ "out/foo.txt" ] }
+ // ],
+ // }
+ Paths searchRules
+}
+
+// LoadConfig loads a config file at path.
+func LoadConfig(path string) (Config, error) {
+ cfgBody, err := ioutil.ReadFile(path)
+ if err != nil {
+ return Config{}, err
+ }
+ d := json.NewDecoder(bytes.NewReader(cfgBody))
+ cfg := Config{}
+ if err := d.Decode(&cfg); err != nil {
+ return Config{}, err
+ }
+ return cfg, nil
+}
+
+// rule is a search path predicate.
+// root is the project relative path.
+// cond is the value to return if the rule doesn't either include or exclude.
+type rule func(path string, cond bool) bool
+
+// searchRules is a ordered list of search rules.
+// searchRules is its own type as it has to perform custom JSON unmarshalling.
+type searchRules []rule
+
+// UnmarshalJSON unmarshals the array of rules in the form:
+// { "include": [ ... ] } or { "exclude": [ ... ] }
+func (l *searchRules) UnmarshalJSON(body []byte) error {
+ type parsed struct {
+ Include []string
+ Exclude []string
+ }
+
+ p := []parsed{}
+ if err := json.NewDecoder(bytes.NewReader(body)).Decode(&p); err != nil {
+ return err
+ }
+
+ *l = searchRules{}
+ for _, rule := range p {
+ rule := rule
+ switch {
+ case len(rule.Include) > 0 && len(rule.Exclude) > 0:
+ return fmt.Errorf("Rule cannot contain both include and exclude")
+ case len(rule.Include) > 0:
+ tests := make([]match.Test, len(rule.Include))
+ for i, pattern := range rule.Include {
+ test, err := match.New(pattern)
+ if err != nil {
+ return err
+ }
+ tests[i] = test
+ }
+ *l = append(*l, func(path string, cond bool) bool {
+ for _, test := range tests {
+ if test(path) {
+ return true
+ }
+ }
+ return cond
+ })
+ case len(rule.Exclude) > 0:
+ tests := make([]match.Test, len(rule.Exclude))
+ for i, pattern := range rule.Exclude {
+ test, err := match.New(pattern)
+ if err != nil {
+ return err
+ }
+ tests[i] = test
+ }
+ *l = append(*l, func(path string, cond bool) bool {
+ for _, test := range tests {
+ if test(path) {
+ return false
+ }
+ }
+ return cond
+ })
+ }
+ }
+ return nil
+}
+
+// shouldExamine returns true if the file at absPath should be scanned.
+func (c Config) shouldExamine(root, absPath string) bool {
+ root = filepath.ToSlash(root) // Canonicalize
+ absPath = filepath.ToSlash(absPath) // Canonicalize
+ relPath, err := filepath.Rel(root, absPath)
+ if err != nil {
+ return false
+ }
+
+ res := false
+ for _, rule := range c.Paths {
+ res = rule(relPath, res)
+ }
+
+ return res
+}
diff --git a/tools/trim-includes/go.mod b/tools/trim-includes/go.mod
new file mode 100644
index 0000000..19aab32
--- /dev/null
+++ b/tools/trim-includes/go.mod
@@ -0,0 +1,3 @@
+module dawn.googlesource.com/tint/tools/trim-includes
+
+go 1.16
diff --git a/tools/trim-includes/main.go b/tools/trim-includes/main.go
new file mode 100644
index 0000000..3e049b9
--- /dev/null
+++ b/tools/trim-includes/main.go
@@ -0,0 +1,295 @@
+// Copyright 2021 The Tint Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// trim-includes is a tool to try removing unnecessary include statements from
+// all .cc and .h files in the tint project.
+//
+// trim-includes removes each #include from each file, then runs the provided
+// build script to determine whether the line was necessary. If the include is
+// required, it is restored, otherwise it is left deleted.
+// After all the #include statements have been tested, the file is
+// clang-formatted and git staged.
+package main
+
+import (
+ "flag"
+ "fmt"
+ "io/ioutil"
+ "os"
+ "os/exec"
+ "path/filepath"
+ "regexp"
+ "runtime"
+ "strings"
+ "sync"
+ "time"
+
+ "dawn.googlesource.com/tint/tools/trim-includes/glob"
+)
+
+var (
+ // Directory to this .go file
+ toolRoot = getToolRoot()
+ // Root directory of the Tint project
+ projectRoot = getProjectRoot(toolRoot)
+
+ // Path to the build script to run after each attempting to remove each
+ // #include
+ buildScript = ""
+)
+
+func main() {
+ if err := run(); err != nil {
+ fmt.Println(err)
+ os.Exit(1)
+ }
+}
+
+func showUsage() {
+ fmt.Println(`
+trim-includes is a tool to try removing unnecessary include statements from all
+.cc and .h files in the tint project.
+
+trim-includes removes each #include from each file, then runs the provided build
+script to determine whether the line was necessary. If the include is required,
+it is restored, otherwise it is left deleted.
+After all the #include statements have been tested, the file is clang-formatted
+and git staged.
+
+Usage:
+ trim-includes <path-to-build-script>`)
+ os.Exit(1)
+}
+
+func run() error {
+ flag.Parse()
+ args := flag.Args()
+ if len(args) < 1 {
+ showUsage()
+ }
+
+ var err error
+ buildScript, err = exec.LookPath(args[0])
+ if err != nil {
+ return err
+ }
+ buildScript, err = filepath.Abs(buildScript)
+ if err != nil {
+ return err
+ }
+
+ cfg, err := glob.LoadConfig("config.cfg")
+ if err != nil {
+ return err
+ }
+
+ fmt.Println("Checking the project builds with no changes...")
+ ok, err := tryBuild()
+ if err != nil {
+ return err
+ }
+ if !ok {
+ return fmt.Errorf("Project does not build without edits")
+ }
+
+ fmt.Println("Scanning for files...")
+ paths, err := glob.Scan(projectRoot, cfg)
+ if err != nil {
+ return err
+ }
+
+ fmt.Printf("Loading %v source files...\n", len(paths))
+ files, err := loadFiles(paths)
+ if err != nil {
+ return err
+ }
+
+ for fileIdx, file := range files {
+ fmt.Printf("[%d/%d]: %v\n", fileIdx+1, len(files), file.path)
+ includeLines := file.includesLineNumbers()
+ enabled := make(map[int]bool, len(file.lines))
+ for i := range file.lines {
+ enabled[i] = true
+ }
+ for includeIdx, line := range includeLines {
+ fmt.Printf(" [%d/%d]: %v", includeIdx+1, len(includeLines), file.lines[line])
+ enabled[line] = false
+ if err := file.save(enabled); err != nil {
+ return err
+ }
+ ok, err := tryBuild()
+ if err != nil {
+ return err
+ }
+ if ok {
+ fmt.Printf(" removed\n")
+ // Wait a bit so file timestamps get an opportunity to change.
+ // Attempting to save too soon after a successful build may
+ // result in a false-positive build.
+ time.Sleep(time.Second)
+ } else {
+ fmt.Printf(" required\n")
+ enabled[line] = true
+ }
+ }
+ if err := file.save(enabled); err != nil {
+ return err
+ }
+ if err := file.format(); err != nil {
+ return err
+ }
+ if err := file.stage(); err != nil {
+ return err
+ }
+ }
+ fmt.Println("Done")
+ return nil
+}
+
+// Attempt to build the project. Returns true on successful build, false if
+// there was a build failure.
+func tryBuild() (bool, error) {
+ cmd := exec.Command("sh", "-c", buildScript)
+ out, err := cmd.CombinedOutput()
+ switch err := err.(type) {
+ case nil:
+ return cmd.ProcessState.Success(), nil
+ case *exec.ExitError:
+ return false, nil
+ default:
+ return false, fmt.Errorf("Test failed with error: %v\n%v", err, string(out))
+ }
+}
+
+type file struct {
+ path string
+ lines []string
+}
+
+var includeRE = regexp.MustCompile(`^\s*#include (?:\"([^"]*)\"|:?\<([^"]*)\>)`)
+
+// Returns the file path with the extension stripped
+func stripExtension(path string) string {
+ if dot := strings.IndexRune(path, '.'); dot > 0 {
+ return path[:dot]
+ }
+ return path
+}
+
+// Returns the zero-based line numbers of all #include statements in the file
+func (f *file) includesLineNumbers() []int {
+ out := []int{}
+ for i, l := range f.lines {
+ matches := includeRE.FindStringSubmatch(l)
+ if len(matches) == 0 {
+ continue
+ }
+
+ include := matches[1]
+ if include == "" {
+ include = matches[2]
+ }
+
+ if strings.HasSuffix(stripExtension(f.path), stripExtension(include)) {
+ // Don't remove #include for header of cc
+ continue
+ }
+
+ out = append(out, i)
+ }
+ return out
+}
+
+// Saves the file, omitting the lines with the zero-based line number that are
+// either not in `lines` or have a `false` value.
+func (f *file) save(lines map[int]bool) error {
+ content := []string{}
+ for i, l := range f.lines {
+ if lines[i] {
+ content = append(content, l)
+ }
+ }
+ data := []byte(strings.Join(content, "\n"))
+ return ioutil.WriteFile(f.path, data, 0666)
+}
+
+// Runs clang-format on the file
+func (f *file) format() error {
+ err := exec.Command("clang-format", "-i", f.path).Run()
+ if err != nil {
+ return fmt.Errorf("Couldn't format file '%v': %w", f.path, err)
+ }
+ return nil
+}
+
+// Runs git add on the file
+func (f *file) stage() error {
+ err := exec.Command("git", "-C", projectRoot, "add", f.path).Run()
+ if err != nil {
+ return fmt.Errorf("Couldn't stage file '%v': %w", f.path, err)
+ }
+ return nil
+}
+
+// Loads all the files with the given file paths, splitting their content into
+// into lines.
+func loadFiles(paths []string) ([]file, error) {
+ wg := sync.WaitGroup{}
+ wg.Add(len(paths))
+ files := make([]file, len(paths))
+ errs := make([]error, len(paths))
+ for i, path := range paths {
+ i, path := i, filepath.Join(projectRoot, path)
+ go func() {
+ defer wg.Done()
+ body, err := ioutil.ReadFile(path)
+ if err != nil {
+ errs[i] = fmt.Errorf("Failed to open %v: %w", path, err)
+ } else {
+ content := string(body)
+ lines := strings.Split(content, "\n")
+ files[i] = file{path: path, lines: lines}
+ }
+ }()
+ }
+ wg.Wait()
+ for _, err := range errs {
+ if err != nil {
+ return nil, err
+ }
+ }
+ return files, nil
+}
+
+// Returns the path to the directory holding this .go file
+func getToolRoot() string {
+ _, filename, _, ok := runtime.Caller(0)
+ if !ok {
+ panic("No caller information")
+ }
+ mainPath, err := filepath.Abs(filename)
+ if err != nil {
+ panic(err)
+ }
+ return filepath.Dir(mainPath)
+}
+
+// Returns the path to the project root
+func getProjectRoot(toolRoot string) string {
+ root, err := filepath.Abs(filepath.Join(toolRoot, "../.."))
+ if err != nil {
+ panic(err)
+ }
+ return root
+}
diff --git a/tools/trim-includes/match/match.go b/tools/trim-includes/match/match.go
new file mode 100644
index 0000000..24f463d
--- /dev/null
+++ b/tools/trim-includes/match/match.go
@@ -0,0 +1,76 @@
+// Copyright 2020 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package match provides functions for performing filepath [?,*,**] wildcard
+// matching.
+package match
+
+import (
+ "fmt"
+ "regexp"
+ "strings"
+)
+
+// Test is the match predicate returned by New.
+type Test func(path string) bool
+
+// New returns a Test function that returns true iff the path matches the
+// provided pattern.
+//
+// pattern uses forward-slashes for directory separators '/', and may use the
+// following wildcards:
+// ? - matches any single non-separator character
+// * - matches any sequence of non-separator characters
+// ** - matches any sequence of characters including separators
+func New(pattern string) (Test, error) {
+ // Transform pattern into a regex by replacing the uses of `?`, `*`, `**`
+ // with corresponding regex patterns.
+ // As the pattern may contain other regex sequences, the string has to be
+ // escaped. So:
+ // a) Replace the patterns of `?`, `*`, `**` with unique placeholder tokens.
+ // b) Escape the expression so that other sequences don't confuse the regex
+ // parser.
+ // c) Replace the placeholder tokens with the corresponding regex tokens.
+
+ // Temporary placeholder tokens
+ const (
+ starstar = "••"
+ star = "•"
+ questionmark = "¿"
+ )
+ // Check pattern doesn't contain any of our placeholder tokens
+ for _, r := range []rune{'•', '¿'} {
+ if strings.ContainsRune(pattern, r) {
+ return nil, fmt.Errorf("Pattern must not contain '%c'", r)
+ }
+ }
+ // Replace **, * and ? with placeholder tokens
+ subbed := pattern
+ subbed = strings.ReplaceAll(subbed, "**", starstar)
+ subbed = strings.ReplaceAll(subbed, "*", star)
+ subbed = strings.ReplaceAll(subbed, "?", questionmark)
+ // Escape any remaining regex characters
+ escaped := regexp.QuoteMeta(subbed)
+ // Insert regex matchers for the substituted tokens
+ regex := "^" + escaped + "$"
+ regex = strings.ReplaceAll(regex, starstar, ".*")
+ regex = strings.ReplaceAll(regex, star, "[^/]*")
+ regex = strings.ReplaceAll(regex, questionmark, "[^/]")
+
+ re, err := regexp.Compile(regex)
+ if err != nil {
+ return nil, fmt.Errorf(`Failed to compile regex "%v" for pattern "%v": %w`, regex, pattern, err)
+ }
+ return re.MatchString, nil
+}
diff --git a/tools/trim-includes/match/match_test.go b/tools/trim-includes/match/match_test.go
new file mode 100644
index 0000000..a2d926b
--- /dev/null
+++ b/tools/trim-includes/match/match_test.go
@@ -0,0 +1,106 @@
+// Copyright 2020 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package match_test
+
+import (
+ "strings"
+ "testing"
+
+ match "."
+)
+
+func TestMatch(t *testing.T) {
+ for _, test := range []struct {
+ pattern string
+ path string
+ expect bool
+ }{
+ {"a", "a", true},
+ {"b", "a", false},
+
+ {"?", "a", true},
+ {"a/?/c", "a/x/c", true},
+ {"a/??/c", "a/x/c", false},
+ {"a/??/c", "a/xx/c", true},
+ {"a/???/c", "a/x z/c", true},
+ {"a/?/c", "a/xx/c", false},
+ {"a/?/?/c", "a/x/y/c", true},
+ {"a/?/?/?/c", "a/x/y/z/c", true},
+ {"a/???/c", "a/x/y/c", false},
+ {"a/?????", "a/x/y/c", false},
+
+ {"*", "a", true},
+ {"*", "abc", true},
+ {"*", "abc 123", true},
+ {"*", "xxx/yyy", false},
+ {"*/*", "xxx/yyy", true},
+ {"*/*", "xxx/yyy/zzz", false},
+ {"*/*/c", "xxx/yyy/c", true},
+ {"a/*/*", "a/xxx/yyy", true},
+ {"a/*/c", "a/xxx/c", true},
+ {"a/*/c", "a/xxx/c", true},
+ {"a/*/*/c", "a/b/c", false},
+
+ {"**", "a", true},
+ {"**", "abc", true},
+ {"**", "abc 123", true},
+ {"**", "xxx/yyy", true},
+ {"**", "xxx/yyy/zzz", true},
+ {"**/**", "xxx", false},
+ {"**/**", "xxx/yyy", true},
+ {"**/**", "xxx/yyy/zzz", true},
+ {"**/**/**", "xxx/yyy/zzz", true},
+ {"**/**/c", "xxx/yyy/c", true},
+ {"**/**/c", "xxx/yyy/c/d", false},
+ {"a/**/**", "a/xxx/yyy", true},
+ {"a/**/c", "a/xxx/c", true},
+ {"a/**/c", "a/xxx/yyy/c", true},
+ {"a/**/c", "a/xxx/y y/zzz/c", true},
+
+ {"a/**/c", "a/c", false},
+ {"a/**c", "a/c", true},
+
+ {"xxx/**.foo", "xxx/aaa.foo", true},
+ {"xxx/**.foo", "xxx/yyy/zzz/.foo", true},
+ {"xxx/**.foo", "xxx/yyy/zzz/bar.foo", true},
+ } {
+ f, err := match.New(test.pattern)
+ if err != nil {
+ t.Errorf(`match.New("%v")`, test.pattern)
+ continue
+ }
+ matched := f(test.path)
+ switch {
+ case matched && !test.expect:
+ t.Errorf(`Path "%v" matched against pattern "%v"`, test.path, test.pattern)
+ case !matched && test.expect:
+ t.Errorf(`Path "%v" did not match against pattern "%v"`, test.path, test.pattern)
+ }
+ }
+}
+
+func TestErrOnPlaceholder(t *testing.T) {
+ for _, pattern := range []string{"a/b••c", "a/b•c", "a/b/¿c"} {
+ _, err := match.New(pattern)
+ if err == nil {
+ t.Errorf(`match.New("%v") did not return an expected error`, pattern)
+ continue
+ }
+ if !strings.Contains(err.Error(), "Pattern must not contain") {
+ t.Errorf(`match.New("%v") returned unrecognised error: %v`, pattern, err)
+ continue
+ }
+ }
+}