Ben Clayton | b78251f | 2021-03-08 20:49:47 +0000 | [diff] [blame] | 1 | // Copyright 2020 Google LLC |
| 2 | // |
Austin Eng | cc2516a | 2023-10-17 20:57:54 +0000 | [diff] [blame] | 3 | // Redistribution and use in source and binary forms, with or without |
| 4 | // modification, are permitted provided that the following conditions are met: |
Ben Clayton | b78251f | 2021-03-08 20:49:47 +0000 | [diff] [blame] | 5 | // |
Austin Eng | cc2516a | 2023-10-17 20:57:54 +0000 | [diff] [blame] | 6 | // 1. Redistributions of source code must retain the above copyright notice, this |
| 7 | // list of conditions and the following disclaimer. |
Ben Clayton | b78251f | 2021-03-08 20:49:47 +0000 | [diff] [blame] | 8 | // |
Austin Eng | cc2516a | 2023-10-17 20:57:54 +0000 | [diff] [blame] | 9 | // 2. Redistributions in binary form must reproduce the above copyright notice, |
| 10 | // this list of conditions and the following disclaimer in the documentation |
| 11 | // and/or other materials provided with the distribution. |
| 12 | // |
| 13 | // 3. Neither the name of the copyright holder nor the names of its |
| 14 | // contributors may be used to endorse or promote products derived from |
| 15 | // this software without specific prior written permission. |
| 16 | // |
| 17 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| 18 | // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 19 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
| 20 | // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE |
| 21 | // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
| 22 | // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
| 23 | // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
| 24 | // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
| 25 | // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 26 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
Ben Clayton | b78251f | 2021-03-08 20:49:47 +0000 | [diff] [blame] | 27 | |
| 28 | // Package glob provides file globbing utilities |
| 29 | package glob |
| 30 | |
| 31 | import ( |
| 32 | "bytes" |
| 33 | "encoding/json" |
| 34 | "fmt" |
| 35 | "io/ioutil" |
| 36 | "os" |
| 37 | "path/filepath" |
Ben Clayton | 57b2a06 | 2021-05-14 19:48:43 +0000 | [diff] [blame] | 38 | "strings" |
Ben Clayton | b78251f | 2021-03-08 20:49:47 +0000 | [diff] [blame] | 39 | |
Ben Clayton | 59e96fe | 2022-04-07 17:50:24 +0000 | [diff] [blame] | 40 | "dawn.googlesource.com/dawn/tools/src/match" |
Ben Clayton | b78251f | 2021-03-08 20:49:47 +0000 | [diff] [blame] | 41 | ) |
| 42 | |
Ben Clayton | 2880e5d | 2023-11-16 17:16:57 +0000 | [diff] [blame] | 43 | // Glob returns all the paths that match the given filepath glob |
Ben Clayton | 0699b4f | 2023-05-15 12:49:23 +0000 | [diff] [blame] | 44 | func Glob(str string) ([]string, error) { |
| 45 | abs, err := filepath.Abs(str) |
| 46 | if err != nil { |
| 47 | return nil, err |
| 48 | } |
| 49 | root, glob := "", "" |
| 50 | // Look for rightmost directory delimiter that's left of a wildcard. Use |
| 51 | // that to split the 'root' from the match 'glob'. |
| 52 | for i, c := range abs { |
| 53 | switch c { |
Ben Clayton | 7cfcc93 | 2023-08-14 23:05:38 +0000 | [diff] [blame] | 54 | case filepath.Separator: |
Ben Clayton | 0699b4f | 2023-05-15 12:49:23 +0000 | [diff] [blame] | 55 | root, glob = abs[:i], abs[i+1:] |
| 56 | case '*', '?': |
| 57 | test, err := match.New(glob) |
| 58 | if err != nil { |
| 59 | return nil, err |
| 60 | } |
| 61 | files, err := Scan(root, Config{Paths: searchRules{ |
| 62 | func(path string, cond bool) bool { return test(path) }, |
| 63 | }}) |
| 64 | if err != nil { |
| 65 | return nil, err |
| 66 | } |
| 67 | for i, f := range files { |
| 68 | files[i] = filepath.Join(root, f) // rel -> abs |
| 69 | } |
| 70 | return files, nil |
| 71 | } |
| 72 | } |
| 73 | // No wildcard found. Does the file exist at 'str'? |
Ben Clayton | b171bec | 2023-11-21 16:10:32 +0000 | [diff] [blame] | 74 | if s, err := os.Stat(str); err == nil && !s.IsDir() { |
Ben Clayton | 0699b4f | 2023-05-15 12:49:23 +0000 | [diff] [blame] | 75 | return []string{str}, nil |
| 76 | } |
| 77 | return []string{}, nil |
| 78 | } |
| 79 | |
Ben Clayton | b78251f | 2021-03-08 20:49:47 +0000 | [diff] [blame] | 80 | // Scan walks all files and subdirectories from root, returning those |
| 81 | // that Config.shouldExamine() returns true for. |
| 82 | func Scan(root string, cfg Config) ([]string, error) { |
| 83 | files := []string{} |
| 84 | err := filepath.Walk(root, func(path string, info os.FileInfo, err error) error { |
Ben Clayton | a8d384e | 2024-02-02 15:00:05 +0000 | [diff] [blame] | 85 | if err != nil { |
| 86 | return err |
| 87 | } |
| 88 | |
Ben Clayton | b78251f | 2021-03-08 20:49:47 +0000 | [diff] [blame] | 89 | rel, err := filepath.Rel(root, path) |
| 90 | if err != nil { |
| 91 | rel = path |
| 92 | } |
| 93 | |
| 94 | if rel == ".git" { |
| 95 | return filepath.SkipDir |
| 96 | } |
| 97 | |
| 98 | if !cfg.shouldExamine(root, path) { |
| 99 | return nil |
| 100 | } |
| 101 | |
| 102 | if !info.IsDir() { |
| 103 | files = append(files, rel) |
| 104 | } |
| 105 | |
| 106 | return nil |
| 107 | }) |
| 108 | if err != nil { |
| 109 | return nil, err |
| 110 | } |
| 111 | return files, nil |
| 112 | } |
| 113 | |
// Configs is a slice of Config values.
type Configs []Config
| 116 | |
// Config is used to parse the JSON configuration file.
type Config struct {
	// Paths holds a number of JSON objects that each contain either an
	// "include" or an "exclude" key mapping to an array of path patterns.
	// An object containing both keys is rejected when parsing; an object
	// containing neither contributes no rule.
	// Each path pattern is considered in turn to either include or exclude the
	// file path for license scanning. Patterns use forward-slashes '/' for
	// directory separators, and may use the following wildcards:
	//  ?  - matches any single non-separator character
	//  *  - matches any sequence of non-separator characters
	//  ** - matches any sequence of characters including separators
	//
	// Rules are processed in the order in which they are declared, with later
	// rules taking precedence over earlier rules.
	//
	// All files are excluded before the first rule is evaluated.
	//
	// Example:
	//
	//	{
	//	  "paths": [
	//	    { "exclude": [ "out/*", "build/*" ] },
	//	    { "include": [ "out/foo.txt" ] }
	//	  ]
	//	}
	Paths searchRules
}
| 143 | |
| 144 | // LoadConfig loads a config file at path. |
| 145 | func LoadConfig(path string) (Config, error) { |
| 146 | cfgBody, err := ioutil.ReadFile(path) |
| 147 | if err != nil { |
| 148 | return Config{}, err |
| 149 | } |
Ben Clayton | 57b2a06 | 2021-05-14 19:48:43 +0000 | [diff] [blame] | 150 | return ParseConfig(string(cfgBody)) |
| 151 | } |
| 152 | |
| 153 | // ParseConfig parses the config from a JSON string. |
| 154 | func ParseConfig(config string) (Config, error) { |
| 155 | d := json.NewDecoder(strings.NewReader(config)) |
Ben Clayton | b78251f | 2021-03-08 20:49:47 +0000 | [diff] [blame] | 156 | cfg := Config{} |
| 157 | if err := d.Decode(&cfg); err != nil { |
| 158 | return Config{}, err |
| 159 | } |
| 160 | return cfg, nil |
| 161 | } |
| 162 | |
Ben Clayton | 57b2a06 | 2021-05-14 19:48:43 +0000 | [diff] [blame] | 163 | // MustParseConfig parses the config from a JSON string, panicing if the config |
| 164 | // does not parse |
| 165 | func MustParseConfig(config string) Config { |
| 166 | d := json.NewDecoder(strings.NewReader(config)) |
| 167 | cfg := Config{} |
| 168 | if err := d.Decode(&cfg); err != nil { |
| 169 | panic(fmt.Errorf("Failed to parse config: %w\nConfig:\n%v", err, config)) |
| 170 | } |
| 171 | return cfg |
| 172 | } |
| 173 | |
// rule is a search path predicate.
// path is the slash-separated file path, relative to the scan root.
// cond is the result accumulated from the preceding rules; it is the value to
// return if the rule doesn't either include or exclude the path.
type rule func(path string, cond bool) bool
| 178 | |
// searchRules is an ordered list of search rules.
// searchRules is its own type as it has to perform custom JSON unmarshalling.
type searchRules []rule
| 182 | |
| 183 | // UnmarshalJSON unmarshals the array of rules in the form: |
| 184 | // { "include": [ ... ] } or { "exclude": [ ... ] } |
| 185 | func (l *searchRules) UnmarshalJSON(body []byte) error { |
| 186 | type parsed struct { |
| 187 | Include []string |
| 188 | Exclude []string |
| 189 | } |
| 190 | |
| 191 | p := []parsed{} |
| 192 | if err := json.NewDecoder(bytes.NewReader(body)).Decode(&p); err != nil { |
| 193 | return err |
| 194 | } |
| 195 | |
| 196 | *l = searchRules{} |
| 197 | for _, rule := range p { |
| 198 | rule := rule |
| 199 | switch { |
| 200 | case len(rule.Include) > 0 && len(rule.Exclude) > 0: |
| 201 | return fmt.Errorf("Rule cannot contain both include and exclude") |
| 202 | case len(rule.Include) > 0: |
| 203 | tests := make([]match.Test, len(rule.Include)) |
| 204 | for i, pattern := range rule.Include { |
| 205 | test, err := match.New(pattern) |
| 206 | if err != nil { |
| 207 | return err |
| 208 | } |
| 209 | tests[i] = test |
| 210 | } |
| 211 | *l = append(*l, func(path string, cond bool) bool { |
Ben Clayton | 0699b4f | 2023-05-15 12:49:23 +0000 | [diff] [blame] | 212 | if cond { |
| 213 | return true |
| 214 | } |
Ben Clayton | b78251f | 2021-03-08 20:49:47 +0000 | [diff] [blame] | 215 | for _, test := range tests { |
| 216 | if test(path) { |
| 217 | return true |
| 218 | } |
| 219 | } |
Ben Clayton | 0699b4f | 2023-05-15 12:49:23 +0000 | [diff] [blame] | 220 | return false |
Ben Clayton | b78251f | 2021-03-08 20:49:47 +0000 | [diff] [blame] | 221 | }) |
| 222 | case len(rule.Exclude) > 0: |
| 223 | tests := make([]match.Test, len(rule.Exclude)) |
| 224 | for i, pattern := range rule.Exclude { |
| 225 | test, err := match.New(pattern) |
| 226 | if err != nil { |
| 227 | return err |
| 228 | } |
| 229 | tests[i] = test |
| 230 | } |
| 231 | *l = append(*l, func(path string, cond bool) bool { |
Ben Clayton | 0699b4f | 2023-05-15 12:49:23 +0000 | [diff] [blame] | 232 | if !cond { |
| 233 | return false |
| 234 | } |
Ben Clayton | b78251f | 2021-03-08 20:49:47 +0000 | [diff] [blame] | 235 | for _, test := range tests { |
| 236 | if test(path) { |
| 237 | return false |
| 238 | } |
| 239 | } |
Ben Clayton | 0699b4f | 2023-05-15 12:49:23 +0000 | [diff] [blame] | 240 | return true |
Ben Clayton | b78251f | 2021-03-08 20:49:47 +0000 | [diff] [blame] | 241 | }) |
| 242 | } |
| 243 | } |
| 244 | return nil |
| 245 | } |
| 246 | |
| 247 | // shouldExamine returns true if the file at absPath should be scanned. |
| 248 | func (c Config) shouldExamine(root, absPath string) bool { |
| 249 | root = filepath.ToSlash(root) // Canonicalize |
| 250 | absPath = filepath.ToSlash(absPath) // Canonicalize |
| 251 | relPath, err := filepath.Rel(root, absPath) |
| 252 | if err != nil { |
| 253 | return false |
| 254 | } |
Ben Clayton | 892aaf3 | 2021-07-05 16:43:17 +0000 | [diff] [blame] | 255 | relPath = filepath.ToSlash(relPath) // Canonicalize |
Ben Clayton | b78251f | 2021-03-08 20:49:47 +0000 | [diff] [blame] | 256 | |
| 257 | res := false |
| 258 | for _, rule := range c.Paths { |
| 259 | res = rule(relPath, res) |
| 260 | } |
| 261 | |
| 262 | return res |
| 263 | } |