blob: dc559335e4a4e7defdf241b2f30edbb57c7421a4 [file] [log] [blame]
Ben Claytonb78251f2021-03-08 20:49:47 +00001// Copyright 2020 Google LLC
2//
Austin Engcc2516a2023-10-17 20:57:54 +00003// Redistribution and use in source and binary forms, with or without
4// modification, are permitted provided that the following conditions are met:
Ben Claytonb78251f2021-03-08 20:49:47 +00005//
Austin Engcc2516a2023-10-17 20:57:54 +00006// 1. Redistributions of source code must retain the above copyright notice, this
7// list of conditions and the following disclaimer.
Ben Claytonb78251f2021-03-08 20:49:47 +00008//
Austin Engcc2516a2023-10-17 20:57:54 +00009// 2. Redistributions in binary form must reproduce the above copyright notice,
10// this list of conditions and the following disclaimer in the documentation
11// and/or other materials provided with the distribution.
12//
13// 3. Neither the name of the copyright holder nor the names of its
14// contributors may be used to endorse or promote products derived from
15// this software without specific prior written permission.
16//
17// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
21// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
23// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
24// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
25// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Ben Claytonb78251f2021-03-08 20:49:47 +000027
28// Package glob provides file globbing utilities
29package glob
30
31import (
32 "bytes"
33 "encoding/json"
34 "fmt"
35 "io/ioutil"
36 "os"
37 "path/filepath"
Ben Clayton57b2a062021-05-14 19:48:43 +000038 "strings"
Ben Claytonb78251f2021-03-08 20:49:47 +000039
Ben Clayton59e96fe2022-04-07 17:50:24 +000040 "dawn.googlesource.com/dawn/tools/src/match"
Ben Claytonb78251f2021-03-08 20:49:47 +000041)
42
Ben Clayton2880e5d2023-11-16 17:16:57 +000043// Glob returns all the paths that match the given filepath glob
Ben Clayton0699b4f2023-05-15 12:49:23 +000044func Glob(str string) ([]string, error) {
45 abs, err := filepath.Abs(str)
46 if err != nil {
47 return nil, err
48 }
49 root, glob := "", ""
50 // Look for rightmost directory delimiter that's left of a wildcard. Use
51 // that to split the 'root' from the match 'glob'.
52 for i, c := range abs {
53 switch c {
Ben Clayton7cfcc932023-08-14 23:05:38 +000054 case filepath.Separator:
Ben Clayton0699b4f2023-05-15 12:49:23 +000055 root, glob = abs[:i], abs[i+1:]
56 case '*', '?':
57 test, err := match.New(glob)
58 if err != nil {
59 return nil, err
60 }
61 files, err := Scan(root, Config{Paths: searchRules{
62 func(path string, cond bool) bool { return test(path) },
63 }})
64 if err != nil {
65 return nil, err
66 }
67 for i, f := range files {
68 files[i] = filepath.Join(root, f) // rel -> abs
69 }
70 return files, nil
71 }
72 }
73 // No wildcard found. Does the file exist at 'str'?
Ben Claytonb171bec2023-11-21 16:10:32 +000074 if s, err := os.Stat(str); err == nil && !s.IsDir() {
Ben Clayton0699b4f2023-05-15 12:49:23 +000075 return []string{str}, nil
76 }
77 return []string{}, nil
78}
79
Ben Claytonb78251f2021-03-08 20:49:47 +000080// Scan walks all files and subdirectories from root, returning those
81// that Config.shouldExamine() returns true for.
82func Scan(root string, cfg Config) ([]string, error) {
83 files := []string{}
84 err := filepath.Walk(root, func(path string, info os.FileInfo, err error) error {
Ben Claytona8d384e2024-02-02 15:00:05 +000085 if err != nil {
86 return err
87 }
88
Ben Claytonb78251f2021-03-08 20:49:47 +000089 rel, err := filepath.Rel(root, path)
90 if err != nil {
91 rel = path
92 }
93
94 if rel == ".git" {
95 return filepath.SkipDir
96 }
97
98 if !cfg.shouldExamine(root, path) {
99 return nil
100 }
101
102 if !info.IsDir() {
103 files = append(files, rel)
104 }
105
106 return nil
107 })
108 if err != nil {
109 return nil, err
110 }
111 return files, nil
112}
113
114// Configs is a slice of Config.
115type Configs []Config
116
117// Config is used to parse the JSON configuration file.
118type Config struct {
119 // Paths holds a number of JSON objects that contain either a "includes" or
120 // "excludes" key to an array of path patterns.
121 // Each path pattern is considered in turn to either include or exclude the
122 // file path for license scanning. Pattern use forward-slashes '/' for
123 // directory separators, and may use the following wildcards:
124 // ? - matches any single non-separator character
125 // * - matches any sequence of non-separator characters
126 // ** - matches any sequence of characters including separators
127 //
128 // Rules are processed in the order in which they are declared, with later
129 // rules taking precedence over earlier rules.
130 //
131 // All files are excluded before the first rule is evaluated.
132 //
133 // Example:
134 //
135 // {
136 // "paths": [
137 // { "exclude": [ "out/*", "build/*" ] },
138 // { "include": [ "out/foo.txt" ] }
139 // ],
140 // }
141 Paths searchRules
142}
143
144// LoadConfig loads a config file at path.
145func LoadConfig(path string) (Config, error) {
146 cfgBody, err := ioutil.ReadFile(path)
147 if err != nil {
148 return Config{}, err
149 }
Ben Clayton57b2a062021-05-14 19:48:43 +0000150 return ParseConfig(string(cfgBody))
151}
152
153// ParseConfig parses the config from a JSON string.
154func ParseConfig(config string) (Config, error) {
155 d := json.NewDecoder(strings.NewReader(config))
Ben Claytonb78251f2021-03-08 20:49:47 +0000156 cfg := Config{}
157 if err := d.Decode(&cfg); err != nil {
158 return Config{}, err
159 }
160 return cfg, nil
161}
162
Ben Clayton57b2a062021-05-14 19:48:43 +0000163// MustParseConfig parses the config from a JSON string, panicing if the config
164// does not parse
165func MustParseConfig(config string) Config {
166 d := json.NewDecoder(strings.NewReader(config))
167 cfg := Config{}
168 if err := d.Decode(&cfg); err != nil {
169 panic(fmt.Errorf("Failed to parse config: %w\nConfig:\n%v", err, config))
170 }
171 return cfg
172}
173
// rule is a search path predicate.
// path is the root-relative path of the file being considered.
// cond is the result of the rules evaluated so far; it is the value to return
// if the rule neither includes nor excludes the path.
type rule func(path string, cond bool) bool
178
179// searchRules is a ordered list of search rules.
180// searchRules is its own type as it has to perform custom JSON unmarshalling.
181type searchRules []rule
182
183// UnmarshalJSON unmarshals the array of rules in the form:
184// { "include": [ ... ] } or { "exclude": [ ... ] }
185func (l *searchRules) UnmarshalJSON(body []byte) error {
186 type parsed struct {
187 Include []string
188 Exclude []string
189 }
190
191 p := []parsed{}
192 if err := json.NewDecoder(bytes.NewReader(body)).Decode(&p); err != nil {
193 return err
194 }
195
196 *l = searchRules{}
197 for _, rule := range p {
198 rule := rule
199 switch {
200 case len(rule.Include) > 0 && len(rule.Exclude) > 0:
201 return fmt.Errorf("Rule cannot contain both include and exclude")
202 case len(rule.Include) > 0:
203 tests := make([]match.Test, len(rule.Include))
204 for i, pattern := range rule.Include {
205 test, err := match.New(pattern)
206 if err != nil {
207 return err
208 }
209 tests[i] = test
210 }
211 *l = append(*l, func(path string, cond bool) bool {
Ben Clayton0699b4f2023-05-15 12:49:23 +0000212 if cond {
213 return true
214 }
Ben Claytonb78251f2021-03-08 20:49:47 +0000215 for _, test := range tests {
216 if test(path) {
217 return true
218 }
219 }
Ben Clayton0699b4f2023-05-15 12:49:23 +0000220 return false
Ben Claytonb78251f2021-03-08 20:49:47 +0000221 })
222 case len(rule.Exclude) > 0:
223 tests := make([]match.Test, len(rule.Exclude))
224 for i, pattern := range rule.Exclude {
225 test, err := match.New(pattern)
226 if err != nil {
227 return err
228 }
229 tests[i] = test
230 }
231 *l = append(*l, func(path string, cond bool) bool {
Ben Clayton0699b4f2023-05-15 12:49:23 +0000232 if !cond {
233 return false
234 }
Ben Claytonb78251f2021-03-08 20:49:47 +0000235 for _, test := range tests {
236 if test(path) {
237 return false
238 }
239 }
Ben Clayton0699b4f2023-05-15 12:49:23 +0000240 return true
Ben Claytonb78251f2021-03-08 20:49:47 +0000241 })
242 }
243 }
244 return nil
245}
246
247// shouldExamine returns true if the file at absPath should be scanned.
248func (c Config) shouldExamine(root, absPath string) bool {
249 root = filepath.ToSlash(root) // Canonicalize
250 absPath = filepath.ToSlash(absPath) // Canonicalize
251 relPath, err := filepath.Rel(root, absPath)
252 if err != nil {
253 return false
254 }
Ben Clayton892aaf32021-07-05 16:43:17 +0000255 relPath = filepath.ToSlash(relPath) // Canonicalize
Ben Claytonb78251f2021-03-08 20:49:47 +0000256
257 res := false
258 for _, rule := range c.Paths {
259 res = rule(relPath, res)
260 }
261
262 return res
263}