Ben Clayton | f8e0aac | 2022-12-12 21:49:02 +0000 | [diff] [blame] | 1 | // Copyright 2022 The Dawn Authors |
| 2 | // |
| 3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | // you may not use this file except in compliance with the License. |
| 5 | // You may obtain a copy of the License at |
| 6 | // |
| 7 | // http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | // |
| 9 | // Unless required by applicable law or agreed to in writing, software |
| 10 | // distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | // See the License for the specific language governing permissions and |
| 13 | // limitations under the License. |
| 14 | |
| 15 | package cov |
| 16 | |
| 17 | import ( |
| 18 | "log" |
| 19 | "sort" |
| 20 | "sync" |
| 21 | ) |
| 22 | |
Ben Clayton | 408ace6 | 2022-12-13 15:48:49 +0000 | [diff] [blame] | 23 | // Optimize optimizes the Tree by de-duplicating common spans into a tree of SpanGroups. |
| 24 | // |
| 25 | // Breaking down tests into group hierarchies provide a natural way to structure |
| 26 | // coverage data, as tests of the same suite, file or test are likely to have |
| 27 | // similar coverage spans. |
| 28 | // |
| 29 | // For each source file in the codebase, we create a tree of SpanGroups, where the |
| 30 | // leaves are the test cases. |
| 31 | // |
| 32 | // For example, given the following Paths: |
| 33 | // |
| 34 | // a.b.d.h |
| 35 | // a.b.d.i.n |
| 36 | // a.b.d.i.o |
| 37 | // a.b.e.j |
| 38 | // a.b.e.k.p |
| 39 | // a.b.e.k.q |
| 40 | // a.c.f |
| 41 | // a.c.g.l.r |
| 42 | // a.c.g.m |
| 43 | // |
| 44 | // We would construct the following tree: |
| 45 | // |
| 46 | // a |
| 47 | // ╭──────┴──────╮ |
| 48 | // b c |
| 49 | // ╭───┴───╮ ╭───┴───╮ |
| 50 | // d e f g |
| 51 | // ╭─┴─╮ ╭─┴─╮ ╭─┴─╮ |
| 52 | // h i j k l m |
| 53 | // ╭┴╮ ╭┴╮ │ |
| 54 | // n o p q r |
| 55 | // |
| 56 | // Each leaf node in this tree (`h`, `n`, `o`, `j`, `p`, `q`, `f`, `r`, `m`) |
| 57 | // represent a test case, and non-leaf nodes (`a`, `b`, `c`, `d`, `e`, `g`, `i`, |
| 58 | // `k`, `l`) are suite, file or tests. |
| 59 | // |
| 60 | // To begin, we create a test tree structure, and associate the full list of test |
| 61 | // coverage spans with every leaf node (test case) in this tree. |
| 62 | // |
| 63 | // This data structure hasn't given us any compression benefits yet, but we can |
| 64 | // now do a few tricks to dramatically reduce number of spans needed to describe |
| 65 | // the graph: |
| 66 | // |
| 67 | // ~ Optimization 1: Common span promotion ~ |
| 68 | // |
| 69 | // The first compression scheme is to promote common spans up the tree when they |
| 70 | // are common for all children. This will reduce the number of spans needed to be |
| 71 | // encoded in the final file. |
| 72 | // |
| 73 | // For example, if the test group `a` has 4 children that all share the same span |
| 74 | // `X`: |
| 75 | // |
| 76 | // a |
| 77 | // ╭───┬─┴─┬───╮ |
| 78 | // b c d e |
| 79 | // [X,Y] [X] [X] [X,Z] |
| 80 | // |
| 81 | // Then span `X` can be promoted up to `a`: |
| 82 | // |
| 83 | // [X] |
| 84 | // a |
| 85 | // ╭───┬─┴─┬───╮ |
| 86 | // b c d e |
| 87 | // [Y] [] [] [Z] |
| 88 | // |
| 89 | // ~ Optimization 2: Span XOR promotion ~ |
| 90 | // |
| 91 | // This idea can be extended further, by not requiring all the children to share |
| 92 | // the same span before promotion. If *most* child nodes share the same span, we |
| 93 | // can still promote the span, but this time we *remove* the span from the |
| 94 | // children *if they had it*, and *add* the span to children *if they didn't |
| 95 | // have it*. |
| 96 | // |
| 97 | // For example, if the test group `a` has 4 children with 3 that share the span |
| 98 | // `X`: |
| 99 | // |
| 100 | // a |
| 101 | // ╭───┬─┴─┬───╮ |
| 102 | // b c d e |
| 103 | // [X,Y] [X] [] [X,Z] |
| 104 | // |
| 105 | // Then span `X` can be promoted up to `a` by flipping the presence of `X` on the |
| 106 | // child nodes: |
| 107 | // |
| 108 | // [X] |
| 109 | // a |
| 110 | // ╭───┬─┴─┬───╮ |
| 111 | // b c d e |
| 112 | // [Y] [] [X] [Z] |
| 113 | // |
| 114 | // This process repeats up the tree. |
| 115 | // |
| 116 | // With this optimization applied, we now need to traverse the tree from root to |
| 117 | // leaf in order to know whether a given span is in use for the leaf node (test case): |
| 118 | // |
| 119 | // * If the span is encountered an *odd* number of times during traversal, then |
| 120 | // the span is *covered*. |
| 121 | // * If the span is encountered an *even* number of times during traversal, then |
| 122 | // the span is *not covered*. |
| 123 | // |
| 124 | // See tools/src/cov/coverage_test.go for more examples of this optimization. |
| 125 | // |
| 126 | // ~ Optimization 3: Common span grouping ~ |
| 127 | // |
| 128 | // With real world data, we encounter groups of spans that are commonly found |
| 129 | // together. To further reduce coverage data, the whole graph is scanned for common |
| 130 | // span patterns, and are indexed by each tree node. |
| 131 | // The XOR'ing of spans as described above is performed as if the spans were not |
| 132 | // grouped. |
| 133 | // |
| 134 | // ~ Optimization 4: Lookup tables ~ |
| 135 | // |
| 136 | // All spans, span-groups and strings are stored in de-duplicated tables, and are |
| 137 | // indexed wherever possible. |
Ben Clayton | f8e0aac | 2022-12-12 21:49:02 +0000 | [diff] [blame] | 138 | func (t *Tree) Optimize() { |
| 139 | log.Printf("Optimizing coverage tree...") |
| 140 | |
| 141 | // Start by gathering all of the unique spansets |
| 142 | wg := sync.WaitGroup{} |
| 143 | wg.Add(len(t.files)) |
| 144 | for _, file := range t.files { |
| 145 | file := file |
| 146 | go func() { |
| 147 | defer wg.Done() |
| 148 | o := optimizer{} |
| 149 | for idx, tc := range file.tcm { |
| 150 | o.invertForCommon(tc, &t.testRoot.children[idx]) |
| 151 | } |
| 152 | o.createGroups(file) |
| 153 | }() |
| 154 | } |
| 155 | wg.Wait() |
| 156 | } |
| 157 | |
| 158 | type optimizer struct{} |
| 159 | |
| 160 | // createGroups looks for common SpanSets, and creates indexable span groups |
| 161 | // which are then used instead. |
| 162 | func (o *optimizer) createGroups(f *treeFile) { |
| 163 | const minSpansInGroup = 2 |
| 164 | |
| 165 | type spansetKey string |
| 166 | spansetMap := map[spansetKey]SpanSet{} |
| 167 | |
| 168 | f.tcm.traverse(func(tc *TestCoverage) { |
| 169 | if len(tc.Spans) >= minSpansInGroup { |
| 170 | key := spansetKey(tc.Spans.String()) |
| 171 | if _, ok := spansetMap[key]; !ok { |
| 172 | spansetMap[key] = tc.Spans |
| 173 | } |
| 174 | } |
| 175 | }) |
| 176 | |
| 177 | if len(spansetMap) == 0 { |
| 178 | return |
| 179 | } |
| 180 | |
| 181 | type spansetInfo struct { |
| 182 | key spansetKey |
| 183 | set SpanSet // fully expanded set |
| 184 | grp SpanGroup |
| 185 | id SpanGroupID |
| 186 | } |
| 187 | spansets := make([]*spansetInfo, 0, len(spansetMap)) |
| 188 | for key, set := range spansetMap { |
| 189 | spansets = append(spansets, &spansetInfo{ |
| 190 | key: key, |
| 191 | set: set, |
| 192 | grp: SpanGroup{Spans: set}, |
| 193 | }) |
| 194 | } |
| 195 | |
| 196 | // Sort by number of spans in each sets starting with the largest. |
| 197 | sort.Slice(spansets, func(i, j int) bool { |
| 198 | a, b := spansets[i].set, spansets[j].set |
| 199 | switch { |
| 200 | case len(a) > len(b): |
| 201 | return true |
| 202 | case len(a) < len(b): |
| 203 | return false |
| 204 | } |
| 205 | return a.List().Compare(b.List()) == -1 // Just to keep output stable |
| 206 | }) |
| 207 | |
| 208 | // Assign IDs now that we have stable order. |
| 209 | for i := range spansets { |
| 210 | spansets[i].id = SpanGroupID(i) |
| 211 | } |
| 212 | |
| 213 | // Loop over the spanGroups starting from the largest, and try to fold them |
| 214 | // into the larger sets. |
| 215 | // This is O(n^2) complexity. |
| 216 | nextSpan: |
| 217 | for i, a := range spansets[:len(spansets)-1] { |
| 218 | for _, b := range spansets[i+1:] { |
| 219 | if len(a.set) > len(b.set) && a.set.containsAll(b.set) { |
| 220 | extend := b.id // Do not take address of iterator! |
| 221 | a.grp.Spans = a.set.removeAll(b.set) |
| 222 | a.grp.Extend = &extend |
| 223 | continue nextSpan |
| 224 | } |
| 225 | } |
| 226 | } |
| 227 | |
| 228 | // Rebuild a map of spansetKey to SpanGroup |
| 229 | spangroupMap := make(map[spansetKey]*spansetInfo, len(spansets)) |
| 230 | for _, s := range spansets { |
| 231 | spangroupMap[s.key] = s |
| 232 | } |
| 233 | |
| 234 | // Store the groups in the tree |
| 235 | f.spangroups = make(map[SpanGroupID]SpanGroup, len(spansets)) |
| 236 | for _, s := range spansets { |
| 237 | f.spangroups[s.id] = s.grp |
| 238 | } |
| 239 | |
| 240 | // Update all the uses. |
| 241 | f.tcm.traverse(func(tc *TestCoverage) { |
| 242 | key := spansetKey(tc.Spans.String()) |
| 243 | if g, ok := spangroupMap[key]; ok { |
| 244 | tc.Spans = nil |
| 245 | tc.Group = &g.id |
| 246 | } |
| 247 | }) |
| 248 | } |
| 249 | |
| 250 | // invertCommon looks for tree nodes with the majority of the child nodes with |
| 251 | // the same spans. This span is promoted up to the parent, and the children |
| 252 | // have the span inverted. |
| 253 | func (o *optimizer) invertForCommon(tc *TestCoverage, t *Test) { |
| 254 | wg := sync.WaitGroup{} |
| 255 | wg.Add(len(tc.Children)) |
| 256 | for id, child := range tc.Children { |
| 257 | id, child := id, child |
| 258 | go func() { |
| 259 | defer wg.Done() |
| 260 | o.invertForCommon(child, &t.children[id]) |
| 261 | }() |
| 262 | } |
| 263 | wg.Wait() |
| 264 | |
| 265 | counts := map[SpanID]int{} |
| 266 | for _, child := range tc.Children { |
| 267 | for span := range child.Spans { |
| 268 | counts[span] = counts[span] + 1 |
| 269 | } |
| 270 | } |
| 271 | |
| 272 | for span, count := range counts { |
| 273 | if count > len(t.children)/2 { |
| 274 | tc.Spans = tc.Spans.invert(span) |
| 275 | for _, idx := range t.indices { |
| 276 | child := tc.Children.index(idx) |
| 277 | child.Spans = child.Spans.invert(span) |
| 278 | if child.deletable() { |
| 279 | delete(tc.Children, idx) |
| 280 | } |
| 281 | } |
| 282 | } |
| 283 | } |
| 284 | } |