blob: d531e0b4d3e7a4dbfdb99b2af1d100288e266c4a [file] [log] [blame]
Ben Claytonf8e0aac2022-12-12 21:49:02 +00001// Copyright 2022 The Dawn Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package cov
16
17import (
18 "log"
19 "sort"
20 "sync"
21)
22
Ben Clayton408ace62022-12-13 15:48:49 +000023// Optimize optimizes the Tree by de-duplicating common spans into a tree of SpanGroups.
24//
25// Breaking down tests into group hierarchies provide a natural way to structure
26// coverage data, as tests of the same suite, file or test are likely to have
27// similar coverage spans.
28//
29// For each source file in the codebase, we create a tree of SpanGroups, where the
30// leaves are the test cases.
31//
32// For example, given the following Paths:
33//
34// a.b.d.h
35// a.b.d.i.n
36// a.b.d.i.o
37// a.b.e.j
38// a.b.e.k.p
39// a.b.e.k.q
40// a.c.f
41// a.c.g.l.r
42// a.c.g.m
43//
44// We would construct the following tree:
45//
46// a
47// ╭──────┴──────╮
48// b c
49// ╭───┴───╮ ╭───┴───╮
50// d e f g
51// ╭─┴─╮ ╭─┴─╮ ╭─┴─╮
52// h i j k l m
53// ╭┴╮ ╭┴╮ │
54// n o p q r
55//
56// Each leaf node in this tree (`h`, `n`, `o`, `j`, `p`, `q`, `f`, `r`, `m`)
57// represent a test case, and non-leaf nodes (`a`, `b`, `c`, `d`, `e`, `g`, `i`,
58// `k`, `l`) are suite, file or tests.
59//
60// To begin, we create a test tree structure, and associate the full list of test
61// coverage spans with every leaf node (test case) in this tree.
62//
63// This data structure hasn't given us any compression benefits yet, but we can
64// now do a few tricks to dramatically reduce number of spans needed to describe
65// the graph:
66//
67// ~ Optimization 1: Common span promotion ~
68//
69// The first compression scheme is to promote common spans up the tree when they
70// are common for all children. This will reduce the number of spans needed to be
71// encoded in the final file.
72//
73// For example, if the test group `a` has 4 children that all share the same span
74// `X`:
75//
76// a
77// ╭───┬─┴─┬───╮
78// b c d e
79// [X,Y] [X] [X] [X,Z]
80//
81// Then span `X` can be promoted up to `a`:
82//
83// [X]
84// a
85// ╭───┬─┴─┬───╮
86// b c d e
87// [Y] [] [] [Z]
88//
89// ~ Optimization 2: Span XOR promotion ~
90//
91// This idea can be extended further, by not requiring all the children to share
92// the same span before promotion. If *most* child nodes share the same span, we
93// can still promote the span, but this time we *remove* the span from the
94// children *if they had it*, and *add* the span to children *if they didn't
95// have it*.
96//
97// For example, if the test group `a` has 4 children with 3 that share the span
98// `X`:
99//
100// a
101// ╭───┬─┴─┬───╮
102// b c d e
103// [X,Y] [X] [] [X,Z]
104//
105// Then span `X` can be promoted up to `a` by flipping the presence of `X` on the
106// child nodes:
107//
108// [X]
109// a
110// ╭───┬─┴─┬───╮
111// b c d e
112// [Y] [] [X] [Z]
113//
114// This process repeats up the tree.
115//
116// With this optimization applied, we now need to traverse the tree from root to
117// leaf in order to know whether a given span is in use for the leaf node (test case):
118//
119// * If the span is encountered an *odd* number of times during traversal, then
120// the span is *covered*.
121// * If the span is encountered an *even* number of times during traversal, then
122// the span is *not covered*.
123//
124// See tools/src/cov/coverage_test.go for more examples of this optimization.
125//
126// ~ Optimization 3: Common span grouping ~
127//
128// With real world data, we encounter groups of spans that are commonly found
129// together. To further reduce coverage data, the whole graph is scanned for common
130// span patterns, and are indexed by each tree node.
131// The XOR'ing of spans as described above is performed as if the spans were not
132// grouped.
133//
134// ~ Optimization 4: Lookup tables ~
135//
136// All spans, span-groups and strings are stored in de-duplicated tables, and are
137// indexed wherever possible.
Ben Claytonf8e0aac2022-12-12 21:49:02 +0000138func (t *Tree) Optimize() {
139 log.Printf("Optimizing coverage tree...")
140
141 // Start by gathering all of the unique spansets
142 wg := sync.WaitGroup{}
143 wg.Add(len(t.files))
144 for _, file := range t.files {
145 file := file
146 go func() {
147 defer wg.Done()
148 o := optimizer{}
149 for idx, tc := range file.tcm {
150 o.invertForCommon(tc, &t.testRoot.children[idx])
151 }
152 o.createGroups(file)
153 }()
154 }
155 wg.Wait()
156}
157
158type optimizer struct{}
159
160// createGroups looks for common SpanSets, and creates indexable span groups
161// which are then used instead.
162func (o *optimizer) createGroups(f *treeFile) {
163 const minSpansInGroup = 2
164
165 type spansetKey string
166 spansetMap := map[spansetKey]SpanSet{}
167
168 f.tcm.traverse(func(tc *TestCoverage) {
169 if len(tc.Spans) >= minSpansInGroup {
170 key := spansetKey(tc.Spans.String())
171 if _, ok := spansetMap[key]; !ok {
172 spansetMap[key] = tc.Spans
173 }
174 }
175 })
176
177 if len(spansetMap) == 0 {
178 return
179 }
180
181 type spansetInfo struct {
182 key spansetKey
183 set SpanSet // fully expanded set
184 grp SpanGroup
185 id SpanGroupID
186 }
187 spansets := make([]*spansetInfo, 0, len(spansetMap))
188 for key, set := range spansetMap {
189 spansets = append(spansets, &spansetInfo{
190 key: key,
191 set: set,
192 grp: SpanGroup{Spans: set},
193 })
194 }
195
196 // Sort by number of spans in each sets starting with the largest.
197 sort.Slice(spansets, func(i, j int) bool {
198 a, b := spansets[i].set, spansets[j].set
199 switch {
200 case len(a) > len(b):
201 return true
202 case len(a) < len(b):
203 return false
204 }
205 return a.List().Compare(b.List()) == -1 // Just to keep output stable
206 })
207
208 // Assign IDs now that we have stable order.
209 for i := range spansets {
210 spansets[i].id = SpanGroupID(i)
211 }
212
213 // Loop over the spanGroups starting from the largest, and try to fold them
214 // into the larger sets.
215 // This is O(n^2) complexity.
216nextSpan:
217 for i, a := range spansets[:len(spansets)-1] {
218 for _, b := range spansets[i+1:] {
219 if len(a.set) > len(b.set) && a.set.containsAll(b.set) {
220 extend := b.id // Do not take address of iterator!
221 a.grp.Spans = a.set.removeAll(b.set)
222 a.grp.Extend = &extend
223 continue nextSpan
224 }
225 }
226 }
227
228 // Rebuild a map of spansetKey to SpanGroup
229 spangroupMap := make(map[spansetKey]*spansetInfo, len(spansets))
230 for _, s := range spansets {
231 spangroupMap[s.key] = s
232 }
233
234 // Store the groups in the tree
235 f.spangroups = make(map[SpanGroupID]SpanGroup, len(spansets))
236 for _, s := range spansets {
237 f.spangroups[s.id] = s.grp
238 }
239
240 // Update all the uses.
241 f.tcm.traverse(func(tc *TestCoverage) {
242 key := spansetKey(tc.Spans.String())
243 if g, ok := spangroupMap[key]; ok {
244 tc.Spans = nil
245 tc.Group = &g.id
246 }
247 })
248}
249
250// invertCommon looks for tree nodes with the majority of the child nodes with
251// the same spans. This span is promoted up to the parent, and the children
252// have the span inverted.
253func (o *optimizer) invertForCommon(tc *TestCoverage, t *Test) {
254 wg := sync.WaitGroup{}
255 wg.Add(len(tc.Children))
256 for id, child := range tc.Children {
257 id, child := id, child
258 go func() {
259 defer wg.Done()
260 o.invertForCommon(child, &t.children[id])
261 }()
262 }
263 wg.Wait()
264
265 counts := map[SpanID]int{}
266 for _, child := range tc.Children {
267 for span := range child.Spans {
268 counts[span] = counts[span] + 1
269 }
270 }
271
272 for span, count := range counts {
273 if count > len(t.children)/2 {
274 tc.Spans = tc.Spans.invert(span)
275 for _, idx := range t.indices {
276 child := tc.Children.index(idx)
277 child.Spans = child.Spans.invert(span)
278 if child.deletable() {
279 delete(tc.Children, idx)
280 }
281 }
282 }
283 }
284}