| // Copyright 2022 The Dawn Authors |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| package cov |
| |
| import ( |
| "log" |
| "sort" |
| "sync" |
| ) |
| |
| // Optimize optimizes the Tree by de-duplicating common spans into a tree of SpanGroups. |
| // |
| // Breaking down tests into group hierarchies provide a natural way to structure |
| // coverage data, as tests of the same suite, file or test are likely to have |
| // similar coverage spans. |
| // |
| // For each source file in the codebase, we create a tree of SpanGroups, where the |
| // leaves are the test cases. |
| // |
| // For example, given the following Paths: |
| // |
| // a.b.d.h |
| // a.b.d.i.n |
| // a.b.d.i.o |
| // a.b.e.j |
| // a.b.e.k.p |
| // a.b.e.k.q |
| // a.c.f |
| // a.c.g.l.r |
| // a.c.g.m |
| // |
| // We would construct the following tree: |
| // |
| // a |
| // ╭──────┴──────╮ |
| // b c |
| // ╭───┴───╮ ╭───┴───╮ |
| // d e f g |
| // ╭─┴─╮ ╭─┴─╮ ╭─┴─╮ |
| // h i j k l m |
| // ╭┴╮ ╭┴╮ │ |
| // n o p q r |
| // |
| // Each leaf node in this tree (`h`, `n`, `o`, `j`, `p`, `q`, `f`, `r`, `m`) |
| // represent a test case, and non-leaf nodes (`a`, `b`, `c`, `d`, `e`, `g`, `i`, |
| // `k`, `l`) are suite, file or tests. |
| // |
| // To begin, we create a test tree structure, and associate the full list of test |
| // coverage spans with every leaf node (test case) in this tree. |
| // |
| // This data structure hasn't given us any compression benefits yet, but we can |
| // now do a few tricks to dramatically reduce number of spans needed to describe |
| // the graph: |
| // |
| // ~ Optimization 1: Common span promotion ~ |
| // |
| // The first compression scheme is to promote common spans up the tree when they |
| // are common for all children. This will reduce the number of spans needed to be |
| // encoded in the final file. |
| // |
| // For example, if the test group `a` has 4 children that all share the same span |
| // `X`: |
| // |
| // a |
| // ╭───┬─┴─┬───╮ |
| // b c d e |
| // [X,Y] [X] [X] [X,Z] |
| // |
| // Then span `X` can be promoted up to `a`: |
| // |
| // [X] |
| // a |
| // ╭───┬─┴─┬───╮ |
| // b c d e |
| // [Y] [] [] [Z] |
| // |
| // ~ Optimization 2: Span XOR promotion ~ |
| // |
| // This idea can be extended further, by not requiring all the children to share |
| // the same span before promotion. If *most* child nodes share the same span, we |
| // can still promote the span, but this time we *remove* the span from the |
| // children *if they had it*, and *add* the span to children *if they didn't |
| // have it*. |
| // |
| // For example, if the test group `a` has 4 children with 3 that share the span |
| // `X`: |
| // |
| // a |
| // ╭───┬─┴─┬───╮ |
| // b c d e |
| // [X,Y] [X] [] [X,Z] |
| // |
| // Then span `X` can be promoted up to `a` by flipping the presence of `X` on the |
| // child nodes: |
| // |
| // [X] |
| // a |
| // ╭───┬─┴─┬───╮ |
| // b c d e |
| // [Y] [] [X] [Z] |
| // |
| // This process repeats up the tree. |
| // |
| // With this optimization applied, we now need to traverse the tree from root to |
| // leaf in order to know whether a given span is in use for the leaf node (test case): |
| // |
| // * If the span is encountered an *odd* number of times during traversal, then |
| // the span is *covered*. |
| // * If the span is encountered an *even* number of times during traversal, then |
| // the span is *not covered*. |
| // |
| // See tools/src/cov/coverage_test.go for more examples of this optimization. |
| // |
| // ~ Optimization 3: Common span grouping ~ |
| // |
| // With real world data, we encounter groups of spans that are commonly found |
| // together. To further reduce coverage data, the whole graph is scanned for common |
| // span patterns, and are indexed by each tree node. |
| // The XOR'ing of spans as described above is performed as if the spans were not |
| // grouped. |
| // |
| // ~ Optimization 4: Lookup tables ~ |
| // |
| // All spans, span-groups and strings are stored in de-duplicated tables, and are |
| // indexed wherever possible. |
| func (t *Tree) Optimize() { |
| log.Printf("Optimizing coverage tree...") |
| |
| // Start by gathering all of the unique spansets |
| wg := sync.WaitGroup{} |
| wg.Add(len(t.files)) |
| for _, file := range t.files { |
| file := file |
| go func() { |
| defer wg.Done() |
| o := optimizer{} |
| for idx, tc := range file.tcm { |
| o.invertForCommon(tc, &t.testRoot.children[idx]) |
| } |
| o.createGroups(file) |
| }() |
| } |
| wg.Wait() |
| } |
| |
| type optimizer struct{} |
| |
| // createGroups looks for common SpanSets, and creates indexable span groups |
| // which are then used instead. |
| func (o *optimizer) createGroups(f *treeFile) { |
| const minSpansInGroup = 2 |
| |
| type spansetKey string |
| spansetMap := map[spansetKey]SpanSet{} |
| |
| f.tcm.traverse(func(tc *TestCoverage) { |
| if len(tc.Spans) >= minSpansInGroup { |
| key := spansetKey(tc.Spans.String()) |
| if _, ok := spansetMap[key]; !ok { |
| spansetMap[key] = tc.Spans |
| } |
| } |
| }) |
| |
| if len(spansetMap) == 0 { |
| return |
| } |
| |
| type spansetInfo struct { |
| key spansetKey |
| set SpanSet // fully expanded set |
| grp SpanGroup |
| id SpanGroupID |
| } |
| spansets := make([]*spansetInfo, 0, len(spansetMap)) |
| for key, set := range spansetMap { |
| spansets = append(spansets, &spansetInfo{ |
| key: key, |
| set: set, |
| grp: SpanGroup{Spans: set}, |
| }) |
| } |
| |
| // Sort by number of spans in each sets starting with the largest. |
| sort.Slice(spansets, func(i, j int) bool { |
| a, b := spansets[i].set, spansets[j].set |
| switch { |
| case len(a) > len(b): |
| return true |
| case len(a) < len(b): |
| return false |
| } |
| return a.List().Compare(b.List()) == -1 // Just to keep output stable |
| }) |
| |
| // Assign IDs now that we have stable order. |
| for i := range spansets { |
| spansets[i].id = SpanGroupID(i) |
| } |
| |
| // Loop over the spanGroups starting from the largest, and try to fold them |
| // into the larger sets. |
| // This is O(n^2) complexity. |
| nextSpan: |
| for i, a := range spansets[:len(spansets)-1] { |
| for _, b := range spansets[i+1:] { |
| if len(a.set) > len(b.set) && a.set.containsAll(b.set) { |
| extend := b.id // Do not take address of iterator! |
| a.grp.Spans = a.set.removeAll(b.set) |
| a.grp.Extend = &extend |
| continue nextSpan |
| } |
| } |
| } |
| |
| // Rebuild a map of spansetKey to SpanGroup |
| spangroupMap := make(map[spansetKey]*spansetInfo, len(spansets)) |
| for _, s := range spansets { |
| spangroupMap[s.key] = s |
| } |
| |
| // Store the groups in the tree |
| f.spangroups = make(map[SpanGroupID]SpanGroup, len(spansets)) |
| for _, s := range spansets { |
| f.spangroups[s.id] = s.grp |
| } |
| |
| // Update all the uses. |
| f.tcm.traverse(func(tc *TestCoverage) { |
| key := spansetKey(tc.Spans.String()) |
| if g, ok := spangroupMap[key]; ok { |
| tc.Spans = nil |
| tc.Group = &g.id |
| } |
| }) |
| } |
| |
| // invertCommon looks for tree nodes with the majority of the child nodes with |
| // the same spans. This span is promoted up to the parent, and the children |
| // have the span inverted. |
| func (o *optimizer) invertForCommon(tc *TestCoverage, t *Test) { |
| wg := sync.WaitGroup{} |
| wg.Add(len(tc.Children)) |
| for id, child := range tc.Children { |
| id, child := id, child |
| go func() { |
| defer wg.Done() |
| o.invertForCommon(child, &t.children[id]) |
| }() |
| } |
| wg.Wait() |
| |
| counts := map[SpanID]int{} |
| for _, child := range tc.Children { |
| for span := range child.Spans { |
| counts[span] = counts[span] + 1 |
| } |
| } |
| |
| for span, count := range counts { |
| if count > len(t.children)/2 { |
| tc.Spans = tc.Spans.invert(span) |
| for _, idx := range t.indices { |
| child := tc.Children.index(idx) |
| child.Spans = child.Spans.invert(span) |
| if child.deletable() { |
| delete(tc.Children, idx) |
| } |
| } |
| } |
| } |
| } |