Blame - tools/src/cov/optimization.go - dawn

blob: d531e0b4d3e7a4dbfdb99b2af1d100288e266c4a [file] [log] [blame]

Ben Clayton	f8e0aac	2022-12-12 21:49:02 +0000	[diff] [blame]	1	// Copyright 2022 The Dawn Authors
				2	//
				3	// Licensed under the Apache License, Version 2.0 (the "License");
				4	// you may not use this file except in compliance with the License.
				5	// You may obtain a copy of the License at
				6	//
				7	// http://www.apache.org/licenses/LICENSE-2.0
				8	//
				9	// Unless required by applicable law or agreed to in writing, software
				10	// distributed under the License is distributed on an "AS IS" BASIS,
				11	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				12	// See the License for the specific language governing permissions and
				13	// limitations under the License.
				14
				15	package cov
				16
				17	import (
				18	"log"
				19	"sort"
				20	"sync"
				21	)
				22
Ben Clayton	408ace6	2022-12-13 15:48:49 +0000	[diff] [blame]	23	// Optimize optimizes the Tree by de-duplicating common spans into a tree of SpanGroups.
				24	//
				25	// Breaking down tests into group hierarchies provide a natural way to structure
				26	// coverage data, as tests of the same suite, file or test are likely to have
				27	// similar coverage spans.
				28	//
				29	// For each source file in the codebase, we create a tree of SpanGroups, where the
				30	// leaves are the test cases.
				31	//
				32	// For example, given the following Paths:
				33	//
				34	// a.b.d.h
				35	// a.b.d.i.n
				36	// a.b.d.i.o
				37	// a.b.e.j
				38	// a.b.e.k.p
				39	// a.b.e.k.q
				40	// a.c.f
				41	// a.c.g.l.r
				42	// a.c.g.m
				43	//
				44	// We would construct the following tree:
				45	//
				46	// a
				47	// ╭──────┴──────╮
				48	// b c
				49	// ╭───┴───╮ ╭───┴───╮
				50	// d e f g
				51	// ╭─┴─╮ ╭─┴─╮ ╭─┴─╮
				52	// h i j k l m
				53	// ╭┴╮ ╭┴╮ │
				54	// n o p q r
				55	//
				56	// Each leaf node in this tree (`h`, `n`, `o`, `j`, `p`, `q`, `f`, `r`, `m`)
				57	// represent a test case, and non-leaf nodes (`a`, `b`, `c`, `d`, `e`, `g`, `i`,
				58	// `k`, `l`) are suite, file or tests.
				59	//
				60	// To begin, we create a test tree structure, and associate the full list of test
				61	// coverage spans with every leaf node (test case) in this tree.
				62	//
				63	// This data structure hasn't given us any compression benefits yet, but we can
				64	// now do a few tricks to dramatically reduce number of spans needed to describe
				65	// the graph:
				66	//
				67	// ~ Optimization 1: Common span promotion ~
				68	//
				69	// The first compression scheme is to promote common spans up the tree when they
				70	// are common for all children. This will reduce the number of spans needed to be
				71	// encoded in the final file.
				72	//
				73	// For example, if the test group `a` has 4 children that all share the same span
				74	// `X`:
				75	//
				76	// a
				77	// ╭───┬─┴─┬───╮
				78	// b c d e
				79	// [X,Y] [X] [X] [X,Z]
				80	//
				81	// Then span `X` can be promoted up to `a`:
				82	//
				83	// [X]
				84	// a
				85	// ╭───┬─┴─┬───╮
				86	// b c d e
				87	// [Y] [] [] [Z]
				88	//
				89	// ~ Optimization 2: Span XOR promotion ~
				90	//
				91	// This idea can be extended further, by not requiring all the children to share
				92	// the same span before promotion. If most child nodes share the same span, we
				93	// can still promote the span, but this time we remove the span from the
				94	// children if they had it, and add the span to children *if they didn't
				95	// have it*.
				96	//
				97	// For example, if the test group `a` has 4 children with 3 that share the span
				98	// `X`:
				99	//
				100	// a
				101	// ╭───┬─┴─┬───╮
				102	// b c d e
				103	// [X,Y] [X] [] [X,Z]
				104	//
				105	// Then span `X` can be promoted up to `a` by flipping the presence of `X` on the
				106	// child nodes:
				107	//
				108	// [X]
				109	// a
				110	// ╭───┬─┴─┬───╮
				111	// b c d e
				112	// [Y] [] [X] [Z]
				113	//
				114	// This process repeats up the tree.
				115	//
				116	// With this optimization applied, we now need to traverse the tree from root to
				117	// leaf in order to know whether a given span is in use for the leaf node (test case):
				118	//
				119	// * If the span is encountered an odd number of times during traversal, then
				120	// the span is covered.
				121	// * If the span is encountered an even number of times during traversal, then
				122	// the span is not covered.
				123	//
				124	// See tools/src/cov/coverage_test.go for more examples of this optimization.
				125	//
				126	// ~ Optimization 3: Common span grouping ~
				127	//
				128	// With real world data, we encounter groups of spans that are commonly found
				129	// together. To further reduce coverage data, the whole graph is scanned for common
				130	// span patterns, and are indexed by each tree node.
				131	// The XOR'ing of spans as described above is performed as if the spans were not
				132	// grouped.
				133	//
				134	// ~ Optimization 4: Lookup tables ~
				135	//
				136	// All spans, span-groups and strings are stored in de-duplicated tables, and are
				137	// indexed wherever possible.
Ben Clayton	f8e0aac	2022-12-12 21:49:02 +0000	[diff] [blame]	138	func (t *Tree) Optimize() {
				139	log.Printf("Optimizing coverage tree...")
				140
				141	// Start by gathering all of the unique spansets
				142	wg := sync.WaitGroup{}
				143	wg.Add(len(t.files))
				144	for _, file := range t.files {
				145	file := file
				146	go func() {
				147	defer wg.Done()
				148	o := optimizer{}
				149	for idx, tc := range file.tcm {
				150	o.invertForCommon(tc, &t.testRoot.children[idx])
				151	}
				152	o.createGroups(file)
				153	}()
				154	}
				155	wg.Wait()
				156	}
				157
				158	type optimizer struct{}
				159
				160	// createGroups looks for common SpanSets, and creates indexable span groups
				161	// which are then used instead.
				162	func (o optimizer) createGroups(f treeFile) {
				163	const minSpansInGroup = 2
				164
				165	type spansetKey string
				166	spansetMap := map[spansetKey]SpanSet{}
				167
				168	f.tcm.traverse(func(tc *TestCoverage) {
				169	if len(tc.Spans) >= minSpansInGroup {
				170	key := spansetKey(tc.Spans.String())
				171	if _, ok := spansetMap[key]; !ok {
				172	spansetMap[key] = tc.Spans
				173	}
				174	}
				175	})
				176
				177	if len(spansetMap) == 0 {
				178	return
				179	}
				180
				181	type spansetInfo struct {
				182	key spansetKey
				183	set SpanSet // fully expanded set
				184	grp SpanGroup
				185	id SpanGroupID
				186	}
				187	spansets := make([]*spansetInfo, 0, len(spansetMap))
				188	for key, set := range spansetMap {
				189	spansets = append(spansets, &spansetInfo{
				190	key: key,
				191	set: set,
				192	grp: SpanGroup{Spans: set},
				193	})
				194	}
				195
				196	// Sort by number of spans in each sets starting with the largest.
				197	sort.Slice(spansets, func(i, j int) bool {
				198	a, b := spansets[i].set, spansets[j].set
				199	switch {
				200	case len(a) > len(b):
				201	return true
				202	case len(a) < len(b):
				203	return false
				204	}
				205	return a.List().Compare(b.List()) == -1 // Just to keep output stable
				206	})
				207
				208	// Assign IDs now that we have stable order.
				209	for i := range spansets {
				210	spansets[i].id = SpanGroupID(i)
				211	}
				212
				213	// Loop over the spanGroups starting from the largest, and try to fold them
				214	// into the larger sets.
				215	// This is O(n^2) complexity.
				216	nextSpan:
				217	for i, a := range spansets[:len(spansets)-1] {
				218	for _, b := range spansets[i+1:] {
				219	if len(a.set) > len(b.set) && a.set.containsAll(b.set) {
				220	extend := b.id // Do not take address of iterator!
				221	a.grp.Spans = a.set.removeAll(b.set)
				222	a.grp.Extend = &extend
				223	continue nextSpan
				224	}
				225	}
				226	}
				227
				228	// Rebuild a map of spansetKey to SpanGroup
				229	spangroupMap := make(map[spansetKey]*spansetInfo, len(spansets))
				230	for _, s := range spansets {
				231	spangroupMap[s.key] = s
				232	}
				233
				234	// Store the groups in the tree
				235	f.spangroups = make(map[SpanGroupID]SpanGroup, len(spansets))
				236	for _, s := range spansets {
				237	f.spangroups[s.id] = s.grp
				238	}
				239
				240	// Update all the uses.
				241	f.tcm.traverse(func(tc *TestCoverage) {
				242	key := spansetKey(tc.Spans.String())
				243	if g, ok := spangroupMap[key]; ok {
				244	tc.Spans = nil
				245	tc.Group = &g.id
				246	}
				247	})
				248	}
				249
				250	// invertCommon looks for tree nodes with the majority of the child nodes with
				251	// the same spans. This span is promoted up to the parent, and the children
				252	// have the span inverted.
				253	func (o optimizer) invertForCommon(tc TestCoverage, t *Test) {
				254	wg := sync.WaitGroup{}
				255	wg.Add(len(tc.Children))
				256	for id, child := range tc.Children {
				257	id, child := id, child
				258	go func() {
				259	defer wg.Done()
				260	o.invertForCommon(child, &t.children[id])
				261	}()
				262	}
				263	wg.Wait()
				264
				265	counts := map[SpanID]int{}
				266	for _, child := range tc.Children {
				267	for span := range child.Spans {
				268	counts[span] = counts[span] + 1
				269	}
				270	}
				271
				272	for span, count := range counts {
				273	if count > len(t.children)/2 {
				274	tc.Spans = tc.Spans.invert(span)
				275	for _, idx := range t.indices {
				276	child := tc.Children.index(idx)
				277	child.Spans = child.Spans.invert(span)
				278	if child.deletable() {
				279	delete(tc.Children, idx)
				280	}
				281	}
				282	}
				283	}
				284	}