// Copyright 2022 The Dawn Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package expectations
import (
"errors"
"fmt"
"strings"
"time"
"dawn.googlesource.com/dawn/tools/src/container"
"dawn.googlesource.com/dawn/tools/src/cts/query"
"dawn.googlesource.com/dawn/tools/src/cts/result"
)
// Update performs an incremental update on the expectations using the provided
// results.
//
// Update will:
// - Remove any expectation lines that have a query where no results match.
// - Remove expectation lines that are in a chunk which is not annotated with
// 'KEEP', where all the test results have the status 'Pass'.
// - Remove chunks that have had all expectation lines removed.
// - Append new chunks for flaky and failing tests which are not covered by
// existing expectation lines.
//
// Update returns a list of diagnostics for things that should be addressed.
//
// Note: Validate() should be called before attempting to update the
// expectations. If Validate() returns errors, then Update() behaviour is
// undefined.
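//
// A minimal usage sketch (illustrative only; how 'content', 'results' and
// 'testlist' are obtained is up to the caller and not shown here):
//
//	diags, err := content.Update(results, testlist)
//	if err != nil {
//		return err
//	}
//	for _, d := range diags {
//		fmt.Println(d)
//	}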
func (c *Content) Update(results result.List, testlist []query.Query) (Diagnostics, error) {
// Make a copy of the results. This code mutates the list.
results = append(result.List{}, results...)
// Replace statuses that the CTS runner doesn't recognize with 'Failure'
simplifyStatuses(results)
// Produce a list of tag sets.
// We reverse the declared order, as webgpu-cts/expectations.txt lists the
// most important first (OS, GPU, etc), and result.MinimalVariantTags will
// prioritize folding away the earlier tag-sets.
tagSets := make([]result.Tags, len(c.Tags.Sets))
for i, s := range c.Tags.Sets {
tagSets[len(tagSets)-i-1] = s.Tags
}
// Scan the full result list to obtain all the test variants
// (unique tag combinations).
variants := results.Variants()
// Add 'consumed' results for tests that were skipped.
// This ensures that skipped results are not included in reduced trees.
results = c.appendConsumedResultsForSkippedTests(results, testlist, variants)
u := updater{
in: *c,
out: Content{},
qt: newQueryTree(results),
variants: variants,
tagSets: tagSets,
}
// Update those expectations!
if err := u.build(); err != nil {
return nil, fmt.Errorf("while updating expectations: %w", err)
}
*c = u.out
return u.diags, nil
}
// updater holds the state used for updating the expectations
type updater struct {
in Content // the original expectations Content
out Content // newly built expectations Content
qt queryTree // the query tree
variants []container.Set[string]
diags []Diagnostic // diagnostics raised during update
tagSets []result.Tags // reverse-ordered tag-sets of 'in'
}
// appendConsumedResultsForSkippedTests returns 'results' with additional
// 'consumed' results for tests that have 'Skip' expectations. This fills in
// gaps in the results, preventing tree reductions from marking skipped results
// as failures, which could lead to expectation collisions.
func (c *Content) appendConsumedResultsForSkippedTests(results result.List,
testlist []query.Query,
variants []container.Set[string]) result.List {
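// Build a tree of all the known test queries, so that each 'Skip' expectation
// can be globbed against the full test list.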
tree := query.Tree[struct{}]{}
for _, q := range testlist {
tree.Add(q, struct{}{})
}
for _, c := range c.Chunks {
for _, ex := range c.Expectations {
if container.NewSet(ex.Status...).Contains(string(result.Skip)) {
for _, variant := range variants {
if !variant.ContainsAll(ex.Tags) {
continue
}
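// Gather all the tests under the skipped query. The Glob() error is
// intentionally ignored: a query with no matching tests simply contributes
// no results.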
glob, _ := tree.Glob(query.Parse(ex.Query))
for _, qd := range glob {
results = append(results, result.Result{
Query: qd.Query,
Tags: variant,
Status: consumed,
})
}
}
}
}
}
return results
}
// simplifyStatuses replaces all result statuses that are not one of 'Pass',
// 'RetryOnFailure', 'Slow' or 'Skip' with 'Failure', and replaces 'Skip'
// results with 'Pass'.
func simplifyStatuses(results result.List) {
for i, r := range results {
switch r.Status {
case result.Pass, result.RetryOnFailure, result.Slow:
// keep
case result.Skip:
// Typically represents a .unimplemented() test
results[i].Status = result.Pass
default:
results[i].Status = result.Failure
}
}
}
const (
// Status used to mark results that have been already handled by an
// expectation.
consumed result.Status = "<<consumed>>"
// Chunk comment for new flakes
newFlakesComment = "# New flakes. Please triage:"
// Chunk comment for new failures
newFailuresComment = "# New failures. Please triage:"
)
// queryTree holds a tree of queries to all the results (no filtering by tag or
// status). The queryTree is used to glob all the results that match a
// particular query.
type queryTree struct {
// All the results.
results result.List
// consumedAt[i] is the line number at which the i'th result in 'results' was
// consumed. Initially all entries are 0. When a result is consumed, the line
// number is recorded.
consumedAt []int
// Each tree node holds a list of indices to results.
tree query.Tree[[]int]
}
// newQueryTree builds the queryTree from the list of results.
func newQueryTree(results result.List) queryTree {
// Build a map of query to result indices
queryToIndices := map[query.Query][]int{}
for i, r := range results {
l := queryToIndices[r.Query]
l = append(l, i)
queryToIndices[r.Query] = l
}
// Construct the query tree to result indices
tree := query.Tree[[]int]{}
for query, indices := range queryToIndices {
if err := tree.Add(query, indices); err != nil {
// Unreachable: The only error we could get is duplicate data for
// the same query, which should be impossible.
panic(err)
}
}
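// No results are consumed yet; all consumedAt entries start at zero.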
consumedAt := make([]int, len(results))
return queryTree{results, consumedAt, tree}
}
// glob returns the list of all results under (or with) the given query,
// with no filtering by tag or status.
func (qt *queryTree) glob(q query.Query) (result.List, error) {
glob, err := qt.tree.Glob(q)
if err != nil {
return nil, fmt.Errorf("while gathering results for query '%v': %w", q, err)
}
out := result.List{}
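// Flatten the per-node result indices into a single result list.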
for _, indices := range glob {
for _, idx := range indices.Data {
out = append(out, qt.results[idx])
}
}
return out, nil
}
// globTags returns the list of results matching the given tags under (or with)
// the given query.
func (qt *queryTree) globTags(q query.Query, t result.Tags) (result.List, error) {
glob, err := qt.tree.Glob(q)
if err != nil {
return nil, err
}
out := result.List{}
for _, indices := range glob {
for _, idx := range indices.Data {
if r := qt.results[idx]; r.Tags.ContainsAll(t) {
out = append(out, r)
}
}
}
return out, nil
}
// markAsConsumed marks all the results matching the given tags
// under (or with) the given query, as consumed.
// line is used to record the line at which the results were consumed. If the
// results were consumed as part of generating new expectations then line should
// be 0.
func (qt *queryTree) markAsConsumed(q query.Query, t result.Tags, line int) {
if glob, err := qt.tree.Glob(q); err == nil {
for _, indices := range glob {
for _, idx := range indices.Data {
r := &qt.results[idx]
if r.Tags.ContainsAll(t) {
r.Status = consumed
qt.consumedAt[idx] = line
}
}
}
}
}
// build is the updater's top-level function.
// build first appends to u.out all chunks from 'u.in' with expectations updated
// using the new results, and then appends any new expectations to u.out.
func (u *updater) build() error {
// Update all the existing chunks
for _, in := range u.in.Chunks {
out := u.chunk(in)
// If the chunk had expectations, but they have all been removed, then drop the chunk
if len(in.Expectations) > 0 && len(out.Expectations) == 0 {
continue
}
if out.IsBlankLine() {
u.out.MaybeAddBlankLine()
continue
}
u.out.Chunks = append(u.out.Chunks, out)
}
// Emit new expectations (flaky, failing)
if err := u.addNewExpectations(); err != nil {
return fmt.Errorf("failed to add new expectations: %w", err)
}
return nil
}
// chunk returns a new Chunk, based on 'in', with the expectations updated.
func (u *updater) chunk(in Chunk) Chunk {
if len(in.Expectations) == 0 {
return in // Just a comment / blank line
}
// Skip over any untriaged failures / flake chunks.
// We'll just rebuild them at the end.
if len(in.Comments) > 0 {
switch in.Comments[0] {
case newFailuresComment, newFlakesComment:
return Chunk{}
}
}
keep := false // Does the chunk comment contain 'KEEP'?
for _, l := range in.Comments {
if strings.Contains(l, "KEEP") {
keep = true
break
}
}
// Begin building the output chunk.
// Copy over the chunk's comments.
out := Chunk{Comments: in.Comments}
// Build the new chunk's expectations
for _, exIn := range in.Expectations {
exOut := u.expectation(exIn, keep)
out.Expectations = append(out.Expectations, exOut...)
}
// Sort the expectations to keep things clean and tidy.
out.Expectations.Sort()
return out
}
// expectation returns a new list of Expectations, based on the Expectation 'in',
// using the new result data.
func (u *updater) expectation(in Expectation, keep bool) []Expectation {
// noResults is a helper for returning when the expectation has no test
// results.
noResults := func() []Expectation {
if len(in.Tags) > 0 {
u.diag(Warning, in.Line, "no results found for '%v' with tags %v", in.Query, in.Tags)
} else {
u.diag(Warning, in.Line, "no results found for '%v'", in.Query)
}
// Remove the no-results expectation
return []Expectation{}
}
q := query.Parse(in.Query)
// Glob the results for the expectation's query + tag combination.
// Ensure that none of these are already consumed.
results, err := u.qt.globTags(q, in.Tags)
// If we can't find any results for this query + tag combination, then bail.
switch {
case errors.As(err, &query.ErrNoDataForQuery{}):
return noResults()
case err != nil:
u.diag(Error, in.Line, "%v", err)
return []Expectation{}
case len(results) == 0:
return noResults()
}
// Before returning, mark all the results as consumed.
// Note: this has to happen *after* we've generated the new expectations, as
// marking the results as 'consumed' will impact the logic of
// expectationsForRoot()
defer u.qt.markAsConsumed(q, in.Tags, in.Line)
if keep { // Expectation chunk was marked with 'KEEP'
// Add a diagnostic if all tests of the expectation were 'Pass'
if s := results.Statuses(); len(s) == 1 && s.One() == result.Pass {
if ex := container.NewSet(in.Status...); len(ex) == 1 && ex.One() == string(result.Slow) {
// Expectation was 'Slow'. Give feedback on actual time taken.
var longest, average time.Duration
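// 'average' accumulates the total duration here, and is divided by the
// result count below.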
for _, r := range results {
if r.Duration > longest {
longest = r.Duration
}
average += r.Duration
}
if c := len(results); c > 1 {
average /= time.Duration(c)
u.diag(Note, in.Line, "longest test took %v (average %v)", longest, average)
} else {
u.diag(Note, in.Line, "test took %v", longest)
}
} else {
if c := len(results); c > 1 {
u.diag(Note, in.Line, "all %d tests now pass", c)
} else {
u.diag(Note, in.Line, "test now passes")
}
}
}
return []Expectation{in}
}
// Rebuild the expectations for this query.
return u.expectationsForRoot(q, in.Line, in.Bug, in.Comment)
}
// addNewExpectations (potentially) appends to 'u.out' chunks for new flaky and
// failing tests.
func (u *updater) addNewExpectations() error {
// For each variant:
// • Build a query tree using the results filtered to the variant, and then
// reduce the tree.
// • Take all the reduced-tree leaf nodes, and add these to 'roots'.
// Once we've collected all the roots, we'll use these to build the
// expectations across the reduced set of tags.
roots := query.Tree[bool]{}
for _, variant := range u.variants {
// Build a tree from the results matching the given variant.
tree, err := u.qt.results.FilterByVariant(variant).StatusTree()
if err != nil {
return fmt.Errorf("while building tree for tags '%v': %w", variant, err)
}
// Reduce the tree.
tree.Reduce(treeReducer)
// Add all the reduced leaf nodes to 'roots'.
for _, qd := range tree.List() {
// Use Split() to ensure that only the leaves have data (true) in the tree
roots.Split(qd.Query, true)
}
}
// Build all the expectations for each of the roots.
expectations := []Expectation{}
for _, root := range roots.List() {
expectations = append(expectations, u.expectationsForRoot(
root.Query, // Root query
0, // Line number
"crbug.com/dawn/0000", // Bug
"", // Comment
)...)
}
// Bin the expectations by failure or flake.
flakes, failures := []Expectation{}, []Expectation{}
for _, r := range expectations {
if container.NewSet(r.Status...).Contains(string(result.RetryOnFailure)) {
flakes = append(flakes, r)
} else {
failures = append(failures, r)
}
}
// Create chunks for any flakes and failures, in that order.
for _, group := range []struct {
results []Expectation
comment string
}{
{flakes, newFlakesComment},
{failures, newFailuresComment},
} {
if len(group.results) > 0 {
u.out.MaybeAddBlankLine()
u.out.Chunks = append(u.out.Chunks, Chunk{
Comments: []string{group.comment},
Expectations: group.results,
})
}
}
return nil
}
// expectationsForRoot builds a list of expectations that cover the failing
// tests for the results under root.
// The returned list of expectations is optimized by reducing queries to the
// most common root, and reducing tags to the smallest required set.
func (u *updater) expectationsForRoot(
root query.Query, // The sub-tree query root
line int, // The originating line, when producing diagnostics
bug string, // The bug to apply to all returned expectations
comment string, // The comment to apply to all returned expectations
) []Expectation {
results, err := u.qt.glob(root)
if err != nil {
u.diag(Error, line, "%v", err)
return nil
}
// Using the full, unfiltered list of results, generate the minimal set of
// variants (tags) that uniquely classify the results with differing statuses.
minimalVariants := u.
cleanupTags(results).
MinimalVariantTags(u.tagSets)
// For each minimized variant...
reduced := result.List{}
for _, variant := range minimalVariants {
// Build a query tree from this variant...
tree := result.StatusTree{}
filtered := results.FilterByTags(variant)
for _, r := range filtered {
// Note: variants may overlap, but overlapping queries will have
// identical statuses, so we can safely ignore the error returned by Add().
tree.Add(r.Query, r.Status)
}
// ... and reduce the tree by collapsing sub-trees that have common
// statuses.
tree.ReduceUnder(root, treeReducer)
// Append the reduced tree nodes to the results list
for _, qs := range tree.List() {
reduced = append(reduced, result.Result{
Query: qs.Query,
Tags: variant,
Status: qs.Data,
})
}
}
// Filter out any results that passed or have already been consumed
filtered := reduced.Filter(func(r result.Result) bool {
return r.Status != result.Pass && r.Status != consumed
})
// Mark all the new expectation results as consumed.
for _, r := range filtered {
u.qt.markAsConsumed(r.Query, r.Tags, 0)
}
// Transform the results to expectations.
return u.resultsToExpectations(filtered, bug, comment)
}
// resultsToExpectations returns a list of expectations from the given results.
// Each expectation will have the same query, tags and status as the input
// result, along with the specified bug and comment.
//
// If the result query targets a test without a wildcard, then the expectation
// query will have a trailing target delimiter (':') automatically appended.
// This is to satisfy a requirement of the expectation validator.
func (u *updater) resultsToExpectations(results result.List, bug, comment string) []Expectation {
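// Sort the results so that the expectations are generated in a deterministic order.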
results.Sort()
out := make([]Expectation, len(results))
for i, r := range results {
q := r.Query.String()
if r.Query.Target() == query.Tests && !r.Query.IsWildcard() {
// The expectation validator wants a trailing ':' for test queries
q += query.TargetDelimiter
}
out[i] = Expectation{
Bug: bug,
Tags: r.Tags,
Query: q,
Status: []string{string(r.Status)},
Comment: comment,
}
}
return out
}
// cleanupTags returns a copy of the provided results with:
// - All tags not found in the expectations tag list removed.
// - All but the highest priority tag removed for each tag-set.
// The tag sets are defined by the `BEGIN TAG HEADER` / `END TAG HEADER`
// section at the top of the expectations file.
func (u *updater) cleanupTags(results result.List) result.List {
return results.TransformTags(func(t result.Tags) result.Tags {
type HighestPrioritySetTag struct {
tag string
priority int
}
// Maps each tag-set name to the highest priority tag seen from that set.
best := map[string]HighestPrioritySetTag{}
for tag := range t {
sp, ok := u.in.Tags.ByName[tag]
if ok {
if set := best[sp.Set]; sp.Priority >= set.priority {
best[sp.Set] = HighestPrioritySetTag{tag, sp.Priority}
}
}
}
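// Rebuild the tag set with only the winning (highest priority) tag from each tag-set.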
t = result.NewTags()
for _, ts := range best {
t.Add(ts.tag)
}
return t
})
}
// treeReducer is a function that can be used by StatusTree.Reduce() to reduce
// tree nodes with the same status.
// treeReducer will collapse tree nodes if any of the following are true:
// - All child nodes have the same status.
// - More than 75% of the child nodes share the same non-pass status, and none
// of the children are consumed.
// - More than 20 child nodes share the same non-pass status, and none of the
// children are consumed.
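// For example, a node with 100 children of which 96 have the status 'Failure'
// and 4 have 'Pass' satisfies the 75% rule, and collapses to a single
// 'Failure' node.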
func treeReducer(statuses []result.Status) *result.Status {
counts := map[result.Status]int{}
for _, s := range statuses {
counts[s] = counts[s] + 1
}
if len(counts) == 1 {
return &statuses[0] // All the same status
}
if counts[consumed] > 0 {
return nil // Partially consumed trees cannot be merged
}
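// Track the most common non-pass status; it is used below to collapse
// subtrees where more than 20 children share that status.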
highestNonPassCount := 0
highestNonPassStatus := result.Failure
for s, n := range counts {
if s != result.Pass {
if percent := (100 * n) / len(statuses); percent > 75 {
// Over 75% of all the children are of non-pass status s.
return &s
}
if n > highestNonPassCount {
highestNonPassCount = n
highestNonPassStatus = s
}
}
}
if highestNonPassCount > 20 {
// More than 20 child nodes share the same non-pass status.
return &highestNonPassStatus
}
return nil
}
// diag appends a new diagnostic to u.diags with the given severity, line and
// message.
func (u *updater) diag(severity Severity, line int, msg string, args ...interface{}) {
u.diags = append(u.diags, Diagnostic{
Severity: severity,
Line: line,
Message: fmt.Sprintf(msg, args...),
})
}