tools: Add src/cts/expectations Update()

Implement the logic to update an existing expectations file with new
results.

Bug: dawn:1342
Change-Id: Idcbad57946712539cc5f0d238f89a2adf42f5aa0
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/87702
Reviewed-by: Dan Sinclair <dsinclair@chromium.org>
Commit-Queue: Ben Clayton <bclayton@google.com>
diff --git a/tools/src/cts/expectations/diagnostic.go b/tools/src/cts/expectations/diagnostic.go
new file mode 100644
index 0000000..c7059aa
--- /dev/null
+++ b/tools/src/cts/expectations/diagnostic.go
@@ -0,0 +1,56 @@
+// Copyright 2022 The Dawn Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package expectations
+
+import (
+	"fmt"
+	"strings"
+)
+
+// Severity is an enumerator of diagnostic severity
+type Severity string
+
+const (
+	Error   Severity = "error"
+	Warning Severity = "warning"
+	Note    Severity = "note"
+)
+
+// Diagnostic holds a line, column, message and severity.
+// Diagnostic also implements the 'error' interface.
+type Diagnostic struct {
+	Severity Severity
+	Line     int // 1-based
+	Column   int // 1-based
+	Message  string
+}
+
+func (e Diagnostic) String() string {
+	sb := &strings.Builder{}
+	if e.Line > 0 {
+		fmt.Fprintf(sb, "%v", e.Line)
+		if e.Column > 0 {
+			fmt.Fprintf(sb, ":%v", e.Column)
+		}
+		sb.WriteString(" ")
+	}
+	sb.WriteString(string(e.Severity))
+	sb.WriteString(": ")
+	sb.WriteString(e.Message)
+	return sb.String()
+}
+
+// Error implements the 'error' interface.
+func (e Diagnostic) Error() string { return e.String() }
diff --git a/tools/src/cts/expectations/parse.go b/tools/src/cts/expectations/parse.go
index 7331f03..aafb800 100644
--- a/tools/src/cts/expectations/parse.go
+++ b/tools/src/cts/expectations/parse.go
@@ -15,7 +15,6 @@
 package expectations
 
 import (
-	"fmt"
 	"strings"
 
 	"dawn.googlesource.com/dawn/tools/src/cts/result"
@@ -26,19 +25,6 @@
 	tagHeaderEnd   = `END TAG HEADER`
 )
 
-// SyntaxError is the error type returned by Parse() when a syntax error is
-// encountered.
-type SyntaxError struct {
-	Line    int // 1-based
-	Column  int // 1-based
-	Message string
-}
-
-// Error implements the 'error' interface.
-func (e SyntaxError) Error() string {
-	return fmt.Sprintf("%v:%v: %v", e.Line, e.Column, e.Message)
-}
-
 // Parse parses an expectations file, returning the Content
 func Parse(body string) (Content, error) {
 	// LineType is an enumerator classifying the 'type' of the line.
@@ -130,11 +116,11 @@
 		// syntaxErr is a helper for returning a SyntaxError with the current
 		// line and column index.
 		syntaxErr := func(at Token, msg string) error {
-			column := at.start + 1
-			if column == 1 {
-				column = len(l) + 1
+			columnIdx := at.start + 1
+			if columnIdx == 1 {
+				columnIdx = len(l) + 1
 			}
-			return SyntaxError{lineIdx, column, msg}
+			return Diagnostic{Error, lineIdx, columnIdx, msg}
 		}
 
 		// peek returns the next token without consuming it.
diff --git a/tools/src/cts/expectations/parse_test.go b/tools/src/cts/expectations/parse_test.go
index af233e1..041080b 100644
--- a/tools/src/cts/expectations/parse_test.go
+++ b/tools/src/cts/expectations/parse_test.go
@@ -432,27 +432,27 @@
 		{
 			name:      "err missing tag ']'",
 			in:        `[`,
-			expectErr: "1:2: expected ']' for tags",
+			expectErr: "1:2 error: expected ']' for tags",
 		}, /////////////////////////////////////////////////////////////////////
 		{
 			name:      "err missing test query",
 			in:        `[ a ]`,
-			expectErr: "1:6: expected test query",
+			expectErr: "1:6 error: expected test query",
 		}, /////////////////////////////////////////////////////////////////////
 		{
 			name:      "err missing status EOL",
 			in:        `[ a ] b`,
-			expectErr: "1:8: expected status",
+			expectErr: "1:8 error: expected status",
 		}, /////////////////////////////////////////////////////////////////////
 		{
 			name:      "err missing status comment",
 			in:        `[ a ] b # c`,
-			expectErr: "1:9: expected status",
+			expectErr: "1:9 error: expected status",
 		}, /////////////////////////////////////////////////////////////////////
 		{
 			name:      "err missing status ']'",
 			in:        `[ a ] b [ c`,
-			expectErr: "1:12: expected ']' for status",
+			expectErr: "1:12 error: expected ']' for status",
 		},
 	} {
 
diff --git a/tools/src/cts/expectations/update.go b/tools/src/cts/expectations/update.go
new file mode 100644
index 0000000..13a11ab
--- /dev/null
+++ b/tools/src/cts/expectations/update.go
@@ -0,0 +1,592 @@
+// Copyright 2022 The Dawn Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package expectations
+
+import (
+	"errors"
+	"fmt"
+	"sort"
+	"strings"
+
+	"dawn.googlesource.com/dawn/tools/src/container"
+	"dawn.googlesource.com/dawn/tools/src/cts/query"
+	"dawn.googlesource.com/dawn/tools/src/cts/result"
+)
+
+// Update performs an incremental update on the expectations using the provided
+// results.
+//
+// Update will:
+// • Remove any expectation lines that have a query where no results match.
+// • Remove expectations lines that are in a chunk which is not annotated with
+//   'KEEP', and all test results have the status 'Pass'.
+// • Remove chunks that have had all expectation lines removed.
+// • Appends new chunks for flaky and failing tests which are not covered by
+//   existing expectation lines.
+//
+// Update returns a list of diagnostics for things that should be addressed.
+func (c *Content) Update(results result.List) ([]Diagnostic, error) {
+	// Make a copy of the results. This code mutates the list.
+	results = append(result.List{}, results...)
+
+	// Replace statuses that the CTS runner doesn't recognize with 'Failure'
+	simplifyStatuses(results)
+
+	// Produce a list of tag sets.
+	// We reverse the declared order, as webgpu-cts/expectations.txt lists the
+	// most important first (OS, GPU, etc), and result.MinimalVariantTags will
+	// prioritize folding away the earlier tag-sets.
+	tagSets := make([]result.Tags, len(c.Tags.Sets))
+	for i, s := range c.Tags.Sets {
+		tagSets[len(tagSets)-i-1] = s.Tags
+	}
+
+	// Update those expectations!
+	u := updater{
+		in:      *c,
+		out:     Content{},
+		qt:      newQueryTree(results),
+		tagSets: tagSets,
+	}
+	if err := u.build(); err != nil {
+		return nil, fmt.Errorf("while updating expectations: %w", err)
+	}
+
+	*c = u.out
+	return u.diags, nil
+}
+
+// updater holds the state used for updating the expectations
+type updater struct {
+	in      Content       // the original expectations Content
+	out     Content       // newly built expectations Content
+	qt      queryTree     // the query tree
+	diags   []Diagnostic  // diagnostics raised during update
+	tagSets []result.Tags // reverse-ordered tag-sets of 'in'
+}
+
+// simplifyStatuses replaces all result statuses that are not 'Pass',
+// 'RetryOnFailure', 'Slow', 'Skip' with 'Failure'.
+func simplifyStatuses(results result.List) {
+	for i, r := range results {
+		switch r.Status {
+		case result.Pass, result.RetryOnFailure, result.Slow, result.Skip:
+			// keep
+		default:
+			results[i].Status = result.Failure
+		}
+	}
+}
+
+const (
+	// Status used to mark results that have been already handled by an
+	// expectation.
+	consumed result.Status = "<<consumed>>"
+	// Chunk comment for new flakes
+	newFlakesComment = "# New flakes. Please triage:"
+	// Chunk comment for new failures
+	newFailuresComment = "# New failures. Please triage:"
+)
+
+// queryTree holds tree of queries to all results (no filtering by tag or
+// status). The queryTree is used to glob all the results that match a
+// particular query.
+type queryTree struct {
+	// All the results.
+	results result.List
+	// consumedAt is a list of line numbers for the i'th result in 'results'
+	// Initially all line numbers are 0. When a result is consumed the line
+	// number is set.
+	consumedAt []int
+	// Each tree node holds a list of indices to results.
+	tree query.Tree[[]int]
+}
+
+// newQueryTree builds the queryTree from the list of results.
+func newQueryTree(results result.List) queryTree {
+	// Build a map of query to result indices
+	queryToIndices := map[query.Query][]int{}
+	for i, r := range results {
+		l := queryToIndices[r.Query]
+		l = append(l, i)
+		queryToIndices[r.Query] = l
+	}
+
+	// Construct the query tree to result indices
+	tree := query.Tree[[]int]{}
+	for query, indices := range queryToIndices {
+		if err := tree.Add(query, indices); err != nil {
+			// Unreachable: The only error we could get is duplicate data for
+			// the same query, which should be impossible.
+			panic(err)
+		}
+	}
+
+	consumedAt := make([]int, len(results))
+	return queryTree{results, consumedAt, tree}
+}
+
+// glob returns the list of results matching the given tags under (or with) the
+// given query.
+func (qt *queryTree) glob(q query.Query) (result.List, error) {
+	glob, err := qt.tree.Glob(q)
+	if err != nil {
+		return nil, fmt.Errorf("while gathering results for query '%v': %w", q, err)
+	}
+
+	out := result.List{}
+	for _, indices := range glob {
+		for _, idx := range indices.Data {
+			out = append(out, qt.results[idx])
+		}
+	}
+
+	return out, nil
+}
+
+// globAndCheckForCollisions returns the list of results matching the given tags
+// under (or with) the given query.
+// globAndCheckForCollisions will return an error if any of the results are
+// already consumed by a non-zero line. The non-zero line distinguishes between
+// results consumed by expectations declared in the input (non-zero line), vs
+// those that were introduced by the update (zero line). We only want to error
+// if there's a collision in user declared expectations.
+func (qt *queryTree) globAndCheckForCollisions(q query.Query, t result.Tags) (result.List, error) {
+	glob, err := qt.tree.Glob(q)
+	if err != nil {
+		return nil, err
+	}
+
+	out := result.List{}
+	for _, indices := range glob {
+		for _, idx := range indices.Data {
+			if r := qt.results[idx]; r.Tags.ContainsAll(t) {
+				if at := qt.consumedAt[idx]; at > 0 {
+					if len(t) > 0 {
+						return nil, fmt.Errorf("%v %v collides with expectation at line %v", t, q, at)
+					}
+					return nil, fmt.Errorf("%v collides with expectation at line %v", q, at)
+				}
+				out = append(out, r)
+			}
+		}
+	}
+	return out, nil
+}
+
+// markAsConsumed marks all the results matching the given tags
+// under (or with) the given query, as consumed.
+// line is used to record the line at which the results were consumed. If the
+// results were consumed as part of generating new expectations then line should
+// be 0. See queryTree.globAndCheckForCollisions().
+func (qt *queryTree) markAsConsumed(q query.Query, t result.Tags, line int) {
+	if glob, err := qt.tree.Glob(q); err == nil {
+		for _, indices := range glob {
+			for _, idx := range indices.Data {
+				r := &qt.results[idx]
+				if r.Tags.ContainsAll(t) {
+					r.Status = consumed
+					qt.consumedAt[idx] = line
+				}
+			}
+		}
+	}
+}
+
+// build is the updater top-level function.
+// build first appends to u.out all chunks from 'u.in' with expectations updated
+// using the new results, and then appends any new expectations to u.out.
+func (u *updater) build() error {
+	// Update all the existing chunks
+	for _, in := range u.in.Chunks {
+		out := u.chunk(in)
+
+		// If all chunk had expectations, but now they've gone, remove the chunk
+		if len(in.Expectations) > 0 && len(out.Expectations) == 0 {
+			continue
+		}
+		if out.IsBlankLine() {
+			u.out.MaybeAddBlankLine()
+			continue
+		}
+		u.out.Chunks = append(u.out.Chunks, out)
+	}
+
+	// Emit new expectations (flaky, failing)
+	if err := u.addNewExpectations(); err != nil {
+		return fmt.Errorf("failed to add new expectations: %w", err)
+	}
+
+	return nil
+}
+
+// chunk returns a new Chunk, based on 'in', with the expectations updated.
+func (u *updater) chunk(in Chunk) Chunk {
+	if len(in.Expectations) == 0 {
+		return in // Just a comment / blank line
+	}
+
+	// Skip over any untriaged failures / flake chunks.
+	// We'll just rebuild them at the end.
+	if len(in.Comments) > 0 {
+		switch in.Comments[0] {
+		case newFailuresComment, newFlakesComment:
+			return Chunk{}
+		}
+	}
+
+	keep := false // Does the chunk comment contain 'KEEP' ?
+	for _, l := range in.Comments {
+		if strings.Contains(l, "KEEP") {
+			keep = true
+			break
+		}
+	}
+
+	// Begin building the output chunk.
+	// Copy over the chunk's comments.
+	out := Chunk{Comments: in.Comments}
+
+	// Build the new chunk's expectations
+	for _, exIn := range in.Expectations {
+		exOut := u.expectation(exIn, keep)
+		out.Expectations = append(out.Expectations, exOut...)
+	}
+
+	// Sort the expectations to keep things clean and tidy.
+	sort.Slice(out.Expectations, func(i, j int) bool {
+		switch {
+		case out.Expectations[i].Query < out.Expectations[j].Query:
+			return true
+		case out.Expectations[i].Query > out.Expectations[j].Query:
+			return false
+		}
+		a := result.TagsToString(out.Expectations[i].Tags)
+		b := result.TagsToString(out.Expectations[j].Tags)
+		switch {
+		case a < b:
+			return true
+		case a > b:
+			return false
+		}
+		return false
+	})
+
+	return out
+}
+
+// chunk returns a new list of Expectations, based on the Expectation 'in',
+// using the new result data.
+func (u *updater) expectation(in Expectation, keep bool) []Expectation {
+	// noResults is a helper for returning when the expectation has no test
+	// results.
+	// If the expectation has an expected 'Skip' result, then we're likely
+	// to be missing results (as the test was not run). In this situation
+	// the expectation is preserved, and no diagnostics are raised.
+	// If the expectation did not have a 'Skip' result, then a diagnostic will
+	// be raised and the expectation will be removed.
+	noResults := func() []Expectation {
+		if container.NewSet(in.Status...).Contains(string(result.Skip)) {
+			return []Expectation{in}
+		}
+		// Expectation does not have a 'Skip' result.
+		if len(in.Tags) > 0 {
+			u.diag(Warning, in.Line, "no results found for '%v' with tags %v", in.Query, in.Tags)
+		} else {
+			u.diag(Warning, in.Line, "no results found for '%v'", in.Query)
+		}
+		// Remove the no-results expectation
+		return []Expectation{}
+	}
+
+	// Grab all the results that match the expectation's query
+	q := query.Parse(in.Query)
+
+	// Glob the results for the expectation's query + tag combination.
+	// Ensure that none of these are already consumed.
+	results, err := u.qt.globAndCheckForCollisions(q, in.Tags)
+	// If we can't find any results for this query + tag combination, then bail.
+	switch {
+	case errors.As(err, &query.ErrNoDataForQuery{}):
+		return noResults()
+	case err != nil:
+		u.diag(Error, in.Line, "%v", err)
+		return []Expectation{}
+	case len(results) == 0:
+		return noResults()
+	}
+
+	// Before returning, mark all the results as consumed.
+	// Note: this has to happen *after* we've generated the new expectations, as
+	// marking the results as 'consumed' will impact the logic of
+	// expectationsForRoot()
+	defer u.qt.markAsConsumed(q, in.Tags, in.Line)
+
+	if keep { // Expectation chunk was marked with 'KEEP'
+		// Add a diagnostic if all tests of the expectation were 'Pass'
+		if s := results.Statuses(); len(s) == 1 && s.One() == result.Pass {
+			if c := len(results); c > 1 {
+				u.diag(Note, in.Line, "all %d tests now pass", len(results))
+			} else {
+				u.diag(Note, in.Line, "test now passes")
+			}
+		}
+		return []Expectation{in}
+	}
+
+	// Rebuild the expectations for this query.
+	return u.expectationsForRoot(q, in.Line, in.Bug, in.Comment)
+}
+
+// addNewExpectations (potentially) appends to 'u.out' chunks for new flaky and
+// failing tests.
+func (u *updater) addNewExpectations() error {
+	// Scan the full result list to obtain all the test variants
+	// (unique tag combinations).
+	allVariants := u.qt.results.Variants()
+
+	// For each variant:
+	// • Build a query tree using the results filtered to the variant, and then
+	//   reduce the tree.
+	// • Take all the reduced-tree leaf nodes, and add these to 'roots'.
+	// Once we've collected all the roots, we'll use these to build the
+	// expectations across the reduced set of tags.
+	roots := container.NewMap[string, query.Query]()
+	for _, variant := range allVariants {
+		// Build a tree from the results matching the given variant.
+		tree, err := u.qt.results.FilterByVariant(variant).StatusTree()
+		if err != nil {
+			return fmt.Errorf("while building tree for tags '%v': %w", variant, err)
+		}
+		// Reduce the tree.
+		tree.Reduce(treeReducer)
+		// Add all the reduced leaf nodes to 'roots'.
+		for _, qd := range tree.List() {
+			roots.Add(qd.Query.String(), qd.Query)
+		}
+	}
+
+	// Build all the expectations for each of the roots.
+	expectations := []Expectation{}
+	for _, root := range roots.Values() {
+		expectations = append(expectations, u.expectationsForRoot(
+			root,                  // Root query
+			0,                     // Line number
+			"crbug.com/dawn/0000", // Bug
+			"",                    // Comment
+		)...)
+	}
+
+	// Bin the expectations by failure or flake.
+	flakes, failures := []Expectation{}, []Expectation{}
+	for _, r := range expectations {
+		if container.NewSet(r.Status...).Contains(string(result.RetryOnFailure)) {
+			flakes = append(flakes, r)
+		} else {
+			failures = append(failures, r)
+		}
+	}
+
+	// Create chunks for any flakes and failures, in that order.
+	for _, group := range []struct {
+		results []Expectation
+		comment string
+	}{
+		{flakes, newFlakesComment},
+		{failures, newFailuresComment},
+	} {
+		if len(group.results) > 0 {
+			u.out.MaybeAddBlankLine()
+			u.out.Chunks = append(u.out.Chunks, Chunk{
+				Comments:     []string{group.comment},
+				Expectations: group.results,
+			})
+		}
+	}
+
+	return nil
+}
+
+// expectationsForRoot builds a list of expectations that cover the failing
+// tests for the results under root.
+// The returned list of expectations is optimized by reducing queries to the
+// most common root, and reducing tags to the smallest required set.
+func (u *updater) expectationsForRoot(
+	root query.Query, // The sub-tree query root
+	line int, // The originating line, when producing diagnostics
+	bug string, // The bug to apply to all returned expectations
+	comment string, // The comment to apply to all returned expectations
+) []Expectation {
+	results, err := u.qt.glob(root)
+	if err != nil {
+		u.diag(Error, line, "%v", err)
+		return nil
+	}
+
+	// Using the full list of unfiltered tests, generate the minimal set of
+	// variants (tags) that uniquely classify the results with differing status.
+	minimalVariants := u.
+		cleanupTags(results).
+		MinimalVariantTags(u.tagSets)
+
+	// For each minimized variant...
+	reduced := result.List{}
+	for _, variant := range minimalVariants {
+		// Build a query tree from this variant...
+		tree := result.StatusTree{}
+		filtered := results.FilterByTags(variant)
+		for _, r := range filtered {
+			// Note: variants may overlap, but overlaped queries will have
+			// identical statuses, so we can just ignore the error for Add().
+			tree.Add(r.Query, r.Status)
+		}
+
+		// ... and reduce the tree by collapsing sub-trees that have common
+		// statuses.
+		tree.ReduceUnder(root, treeReducer)
+
+		// Append the reduced tree nodes to the results list
+		for _, qs := range tree.List() {
+			reduced = append(reduced, result.Result{
+				Query:  qs.Query,
+				Tags:   variant,
+				Status: qs.Data,
+			})
+		}
+	}
+
+	// Filter out any results that passed or have already been consumed
+	filtered := reduced.Filter(func(r result.Result) bool {
+		return r.Status != result.Pass && r.Status != consumed
+	})
+
+	// Mark all the new expectation results as consumed.
+	for _, r := range filtered {
+		u.qt.markAsConsumed(r.Query, r.Tags, 0)
+	}
+
+	// Transform the results to expectations.
+	return u.resultsToExpectations(filtered, bug, comment)
+}
+
+// resultsToExpectations returns a list of expectations from the given results.
+// Each expectation will have the same query, tags and status as the input
+// result, along with the specified bug and comment.
+//
+// If the result query target is a test without a wildcard, then the expectation
+// will have a wildcard automatically appended. This is to satisfy a requirement
+// of the expectation validator.
+func (u *updater) resultsToExpectations(results result.List, bug, comment string) []Expectation {
+	results.Sort()
+
+	out := make([]Expectation, len(results))
+	for i, r := range results {
+		q := r.Query.String()
+		if r.Query.Target() == query.Tests && !r.Query.IsWildcard() {
+			// The expectation validator wants a trailing ':' for test queries
+			q += query.TargetDelimiter
+		}
+		out[i] = Expectation{
+			Bug:     bug,
+			Tags:    r.Tags,
+			Query:   q,
+			Status:  []string{string(r.Status)},
+			Comment: comment,
+		}
+	}
+
+	return out
+}
+
+// cleanupTags returns a copy of the provided results with:
+// • All tags not found in the expectations list removed
+// • All but the highest priority tag for any tag-set.
+//   The tag sets are defined by the `BEGIN TAG HEADER` / `END TAG HEADER`
+//   section at the top of the expectations file.
+func (u *updater) cleanupTags(results result.List) result.List {
+	return results.TransformTags(func(t result.Tags) result.Tags {
+		type HighestPrioritySetTag struct {
+			tag      string
+			priority int
+		}
+		// Set name to highest priority tag for that set
+		best := map[string]HighestPrioritySetTag{}
+		for tag := range t {
+			sp, ok := u.in.Tags.ByName[tag]
+			if ok {
+				if set := best[sp.Set]; sp.Priority >= set.priority {
+					best[sp.Set] = HighestPrioritySetTag{tag, sp.Priority}
+				}
+			}
+		}
+		t = result.NewTags()
+		for _, ts := range best {
+			t.Add(ts.tag)
+		}
+		return t
+	})
+}
+
+// treeReducer is a function that can be used by StatusTree.Reduce() to reduce
+// tree nodes with the same status.
+// treeReducer will collapse trees nodes if any of the following are true:
+// • All child nodes have the same status
+// • More than 75% of the child nodes have a non-pass status, and none of the
+//   children are consumed.
+// • There are more than 20 child nodes with a non-pass status, and none of the
+//   children are consumed.
+func treeReducer(statuses []result.Status) *result.Status {
+	counts := map[result.Status]int{}
+	for _, s := range statuses {
+		counts[s] = counts[s] + 1
+	}
+	if len(counts) == 1 {
+		return &statuses[0] // All the same status
+	}
+	if counts[consumed] > 0 {
+		return nil // Partially consumed trees cannot be merged
+	}
+	highestNonPassCount := 0
+	highestNonPassStatus := result.Failure
+	for s, n := range counts {
+		if s != result.Pass {
+			if percent := (100 * n) / len(statuses); percent > 75 {
+				// Over 75% of all the children are of non-pass status s.
+				return &s
+			}
+			if n > highestNonPassCount {
+				highestNonPassCount = n
+				highestNonPassStatus = s
+			}
+		}
+	}
+
+	if highestNonPassCount > 20 {
+		// Over 20 child node failed.
+		return &highestNonPassStatus
+	}
+
+	return nil
+}
+
+// diag appends a new diagnostic to u.diags with the given severity, line and
+// message.
+func (u *updater) diag(severity Severity, line int, msg string, args ...interface{}) {
+	u.diags = append(u.diags, Diagnostic{
+		Severity: severity,
+		Line:     line,
+		Message:  fmt.Sprintf(msg, args...),
+	})
+}
diff --git a/tools/src/cts/expectations/update_test.go b/tools/src/cts/expectations/update_test.go
new file mode 100644
index 0000000..69cc314
--- /dev/null
+++ b/tools/src/cts/expectations/update_test.go
@@ -0,0 +1,672 @@
+// Copyright 2022 The Dawn Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package expectations_test
+
+import (
+	"strings"
+	"testing"
+
+	"dawn.googlesource.com/dawn/tools/src/cts/expectations"
+	"dawn.googlesource.com/dawn/tools/src/cts/query"
+	"dawn.googlesource.com/dawn/tools/src/cts/result"
+	"github.com/google/go-cmp/cmp"
+)
+
+var Q = query.Parse
+
+func TestUpdate(t *testing.T) {
+	header := `# BEGIN TAG HEADER
+# OS
+# tags: [ os-a os-b os-c ]
+# GPU
+# tags: [ gpu-a gpu-b gpu-c ]
+# END TAG HEADER
+`
+	headerLines := strings.Count(header, "\n")
+
+	type Test struct {
+		name         string
+		expectations string
+		results      result.List
+		updated      string
+		diagnostics  []expectations.Diagnostic
+		err          string
+	}
+	for _, test := range []Test{
+		{ //////////////////////////////////////////////////////////////////////
+			name:         "empty results",
+			expectations: ``,
+			results:      result.List{},
+		},
+		{ //////////////////////////////////////////////////////////////////////
+			name: "no results found",
+			expectations: `
+crbug.com/a/123 a:missing,test,result:* [ Failure ]
+crbug.com/a/123 [ tag ] another:missing,test,result:* [ Failure ]
+
+some:other,test:* [ Failure ]
+`,
+			results: result.List{
+				result.Result{
+					Query:  Q("some:other,test:*"),
+					Tags:   result.NewTags("os-a", "gpu-a"),
+					Status: result.Failure,
+				},
+				result.Result{
+					Query:  Q("some:other,test:*"),
+					Tags:   result.NewTags("os-b", "gpu-b"),
+					Status: result.Failure,
+				},
+			},
+			updated: `
+some:other,test:* [ Failure ]
+`,
+			diagnostics: []expectations.Diagnostic{
+				{
+					Severity: expectations.Warning,
+					Line:     headerLines + 2,
+					Message:  "no results found for 'a:missing,test,result:*'",
+				},
+				{
+					Severity: expectations.Warning,
+					Line:     headerLines + 3,
+					Message:  "no results found for 'another:missing,test,result:*' with tags [tag]",
+				},
+			},
+		},
+		{ //////////////////////////////////////////////////////////////////////
+			name: "no results found KEEP",
+			expectations: `
+# KEEP
+crbug.com/a/123 a:missing,test,result:* [ Failure ]
+
+some:other,test:* [ Failure ]
+`,
+			results: result.List{
+				result.Result{
+					Query:  Q("some:other,test:*"),
+					Tags:   result.NewTags("os-a", "gpu-a"),
+					Status: result.Failure,
+				},
+				result.Result{
+					Query:  Q("some:other,test:*"),
+					Tags:   result.NewTags("os-b", "gpu-b"),
+					Status: result.Failure,
+				},
+			},
+			updated: `
+some:other,test:* [ Failure ]
+`,
+			diagnostics: []expectations.Diagnostic{
+				{
+					Severity: expectations.Warning,
+					Line:     headerLines + 3,
+					Message:  "no results found for 'a:missing,test,result:*'",
+				},
+			},
+		},
+		{ //////////////////////////////////////////////////////////////////////
+			name: "no results found Skip",
+			expectations: `
+crbug.com/a/123 a:missing,test,result:* [ Skip ]
+
+some:other,test:* [ Failure ]
+`,
+			results: result.List{
+				result.Result{
+					Query:  Q("some:other,test:*"),
+					Tags:   result.NewTags("os-a", "gpu-a"),
+					Status: result.Failure,
+				},
+				result.Result{
+					Query:  Q("some:other,test:*"),
+					Tags:   result.NewTags("os-b", "gpu-b"),
+					Status: result.Failure,
+				},
+			},
+			updated: `
+crbug.com/a/123 a:missing,test,result:* [ Skip ]
+
+some:other,test:* [ Failure ]
+`,
+		},
+		{ //////////////////////////////////////////////////////////////////////
+			name: "simple expectation collision",
+			expectations: `
+a:b,c:* [ Failure ]
+a:b,c:* [ Failure ]
+`,
+			results: result.List{
+				result.Result{
+					Query:  Q("a:b,c:d"),
+					Tags:   result.NewTags("os-a", "os-c", "gpu-b"),
+					Status: result.Failure,
+				},
+			},
+			updated: `
+a:b,c:* [ Failure ]
+`,
+			diagnostics: []expectations.Diagnostic{
+				{
+					Severity: expectations.Error,
+					Line:     headerLines + 3,
+					Message:  "a:b,c:* collides with expectation at line 8",
+				},
+			},
+		},
+		{ //////////////////////////////////////////////////////////////////////
+			name: "simple expectation with tags",
+			expectations: `
+[ os-a ] a:b,c:* [ Failure ]
+[ gpu-b ] a:b,c:* [ Failure ]
+`,
+			results: result.List{
+				result.Result{
+					Query:  Q("a:b,c:d"),
+					Tags:   result.NewTags("os-a", "os-c", "gpu-b"),
+					Status: result.Failure,
+				},
+			},
+			updated: `
+a:b,c:* [ Failure ]
+`,
+			diagnostics: []expectations.Diagnostic{
+				{
+					Severity: expectations.Error,
+					Line:     headerLines + 3,
+					Message:  "[gpu-b] a:b,c:* collides with expectation at line 8",
+				},
+			},
+		},
+		{ //////////////////////////////////////////////////////////////////////
+			name: "simple expectation collision KEEP",
+			expectations: `
+# KEEP
+a:b,c:* [ Failure ]
+a:b,c:* [ Failure ]
+`,
+			results: result.List{
+				result.Result{
+					Query:  Q("a:b,c:d"),
+					Tags:   result.NewTags("os-a", "os-c", "gpu-b"),
+					Status: result.Failure,
+				},
+			},
+			updated: `
+# KEEP
+a:b,c:* [ Failure ]
+`,
+			diagnostics: []expectations.Diagnostic{
+				{
+					Severity: expectations.Error,
+					Line:     headerLines + 4,
+					Message:  "a:b,c:* collides with expectation at line 9",
+				},
+			},
+		},
+		{ //////////////////////////////////////////////////////////////////////
+			name: "collision with child-expectation",
+			expectations: `
+a:b:x:* [ Failure ]
+a:b:* [ Failure ]
+`,
+			results: result.List{
+				result.Result{
+					Query:  Q("a:b:x:*"),
+					Tags:   result.NewTags("os-a", "os-c", "gpu-b"),
+					Status: result.Failure,
+				},
+				result.Result{
+					Query:  Q("a:b:y:*"),
+					Tags:   result.NewTags("os-a", "os-c", "gpu-b"),
+					Status: result.Failure,
+				},
+			},
+			updated: `
+a:b:x:* [ Failure ]
+
+# New failures. Please triage:
+crbug.com/dawn/0000 a:b:y:* [ Failure ]
+`,
+			diagnostics: []expectations.Diagnostic{
+				{
+					Severity: expectations.Error,
+					Line:     headerLines + 3,
+					Message:  "a:b:* collides with expectation at line 8",
+				},
+			},
+		},
+		{ //////////////////////////////////////////////////////////////////////
+			name: "collision with parent-expectation",
+			expectations: `
+a:b:* [ Failure ]
+a:b:x:* [ Failure ]
+`,
+			results: result.List{
+				result.Result{
+					Query:  Q("a:b:x:*"),
+					Tags:   result.NewTags("os-a", "os-c", "gpu-b"),
+					Status: result.Failure,
+				},
+				result.Result{
+					Query:  Q("a:b:y:*"),
+					Tags:   result.NewTags("os-a", "os-c", "gpu-b"),
+					Status: result.Failure,
+				},
+			},
+			updated: `
+a:b:* [ Failure ]
+`,
+			diagnostics: []expectations.Diagnostic{
+				{
+					Severity: expectations.Error,
+					Line:     headerLines + 3,
+					Message:  "a:b:x:* collides with expectation at line 8",
+				},
+			},
+		},
+		{ //////////////////////////////////////////////////////////////////////
+			name: "expectation test now passes",
+			expectations: `
+crbug.com/a/123 [ gpu-a os-a ] a:b,c:* [ Failure ]
+crbug.com/a/123 [ gpu-b os-b ] a:b,c:* [ Failure ]
+`,
+			results: result.List{
+				result.Result{
+					Query:  Q("a:b,c:*"),
+					Tags:   result.NewTags("os-a", "gpu-a"),
+					Status: result.Pass,
+				},
+				result.Result{
+					Query:  Q("a:b,c:*"),
+					Tags:   result.NewTags("os-b", "gpu-b"),
+					Status: result.Abort,
+				},
+			},
+			updated: `
+crbug.com/a/123 [ os-b ] a:b,c:* [ Failure ]
+`,
+		},
+		{ //////////////////////////////////////////////////////////////////////
+			name: "expectation case now passes",
+			expectations: `
+crbug.com/a/123 [ gpu-a os-a ] a:b,c:d [ Failure ]
+crbug.com/a/123 [ gpu-b os-b ] a:b,c:d [ Failure ]
+`,
+			results: result.List{
+				result.Result{
+					Query:  Q("a:b,c:d"),
+					Tags:   result.NewTags("os-a", "gpu-a"),
+					Status: result.Pass,
+				},
+				result.Result{
+					Query:  Q("a:b,c:d"),
+					Tags:   result.NewTags("os-b", "gpu-b"),
+					Status: result.Abort,
+				},
+			},
+			updated: `
+crbug.com/a/123 [ os-b ] a:b,c:d: [ Failure ]
+`,
+		},
+		{ //////////////////////////////////////////////////////////////////////
+			name: "expectation case now passes KEEP - single",
+			expectations: `
+# KEEP
+crbug.com/a/123 [ gpu-a os-a ] a:b,c:d [ Failure ]
+crbug.com/a/123 [ gpu-b os-b ] a:b,c:d [ Failure ]
+`,
+			results: result.List{
+				result.Result{
+					Query:  Q("a:b,c:d"),
+					Tags:   result.NewTags("os-a", "gpu-a"),
+					Status: result.Pass,
+				},
+				result.Result{
+					Query:  Q("a:b,c:d"),
+					Tags:   result.NewTags("os-b", "gpu-b"),
+					Status: result.Abort,
+				},
+			},
+			updated: `
+# KEEP
+crbug.com/a/123 [ gpu-a os-a ] a:b,c:d [ Failure ]
+crbug.com/a/123 [ gpu-b os-b ] a:b,c:d [ Failure ]
+`,
+			diagnostics: []expectations.Diagnostic{
+				{
+					Severity: expectations.Note,
+					Line:     headerLines + 3,
+					Message:  "test now passes",
+				},
+			},
+		},
+		{ //////////////////////////////////////////////////////////////////////
+			name: "expectation case now passes KEEP - multiple",
+			expectations: `
+# KEEP
+crbug.com/a/123 a:b,c:d:* [ Failure ]
+`,
+			results: result.List{
+				result.Result{Query: Q("a:b,c:d:a"), Status: result.Pass},
+				result.Result{Query: Q("a:b,c:d:b"), Status: result.Pass},
+				result.Result{Query: Q("a:b,c:d:c"), Status: result.Pass},
+				result.Result{Query: Q("a:b,c:d:d"), Status: result.Pass},
+			},
+			updated: `
+# KEEP
+crbug.com/a/123 a:b,c:d:* [ Failure ]
+`,
+			diagnostics: []expectations.Diagnostic{
+				{
+					Severity: expectations.Note,
+					Line:     headerLines + 3,
+					Message:  "all 4 tests now pass",
+				},
+			},
+		},
+		{ //////////////////////////////////////////////////////////////////////
+			name:         "new test results",
+			expectations: `# A comment`,
+			results: result.List{
+				result.Result{
+					Query:  Q("suite:dir_a,dir_b:test_a:*"),
+					Tags:   result.NewTags("os-a", "gpu-a"),
+					Status: result.Abort,
+				},
+				result.Result{
+					Query:  Q("suite:dir_a,dir_b:test_a:*"),
+					Tags:   result.NewTags("os-a", "gpu-b"),
+					Status: result.Abort,
+				},
+				result.Result{
+					Query:  Q("suite:dir_a,dir_b:test_c:case=4;*"),
+					Tags:   result.NewTags("os-b", "gpu-b"),
+					Status: result.Crash,
+				},
+				result.Result{
+					Query:  Q("suite:dir_a,dir_b:test_c:case=5;*"),
+					Tags:   result.NewTags("os-b", "gpu-b"),
+					Status: result.RetryOnFailure,
+				},
+				result.Result{
+					Query:  Q("suite:dir_a,dir_b:test_b;case=5;*"),
+					Tags:   result.NewTags("os-b", "gpu-b"),
+					Status: result.Pass,
+				},
+				result.Result{
+					Query:  Q("suite:dir_a,dir_b:test_b:*"),
+					Tags:   result.NewTags("os-a", "gpu-a"),
+					Status: result.Skip,
+				},
+				result.Result{
+					Query:  Q("suite:dir_a,dir_b:test_b:*"),
+					Tags:   result.NewTags("os-b", "gpu-a"),
+					Status: result.Pass,
+				},
+				result.Result{
+					Query:  Q("suite:dir_a,dir_b:test_c:*"),
+					Tags:   result.NewTags("os-a", "gpu-a"),
+					Status: result.RetryOnFailure,
+				},
+			},
+			updated: `# A comment
+
+# New flakes. Please triage:
+crbug.com/dawn/0000 [ gpu-a os-a ] suite:dir_a,dir_b:test_c:* [ RetryOnFailure ]
+crbug.com/dawn/0000 [ gpu-b os-b ] suite:dir_a,dir_b:test_c:case=5;* [ RetryOnFailure ]
+
+# New failures. Please triage:
+crbug.com/dawn/0000 [ gpu-b os-a ] suite:* [ Failure ]
+crbug.com/dawn/0000 [ gpu-a os-a ] suite:dir_a,dir_b:test_a:* [ Failure ]
+crbug.com/dawn/0000 [ gpu-a os-a ] suite:dir_a,dir_b:test_b:* [ Skip ]
+crbug.com/dawn/0000 [ gpu-b os-b ] suite:dir_a,dir_b:test_c:case=4;* [ Failure ]
+`,
+		},
+		{ //////////////////////////////////////////////////////////////////////
+			name:         "filter unknown tags",
+			expectations: ``,
+			results: result.List{
+				result.Result{
+					Query:  Q("a:b,c:*"),
+					Tags:   result.NewTags("os-a", "gpu-x"),
+					Status: result.Failure,
+				},
+				result.Result{
+					Query:  Q("a:b,c:*"),
+					Tags:   result.NewTags("os-b", "gpu-x"),
+					Status: result.Crash,
+				},
+				result.Result{
+					Query:  Q("a:b,c:*"),
+					Tags:   result.NewTags("os-x", "gpu-b"),
+					Status: result.Failure,
+				},
+				result.Result{
+					Query:  Q("a:b,c:*"),
+					Tags:   result.NewTags("os-x", "gpu-a"),
+					Status: result.Crash,
+				},
+				result.Result{
+					Query:  Q("a:b,c:*"),
+					Tags:   result.NewTags("os-c", "gpu-c"),
+					Status: result.Pass,
+				},
+			},
+			updated: `
+# New failures. Please triage:
+crbug.com/dawn/0000 [ gpu-a ] a:* [ Failure ]
+crbug.com/dawn/0000 [ gpu-b ] a:* [ Failure ]
+crbug.com/dawn/0000 [ os-a ] a:* [ Failure ]
+crbug.com/dawn/0000 [ os-b ] a:* [ Failure ]
+`,
+		},
+		{ //////////////////////////////////////////////////////////////////////
+			name:         "prioritize tag sets",
+			expectations: ``,
+			results: result.List{
+				result.Result{
+					Query:  Q("a:b,c:*"),
+					Tags:   result.NewTags("os-a", "os-c", "gpu-b"),
+					Status: result.Failure,
+				},
+				result.Result{
+					Query:  Q("a:b,c:*"),
+					Tags:   result.NewTags("gpu-a", "os-b", "gpu-c"),
+					Status: result.Failure,
+				},
+				result.Result{
+					Query:  Q("a:b,c:*"),
+					Tags:   result.NewTags("os-c", "gpu-c"),
+					Status: result.Pass,
+				},
+			},
+			updated: `
+# New failures. Please triage:
+crbug.com/dawn/0000 [ gpu-b os-c ] a:* [ Failure ]
+crbug.com/dawn/0000 [ gpu-c os-b ] a:* [ Failure ]
+`,
+		},
+		{ //////////////////////////////////////////////////////////////////////
+			name:         "merge when over 75% of children fail",
+			expectations: ``,
+			results: result.List{
+				result.Result{Query: Q("a:b,c:t0:*"), Status: result.Failure},
+				result.Result{Query: Q("a:b,c:t1:*"), Status: result.Pass},
+				result.Result{Query: Q("a:b,c:t2:*"), Status: result.Failure},
+				result.Result{Query: Q("a:b,c:t3:*"), Status: result.Failure},
+				result.Result{Query: Q("a:b,c:t4:*"), Status: result.Failure},
+				result.Result{Query: Q("a:b,c:t5:*"), Status: result.Failure},
+				result.Result{Query: Q("a:b,c:t6:*"), Status: result.Failure},
+				result.Result{Query: Q("a:b,c:t7:*"), Status: result.Pass},
+				result.Result{Query: Q("a:b,c:t8:*"), Status: result.Failure},
+				result.Result{Query: Q("a:b,c:t9:*"), Status: result.Failure},
+			},
+			updated: `
+# New failures. Please triage:
+crbug.com/dawn/0000 a:* [ Failure ]
+`,
+		},
+		{ //////////////////////////////////////////////////////////////////////
+			name:         "don't merge when under 75% of children fail",
+			expectations: ``,
+			results: result.List{
+				result.Result{Query: Q("a:b,c:t0:*"), Status: result.Failure},
+				result.Result{Query: Q("a:b,c:t1:*"), Status: result.Pass},
+				result.Result{Query: Q("a:b,c:t2:*"), Status: result.Failure},
+				result.Result{Query: Q("a:b,c:t3:*"), Status: result.Failure},
+				result.Result{Query: Q("a:b,c:t4:*"), Status: result.Pass},
+				result.Result{Query: Q("a:b,c:t5:*"), Status: result.Failure},
+				result.Result{Query: Q("a:b,c:t6:*"), Status: result.Failure},
+				result.Result{Query: Q("a:b,c:t7:*"), Status: result.Pass},
+				result.Result{Query: Q("a:b,c:t8:*"), Status: result.Failure},
+				result.Result{Query: Q("a:b,c:t9:*"), Status: result.Failure},
+			},
+			updated: `
+# New failures. Please triage:
+crbug.com/dawn/0000 a:b,c:t0:* [ Failure ]
+crbug.com/dawn/0000 a:b,c:t2:* [ Failure ]
+crbug.com/dawn/0000 a:b,c:t3:* [ Failure ]
+crbug.com/dawn/0000 a:b,c:t5:* [ Failure ]
+crbug.com/dawn/0000 a:b,c:t6:* [ Failure ]
+crbug.com/dawn/0000 a:b,c:t8:* [ Failure ]
+crbug.com/dawn/0000 a:b,c:t9:* [ Failure ]
+`,
+		},
+		{ //////////////////////////////////////////////////////////////////////
+			name:         "merge when over 20 children fail",
+			expectations: ``,
+			results: result.List{ // 21 failures, 70% fail
+				result.Result{Query: Q("a:b,c:t00:*"), Status: result.Failure},
+				result.Result{Query: Q("a:b,c:t01:*"), Status: result.Pass},
+				result.Result{Query: Q("a:b,c:t02:*"), Status: result.Failure},
+				result.Result{Query: Q("a:b,c:t03:*"), Status: result.Pass},
+				result.Result{Query: Q("a:b,c:t04:*"), Status: result.Failure},
+				result.Result{Query: Q("a:b,c:t05:*"), Status: result.Failure},
+				result.Result{Query: Q("a:b,c:t06:*"), Status: result.Failure},
+				result.Result{Query: Q("a:b,c:t07:*"), Status: result.Pass},
+				result.Result{Query: Q("a:b,c:t08:*"), Status: result.Failure},
+				result.Result{Query: Q("a:b,c:t09:*"), Status: result.Failure},
+				result.Result{Query: Q("a:b,c:t10:*"), Status: result.Failure},
+				result.Result{Query: Q("a:b,c:t11:*"), Status: result.Pass},
+				result.Result{Query: Q("a:b,c:t12:*"), Status: result.Pass},
+				result.Result{Query: Q("a:b,c:t13:*"), Status: result.Failure},
+				result.Result{Query: Q("a:b,c:t14:*"), Status: result.Pass},
+				result.Result{Query: Q("a:b,c:t15:*"), Status: result.Failure},
+				result.Result{Query: Q("a:b,c:t16:*"), Status: result.Failure},
+				result.Result{Query: Q("a:b,c:t17:*"), Status: result.Failure},
+				result.Result{Query: Q("a:b,c:t18:*"), Status: result.Failure},
+				result.Result{Query: Q("a:b,c:t19:*"), Status: result.Failure},
+				result.Result{Query: Q("a:b,c:t20:*"), Status: result.Failure},
+				result.Result{Query: Q("a:b,c:t21:*"), Status: result.Pass},
+				result.Result{Query: Q("a:b,c:t22:*"), Status: result.Failure},
+				result.Result{Query: Q("a:b,c:t23:*"), Status: result.Failure},
+				result.Result{Query: Q("a:b,c:t24:*"), Status: result.Pass},
+				result.Result{Query: Q("a:b,c:t25:*"), Status: result.Failure},
+				result.Result{Query: Q("a:b,c:t26:*"), Status: result.Failure},
+				result.Result{Query: Q("a:b,c:t27:*"), Status: result.Pass},
+				result.Result{Query: Q("a:b,c:t28:*"), Status: result.Failure},
+				result.Result{Query: Q("a:b,c:t29:*"), Status: result.Failure},
+			},
+			updated: `
+# New failures. Please triage:
+crbug.com/dawn/0000 a:* [ Failure ]
+`,
+		},
+		{ //////////////////////////////////////////////////////////////////////
+			name:         "dont merge when under 21 children fail",
+			expectations: ``,
+			results: result.List{ // 20 failures, 66% fail
+				result.Result{Query: Q("a:b,c:t00:*"), Status: result.Failure},
+				result.Result{Query: Q("a:b,c:t01:*"), Status: result.Pass},
+				result.Result{Query: Q("a:b,c:t02:*"), Status: result.Failure},
+				result.Result{Query: Q("a:b,c:t03:*"), Status: result.Pass},
+				result.Result{Query: Q("a:b,c:t04:*"), Status: result.Failure},
+				result.Result{Query: Q("a:b,c:t05:*"), Status: result.Failure},
+				result.Result{Query: Q("a:b,c:t06:*"), Status: result.Failure},
+				result.Result{Query: Q("a:b,c:t07:*"), Status: result.Pass},
+				result.Result{Query: Q("a:b,c:t08:*"), Status: result.Failure},
+				result.Result{Query: Q("a:b,c:t09:*"), Status: result.Failure},
+				result.Result{Query: Q("a:b,c:t10:*"), Status: result.Failure},
+				result.Result{Query: Q("a:b,c:t11:*"), Status: result.Pass},
+				result.Result{Query: Q("a:b,c:t12:*"), Status: result.Pass},
+				result.Result{Query: Q("a:b,c:t13:*"), Status: result.Failure},
+				result.Result{Query: Q("a:b,c:t14:*"), Status: result.Pass},
+				result.Result{Query: Q("a:b,c:t15:*"), Status: result.Failure},
+				result.Result{Query: Q("a:b,c:t16:*"), Status: result.Failure},
+				result.Result{Query: Q("a:b,c:t17:*"), Status: result.Pass},
+				result.Result{Query: Q("a:b,c:t18:*"), Status: result.Failure},
+				result.Result{Query: Q("a:b,c:t19:*"), Status: result.Failure},
+				result.Result{Query: Q("a:b,c:t20:*"), Status: result.Failure},
+				result.Result{Query: Q("a:b,c:t21:*"), Status: result.Pass},
+				result.Result{Query: Q("a:b,c:t22:*"), Status: result.Failure},
+				result.Result{Query: Q("a:b,c:t23:*"), Status: result.Failure},
+				result.Result{Query: Q("a:b,c:t24:*"), Status: result.Pass},
+				result.Result{Query: Q("a:b,c:t25:*"), Status: result.Failure},
+				result.Result{Query: Q("a:b,c:t26:*"), Status: result.Failure},
+				result.Result{Query: Q("a:b,c:t27:*"), Status: result.Pass},
+				result.Result{Query: Q("a:b,c:t28:*"), Status: result.Failure},
+				result.Result{Query: Q("a:b,c:t29:*"), Status: result.Failure},
+			},
+			updated: `
+# New failures. Please triage:
+crbug.com/dawn/0000 a:b,c:t00:* [ Failure ]
+crbug.com/dawn/0000 a:b,c:t02:* [ Failure ]
+crbug.com/dawn/0000 a:b,c:t04:* [ Failure ]
+crbug.com/dawn/0000 a:b,c:t05:* [ Failure ]
+crbug.com/dawn/0000 a:b,c:t06:* [ Failure ]
+crbug.com/dawn/0000 a:b,c:t08:* [ Failure ]
+crbug.com/dawn/0000 a:b,c:t09:* [ Failure ]
+crbug.com/dawn/0000 a:b,c:t10:* [ Failure ]
+crbug.com/dawn/0000 a:b,c:t13:* [ Failure ]
+crbug.com/dawn/0000 a:b,c:t15:* [ Failure ]
+crbug.com/dawn/0000 a:b,c:t16:* [ Failure ]
+crbug.com/dawn/0000 a:b,c:t18:* [ Failure ]
+crbug.com/dawn/0000 a:b,c:t19:* [ Failure ]
+crbug.com/dawn/0000 a:b,c:t20:* [ Failure ]
+crbug.com/dawn/0000 a:b,c:t22:* [ Failure ]
+crbug.com/dawn/0000 a:b,c:t23:* [ Failure ]
+crbug.com/dawn/0000 a:b,c:t25:* [ Failure ]
+crbug.com/dawn/0000 a:b,c:t26:* [ Failure ]
+crbug.com/dawn/0000 a:b,c:t28:* [ Failure ]
+crbug.com/dawn/0000 a:b,c:t29:* [ Failure ]
+`,
+		},
+	} {
+		ex, err := expectations.Parse(header + test.expectations)
+		if err != nil {
+			t.Fatalf("'%v': expectations.Parse():\n%v", test.name, err)
+		}
+
+		errMsg := ""
+		diagnostics, err := ex.Update(test.results)
+		if err != nil {
+			errMsg = err.Error()
+		}
+		if diff := cmp.Diff(errMsg, test.err); diff != "" {
+			t.Errorf("'%v': expectations.Update() error:\n%v", test.name, diff)
+		}
+
+		if diff := cmp.Diff(diagnostics, test.diagnostics); diff != "" {
+			t.Errorf("'%v': diagnostics were not as expected:\n%v", test.name, diff)
+		}
+
+		if diff := cmp.Diff(
+			strings.Split(ex.String(), "\n"),
+			strings.Split(header+test.updated, "\n")); diff != "" {
+			t.Errorf("'%v': updated was not as expected:\n%v", test.name, diff)
+		}
+	}
+}
diff --git a/tools/src/cts/result/result.go b/tools/src/cts/result/result.go
index 0ec6095..e234c42 100644
--- a/tools/src/cts/result/result.go
+++ b/tools/src/cts/result/result.go
@@ -221,6 +221,14 @@
 	})
 }
 
+// FilterByVariant returns the results that exactly match the given tags
+func (l List) FilterByVariant(tags Tags) List {
+	str := TagsToString(tags)
+	return l.Filter(func(r Result) bool {
+		return len(r.Tags) == len(tags) && TagsToString(r.Tags) == str
+	})
+}
+
 // Statuses is a set of Status
 type Statuses = container.Set[Status]
 
diff --git a/tools/src/cts/result/result_test.go b/tools/src/cts/result/result_test.go
index 7eae418..bd1eb27 100644
--- a/tools/src/cts/result/result_test.go
+++ b/tools/src/cts/result/result_test.go
@@ -782,6 +782,96 @@
 	}
 }
 
+func TestFilterByVariant(t *testing.T) {
+	type Test struct {
+		results result.List
+		tags    result.Tags
+		expect  result.List
+	}
+	for _, test := range []Test{
+		{ //////////////////////////////////////////////////////////////////////
+			results: result.List{
+				result.Result{
+					Query:  Q(`a`),
+					Status: result.Pass,
+					Tags:   result.NewTags("x"),
+				},
+				result.Result{
+					Query:  Q(`b`),
+					Status: result.Failure,
+					Tags:   result.NewTags("y"),
+				},
+				result.Result{
+					Query:  Q(`c`),
+					Status: result.Pass,
+					Tags:   result.NewTags("x", "y"),
+				},
+			},
+			tags: result.NewTags("x", "y"),
+			expect: result.List{
+				result.Result{
+					Query:  Q(`c`),
+					Status: result.Pass,
+					Tags:   result.NewTags("x", "y"),
+				},
+			},
+		},
+		{ //////////////////////////////////////////////////////////////////////
+			results: result.List{
+				result.Result{
+					Query:  Q(`a`),
+					Status: result.Pass,
+					Tags:   result.NewTags("x"),
+				},
+				result.Result{
+					Query:  Q(`b`),
+					Status: result.Failure,
+					Tags:   result.NewTags("y"),
+				},
+				result.Result{
+					Query:  Q(`c`),
+					Status: result.Pass,
+					Tags:   result.NewTags("x", "y"),
+				},
+			},
+			tags: result.NewTags("x"),
+			expect: result.List{
+				result.Result{
+					Query:  Q(`a`),
+					Status: result.Pass,
+					Tags:   result.NewTags("x"),
+				},
+			},
+		},
+		{ //////////////////////////////////////////////////////////////////////
+			results: result.List{
+				result.Result{
+					Query:  Q(`a`),
+					Status: result.Pass,
+					Tags:   result.NewTags("x"),
+				},
+				result.Result{
+					Query:  Q(`b`),
+					Status: result.Failure,
+					Tags:   result.NewTags("y"),
+				},
+				result.Result{
+					Query:  Q(`c`),
+					Status: result.Pass,
+					Tags:   result.NewTags("x", "y"),
+				},
+			},
+			tags:   result.NewTags("q"),
+			expect: result.List{},
+		},
+	} {
+		got := test.results.FilterByVariant(test.tags)
+		if diff := cmp.Diff(got, test.expect); diff != "" {
+			t.Errorf("Results:\n%v\nFilterByVariant(%v) was not as expected:\n%v", test.results, test.tags, diff)
+		}
+	}
+}
+
 func TestStatuses(t *testing.T) {
 	type Test struct {
 		results result.List