Compat: Make roller deal with compat expectations

This CL changes these tools so instead of passing around
a list of results they pass around a map of lists of results
where the key of the map is "core" or "compat".

Change-Id: I3b64b5cc9a1b637eb4fe2f971691c5abdf2cb9a4
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/157561
Commit-Queue: Ben Clayton <bclayton@google.com>
Kokoro: Kokoro <noreply+kokoro@google.com>
Reviewed-by: Ben Clayton <bclayton@google.com>
diff --git a/tools/src/cmd/cts/common/config.go b/tools/src/cmd/cts/common/config.go
index 40a7f00..978cedf 100644
--- a/tools/src/cmd/cts/common/config.go
+++ b/tools/src/cmd/cts/common/config.go
@@ -34,6 +34,7 @@
 	"io/ioutil"
 
 	"dawn.googlesource.com/dawn/tools/src/buildbucket"
+	"dawn.googlesource.com/dawn/tools/src/cts/result"
 	"github.com/tidwall/jsonc"
 )
 
@@ -42,7 +43,9 @@
 // tools/src/cmd/cts/config.json.
 type Config struct {
 	// Test holds configuration data for test results.
-	Test struct {
+	Tests []struct {
+		// Mode used to refer to tests
+		ExecutionMode result.ExecutionMode
 		// The ResultDB string prefix for CTS tests.
 		Prefixes []string
 	}
diff --git a/tools/src/cmd/cts/common/results.go b/tools/src/cmd/cts/common/results.go
index c9276c0..7f93c46 100644
--- a/tools/src/cmd/cts/common/results.go
+++ b/tools/src/cmd/cts/common/results.go
@@ -77,7 +77,7 @@
 // GetResults loads or fetches the results, based on the values of r.
 // GetResults will update the ResultSource with the inferred patchset, if a file
 // and specific patchset was not specified.
-func (r *ResultSource) GetResults(ctx context.Context, cfg Config, auth auth.Options) (result.List, error) {
+func (r *ResultSource) GetResults(ctx context.Context, cfg Config, auth auth.Options) (result.ResultsByExecutionMode, error) {
 	// Check that File and Patchset weren't both specified
 	ps := &r.Patchset
 	if r.File != "" && ps.Change != 0 {
@@ -113,13 +113,13 @@
 			return nil, err
 		}
 		fmt.Printf("scanning for latest patchset of %v...\n", latest.Number)
-		var results result.List
-		results, *ps, err = MostRecentResultsForChange(ctx, cfg, r.CacheDir, gerrit, bb, rdb, latest.Number)
+		var resultsByExecutionMode result.ResultsByExecutionMode
+		resultsByExecutionMode, *ps, err = MostRecentResultsForChange(ctx, cfg, r.CacheDir, gerrit, bb, rdb, latest.Number)
 		if err != nil {
 			return nil, err
 		}
 		fmt.Printf("using results from cl %v ps %v...\n", ps.Change, ps.Patchset)
-		return results, nil
+		return resultsByExecutionMode, nil
 	}
 
 	// If a change, but no patchset was specified, then query the most recent
@@ -144,12 +144,12 @@
 		return nil, err
 	}
 
-	results, err := CacheResults(ctx, cfg, *ps, r.CacheDir, rdb, builds)
+	resultsByExecutionMode, err := CacheResults(ctx, cfg, *ps, r.CacheDir, rdb, builds)
 	if err != nil {
 		return nil, err
 	}
 
-	return results, nil
+	return resultsByExecutionMode, nil
 }
 
 // CacheResults looks in the cache at 'cacheDir' for the results for the given
@@ -162,7 +162,7 @@
 	ps gerrit.Patchset,
 	cacheDir string,
 	rdb *resultsdb.ResultsDB,
-	builds BuildsByName) (result.List, error) {
+	builds BuildsByName) (result.ResultsByExecutionMode, error) {
 
 	var cachePath string
 	if cacheDir != "" {
@@ -194,7 +194,7 @@
 	ctx context.Context,
 	cfg Config,
 	rdb *resultsdb.ResultsDB,
-	builds BuildsByName) (result.List, error) {
+	builds BuildsByName) (result.ResultsByExecutionMode, error) {
 
 	fmt.Printf("fetching results from resultdb...")
 
@@ -217,57 +217,66 @@
 		}
 	}
 
-	results := result.List{}
+	resultsByExecutionMode := result.ResultsByExecutionMode{}
 	var err error = nil
-	for _, prefix := range cfg.Test.Prefixes {
-		err = rdb.QueryTestResults(ctx, builds.ids(), prefix+".*", func(rpb *rdbpb.TestResult) error {
-			if time.Since(lastPrintedDot) > 5*time.Second {
-				lastPrintedDot = time.Now()
-				fmt.Printf(".")
-			}
+	for _, test := range cfg.Tests {
+		results := result.List{}
+		for _, prefix := range test.Prefixes {
+			err = rdb.QueryTestResults(ctx, builds.ids(), prefix+".*", func(rpb *rdbpb.TestResult) error {
+				if time.Since(lastPrintedDot) > 5*time.Second {
+					lastPrintedDot = time.Now()
+					fmt.Printf(".")
+				}
 
-			if !strings.HasPrefix(rpb.GetTestId(), prefix) {
+				if !strings.HasPrefix(rpb.GetTestId(), prefix) {
+					return nil
+				}
+
+				testName := rpb.GetTestId()[len(prefix):]
+				status := toStatus(rpb.Status)
+				tags := result.NewTags()
+
+				duration := rpb.GetDuration().AsDuration()
+				mayExonerate := false
+
+				for _, sp := range rpb.Tags {
+					if sp.Key == "typ_tag" {
+						tags.Add(sp.Value)
+					}
+					if sp.Key == "javascript_duration" {
+						var err error
+						if duration, err = time.ParseDuration(sp.Value); err != nil {
+							return err
+						}
+					}
+					if sp.Key == "may_exonerate" {
+						var err error
+						if mayExonerate, err = strconv.ParseBool(sp.Value); err != nil {
+							return err
+						}
+					}
+				}
+
+				results = append(results, result.Result{
+					Query:        query.Parse(testName),
+					Status:       status,
+					Tags:         tags,
+					Duration:     duration,
+					MayExonerate: mayExonerate,
+				})
+
 				return nil
-			}
-
-			testName := rpb.GetTestId()[len(prefix):]
-			status := toStatus(rpb.Status)
-			tags := result.NewTags()
-
-			duration := rpb.GetDuration().AsDuration()
-			mayExonerate := false
-
-			for _, sp := range rpb.Tags {
-				if sp.Key == "typ_tag" {
-					tags.Add(sp.Value)
-				}
-				if sp.Key == "javascript_duration" {
-					var err error
-					if duration, err = time.ParseDuration(sp.Value); err != nil {
-						return err
-					}
-				}
-				if sp.Key == "may_exonerate" {
-					var err error
-					if mayExonerate, err = strconv.ParseBool(sp.Value); err != nil {
-						return err
-					}
-				}
-			}
-
-			results = append(results, result.Result{
-				Query:        query.Parse(testName),
-				Status:       status,
-				Tags:         tags,
-				Duration:     duration,
-				MayExonerate: mayExonerate,
 			})
+			if err != nil {
+				break
+			}
 
-			return nil
-		})
-		if err != nil {
-			break
+			// Expand aliased tags, remove specific tags
+			CleanTags(cfg, &results)
+
+			results.Sort()
 		}
+		resultsByExecutionMode[test.ExecutionMode] = results
 	}
 
 	fmt.Println(" done")
@@ -276,11 +285,7 @@
 		return nil, err
 	}
 
-	// Expand aliased tags, remove specific tags
-	CleanTags(cfg, &results)
-
-	results.Sort()
-	return results, err
+	return resultsByExecutionMode, err
 }
 
 // LatestCTSRoll returns for the latest merged CTS roll that landed in the past
@@ -323,7 +328,7 @@
 	g *gerrit.Gerrit,
 	bb *buildbucket.Buildbucket,
 	rdb *resultsdb.ResultsDB,
-	change int) (result.List, gerrit.Patchset, error) {
+	change int) (result.ResultsByExecutionMode, gerrit.Patchset, error) {
 
 	ps, err := LatestPatchset(g, change)
 	if err != nil {
diff --git a/tools/src/cmd/cts/config.json b/tools/src/cmd/cts/config.json
index aa6d697..02d1efb 100644
--- a/tools/src/cmd/cts/config.json
+++ b/tools/src/cmd/cts/config.json
@@ -1,11 +1,19 @@
 {
-  "Test": {
-    "Prefixes": [
-      "ninja://chrome/test:telemetry_gpu_integration_test/gpu_tests.webgpu_cts_integration_test.WebGpuCtsIntegrationTest.",
-      "ninja://chrome/test:telemetry_gpu_integration_test_android_chrome/gpu_tests.webgpu_cts_integration_test.WebGpuCtsIntegrationTest."
-    ],
-    "SlowThreshold": 45000000000 // 45 seconds
-  },
+  "Tests": [
+    {
+      "ExecutionMode": "core",
+      "Prefixes": [
+        "ninja://chrome/test:telemetry_gpu_integration_test/gpu_tests.webgpu_cts_integration_test.WebGpuCtsIntegrationTest.",
+        "ninja://chrome/test:telemetry_gpu_integration_test_android_chrome/gpu_tests.webgpu_cts_integration_test.WebGpuCtsIntegrationTest."
+      ]
+    },
+    {
+      "ExecutionMode": "compat",
+      "Prefixes": [
+        "ninja://chrome/test:telemetry_gpu_integration_test/gpu_tests.webgpu_compat_cts_integration_test.WebGpuCompatCtsIntegrationTest."
+      ]
+    }
+  ],
   "Gerrit": {
     "Host": "https://dawn-review.googlesource.com",
     "Project": "dawn"
diff --git a/tools/src/cmd/cts/export/export.go b/tools/src/cmd/cts/export/export.go
index fc663b8..bdbb58a 100644
--- a/tools/src/cmd/cts/export/export.go
+++ b/tools/src/cmd/cts/export/export.go
@@ -125,12 +125,12 @@
 	// Fetch the table column names
 	columns, err := fetchRow[string](s, spreadsheet, dataSheet, 0)
 
-	// Grab the results
-	results, err := c.flags.results.GetResults(ctx, cfg, auth)
+	// Grab the results, grouped by execution mode
+	resultsByExecutionMode, err := c.flags.results.GetResults(ctx, cfg, auth)
 	if err != nil {
 		return err
 	}
-	if len(results) == 0 {
+	if len(resultsByExecutionMode) == 0 {
 		return fmt.Errorf("no results found")
 	}
 	ps := c.flags.results.Patchset
@@ -157,46 +157,48 @@
 
 	// Generate a new set of counts of test by status
 	log.Printf("exporting results from cl %v ps %v...", ps.Change, ps.Patchset)
-	counts := map[result.Status]int{}
-	for _, r := range results {
-		counts[r.Status] = counts[r.Status] + 1
-	}
-
-	// Generate new cell data based on the table column names
-	data := []any{}
-	for _, column := range columns {
-		switch strings.ToLower(column) {
-		case "date":
-			data = append(data, time.Now().UTC().Format("2006-01-02"))
-		case "change":
-			data = append(data, ps.Change)
-		case "unimplemented":
-			data = append(data, numUnimplemented)
-		default:
-			count, ok := counts[result.Status(column)]
-			if !ok {
-				log.Println("no results with status", column)
-			}
-			data = append(data, count)
+	for _, results := range resultsByExecutionMode {
+		counts := map[result.Status]int{}
+		for _, r := range results {
+			counts[r.Status] = counts[r.Status] + 1
 		}
-	}
 
-	// Insert a blank row under the column header row
-	if err := insertBlankRows(s, spreadsheet, dataSheet, 1, 1); err != nil {
-		return err
-	}
+		// Generate new cell data based on the table column names
+		data := []any{}
+		for _, column := range columns {
+			switch strings.ToLower(column) {
+			case "date":
+				data = append(data, time.Now().UTC().Format("2006-01-02"))
+			case "change":
+				data = append(data, ps.Change)
+			case "unimplemented":
+				data = append(data, numUnimplemented)
+			default:
+				count, ok := counts[result.Status(column)]
+				if !ok {
+					log.Println("no results with status", column)
+				}
+				data = append(data, count)
+			}
+		}
 
-	// Add a new row to the spreadsheet
-	_, err = s.Spreadsheets.Values.BatchUpdate(spreadsheet.SpreadsheetId,
-		&sheets.BatchUpdateValuesRequest{
-			ValueInputOption: "RAW",
-			Data: []*sheets.ValueRange{{
-				Range:  rowRange(1, dataSheet),
-				Values: [][]any{data},
-			}},
-		}).Do()
-	if err != nil {
-		return fmt.Errorf("failed to update spreadsheet: %v", err)
+		// Insert a blank row under the column header row
+		if err := insertBlankRows(s, spreadsheet, dataSheet, 1, 1); err != nil {
+			return err
+		}
+
+		// Add a new row to the spreadsheet
+		_, err = s.Spreadsheets.Values.BatchUpdate(spreadsheet.SpreadsheetId,
+			&sheets.BatchUpdateValuesRequest{
+				ValueInputOption: "RAW",
+				Data: []*sheets.ValueRange{{
+					Range:  rowRange(1, dataSheet),
+					Values: [][]any{data},
+				}},
+			}).Do()
+		if err != nil {
+			return fmt.Errorf("failed to update spreadsheet: %v", err)
+		}
 	}
 
 	return nil
diff --git a/tools/src/cmd/cts/merge/merge.go b/tools/src/cmd/cts/merge/merge.go
index 15cb3e3..b5803e6 100644
--- a/tools/src/cmd/cts/merge/merge.go
+++ b/tools/src/cmd/cts/merge/merge.go
@@ -57,8 +57,8 @@
 }
 
 func (c *cmd) Run(ctx context.Context, cfg common.Config) error {
-	// Load each of the results files and merge together
-	var results result.List
+	// Load each of the results files and merge them together
+	resultsByExecutionMode := result.ResultsByExecutionMode{}
 	for _, path := range flag.Args() {
 		// Load results
 		r, err := result.Load(path)
@@ -66,7 +66,9 @@
 			return fmt.Errorf("while reading '%v': %w", path, err)
 		}
 		// Combine and merge
-		results = result.Merge(results, r)
+		for _, test := range cfg.Tests {
+			resultsByExecutionMode[test.ExecutionMode] = result.Merge(resultsByExecutionMode[test.ExecutionMode], r[test.ExecutionMode])
+		}
 	}
 
 	// Open output file
@@ -81,5 +83,5 @@
 	}
 
 	// Write out
-	return result.Write(output, results)
+	return result.Write(output, resultsByExecutionMode)
 }
diff --git a/tools/src/cmd/cts/roll/roll.go b/tools/src/cmd/cts/roll/roll.go
index f368eba..c0b1850 100644
--- a/tools/src/cmd/cts/roll/roll.go
+++ b/tools/src/cmd/cts/roll/roll.go
@@ -242,27 +242,52 @@
 	}
 	ctsLog = ctsLog[:len(ctsLog)-1] // Don't include the oldest change in the log
 
-	// Download and parse the expectations file
-	expectationsFile, err := r.dawn.DownloadFile(ctx, refMain, common.RelativeExpectationsPath)
-	if err != nil {
-		return err
-	}
-	ex, err := expectations.Parse(common.RelativeExpectationsPath, expectationsFile)
-	if err != nil {
-		return fmt.Errorf("failed to load expectations: %v", err)
+	type ExpectationsFileInfo struct {
+		path            string
+		expectations    expectations.Content
+		newExpectations expectations.Content
+		executionMode   result.ExecutionMode
+		results         result.List
 	}
 
-	// If the user requested a full rebuild of the expectations, strip out
-	// everything but comment chunks.
-	if r.flags.rebuild {
-		rebuilt := ex.Clone()
-		rebuilt.Chunks = rebuilt.Chunks[:0]
-		for _, c := range ex.Chunks {
-			if c.IsCommentOnly() {
-				rebuilt.Chunks = append(rebuilt.Chunks, c)
-			}
+	var exInfos = []*ExpectationsFileInfo{
+		{
+			path:          common.RelativeExpectationsPath,
+			executionMode: "core",
+			results:       result.List{},
+		},
+		{
+			path:          common.RelativeCompatExpectationsPath,
+			executionMode: "compat",
+			results:       result.List{},
+		},
+	}
+
+	// Download and parse the expectations files
+	for _, exInfo := range exInfos {
+		expectationsFile, err := r.dawn.DownloadFile(ctx, refMain, exInfo.path)
+		if err != nil {
+			return err
 		}
-		ex = rebuilt
+		ex, err := expectations.Parse(exInfo.path, expectationsFile)
+		if err != nil {
+			return fmt.Errorf("failed to load expectations: %v", err)
+		}
+
+		// If the user requested a full rebuild of the expectations, strip out
+		// everything but comment chunks.
+		if r.flags.rebuild {
+			rebuilt := ex.Clone()
+			rebuilt.Chunks = rebuilt.Chunks[:0]
+			for _, c := range ex.Chunks {
+				if c.IsCommentOnly() {
+					rebuilt.Chunks = append(rebuilt.Chunks, c)
+				}
+			}
+			ex = rebuilt
+		}
+
+		exInfo.expectations = ex
 	}
 
 	generatedFiles, err := r.generateFiles(ctx)
@@ -333,10 +358,12 @@
 	}
 
 	// Update the DEPS, expectations, and other generated files.
-	updateExpectationUpdateTimestamp(&ex)
+	for _, exInfo := range exInfos {
+		updateExpectationUpdateTimestamp(&exInfo.expectations)
+		generatedFiles[exInfo.path] = exInfo.expectations.String()
+	}
 	generatedFiles[depsRelPath] = updatedDEPS
 	generatedFiles[gitLinkPath] = newCTSHash
-	generatedFiles[common.RelativeExpectationsPath] = ex.String()
 
 	msg := r.rollCommitMessage(oldCTSHash, newCTSHash, ctsLog, changeID)
 	ps, err := r.gerrit.EditFiles(changeID, msg, generatedFiles, deletedFiles)
@@ -345,7 +372,6 @@
 	}
 
 	// Begin main roll loop
-	results := result.List{}
 	for attempt := 0; ; attempt++ {
 		// Kick builds
 		log.Printf("building (attempt %v)...\n", attempt)
@@ -368,29 +394,31 @@
 
 		// Gather the build results
 		log.Println("gathering results...")
-		psResults, err := common.CacheResults(ctx, r.cfg, ps, r.flags.cacheDir, r.rdb, builds)
+		psResultsByExecutionMode, err := common.CacheResults(ctx, r.cfg, ps, r.flags.cacheDir, r.rdb, builds)
 		if err != nil {
 			return err
 		}
 
-		// Merge the new results into the accumulated results
-		log.Println("merging results...")
-		results = result.Merge(results, psResults)
-
 		// Rebuild the expectations with the accumulated results
 		log.Println("building new expectations...")
 		// Note: The new expectations are not used if the last attempt didn't
 		// fail, but we always want to post the diagnostics
-		newExpectations := ex.Clone()
-		diags, err := newExpectations.Update(results, testlist)
-		if err != nil {
-			return err
-		}
+		for _, exInfo := range exInfos {
+			// Merge the new results into the accumulated results
+			log.Printf("merging results for %s ...\n", exInfo.executionMode)
+			exInfo.results = result.Merge(exInfo.results, psResultsByExecutionMode[exInfo.executionMode])
 
-		// Post statistics and expectation diagnostics
-		log.Println("posting stats & diagnostics...")
-		if err := r.postComments(ps, diags, results); err != nil {
-			return err
+			exInfo.newExpectations = exInfo.expectations.Clone()
+			diags, err := exInfo.newExpectations.Update(exInfo.results, testlist)
+			if err != nil {
+				return err
+			}
+
+			// Post statistics and expectation diagnostics
+			log.Printf("posting stats & diagnostics for %s...\n", exInfo.executionMode)
+			if err := r.postComments(ps, exInfo.path, diags, exInfo.results); err != nil {
+				return err
+			}
 		}
 
 		// If all the builds attempted, then we're done!
@@ -400,10 +428,13 @@
 
 		// Otherwise, push the updated expectations, and try again
 		log.Println("updating expectations...")
-		updateExpectationUpdateTimestamp(&newExpectations)
-		ps, err = r.gerrit.EditFiles(changeID, msg, map[string]string{
-			common.RelativeExpectationsPath: newExpectations.String(),
-		}, nil)
+
+		editedFiles := map[string]string{}
+		for _, exInfo := range exInfos {
+			updateExpectationUpdateTimestamp(&exInfo.newExpectations)
+			editedFiles[exInfo.path] = exInfo.newExpectations.String()
+		}
+		ps, err = r.gerrit.EditFiles(changeID, msg, editedFiles, nil)
 		if err != nil {
 			return fmt.Errorf("failed to update change '%v': %v", changeID, err)
 		}
@@ -436,7 +467,7 @@
 			return err
 		}
 		if len(jsonRes.Emails) < 1 {
-			return fmt.Errorf("Expected at least one email in JSON response %s", jsonRes)
+			return fmt.Errorf("expected at least one email in JSON response %s", jsonRes)
 		}
 		reviewer = jsonRes.Emails[0]
 	}
@@ -498,6 +529,7 @@
 	msg.WriteString("\n\n")
 	msg.WriteString("Regenerated:\n")
 	msg.WriteString(" - expectations.txt\n")
+	msg.WriteString(" - compat-expectations.txt\n")
 	msg.WriteString(" - ts_sources.txt\n")
 	msg.WriteString(" - test_list.txt\n")
 	msg.WriteString(" - cache_list.txt\n")
@@ -551,7 +583,7 @@
 	return msg.String()
 }
 
-func (r *roller) postComments(ps gerrit.Patchset, diags []expectations.Diagnostic, results result.List) error {
+func (r *roller) postComments(ps gerrit.Patchset, path string, diags []expectations.Diagnostic, results result.List) error {
 	fc := make([]gerrit.FileComment, len(diags))
 	for i, d := range diags {
 		var prefix string
@@ -564,7 +596,7 @@
 			prefix = "🟦"
 		}
 		fc[i] = gerrit.FileComment{
-			Path:    common.RelativeExpectationsPath,
+			Path:    path,
 			Side:    gerrit.Left,
 			Line:    d.Line,
 			Message: fmt.Sprintf("%v %v: %v", prefix, d.Severity, d.Message),
diff --git a/tools/src/cmd/cts/roll/roll_test.go b/tools/src/cmd/cts/roll/roll_test.go
index 800d9ff..552509b 100644
--- a/tools/src/cmd/cts/roll/roll_test.go
+++ b/tools/src/cmd/cts/roll/roll_test.go
@@ -73,6 +73,7 @@
 
 Regenerated:
  - expectations.txt
+ - compat-expectations.txt
  - ts_sources.txt
  - test_list.txt
  - cache_list.txt
diff --git a/tools/src/cmd/cts/time/time.go b/tools/src/cmd/cts/time/time.go
index 7f6e5e2..a75429a 100644
--- a/tools/src/cmd/cts/time/time.go
+++ b/tools/src/cmd/cts/time/time.go
@@ -85,27 +85,31 @@
 		return fmt.Errorf("failed to obtain authentication options: %w", err)
 	}
 
-	// Obtain the results
-	results, err := c.flags.source.GetResults(ctx, cfg, auth)
+	// Obtain the results, grouped by execution mode
+	resultsByExecutionMode, err := c.flags.source.GetResults(ctx, cfg, auth)
 	if err != nil {
 		return err
 	}
 
-	if len(results) == 0 {
+	if len(resultsByExecutionMode) == 0 {
 		return fmt.Errorf("no results found")
 	}
 
 	// If tags were provided, filter the results to those that contain these tags
 	if c.flags.tags != "" {
-		results = results.FilterByTags(result.StringToTags(c.flags.tags))
-		if len(results) == 0 {
+		for name := range resultsByExecutionMode {
+			resultsByExecutionMode[name] = resultsByExecutionMode[name].FilterByTags(result.StringToTags(c.flags.tags))
+		}
+		if len(resultsByExecutionMode) == 0 {
 			return fmt.Errorf("no results after filtering by tags")
 		}
 	}
 
 	if c.flags.query != "" {
-		results = results.FilterByQuery(query.Parse(c.flags.query))
-		if len(results) == 0 {
+		for name := range resultsByExecutionMode {
+			resultsByExecutionMode[name] = resultsByExecutionMode[name].FilterByQuery(query.Parse(c.flags.query))
+		}
+		if len(resultsByExecutionMode) == 0 {
 			return fmt.Errorf("no results after filtering by test query")
 		}
 	}
@@ -117,86 +121,97 @@
 			Tags   string
 		}
 		merged := map[Key]result.Result{}
-		for _, r := range results {
-			k := Key{
-				Query: query.Query{
-					Suite: r.Query.Suite,
-					Files: r.Query.Files,
-					Tests: r.Query.Tests,
-					Cases: "*",
-				},
-				Status: r.Status,
-				Tags:   result.TagsToString(r.Tags),
-			}
-			entry, exists := merged[k]
-			if exists {
-				entry.Duration += r.Duration
-			} else {
-				entry = result.Result{
-					Query:    k.Query,
-					Duration: r.Duration,
-					Status:   r.Status,
-					Tags:     r.Tags,
+		for name, results := range resultsByExecutionMode {
+			for _, r := range results {
+				k := Key{
+					Query: query.Query{
+						Suite: r.Query.Suite,
+						Files: r.Query.Files,
+						Tests: r.Query.Tests,
+						Cases: "*",
+					},
+					Status: r.Status,
+					Tags:   result.TagsToString(r.Tags),
 				}
+				entry, exists := merged[k]
+				if exists {
+					entry.Duration += r.Duration
+				} else {
+					entry = result.Result{
+						Query:    k.Query,
+						Duration: r.Duration,
+						Status:   r.Status,
+						Tags:     r.Tags,
+					}
+				}
+				merged[k] = entry
 			}
-			merged[k] = entry
-		}
 
-		results = result.List{}
-		for _, r := range merged {
-			results = append(results, r)
+			newResultList := result.List{}
+			for _, r := range merged {
+				newResultList = append(newResultList, r)
+			}
+			resultsByExecutionMode[name] = newResultList
 		}
 	}
 
 	// Sort the results with longest duration first
-	sort.Slice(results, func(i, j int) bool {
-		return results[i].Duration > results[j].Duration
-	})
+	for name, results := range resultsByExecutionMode {
+		sort.Slice(results, func(i, j int) bool {
+			return results[i].Duration > results[j].Duration
+		})
+		resultsByExecutionMode[name] = results
+	}
 
 	didSomething := false
 
 	// Did the user request --top N ?
 	if c.flags.topN > 0 {
 		didSomething = true
-		topN := results
-		if c.flags.topN < len(results) {
-			topN = topN[:c.flags.topN]
-		}
-		for i, r := range topN {
-			fmt.Printf("%3.1d: %v\n", i, r)
+		for name, results := range resultsByExecutionMode {
+			topN := results
+			if c.flags.topN < len(results) {
+				topN = topN[:c.flags.topN]
+			}
+			for i, r := range topN {
+				fmt.Printf("%s %3.1d: %v\n", name, i, r)
+			}
 		}
 	}
 
 	// Did the user request --histogram ?
 	if c.flags.histogram {
-		maxTime := results[0].Duration
+		for name, results := range resultsByExecutionMode {
+			maxTime := results[0].Duration
 
-		const (
-			numBins = 25
-			pow     = 2.0
-		)
+			const (
+				numBins = 25
+				pow     = 2.0
+			)
 
-		binToDuration := func(i int) time.Duration {
-			frac := math.Pow(float64(i)/float64(numBins), pow)
-			return time.Duration(float64(maxTime) * frac)
-		}
-		durationToBin := func(d time.Duration) int {
-			frac := math.Pow(float64(d)/float64(maxTime), 1.0/pow)
-			idx := int(frac * numBins)
-			if idx >= numBins-1 {
-				return numBins - 1
+			binToDuration := func(i int) time.Duration {
+				frac := math.Pow(float64(i)/float64(numBins), pow)
+				return time.Duration(float64(maxTime) * frac)
 			}
-			return idx
-		}
+			durationToBin := func(d time.Duration) int {
+				frac := math.Pow(float64(d)/float64(maxTime), 1.0/pow)
+				idx := int(frac * numBins)
+				if idx >= numBins-1 {
+					return numBins - 1
+				}
+				return idx
+			}
 
-		didSomething = true
-		bins := make([]int, numBins)
-		for _, r := range results {
-			idx := durationToBin(r.Duration)
-			bins[idx] = bins[idx] + 1
-		}
-		for i, bin := range bins {
-			fmt.Printf("[%.8v, %.8v]: %v\n", binToDuration(i), binToDuration(i+1), bin)
+			didSomething = true
+			bins := make([]int, numBins)
+			for _, r := range results {
+				idx := durationToBin(r.Duration)
+				bins[idx] = bins[idx] + 1
+			}
+			fmt.Printf("%s\n", name)
+			for i, bin := range bins {
+				fmt.Printf("[%.8v, %.8v]: %v\n", binToDuration(i), binToDuration(i+1), bin)
+			}
 		}
 	}
 
diff --git a/tools/src/cmd/cts/update/update.go b/tools/src/cmd/cts/update/update.go
index b7cc0c6..b550e37 100644
--- a/tools/src/cmd/cts/update/update.go
+++ b/tools/src/cmd/cts/update/update.go
@@ -47,10 +47,21 @@
 	common.Register(&cmd{})
 }
 
+type arrayFlags []string
+
+func (i *arrayFlags) String() string {
+	return strings.Join((*i), " ")
+}
+
+func (i *arrayFlags) Set(value string) error {
+	*i = append(*i, value)
+	return nil
+}
+
 type cmd struct {
 	flags struct {
 		results      common.ResultSource
-		expectations string
+		expectations arrayFlags
 		auth         authcli.Flags
 	}
 }
@@ -64,10 +75,9 @@
 }
 
 func (c *cmd) RegisterFlags(ctx context.Context, cfg common.Config) ([]string, error) {
-	defaultExpectations := common.DefaultExpectationsPath()
 	c.flags.results.RegisterFlags(cfg)
 	c.flags.auth.Register(flag.CommandLine, auth.DefaultAuthOptions( /* needsCloudScopes */ false))
-	flag.StringVar(&c.flags.expectations, "expectations", defaultExpectations, "path to CTS expectations file to update")
+	flag.Var(&c.flags.expectations, "expectations", "path to CTS expectations file(s) to update")
 	return nil, nil
 }
 
@@ -85,6 +95,10 @@
 }
 
 func (c *cmd) Run(ctx context.Context, cfg common.Config) error {
+	if len(c.flags.expectations) == 0 {
+		c.flags.expectations = common.DefaultExpectationsPaths()
+	}
+
 	// Validate command line arguments
 	auth, err := c.flags.auth.Options()
 	if err != nil {
@@ -93,20 +107,15 @@
 
 	// Fetch the results
 	log.Println("fetching results...")
-	results, err := c.flags.results.GetResults(ctx, cfg, auth)
+	resultsByExecutionMode, err := c.flags.results.GetResults(ctx, cfg, auth)
 	if err != nil {
 		return err
 	}
 
 	// Merge to remove duplicates
 	log.Println("removing duplicate results...")
-	results = result.Merge(results)
-
-	// Load the expectations file
-	log.Println("loading expectations...")
-	ex, err := expectations.Load(c.flags.expectations)
-	if err != nil {
-		return err
+	for name := range resultsByExecutionMode {
+		resultsByExecutionMode[name] = result.Merge(resultsByExecutionMode[name])
 	}
 
 	log.Println("loading test list...")
@@ -115,22 +124,40 @@
 		return err
 	}
 
-	log.Println("validating...")
-	if diag := ex.Validate(); diag.NumErrors() > 0 {
-		diag.Print(os.Stdout, c.flags.expectations)
-		return fmt.Errorf("validation failed")
+	for _, expectationsFilename := range c.flags.expectations {
+		// Load the expectations file
+		log.Printf("loading expectations %s...\n", expectationsFilename)
+		ex, err := expectations.Load(expectationsFilename)
+		if err != nil {
+			return err
+		}
+
+		log.Printf("validating %s...\n", expectationsFilename)
+		if diag := ex.Validate(); diag.NumErrors() > 0 {
+			diag.Print(os.Stdout, expectationsFilename)
+			return fmt.Errorf("validation failed")
+		}
+
+		// Update the expectations file with the results
+		log.Printf("updating expectations %s...\n", expectationsFilename)
+		// Not clear what to do here
+		name := result.ExecutionMode("core")
+		if strings.Contains(expectationsFilename, "compat") {
+			name = "compat"
+		}
+		diag, err := ex.Update(resultsByExecutionMode[name], testlist)
+		if err != nil {
+			return err
+		}
+
+		// Print any diagnostics
+		diag.Print(os.Stdout, expectationsFilename)
+
+		// Save the updated expectations file
+		err = ex.Save(expectationsFilename)
+		if err != nil {
+			return err
+		}
 	}
-
-	// Update the expectations file with the results
-	log.Println("updating expectations...")
-	diag, err := ex.Update(results, testlist)
-	if err != nil {
-		return err
-	}
-
-	// Print any diagnostics
-	diag.Print(os.Stdout, c.flags.expectations)
-
-	// Save the updated expectations file
-	return ex.Save(c.flags.expectations)
+	return err
 }
diff --git a/tools/src/cts/result/result.go b/tools/src/cts/result/result.go
index 0fd38db..0e32189 100644
--- a/tools/src/cts/result/result.go
+++ b/tools/src/cts/result/result.go
@@ -110,7 +110,12 @@
 //	<query> <tags> <status>
 //
 // <tags> may be omitted if there were no tags.
-func Parse(in string) (Result, error) {
+//
+// Tests are separated into sections where the section name
+// appears at the end of the list as just a line with
+//
+//	<section-name>
+func Parse(in string) (ExecutionMode, Result, error) {
 	line := in
 	token := func() string {
 		for i, c := range line {
@@ -136,8 +141,12 @@
 	c := token()
 	d := token()
 	e := token()
+
+	if a != "" && b == "" && token() == "" {
+		return ExecutionMode(a), Result{}, nil
+	}
 	if a == "" || b == "" || c == "" || d == "" || token() != "" {
-		return Result{}, fmt.Errorf("unable to parse result '%v'", in)
+		return "", Result{}, fmt.Errorf("unable to parse result '%v'", in)
 	}
 
 	query := query.Parse(a)
@@ -146,31 +155,37 @@
 		status := Status(b)
 		duration, err := time.ParseDuration(c)
 		if err != nil {
-			return Result{}, fmt.Errorf("unable to parse result '%v': %w", in, err)
+			return "", Result{}, fmt.Errorf("unable to parse result '%v': %w", in, err)
 		}
 		mayExonerate, err := strconv.ParseBool(d)
 		if err != nil {
-			return Result{}, fmt.Errorf("unable to parse result '%v': %w", in, err)
+			return "", Result{}, fmt.Errorf("unable to parse result '%v': %w", in, err)
 		}
-		return Result{query, nil, status, duration, mayExonerate}, nil
+		return "", Result{query, nil, status, duration, mayExonerate}, nil
 	} else {
 		tags := StringToTags(b)
 		status := Status(c)
 		duration, err := time.ParseDuration(d)
 		if err != nil {
-			return Result{}, fmt.Errorf("unable to parse result '%v': %w", in, err)
+			return "", Result{}, fmt.Errorf("unable to parse result '%v': %w", in, err)
 		}
 		mayExonerate, err := strconv.ParseBool(e)
 		if err != nil {
-			return Result{}, fmt.Errorf("unable to parse result '%v': %w", in, err)
+			return "", Result{}, fmt.Errorf("unable to parse result '%v': %w", in, err)
 		}
-		return Result{query, tags, status, duration, mayExonerate}, nil
+		return "", Result{query, tags, status, duration, mayExonerate}, nil
 	}
 }
 
 // List is a list of results
 type List []Result
 
+// The mode the tests were run in, "core" or "compat".
+type ExecutionMode string
+
+// Lists of test results by execution mode.
+type ResultsByExecutionMode map[ExecutionMode]List
+
 // Variant is a collection of tags that uniquely identify a test
 // configuration (e.g the combination of OS, GPU, validation-modes, etc).
 type Variant = Tags
@@ -359,7 +374,7 @@
 }
 
 // Load loads the result list from the file with the given path
-func Load(path string) (List, error) {
+func Load(path string) (ResultsByExecutionMode, error) {
 	file, err := os.Open(path)
 	if err != nil {
 		return nil, err
@@ -374,7 +389,7 @@
 }
 
 // Save saves the result list to the file with the given path
-func Save(path string, results List) error {
+func Save(path string, results ResultsByExecutionMode) error {
 	dir := filepath.Dir(path)
 	if err := os.MkdirAll(dir, 0777); err != nil {
 		return err
@@ -388,23 +403,34 @@
 }
 
 // Read reads a result list from the given reader
-func Read(r io.Reader) (List, error) {
+func Read(r io.Reader) (ResultsByExecutionMode, error) {
 	scanner := bufio.NewScanner(r)
+	results := ResultsByExecutionMode{}
 	l := List{}
 	for scanner.Scan() {
-		r, err := Parse(scanner.Text())
+		section, r, err := Parse(scanner.Text())
 		if err != nil {
 			return nil, err
 		}
-		l = append(l, r)
+		if section != "" {
+			results[section] = l
+			l = List{}
+		} else {
+			l = append(l, r)
+		}
 	}
-	return l, nil
+	return results, nil
 }
 
 // Write writes a result list to the given writer
-func Write(w io.Writer, l List) error {
-	for _, r := range l {
-		if _, err := fmt.Fprintln(w, r); err != nil {
+func Write(w io.Writer, r ResultsByExecutionMode) error {
+	for name, l := range r {
+		for _, r := range l {
+			if _, err := fmt.Fprintln(w, r); err != nil {
+				return err
+			}
+		}
+		if _, err := fmt.Fprintln(w, name); err != nil {
 			return err
 		}
 	}
diff --git a/tools/src/cts/result/result_test.go b/tools/src/cts/result/result_test.go
index 634e0a9..28b7007 100644
--- a/tools/src/cts/result/result_test.go
+++ b/tools/src/cts/result/result_test.go
@@ -84,7 +84,7 @@
 			t.Errorf("'%v'.String() was not as expected:\n%v", test.result, diff)
 			continue
 		}
-		parsed, err := result.Parse(test.expect)
+		_, parsed, err := result.Parse(test.expect)
 		if err != nil {
 			t.Errorf("Parse('%v') returned %v", test.expect, err)
 			continue
@@ -100,11 +100,11 @@
 		in, expect string
 	}{
 		{``, `unable to parse result ''`},
-		{`a`, `unable to parse result 'a'`},
+		{`a b`, `unable to parse result 'a b'`},
 		{`a b c d e`, `unable to parse result 'a b c d e': time: invalid duration "d"`},
 		{`a b c 10s e`, `unable to parse result 'a b c 10s e': strconv.ParseBool: parsing "e": invalid syntax`},
 	} {
-		_, err := result.Parse(test.in)
+		_, _, err := result.Parse(test.in)
 		got := ""
 		if err != nil {
 			got = err.Error()
@@ -1054,13 +1054,19 @@
 }
 
 func TestReadWrite(t *testing.T) {
-	in := result.List{
-		{Query: Q(`suite:a:*`), Tags: T(`x`), Status: result.Pass},
-		{Query: Q(`suite:b,*`), Tags: T(`y`), Status: result.Failure},
-		{Query: Q(`suite:a:b:*`), Tags: T(`x`, `y`), Status: result.Skip},
-		{Query: Q(`suite:a:c,*`), Tags: T(`y`, `x`), Status: result.Failure},
-		{Query: Q(`suite:a,b:c,*`), Tags: T(`y`, `x`), Status: result.Crash},
-		{Query: Q(`suite:a,b:c:*`), Status: result.Slow},
+	in := result.ResultsByExecutionMode{
+		"bar": result.List{
+			{Query: Q(`suite:a:*`), Tags: T(`x`), Status: result.Pass},
+			{Query: Q(`suite:b,*`), Tags: T(`y`), Status: result.Failure},
+			{Query: Q(`suite:a:b:*`), Tags: T(`x`, `y`), Status: result.Skip},
+			{Query: Q(`suite:a:c,*`), Tags: T(`y`, `x`), Status: result.Failure},
+			{Query: Q(`suite:a,b:c,*`), Tags: T(`y`, `x`), Status: result.Crash},
+			{Query: Q(`suite:a,b:c:*`), Status: result.Slow},
+		},
+		"foo": result.List{
+			{Query: Q(`suite:d:*`), Tags: T(`x`), Status: result.Pass},
+			{Query: Q(`suite:e,*`), Tags: T(`y`), Status: result.Failure},
+		},
 	}
 	buf := &bytes.Buffer{}
 	if err := result.Write(buf, in); err != nil {