tools: Add documentation for coverage viewer

Change-Id: I5605925bf4dc4012b38f4e2da48f45321d5e047d
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/113860
Reviewed-by: Antonio Maiorano <amaiorano@google.com>
Commit-Queue: Ben Clayton <bclayton@google.com>
Kokoro: Kokoro <noreply+kokoro@google.com>
diff --git a/src/dawn/node/README.md b/src/dawn/node/README.md
index 7453e48..a6bcf34 100644
--- a/src/dawn/node/README.md
+++ b/src/dawn/node/README.md
@@ -86,6 +86,23 @@
 
 You can write out an expectations file with the `--output <path>` command line flag, and then compare this snapshot to a later run with `--expect <path>`.
 
+## Viewing Dawn per-test coverage
+
+### Requirements
+
+Dawn needs to be built with clang and the `DAWN_EMIT_COVERAGE` CMake flag.
+
+Optionally, the `LLVM_SOURCE_DIR` CMake flag can also be specified to point to the `./llvm` directory of [an LLVM checkout](https://github.com/llvm/llvm-project), which will build [`turbo-cov`](../../../tools/src/cmd/turbo-cov/README.md) and dramatically speed up the processing of coverage data.
+
+### Usage
+
+Run `./src/tools/run run-cts` like before, but include the `--coverage` flag.
+After running the tests, your browser will open with a coverage viewer.
+
+Click a source file in the left-hand panel, then click a green span in the file source to see the tests that exercised that code.
+
+You can also highlight multiple lines to view all the tests that covered any part of the highlighted source.
+
 ## Debugging TypeScript with VSCode
 
 Open or create the `.vscode/launch.json` file, and add:
diff --git a/tools/src/cmd/run-cts/main.go b/tools/src/cmd/run-cts/main.go
index df6aef0..2efc137 100644
--- a/tools/src/cmd/run-cts/main.go
+++ b/tools/src/cmd/run-cts/main.go
@@ -886,7 +886,7 @@
 		}
 
 		if res.coverage != nil {
-			covTree.Add(splitTestCaseForCoverage(res.testcase), res.coverage)
+			covTree.Add(SplitCTSQuery(res.testcase), res.coverage)
 		}
 	}
 	fmt.Fprint(r.stdout, ansiProgressBar(animFrame, numTests, numByExpectedStatus))
@@ -963,11 +963,20 @@
 			return nil
 		}
 
-		cov := &bytes.Buffer{}
-		if err := covTree.Encode(revision, cov); err != nil {
+		covData := &bytes.Buffer{}
+		if err := covTree.Encode(revision, covData); err != nil {
 			return fmt.Errorf("failed to encode coverage file: %w", err)
 		}
-		return showCoverageServer(ctx, cov.Bytes(), r.stdout)
+
+		const port = 9392
+		url := fmt.Sprintf("http://localhost:%v/index.html", port)
+
+		return cov.StartServer(ctx, port, covData.Bytes(), func() error {
+			fmt.Fprintln(r.stdout)
+			fmt.Fprintln(r.stdout, blue+"Serving coverage view at "+url+ansiReset)
+			launchBrowser(url)
+			return nil
+		})
 	}
 
 	return nil
@@ -1366,74 +1375,8 @@
 	return <-w.err
 }
 
-func splitTestCaseForCoverage(testcase string) []string {
-	out := []string{}
-	s := 0
-	for e, r := range testcase {
-		switch r {
-		case ':', '.':
-			out = append(out, testcase[s:e])
-			s = e
-		}
-	}
-	return out
-}
-
-// showCoverageServer starts a localhost http server to display the coverage data, launching a
-// browser if one can be found. Blocks until the context is cancelled.
-func showCoverageServer(ctx context.Context, covData []byte, stdout io.Writer) error {
-	const port = "9392"
-	url := fmt.Sprintf("http://localhost:%v/index.html", port)
-
-	handler := http.NewServeMux()
-	handler.HandleFunc("/index.html", func(w http.ResponseWriter, r *http.Request) {
-		f, err := os.Open(filepath.Join(fileutils.ThisDir(), "view-coverage.html"))
-		if err != nil {
-			fmt.Fprint(w, "file not found")
-			w.WriteHeader(http.StatusNotFound)
-			return
-		}
-		defer f.Close()
-		io.Copy(w, f)
-	})
-	handler.HandleFunc("/coverage.dat", func(w http.ResponseWriter, r *http.Request) {
-		io.Copy(w, bytes.NewReader(covData))
-	})
-	handler.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
-		rel := r.URL.Path
-		if r.URL.Path == "" {
-			http.Redirect(w, r, url, http.StatusSeeOther)
-			return
-		}
-		if strings.Contains(rel, "..") {
-			w.WriteHeader(http.StatusBadRequest)
-			fmt.Fprint(w, "file path must not contain '..'")
-			return
-		}
-		f, err := os.Open(filepath.Join(fileutils.DawnRoot(), r.URL.Path))
-		if err != nil {
-			w.WriteHeader(http.StatusNotFound)
-			fmt.Fprintf(w, "file '%v' not found", r.URL.Path)
-			return
-		}
-		defer f.Close()
-		io.Copy(w, f)
-	})
-
-	server := &http.Server{Addr: ":" + port, Handler: handler}
-	go server.ListenAndServe()
-
-	fmt.Fprintln(stdout)
-	fmt.Fprintln(stdout, "Serving coverage view at "+blue+url+ansiReset)
-
-	openBrowser(url)
-
-	<-ctx.Done()
-	return server.Shutdown(ctx)
-}
-
-// openBrowser launches a browser to open the given url
-func openBrowser(url string) error {
+// launchBrowser launches a browser to open the given url
+func launchBrowser(url string) error {
 	switch runtime.GOOS {
 	case "linux":
 		return exec.Command("xdg-open", url).Start()
@@ -1445,3 +1388,41 @@
 		return fmt.Errorf("unsupported platform")
 	}
 }
+
+// SplitCTSQuery splits a CTS query into a cov.Path
+//
+// Each WebGPU CTS test is uniquely identified by a test query.
+// See https://github.com/gpuweb/cts/blob/main/docs/terms.md#queries for how a
+// query is officially structured.
+//
+// A Path is a simplified form of a CTS query, where each colon ':' and comma
+// ',' denotes a split point in the tree. These delimiters are included in the
+// parent node's string.
+//
+// For example, the query string for the single test case:
+//
+//	webgpu:shader,execution,expression,call,builtin,acos:f32:inputSource="storage_r";vectorize=4
+//
+// is broken down into the following strings:
+//
+//	'webgpu:'
+//	'shader,'
+//	'execution,'
+//	'expression,'
+//	'call,'
+//	'builtin,'
+//	'acos:'
+//	'f32:'
+//	'inputSource="storage_r";vectorize=4'
+func SplitCTSQuery(testcase string) cov.Path {
+	out := []string{}
+	s := 0
+	for e, r := range testcase {
+		switch r {
+		case ':', ',':
+			out = append(out, testcase[s:e+1])
+			s = e + 1
+		}
+	}
+	return out
+}
diff --git a/tools/src/cmd/turbo-cov/README.md b/tools/src/cmd/turbo-cov/README.md
new file mode 100644
index 0000000..56ce1f9
--- /dev/null
+++ b/tools/src/cmd/turbo-cov/README.md
@@ -0,0 +1,49 @@
+# `turbo-cov`
+
+## About
+
+`turbo-cov` can be used by the `./tools/run cts run-cts` tool when passing the `--coverage` flag.
+It is substantially faster at processing coverage data than the standard LLVM tools.
+
+## Requirements
+
+To build `turbo-cov`, set the CMake flag `LLVM_SOURCE_DIR` to the `/llvm` subdirectory of an LLVM checkout. `turbo-cov` requires LLVM 9+.
+
+## Details
+
+[Clang provides two tools](https://clang.llvm.org/docs/SourceBasedCodeCoverage.html#creating-coverage-reports) for processing coverage data:
+
+* `llvm-profdata` indexes the raw `.profraw` coverage profile file and emits a `.profdata` file.
+* `llvm-cov` further processes the `.profdata` file into something human-readable or machine-parsable.
+
+`llvm-cov` provides many options, including emitting a pretty HTML file, but is remarkably slow at producing easily machine-parsable data.
+Fortunately, the core of `llvm-cov` is [a few hundred lines of code](https://github.com/llvm/llvm-project/tree/master/llvm/tools/llvm-cov), as it relies on LLVM libraries to do the heavy lifting.
+
+`turbo-cov` is a simple `llvm-cov` replacement that efficiently converts a `.profdata` file into a binary stream that can be consumed by the `tools/src/cov` package.
+
+## File structure
+
+`turbo-cov` emits a trivial binary stream, which takes the tightly-packed form:
+
+```c++
+struct Root {
+    uint32_t num_files;
+    File file[num_files];
+};
+struct File {
+    uint32_t name_length;
+    uint8_t  name_data[name_length];
+    uint32_t num_segments;
+    Segment  segments[num_segments];
+};
+struct Segment {
+    // The line where this segment begins.
+    uint32_t line;
+    // The column where this segment begins.
+    uint32_t column;
+    // The execution count, or zero if no count was recorded.
+    uint32_t count;
+    // When 0, the segment was uninstrumented or skipped.
+    uint8_t  hasCount;
+};
+```
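+
+As a rough illustration of the layout above, here is a minimal Go sketch that decodes such a stream from stdin. It assumes little-endian encoding (matching the parser in `tools/src/cov/import.go`); the type and function names are purely illustrative and not part of any Dawn API, and a real reader would also want to sanity-check the declared sizes before allocating.
+
+```go
+package main
+
+import (
+	"bufio"
+	"encoding/binary"
+	"fmt"
+	"io"
+	"os"
+)
+
+// segment mirrors the tightly-packed Segment record described above.
+type segment struct {
+	Line, Column, Count uint32
+	HasCount            uint8
+}
+
+// file holds one decoded File record.
+type file struct {
+	Name     string
+	Segments []segment
+}
+
+// readStream decodes a turbo-cov style stream from r.
+func readStream(r io.Reader) ([]file, error) {
+	readU32 := func() (uint32, error) {
+		var v uint32
+		err := binary.Read(r, binary.LittleEndian, &v)
+		return v, err
+	}
+
+	numFiles, err := readU32()
+	if err != nil {
+		return nil, err
+	}
+	files := make([]file, numFiles)
+	for i := range files {
+		nameLen, err := readU32()
+		if err != nil {
+			return nil, err
+		}
+		name := make([]byte, nameLen)
+		if _, err := io.ReadFull(r, name); err != nil {
+			return nil, err
+		}
+		files[i].Name = string(name)
+
+		numSegments, err := readU32()
+		if err != nil {
+			return nil, err
+		}
+		files[i].Segments = make([]segment, numSegments)
+		for j := range files[i].Segments {
+			// encoding/binary reads the three uint32s and the uint8 back-to-back,
+			// with no padding, matching the tightly-packed layout.
+			if err := binary.Read(r, binary.LittleEndian, &files[i].Segments[j]); err != nil {
+				return nil, err
+			}
+		}
+	}
+	return files, nil
+}
+
+func main() {
+	files, err := readStream(bufio.NewReader(os.Stdin))
+	if err != nil {
+		fmt.Fprintln(os.Stderr, "decode error:", err)
+		os.Exit(1)
+	}
+	for _, f := range files {
+		fmt.Printf("%s: %d segments\n", f.Name, len(f.Segments))
+	}
+}
+```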
diff --git a/tools/src/cmd/turbo-cov/main.cpp b/tools/src/cmd/turbo-cov/main.cpp
index 07e4aa5..817111a 100644
--- a/tools/src/cmd/turbo-cov/main.cpp
+++ b/tools/src/cmd/turbo-cov/main.cpp
@@ -78,7 +78,7 @@
     //         uint8  hasCount
     //       file[0].segment[1]
     //         ...
-    //   file[2]
+    //   file[1]
     //     ...
 
     auto files = coverage->getUniqueSourceFiles();
diff --git a/tools/src/cov/cov.go b/tools/src/cov/cov.go
index 7a550a3..6229bfd 100644
--- a/tools/src/cov/cov.go
+++ b/tools/src/cov/cov.go
@@ -14,4 +14,8 @@
 
 // Package cov provides functions for consuming and combining llvm coverage
 // information from multiple processes.
+//
+// Combined coverage data is de-duplicated by Tree.Optimize(), then encoded to
+// JSON and zlib-compressed with Tree.Encode(). The resulting file can be viewed
+// with tools/src/cov/view-coverage.html.
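+//
+// A minimal usage sketch (illustrative only; it assumes a zero-value Tree is
+// ready to use and that a *Coverage value has already been imported, neither
+// of which is shown here):
+//
+//	tree := &Tree{}
+//	tree.Add(Path{"webgpu:", "shader,", "execution,", "foo"}, coverage)
+//	buf := &bytes.Buffer{}
+//	if err := tree.Encode("<revision>", buf); err != nil {
+//		// handle error
+//	}
+//	// buf now holds the zlib-compressed JSON, which can be written to disk,
+//	// viewed with view-coverage.html, or served with StartServer().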
 package cov
diff --git a/tools/src/cov/import.go b/tools/src/cov/import.go
index 9ed8257..104d604 100644
--- a/tools/src/cov/import.go
+++ b/tools/src/cov/import.go
@@ -202,6 +202,8 @@
 	return c, nil
 }
 
+// parseTurboCov parses coverage information from a `turbo-cov` file.
+// See tools/src/cmd/turbo-cov/README.md for more information.
 func (e Env) parseTurboCov(data []byte) (*Coverage, error) {
 	u32 := func() uint32 {
 		out := binary.LittleEndian.Uint32(data)
@@ -272,5 +274,8 @@
 	return c, nil
 }
 
-// Path is a tree node path formed from a list of strings
+// Path uniquely identifies a test that was run to produce coverage.
+// A Path is a hierarchical sequence of strings, where the 0th string represents
+// the root of the hierarchy and the last string is typically the leaf name of
+// the test.
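+//
+// For example, SplitCTSQuery() in tools/src/cmd/run-cts produces Paths such as:
+//
+//	Path{
+//		"webgpu:", "shader,", "execution,", "expression,", "call,",
+//		"builtin,", "acos:", "f32:", `inputSource="storage_r";vectorize=4`,
+//	}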
 type Path []string
diff --git a/tools/src/cov/optimization.go b/tools/src/cov/optimization.go
index 4d79cb3..d531e0b 100644
--- a/tools/src/cov/optimization.go
+++ b/tools/src/cov/optimization.go
@@ -20,8 +20,121 @@
 	"sync"
 )
 
-// Optimize optimizes the Tree by de-duplicating common spans into a tree of
-// SpanGroups.
+// Optimize optimizes the Tree by de-duplicating common spans into a tree of SpanGroups.
+//
+// Breaking tests down into group hierarchies provides a natural way to structure
+// coverage data, as tests in the same suite, file or test are likely to have
+// similar coverage spans.
+//
+// For each source file in the codebase, we create a tree of SpanGroups, where the
+// leaves are the test cases.
+//
+// For example, given the following Paths:
+//
+//	a.b.d.h
+//	a.b.d.i.n
+//	a.b.d.i.o
+//	a.b.e.j
+//	a.b.e.k.p
+//	a.b.e.k.q
+//	a.c.f
+//	a.c.g.l.r
+//	a.c.g.m
+//
+// We would construct the following tree:
+//
+//	             a
+//	      ╭──────┴──────╮
+//	      b             c
+//	  ╭───┴───╮     ╭───┴───╮
+//	  d       e     f       g
+//	╭─┴─╮   ╭─┴─╮         ╭─┴─╮
+//	h   i   j   k         l   m
+//	   ╭┴╮     ╭┴╮        │
+//	   n o     p q        r
+//
+// Each leaf node in this tree (`h`, `n`, `o`, `j`, `p`, `q`, `f`, `r`, `m`)
+// represents a test case, and the non-leaf nodes (`a`, `b`, `c`, `d`, `e`, `g`, `i`,
+// `k`, `l`) are suites, files or tests.
+//
+// To begin, we create a test tree structure, and associate the full list of test
+// coverage spans with every leaf node (test case) in this tree.
+//
+// This data structure hasn't given us any compression benefits yet, but we can
+// now do a few tricks to dramatically reduce the number of spans needed to
+// describe the graph:
+//
+//	~ Optimization 1: Common span promotion ~
+//
+// The first compression scheme is to promote spans up the tree when they are
+// common to all children. This reduces the number of spans that need to be
+// encoded in the final file.
+//
+// For example, if the test group `a` has 4 children that all share the same span
+// `X`:
+//
+//	         a
+//	   ╭───┬─┴─┬───╮
+//	   b   c   d   e
+//	[X,Y] [X] [X] [X,Z]
+//
+// Then span `X` can be promoted up to `a`:
+//
+//	      [X]
+//	       a
+//	 ╭───┬─┴─┬───╮
+//	 b   c   d   e
+//	[Y] []   [] [Z]
+//
+//	~ Optimization 2: Span XOR promotion ~
+//
+// This idea can be extended further by not requiring all the children to share
+// the same span before promotion. If *most* child nodes share the same span, we
+// can still promote the span, but this time we *remove* the span from the
+// children *if they had it*, and *add* the span to the children *if they didn't
+// have it*.
+//
+// For example, if the test group `a` has 4 children with 3 that share the span
+// `X`:
+//
+//	         a
+//	   ╭───┬─┴─┬───╮
+//	   b   c   d   e
+//	[X,Y] [X]  [] [X,Z]
+//
+// Then span `X` can be promoted up to `a` by flipping the presence of `X` on the
+// child nodes:
+//
+//	      [X]
+//	       a
+//	 ╭───┬─┴─┬───╮
+//	 b   c   d   e
+//	[Y] []  [X] [Z]
+//
+// This process repeats up the tree.
+//
+// With this optimization applied, we now need to traverse the tree from root to
+// leaf in order to know whether a given span is in use for the leaf node (test case):
+//
+// * If the span is encountered an *odd* number of times during traversal, then
+// the span is *covered*.
+// * If the span is encountered an *even* number of times during traversal, then
+// the span is *not covered*.
+//
+// See tools/src/cov/coverage_test.go for more examples of this optimization.
+//
+//	~ Optimization 3: Common span grouping ~
+//
+// With real-world data, we encounter groups of spans that are commonly found
+// together. To further reduce the coverage data, the whole graph is scanned for
+// common span patterns, which are then indexed by each tree node.
+// The XOR'ing of spans as described above is performed as if the spans were not
+// grouped.
+//
+//	~ Optimization 4: Lookup tables ~
+//
+// All spans, span-groups and strings are stored in de-duplicated tables, and are
+// indexed wherever possible.
 func (t *Tree) Optimize() {
 	log.Printf("Optimizing coverage tree...")
 
diff --git a/tools/src/cov/serialization.go b/tools/src/cov/serialization.go
index efb8daa..d0d11f4 100644
--- a/tools/src/cov/serialization.go
+++ b/tools/src/cov/serialization.go
@@ -31,7 +31,7 @@
 	return p.parse()
 }
 
-// Encode zlib encodes the JSON coverage tree to w.
+// Encode compresses the JSON coverage tree with zlib and writes the result to w.
 func (t *Tree) Encode(revision string, w io.Writer) error {
 	t.Optimize()
 
diff --git a/tools/src/cov/server.go b/tools/src/cov/server.go
new file mode 100644
index 0000000..6c814fe
--- /dev/null
+++ b/tools/src/cov/server.go
@@ -0,0 +1,78 @@
+// Copyright 2022 The Dawn Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package cov
+
+import (
+	"bytes"
+	"context"
+	"fmt"
+	"io"
+	"net/http"
+	"os"
+	"path/filepath"
+	"strings"
+
+	"dawn.googlesource.com/dawn/tools/src/fileutils"
+)
+
+// StartServer starts a localhost http server to display the coverage data.
+// Calls started() when the server is started, and then blocks until the context is cancelled.
+func StartServer(ctx context.Context, port int, covData []byte, started func() error) error {
+	url := fmt.Sprintf("http://localhost:%v/index.html", port)
+	handler := http.NewServeMux()
+	handler.HandleFunc("/index.html", func(w http.ResponseWriter, r *http.Request) {
+		f, err := os.Open(filepath.Join(fileutils.DawnRoot(), "tools/src/cov/view-coverage.html"))
+		if err != nil {
+			w.WriteHeader(http.StatusNotFound)
+			fmt.Fprint(w, "file not found")
+			return
+		}
+		defer f.Close()
+		io.Copy(w, f)
+	})
+	handler.HandleFunc("/coverage.dat", func(w http.ResponseWriter, r *http.Request) {
+		io.Copy(w, bytes.NewReader(covData))
+	})
+	handler.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
+		rel := r.URL.Path
+		if r.URL.Path == "" {
+			http.Redirect(w, r, url, http.StatusSeeOther)
+			return
+		}
+		if strings.Contains(rel, "..") {
+			w.WriteHeader(http.StatusBadRequest)
+			fmt.Fprint(w, "file path must not contain '..'")
+			return
+		}
+		f, err := os.Open(filepath.Join(fileutils.DawnRoot(), r.URL.Path))
+		if err != nil {
+			w.WriteHeader(http.StatusNotFound)
+			fmt.Fprintf(w, "file '%v' not found", r.URL.Path)
+			return
+		}
+		defer f.Close()
+		io.Copy(w, f)
+	})
+
+	server := &http.Server{Addr: fmt.Sprint(":", port), Handler: handler}
+	go server.ListenAndServe()
+
+	if err := started(); err != nil {
+		return err
+	}
+
+	<-ctx.Done()
+	return server.Shutdown(ctx)
+}
diff --git a/tools/src/cov/tree.go b/tools/src/cov/tree.go
index d77975a..1946ac9 100644
--- a/tools/src/cov/tree.go
+++ b/tools/src/cov/tree.go
@@ -470,8 +470,7 @@
 // SpanGroupID is an identifier of a SpanGroup.
 type SpanGroupID int
 
-// SpanGroup holds a number of spans, potentially extending from another
-// SpanGroup.
+// SpanGroup holds a number of spans, potentially extending from another SpanGroup.
 type SpanGroup struct {
 	Spans  SpanSet
 	Extend *SpanGroupID
diff --git a/tools/src/cmd/run-cts/view-coverage.html b/tools/src/cov/view-coverage.html
similarity index 99%
rename from tools/src/cmd/run-cts/view-coverage.html
rename to tools/src/cov/view-coverage.html
index f7333bb..28132c1 100644
--- a/tools/src/cmd/run-cts/view-coverage.html
+++ b/tools/src/cov/view-coverage.html
@@ -202,7 +202,7 @@
             var s = search_params.get('s');
             var e = search_params.get('e');
             if (f) {
-                pending.file = f; // f.replace(/\./g, '/');
+                pending.file = f;
             }
             if (s) {
                 s = s.split('.');