Generate shader corpora for SPIR-V fuzzers

Add a script that generates corpora of SPIR-V shaders for the tint
SPIR-V fuzzers, from test cases in the repository.

Fixes: tint:966
Change-Id: I3be5a868ed8ac9c9cfe3b1d5d7d5607e2e26168d
Reviewed-on: https://dawn-review.googlesource.com/c/tint/+/57881
Auto-Submit: Alastair Donaldson <afdx@google.com>
Commit-Queue: Ryan Harrison <rharrison@chromium.org>
Kokoro: Kokoro <noreply+kokoro@google.com>
Reviewed-by: Ryan Harrison <rharrison@chromium.org>
diff --git a/fuzzers/BUILD.gn b/fuzzers/BUILD.gn
index 6a1b1d7..83cfebb 100644
--- a/fuzzers/BUILD.gn
+++ b/fuzzers/BUILD.gn
@@ -29,6 +29,19 @@
     outputs = [ fuzzer_corpus_wgsl_dir ]
   }
 
+  fuzzer_corpus_spirv_dir = "${target_gen_dir}/fuzzer_corpus_spirv"
+  action("tint_generate_spirv_corpus") {
+    # The script runs spirv-as, so depend on its host-toolchain build.
+    spirv_as_target = "${tint_spirv_tools_dir}/:spirv-as(${host_toolchain})"
+    spirv_as_out_dir = get_label_info(spirv_as_target, "root_out_dir")
+    deps = [ spirv_as_target ]
+
+    script = "generate_spirv_corpus.py"
+    sources = [ "generate_spirv_corpus.py" ]
+    args = [ rebase_path("${tint_root_dir}/test", root_build_dir), rebase_path(fuzzer_corpus_spirv_dir, root_build_dir), rebase_path("${spirv_as_out_dir}/spirv-as", root_build_dir) ]
+    outputs = [ fuzzer_corpus_spirv_dir ]
+  }
+
   # fuzzer_test doesn't have configs members, so need to define them in an empty
   # source_set.
 
@@ -167,6 +180,8 @@
     fuzzer_test("tint_spv_reader_fuzzer") {
       sources = [ "tint_spv_reader_fuzzer.cc" ]
       deps = [ ":tint_fuzzer_common" ]
+      seed_corpus = fuzzer_corpus_spirv_dir
+      seed_corpus_deps = [ ":tint_generate_spirv_corpus" ]
     }
   }
 
@@ -174,6 +189,8 @@
     fuzzer_test("tint_spv_reader_wgsl_writer_fuzzer") {
       sources = [ "tint_spv_reader_wgsl_writer_fuzzer.cc" ]
       deps = [ ":tint_fuzzer_common" ]
+      seed_corpus = fuzzer_corpus_spirv_dir
+      seed_corpus_deps = [ ":tint_generate_spirv_corpus" ]
     }
   }
 
@@ -181,6 +198,8 @@
     fuzzer_test("tint_spv_reader_spv_writer_fuzzer") {
       sources = [ "tint_spv_reader_spv_writer_fuzzer.cc" ]
       deps = [ ":tint_fuzzer_common" ]
+      seed_corpus = fuzzer_corpus_spirv_dir
+      seed_corpus_deps = [ ":tint_generate_spirv_corpus" ]
     }
   }
 
@@ -188,6 +207,8 @@
     fuzzer_test("tint_spv_reader_hlsl_writer_fuzzer") {
       sources = [ "tint_spv_reader_hlsl_writer_fuzzer.cc" ]
       deps = [ ":tint_fuzzer_common" ]
+      seed_corpus = fuzzer_corpus_spirv_dir
+      seed_corpus_deps = [ ":tint_generate_spirv_corpus" ]
     }
   }
 
@@ -195,6 +216,8 @@
     fuzzer_test("tint_spv_reader_msl_writer_fuzzer") {
       sources = [ "tint_spv_reader_msl_writer_fuzzer.cc" ]
       deps = [ ":tint_fuzzer_common" ]
+      seed_corpus = fuzzer_corpus_spirv_dir
+      seed_corpus_deps = [ ":tint_generate_spirv_corpus" ]
     }
   }
 
diff --git a/fuzzers/generate_spirv_corpus.py b/fuzzers/generate_spirv_corpus.py
new file mode 100644
index 0000000..6f84033
--- /dev/null
+++ b/fuzzers/generate_spirv_corpus.py
@@ -0,0 +1,78 @@
+#!/usr/bin/env python3
+
+# Copyright 2021 The Tint Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Collect all .spvasm files under a given directory, assemble them using
+# spirv-as, and emit the assembled binaries to a given corpus directory,
+# flattening their file names by replacing path separators with underscores.
+# If the output directory already exists, it will be deleted and re-created.
+# Files ending with ".expected.spvasm" are skipped.
+#
+# The intended use of this script is to generate a corpus of SPIR-V
+# binaries for fuzzing.
+#
+# Usage:
+#    generate_spirv_corpus.py <input_dir> <corpus_dir> <path to spirv-as>
+
+
+import os
+import pathlib
+import shutil
+import subprocess
+import sys
+
+
+def list_spvasm_files(root_search_dir):
+    """Yield each .spvasm file (never a directory) under root_search_dir."""
+    for root, _dirs, files in os.walk(root_search_dir):
+        yield from (os.path.join(root, name) for name in files
+                    if pathlib.Path(name).suffix == ".spvasm")
+
+
+def main():
+    """Assemble each .spvasm file under argv[1] into argv[2] using argv[3].
+
+    The output directory is deleted and re-created; ".expected.spvasm" files
+    are skipped. Returns 1 on a usage or spirv-as error, otherwise None.
+    """
+    if len(sys.argv) != 4:
+        print("Usage: " + sys.argv[0] +
+              " <input dir> <output dir> <spirv-as path>")
+        return 1
+    input_dir: str = os.path.abspath(sys.argv[1].rstrip(os.sep))
+    corpus_dir: str = os.path.abspath(sys.argv[2])
+    spirv_as_path: str = os.path.abspath(sys.argv[3])
+    print(' '.join([input_dir, corpus_dir, spirv_as_path]))
+    if os.path.exists(corpus_dir):
+        shutil.rmtree(corpus_dir)
+    os.makedirs(corpus_dir)
+    for in_file in list_spvasm_files(input_dir):
+        if in_file.endswith(".expected.spvasm"):
+            continue
+        flat = os.path.relpath(in_file, input_dir).replace(os.sep, '_')
+        out_file = os.path.join(corpus_dir, os.path.splitext(flat)[0] + ".spv")
+        cmd = [spirv_as_path, "--target-env", "spv1.3", in_file,
+               "-o", out_file]
+        # Capture text, not bytes, so the output can be joined to the message.
+        proc = subprocess.run(cmd, stdout=subprocess.PIPE,
+                              stderr=subprocess.PIPE, universal_newlines=True)
+        if proc.returncode != 0:
+            print("Error running " + " ".join(cmd) + ": " + proc.stdout,
+                  proc.stderr)
+            return 1
+
+
+if __name__ == "__main__":
+    sys.exit(main())  # Use main()'s return value as the process exit status.