Generate shader corpora for fuzzers

Adds scripts that generate corpora of WGSL and SPIR-V shaders for
the tint fuzzers, from test cases in the repository.

Fixed: tint:966
Change-Id: I7e86ef5e34676d0c4f5b7e413a5c0f444fca08ff
Reviewed-on: https://dawn-review.googlesource.com/c/tint/+/57204
Kokoro: Kokoro <noreply+kokoro@google.com>
Reviewed-by: Ben Clayton <bclayton@google.com>
Reviewed-by: Ryan Harrison <rharrison@chromium.org>
Commit-Queue: Alastair Donaldson <afdx@google.com>
diff --git a/fuzzers/BUILD.gn b/fuzzers/BUILD.gn
index 64eefc8..4224e85 100644
--- a/fuzzers/BUILD.gn
+++ b/fuzzers/BUILD.gn
@@ -21,6 +21,23 @@
 if (build_with_chromium) {
   import("//testing/libfuzzer/fuzzer_test.gni")
 
+  fuzzer_corpus_wgsl_dir = "${target_gen_dir}/fuzzer_corpus_wgsl"
+  action("tint_generate_wgsl_corpus") {
+    script = "generate_wgsl_corpus.py"
+    sources = [ "generate_wgsl_corpus.py" ]
+    args = [ rebase_path("${tint_root_dir}/test", root_build_dir), rebase_path(fuzzer_corpus_wgsl_dir, root_build_dir) ]
+    outputs = [ fuzzer_corpus_wgsl_dir ]
+  }
+
+  fuzzer_corpus_spirv_dir = "${target_gen_dir}/fuzzer_corpus_spirv"
+  action("tint_generate_spirv_corpus") {
+    deps = [ "${tint_spirv_tools_dir}/:spirv-as" ]
+    script = "generate_spirv_corpus.py"
+    sources = [ "generate_spirv_corpus.py" ]
+    args = [ rebase_path("${tint_root_dir}/test", root_build_dir), rebase_path(fuzzer_corpus_spirv_dir, root_build_dir), rebase_path("${root_out_dir}/spirv-as", root_build_dir) ]
+    outputs = [ fuzzer_corpus_spirv_dir ]
+  }
+
   # fuzzer_test doesn't have configs members, so need to define them in an empty
   # source_set.
 
@@ -45,6 +62,9 @@
     fuzzer_test("tint_wgsl_reader_fuzzer") {
       sources = [ "tint_wgsl_reader_fuzzer.cc" ]
       deps = [ ":tint_fuzzer_common" ]
+      dict = "dictionary.txt"
+      seed_corpus = fuzzer_corpus_wgsl_dir
+      seed_corpus_deps = [ ":tint_generate_wgsl_corpus" ]
     }
   }
 
@@ -52,6 +72,9 @@
     fuzzer_test("tint_wgsl_reader_wgsl_writer_fuzzer") {
       sources = [ "tint_wgsl_reader_wgsl_writer_fuzzer.cc" ]
       deps = [ ":tint_fuzzer_common" ]
+      dict = "dictionary.txt"
+      seed_corpus = fuzzer_corpus_wgsl_dir
+      seed_corpus_deps = [ ":tint_generate_wgsl_corpus" ]
     }
   }
 
@@ -59,51 +82,81 @@
     fuzzer_test("tint_all_transforms_fuzzer") {
       sources = [ "tint_all_transforms_fuzzer.cc" ]
       deps = [ ":tint_fuzzer_common" ]
+      dict = "dictionary.txt"
+      seed_corpus = fuzzer_corpus_wgsl_dir
+      seed_corpus_deps = [ ":tint_generate_wgsl_corpus" ]
     }
 
     fuzzer_test("tint_binding_remapper_fuzzer") {
       sources = [ "tint_binding_remapper_fuzzer.cc" ]
       deps = [ ":tint_fuzzer_common" ]
+      dict = "dictionary.txt"
+      seed_corpus = fuzzer_corpus_wgsl_dir
+      seed_corpus_deps = [ ":tint_generate_wgsl_corpus" ]
     }
 
     fuzzer_test("tint_first_index_offset_fuzzer") {
       sources = [ "tint_first_index_offset_fuzzer.cc" ]
       deps = [ ":tint_fuzzer_common" ]
+      dict = "dictionary.txt"
+      seed_corpus = fuzzer_corpus_wgsl_dir
+      seed_corpus_deps = [ ":tint_generate_wgsl_corpus" ]
     }
 
     fuzzer_test("tint_inspector_fuzzer") {
       sources = [ "tint_inspector_fuzzer.cc" ]
       deps = [ ":tint_fuzzer_common" ]
+      dict = "dictionary.txt"
+      seed_corpus = fuzzer_corpus_wgsl_dir
+      seed_corpus_deps = [ ":tint_generate_wgsl_corpus" ]
     }
 
     fuzzer_test("tint_renamer_fuzzer") {
       sources = [ "tint_renamer_fuzzer.cc" ]
       deps = [ ":tint_fuzzer_common" ]
+      dict = "dictionary.txt"
+      seed_corpus = fuzzer_corpus_wgsl_dir
+      seed_corpus_deps = [ ":tint_generate_wgsl_corpus" ]
     }
 
     fuzzer_test("tint_robustness_fuzzer") {
       sources = [ "tint_robustness_fuzzer.cc" ]
       deps = [ ":tint_fuzzer_common" ]
+      dict = "dictionary.txt"
+      seed_corpus = fuzzer_corpus_wgsl_dir
+      seed_corpus_deps = [ ":tint_generate_wgsl_corpus" ]
     }
 
     fuzzer_test("tint_single_entry_point_fuzzer") {
       sources = [ "tint_single_entry_point_fuzzer.cc" ]
       deps = [ ":tint_fuzzer_common" ]
+      dict = "dictionary.txt"
+      seed_corpus = fuzzer_corpus_wgsl_dir
+      seed_corpus_deps = [ ":tint_generate_wgsl_corpus" ]
     }
 
     fuzzer_test("tint_spirv_transform_fuzzer") {
       sources = [ "tint_spirv_transform_fuzzer.cc" ]
       deps = [ ":tint_fuzzer_common" ]
+      dict = "dictionary.txt"
+      seed_corpus = fuzzer_corpus_wgsl_dir
+      seed_corpus_deps = [ ":tint_generate_wgsl_corpus" ]
     }
 
     fuzzer_test("tint_vertex_pulling_fuzzer") {
       sources = [ "tint_vertex_pulling_fuzzer.cc" ]
       deps = [ ":tint_fuzzer_common" ]
+      dict = "dictionary.txt"
+      seed_corpus = fuzzer_corpus_wgsl_dir
+      seed_corpus_deps = [ ":tint_generate_wgsl_corpus" ]
     }
 
     fuzzer_test("tint_wgsl_reader_spv_writer_fuzzer") {
       sources = [ "tint_wgsl_reader_spv_writer_fuzzer.cc" ]
       deps = [ ":tint_fuzzer_common" ]
+      dict = "dictionary.txt"
+      seed_corpus = fuzzer_corpus_wgsl_dir
+      seed_corpus_deps = [ ":tint_generate_wgsl_corpus" ]
     }
   }
 
@@ -111,11 +164,17 @@
     fuzzer_test("tint_hlsl_transform_fuzzer") {
       sources = [ "tint_hlsl_transform_fuzzer.cc" ]
       deps = [ ":tint_fuzzer_common" ]
+      dict = "dictionary.txt"
+      seed_corpus = fuzzer_corpus_wgsl_dir
+      seed_corpus_deps = [ ":tint_generate_wgsl_corpus" ]
     }
 
     fuzzer_test("tint_wgsl_reader_hlsl_writer_fuzzer") {
       sources = [ "tint_wgsl_reader_hlsl_writer_fuzzer.cc" ]
       deps = [ ":tint_fuzzer_common" ]
+      dict = "dictionary.txt"
+      seed_corpus = fuzzer_corpus_wgsl_dir
+      seed_corpus_deps = [ ":tint_generate_wgsl_corpus" ]
     }
   }
 
@@ -123,11 +182,17 @@
     fuzzer_test("tint_msl_transform_fuzzer") {
       sources = [ "tint_msl_transform_fuzzer.cc" ]
       deps = [ ":tint_fuzzer_common" ]
+      dict = "dictionary.txt"
+      seed_corpus = fuzzer_corpus_wgsl_dir
+      seed_corpus_deps = [ ":tint_generate_wgsl_corpus" ]
     }
 
     fuzzer_test("tint_wgsl_reader_msl_writer_fuzzer") {
       sources = [ "tint_wgsl_reader_msl_writer_fuzzer.cc" ]
       deps = [ ":tint_fuzzer_common" ]
+      dict = "dictionary.txt"
+      seed_corpus = fuzzer_corpus_wgsl_dir
+      seed_corpus_deps = [ ":tint_generate_wgsl_corpus" ]
     }
   }
 
@@ -135,6 +200,8 @@
     fuzzer_test("tint_spv_reader_fuzzer") {
       sources = [ "tint_spv_reader_fuzzer.cc" ]
       deps = [ ":tint_fuzzer_common" ]
+      seed_corpus = fuzzer_corpus_spirv_dir
+      seed_corpus_deps = [ ":tint_generate_spirv_corpus" ]
     }
   }
 
@@ -142,6 +209,8 @@
     fuzzer_test("tint_spv_reader_wgsl_writer_fuzzer") {
       sources = [ "tint_spv_reader_wgsl_writer_fuzzer.cc" ]
       deps = [ ":tint_fuzzer_common" ]
+      seed_corpus = fuzzer_corpus_spirv_dir
+      seed_corpus_deps = [ ":tint_generate_spirv_corpus" ]
     }
   }
 
@@ -149,6 +218,8 @@
     fuzzer_test("tint_spv_reader_spv_writer_fuzzer") {
       sources = [ "tint_spv_reader_spv_writer_fuzzer.cc" ]
       deps = [ ":tint_fuzzer_common" ]
+      seed_corpus = fuzzer_corpus_spirv_dir
+      seed_corpus_deps = [ ":tint_generate_spirv_corpus" ]
     }
   }
 
@@ -156,6 +227,8 @@
     fuzzer_test("tint_spv_reader_hlsl_writer_fuzzer") {
       sources = [ "tint_spv_reader_hlsl_writer_fuzzer.cc" ]
       deps = [ ":tint_fuzzer_common" ]
+      seed_corpus = fuzzer_corpus_spirv_dir
+      seed_corpus_deps = [ ":tint_generate_spirv_corpus" ]
     }
   }
 
@@ -163,6 +236,8 @@
     fuzzer_test("tint_spv_reader_msl_writer_fuzzer") {
       sources = [ "tint_spv_reader_msl_writer_fuzzer.cc" ]
       deps = [ ":tint_fuzzer_common" ]
+      seed_corpus = fuzzer_corpus_spirv_dir
+      seed_corpus_deps = [ ":tint_generate_spirv_corpus" ]
     }
   }
 
@@ -170,6 +245,9 @@
     fuzzer_test("tint_ast_clone_fuzzer") {
       sources = [ "tint_ast_clone_fuzzer.cc" ]
       deps = [ ":tint_fuzzer_common" ]
+      dict = "dictionary.txt"
+      seed_corpus = fuzzer_corpus_wgsl_dir
+      seed_corpus_deps = [ ":tint_generate_wgsl_corpus" ]
     }
   }
 
diff --git a/fuzzers/generate_spirv_corpus.py b/fuzzers/generate_spirv_corpus.py
new file mode 100644
index 0000000..6f84033
--- /dev/null
+++ b/fuzzers/generate_spirv_corpus.py
@@ -0,0 +1,78 @@
+#!/usr/bin/env python3
+
+# Copyright 2021 The Tint Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Collect all .spvasm files under a given directory, assemble them using
+# spirv-as, and emit the assembled binaries to a given corpus directory,
+# flattening their file names by replacing path separators with underscores.
+# If the output directory already exists, it will be deleted and re-created.
+# Files ending with ".expected.spvasm" are skipped.
+#
+# The intended use of this script is to generate a corpus of SPIR-V
+# binaries for fuzzing.
+#
+# Usage:
+#    generate_spirv_corpus.py <input_dir> <corpus_dir> <path to spirv-as>
+
+
+import os
+import pathlib
+import shutil
+import subprocess
+import sys
+
+
+def list_spvasm_files(root_search_dir):
+    """Yield each .spvasm file (never a directory) under root_search_dir."""
+    for root, _, files in os.walk(root_search_dir):
+        yield from (os.path.join(root, name) for name in files
+                    if pathlib.Path(name).suffix == ".spvasm")
+
+
+def main():
+    """Assemble each non-expected .spvasm file into a rebuilt corpus dir."""
+    if len(sys.argv) != 4:
+        print("Usage: " + sys.argv[0] +
+              " <input dir> <output dir> <spirv-as path>")
+        return 1
+    input_dir: str = os.path.abspath(sys.argv[1].rstrip(os.sep))
+    corpus_dir: str = os.path.abspath(sys.argv[2])
+    spirv_as_path: str = os.path.abspath(sys.argv[3])
+    print(' '.join([input_dir, corpus_dir, spirv_as_path]))
+    if os.path.exists(corpus_dir):
+        shutil.rmtree(corpus_dir)
+    os.makedirs(corpus_dir)
+    for in_file in list_spvasm_files(input_dir):
+        if in_file.endswith(".expected.spvasm"):
+            continue
+        # Flatten the path below input_dir into a single file name.
+        flat_name = in_file[len(input_dir) + 1:].replace(os.sep, '_')
+        out_file = (os.path.splitext(corpus_dir + os.sep + flat_name)[0] +
+                    ".spv")
+        cmd = [spirv_as_path, "--target-env", "spv1.3",
+               in_file, "-o", out_file]
+        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
+                                stderr=subprocess.PIPE)
+        stdout, stderr = proc.communicate()
+        if proc.returncode != 0:
+            # The pipes yield bytes; decode before joining with the message.
+            print("Error running " + " ".join(cmd) + ": " +
+                  stdout.decode(errors="replace"),
+                  stderr.decode(errors="replace"))
+            return 1
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/fuzzers/generate_wgsl_corpus.py b/fuzzers/generate_wgsl_corpus.py
new file mode 100644
index 0000000..c28cf58
--- /dev/null
+++ b/fuzzers/generate_wgsl_corpus.py
@@ -0,0 +1,61 @@
+#!/usr/bin/env python3
+
+# Copyright 2021 The Tint Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Collect all .wgsl files under a given directory and copy them to a given
+# corpus directory, flattening their file names by replacing path
+# separators with underscores. If the output directory already exists, it
+# will be deleted and re-created. Files ending with ".expected.wgsl" are
+# skipped.
+#
+# The intended use of this script is to generate a corpus of WGSL shaders
+# for fuzzing.
+#
+# Usage:
+#    generate_wgsl_corpus.py <input_dir> <corpus_dir>
+
+
+import os
+import pathlib
+import shutil
+import sys
+
+
+def list_wgsl_files(root_search_dir):
+    """Yield each .wgsl file (never a directory) under root_search_dir."""
+    for root, _, files in os.walk(root_search_dir):
+        yield from (os.path.join(root, name) for name in files
+                    if pathlib.Path(name).suffix == '.wgsl')
+
+
+def main():
+    """Copy each non-expected .wgsl file into a freshly rebuilt corpus dir."""
+    if len(sys.argv) != 3:
+        print("Usage: " + sys.argv[0] + " <input dir> <output dir>")
+        return 1
+    input_dir: str = os.path.abspath(sys.argv[1].rstrip(os.sep))
+    corpus_dir: str = os.path.abspath(sys.argv[2])
+    print(input_dir + ' ' + corpus_dir)
+    if os.path.exists(corpus_dir):
+        shutil.rmtree(corpus_dir)
+    os.makedirs(corpus_dir)
+    for src in list_wgsl_files(input_dir):
+        if not src.endswith(".expected.wgsl"):
+            flat_name = src[len(input_dir) + 1:].replace(os.sep, '_')
+            shutil.copy(src, corpus_dir + os.sep + flat_name)
+
+
+if __name__ == "__main__":
+    sys.exit(main())