Generate shader corpora for WGSL fuzzers

Adds scripts that generate corpora of WGSL shaders for the tint
WGSL fuzzers, from test cases in the repository.

Bug: tint:966
Change-Id: Icf8293472ff04ca15111acacda8582b11c0723be
Reviewed-on: https://dawn-review.googlesource.com/c/tint/+/57880
Reviewed-by: Ryan Harrison <rharrison@chromium.org>
Reviewed-by: Ben Clayton <bclayton@google.com>
Commit-Queue: Ben Clayton <bclayton@google.com>
Kokoro: Kokoro <noreply+kokoro@google.com>
Auto-Submit: Alastair Donaldson <afdx@google.com>
diff --git a/fuzzers/BUILD.gn b/fuzzers/BUILD.gn
index b19e55e..6a1b1d7 100644
--- a/fuzzers/BUILD.gn
+++ b/fuzzers/BUILD.gn
@@ -21,6 +21,14 @@
 if (build_with_chromium) {
   import("//testing/libfuzzer/fuzzer_test.gni")
 
+  fuzzer_corpus_wgsl_dir = "${target_gen_dir}/fuzzer_corpus_wgsl"
+  action("tint_generate_wgsl_corpus") {
+    script = "generate_wgsl_corpus.py"
+    sources = [ "generate_wgsl_corpus.py" ]
+    args = [ rebase_path("${tint_root_dir}/test", root_build_dir), rebase_path(fuzzer_corpus_wgsl_dir, root_build_dir) ]
+    outputs = [ fuzzer_corpus_wgsl_dir ]
+  }
+
   # fuzzer_test doesn't have configs members, so need to define them in an empty
   # source_set.
 
@@ -45,6 +53,9 @@
     fuzzer_test("tint_wgsl_reader_fuzzer") {
       sources = [ "tint_wgsl_reader_fuzzer.cc" ]
       deps = [ ":tint_fuzzer_common" ]
+      dict = "dictionary.txt"
+      seed_corpus = fuzzer_corpus_wgsl_dir
+      seed_corpus_deps = [ ":tint_generate_wgsl_corpus" ]
     }
   }
 
@@ -52,6 +63,9 @@
     fuzzer_test("tint_wgsl_reader_wgsl_writer_fuzzer") {
       sources = [ "tint_wgsl_reader_wgsl_writer_fuzzer.cc" ]
       deps = [ ":tint_fuzzer_common" ]
+      dict = "dictionary.txt"
+      seed_corpus = fuzzer_corpus_wgsl_dir
+      seed_corpus_deps = [ ":tint_generate_wgsl_corpus" ]
     }
   }
 
@@ -59,46 +73,73 @@
     fuzzer_test("tint_all_transforms_fuzzer") {
       sources = [ "tint_all_transforms_fuzzer.cc" ]
       deps = [ ":tint_fuzzer_common" ]
+      dict = "dictionary.txt"
+      seed_corpus = fuzzer_corpus_wgsl_dir
+      seed_corpus_deps = [ ":tint_generate_wgsl_corpus" ]
     }
 
     fuzzer_test("tint_binding_remapper_fuzzer") {
       sources = [ "tint_binding_remapper_fuzzer.cc" ]
       deps = [ ":tint_fuzzer_common" ]
+      dict = "dictionary.txt"
+      seed_corpus = fuzzer_corpus_wgsl_dir
+      seed_corpus_deps = [ ":tint_generate_wgsl_corpus" ]
     }
 
     fuzzer_test("tint_first_index_offset_fuzzer") {
       sources = [ "tint_first_index_offset_fuzzer.cc" ]
       deps = [ ":tint_fuzzer_common" ]
+      dict = "dictionary.txt"
+      seed_corpus = fuzzer_corpus_wgsl_dir
+      seed_corpus_deps = [ ":tint_generate_wgsl_corpus" ]
     }
 
     fuzzer_test("tint_inspector_fuzzer") {
       sources = [ "tint_inspector_fuzzer.cc" ]
       deps = [ ":tint_fuzzer_common" ]
+      dict = "dictionary.txt"
+      seed_corpus = fuzzer_corpus_wgsl_dir
+      seed_corpus_deps = [ ":tint_generate_wgsl_corpus" ]
     }
 
     fuzzer_test("tint_renamer_fuzzer") {
       sources = [ "tint_renamer_fuzzer.cc" ]
       deps = [ ":tint_fuzzer_common" ]
+      dict = "dictionary.txt"
+      seed_corpus = fuzzer_corpus_wgsl_dir
+      seed_corpus_deps = [ ":tint_generate_wgsl_corpus" ]
     }
 
     fuzzer_test("tint_robustness_fuzzer") {
       sources = [ "tint_robustness_fuzzer.cc" ]
       deps = [ ":tint_fuzzer_common" ]
+      dict = "dictionary.txt"
+      seed_corpus = fuzzer_corpus_wgsl_dir
+      seed_corpus_deps = [ ":tint_generate_wgsl_corpus" ]
     }
 
     fuzzer_test("tint_single_entry_point_fuzzer") {
       sources = [ "tint_single_entry_point_fuzzer.cc" ]
       deps = [ ":tint_fuzzer_common" ]
+      dict = "dictionary.txt"
+      seed_corpus = fuzzer_corpus_wgsl_dir
+      seed_corpus_deps = [ ":tint_generate_wgsl_corpus" ]
     }
 
     fuzzer_test("tint_vertex_pulling_fuzzer") {
       sources = [ "tint_vertex_pulling_fuzzer.cc" ]
       deps = [ ":tint_fuzzer_common" ]
+      dict = "dictionary.txt"
+      seed_corpus = fuzzer_corpus_wgsl_dir
+      seed_corpus_deps = [ ":tint_generate_wgsl_corpus" ]
     }
 
     fuzzer_test("tint_wgsl_reader_spv_writer_fuzzer") {
       sources = [ "tint_wgsl_reader_spv_writer_fuzzer.cc" ]
       deps = [ ":tint_fuzzer_common" ]
+      dict = "dictionary.txt"
+      seed_corpus = fuzzer_corpus_wgsl_dir
+      seed_corpus_deps = [ ":tint_generate_wgsl_corpus" ]
     }
   }
 
@@ -106,6 +147,9 @@
     fuzzer_test("tint_wgsl_reader_hlsl_writer_fuzzer") {
       sources = [ "tint_wgsl_reader_hlsl_writer_fuzzer.cc" ]
       deps = [ ":tint_fuzzer_common" ]
+      dict = "dictionary.txt"
+      seed_corpus = fuzzer_corpus_wgsl_dir
+      seed_corpus_deps = [ ":tint_generate_wgsl_corpus" ]
     }
   }
 
@@ -113,6 +157,9 @@
     fuzzer_test("tint_wgsl_reader_msl_writer_fuzzer") {
       sources = [ "tint_wgsl_reader_msl_writer_fuzzer.cc" ]
       deps = [ ":tint_fuzzer_common" ]
+      dict = "dictionary.txt"
+      seed_corpus = fuzzer_corpus_wgsl_dir
+      seed_corpus_deps = [ ":tint_generate_wgsl_corpus" ]
     }
   }
 
@@ -155,6 +202,9 @@
     fuzzer_test("tint_ast_clone_fuzzer") {
       sources = [ "tint_ast_clone_fuzzer.cc" ]
       deps = [ ":tint_fuzzer_common" ]
+      dict = "dictionary.txt"
+      seed_corpus = fuzzer_corpus_wgsl_dir
+      seed_corpus_deps = [ ":tint_generate_wgsl_corpus" ]
     }
   }
 
diff --git a/fuzzers/generate_wgsl_corpus.py b/fuzzers/generate_wgsl_corpus.py
new file mode 100644
index 0000000..c28cf58
--- /dev/null
+++ b/fuzzers/generate_wgsl_corpus.py
@@ -0,0 +1,61 @@
+#!/usr/bin/env python3
+
+# Copyright 2021 The Tint Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Collect all .wgsl files under a given directory and copy them to a given
+# corpus directory, flattening their file names by replacing path
+# separators with underscores. If the output directory already exists, it
+# will be deleted and re-created. Files ending with ".expected.wgsl" are
+# skipped.
+#
+# The intended use of this script is to generate a corpus of WGSL shaders
+# for fuzzing.
+#
+# Usage:
+#    generate_wgsl_corpus.py <input_dir> <corpus_dir>
+
+
+import os
+import pathlib
+import shutil
+import sys
+
+
+def list_wgsl_files(root_search_dir):
+    """Yield the path of every .wgsl file (not directory) below the root."""
+    for root, _, files in os.walk(root_search_dir):
+        yield from (os.path.join(root, name) for name in files
+                    if pathlib.Path(name).suffix == '.wgsl')
+
+
+def main():
+    """Rebuild the corpus dir from the input tree; return 1 on bad usage."""
+    if len(sys.argv) != 3:
+        print("Usage: " + sys.argv[0] + " <input dir> <output dir>")
+        return 1
+    source_root: str = os.path.abspath(sys.argv[1].rstrip(os.sep))
+    dest_root: str = os.path.abspath(sys.argv[2])
+    print(source_root + ' ' + dest_root)
+    if os.path.exists(dest_root):
+        shutil.rmtree(dest_root)  # always start from an empty corpus
+    os.makedirs(dest_root)
+    for shader in list_wgsl_files(source_root):
+        if not shader.endswith(".expected.wgsl"):
+            flat_name = shader[len(source_root) + 1:].replace(os.sep, '_')
+            shutil.copy(shader, dest_root + os.sep + flat_name)
+
+
+if __name__ == "__main__":  # only run when executed as a script, not on import
+    sys.exit(main())  # main() returns 1 on bad usage, None (exit 0) otherwise