[tint][fuzz] Add fuzzer corpora minimization

This CL does a major rework of how fuzzer corpora are generated in
anticipation of moving to storing them in GCS. Future CLs and work
will be needed to migrate to using GCS.

- Reworks corpus generation into a single script
  'generate_tint_corpus.py'
- Changes 'ir_fuzz_as' command line args to support either one file
  being assembled or a directory of files being assembled
- Reworks GN rules to have non-minimized and minimized generation
  actions for WGSL and IR, which have appropriate dependencies.
- Explicitly adds the non-minimized corpora as dependencies to the
  fuzzer targets, since they are low cost to generate but are needed
  for things like the fuzz check in the CQ.
- The minimized rules are not used as dependencies, to avoid dependency
  loops and keep build time manageable (since minimization can take a
  long time).

Issue: 399689217
Change-Id: If05d5359743b9e05ebf51c3de0db3b73dea6287c
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/229135
Auto-Submit: Ryan Harrison <rharrison@chromium.org>
Reviewed-by: James Price <jrprice@google.com>
Reviewed-by: dan sinclair <dsinclair@chromium.org>
Commit-Queue: James Price <jrprice@google.com>
diff --git a/src/tint/BUILD.gn b/src/tint/BUILD.gn
index af4db9d..e013bcd 100644
--- a/src/tint/BUILD.gn
+++ b/src/tint/BUILD.gn
@@ -308,30 +308,73 @@
 ###############################################################################
 if (tint_has_fuzzers) {
   action("tint_generate_wgsl_corpus") {
-    script = "${tint_src_dir}/cmd/fuzz/wgsl/generate_wgsl_corpus.py"
+    testonly = true
+    script = "${tint_src_dir}/cmd/fuzz/generate_tint_corpus.py"
     sources = [ "${script}" ]
+    deps = []
     args = [
-      "--stamp=" + rebase_path(fuzzer_corpus_wgsl_stamp, root_build_dir),
       rebase_path("${tint_root_dir}/test", root_build_dir),
-      rebase_path(fuzzer_corpus_wgsl_dir, root_build_dir),
+      rebase_path(fuzzer_corpus_dir, root_build_dir),
     ]
     outputs = [ fuzzer_corpus_wgsl_stamp ]
   }
 
+  # This action can take an hour to run. For local dev work, the
+  # non-minimized ':tint_generate_wgsl_corpus' can be used instead.
+  action("tint_generate_wgsl_minimized_corpus") {
+    testonly = true
+    script = "${tint_src_dir}/cmd/fuzz/generate_tint_corpus.py"
+    sources = [ "${script}" ]
+    deps = [
+      ":tint_generate_wgsl_corpus",
+      "${tint_src_dir}/cmd/fuzz/wgsl",
+    ]
+    args = [
+      rebase_path(fuzzer_corpus_wgsl_dir, root_build_dir),
+      rebase_path(fuzzer_corpus_dir, root_build_dir),
+      "--wgsl_fuzzer=" +
+          rebase_path("${root_build_dir}/tint_wgsl_fuzzer", root_build_dir),
+    ]
+    outputs = [ fuzzer_corpus_wgsl_min_stamp ]
+  }
+
   if (tint_build_ir_fuzzer) {
     if (tint_build_cmds && tint_build_wgsl_reader) {
       action("tint_generate_ir_corpus") {
-        script = "${tint_src_dir}/cmd/fuzz/ir/generate_ir_corpus.py"
+        testonly = true
+        script = "${tint_src_dir}/cmd/fuzz/generate_tint_corpus.py"
         sources = [ "${script}" ]
-        deps = [ "${tint_src_dir}/cmd/fuzz/ir/as" ]
+        deps = [
+          ":tint_generate_wgsl_corpus",
+          "${tint_src_dir}/cmd/fuzz/ir/as",
+        ]
         args = [
-          "--stamp=" + rebase_path(fuzzer_corpus_ir_stamp, root_build_dir),
-          rebase_path("${root_build_dir}/ir_fuzz_as", root_build_dir),
-          rebase_path("${tint_root_dir}/test", root_build_dir),
-          rebase_path(fuzzer_corpus_ir_dir, root_build_dir),
+          rebase_path(fuzzer_corpus_wgsl_dir, root_build_dir),
+          rebase_path(fuzzer_corpus_dir, root_build_dir),
+          "--ir_as=" +
+              rebase_path("${root_build_dir}/ir_fuzz_as", root_build_dir),
         ]
         outputs = [ fuzzer_corpus_ir_stamp ]
       }
+
+      # This action can take an hour to run. For local dev work, the
+      # non-minimized ':tint_generate_ir_corpus' can be used instead.
+      action("tint_generate_ir_minimized_corpus") {
+        testonly = true
+        script = "${tint_src_dir}/cmd/fuzz/generate_tint_corpus.py"
+        sources = [ "${script}" ]
+        deps = [
+          ":tint_generate_ir_corpus",
+          "${tint_src_dir}/cmd/fuzz/ir",
+        ]
+        args = [
+          rebase_path(fuzzer_corpus_ir_dir, root_build_dir),
+          rebase_path(fuzzer_corpus_dir, root_build_dir),
+          "--ir_fuzzer=" +
+              rebase_path("${root_build_dir}/tint_ir_fuzzer", root_build_dir),
+        ]
+        outputs = [ fuzzer_corpus_ir_min_stamp ]
+      }
     }
   }
 }
diff --git a/src/tint/cmd/fuzz/generate_tint_corpus.py b/src/tint/cmd/fuzz/generate_tint_corpus.py
new file mode 100644
index 0000000..d07421d
--- /dev/null
+++ b/src/tint/cmd/fuzz/generate_tint_corpus.py
@@ -0,0 +1,394 @@
+#!/usr/bin/env python3
+
+# Copyright 2025 The Dawn & Tint Authors
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice, this
+#    list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+#    this list of conditions and the following disclaimer in the documentation
+#    and/or other materials provided with the distribution.
+#
+# 3. Neither the name of the copyright holder nor the names of its
+#    contributors may be used to endorse or promote products derived from
+#    this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+""" Script for generating the various Tint fuzzer corpora.
+
+The basic functionality is to collect all .wgsl files under a given directory
+and copy them to a given directory, flattening their file names by replacing
+path separators with underscores. If the output directory already exists, it
+will be deleted and re-created. Files ending with ".expected.wgsl" are
+skipped.
+
+Additional flags can be used for providing tooling for converting this corpus
+of .wgsl files into Tint IR fuzzer protobufs (.tirb) or for minimizing the
+corpus.
+
+usage: generate_tint_corpus.py [-h] [-v] [--debug] [--wgsl_fuzzer WGSL_FUZZER | --ir_as IR_AS | --ir_fuzzer IR_FUZZER] input_dir output_dir
+
+Generates Tint fuzzer corpus from provided test files, using the provided tool.
+
+positional arguments:
+  input_dir             Directory containing source files to be turned into a
+                        corpus, what format these are expected to be is
+                        determined by the type of corpus being generated (which
+                        is determined by the tool flag provided). If no tool is
+                        provided the non-minimized WGSL corpus will be
+                        generated, and the inputs are expected to contain .wgsl
+                        WGSL shader files, non-WGSL and '*.expected.wgsl` files
+                        will be ignored
+  output_dir            Output directory that the results directory should be
+                        placed in. The base WGSL fuzzer corpus is created, in
+                        '<output_dir>/wgsl_corpus'. Other corpus locations will
+                        be specified in their tool specific flags. If a
+                        directory already exists, it will be overwritten.
+
+options:
+  -h, --help            show this help message and exit
+  -v, --verbose         Enables verbose logging
+  --debug               Enables developer debug logging
+
+tool flags:
+  Flags for tool to use for generating the corpus. Which tool that is provided
+  determines which type of corpus is generated. If no tool is
+  provided the non-minimized WGSL fuzzer corpus will be generated. Only one
+  corpus will be generated per invocation, so these flags are mutually
+  exclusive.
+
+  --wgsl_fuzzer WGSL_FUZZER
+                        Instance of tint_wgsl_fuzzer to use for minimization, if
+                         provided a minimized WGSL corpus will be generated in
+                        '<output_dir>/wgsl_min_corpus'. (This can take over an
+                        hour to run). <input_dir> is expected to only contain
+                        .wgsl WGSL shader files, as would be generated for the
+                        non-minimized WGSL corpus.
+  --ir_as IR_AS         Instance of ir_fuzz_as to use for assembling IR binary
+                        test cases, if provided a non-minimized IR corpus will
+                        be generated in '<output_dir>/ir_corpus'. <input_dir> is
+                        expected to only contain .wgsl WGSL shader files, as
+                        would be generated for the non-minimized WGSL corpus.
+  --ir_fuzzer IR_FUZZER
+                        Instance of tint_ir_fuzzer to use for minimization, if
+                        provided a minimized corpus will be generated in
+                        '<output_dir>/ir_min_corpus'. (This can take over an
+                        hour to run). <input_dir> is expected to only contain
+                        .tirb IR binary test case files, as would be generated
+                        for the non-minimized IR corpus.
+"""
+
+import argparse
+from enum import Enum
+import logging
+import os
+import pathlib
+import shutil
+import subprocess
+import sys
+from typing import TypedDict, List, Any, Generator
+
+
+class Mode(Enum):
+    """Different corpus output modes that the script can run in"""
+    WGSL = 1
+    WGSL_MIN = 2
+    IR = 3
+    IR_MIN = 4
+
+
+class Options(TypedDict):
+    """Container of all the control options parsed from the command line args"""
+    mode: Mode
+    wgsl_fuzzer_bin: str | None
+    ir_as_bin: str | None
+    ir_fuzzer_bin: str | None
+    input_dir: str
+    output_dir: str
+    wgsl_corpus_dir: str
+    wgsl_min_corpus_dir: str
+    ir_corpus_dir: str
+    ir_min_corpus_dir: str
+
+
+logger = logging.getLogger(__name__)
+
+
+def parse_args() -> Options:
+    """Parse command line arguments and produce control options structure.
+    Returns:
+        A populated Options structure.
+    """
+    parser = argparse.ArgumentParser(
+        prog='generate_tint_corpus.py',
+        description=
+        'Generates Tint fuzzer corpus from provided test files, using the provided tool.'
+    )
+    parser.add_argument(
+        'input_dir',
+        help=
+        "Directory containing source files to be turned into a corpus, what format these are expected to be is determined by the type of corpus being generated (which is determined by the tool flag provided). If no tool is provided the non-minimized WGSL corpus will be generated, and the inputs are expected to contain .wgsl WGSL shader files, non-WGSL and '*.expected.wgsl` files will be ignored",
+        type=str)
+    parser.add_argument(
+        'output_dir',
+        help=
+        "Output directory that the results directory should be placed in. The base WGSL fuzzer corpus is created, in '<output_dir>/wgsl_corpus'. Other corpus locations will be specified in their tool specific flags. If a directory already exists, it will be overwritten.",
+        type=str)
+    parser.add_argument('-v',
+                        '--verbose',
+                        help="Enables verbose logging",
+                        action="store_const",
+                        dest="loglevel",
+                        const=logging.INFO)
+    parser.add_argument('--debug',
+                        help="Enables developer debug logging",
+                        action="store_const",
+                        dest="loglevel",
+                        const=logging.DEBUG)
+    tool_group = parser.add_argument_group(
+        'tool flags',
+        'Flags for tool to use for generating the corpus. Which tool that is provided determines which type of corpus is generated. If no tool is provided the non-minimized WGSL fuzzer corpus will be generated.\nOnly one corpus will be generated per invocation, so these flags are mutually exclusive.'
+    )
+    tool_group = tool_group.add_mutually_exclusive_group()
+    tool_group.add_argument(
+        '--wgsl_fuzzer',
+        help=
+        "Instance of tint_wgsl_fuzzer to use for minimization, if provided a minimized WGSL corpus will be generated in '<output_dir>/wgsl_min_corpus'. (This can take over an hour to  run). <input_dir> is expected to be only contain .wgsl WGSL shader files, as would be generated for the non-minimized WGSL corpus.",
+        type=str)
+    tool_group.add_argument(
+        '--ir_as',
+        help=
+        "Instance of ir_fuzz_as to use for assembling IR binary test cases, if provided a non-minimized IR corpus will be generated in '<output_dir>/ir_corpus'. <input_dir> is expected to only contain .wgsl WGSL shader files, as would be generated for the non-minimized WGSL corpus.",
+        type=str)
+    tool_group.add_argument(
+        '--ir_fuzzer',
+        help=
+        "Instance of tint_ir_fuzzer to use for minimization, if provided a minimized corpus will be generated in '<output_dir>/ir_min_corpus'. (This can take over an hour to  run). <input_dir> is expected to only contain .tirb IR binary test case files, as would be generated for the non-minimized IR corpus.",
+        type=str)
+    args = parser.parse_args()
+    logging.basicConfig(level=args.loglevel)
+    logger.debug(vars(args))
+
+    output_dir: str = os.path.abspath(args.output_dir)
+
+    wgsl_fuzzer_bin = check_binary_accessible(
+        args.wgsl_fuzzer, "--wgsl_fuzzer=<tint_wgsl_fuzzer>")
+    ir_as_bin = check_binary_accessible(args.ir_as, "--ir_as=<ir_fuzz_as>")
+    ir_fuzzer_bin = check_binary_accessible(args.ir_fuzzer,
+                                            "--ir_fuzzer=<tint_ir_fuzzer>")
+
+    # This can be replaced with match/case if Python >= 3.10 is guaranteed
+    if not wgsl_fuzzer_bin and not ir_as_bin and not ir_fuzzer_bin:
+        mode = Mode.WGSL
+    elif wgsl_fuzzer_bin and not ir_as_bin and not ir_fuzzer_bin:
+        mode = Mode.WGSL_MIN
+    elif not wgsl_fuzzer_bin and ir_as_bin and not ir_fuzzer_bin:
+        mode = Mode.IR
+    elif not wgsl_fuzzer_bin and not ir_as_bin and ir_fuzzer_bin:
+        mode = Mode.IR_MIN
+    else:
+        logger.critical(
+            f"Some how more than one tool managed to get set after parsing args, wgsl_fuzzer_bin = '{wgsl_fuzzer_bin}, ir_as_bin  = '{ir_as_bin}', ir_fuzzer_bin = '{ir_fuzzer_bin}"
+        )
+        sys.exit(1)
+
+    options: Options = {
+        "mode": mode,
+        "wgsl_fuzzer_bin": wgsl_fuzzer_bin,
+        "ir_as_bin": ir_as_bin,
+        "ir_fuzzer_bin": ir_fuzzer_bin,
+        "input_dir": os.path.abspath(args.input_dir.rstrip(os.sep)),
+        "output_dir": output_dir,
+        "wgsl_corpus_dir": os.path.join(output_dir, "wgsl_corpus"),
+        "wgsl_min_corpus_dir": os.path.join(output_dir, "wgsl_min_corpus"),
+        "ir_corpus_dir": os.path.join(output_dir, "ir_corpus"),
+        "ir_min_corpus_dir": os.path.join(output_dir, "ir_min_corpus"),
+    }
+
+    return options
+
+
+def list_files_with_suffix(
+        root_search_dir: str,
+        suffix: str,
+        excludes: List[str] | None = None) -> Generator[str, Any, None]:
+    """Lists all the files beneath a root directory with a given suffix.
+
+    Args:
+        root_search_dir (str): The directory to search for WGSL files in.
+        suffix (str): The suffix that is being looked for
+        excludes (list | None): A list of suffixes that would match 'suffix', but should not be included in the result.
+    Returns:
+        Yields path to any file that ends in 'suffix' but does not also end with any entry from excludes
+    """
+    if excludes is None:
+        excludes = []
+
+    for root, folders, files in os.walk(root_search_dir):
+        for filename in folders + files:
+            if filename.endswith(suffix):
+                if any(filename.endswith(e) for e in excludes):
+                    logger.debug(f"Skipping {filename}")
+                    continue
+                yield os.path.join(root, filename)
+
+
+def check_binary_accessible(bin_filename: str | None,
+                            log_text: str) -> str | None:
+    """Check if a binary file exists and accessible.
+
+    Args:
+        bin_filename (str|None): The filename of the binary file to check.
+        log_text (str): String describing the related flag for error messages.
+
+    Returns:
+        bin_filename if it is executable & accessible, or None if it is None. Causes a fatal error if bin_filename is not None, but not executable & accessible.
+    """
+    if not bin_filename:
+        return None
+
+    which_bin = shutil.which(bin_filename, mode=os.F_OK | os.X_OK, path='.')
+    if not which_bin:
+        logger.critical(f"Unable to run {log_text} cmd: {bin_filename}")
+        sys.exit(1)
+    return which_bin
+
+
+def create_clean_dir(dirname: str) -> None:
+    """Makes sure there is an empty directory at location.
+
+    Will remove the directory if it already exists and recreate it.
+
+    Args:
+        dirname (str): The directory to create.
+    """
+    if os.path.exists(dirname):
+        shutil.rmtree(dirname)
+    os.makedirs(dirname)
+
+
+def touch_stamp_file(output_dir: str, task_name: str) -> None:
+    """Touches a stamp file to record when a task completed.
+
+    Args:
+        output_dir (str): The directory to touch the stamp file in.
+        task_name (str): The name of the task being stamped for.
+    """
+    stamp_file = os.path.join(output_dir, f"{task_name}.stamp")
+    logger.debug(f"Touching {task_name}.stamp")
+    pathlib.Path(stamp_file).touch(mode=0o644, exist_ok=True)
+
+
+def generate_wgsl_corpus(options: Options) -> None:
+    """Generate non-minimized WGSL corpus
+
+    Args:
+        options (Options): Control options parsed from the command line.
+    """
+    logger.info(
+        f"Generating WGSL corpus to \'{options['wgsl_corpus_dir']}\' ...")
+    create_clean_dir(options["wgsl_corpus_dir"])
+    for in_file in list_files_with_suffix(options["input_dir"], ".wgsl",
+                                          [".expected.wgsl"]):
+        out_file: str = in_file[len(options["input_dir"]) + 1:].replace(
+            os.sep, '_')
+        logger.debug("Copying " + in_file + " to " + out_file)
+        shutil.copy(in_file, os.path.join(options["wgsl_corpus_dir"],
+                                          out_file))
+    touch_stamp_file(options["output_dir"], "wgsl")
+    logger.info("Finished generating WGSL corpus")
+
+
+def generate_wgsl_minimized_corpus(options: Options) -> None:
+    """Generate minimized WGSL corpus
+
+    Args:
+        options (Options): Control options parsed from the command line.
+    """
+    logger.info(
+        f"Minimizing WGSL corpus to \'{options['wgsl_min_corpus_dir']}\' (this will take a while) ..."
+    )
+    create_clean_dir(options["wgsl_min_corpus_dir"])
+
+    # libFuzzer uses TO FROM args for merging/minimization
+    min_cmd = [
+        options["wgsl_fuzzer_bin"], '-merge=1', options["wgsl_min_corpus_dir"],
+        options["input_dir"]
+    ]
+    logger.info(f"Invoking \'{' '.join(min_cmd)}\'")
+    subprocess.run(min_cmd)
+
+    touch_stamp_file(options["output_dir"], "wgsl_min")
+    logger.info("Finished minimizing WGSL corpus")
+
+
+def generate_ir_corpus(options: Options) -> None:
+    """Generate non-minimized IR corpus
+
+    Args:
+        options (Options): Control options parsed from the command line.
+    """
+    logger.info(f"Generating IR corpus to \'{options['ir_corpus_dir']}\' ...")
+    create_clean_dir(options["ir_corpus_dir"])
+
+    gen_cmd = [
+        options["ir_as_bin"], options["input_dir"], options["ir_corpus_dir"]
+    ]
+    logger.info(f"Invoking \'{' '.join(gen_cmd)}\'")
+    subprocess.run(gen_cmd)
+
+    touch_stamp_file(options["output_dir"], "ir")
+    logger.info("Finished generating IR corpus")
+
+
+def generated_ir_minimized_corpus(options: Options) -> None:
+    """Generate minimized IR corpus
+
+    Args:
+        options (Options): Control options parsed from the command line.
+    """
+    logger.info(
+        f"Minimizing IR corpus to \'{options['ir_min_corpus_dir']}\' (this will take a while) ..."
+    )
+    create_clean_dir(options["ir_min_corpus_dir"])
+
+    # libFuzzer uses TO FROM args for merging/minimization
+    min_cmd = [
+        options["ir_fuzzer_bin"], '-merge=1', options["ir_min_corpus_dir"],
+        options["input_dir"]
+    ]
+    logger.info(f"Invoking \'{' '.join(min_cmd)}\'")
+    subprocess.run(min_cmd)
+
+    touch_stamp_file(options["output_dir"], "ir_min")
+    logger.info("Finished minimizing IR corpus")
+
+
+# Builder function map (can be replaced with match/case if Python >= 3.10 is guaranteed)
+builders = {
+    Mode.WGSL: generate_wgsl_corpus,
+    Mode.WGSL_MIN: generate_wgsl_minimized_corpus,
+    Mode.IR: generate_ir_corpus,
+    Mode.IR_MIN: generated_ir_minimized_corpus,
+}
+
+
+def main():
+    options = parse_args()
+    builders[options["mode"]](options)
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/src/tint/cmd/fuzz/ir/as/main.cc b/src/tint/cmd/fuzz/ir/as/main.cc
index 6d62e38..27d2e08 100644
--- a/src/tint/cmd/fuzz/ir/as/main.cc
+++ b/src/tint/cmd/fuzz/ir/as/main.cc
@@ -59,11 +59,13 @@
 
     std::string input_filename;
     std::string output_filename;
-    std::string io_dirname;
+    std::string input_dirname;
+    std::string output_dirname;
 
     bool dump_ir = false;
     bool dump_proto = false;
     bool verbose = false;
+    bool batch_mode = false;
 };
 
 bool ParseArgs(tint::VectorRef<std::string_view> arguments, Options* opts) {
@@ -80,11 +82,6 @@
         ShortName{"col"}, Default{tint::ColorModeDefault()});
     TINT_DEFER(opts->printer = CreatePrinter(*col.value));
 
-    auto& output = options.Add<StringOption>(
-        "output-filename", "Output file name, only usable if single input file provided",
-        ShortName{"o"}, Parameter{"name"});
-    TINT_DEFER(opts->output_filename = output.value.value_or(""));
-
     auto& dump_ir = options.Add<BoolOption>("dump-ir", "Writes the IR form of input to stdout",
                                             Alias{"emit-ir"}, Default{false});
     TINT_DEFER(opts->dump_ir = *dump_ir.value);
@@ -102,14 +99,16 @@
 
     auto show_usage = [&] {
         std::cout
-            << R"(Usage: ir_fuzz_as [options] [-o|--output-filename] <output-file> <input-file> or tint [options] <io-dir>
-If a single WGSL file is provided, the suffix of the input file is not checked, and
-'-o|--output-filename' must be provided.
+            << R"(Usage: ir_fuzz_as [options] '<input-file> <output-file>' or '<input-dir> <output-dir>'
+The first form takes in a single WGSL shader via <input-file> and produces the corresponding IR test
+binary in <output-file>.
 
-If a directory is provided, the files it contains will be scanned and any .wgsl files will have a
-corresponding .tirb file generated.
+In the second form, the input and output arguments are directories. The files it contained in
+<input-dir> will be scanned and any .wgsl files will have acorresponding .tirb file generated in
+<output-dir>.
 
-Passing in '-o|--output-filename' when providing a directory will cause a failure.
+If you are wanting to generate human readable IR from a WGSL file, either --emit-ir should be added
+to options, or the tint CLI used
 
 Options:
 )";
@@ -128,17 +127,25 @@
     }
 
     auto args = result.Get();
-    if (args.Length() > 1) {
-        std::cerr << "More than one input arg specified: "
+    if (args.Length() != 2) {
+        std::cerr << "Expected exactly 2 args, found: "
                   << tint::Join(Transform(args, tint::Quote), ", ") << "\n";
         return false;
     }
-    if (args.Length() == 1) {
-        if (is_directory(std::filesystem::path{args[0]})) {
-            opts->io_dirname = args[0];
-        } else {
-            opts->input_filename = args[0];
-        }
+
+    if (is_directory(std::filesystem::path{args[0]}) &&
+        is_directory(std::filesystem::path{args[1]})) {
+        opts->input_dirname = args[0];
+        opts->output_dirname = args[1];
+        opts->batch_mode = true;
+    } else if ((!is_directory(std::filesystem::path{args[0]}) &&
+                !is_directory(std::filesystem::path{args[1]}))) {
+        opts->input_filename = args[0];
+        opts->output_filename = args[1];
+        opts->batch_mode = false;
+    } else {
+        std::cerr << "Expected args to either both be directories or both be files\n";
+        return false;
     }
 
     return true;
@@ -289,42 +296,23 @@
         return EXIT_FAILURE;
     }
 
-    if (!options.input_filename.empty() && !options.io_dirname.empty()) {
-        std::cerr << "Somehow both input_filename '" << options.input_filename
-                  << ", and io_dirname '" << options.io_dirname
-                  << "' were set after parsing arguments\n";
-        return EXIT_FAILURE;
-    }
-
-    if (options.output_filename.empty() && !options.dump_ir && !options.dump_proto &&
-        options.io_dirname.empty()) {
-        std::cerr << "None of --output-name, --dump-ir, --dump-proto, or <io-dir> were provided, "
-                     "so no output would be generated...\n";
-        return EXIT_FAILURE;
-    }
-
-    if (!options.input_filename.empty()) {
+    if (!options.batch_mode) {
         if (!ProcessFile(options)) {
             return EXIT_FAILURE;
         }
     } else {
-        tint::Vector<std::string, 8> wgsl_filenames;
-
-        // Need to collect the WGSL filenames and then process them in a second phase, so that the
-        // contents of the directory isn't changing during the iteration.
-        for (auto const& io_entry :
-             std::filesystem::directory_iterator{std::filesystem::path{options.io_dirname}}) {
-            const std::string entry_filename = io_entry.path().string();
-            if (entry_filename.substr(entry_filename.size() - 5) == ".wgsl") {
-                wgsl_filenames.Push(std::move(entry_filename));
+        for (auto const& input_entry :
+             std::filesystem::directory_iterator{std::filesystem::path{options.input_dirname}}) {
+            const std::string input_path = input_entry.path().string();
+            if (input_path.substr(input_path.size() - 5) != ".wgsl") {
+                continue;
             }
-        }
 
-        for (auto const& input_filename : wgsl_filenames) {
-            const auto output_filename =
-                std::string(input_filename.substr(0, input_filename.size() - 5)) + ".tirb";
-            options.input_filename = input_filename;
-            options.output_filename = output_filename;
+            options.input_filename = input_entry.path().string();
+
+            auto output_entry = std::filesystem::path{options.output_dirname} /
+                                input_entry.path().filename().replace_extension(".tirb");
+            options.output_filename = output_entry.string();
 
             ProcessFile(options);  // Ignoring the return value, so that one bad file doesn't cause
                                    // the processing batch to stop.
diff --git a/src/tint/cmd/fuzz/ir/generate_ir_corpus.py b/src/tint/cmd/fuzz/ir/generate_ir_corpus.py
deleted file mode 100644
index 46f952b..0000000
--- a/src/tint/cmd/fuzz/ir/generate_ir_corpus.py
+++ /dev/null
@@ -1,99 +0,0 @@
-#!/usr/bin/env python3
-
-# Copyright 2024 The Dawn & Tint Authors
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met:
-#
-# 1. Redistributions of source code must retain the above copyright notice, this
-#    list of conditions and the following disclaimer.
-#
-# 2. Redistributions in binary form must reproduce the above copyright notice,
-#    this list of conditions and the following disclaimer in the documentation
-#    and/or other materials provided with the distribution.
-#
-# 3. Neither the name of the copyright holder nor the names of its
-#    contributors may be used to endorse or promote products derived from
-#    this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
-# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-# Collect all .wgsl files under a given directory and convert them to IR
-# protobuf in a given corpus directory, flattening their file names by replacing
-# path separators with underscores. If the output directory already exists, it
-# will be deleted and re-created. Files ending with ".expected.wgsl" are
-# skipped.
-#
-# The intended use of this script is to generate a  corpus of IR protobufs
-# for fuzzing.
-#
-# Based off of generate_wgsl_corpus.py
-#
-# Usage:
-#    generate_ir_corpus.py <path to ir_fuzz_as cmd> <input_dir> <corpus_dir>
-
-import optparse
-import subprocess
-
-import os
-import pathlib
-import shutil
-import sys
-
-
-def list_wgsl_files(root_search_dir):
-    for root, folders, files in os.walk(root_search_dir):
-        for filename in folders + files:
-            if pathlib.Path(filename).suffix == '.wgsl':
-                yield os.path.join(root, filename)
-
-
-def main():
-    parser = optparse.OptionParser(
-        usage="usage: %prog [option] <ir_fuzz_as cmd> input-dir output-dir")
-    parser.add_option('--stamp', dest='stamp', help='stamp file')
-    options, args = parser.parse_args(sys.argv[1:])
-
-    if len(args) != 3:
-        parser.error("incorrect number of arguments")
-
-    # Look for ir_fuzz_as in current directory, and make sure it exists and is executable
-    ir_fuzz_as: str = shutil.which(args[0], mode=os.F_OK | os.X_OK, path='.')
-    if not ir_fuzz_as:
-        parser.error("Unable to run ir_fuzz_as cmd: " + args[0])
-
-    input_dir: str = os.path.abspath(args[1].rstrip(os.sep))
-    output_dir: str = os.path.abspath(args[2])
-
-    if os.path.exists(output_dir):
-        shutil.rmtree(output_dir)
-    os.makedirs(output_dir)
-
-    for in_file in list_wgsl_files(input_dir):
-        if in_file.endswith(".expected.wgsl"):
-            continue
-
-        out_file = in_file[len(input_dir) + 1:].replace(os.sep, '_')
-        shutil.copy(in_file, os.path.join(output_dir, out_file))
-
-    subprocess.run([ir_fuzz_as, output_dir], stderr=subprocess.STDOUT)
-
-    for f in os.listdir(output_dir):
-        if f.endswith(".wgsl"):
-            os.remove(os.path.join(output_dir, f))
-
-    if options.stamp:
-        pathlib.Path(options.stamp).touch(mode=0o644, exist_ok=True)
-
-
-if __name__ == "__main__":
-    sys.exit(main())
diff --git a/src/tint/cmd/fuzz/wgsl/generate_wgsl_corpus.py b/src/tint/cmd/fuzz/wgsl/generate_wgsl_corpus.py
deleted file mode 100644
index 1cd6ea9..0000000
--- a/src/tint/cmd/fuzz/wgsl/generate_wgsl_corpus.py
+++ /dev/null
@@ -1,78 +0,0 @@
-#!/usr/bin/env python3
-
-# Copyright 2021 The Dawn & Tint Authors
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met:
-#
-# 1. Redistributions of source code must retain the above copyright notice, this
-#    list of conditions and the following disclaimer.
-#
-# 2. Redistributions in binary form must reproduce the above copyright notice,
-#    this list of conditions and the following disclaimer in the documentation
-#    and/or other materials provided with the distribution.
-#
-# 3. Neither the name of the copyright holder nor the names of its
-#    contributors may be used to endorse or promote products derived from
-#    this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
-# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-# Collect all .wgsl files under a given directory and copy them to a given
-# corpus directory, flattening their file names by replacing path
-# separators with underscores. If the output directory already exists, it
-# will be deleted and re-created. Files ending with ".expected.spvasm" are
-# skipped.
-#
-# The intended use of this script is to generate a corpus of WGSL shaders
-# for fuzzing.
-#
-# Usage:
-#    generate_wgsl_corpus.py <input_dir> <corpus_dir>
-
-import optparse
-import os
-import pathlib
-import shutil
-import sys
-
-
-def list_wgsl_files(root_search_dir):
-    for root, folders, files in os.walk(root_search_dir):
-        for filename in folders + files:
-            if pathlib.Path(filename).suffix == '.wgsl':
-                yield os.path.join(root, filename)
-
-
-def main():
-    parser = optparse.OptionParser(
-        usage="usage: %prog [option] input-dir output-dir")
-    parser.add_option('--stamp', dest='stamp', help='stamp file')
-    options, args = parser.parse_args(sys.argv[1:])
-    if len(args) != 2:
-        parser.error("incorrect number of arguments")
-    input_dir: str = os.path.abspath(args[0].rstrip(os.sep))
-    corpus_dir: str = os.path.abspath(args[1])
-    if os.path.exists(corpus_dir):
-        shutil.rmtree(corpus_dir)
-    os.makedirs(corpus_dir)
-    for in_file in list_wgsl_files(input_dir):
-        if in_file.endswith(".expected.wgsl"):
-            continue
-        out_file = in_file[len(input_dir) + 1:].replace(os.sep, '_')
-        shutil.copy(in_file, corpus_dir + os.sep + out_file)
-    if options.stamp:
-        pathlib.Path(options.stamp).touch(mode=0o644, exist_ok=True)
-
-
-if __name__ == "__main__":
-    sys.exit(main())
diff --git a/src/tint/tint.gni b/src/tint/tint.gni
index 866c0db..b3033a2 100644
--- a/src/tint/tint.gni
+++ b/src/tint/tint.gni
@@ -162,10 +162,15 @@
 ###############################################################################
 if (tint_has_fuzzers) {
   import("//testing/libfuzzer/fuzzer_test.gni")
-  fuzzer_corpus_wgsl_dir = "${root_gen_dir}/fuzzers/wgsl_corpus"
-  fuzzer_corpus_wgsl_stamp = "${fuzzer_corpus_wgsl_dir}.stamp"
-  fuzzer_corpus_ir_dir = "${root_gen_dir}/fuzzers/ir_corpus"
-  fuzzer_corpus_ir_stamp = "${fuzzer_corpus_ir_dir}.stamp"
+  fuzzer_corpus_dir = "${root_gen_dir}/fuzzers"
+  fuzzer_corpus_wgsl_dir = "${fuzzer_corpus_dir}/wgsl_corpus"
+  fuzzer_corpus_wgsl_stamp = "${fuzzer_corpus_dir}/wgsl.stamp"
+  fuzzer_corpus_wgsl_min_dir = "${fuzzer_corpus_dir}/wgsl_min_corpus"
+  fuzzer_corpus_wgsl_min_stamp = "${fuzzer_corpus_dir}/wgsl_min.stamp"
+  fuzzer_corpus_ir_dir = "${fuzzer_corpus_dir}/ir_corpus"
+  fuzzer_corpus_ir_stamp = "${fuzzer_corpus_dir}/ir.stamp"
+  fuzzer_corpus_ir_min_dir = "${fuzzer_corpus_dir}/ir_min_corpus"
+  fuzzer_corpus_ir_min_stamp = "${fuzzer_corpus_dir}/ir_min.stamp"
 
   template("tint_fuzz_source_set") {
     source_set(target_name) {
@@ -225,20 +230,18 @@
           "only_ascii=1",
           "max_len=10000",
         ]
+
+        # TODO(https://crbug.com/399689217): When migrating to using
+        # GCS for corpus storage, the seed_* properties can be removed.
+        deps += [ "${tint_src_dir}:tint_generate_wgsl_corpus" ]
         seed_corpus = fuzzer_corpus_wgsl_dir
         seed_corpus_deps = [ "${tint_src_dir}:tint_generate_wgsl_corpus" ]
       } else if (target_name == "ir") {
-        # The IR corpus takes a long time to generate and may not
-        # actually be correctly formatted (crbug.com/345377541 &
-        # crbug.com/343218481).
-        # Disabling generating it by default until these issues
-        # are resolved.
-        # The corpus can still be generated by explicitly
-        # invoking tint_generate_ir_corpus
-        #
-        # seed_corpus = fuzzer_corpus_ir_dir
-        # seed_corpus_deps = [ "${tint_src_dir}:tint_generate_ir_corpus" ]
-        #
+        # TODO(https://crbug.com/399689217): When migrating to using
+        # GCS for corpus storage, the seed_* properties can be removed.
+        deps += [ "${tint_src_dir}:tint_generate_ir_corpus" ]
+        seed_corpus = fuzzer_corpus_ir_dir
+        seed_corpus_deps = [ "${tint_src_dir}:tint_generate_ir_corpus" ]
       } else {
         assert(false, "unsupported tint fuzzer target")
       }