Reapply "Reapply "[tint][fuzz] Add fuzzer corpora minimization""

This reverts commit a2afb6c6461ce01cec9750573e0c26ad8a7c5daa.

Removed TypedDict and type annotations, which are not fully supported
on older versions of Python, i.e. the ones shipped by OSX.

Issue: 399689217
Change-Id: Ic306e0dc0e1727684810fc52ceb8a67b0dc33bbd
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/230214
Auto-Submit: Ryan Harrison <rharrison@chromium.org>
Reviewed-by: dan sinclair <dsinclair@chromium.org>
Commit-Queue: Ryan Harrison <rharrison@chromium.org>
diff --git a/docs/tint/fuzzers.md b/docs/tint/fuzzers.md
index d70068b..3c66b74 100644
--- a/docs/tint/fuzzers.md
+++ b/docs/tint/fuzzers.md
@@ -103,6 +103,45 @@
 out/libfuzz/tint_ir_fuzzer.exe out/libfuzz/gen/fuzzers/ir_corpus
 ```
 
+#### Minimizing the corpus
+
+By design our GN rules do not minimize the corpus, since this can take
+over an hour to run, so would be way too costly to run on the
+bots. Additionally there is no rule provided to do this, because there
+are bots/checks in the Chromium ecosystem that try running/building
+every target.
+
+There is not a lot of need to minimize the corpus, since the libFuzzer
+will do it automatically as needed, and ClusterFuzz stores the working
+corpus in GCS, so the cost for minimization will be amortized over
+time.
+
+The one time that it might be needed is if the corpus has radically
+changed, i.e. a major language change, or new feature. Then a dev may
+want to generate a new corpus, minimize it, and then manually update
+the GCS bucket with it to help the fuzzer out.
+
+Minimizing the corpus can be done via manually invoking the underlying
+script that the GN targets use for generating the corpus.
+
+This will generate a minimized version of the WGSL corpus in
+`out/Fuzzer/gen/fuzzers/wgsl_min_corpus`:
+```bash
+autoninja -C out/Fuzzer tint_wgsl_fuzzer
+python3 src/tint/cmd/fuzz/generate_tint_corpus.py out/Fuzzer/gen/fuzzers/wgsl_corpus out/Fuzzer/gen/fuzzers/ --wgsl_fuzzer=out/Fuzzer/tint_wgsl_fuzzer
+```
+
+This will generate a minimized version of the IR corpus in
+`out/Fuzzer/gen/fuzzers/ir_min_corpus`:
+```bash
+autoninja -C out/Fuzzer tint_ir_fuzzer
+python3 src/tint/cmd/fuzz/generate_tint_corpus.py out/Fuzzer/gen/fuzzers/ir_corpus out/Fuzzer/gen/fuzzers/ --ir_fuzzer=out/Fuzzer/tint_ir_fuzzer
+```
+
+(Building the fuzzer binary via GN will generate the non-minimized
+corpus, which is needed for minimizing).
+
+
 ## Writing fuzzers
 
 ### Registering a new `tint::Program` fuzzer
diff --git a/src/tint/BUILD.gn b/src/tint/BUILD.gn
index af4db9d..3771013 100644
--- a/src/tint/BUILD.gn
+++ b/src/tint/BUILD.gn
@@ -308,12 +308,13 @@
 ###############################################################################
 if (tint_has_fuzzers) {
   action("tint_generate_wgsl_corpus") {
-    script = "${tint_src_dir}/cmd/fuzz/wgsl/generate_wgsl_corpus.py"
+    testonly = true
+    script = "${tint_src_dir}/cmd/fuzz/generate_tint_corpus.py"
     sources = [ "${script}" ]
+    deps = []
     args = [
-      "--stamp=" + rebase_path(fuzzer_corpus_wgsl_stamp, root_build_dir),
       rebase_path("${tint_root_dir}/test", root_build_dir),
-      rebase_path(fuzzer_corpus_wgsl_dir, root_build_dir),
+      rebase_path(fuzzer_corpus_dir, root_build_dir),
     ]
     outputs = [ fuzzer_corpus_wgsl_stamp ]
   }
@@ -321,14 +322,18 @@
   if (tint_build_ir_fuzzer) {
     if (tint_build_cmds && tint_build_wgsl_reader) {
       action("tint_generate_ir_corpus") {
-        script = "${tint_src_dir}/cmd/fuzz/ir/generate_ir_corpus.py"
+        testonly = true
+        script = "${tint_src_dir}/cmd/fuzz/generate_tint_corpus.py"
         sources = [ "${script}" ]
-        deps = [ "${tint_src_dir}/cmd/fuzz/ir/as" ]
+        deps = [
+          ":tint_generate_wgsl_corpus",
+          "${tint_src_dir}/cmd/fuzz/ir/as",
+        ]
         args = [
-          "--stamp=" + rebase_path(fuzzer_corpus_ir_stamp, root_build_dir),
-          rebase_path("${root_build_dir}/ir_fuzz_as", root_build_dir),
-          rebase_path("${tint_root_dir}/test", root_build_dir),
-          rebase_path(fuzzer_corpus_ir_dir, root_build_dir),
+          rebase_path(fuzzer_corpus_wgsl_dir, root_build_dir),
+          rebase_path(fuzzer_corpus_dir, root_build_dir),
+          "--ir_as=" +
+              rebase_path("${root_build_dir}/ir_fuzz_as", root_build_dir),
         ]
         outputs = [ fuzzer_corpus_ir_stamp ]
       }
diff --git a/src/tint/cmd/fuzz/generate_tint_corpus.py b/src/tint/cmd/fuzz/generate_tint_corpus.py
new file mode 100644
index 0000000..d4a4402
--- /dev/null
+++ b/src/tint/cmd/fuzz/generate_tint_corpus.py
@@ -0,0 +1,397 @@
+#!/usr/bin/env python3
+
+# Copyright 2025 The Dawn & Tint Authors
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice, this
+#    list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+#    this list of conditions and the following disclaimer in the documentation
+#    and/or other materials provided with the distribution.
+#
+# 3. Neither the name of the copyright holder nor the names of its
+#    contributors may be used to endorse or promote products derived from
+#    this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+""" Script for generating the various Tint fuzzer corpora.
+
+The basic functionality is to collect all .wgsl files under a given directory
+and copy them to a given directory, flattening their file names by replacing
+path separators with underscores. If the output directory already exists, it
+will be deleted and re-created. Files ending with ".expected.wgsl" are
+skipped.
+
+Additional flags can be used for providing tooling for converting this corpus
+of .wgsl files into Tint IR fuzzer protobufs (.tirb) or for minimizing the
+corpus.
+
+usage: generate_tint_corpus.py [-h] [-v] [--debug] [--wgsl_fuzzer WGSL_FUZZER | --ir_as IR_AS | --ir_fuzzer IR_FUZZER] input_dir output_dir
+
+Generates Tint fuzzer corpus from provided test files, using the provided tool.
+
+positional arguments:
+  input_dir             Directory containing source files to be turned into a
+                        corpus, what format these are expected to be is
+                        determined by the type of corpus being generated (which
+                        is determined by the tool flag provided). If no tool is
+                        provided the non-minimized WGSL corpus will be
+                        generated, and the inputs are expected to contain .wgsl
+                        WGSL shader files, non-WGSL and '*.expected.wgsl` files
+                        will be ignored
+  output_dir            Output directory that the results directory should be
+                        placed in. The base WGSL fuzzer corpus is created, in
+                        '<output_dir>/wgsl_corpus'. Other corpus locations will
+                        be specified in their tool specific flags. If a
+                        directory already exists, it will be overwritten.
+
+options:
+  -h, --help            show this help message and exit
+  -v, --verbose         Enables verbose logging
+  --debug               Enables developer debug logging
+
+tool flags:
+  Flags for tool to use for generating the corpus. Which tool that is provided
+  determines which type of corpus is generated. If no tool is
+  provided the non-minimized WGSL fuzzer corpus will be generated. Only one
+  corpus will be generated per invocation, so these flags are mutually
+  exclusive.
+
+  --wgsl_fuzzer WGSL_FUZZER
+                        Instance of tint_wgsl_fuzzer to use for minimization, if
+                         provided a minimized WGSL corpus will be generated in
+                        '<output_dir>/wgsl_min_corpus'. (This can take over an
+                        hour to run). <input_dir> is expected to only contain
+                        .wgsl WGSL shader files, as would be generated for the
+                        non-minimized WGSL corpus.
+  --ir_as IR_AS         Instance of ir_fuzz_as to use for assembling IR binary
+                        test cases, if provided a non-minimized IR corpus will
+                        be generated in '<output_dir>/ir_corpus'. <input_dir> is
+                        expected to only contain .wgsl WGSL shader files, as
+                        would be generated for the non-minimized WGSL corpus.
+  --ir_fuzzer IR_FUZZER
+                        Instance of tint_ir_fuzzer to use for minimization, if
+                        provided a minimized corpus will be generated in
+                        '<output_dir>/ir_min_corpus'. (This can take over an
+                        hour to run). <input_dir> is expected to only contain
+                        .tirb IR binary test case files, as would be generated
+                        for the non-minimized IR corpus.
+"""
+
+import argparse
+from enum import Enum
+import logging
+import os
+import pathlib
+import shutil
+import subprocess
+import sys
+
+
+class Mode(Enum):
+    """Different corpus output modes that the script can run in"""
+    WGSL = 1
+    WGSL_MIN = 2
+    IR = 3
+    IR_MIN = 4
+
+
+# Note: When Mac upgrades to a modern version of Python, this can be replaced with a TypedDict and type annotations can be used throughout
+class Options:
+    """Container of all the control options parsed from the command line args"""
+
+    def __init__(self, mode, wgsl_fuzzer_bin, ir_as_bin, ir_fuzzer_bin,
+                 input_dir, output_dir, wgsl_corpus_dir, wgsl_min_corpus_dir,
+                 ir_corpus_dir, ir_min_corpus_dir):
+        self.mode = mode
+        self.wgsl_fuzzer_bin = wgsl_fuzzer_bin
+        self.ir_as_bin = ir_as_bin
+        self.ir_fuzzer_bin = ir_fuzzer_bin
+        self.input_dir = input_dir
+        self.output_dir = output_dir
+        self.wgsl_corpus_dir = wgsl_corpus_dir
+        self.wgsl_min_corpus_dir = wgsl_min_corpus_dir
+        self.ir_corpus_dir = ir_corpus_dir
+        self.ir_min_corpus_dir = ir_min_corpus_dir
+
+
+logger = logging.getLogger(__name__)
+
+
+def parse_args():
+    """Parse command line arguments and produce control options structure.
+    Returns:
+        A populated Options structure; exits with status 1 on an inaccessible tool binary or an invalid tool combination.
+    """
+    parser = argparse.ArgumentParser(
+        prog='generate_tint_corpus.py',
+        description=
+        'Generates Tint fuzzer corpus from provided test files, using the provided tool.'
+    )
+    parser.add_argument(
+        'input_dir',
+        help=
+        "Directory containing source files to be turned into a corpus, what format these are expected to be is determined by the type of corpus being generated (which is determined by the tool flag provided). If no tool is provided the non-minimized WGSL corpus will be generated, and the inputs are expected to contain .wgsl WGSL shader files, non-WGSL and '*.expected.wgsl` files will be ignored",
+        type=str)
+    parser.add_argument(
+        'output_dir',
+        help=
+        "Output directory that the results directory should be placed in. The base WGSL fuzzer corpus is created, in '<output_dir>/wgsl_corpus'. Other corpus locations will be specified in their tool specific flags. If a directory already exists, it will be overwritten.",
+        type=str)
+    parser.add_argument('-v',
+                        '--verbose',
+                        help="Enables verbose logging",
+                        action="store_const",
+                        dest="loglevel",
+                        const=logging.INFO)
+    parser.add_argument('--debug',
+                        help="Enables developer debug logging",
+                        action="store_const",
+                        dest="loglevel",
+                        const=logging.DEBUG)
+    tool_group = parser.add_argument_group(
+        'tool flags',
+        'Flags for tool to use for generating the corpus. Which tool that is provided determines which type of corpus is generated. If no tool is provided the non-minimized WGSL fuzzer corpus will be generated.\nOnly one corpus will be generated per invocation, so these flags are mutually exclusive.'
+    )
+    tool_group = tool_group.add_mutually_exclusive_group()
+    tool_group.add_argument(
+        '--wgsl_fuzzer',
+        help=
+        "Instance of tint_wgsl_fuzzer to use for minimization, if provided a minimized WGSL corpus will be generated in '<output_dir>/wgsl_min_corpus'. (This can take over an hour to  run). <input_dir> is expected to only contain .wgsl WGSL shader files, as would be generated for the non-minimized WGSL corpus.",
+        type=str)
+    tool_group.add_argument(
+        '--ir_as',
+        help=
+        "Instance of ir_fuzz_as to use for assembling IR binary test cases, if provided a non-minimized IR corpus will be generated in '<output_dir>/ir_corpus'. <input_dir> is expected to only contain .wgsl WGSL shader files, as would be generated for the non-minimized WGSL corpus.",
+        type=str)
+    tool_group.add_argument(
+        '--ir_fuzzer',
+        help=
+        "Instance of tint_ir_fuzzer to use for minimization, if provided a minimized corpus will be generated in '<output_dir>/ir_min_corpus'. (This can take over an hour to  run). <input_dir> is expected to only contain .tirb IR binary test case files, as would be generated for the non-minimized IR corpus.",
+        type=str)
+    args = parser.parse_args()
+    logging.basicConfig(level=args.loglevel)  # loglevel is None unless -v/--debug was passed
+    logger.debug(vars(args))
+
+    output_dir = os.path.abspath(args.output_dir)
+
+    wgsl_fuzzer_bin = check_binary_accessible(
+        args.wgsl_fuzzer, "--wgsl_fuzzer=<tint_wgsl_fuzzer>")
+    ir_as_bin = check_binary_accessible(args.ir_as, "--ir_as=<ir_fuzz_as>")
+    ir_fuzzer_bin = check_binary_accessible(args.ir_fuzzer,
+                                            "--ir_fuzzer=<tint_ir_fuzzer>")
+
+    # This can be replaced with match/case if Python >= 3.10 is guaranteed
+    if not wgsl_fuzzer_bin and not ir_as_bin and not ir_fuzzer_bin:
+        mode = Mode.WGSL
+    elif wgsl_fuzzer_bin and not ir_as_bin and not ir_fuzzer_bin:
+        mode = Mode.WGSL_MIN
+    elif not wgsl_fuzzer_bin and ir_as_bin and not ir_fuzzer_bin:
+        mode = Mode.IR
+    elif not wgsl_fuzzer_bin and not ir_as_bin and ir_fuzzer_bin:
+        mode = Mode.IR_MIN
+    else:
+        logger.critical(
+            f"Somehow more than one tool managed to get set after parsing args, wgsl_fuzzer_bin = '{wgsl_fuzzer_bin}', ir_as_bin = '{ir_as_bin}', ir_fuzzer_bin = '{ir_fuzzer_bin}'"
+        )
+        sys.exit(1)
+
+    return Options(mode=mode,
+                   wgsl_fuzzer_bin=wgsl_fuzzer_bin,
+                   ir_as_bin=ir_as_bin,
+                   ir_fuzzer_bin=ir_fuzzer_bin,
+                   input_dir=os.path.abspath(args.input_dir.rstrip(os.sep)),
+                   output_dir=output_dir,
+                   wgsl_corpus_dir=os.path.join(output_dir, "wgsl_corpus"),
+                   wgsl_min_corpus_dir=os.path.join(output_dir,
+                                                    "wgsl_min_corpus"),
+                   ir_corpus_dir=os.path.join(output_dir, "ir_corpus"),
+                   ir_min_corpus_dir=os.path.join(output_dir, "ir_min_corpus"))
+
+
+def list_files_with_suffix(root_search_dir, suffix, excludes):
+    """Lists all the files beneath a root directory with a given suffix.
+
+    Args:
+        root_search_dir (str): The directory to recursively search for files in.
+        suffix (str): The suffix that is being looked for
+        excludes (list | None): A list of suffixes that would match 'suffix', but should not be included in the result.
+    Returns:
+        Yields path to any file (directory entries are never yielded) that ends in 'suffix' but does not also end with any entry from excludes
+    """
+    if excludes is None:
+        excludes = []
+
+    for root, _, files in os.walk(root_search_dir):
+        for filename in files:
+            if filename.endswith(suffix):
+                if any(filename.endswith(e) for e in excludes):
+                    logger.debug(f"Skipping {filename}")
+                    continue
+                yield os.path.join(root, filename)
+
+
+def check_binary_accessible(bin_filename, log_text):
+    """Check if a binary file exists and accessible.
+
+    Args:
+        bin_filename (str|None): The filename of the binary file to check.
+        log_text (str): String describing the related flag for error messages.
+
+    Returns:
+        bin_filename if it is executable & accessible, or None if it is None. Causes a fatal error if bin_filename is not None, but not executable & accessible.
+    """
+    if not bin_filename:
+        return None
+
+    which_bin = shutil.which(bin_filename, mode=os.F_OK | os.X_OK, path='.')
+    if not which_bin:
+        if not os.path.exists(os.path.join('.', bin_filename)):
+            logger.critical(
+                f"Unable to run {log_text} cmd: '{bin_filename}' does not exist in path"
+            )
+        elif not os.access(os.path.join('.', bin_filename), os.X_OK):
+            logger.critical(
+                f"Unable to run {log_text} cmd: '{bin_filename}' is not executable"
+            )
+        else:
+            logger.critical(
+                f"Unable to run {log_text} cmd: '{bin_filename}' for unknown reason"
+            )
+        sys.exit(1)
+    return which_bin
+
+
+def create_clean_dir(dirname):
+    """Makes sure there is an empty directory at location.
+
+    Will remove the directory if it already exists and recreate it.
+
+    Args:
+        dirname (str): The directory to create.
+    """
+    if os.path.exists(dirname):
+        shutil.rmtree(dirname)
+    os.makedirs(dirname)
+
+
+def touch_stamp_file(output_dir, task_name):
+    """Touches a stamp file to record when a task completed.
+
+    Args:
+        output_dir (str): The directory to touch the stamp file in.
+        task_name (str): The name of the task being stamped for.
+    """
+    stamp_file = os.path.join(output_dir, f"{task_name}.stamp")
+    logger.debug(f"Touching {task_name}.stamp")
+    pathlib.Path(stamp_file).touch(mode=0o644, exist_ok=True)
+
+
+def generate_wgsl_corpus(options):
+    """Generate non-minimized WGSL corpus
+
+    Args:
+        options (Options): Control options parsed from the command line.
+    """
+    logger.info(f"Generating WGSL corpus to \'{options.wgsl_corpus_dir}\' ...")
+    create_clean_dir(options.wgsl_corpus_dir)
+    for in_file in list_files_with_suffix(options.input_dir, ".wgsl",
+                                          [".expected.wgsl"]):
+        out_file = in_file[len(options.input_dir) + 1:].replace(os.sep, '_')
+        logger.debug("Copying " + in_file + " to " + out_file)
+        shutil.copy(in_file, os.path.join(options.wgsl_corpus_dir, out_file))
+    touch_stamp_file(options.output_dir, "wgsl")
+    logger.info("Finished generating WGSL corpus")
+
+
+def generate_wgsl_minimized_corpus(options):
+    """Generate minimized WGSL corpus
+
+    Args:
+        options (Options): Control options parsed from the command line.
+    """
+    logger.info(
+        f"Minimizing WGSL corpus to \'{options.wgsl_min_corpus_dir}\' (this will take a while) ..."
+    )
+    create_clean_dir(options.wgsl_min_corpus_dir)
+
+    # libFuzzer uses TO FROM args for merging/minimization
+    min_cmd = [
+        options.wgsl_fuzzer_bin, '-merge=1', options.wgsl_min_corpus_dir,
+        options.input_dir
+    ]
+    logger.info(f"Invoking \'{' '.join(min_cmd)}\'")
+    subprocess.run(min_cmd)
+
+    touch_stamp_file(options.output_dir, "wgsl_min")
+    logger.info("Finished minimizing WGSL corpus")
+
+
+def generate_ir_corpus(options):
+    """Generate non-minimized IR corpus
+
+    Args:
+        options (Options): Control options parsed from the command line.
+    """
+    logger.info(f"Generating IR corpus to \'{options.ir_corpus_dir}\' ...")
+    create_clean_dir(options.ir_corpus_dir)
+
+    gen_cmd = [options.ir_as_bin, options.input_dir, options.ir_corpus_dir]
+    logger.info(f"Invoking \'{' '.join(gen_cmd)}\'")
+    subprocess.run(gen_cmd)
+
+    touch_stamp_file(options.output_dir, "ir")
+    logger.info("Finished generating IR corpus")
+
+
+def generated_ir_minimized_corpus(options):
+    """Generate minimized IR corpus
+
+    Args:
+        options (Options): Control options parsed from the command line.
+    """
+    logger.info(
+        f"Minimizing IR corpus to \'{options.ir_min_corpus_dir}\' (this will take a while) ..."
+    )
+    create_clean_dir(options.ir_min_corpus_dir)
+
+    # libFuzzer uses TO FROM args for merging/minimization
+    min_cmd = [
+        options.ir_fuzzer_bin, '-merge=1', options.ir_min_corpus_dir,
+        options.input_dir
+    ]
+    logger.info(f"Invoking \'{' '.join(min_cmd)}\'")
+    subprocess.run(min_cmd)
+
+    touch_stamp_file(options.output_dir, "ir_min")
+    logger.info("Finished minimizing IR corpus")
+
+
+# Builder function map (can be replaced with match/case if Python >= 3.10 is guaranteed)
+builders = {
+    Mode.WGSL: generate_wgsl_corpus,
+    Mode.WGSL_MIN: generate_wgsl_minimized_corpus,
+    Mode.IR: generate_ir_corpus,
+    Mode.IR_MIN: generated_ir_minimized_corpus,
+}
+
+
+def main():
+    options = parse_args()
+    builders[options.mode](options)
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/src/tint/cmd/fuzz/ir/as/main.cc b/src/tint/cmd/fuzz/ir/as/main.cc
index 6d62e38..27d2e08 100644
--- a/src/tint/cmd/fuzz/ir/as/main.cc
+++ b/src/tint/cmd/fuzz/ir/as/main.cc
@@ -59,11 +59,13 @@
 
     std::string input_filename;
     std::string output_filename;
-    std::string io_dirname;
+    std::string input_dirname;
+    std::string output_dirname;
 
     bool dump_ir = false;
     bool dump_proto = false;
     bool verbose = false;
+    bool batch_mode = false;
 };
 
 bool ParseArgs(tint::VectorRef<std::string_view> arguments, Options* opts) {
@@ -80,11 +82,6 @@
         ShortName{"col"}, Default{tint::ColorModeDefault()});
     TINT_DEFER(opts->printer = CreatePrinter(*col.value));
 
-    auto& output = options.Add<StringOption>(
-        "output-filename", "Output file name, only usable if single input file provided",
-        ShortName{"o"}, Parameter{"name"});
-    TINT_DEFER(opts->output_filename = output.value.value_or(""));
-
     auto& dump_ir = options.Add<BoolOption>("dump-ir", "Writes the IR form of input to stdout",
                                             Alias{"emit-ir"}, Default{false});
     TINT_DEFER(opts->dump_ir = *dump_ir.value);
@@ -102,14 +99,16 @@
 
     auto show_usage = [&] {
         std::cout
-            << R"(Usage: ir_fuzz_as [options] [-o|--output-filename] <output-file> <input-file> or tint [options] <io-dir>
-If a single WGSL file is provided, the suffix of the input file is not checked, and
-'-o|--output-filename' must be provided.
+            << R"(Usage: ir_fuzz_as [options] '<input-file> <output-file>' or '<input-dir> <output-dir>'
+The first form takes in a single WGSL shader via <input-file> and produces the corresponding IR test
+binary in <output-file>.
 
-If a directory is provided, the files it contains will be scanned and any .wgsl files will have a
-corresponding .tirb file generated.
+In the second form, the input and output arguments are directories. The files contained in
+<input-dir> will be scanned and any .wgsl files will have a corresponding .tirb file generated in
+<output-dir>.
 
-Passing in '-o|--output-filename' when providing a directory will cause a failure.
+If you are wanting to generate human readable IR from a WGSL file, either --emit-ir should be added
+to options, or the tint CLI used
 
 Options:
 )";
@@ -128,17 +127,25 @@
     }
 
     auto args = result.Get();
-    if (args.Length() > 1) {
-        std::cerr << "More than one input arg specified: "
+    if (args.Length() != 2) {
+        std::cerr << "Expected exactly 2 args, found: "
                   << tint::Join(Transform(args, tint::Quote), ", ") << "\n";
         return false;
     }
-    if (args.Length() == 1) {
-        if (is_directory(std::filesystem::path{args[0]})) {
-            opts->io_dirname = args[0];
-        } else {
-            opts->input_filename = args[0];
-        }
+
+    if (is_directory(std::filesystem::path{args[0]}) &&
+        is_directory(std::filesystem::path{args[1]})) {
+        opts->input_dirname = args[0];
+        opts->output_dirname = args[1];
+        opts->batch_mode = true;
+    } else if ((!is_directory(std::filesystem::path{args[0]}) &&
+                !is_directory(std::filesystem::path{args[1]}))) {
+        opts->input_filename = args[0];
+        opts->output_filename = args[1];
+        opts->batch_mode = false;
+    } else {
+        std::cerr << "Expected args to either both be directories or both be files\n";
+        return false;
     }
 
     return true;
@@ -289,42 +296,23 @@
         return EXIT_FAILURE;
     }
 
-    if (!options.input_filename.empty() && !options.io_dirname.empty()) {
-        std::cerr << "Somehow both input_filename '" << options.input_filename
-                  << ", and io_dirname '" << options.io_dirname
-                  << "' were set after parsing arguments\n";
-        return EXIT_FAILURE;
-    }
-
-    if (options.output_filename.empty() && !options.dump_ir && !options.dump_proto &&
-        options.io_dirname.empty()) {
-        std::cerr << "None of --output-name, --dump-ir, --dump-proto, or <io-dir> were provided, "
-                     "so no output would be generated...\n";
-        return EXIT_FAILURE;
-    }
-
-    if (!options.input_filename.empty()) {
+    if (!options.batch_mode) {
         if (!ProcessFile(options)) {
             return EXIT_FAILURE;
         }
     } else {
-        tint::Vector<std::string, 8> wgsl_filenames;
-
-        // Need to collect the WGSL filenames and then process them in a second phase, so that the
-        // contents of the directory isn't changing during the iteration.
-        for (auto const& io_entry :
-             std::filesystem::directory_iterator{std::filesystem::path{options.io_dirname}}) {
-            const std::string entry_filename = io_entry.path().string();
-            if (entry_filename.substr(entry_filename.size() - 5) == ".wgsl") {
-                wgsl_filenames.Push(std::move(entry_filename));
+        for (auto const& input_entry :
+             std::filesystem::directory_iterator{std::filesystem::path{options.input_dirname}}) {
+            // Check the extension via std::filesystem; substr(size() - 5) throws on short paths.
+            if (input_entry.path().extension() != ".wgsl") {
+                continue;
+            }
-        }
 
-        for (auto const& input_filename : wgsl_filenames) {
-            const auto output_filename =
-                std::string(input_filename.substr(0, input_filename.size() - 5)) + ".tirb";
-            options.input_filename = input_filename;
-            options.output_filename = output_filename;
+            options.input_filename = input_entry.path().string();
+
+            auto output_entry = std::filesystem::path{options.output_dirname} /
+                                input_entry.path().filename().replace_extension(".tirb");
+            options.output_filename = output_entry.string();
 
             ProcessFile(options);  // Ignoring the return value, so that one bad file doesn't cause
                                    // the processing batch to stop.
diff --git a/src/tint/cmd/fuzz/ir/generate_ir_corpus.py b/src/tint/cmd/fuzz/ir/generate_ir_corpus.py
deleted file mode 100644
index 46f952b..0000000
--- a/src/tint/cmd/fuzz/ir/generate_ir_corpus.py
+++ /dev/null
@@ -1,99 +0,0 @@
-#!/usr/bin/env python3
-
-# Copyright 2024 The Dawn & Tint Authors
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met:
-#
-# 1. Redistributions of source code must retain the above copyright notice, this
-#    list of conditions and the following disclaimer.
-#
-# 2. Redistributions in binary form must reproduce the above copyright notice,
-#    this list of conditions and the following disclaimer in the documentation
-#    and/or other materials provided with the distribution.
-#
-# 3. Neither the name of the copyright holder nor the names of its
-#    contributors may be used to endorse or promote products derived from
-#    this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
-# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-# Collect all .wgsl files under a given directory and convert them to IR
-# protobuf in a given corpus directory, flattening their file names by replacing
-# path separators with underscores. If the output directory already exists, it
-# will be deleted and re-created. Files ending with ".expected.wgsl" are
-# skipped.
-#
-# The intended use of this script is to generate a  corpus of IR protobufs
-# for fuzzing.
-#
-# Based off of generate_wgsl_corpus.py
-#
-# Usage:
-#    generate_ir_corpus.py <path to ir_fuzz_as cmd> <input_dir> <corpus_dir>
-
-import optparse
-import subprocess
-
-import os
-import pathlib
-import shutil
-import sys
-
-
-def list_wgsl_files(root_search_dir):
-    for root, folders, files in os.walk(root_search_dir):
-        for filename in folders + files:
-            if pathlib.Path(filename).suffix == '.wgsl':
-                yield os.path.join(root, filename)
-
-
-def main():
-    parser = optparse.OptionParser(
-        usage="usage: %prog [option] <ir_fuzz_as cmd> input-dir output-dir")
-    parser.add_option('--stamp', dest='stamp', help='stamp file')
-    options, args = parser.parse_args(sys.argv[1:])
-
-    if len(args) != 3:
-        parser.error("incorrect number of arguments")
-
-    # Look for ir_fuzz_as in current directory, and make sure it exists and is executable
-    ir_fuzz_as: str = shutil.which(args[0], mode=os.F_OK | os.X_OK, path='.')
-    if not ir_fuzz_as:
-        parser.error("Unable to run ir_fuzz_as cmd: " + args[0])
-
-    input_dir: str = os.path.abspath(args[1].rstrip(os.sep))
-    output_dir: str = os.path.abspath(args[2])
-
-    if os.path.exists(output_dir):
-        shutil.rmtree(output_dir)
-    os.makedirs(output_dir)
-
-    for in_file in list_wgsl_files(input_dir):
-        if in_file.endswith(".expected.wgsl"):
-            continue
-
-        out_file = in_file[len(input_dir) + 1:].replace(os.sep, '_')
-        shutil.copy(in_file, os.path.join(output_dir, out_file))
-
-    subprocess.run([ir_fuzz_as, output_dir], stderr=subprocess.STDOUT)
-
-    for f in os.listdir(output_dir):
-        if f.endswith(".wgsl"):
-            os.remove(os.path.join(output_dir, f))
-
-    if options.stamp:
-        pathlib.Path(options.stamp).touch(mode=0o644, exist_ok=True)
-
-
-if __name__ == "__main__":
-    sys.exit(main())
diff --git a/src/tint/cmd/fuzz/wgsl/generate_wgsl_corpus.py b/src/tint/cmd/fuzz/wgsl/generate_wgsl_corpus.py
deleted file mode 100644
index 1cd6ea9..0000000
--- a/src/tint/cmd/fuzz/wgsl/generate_wgsl_corpus.py
+++ /dev/null
@@ -1,78 +0,0 @@
-#!/usr/bin/env python3
-
-# Copyright 2021 The Dawn & Tint Authors
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met:
-#
-# 1. Redistributions of source code must retain the above copyright notice, this
-#    list of conditions and the following disclaimer.
-#
-# 2. Redistributions in binary form must reproduce the above copyright notice,
-#    this list of conditions and the following disclaimer in the documentation
-#    and/or other materials provided with the distribution.
-#
-# 3. Neither the name of the copyright holder nor the names of its
-#    contributors may be used to endorse or promote products derived from
-#    this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
-# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-# Collect all .wgsl files under a given directory and copy them to a given
-# corpus directory, flattening their file names by replacing path
-# separators with underscores. If the output directory already exists, it
-# will be deleted and re-created. Files ending with ".expected.spvasm" are
-# skipped.
-#
-# The intended use of this script is to generate a corpus of WGSL shaders
-# for fuzzing.
-#
-# Usage:
-#    generate_wgsl_corpus.py <input_dir> <corpus_dir>
-
-import optparse
-import os
-import pathlib
-import shutil
-import sys
-
-
-def list_wgsl_files(root_search_dir):
-    for root, folders, files in os.walk(root_search_dir):
-        for filename in folders + files:
-            if pathlib.Path(filename).suffix == '.wgsl':
-                yield os.path.join(root, filename)
-
-
-def main():
-    parser = optparse.OptionParser(
-        usage="usage: %prog [option] input-dir output-dir")
-    parser.add_option('--stamp', dest='stamp', help='stamp file')
-    options, args = parser.parse_args(sys.argv[1:])
-    if len(args) != 2:
-        parser.error("incorrect number of arguments")
-    input_dir: str = os.path.abspath(args[0].rstrip(os.sep))
-    corpus_dir: str = os.path.abspath(args[1])
-    if os.path.exists(corpus_dir):
-        shutil.rmtree(corpus_dir)
-    os.makedirs(corpus_dir)
-    for in_file in list_wgsl_files(input_dir):
-        if in_file.endswith(".expected.wgsl"):
-            continue
-        out_file = in_file[len(input_dir) + 1:].replace(os.sep, '_')
-        shutil.copy(in_file, corpus_dir + os.sep + out_file)
-    if options.stamp:
-        pathlib.Path(options.stamp).touch(mode=0o644, exist_ok=True)
-
-
-if __name__ == "__main__":
-    sys.exit(main())
diff --git a/src/tint/tint.gni b/src/tint/tint.gni
index 866c0db..aa56777 100644
--- a/src/tint/tint.gni
+++ b/src/tint/tint.gni
@@ -162,10 +162,11 @@
 ###############################################################################
 if (tint_has_fuzzers) {
   import("//testing/libfuzzer/fuzzer_test.gni")
-  fuzzer_corpus_wgsl_dir = "${root_gen_dir}/fuzzers/wgsl_corpus"
-  fuzzer_corpus_wgsl_stamp = "${fuzzer_corpus_wgsl_dir}.stamp"
-  fuzzer_corpus_ir_dir = "${root_gen_dir}/fuzzers/ir_corpus"
-  fuzzer_corpus_ir_stamp = "${fuzzer_corpus_ir_dir}.stamp"
+  fuzzer_corpus_dir = "${root_gen_dir}/fuzzers"
+  fuzzer_corpus_wgsl_dir = "${fuzzer_corpus_dir}/wgsl_corpus"
+  fuzzer_corpus_wgsl_stamp = "${fuzzer_corpus_dir}/wgsl.stamp"
+  fuzzer_corpus_ir_dir = "${fuzzer_corpus_dir}/ir_corpus"
+  fuzzer_corpus_ir_stamp = "${fuzzer_corpus_dir}/ir.stamp"
 
   template("tint_fuzz_source_set") {
     source_set(target_name) {
@@ -225,20 +226,14 @@
           "only_ascii=1",
           "max_len=10000",
         ]
+
+        deps += [ "${tint_src_dir}:tint_generate_wgsl_corpus" ]
         seed_corpus = fuzzer_corpus_wgsl_dir
         seed_corpus_deps = [ "${tint_src_dir}:tint_generate_wgsl_corpus" ]
       } else if (target_name == "ir") {
-        # The IR corpus takes a long time to generate and may not
-        # actually be correctly formatted (crbug.com/345377541 &
-        # crbug.com/343218481).
-        # Disabling generating it by default until these issues
-        # are resolved.
-        # The corpus can still be generated by explicitly
-        # invoking tint_generate_ir_corpus
-        #
-        # seed_corpus = fuzzer_corpus_ir_dir
-        # seed_corpus_deps = [ "${tint_src_dir}:tint_generate_ir_corpus" ]
-        #
+        deps += [ "${tint_src_dir}:tint_generate_ir_corpus" ]
+        seed_corpus = fuzzer_corpus_ir_dir
+        seed_corpus_deps = [ "${tint_src_dir}:tint_generate_ir_corpus" ]
       } else {
         assert(false, "unsupported tint fuzzer target")
       }