vulkan: Remove storageInputOutput16 requirement

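Tint can now polyfill f16 shader IO by using f32 types, so we can
enable the ShaderF16 feature on Vulkan without the storageInputOutput16
capability. When the device does support the capability it is still
used by default, controlled by the new
vulkan_use_storage_input_output_16 toggle.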

Blocklist f16 on NVIDIA devices due to frequent driver crashes.

Add CTS expectations for new failures on Intel devices.

Bug: dawn:1510, tint:2161, tint:2164
Change-Id: I7926b328608c56f387ea40b7a4a7bfdf0cc6c5ca
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/173706
Reviewed-by: Ben Clayton <bclayton@google.com>
Reviewed-by: Corentin Wallez <cwallez@chromium.org>
Commit-Queue: James Price <jrprice@google.com>
Kokoro: Kokoro <noreply+kokoro@google.com>
diff --git a/src/dawn/native/Toggles.cpp b/src/dawn/native/Toggles.cpp
index f8b6ad1..9447f71 100644
--- a/src/dawn/native/Toggles.cpp
+++ b/src/dawn/native/Toggles.cpp
@@ -523,6 +523,11 @@
       "waiting for the next Tick. This enables using the stack trace in which the uncaptured error "
       "occured when breaking into the uncaptured error callback.",
       "https://crbug.com/dawn/1789", ToggleStage::Device}},
+    {Toggle::VulkanUseStorageInputOutput16,
+     {"vulkan_use_storage_input_output_16",
+      "Use the StorageInputOutput16 SPIR-V capability for f16 shader IO types when the device "
+      "supports it.",
+      "https://crbug.com/tint/2161", ToggleStage::Device}},
     {Toggle::NoWorkaroundSampleMaskBecomesZeroForAllButLastColorTarget,
      {"no_workaround_sample_mask_becomes_zero_for_all_but_last_color_target",
       "MacOS 12.0+ Intel has a bug where the sample mask is only applied for the last color "
diff --git a/src/dawn/native/Toggles.h b/src/dawn/native/Toggles.h
index efcc5cf..505c3da 100644
--- a/src/dawn/native/Toggles.h
+++ b/src/dawn/native/Toggles.h
@@ -129,6 +129,7 @@
     ExposeWGSLExperimentalFeatures,
     DisablePolyfillsOnIntegerDivisonAndModulo,
     EnableImmediateErrorHandling,
+    VulkanUseStorageInputOutput16,
 
     // Unresolved issues.
     NoWorkaroundSampleMaskBecomesZeroForAllButLastColorTarget,
diff --git a/src/dawn/native/vulkan/DeviceVk.cpp b/src/dawn/native/vulkan/DeviceVk.cpp
index 151b123..e404a84 100644
--- a/src/dawn/native/vulkan/DeviceVk.cpp
+++ b/src/dawn/native/vulkan/DeviceVk.cpp
@@ -449,21 +449,20 @@
         usedKnobs.features.depthClamp = VK_TRUE;
     }
 
-    // TODO(dawn:1510, tint:1473): After implementing a transform to handle the pipeline input /
-    // output if necessary, relax the requirement of storageInputOutput16.
     if (HasFeature(Feature::ShaderF16)) {
         const VulkanDeviceInfo& deviceInfo = ToBackend(GetPhysicalDevice())->GetDeviceInfo();
         DAWN_ASSERT(deviceInfo.HasExt(DeviceExt::ShaderFloat16Int8) &&
                     deviceInfo.shaderFloat16Int8Features.shaderFloat16 == VK_TRUE &&
                     deviceInfo.HasExt(DeviceExt::_16BitStorage) &&
                     deviceInfo._16BitStorageFeatures.storageBuffer16BitAccess == VK_TRUE &&
-                    deviceInfo._16BitStorageFeatures.storageInputOutput16 == VK_TRUE &&
                     deviceInfo._16BitStorageFeatures.uniformAndStorageBuffer16BitAccess == VK_TRUE);
 
         usedKnobs.shaderFloat16Int8Features.shaderFloat16 = VK_TRUE;
         usedKnobs._16BitStorageFeatures.storageBuffer16BitAccess = VK_TRUE;
-        usedKnobs._16BitStorageFeatures.storageInputOutput16 = VK_TRUE;
         usedKnobs._16BitStorageFeatures.uniformAndStorageBuffer16BitAccess = VK_TRUE;
+        if (deviceInfo._16BitStorageFeatures.storageInputOutput16 == VK_TRUE) {
+            usedKnobs._16BitStorageFeatures.storageInputOutput16 = VK_TRUE;
+        }
 
         featuresChain.Add(&usedKnobs.shaderFloat16Int8Features,
                           VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES_KHR);
diff --git a/src/dawn/native/vulkan/PhysicalDeviceVk.cpp b/src/dawn/native/vulkan/PhysicalDeviceVk.cpp
index 2530f43..5e4fb35 100644
--- a/src/dawn/native/vulkan/PhysicalDeviceVk.cpp
+++ b/src/dawn/native/vulkan/PhysicalDeviceVk.cpp
@@ -244,9 +244,11 @@
         mDeviceInfo.HasExt(DeviceExt::_16BitStorage) &&
         mDeviceInfo.shaderFloat16Int8Features.shaderFloat16 == VK_TRUE &&
         mDeviceInfo._16BitStorageFeatures.storageBuffer16BitAccess == VK_TRUE &&
-        mDeviceInfo._16BitStorageFeatures.storageInputOutput16 == VK_TRUE &&
         mDeviceInfo._16BitStorageFeatures.uniformAndStorageBuffer16BitAccess == VK_TRUE) {
-        EnableFeature(Feature::ShaderF16);
+        // TODO(crbug.com/tint/2164): Investigate crashes in f16 CTS tests to enable on NVIDIA.
+        if (!gpu_info::IsNvidia(GetVendorId())) {
+            EnableFeature(Feature::ShaderF16);
+        }
     }
 
     // unclippedDepth=true translates to depthClamp=true, which implicitly disables clipping.
@@ -690,6 +692,14 @@
     // extension VK_KHR_zero_initialize_workgroup_memory.
     deviceToggles->Default(Toggle::VulkanUseZeroInitializeWorkgroupMemoryExtension, true);
 
+    // StorageInputOutput16 can only be used when the device supports the capability, so force the
+    // toggle off when it is unavailable.
+    if (GetDeviceInfo()._16BitStorageFeatures.storageInputOutput16 == VK_FALSE) {
+        deviceToggles->ForceSet(Toggle::VulkanUseStorageInputOutput16, false);
+    }
+    // By default try to use the StorageInputOutput16 capability.
+    deviceToggles->Default(Toggle::VulkanUseStorageInputOutput16, true);
+
     // Inject fragment shaders in all vertex-only pipelines.
     // TODO(crbug.com/dawn/1698): relax this requirement where the Vulkan spec allows.
     // In particular, enable rasterizer discard if the depth-stencil stage is a no-op, and skip
diff --git a/src/dawn/native/vulkan/ShaderModuleVk.cpp b/src/dawn/native/vulkan/ShaderModuleVk.cpp
index e498fea..253179e 100644
--- a/src/dawn/native/vulkan/ShaderModuleVk.cpp
+++ b/src/dawn/native/vulkan/ShaderModuleVk.cpp
@@ -335,6 +335,8 @@
         GetDevice()->IsToggleEnabled(Toggle::DisableWorkgroupInit);
     req.tintOptions.use_zero_initialize_workgroup_memory_extension =
         GetDevice()->IsToggleEnabled(Toggle::VulkanUseZeroInitializeWorkgroupMemoryExtension);
+    req.tintOptions.use_storage_input_output_16 =
+        GetDevice()->IsToggleEnabled(Toggle::VulkanUseStorageInputOutput16);
     req.tintOptions.bindings = std::move(bindings);
     req.tintOptions.disable_image_robustness =
         GetDevice()->IsToggleEnabled(Toggle::VulkanUseImageRobustAccess2);
diff --git a/src/dawn/tests/end2end/ShaderF16Tests.cpp b/src/dawn/tests/end2end/ShaderF16Tests.cpp
index ae5cc49..b4e51b2 100644
--- a/src/dawn/tests/end2end/ShaderF16Tests.cpp
+++ b/src/dawn/tests/end2end/ShaderF16Tests.cpp
@@ -451,6 +451,7 @@
                             D3D12Backend(),
                             D3D12Backend({"use_dxc"}),
                             VulkanBackend(),
+                            VulkanBackend({}, {"vulkan_use_storage_input_output_16"}),
                             MetalBackend(),
                             OpenGLBackend(),
                             OpenGLESBackend(),
diff --git a/webgpu-cts/expectations.txt b/webgpu-cts/expectations.txt
index 7f3ded3..dbb64ed 100644
--- a/webgpu-cts/expectations.txt
+++ b/webgpu-cts/expectations.txt
@@ -734,6 +734,54 @@
 crbug.com/dawn/2389 webgpu:shader,validation,parse,continuing:placement:stmt="continuing_switch_break" [ Failure ]
 
 ################################################################################
+# Intel F16 failures with Vulkan
+################################################################################
+crbug.com/tint/2165 [ intel-0x9bc5 ubuntu webgpu-adapter-default ] webgpu:shader,execution,expression,binary,f16_matrix_vector_multiplication:matrix_vector:inputSource="uniform";cols=2;rows=3 [ Failure ]
+crbug.com/tint/2165 [ intel-0x9bc5 ubuntu webgpu-adapter-default ] webgpu:shader,execution,expression,binary,f16_matrix_vector_multiplication:matrix_vector:inputSource="uniform";cols=2;rows=4 [ Failure ]
+crbug.com/tint/2165 [ intel-0x9bc5 ubuntu webgpu-adapter-default ] webgpu:shader,execution,expression,binary,f16_matrix_vector_multiplication:matrix_vector:inputSource="uniform";cols=3;rows=2 [ Failure ]
+crbug.com/tint/2165 [ intel-0x9bc5 ubuntu webgpu-adapter-default ] webgpu:shader,execution,expression,binary,f16_matrix_vector_multiplication:matrix_vector:inputSource="uniform";cols=3;rows=3 [ Failure ]
+crbug.com/tint/2165 [ intel-0x9bc5 ubuntu webgpu-adapter-default ] webgpu:shader,execution,expression,binary,f16_matrix_vector_multiplication:matrix_vector:inputSource="uniform";cols=3;rows=4 [ Failure ]
+crbug.com/tint/2165 [ intel-0x9bc5 ubuntu webgpu-adapter-default ] webgpu:shader,execution,expression,binary,f16_matrix_vector_multiplication:matrix_vector:inputSource="uniform";cols=4;rows=2 [ Failure ]
+crbug.com/tint/2165 [ intel-0x9bc5 ubuntu webgpu-adapter-default ] webgpu:shader,execution,expression,binary,f16_matrix_vector_multiplication:matrix_vector:inputSource="uniform";cols=4;rows=3 [ Failure ]
+crbug.com/tint/2165 [ intel-0x9bc5 ubuntu webgpu-adapter-default ] webgpu:shader,execution,expression,binary,f16_matrix_vector_multiplication:matrix_vector:inputSource="uniform";cols=4;rows=4 [ Failure ]
+crbug.com/tint/2165 [ intel-0x9bc5 ubuntu webgpu-adapter-default ] webgpu:shader,execution,expression,binary,f16_matrix_vector_multiplication:vector_matrix:inputSource="uniform";cols=3;rows=4 [ Failure ]
+crbug.com/tint/2165 [ intel-0x9bc5 ubuntu webgpu-adapter-default ] webgpu:shader,execution,expression,binary,f16_matrix_vector_multiplication:vector_matrix:inputSource="uniform";cols=4;rows=4 [ Failure ]
+crbug.com/tint/2165 [ intel-0x9bc5 ubuntu webgpu-adapter-default ] webgpu:shader,execution,expression,binary,f16_matrix_vector_multiplication:vector_matrix_compound:inputSource="uniform";dim=4 [ Failure ]
+crbug.com/tint/2166 [ intel-0x9bc5 ubuntu webgpu-adapter-default ] webgpu:shader,execution,expression,binary,f16_remainder:scalar_compound:inputSource="uniform";vectorize=3 [ Failure ]
+crbug.com/tint/2166 [ intel-0x9bc5 ubuntu webgpu-adapter-default ] webgpu:shader,execution,expression,binary,f16_remainder:vector:inputSource="uniform";vectorize=3 [ Failure ]
+crbug.com/tint/2167 [ intel-0x9bc5 ubuntu webgpu-adapter-default ] webgpu:shader,execution,expression,call,builtin,determinant:f16:inputSource="uniform";dim=2 [ Failure ]
+crbug.com/tint/2167 [ intel-0x9bc5 ubuntu webgpu-adapter-default ] webgpu:shader,execution,expression,call,builtin,determinant:f16:inputSource="uniform";dim=3 [ Failure ]
+crbug.com/tint/2167 [ intel-0x9bc5 ubuntu webgpu-adapter-default ] webgpu:shader,execution,expression,call,builtin,determinant:f16:inputSource="uniform";dim=4 [ Failure ]
+crbug.com/tint/2168 [ intel-0x9bc5 ubuntu webgpu-adapter-default ] webgpu:shader,execution,expression,call,builtin,fma:f16:inputSource="uniform";vectorize=2 [ Failure ]
+crbug.com/tint/2168 [ intel-0x9bc5 ubuntu webgpu-adapter-default ] webgpu:shader,execution,expression,call,builtin,fma:f16:inputSource="uniform";vectorize=3 [ Failure ]
+crbug.com/tint/2168 [ intel-0x9bc5 ubuntu webgpu-adapter-default ] webgpu:shader,execution,expression,call,builtin,fma:f16:inputSource="uniform";vectorize=4 [ Failure ]
+crbug.com/tint/2169 [ intel-0x9bc5 ubuntu webgpu-adapter-default ] webgpu:shader,execution,expression,call,builtin,ldexp:f16:inputSource="storage_r";vectorize="_undef_" [ Failure ]
+crbug.com/tint/2169 [ intel-0x9bc5 ubuntu webgpu-adapter-default ] webgpu:shader,execution,expression,call,builtin,ldexp:f16:inputSource="storage_r";vectorize=2 [ Failure ]
+crbug.com/tint/2169 [ intel-0x9bc5 ubuntu webgpu-adapter-default ] webgpu:shader,execution,expression,call,builtin,ldexp:f16:inputSource="storage_r";vectorize=3 [ Failure ]
+crbug.com/tint/2169 [ intel-0x9bc5 ubuntu webgpu-adapter-default ] webgpu:shader,execution,expression,call,builtin,ldexp:f16:inputSource="storage_r";vectorize=4 [ Failure ]
+crbug.com/tint/2169 [ intel-0x9bc5 ubuntu webgpu-adapter-default ] webgpu:shader,execution,expression,call,builtin,ldexp:f16:inputSource="storage_rw";vectorize="_undef_" [ Failure ]
+crbug.com/tint/2169 [ intel-0x9bc5 ubuntu webgpu-adapter-default ] webgpu:shader,execution,expression,call,builtin,ldexp:f16:inputSource="storage_rw";vectorize=2 [ Failure ]
+crbug.com/tint/2169 [ intel-0x9bc5 ubuntu webgpu-adapter-default ] webgpu:shader,execution,expression,call,builtin,ldexp:f16:inputSource="storage_rw";vectorize=3 [ Failure ]
+crbug.com/tint/2169 [ intel-0x9bc5 ubuntu webgpu-adapter-default ] webgpu:shader,execution,expression,call,builtin,ldexp:f16:inputSource="storage_rw";vectorize=4 [ Failure ]
+crbug.com/tint/2169 [ intel-0x9bc5 ubuntu webgpu-adapter-default ] webgpu:shader,execution,expression,call,builtin,ldexp:f16:inputSource="uniform";vectorize="_undef_" [ Failure ]
+crbug.com/tint/2169 [ intel-0x9bc5 ubuntu webgpu-adapter-default ] webgpu:shader,execution,expression,call,builtin,ldexp:f16:inputSource="uniform";vectorize=2 [ Failure ]
+crbug.com/tint/2169 [ intel-0x9bc5 ubuntu webgpu-adapter-default ] webgpu:shader,execution,expression,call,builtin,ldexp:f16:inputSource="uniform";vectorize=3 [ Failure ]
+crbug.com/tint/2169 [ intel-0x9bc5 ubuntu webgpu-adapter-default ] webgpu:shader,execution,expression,call,builtin,ldexp:f16:inputSource="uniform";vectorize=4 [ Failure ]
+crbug.com/tint/2170 [ intel-0x9bc5 ubuntu webgpu-adapter-default ] webgpu:shader,execution,expression,call,builtin,mix:f16_nonmatching_vec2:inputSource="uniform" [ Failure ]
+crbug.com/tint/2170 [ intel-0x9bc5 ubuntu webgpu-adapter-default ] webgpu:shader,execution,expression,call,builtin,mix:f16_nonmatching_vec3:inputSource="uniform" [ Failure ]
+crbug.com/tint/2170 [ intel-0x9bc5 ubuntu webgpu-adapter-default ] webgpu:shader,execution,expression,call,builtin,mix:f16_nonmatching_vec4:inputSource="uniform" [ Failure ]
+crbug.com/tint/2171 [ intel-0x9bc5 ubuntu webgpu-adapter-default ] webgpu:shader,execution,expression,call,builtin,saturate:f16:inputSource="storage_r";vectorize=2 [ Failure ]
+crbug.com/tint/2171 [ intel-0x9bc5 ubuntu webgpu-adapter-default ] webgpu:shader,execution,expression,call,builtin,saturate:f16:inputSource="storage_r";vectorize=3 [ Failure ]
+crbug.com/tint/2171 [ intel-0x9bc5 ubuntu webgpu-adapter-default ] webgpu:shader,execution,expression,call,builtin,saturate:f16:inputSource="storage_r";vectorize=4 [ Failure ]
+crbug.com/tint/2171 [ intel-0x9bc5 ubuntu webgpu-adapter-default ] webgpu:shader,execution,expression,call,builtin,saturate:f16:inputSource="storage_rw";vectorize=2 [ Failure ]
+crbug.com/tint/2171 [ intel-0x9bc5 ubuntu webgpu-adapter-default ] webgpu:shader,execution,expression,call,builtin,saturate:f16:inputSource="storage_rw";vectorize=3 [ Failure ]
+crbug.com/tint/2171 [ intel-0x9bc5 ubuntu webgpu-adapter-default ] webgpu:shader,execution,expression,call,builtin,saturate:f16:inputSource="storage_rw";vectorize=4 [ Failure ]
+crbug.com/tint/2172 [ intel-0x9bc5 ubuntu webgpu-adapter-default ] webgpu:shader,execution,memory_layout:read_layout:case="mat2x3h_size";aspace="uniform" [ Failure ]
+crbug.com/tint/2172 [ intel-0x9bc5 ubuntu webgpu-adapter-default ] webgpu:shader,execution,memory_layout:read_layout:case="mat3x3h_size";aspace="uniform" [ Failure ]
+crbug.com/tint/2172 [ intel-0x9bc5 ubuntu webgpu-adapter-default ] webgpu:shader,execution,memory_layout:read_layout:case="mat4x3h_size";aspace="uniform" [ Failure ]
+
+
+################################################################################
 # Flip video tests failing
 # Related PRs:
 #   - https://github.com/gpuweb/cts/pull/3278