[tint] Improve e2e test output

- Make sure WriteFile flushes stdout after writing to it. Previously, if
the validation step crashed (e.g. in dxcompiler.dll), we'd end up
emitting partial or no HLSL in the generated file.

- Add a "tint executable returned error: <error string>" line to
generated files that fail. This is useful when the tint process
crashes. Note that this will affect all generated SKIP files.

Only one regenerated SKIP file is included in this change. The rest of
the HLSL ones will be regenerated in a follow-up change.

Change-Id: I1173214ea43d82692ab1d4c3dda7652bb9e6e9f6
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/203716
Reviewed-by: James Price <jrprice@google.com>
Commit-Queue: Antonio Maiorano <amaiorano@google.com>
diff --git a/src/tint/cmd/common/helper.h b/src/tint/cmd/common/helper.h
index ad204b7..aaa068a 100644
--- a/src/tint/cmd/common/helper.h
+++ b/src/tint/cmd/common/helper.h
@@ -169,7 +169,9 @@
         }
         return false;
     }
-    if (!use_stdout) {
+    if (use_stdout) {
+        fflush(file);
+    } else {
         fclose(file);
     }
 
diff --git a/test/tint/buffer/storage/dynamic_index/write_f16.wgsl.expected.ir.dxc.hlsl b/test/tint/buffer/storage/dynamic_index/write_f16.wgsl.expected.ir.dxc.hlsl
index b905b42..8594b33 100644
--- a/test/tint/buffer/storage/dynamic_index/write_f16.wgsl.expected.ir.dxc.hlsl
+++ b/test/tint/buffer/storage/dynamic_index/write_f16.wgsl.expected.ir.dxc.hlsl
@@ -137,4 +137,72 @@
 
 void v_21(uint offset, float2x4 obj) {
   sb.Store4((offset + 0u), asuint(obj[0u]));
-  sb.Store4((
\ No newline at end of file
+  sb.Store4((offset + 16u), asuint(obj[1u]));
+}
+
+void v_22(uint offset, float2x3 obj) {
+  sb.Store3((offset + 0u), asuint(obj[0u]));
+  sb.Store3((offset + 16u), asuint(obj[1u]));
+}
+
+void v_23(uint offset, float2x2 obj) {
+  sb.Store2((offset + 0u), asuint(obj[0u]));
+  sb.Store2((offset + 8u), asuint(obj[1u]));
+}
+
+void main_inner(uint idx) {
+  uint v_24 = (0u + (uint(idx) * 800u));
+  sb.Store(v_24, asuint(0.0f));
+  uint v_25 = (4u + (uint(idx) * 800u));
+  sb.Store(v_25, asuint(0));
+  sb.Store((8u + (uint(idx) * 800u)), 0u);
+  sb.Store<float16_t>((12u + (uint(idx) * 800u)), float16_t(0.0h));
+  uint v_26 = (16u + (uint(idx) * 800u));
+  sb.Store2(v_26, asuint((0.0f).xx));
+  uint v_27 = (24u + (uint(idx) * 800u));
+  sb.Store2(v_27, asuint((0).xx));
+  sb.Store2((32u + (uint(idx) * 800u)), (0u).xx);
+  sb.Store<vector<float16_t, 2> >((40u + (uint(idx) * 800u)), (float16_t(0.0h)).xx);
+  uint v_28 = (48u + (uint(idx) * 800u));
+  sb.Store3(v_28, asuint((0.0f).xxx));
+  uint v_29 = (64u + (uint(idx) * 800u));
+  sb.Store3(v_29, asuint((0).xxx));
+  sb.Store3((80u + (uint(idx) * 800u)), (0u).xxx);
+  sb.Store<vector<float16_t, 3> >((96u + (uint(idx) * 800u)), (float16_t(0.0h)).xxx);
+  uint v_30 = (112u + (uint(idx) * 800u));
+  sb.Store4(v_30, asuint((0.0f).xxxx));
+  uint v_31 = (128u + (uint(idx) * 800u));
+  sb.Store4(v_31, asuint((0).xxxx));
+  sb.Store4((144u + (uint(idx) * 800u)), (0u).xxxx);
+  sb.Store<vector<float16_t, 4> >((160u + (uint(idx) * 800u)), (float16_t(0.0h)).xxxx);
+  v_23((168u + (uint(idx) * 800u)), float2x2((0.0f).xx, (0.0f).xx));
+  v_22((192u + (uint(idx) * 800u)), float2x3((0.0f).xxx, (0.0f).xxx));
+  v_21((224u + (uint(idx) * 800u)), float2x4((0.0f).xxxx, (0.0f).xxxx));
+  v_20((256u + (uint(idx) * 800u)), float3x2((0.0f).xx, (0.0f).xx, (0.0f).xx));
+  v_19((288u + (uint(idx) * 800u)), float3x3((0.0f).xxx, (0.0f).xxx, (0.0f).xxx));
+  v_18((336u + (uint(idx) * 800u)), float3x4((0.0f).xxxx, (0.0f).xxxx, (0.0f).xxxx));
+  v_17((384u + (uint(idx) * 800u)), float4x2((0.0f).xx, (0.0f).xx, (0.0f).xx, (0.0f).xx));
+  v_16((416u + (uint(idx) * 800u)), float4x3((0.0f).xxx, (0.0f).xxx, (0.0f).xxx, (0.0f).xxx));
+  v_15((480u + (uint(idx) * 800u)), float4x4((0.0f).xxxx, (0.0f).xxxx, (0.0f).xxxx, (0.0f).xxxx));
+  v_14((544u + (uint(idx) * 800u)), matrix<float16_t, 2, 2>((float16_t(0.0h)).xx, (float16_t(0.0h)).xx));
+  v_13((552u + (uint(idx) * 800u)), matrix<float16_t, 2, 3>((float16_t(0.0h)).xxx, (float16_t(0.0h)).xxx));
+  v_12((568u + (uint(idx) * 800u)), matrix<float16_t, 2, 4>((float16_t(0.0h)).xxxx, (float16_t(0.0h)).xxxx));
+  v_11((584u + (uint(idx) * 800u)), matrix<float16_t, 3, 2>((float16_t(0.0h)).xx, (float16_t(0.0h)).xx, (float16_t(0.0h)).xx));
+  v_10((600u + (uint(idx) * 800u)), matrix<float16_t, 3, 3>((float16_t(0.0h)).xxx, (float16_t(0.0h)).xxx, (float16_t(0.0h)).xxx));
+  v_9((624u + (uint(idx) * 800u)), matrix<float16_t, 3, 4>((float16_t(0.0h)).xxxx, (float16_t(0.0h)).xxxx, (float16_t(0.0h)).xxxx));
+  v((648u + (uint(idx) * 800u)), matrix<float16_t, 4, 2>((float16_t(0.0h)).xx, (float16_t(0.0h)).xx, (float16_t(0.0h)).xx, (float16_t(0.0h)).xx));
+  v_8((664u + (uint(idx) * 800u)), matrix<float16_t, 4, 3>((float16_t(0.0h)).xxx, (float16_t(0.0h)).xxx, (float16_t(0.0h)).xxx, (float16_t(0.0h)).xxx));
+  v_7((696u + (uint(idx) * 800u)), matrix<float16_t, 4, 4>((float16_t(0.0h)).xxxx, (float16_t(0.0h)).xxxx, (float16_t(0.0h)).xxxx, (float16_t(0.0h)).xxxx));
+  float3 v_32[2] = (float3[2])0;
+  v_4((736u + (uint(idx) * 800u)), v_32);
+  matrix<float16_t, 4, 2> v_33[2] = (matrix<float16_t, 4, 2>[2])0;
+  v_1((768u + (uint(idx) * 800u)), v_33);
+}
+
+[numthreads(1, 1, 1)]
+void main(main_inputs inputs) {
+  main_inner(inputs.idx);
+}
+
+
+tint executable returned error: exit status 0xe0000001
diff --git a/tools/src/cmd/tests/main.go b/tools/src/cmd/tests/main.go
index d2fb28d..1f1c4a1 100644
--- a/tools/src/cmd/tests/main.go
+++ b/tools/src/cmd/tests/main.go
@@ -964,6 +964,7 @@
 			return false, fmt.Sprintf("test timed out after %v", testTimeout)
 		}
 		if str != "" {
+			str += fmt.Sprintf("\ntint executable returned error: %v\n", err.Error())
 			return false, str
 		}
 		return false, err.Error()