Implement buffer lazy initialization before mapping

This patch implements buffer lazy initialization before MapAsync() and
at buffer creation when BufferDescriptor.mappedAtCreation == true.

Note that this patch doesn't add lazy initialization to MapReadAsync() and
MapWriteAsync() because those entry points are deprecated and will be
removed soon.
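
For reference, each backend now follows the same pattern before exposing
buffer contents: obtain the pending command context and call
EnsureDataInitialized() on it. A minimal sketch of what that helper does,
where NeedsInitialization(), InitializeToZero() and SetIsDataInitialized()
are simplified stand-ins for the real frontend/backend internals:

    MaybeError Buffer::EnsureDataInitialized(CommandRecordingContext* commandContext) {
        // Nothing to do if the buffer was already initialized (or fully
        // overwritten) by an earlier operation.
        if (!NeedsInitialization()) {
            return {};
        }
        // Record a clear-to-zero on the pending command context, then mark
        // the buffer so later uses skip this path.
        DAWN_TRY(InitializeToZero(commandContext));
        SetIsDataInitialized();
        return {};
    }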

BUG=dawn:414
TEST=dawn_end2end_tests

Change-Id: Ifea99833897081f599c45797e0829c57de1ac926
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/24687
Reviewed-by: Corentin Wallez <cwallez@chromium.org>
Commit-Queue: Jiawei Shao <jiawei.shao@intel.com>
diff --git a/src/dawn_native/d3d12/BufferD3D12.cpp b/src/dawn_native/d3d12/BufferD3D12.cpp
index 7075661..878cea4 100644
--- a/src/dawn_native/d3d12/BufferD3D12.cpp
+++ b/src/dawn_native/d3d12/BufferD3D12.cpp
@@ -275,6 +275,10 @@
     }
 
     MaybeError Buffer::MapAtCreationImpl() {
+        CommandRecordingContext* commandContext;
+        DAWN_TRY_ASSIGN(commandContext, ToBackend(GetDevice())->GetPendingCommandContext());
+        DAWN_TRY(EnsureDataInitialized(commandContext));
+
         // Setting isMapWrite to false on MapRead buffers to silence D3D12 debug layer warning.
         bool isMapWrite = (GetUsage() & wgpu::BufferUsage::MapWrite) != 0;
         DAWN_TRY(MapInternal(isMapWrite, 0, size_t(GetSize()), "D3D12 map at creation"));
@@ -290,6 +294,10 @@
     }
 
     MaybeError Buffer::MapAsyncImpl(wgpu::MapMode mode, size_t offset, size_t size) {
+        CommandRecordingContext* commandContext;
+        DAWN_TRY_ASSIGN(commandContext, ToBackend(GetDevice())->GetPendingCommandContext());
+        DAWN_TRY(EnsureDataInitialized(commandContext));
+
         return MapInternal(mode & wgpu::MapMode::Write, offset, size, "D3D12 map async");
     }
 
diff --git a/src/dawn_native/metal/BufferMTL.mm b/src/dawn_native/metal/BufferMTL.mm
index a356cd9..cfc6515 100644
--- a/src/dawn_native/metal/BufferMTL.mm
+++ b/src/dawn_native/metal/BufferMTL.mm
@@ -110,6 +110,10 @@
     }
 
     MaybeError Buffer::MapAtCreationImpl() {
+        CommandRecordingContext* commandContext =
+            ToBackend(GetDevice())->GetPendingCommandContext();
+        EnsureDataInitialized(commandContext);
+
         return {};
     }
 
@@ -122,6 +126,10 @@
     }
 
     MaybeError Buffer::MapAsyncImpl(wgpu::MapMode mode, size_t offset, size_t size) {
+        CommandRecordingContext* commandContext =
+            ToBackend(GetDevice())->GetPendingCommandContext();
+        EnsureDataInitialized(commandContext);
+
         return {};
     }
 
diff --git a/src/dawn_native/opengl/BufferGL.cpp b/src/dawn_native/opengl/BufferGL.cpp
index 56fe397..2659894 100644
--- a/src/dawn_native/opengl/BufferGL.cpp
+++ b/src/dawn_native/opengl/BufferGL.cpp
@@ -99,6 +99,8 @@
     }
 
     MaybeError Buffer::MapAtCreationImpl() {
+        EnsureDataInitialized();
+
         const OpenGLFunctions& gl = ToBackend(GetDevice())->gl;
         gl.BindBuffer(GL_ARRAY_BUFFER, mBuffer);
         mMappedData = gl.MapBuffer(GL_ARRAY_BUFFER, GL_WRITE_ONLY);
@@ -137,6 +139,8 @@
             size = 4;
         }
 
+        EnsureDataInitialized();
+
         // TODO(cwallez@chromium.org): this does GPU->CPU synchronization, we could require a high
         // version of OpenGL that would let us map the buffer unsynchronized.
         gl.BindBuffer(GL_ARRAY_BUFFER, mBuffer);
diff --git a/src/dawn_native/vulkan/BufferVk.cpp b/src/dawn_native/vulkan/BufferVk.cpp
index 613b589..b26ee12 100644
--- a/src/dawn_native/vulkan/BufferVk.cpp
+++ b/src/dawn_native/vulkan/BufferVk.cpp
@@ -240,6 +240,12 @@
     }
 
     MaybeError Buffer::MapAtCreationImpl() {
+        CommandRecordingContext* recordingContext =
+            ToBackend(GetDevice())->GetPendingRecordingContext();
+
+        // TODO(jiawei.shao@intel.com): initialize the mapped buffer on the CPU side.
+        EnsureDataInitialized(recordingContext);
+
         return {};
     }
 
@@ -263,6 +269,10 @@
         Device* device = ToBackend(GetDevice());
 
         CommandRecordingContext* recordingContext = device->GetPendingRecordingContext();
+
+        // TODO(jiawei.shao@intel.com): initialize the mapped buffer on the CPU side.
+        EnsureDataInitialized(recordingContext);
+
         if (mode & wgpu::MapMode::Read) {
             TransitionUsageNow(recordingContext, wgpu::BufferUsage::MapRead);
         } else {
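
The TODO comments in the Vulkan changes point at a possible follow-up: when
the buffer is about to be mapped anyway, the zero-fill could happen on the
CPU through the mapped pointer instead of recording a GPU clear. A rough
sketch of that idea, assuming a hypothetical mMappedPointer member and
helper (not part of this patch):

    // Hypothetical CPU-side alternative to the recorded GPU clear, usable
    // only when a host-visible mapping of the whole buffer exists.
    void Buffer::ClearMappedDataToZero() {
        memset(mMappedPointer, 0, static_cast<size_t>(GetSize()));
        SetIsDataInitialized();
    }
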
diff --git a/src/tests/end2end/BufferZeroInitTests.cpp b/src/tests/end2end/BufferZeroInitTests.cpp
index 28e43f5..52d172f 100644
--- a/src/tests/end2end/BufferZeroInitTests.cpp
+++ b/src/tests/end2end/BufferZeroInitTests.cpp
@@ -30,12 +30,35 @@
 
 class BufferZeroInitTest : public DawnTest {
   public:
-    wgpu::Buffer CreateBuffer(uint64_t size, wgpu::BufferUsage usage) {
+    wgpu::Buffer CreateBuffer(uint64_t size,
+                              wgpu::BufferUsage usage,
+                              bool mappedAtCreation = false) {
         wgpu::BufferDescriptor descriptor;
         descriptor.size = size;
         descriptor.usage = usage;
+        descriptor.mappedAtCreation = mappedAtCreation;
         return device.CreateBuffer(&descriptor);
     }
+
+    void MapAsyncAndWait(wgpu::Buffer buffer,
+                         wgpu::MapMode mapMode,
+                         uint64_t offset,
+                         uint64_t size) {
+        ASSERT(mapMode == wgpu::MapMode::Read || mapMode == wgpu::MapMode::Write);
+
+        bool done = false;
+        buffer.MapAsync(
+            mapMode, offset, size,
+            [](WGPUBufferMapAsyncStatus status, void* userdata) {
+                ASSERT_EQ(WGPUBufferMapAsyncStatus_Success, status);
+                *static_cast<bool*>(userdata) = true;
+            },
+            &done);
+
+        while (!done) {
+            WaitABit();
+        }
+    }
 };
 
 // Test that calling writeBuffer to overwrite the entire buffer doesn't need to lazily initialize
@@ -260,6 +283,101 @@
     }
 }
 
+// Test that the code path of readable buffer mapping clears the buffer correctly when it is the
+// first use of the buffer.
+TEST_P(BufferZeroInitTest, MapReadAsync) {
+    constexpr uint32_t kBufferSize = 16u;
+    constexpr wgpu::BufferUsage kBufferUsage =
+        wgpu::BufferUsage::MapRead | wgpu::BufferUsage::CopyDst;
+
+    constexpr wgpu::MapMode kMapMode = wgpu::MapMode::Read;
+
+    // Map the whole buffer
+    {
+        wgpu::Buffer buffer = CreateBuffer(kBufferSize, kBufferUsage);
+        EXPECT_LAZY_CLEAR(1u, MapAsyncAndWait(buffer, kMapMode, 0, kBufferSize));
+
+        const uint32_t* mappedDataUint = static_cast<const uint32_t*>(buffer.GetConstMappedRange());
+        for (uint32_t i = 0; i < kBufferSize / sizeof(uint32_t); ++i) {
+            EXPECT_EQ(0u, mappedDataUint[i]);
+        }
+        buffer.Unmap();
+    }
+
+    // Map a range of a buffer
+    {
+        wgpu::Buffer buffer = CreateBuffer(kBufferSize, kBufferUsage);
+
+        constexpr uint64_t kOffset = 4u;
+        constexpr uint64_t kSize = 8u;
+        EXPECT_LAZY_CLEAR(1u, MapAsyncAndWait(buffer, kMapMode, kOffset, kSize));
+
+        const uint32_t* mappedDataUint = static_cast<const uint32_t*>(buffer.GetConstMappedRange());
+        for (uint32_t i = 0; i < kSize / sizeof(uint32_t); ++i) {
+            EXPECT_EQ(0u, mappedDataUint[i]);
+        }
+        buffer.Unmap();
+
+        EXPECT_LAZY_CLEAR(0u, MapAsyncAndWait(buffer, kMapMode, 0, kBufferSize));
+        mappedDataUint = static_cast<const uint32_t*>(buffer.GetConstMappedRange());
+        for (uint32_t i = 0; i < kBufferSize / sizeof(uint32_t); ++i) {
+            EXPECT_EQ(0u, mappedDataUint[i]);
+        }
+        buffer.Unmap();
+    }
+}
+
+// Test that the code path of writable buffer mapping clears the buffer correctly when it is the
+// first use of the buffer.
+TEST_P(BufferZeroInitTest, MapWriteAsync) {
+    constexpr uint32_t kBufferSize = 16u;
+    constexpr wgpu::BufferUsage kBufferUsage =
+        wgpu::BufferUsage::MapWrite | wgpu::BufferUsage::CopySrc;
+
+    constexpr wgpu::MapMode kMapMode = wgpu::MapMode::Write;
+
+    constexpr std::array<uint32_t, kBufferSize / sizeof(uint32_t)> kExpectedData = {{0, 0, 0, 0}};
+
+    // Map the whole buffer
+    {
+        wgpu::Buffer buffer = CreateBuffer(kBufferSize, kBufferUsage);
+        EXPECT_LAZY_CLEAR(1u, MapAsyncAndWait(buffer, kMapMode, 0, kBufferSize));
+        buffer.Unmap();
+
+        EXPECT_BUFFER_U32_RANGE_EQ(reinterpret_cast<const uint32_t*>(kExpectedData.data()), buffer,
+                                   0, kExpectedData.size());
+    }
+
+    // Map a range of a buffer
+    {
+        wgpu::Buffer buffer = CreateBuffer(kBufferSize, kBufferUsage);
+
+        constexpr uint64_t kOffset = 4u;
+        constexpr uint64_t kSize = 8u;
+        EXPECT_LAZY_CLEAR(1u, MapAsyncAndWait(buffer, kMapMode, kOffset, kSize));
+        buffer.Unmap();
+
+        EXPECT_BUFFER_U32_RANGE_EQ(reinterpret_cast<const uint32_t*>(kExpectedData.data()), buffer,
+                                   0, kExpectedData.size());
+    }
+}
+
+// Test that the code path of creating a buffer with BufferDescriptor.mappedAtCreation == true
+// clears the buffer correctly at the creation of the buffer.
+TEST_P(BufferZeroInitTest, MapAtCreation) {
+    constexpr uint32_t kBufferSize = 16u;
+    constexpr wgpu::BufferUsage kBufferUsage =
+        wgpu::BufferUsage::MapWrite | wgpu::BufferUsage::CopySrc;
+
+    wgpu::Buffer buffer;
+    EXPECT_LAZY_CLEAR(1u, buffer = CreateBuffer(kBufferSize, kBufferUsage, true));
+    buffer.Unmap();
+
+    constexpr std::array<uint32_t, kBufferSize / sizeof(uint32_t)> kExpectedData = {{0, 0, 0, 0}};
+    EXPECT_BUFFER_U32_RANGE_EQ(reinterpret_cast<const uint32_t*>(kExpectedData.data()), buffer, 0,
+                               kExpectedData.size());
+}
+
 DAWN_INSTANTIATE_TEST(BufferZeroInitTest,
                       D3D12Backend({"nonzero_clear_resources_on_creation_for_testing",
                                     "lazy_clear_buffer_on_first_use"}),