Fix the initialization of the buffers with MapRead and MapAtCreation

This patch fixes the issues on the initialization of buffers with
MapRead usage and mappedAtCreation == true.
1. The buffers with MapRead usage and mappedAtCreation == true can be
   read from the CPU side just after the creation of the buffer;
   however, at that time the internal pending command buffers may not
   have been executed yet, so the buffer is not cleared as expected.
2. On D3D12 the buffer with MapRead and mappedAtCreation == true is
   created on the READBACK heap, so the data written on the CPU
   side cannot be uploaded to the GPU memory. When the buffer is mapped
   again, all the original data written through the CPU pointer will be
   overwritten by the data in the GPU memory (which means it is also
   cleared to 0).

This patch fixes this issue by:
1. clearing the buffers with mappedAtCreation == true on the CPU side.
2. on D3D12 making the buffer with MapRead and mappedAtCreation == true
   use the staging buffer instead of mapping itself.

Note that this change is only related to the code path with the Toggle
"nonzero_clear_resources_on_creation_for_testing" enabled; currently
we don't plan to make a similar change when we enable Dawn wire.

BUG=dawn:414
TEST=dawn_end2end_tests

Change-Id: I2b3d0840333e8d99759800ab9fc141d0a7cf2f8d
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/27220
Reviewed-by: Corentin Wallez <cwallez@chromium.org>
Commit-Queue: Jiawei Shao <jiawei.shao@intel.com>
diff --git a/src/dawn_native/Buffer.cpp b/src/dawn_native/Buffer.cpp
index b2bd300..49a7717 100644
--- a/src/dawn_native/Buffer.cpp
+++ b/src/dawn_native/Buffer.cpp
@@ -55,7 +55,7 @@
             }
 
           private:
-            bool IsMappableAtCreation() const override {
+            bool IsCPUWritableAtCreation() const override {
                 UNREACHABLE();
                 return false;
             }
@@ -161,6 +161,23 @@
     }
 
     MaybeError BufferBase::MapAtCreation() {
+        DAWN_TRY(MapAtCreationInternal());
+
+        // TODO(jiawei.shao@intel.com): check Toggle::LazyClearResourceOnFirstUse instead when
+        // buffer lazy initialization is completely supported.
+        DeviceBase* device = GetDevice();
+        if (device->IsToggleEnabled(Toggle::LazyClearBufferOnFirstUse)) {
+            memset(GetMappedRange(0, mSize), uint8_t(0u), mSize);
+            SetIsDataInitialized();
+            device->IncrementLazyClearCountForTesting();
+        } else if (device->IsToggleEnabled(Toggle::NonzeroClearResourcesOnCreationForTesting)) {
+            memset(GetMappedRange(0, mSize), uint8_t(1u), mSize);
+        }
+
+        return {};
+    }
+
+    MaybeError BufferBase::MapAtCreationInternal() {
         ASSERT(!IsError());
         mState = BufferState::MappedAtCreation;
         mMapOffset = 0;
@@ -173,17 +190,16 @@
         }
 
         // Mappable buffers don't use a staging buffer and are just as if mapped through MapAsync.
-        if (IsMappableAtCreation()) {
+        if (IsCPUWritableAtCreation()) {
             DAWN_TRY(MapAtCreationImpl());
-            return {};
+        } else {
+            // If any of these fail, the buffer will be deleted and replaced with an
+            // error buffer.
+            // TODO(enga): Suballocate and reuse memory from a larger staging buffer so we don't
+            // create many small buffers.
+            DAWN_TRY_ASSIGN(mStagingBuffer, GetDevice()->CreateStagingBuffer(GetSize()));
         }
 
-        // If any of these fail, the buffer will be deleted and replaced with an
-        // error buffer.
-        // TODO(enga): Suballocate and reuse memory from a larger staging buffer so we don't create
-        // many small buffers.
-        DAWN_TRY_ASSIGN(mStagingBuffer, GetDevice()->CreateStagingBuffer(GetSize()));
-
         return {};
     }
 
@@ -298,7 +314,7 @@
             if (mStagingBuffer != nullptr) {
                 mStagingBuffer.reset();
             } else if (mSize != 0) {
-                ASSERT(IsMappableAtCreation());
+                ASSERT(IsCPUWritableAtCreation());
                 Unmap();
             }
         }
@@ -347,7 +363,7 @@
             if (mStagingBuffer != nullptr) {
                 GetDevice()->ConsumedError(CopyFromStagingBuffer());
             } else if (mSize != 0) {
-                ASSERT(IsMappableAtCreation());
+                ASSERT(IsCPUWritableAtCreation());
                 UnmapImpl();
             }
         }
diff --git a/src/dawn_native/Buffer.h b/src/dawn_native/Buffer.h
index fdd0f7c..39ba68c 100644
--- a/src/dawn_native/Buffer.h
+++ b/src/dawn_native/Buffer.h
@@ -81,6 +81,8 @@
 
         bool IsMapped() const;
 
+        MaybeError MapAtCreationInternal();
+
       private:
         virtual MaybeError MapAtCreationImpl() = 0;
         virtual MaybeError MapAsyncImpl(wgpu::MapMode mode, size_t offset, size_t size) = 0;
@@ -88,7 +90,7 @@
         virtual void DestroyImpl() = 0;
         virtual void* GetMappedPointerImpl() = 0;
 
-        virtual bool IsMappableAtCreation() const = 0;
+        virtual bool IsCPUWritableAtCreation() const = 0;
         MaybeError CopyFromStagingBuffer();
         void* GetMappedRangeInternal(bool writable, size_t offset, size_t size);
         void CallMapCallback(uint32_t serial, WGPUBufferMapAsyncStatus status);
diff --git a/src/dawn_native/d3d12/BufferD3D12.cpp b/src/dawn_native/d3d12/BufferD3D12.cpp
index 095dca0..7501a50 100644
--- a/src/dawn_native/d3d12/BufferD3D12.cpp
+++ b/src/dawn_native/d3d12/BufferD3D12.cpp
@@ -89,7 +89,7 @@
         : BufferBase(device, descriptor) {
     }
 
-    MaybeError Buffer::Initialize() {
+    MaybeError Buffer::Initialize(bool mappedAtCreation) {
         D3D12_RESOURCE_DESC resourceDescriptor;
         resourceDescriptor.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
         resourceDescriptor.Alignment = 0;
@@ -130,7 +130,10 @@
             mResourceAllocation,
             ToBackend(GetDevice())->AllocateMemory(heapType, resourceDescriptor, bufferUsage));
 
-        if (GetDevice()->IsToggleEnabled(Toggle::NonzeroClearResourcesOnCreationForTesting)) {
+        // The buffers with mappedAtCreation == true will be initialized in
+        // BufferBase::MapAtCreation().
+        if (GetDevice()->IsToggleEnabled(Toggle::NonzeroClearResourcesOnCreationForTesting) &&
+            !mappedAtCreation) {
             CommandRecordingContext* commandRecordingContext;
             DAWN_TRY_ASSIGN(commandRecordingContext,
                             ToBackend(GetDevice())->GetPendingCommandContext());
@@ -251,9 +254,17 @@
         return mResourceAllocation.GetGPUPointer();
     }
 
-    bool Buffer::IsMappableAtCreation() const {
+    bool Buffer::IsCPUWritableAtCreation() const {
+        // We use a staging buffer for the buffers with mappedAtCreation == true and created on the
+        // READBACK heap because for the buffers on the READBACK heap, the data written on the CPU
+        // side won't be uploaded to GPU. When we enable zero-initialization, the CPU side memory
+        // of the buffer is all written to 0 but not the GPU side memory, so on the next mapping
+        // operation the zeroes get overwritten by whatever was in the GPU memory when the buffer
+        // was created. With a staging buffer, the data on the CPU side will first be uploaded to
+        // the staging buffer, and then copied from the staging buffer to the GPU memory of the
+        // current buffer in the unmap() call.
         // TODO(enga): Handle CPU-visible memory on UMA
-        return (GetUsage() & (wgpu::BufferUsage::MapRead | wgpu::BufferUsage::MapWrite)) != 0;
+        return (GetUsage() & wgpu::BufferUsage::MapWrite) != 0;
     }
 
     MaybeError Buffer::MapInternal(bool isWrite,
@@ -283,13 +294,15 @@
     }
 
     MaybeError Buffer::MapAtCreationImpl() {
-        CommandRecordingContext* commandContext;
-        DAWN_TRY_ASSIGN(commandContext, ToBackend(GetDevice())->GetPendingCommandContext());
-        DAWN_TRY(EnsureDataInitialized(commandContext));
+        // We will use a staging buffer for MapRead buffers instead so we just clear the staging
+        // buffer and initialize the original buffer by copying the staging buffer to the original
+        // buffer the first time Unmap() is called.
+        ASSERT((GetUsage() & wgpu::BufferUsage::MapWrite) != 0);
 
-        // Setting isMapWrite to false on MapRead buffers to silence D3D12 debug layer warning.
-        bool isMapWrite = (GetUsage() & wgpu::BufferUsage::MapWrite) != 0;
-        DAWN_TRY(MapInternal(isMapWrite, 0, size_t(GetSize()), "D3D12 map at creation"));
+        // The buffers with mappedAtCreation == true will be initialized in
+        // BufferBase::MapAtCreation().
+        DAWN_TRY(MapInternal(true, 0, size_t(GetSize()), "D3D12 map at creation"));
+
         return {};
     }
 
diff --git a/src/dawn_native/d3d12/BufferD3D12.h b/src/dawn_native/d3d12/BufferD3D12.h
index 7ab6d5a..d355e32 100644
--- a/src/dawn_native/d3d12/BufferD3D12.h
+++ b/src/dawn_native/d3d12/BufferD3D12.h
@@ -30,7 +30,7 @@
       public:
         Buffer(Device* device, const BufferDescriptor* descriptor);
 
-        MaybeError Initialize();
+        MaybeError Initialize(bool mappedAtCreation);
 
         ID3D12Resource* GetD3D12Resource() const;
         D3D12_GPU_VIRTUAL_ADDRESS GetVA() const;
@@ -56,7 +56,7 @@
         MaybeError MapAsyncImpl(wgpu::MapMode mode, size_t offset, size_t size) override;
         void UnmapImpl() override;
         void DestroyImpl() override;
-        bool IsMappableAtCreation() const override;
+        bool IsCPUWritableAtCreation() const override;
         virtual MaybeError MapAtCreationImpl() override;
         void* GetMappedPointerImpl() override;
 
diff --git a/src/dawn_native/d3d12/DeviceD3D12.cpp b/src/dawn_native/d3d12/DeviceD3D12.cpp
index 295548e..a26cbb8 100644
--- a/src/dawn_native/d3d12/DeviceD3D12.cpp
+++ b/src/dawn_native/d3d12/DeviceD3D12.cpp
@@ -276,7 +276,7 @@
     }
     ResultOrError<Ref<BufferBase>> Device::CreateBufferImpl(const BufferDescriptor* descriptor) {
         Ref<Buffer> buffer = AcquireRef(new Buffer(this, descriptor));
-        DAWN_TRY(buffer->Initialize());
+        DAWN_TRY(buffer->Initialize(descriptor->mappedAtCreation));
         return std::move(buffer);
     }
     CommandBufferBase* Device::CreateCommandBuffer(CommandEncoder* encoder,
diff --git a/src/dawn_native/metal/BufferMTL.h b/src/dawn_native/metal/BufferMTL.h
index 59a7dc0..db06e1d 100644
--- a/src/dawn_native/metal/BufferMTL.h
+++ b/src/dawn_native/metal/BufferMTL.h
@@ -40,14 +40,14 @@
 
       private:
         using BufferBase::BufferBase;
-        MaybeError Initialize();
+        MaybeError Initialize(bool mappedAtCreation);
 
         ~Buffer() override;
         MaybeError MapAsyncImpl(wgpu::MapMode mode, size_t offset, size_t size) override;
         void UnmapImpl() override;
         void DestroyImpl() override;
         void* GetMappedPointerImpl() override;
-        bool IsMappableAtCreation() const override;
+        bool IsCPUWritableAtCreation() const override;
         MaybeError MapAtCreationImpl() override;
 
         void InitializeToZero(CommandRecordingContext* commandContext);
diff --git a/src/dawn_native/metal/BufferMTL.mm b/src/dawn_native/metal/BufferMTL.mm
index 30568b4..757e7c0 100644
--- a/src/dawn_native/metal/BufferMTL.mm
+++ b/src/dawn_native/metal/BufferMTL.mm
@@ -33,11 +33,11 @@
     // static
     ResultOrError<Ref<Buffer>> Buffer::Create(Device* device, const BufferDescriptor* descriptor) {
         Ref<Buffer> buffer = AcquireRef(new Buffer(device, descriptor));
-        DAWN_TRY(buffer->Initialize());
+        DAWN_TRY(buffer->Initialize(descriptor->mappedAtCreation));
         return std::move(buffer);
     }
 
-    MaybeError Buffer::Initialize() {
+    MaybeError Buffer::Initialize(bool mappedAtCreation) {
         MTLResourceOptions storageMode;
         if (GetUsage() & (wgpu::BufferUsage::MapRead | wgpu::BufferUsage::MapWrite)) {
             storageMode = MTLResourceStorageModeShared;
@@ -90,7 +90,10 @@
             return DAWN_OUT_OF_MEMORY_ERROR("Buffer allocation failed");
         }
 
-        if (GetDevice()->IsToggleEnabled(Toggle::NonzeroClearResourcesOnCreationForTesting)) {
+        // The buffers with mappedAtCreation == true will be initialized in
+        // BufferBase::MapAtCreation().
+        if (GetDevice()->IsToggleEnabled(Toggle::NonzeroClearResourcesOnCreationForTesting) &&
+            !mappedAtCreation) {
             CommandRecordingContext* commandContext =
                 ToBackend(GetDevice())->GetPendingCommandContext();
             ClearBuffer(commandContext, uint8_t(1u));
@@ -107,16 +110,12 @@
         return mMtlBuffer;
     }
 
-    bool Buffer::IsMappableAtCreation() const {
+    bool Buffer::IsCPUWritableAtCreation() const {
         // TODO(enga): Handle CPU-visible memory on UMA
         return (GetUsage() & (wgpu::BufferUsage::MapRead | wgpu::BufferUsage::MapWrite)) != 0;
     }
 
     MaybeError Buffer::MapAtCreationImpl() {
-        CommandRecordingContext* commandContext =
-            ToBackend(GetDevice())->GetPendingCommandContext();
-        EnsureDataInitialized(commandContext);
-
         return {};
     }
 
diff --git a/src/dawn_native/null/DeviceNull.cpp b/src/dawn_native/null/DeviceNull.cpp
index 26d673e..f765ca4 100644
--- a/src/dawn_native/null/DeviceNull.cpp
+++ b/src/dawn_native/null/DeviceNull.cpp
@@ -287,7 +287,7 @@
         ToBackend(GetDevice())->DecrementMemoryUsage(GetSize());
     }
 
-    bool Buffer::IsMappableAtCreation() const {
+    bool Buffer::IsCPUWritableAtCreation() const {
         // Only return true for mappable buffers so we can test cases that need / don't need a
         // staging buffer.
         return (GetUsage() & (wgpu::BufferUsage::MapRead | wgpu::BufferUsage::MapWrite)) != 0;
diff --git a/src/dawn_native/null/DeviceNull.h b/src/dawn_native/null/DeviceNull.h
index 343c48a..7340ff3 100644
--- a/src/dawn_native/null/DeviceNull.h
+++ b/src/dawn_native/null/DeviceNull.h
@@ -207,7 +207,7 @@
         MaybeError MapAsyncImpl(wgpu::MapMode mode, size_t offset, size_t size) override;
         void UnmapImpl() override;
         void DestroyImpl() override;
-        bool IsMappableAtCreation() const override;
+        bool IsCPUWritableAtCreation() const override;
         MaybeError MapAtCreationImpl() override;
         void* GetMappedPointerImpl() override;
 
diff --git a/src/dawn_native/opengl/BufferGL.cpp b/src/dawn_native/opengl/BufferGL.cpp
index a41b1d4..5de6ed7 100644
--- a/src/dawn_native/opengl/BufferGL.cpp
+++ b/src/dawn_native/opengl/BufferGL.cpp
@@ -27,7 +27,7 @@
                                                             bool shouldLazyClear) {
         Ref<Buffer> buffer = AcquireRef(new Buffer(device, descriptor, shouldLazyClear));
         if (descriptor->mappedAtCreation) {
-            DAWN_TRY(buffer->MapAtCreation());
+            DAWN_TRY(buffer->MapAtCreationInternal());
         }
 
         return std::move(buffer);
@@ -42,7 +42,10 @@
         device->gl.GenBuffers(1, &mBuffer);
         device->gl.BindBuffer(GL_ARRAY_BUFFER, mBuffer);
 
-        if (device->IsToggleEnabled(Toggle::NonzeroClearResourcesOnCreationForTesting)) {
+        // The buffers with mappedAtCreation == true will be initialized in
+        // BufferBase::MapAtCreation().
+        if (device->IsToggleEnabled(Toggle::NonzeroClearResourcesOnCreationForTesting) &&
+            !descriptor->mappedAtCreation) {
             std::vector<uint8_t> clearValues(size, 1u);
             device->gl.BufferData(GL_ARRAY_BUFFER, size, clearValues.data(), GL_STATIC_DRAW);
         } else {
@@ -127,15 +130,13 @@
         SetIsDataInitialized();
     }
 
-    bool Buffer::IsMappableAtCreation() const {
+    bool Buffer::IsCPUWritableAtCreation() const {
         // TODO(enga): All buffers in GL can be mapped. Investigate if mapping them will cause the
         // driver to migrate it to shared memory.
         return true;
     }
 
     MaybeError Buffer::MapAtCreationImpl() {
-        EnsureDataInitialized();
-
         const OpenGLFunctions& gl = ToBackend(GetDevice())->gl;
         gl.BindBuffer(GL_ARRAY_BUFFER, mBuffer);
         mMappedData = gl.MapBuffer(GL_ARRAY_BUFFER, GL_WRITE_ONLY);
diff --git a/src/dawn_native/opengl/BufferGL.h b/src/dawn_native/opengl/BufferGL.h
index 2f6f6d9..8038123 100644
--- a/src/dawn_native/opengl/BufferGL.h
+++ b/src/dawn_native/opengl/BufferGL.h
@@ -43,7 +43,7 @@
         MaybeError MapAsyncImpl(wgpu::MapMode mode, size_t offset, size_t size) override;
         void UnmapImpl() override;
         void DestroyImpl() override;
-        bool IsMappableAtCreation() const override;
+        bool IsCPUWritableAtCreation() const override;
         MaybeError MapAtCreationImpl() override;
         void* GetMappedPointerImpl() override;
 
diff --git a/src/dawn_native/vulkan/BufferVk.cpp b/src/dawn_native/vulkan/BufferVk.cpp
index 12224d9..689bfdf 100644
--- a/src/dawn_native/vulkan/BufferVk.cpp
+++ b/src/dawn_native/vulkan/BufferVk.cpp
@@ -119,11 +119,11 @@
     // static
     ResultOrError<Ref<Buffer>> Buffer::Create(Device* device, const BufferDescriptor* descriptor) {
         Ref<Buffer> buffer = AcquireRef(new Buffer(device, descriptor));
-        DAWN_TRY(buffer->Initialize());
+        DAWN_TRY(buffer->Initialize(descriptor->mappedAtCreation));
         return std::move(buffer);
     }
 
-    MaybeError Buffer::Initialize() {
+    MaybeError Buffer::Initialize(bool mappedAtCreation) {
         // Avoid passing ludicrously large sizes to drivers because it causes issues: drivers add
         // some constants to the size passed and align it, but for values close to the maximum
         // VkDeviceSize this can cause overflows and makes drivers crash or return bad sizes in the
@@ -166,7 +166,10 @@
                                         mMemoryAllocation.GetOffset()),
             "vkBindBufferMemory"));
 
-        if (device->IsToggleEnabled(Toggle::NonzeroClearResourcesOnCreationForTesting)) {
+        // The buffers with mappedAtCreation == true will be initialized in
+        // BufferBase::MapAtCreation().
+        if (device->IsToggleEnabled(Toggle::NonzeroClearResourcesOnCreationForTesting) &&
+            !mappedAtCreation) {
             ClearBuffer(device->GetPendingRecordingContext(), 0x01010101);
         }
 
@@ -235,18 +238,12 @@
         mLastUsage = usage;
     }
 
-    bool Buffer::IsMappableAtCreation() const {
+    bool Buffer::IsCPUWritableAtCreation() const {
         // TODO(enga): Handle CPU-visible memory on UMA
         return mMemoryAllocation.GetMappedPointer() != nullptr;
     }
 
     MaybeError Buffer::MapAtCreationImpl() {
-        CommandRecordingContext* recordingContext =
-            ToBackend(GetDevice())->GetPendingRecordingContext();
-
-        // TODO(jiawei.shao@intel.com): initialize mapped buffer in CPU side.
-        EnsureDataInitialized(recordingContext);
-
         return {};
     }
 
diff --git a/src/dawn_native/vulkan/BufferVk.h b/src/dawn_native/vulkan/BufferVk.h
index f9c6ba0..45922ec 100644
--- a/src/dawn_native/vulkan/BufferVk.h
+++ b/src/dawn_native/vulkan/BufferVk.h
@@ -53,14 +53,14 @@
       private:
         ~Buffer() override;
         using BufferBase::BufferBase;
-        MaybeError Initialize();
+        MaybeError Initialize(bool mappedAtCreation);
         void InitializeToZero(CommandRecordingContext* recordingContext);
         void ClearBuffer(CommandRecordingContext* recordingContext, uint32_t clearValue);
 
         MaybeError MapAsyncImpl(wgpu::MapMode mode, size_t offset, size_t size) override;
         void UnmapImpl() override;
         void DestroyImpl() override;
-        bool IsMappableAtCreation() const override;
+        bool IsCPUWritableAtCreation() const override;
         MaybeError MapAtCreationImpl() override;
         void* GetMappedPointerImpl() override;
 
diff --git a/src/tests/end2end/BufferTests.cpp b/src/tests/end2end/BufferTests.cpp
index 5d80624..e56c3c2 100644
--- a/src/tests/end2end/BufferTests.cpp
+++ b/src/tests/end2end/BufferTests.cpp
@@ -683,12 +683,20 @@
     buffer.Unmap();
 }
 
+// TODO(jiawei.shao@intel.com): remove "lazy_clear_buffer_on_first_use" when we complete the
+// support of buffer lazy initialization.
 DAWN_INSTANTIATE_TEST(BufferMappedAtCreationTests,
                       D3D12Backend(),
                       D3D12Backend({}, {"use_d3d12_resource_heap_tier2"}),
+                      D3D12Backend({"lazy_clear_buffer_on_first_use"}),
+                      D3D12Backend({"lazy_clear_buffer_on_first_use"},
+                                   {"use_d3d12_resource_heap_tier2"}),
                       MetalBackend(),
+                      MetalBackend({"lazy_clear_buffer_on_first_use"}),
                       OpenGLBackend(),
-                      VulkanBackend());
+                      OpenGLBackend({"lazy_clear_buffer_on_first_use"}),
+                      VulkanBackend(),
+                      VulkanBackend({"lazy_clear_buffer_on_first_use"}));
 
 class BufferTests : public DawnTest {};
 
diff --git a/src/tests/end2end/BufferZeroInitTests.cpp b/src/tests/end2end/BufferZeroInitTests.cpp
index d18d1cf..eb46afd 100644
--- a/src/tests/end2end/BufferZeroInitTests.cpp
+++ b/src/tests/end2end/BufferZeroInitTests.cpp
@@ -762,19 +762,91 @@
 
 // Test that the code path of creating a buffer with BufferDescriptor.mappedAtCreation == true
 // clears the buffer correctly at the creation of the buffer.
-TEST_P(BufferZeroInitTest, MapAtCreation) {
+TEST_P(BufferZeroInitTest, MappedAtCreation) {
     constexpr uint32_t kBufferSize = 16u;
-    constexpr wgpu::BufferUsage kBufferUsage =
-        wgpu::BufferUsage::MapWrite | wgpu::BufferUsage::CopySrc;
-
-    wgpu::Buffer buffer;
-    EXPECT_LAZY_CLEAR(1u, buffer = CreateBuffer(kBufferSize, kBufferUsage, true));
-    buffer.Unmap();
 
     constexpr std::array<uint32_t, kBufferSize / sizeof(uint32_t)> kExpectedData = {{0, 0, 0, 0}};
-    EXPECT_LAZY_CLEAR(
-        0u, EXPECT_BUFFER_U32_RANGE_EQ(reinterpret_cast<const uint32_t*>(kExpectedData.data()),
-                                       buffer, 0, kExpectedData.size()));
+
+    // Buffer with MapRead usage
+    {
+        constexpr wgpu::BufferUsage kBufferUsage = wgpu::BufferUsage::MapRead;
+
+        wgpu::Buffer buffer;
+        EXPECT_LAZY_CLEAR(1u, buffer = CreateBuffer(kBufferSize, kBufferUsage, true));
+        const uint8_t* mappedData = static_cast<const uint8_t*>(buffer.GetConstMappedRange());
+        EXPECT_EQ(0, memcmp(mappedData, kExpectedData.data(), kBufferSize));
+        buffer.Unmap();
+
+        MapAsyncAndWait(buffer, wgpu::MapMode::Read, 0, kBufferSize);
+        mappedData = static_cast<const uint8_t*>(buffer.GetConstMappedRange());
+        EXPECT_EQ(0, memcmp(mappedData, kExpectedData.data(), kBufferSize));
+        buffer.Unmap();
+    }
+
+    // Buffer with MapRead usage and upload the buffer (from CPU and GPU)
+    {
+        constexpr std::array<uint32_t, kBufferSize / sizeof(uint32_t)> kExpectedFinalData = {
+            {10, 20, 30, 40}};
+
+        constexpr wgpu::BufferUsage kBufferUsage =
+            wgpu::BufferUsage::MapRead | wgpu::BufferUsage::CopyDst;
+
+        wgpu::Buffer buffer;
+        EXPECT_LAZY_CLEAR(1u, buffer = CreateBuffer(kBufferSize, kBufferUsage, true));
+
+        // Update data from the CPU side.
+        uint32_t* mappedData = static_cast<uint32_t*>(buffer.GetMappedRange());
+        mappedData[2] = kExpectedFinalData[2];
+        mappedData[3] = kExpectedFinalData[3];
+        buffer.Unmap();
+
+        // Update data from the GPU side.
+        wgpu::Buffer uploadBuffer = utils::CreateBufferFromData(
+            device, wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst,
+            {kExpectedFinalData[0], kExpectedFinalData[1]});
+
+        wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
+        encoder.CopyBufferToBuffer(uploadBuffer, 0, buffer, 0, 2 * sizeof(uint32_t));
+        wgpu::CommandBuffer commandBuffer = encoder.Finish();
+        EXPECT_LAZY_CLEAR(0u, queue.Submit(1, &commandBuffer));
+
+        // Check the content of the buffer on the CPU side
+        MapAsyncAndWait(buffer, wgpu::MapMode::Read, 0, kBufferSize);
+        const uint32_t* constMappedData =
+            static_cast<const uint32_t*>(buffer.GetConstMappedRange());
+        EXPECT_EQ(0, memcmp(kExpectedFinalData.data(), constMappedData, kBufferSize));
+    }
+
+    // Buffer with MapWrite usage
+    {
+        constexpr wgpu::BufferUsage kBufferUsage =
+            wgpu::BufferUsage::MapWrite | wgpu::BufferUsage::CopySrc;
+
+        wgpu::Buffer buffer;
+        EXPECT_LAZY_CLEAR(1u, buffer = CreateBuffer(kBufferSize, kBufferUsage, true));
+
+        const uint8_t* mappedData = static_cast<const uint8_t*>(buffer.GetConstMappedRange());
+        EXPECT_EQ(0, memcmp(mappedData, kExpectedData.data(), kBufferSize));
+        buffer.Unmap();
+
+        EXPECT_LAZY_CLEAR(
+            0u, EXPECT_BUFFER_U32_RANGE_EQ(kExpectedData.data(), buffer, 0, kExpectedData.size()));
+    }
+
+    // Buffer with neither MapRead nor MapWrite usage
+    {
+        constexpr wgpu::BufferUsage kBufferUsage = wgpu::BufferUsage::CopySrc;
+
+        wgpu::Buffer buffer;
+        EXPECT_LAZY_CLEAR(1u, buffer = CreateBuffer(kBufferSize, kBufferUsage, true));
+
+        const uint8_t* mappedData = static_cast<const uint8_t*>(buffer.GetConstMappedRange());
+        EXPECT_EQ(0, memcmp(mappedData, kExpectedData.data(), kBufferSize));
+        buffer.Unmap();
+
+        EXPECT_LAZY_CLEAR(
+            0u, EXPECT_BUFFER_U32_RANGE_EQ(kExpectedData.data(), buffer, 0, kExpectedData.size()));
+    }
 }
 
 // Test that the code path of CopyBufferToTexture clears the source buffer correctly when it is the
diff --git a/src/tests/end2end/NonzeroBufferCreationTests.cpp b/src/tests/end2end/NonzeroBufferCreationTests.cpp
index 46f1366..9a8c684 100644
--- a/src/tests/end2end/NonzeroBufferCreationTests.cpp
+++ b/src/tests/end2end/NonzeroBufferCreationTests.cpp
@@ -14,9 +14,26 @@
 
 #include "tests/DawnTest.h"
 
+#include <array>
 #include <vector>
 
-class NonzeroBufferCreationTests : public DawnTest {};
+class NonzeroBufferCreationTests : public DawnTest {
+  public:
+    void MapReadAsyncAndWait(wgpu::Buffer buffer, uint64_t offset, uint64_t size) {
+        bool done = false;
+        buffer.MapAsync(
+            wgpu::MapMode::Read, offset, size,
+            [](WGPUBufferMapAsyncStatus status, void* userdata) {
+                ASSERT_EQ(WGPUBufferMapAsyncStatus_Success, status);
+                *static_cast<bool*>(userdata) = true;
+            },
+            &done);
+
+        while (!done) {
+            WaitABit();
+        }
+    }
+};
 
 // Verify that each byte of the buffer has all been initialized to 1 with the toggle enabled when it
 // is created with CopyDst usage.
@@ -29,7 +46,7 @@
 
     wgpu::Buffer buffer = device.CreateBuffer(&descriptor);
 
-    std::vector<uint8_t> expectedData(kSize, static_cast<uint8_t>(1u));
+    std::vector<uint8_t> expectedData(kSize, uint8_t(1u));
     EXPECT_BUFFER_U32_RANGE_EQ(reinterpret_cast<uint32_t*>(expectedData.data()), buffer, 0,
                                kSize / sizeof(uint32_t));
 }
@@ -45,11 +62,72 @@
 
     wgpu::Buffer buffer = device.CreateBuffer(&descriptor);
 
-    std::vector<uint8_t> expectedData(kSize, static_cast<uint8_t>(1u));
+    std::vector<uint8_t> expectedData(kSize, uint8_t(1u));
     EXPECT_BUFFER_U32_RANGE_EQ(reinterpret_cast<uint32_t*>(expectedData.data()), buffer, 0,
                                kSize / sizeof(uint32_t));
 }
 
+// Verify that each byte of the buffer has all been initialized to 1 with the toggle enabled when
+// it is created with mappedAtCreation == true.
+TEST_P(NonzeroBufferCreationTests, BufferCreationWithMappedAtCreation) {
+    // When we use Dawn wire, the lazy initialization of the buffers with mappedAtCreation == true
+    // is done in the Dawn wire, and we don't plan to make it work with the toggle
+    // "nonzero_clear_resources_on_creation_for_testing" (we will have more tests on it in the
+    // BufferZeroInitTests).
+    DAWN_SKIP_TEST_IF(UsesWire());
+
+    constexpr uint32_t kSize = 32u;
+
+    wgpu::BufferDescriptor defaultDescriptor;
+    defaultDescriptor.size = kSize;
+    defaultDescriptor.mappedAtCreation = true;
+
+    const std::vector<uint8_t> expectedData(kSize, uint8_t(1u));
+    const uint32_t* expectedDataPtr = reinterpret_cast<const uint32_t*>(expectedData.data());
+
+    // Buffer with MapRead usage
+    {
+        wgpu::BufferDescriptor descriptor = defaultDescriptor;
+        descriptor.usage = wgpu::BufferUsage::MapRead;
+        wgpu::Buffer buffer = device.CreateBuffer(&descriptor);
+
+        const uint8_t* mappedData = static_cast<const uint8_t*>(buffer.GetConstMappedRange());
+        EXPECT_EQ(0, memcmp(mappedData, expectedData.data(), kSize));
+        buffer.Unmap();
+
+        MapReadAsyncAndWait(buffer, 0, kSize);
+        mappedData = static_cast<const uint8_t*>(buffer.GetConstMappedRange());
+        EXPECT_EQ(0, memcmp(mappedData, expectedData.data(), kSize));
+        buffer.Unmap();
+    }
+
+    // Buffer with MapWrite usage
+    {
+        wgpu::BufferDescriptor descriptor = defaultDescriptor;
+        descriptor.usage = wgpu::BufferUsage::MapWrite | wgpu::BufferUsage::CopySrc;
+        wgpu::Buffer buffer = device.CreateBuffer(&descriptor);
+
+        const uint8_t* mappedData = static_cast<const uint8_t*>(buffer.GetConstMappedRange());
+        EXPECT_EQ(0, memcmp(mappedData, expectedData.data(), kSize));
+        buffer.Unmap();
+
+        EXPECT_BUFFER_U32_RANGE_EQ(expectedDataPtr, buffer, 0, kSize / sizeof(uint32_t));
+    }
+
+    // Buffer with neither MapRead nor MapWrite usage
+    {
+        wgpu::BufferDescriptor descriptor = defaultDescriptor;
+        descriptor.usage = wgpu::BufferUsage::CopySrc;
+        wgpu::Buffer buffer = device.CreateBuffer(&descriptor);
+
+        const uint8_t* mappedData = static_cast<const uint8_t*>(buffer.GetConstMappedRange());
+        EXPECT_EQ(0, memcmp(mappedData, expectedData.data(), kSize));
+        buffer.Unmap();
+
+        EXPECT_BUFFER_U32_RANGE_EQ(expectedDataPtr, buffer, 0, kSize / sizeof(uint32_t));
+    }
+}
+
 DAWN_INSTANTIATE_TEST(NonzeroBufferCreationTests,
                       D3D12Backend({"nonzero_clear_resources_on_creation_for_testing"}),
                       MetalBackend({"nonzero_clear_resources_on_creation_for_testing"}),