D3D12: only lazy clear OutputAttachment textures with render target ops

Previously, lazy clearing always added DEPTH_STENCIL or RENDER_TARGET to
textures because we cleared using ClearDepthStencilView or
ClearRenderTargetView. Now, we're able to clear using copies.

This also allows textures to actually use the small resource heap
placement optimization. Doing so generates debug layer warnings when the
small alignment is first tried but rejected. This CL silences those
warnings.

Bug: dawn:145
Change-Id: Id385846536b337cddcfdadc5739561c7adc30c8c
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/26840
Commit-Queue: Austin Eng <enga@chromium.org>
Reviewed-by: Corentin Wallez <cwallez@chromium.org>
diff --git a/src/dawn_native/d3d12/AdapterD3D12.cpp b/src/dawn_native/d3d12/AdapterD3D12.cpp
index 5b98915..364e6c13 100644
--- a/src/dawn_native/d3d12/AdapterD3D12.cpp
+++ b/src/dawn_native/d3d12/AdapterD3D12.cpp
@@ -146,6 +146,11 @@
             // Remove after warning have been addressed
             // https://crbug.com/dawn/421
             D3D12_MESSAGE_ID_GPU_BASED_VALIDATION_INCOMPATIBLE_RESOURCE_STATE,
+
+            // For small placed resource alignment, we first request the small alignment, which may
+            // get rejected and generate a debug error. Then, we request 0 to get the allowed
+            // allowed alignment.
+            D3D12_MESSAGE_ID_CREATERESOURCE_INVALIDALIGNMENT,
         };
 
         // Create a retrieval filter with a deny list to suppress messages.
diff --git a/src/dawn_native/d3d12/ResourceAllocatorManagerD3D12.cpp b/src/dawn_native/d3d12/ResourceAllocatorManagerD3D12.cpp
index 3158cbe..8090b7d 100644
--- a/src/dawn_native/d3d12/ResourceAllocatorManagerD3D12.cpp
+++ b/src/dawn_native/d3d12/ResourceAllocatorManagerD3D12.cpp
@@ -279,12 +279,15 @@
             resourceHeapKind, requestedResourceDescriptor.SampleDesc.Count,
             requestedResourceDescriptor.Alignment);
 
+        // TODO(bryan.bernhart): Figure out how to compute the alignment without calling this
+        // twice.
         D3D12_RESOURCE_ALLOCATION_INFO resourceInfo =
             mDevice->GetD3D12Device()->GetResourceAllocationInfo(0, 1, &resourceDescriptor);
 
         // If the requested resource alignment was rejected, let D3D tell us what the
         // required alignment is for this resource.
-        if (resourceDescriptor.Alignment != resourceInfo.Alignment) {
+        if (resourceDescriptor.Alignment != 0 &&
+            resourceDescriptor.Alignment != resourceInfo.Alignment) {
             resourceDescriptor.Alignment = 0;
             resourceInfo =
                 mDevice->GetD3D12Device()->GetResourceAllocationInfo(0, 1, &resourceDescriptor);
diff --git a/src/dawn_native/d3d12/TextureD3D12.cpp b/src/dawn_native/d3d12/TextureD3D12.cpp
index 69220f0..4780634 100644
--- a/src/dawn_native/d3d12/TextureD3D12.cpp
+++ b/src/dawn_native/d3d12/TextureD3D12.cpp
@@ -79,13 +79,7 @@
             // A multisampled resource must have either D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET or
             // D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL set in D3D12_RESOURCE_DESC::Flags.
             // https://docs.microsoft.com/en-us/windows/desktop/api/d3d12/ns-d3d12-d3d12_resource_desc
-            // Currently all textures are zero-initialized via the render-target path so always add
-            // the render target flag, except for compressed textures for which the render-target
-            // flag is invalid.
-            // TODO(natlee@microsoft.com, jiawei.shao@intel.com): do not require render target for
-            // lazy clearing.
-            if ((usage & wgpu::TextureUsage::OutputAttachment) || isMultisampledTexture ||
-                !format.isCompressed) {
+            if ((usage & wgpu::TextureUsage::OutputAttachment) != 0 || isMultisampledTexture) {
                 if (format.HasDepthOrStencil()) {
                     flags |= D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL;
                 } else {
@@ -855,7 +849,7 @@
         uint8_t clearColor = (clearValue == TextureBase::ClearValue::Zero) ? 0 : 1;
         float fClearColor = (clearValue == TextureBase::ClearValue::Zero) ? 0.f : 1.f;
 
-        if (GetFormat().isRenderable) {
+        if ((GetUsage() & wgpu::TextureUsage::OutputAttachment) != 0) {
             if (GetFormat().HasDepthOrStencil()) {
                 TrackUsageAndTransitionNow(commandContext, D3D12_RESOURCE_STATE_DEPTH_WRITE, range);
 
@@ -935,60 +929,63 @@
                 }
             }
         } else {
-            // TODO(natlee@microsoft.com): test compressed textures are cleared
             // create temp buffer with clear color to copy to the texture image
-            uint32_t bytesPerRow =
-                Align((GetWidth() / GetFormat().blockWidth) * GetFormat().blockByteSize,
-                      kTextureBytesPerRowAlignment);
-            uint64_t bufferSize64 = bytesPerRow * (GetHeight() / GetFormat().blockHeight);
-            if (bufferSize64 > std::numeric_limits<uint32_t>::max()) {
-                return DAWN_OUT_OF_MEMORY_ERROR("Unable to allocate buffer.");
-            }
-            uint32_t bufferSize = static_cast<uint32_t>(bufferSize64);
-            DynamicUploader* uploader = device->GetDynamicUploader();
-            UploadHandle uploadHandle;
-            DAWN_TRY_ASSIGN(uploadHandle,
-                            uploader->Allocate(bufferSize, device->GetPendingCommandSerial()));
-            memset(uploadHandle.mappedBuffer, clearColor, bufferSize);
-
             TrackUsageAndTransitionNow(commandContext, D3D12_RESOURCE_STATE_COPY_DEST, range);
 
-            ASSERT(range.aspects == Aspect::Color);
-            for (uint32_t level = range.baseMipLevel; level < range.baseMipLevel + range.levelCount;
-                 ++level) {
-                // compute d3d12 texture copy locations for texture and buffer
-                Extent3D copySize = GetMipLevelVirtualSize(level);
+            for (Aspect aspect : IterateEnumMask(range.aspects)) {
+                const TexelBlockInfo& blockInfo = GetFormat().GetTexelBlockInfo(aspect);
 
-                uint32_t rowsPerImage = GetHeight();
-                Texture2DCopySplit copySplit =
-                    ComputeTextureCopySplit({0, 0, 0}, copySize, GetFormat(),
-                                            uploadHandle.startOffset, bytesPerRow, rowsPerImage);
+                uint32_t bytesPerRow =
+                    Align((GetWidth() / blockInfo.blockWidth) * blockInfo.blockByteSize,
+                          kTextureBytesPerRowAlignment);
+                uint64_t bufferSize64 = bytesPerRow * (GetHeight() / blockInfo.blockHeight);
+                if (bufferSize64 > std::numeric_limits<uint32_t>::max()) {
+                    return DAWN_OUT_OF_MEMORY_ERROR("Unable to allocate buffer.");
+                }
+                uint32_t bufferSize = static_cast<uint32_t>(bufferSize64);
 
-                for (uint32_t layer = range.baseArrayLayer;
-                     layer < range.baseArrayLayer + range.layerCount; ++layer) {
-                    if (clearValue == TextureBase::ClearValue::Zero &&
-                        IsSubresourceContentInitialized(
-                            SubresourceRange::SingleMipAndLayer(level, layer, Aspect::Color))) {
-                        // Skip lazy clears if already initialized.
-                        continue;
-                    }
+                DynamicUploader* uploader = device->GetDynamicUploader();
+                UploadHandle uploadHandle;
+                DAWN_TRY_ASSIGN(uploadHandle,
+                                uploader->Allocate(bufferSize, device->GetPendingCommandSerial()));
+                memset(uploadHandle.mappedBuffer, clearColor, bufferSize);
 
-                    D3D12_TEXTURE_COPY_LOCATION textureLocation =
-                        ComputeTextureCopyLocationForTexture(this, level, layer, Aspect::Color);
-                    for (uint32_t i = 0; i < copySplit.count; ++i) {
-                        Texture2DCopySplit::CopyInfo& info = copySplit.copies[i];
+                for (uint32_t level = range.baseMipLevel;
+                     level < range.baseMipLevel + range.levelCount; ++level) {
+                    // compute d3d12 texture copy locations for texture and buffer
+                    Extent3D copySize = GetMipLevelVirtualSize(level);
 
-                        D3D12_TEXTURE_COPY_LOCATION bufferLocation =
-                            ComputeBufferLocationForCopyTextureRegion(
-                                this, ToBackend(uploadHandle.stagingBuffer)->GetResource(),
-                                info.bufferSize, copySplit.offset, bytesPerRow, Aspect::Color);
-                        D3D12_BOX sourceRegion =
-                            ComputeD3D12BoxFromOffsetAndSize(info.bufferOffset, info.copySize);
+                    uint32_t rowsPerImage = GetHeight();
+                    Texture2DCopySplit copySplit = ComputeTextureCopySplit(
+                        {0, 0, 0}, copySize, blockInfo, uploadHandle.startOffset, bytesPerRow,
+                        rowsPerImage);
 
-                        // copy the buffer filled with clear color to the texture
-                        commandList->CopyTextureRegion(&textureLocation, info.textureOffset.x,
-                                                       info.textureOffset.y, info.textureOffset.z,
-                                                       &bufferLocation, &sourceRegion);
+                    for (uint32_t layer = range.baseArrayLayer;
+                         layer < range.baseArrayLayer + range.layerCount; ++layer) {
+                        if (clearValue == TextureBase::ClearValue::Zero &&
+                            IsSubresourceContentInitialized(
+                                SubresourceRange::SingleMipAndLayer(level, layer, aspect))) {
+                            // Skip lazy clears if already initialized.
+                            continue;
+                        }
+
+                        D3D12_TEXTURE_COPY_LOCATION textureLocation =
+                            ComputeTextureCopyLocationForTexture(this, level, layer, aspect);
+                        for (uint32_t i = 0; i < copySplit.count; ++i) {
+                            Texture2DCopySplit::CopyInfo& info = copySplit.copies[i];
+
+                            D3D12_TEXTURE_COPY_LOCATION bufferLocation =
+                                ComputeBufferLocationForCopyTextureRegion(
+                                    this, ToBackend(uploadHandle.stagingBuffer)->GetResource(),
+                                    info.bufferSize, copySplit.offset, bytesPerRow, aspect);
+                            D3D12_BOX sourceRegion =
+                                ComputeD3D12BoxFromOffsetAndSize(info.bufferOffset, info.copySize);
+
+                            // copy the buffer filled with clear color to the texture
+                            commandList->CopyTextureRegion(
+                                &textureLocation, info.textureOffset.x, info.textureOffset.y,
+                                info.textureOffset.z, &bufferLocation, &sourceRegion);
+                        }
                     }
                 }
             }
diff --git a/src/dawn_native/opengl/TextureGL.cpp b/src/dawn_native/opengl/TextureGL.cpp
index 7cfb05f..e0930d7 100644
--- a/src/dawn_native/opengl/TextureGL.cpp
+++ b/src/dawn_native/opengl/TextureGL.cpp
@@ -316,7 +316,6 @@
         } else {
             ASSERT(range.aspects == Aspect::Color);
 
-            // TODO(natlee@microsoft.com): test compressed textures are cleared
             // create temp buffer with clear color to copy to the texture image
             ASSERT(kTextureBytesPerRowAlignment % GetFormat().blockByteSize == 0);
             uint32_t bytesPerRow =
diff --git a/src/dawn_native/vulkan/TextureVk.cpp b/src/dawn_native/vulkan/TextureVk.cpp
index 8c7bd3f..936ad9f 100644
--- a/src/dawn_native/vulkan/TextureVk.cpp
+++ b/src/dawn_native/vulkan/TextureVk.cpp
@@ -937,7 +937,6 @@
                 }
             }
         } else {
-            // TODO(natlee@microsoft.com): test compressed textures are cleared
             // create temp buffer with clear color to copy to the texture image
             uint32_t bytesPerRow =
                 Align((GetWidth() / GetFormat().blockWidth) * GetFormat().blockByteSize,
diff --git a/src/tests/end2end/TextureZeroInitTests.cpp b/src/tests/end2end/TextureZeroInitTests.cpp
index bca410f..e6e6a75 100644
--- a/src/tests/end2end/TextureZeroInitTests.cpp
+++ b/src/tests/end2end/TextureZeroInitTests.cpp
@@ -30,6 +30,7 @@
         }                                                                                     \
     } while (0)
 
+// TODO(natlee@microsoft.com): test compressed textures are cleared
 class TextureZeroInitTest : public DawnTest {
   protected:
     void SetUp() override {