D3D12: Align UBO sizes to 256B.

D3D debug layer uses the descriptor size (width) to
validate CBV bounds when directly allocating UBOs.
This causes validation failure when the buffer size
is misaligned (ie. not a multiple of 256B) even
through the underlying resource heap size is always
64KB aligned.

This change always aligns the buffer size to be 256B
to avoid such validation error should sub-allocation
fail.

BUG=dawn:506

Change-Id: Ic9072934cac65cfd25d0e2a20cb364bd3ca88e3b
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/26820
Reviewed-by: Austin Eng <enga@chromium.org>
Commit-Queue: Bryan Bernhart <bryan.bernhart@intel.com>
diff --git a/src/common/Math.cpp b/src/common/Math.cpp
index 8e5985f..80b8438 100644
--- a/src/common/Math.cpp
+++ b/src/common/Math.cpp
@@ -98,14 +98,6 @@
     return (value & (alignment32 - 1)) == 0;
 }
 
-uint32_t Align(uint32_t value, size_t alignment) {
-    ASSERT(alignment <= UINT32_MAX);
-    ASSERT(IsPowerOfTwo(alignment));
-    ASSERT(alignment != 0);
-    uint32_t alignment32 = static_cast<uint32_t>(alignment);
-    return (value + (alignment32 - 1)) & ~(alignment32 - 1);
-}
-
 uint16_t Float32ToFloat16(float fp32) {
     uint32_t fp32i = BitCast<uint32_t>(fp32);
     uint32_t sign16 = (fp32i & 0x80000000) >> 16;
diff --git a/src/common/Math.h b/src/common/Math.h
index c673785..d29a88c 100644
--- a/src/common/Math.h
+++ b/src/common/Math.h
@@ -51,7 +51,15 @@
 bool IsPtrAligned(const void* ptr, size_t alignment);
 void* AlignVoidPtr(void* ptr, size_t alignment);
 bool IsAligned(uint32_t value, size_t alignment);
-uint32_t Align(uint32_t value, size_t alignment);
+
+template <typename T>
+T Align(T value, size_t alignment) {
+    ASSERT(value <= std::numeric_limits<T>::max() - (alignment - 1));
+    ASSERT(IsPowerOfTwo(alignment));
+    ASSERT(alignment != 0);
+    T alignmentT = static_cast<T>(alignment);
+    return (value + (alignmentT - 1)) & ~(alignmentT - 1);
+}
 
 template <typename T>
 DAWN_FORCE_INLINE T* AlignPtr(T* ptr, size_t alignment) {
diff --git a/src/dawn_native/d3d12/BufferD3D12.cpp b/src/dawn_native/d3d12/BufferD3D12.cpp
index da4be7a..0d8bb6a 100644
--- a/src/dawn_native/d3d12/BufferD3D12.cpp
+++ b/src/dawn_native/d3d12/BufferD3D12.cpp
@@ -83,6 +83,15 @@
                 return D3D12_HEAP_TYPE_DEFAULT;
             }
         }
+
+        size_t D3D12BufferSizeAlignment(wgpu::BufferUsage usage) {
+            switch (usage) {
+                case wgpu::BufferUsage::Uniform:
+                    return D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT;
+                default:
+                    return 1;
+            }
+        }
     }  // namespace
 
     Buffer::Buffer(Device* device, const BufferDescriptor* descriptor)
@@ -95,7 +104,14 @@
         resourceDescriptor.Alignment = 0;
         // TODO(cwallez@chromium.org): Have a global "zero" buffer that can do everything instead
         // of creating a new 4-byte buffer?
-        resourceDescriptor.Width = std::max(GetSize(), uint64_t(4u));
+        // D3D buffers are always resource size aligned to 64KB. However, D3D12's validation forbids
+        // binding a CBV to an unaligned size. To prevent, one can always safely align the buffer
+        // desc size to the CBV data alignment as other buffer usages ignore it (no size check).
+        // The validation will still enforce bound checks with the unaligned size returned by
+        // GetSize().
+        // https://docs.microsoft.com/en-us/windows/win32/direct3d12/uploading-resources#buffer-alignment
+        resourceDescriptor.Width =
+            Align(std::max(GetSize(), uint64_t(4u)), D3D12BufferSizeAlignment(GetUsage()));
         resourceDescriptor.Height = 1;
         resourceDescriptor.DepthOrArraySize = 1;
         resourceDescriptor.MipLevels = 1;
diff --git a/src/tests/BUILD.gn b/src/tests/BUILD.gn
index b701984..0077695 100644
--- a/src/tests/BUILD.gn
+++ b/src/tests/BUILD.gn
@@ -396,7 +396,7 @@
     sources += [
       "white_box/D3D12DescriptorHeapTests.cpp",
       "white_box/D3D12ResidencyTests.cpp",
-      "white_box/D3D12SmallTextureTests.cpp",
+      "white_box/D3D12ResourceHeapTests.cpp",
     ]
   }
 
diff --git a/src/tests/unittests/MathTests.cpp b/src/tests/unittests/MathTests.cpp
index 2294fe1..71136dc 100644
--- a/src/tests/unittests/MathTests.cpp
+++ b/src/tests/unittests/MathTests.cpp
@@ -136,17 +136,17 @@
 // Tests for Align
 TEST(Math, Align) {
     // 0 aligns to 0
-    ASSERT_EQ(Align(0, 4), 0u);
-    ASSERT_EQ(Align(0, 256), 0u);
-    ASSERT_EQ(Align(0, 512), 0u);
+    ASSERT_EQ(Align(0u, 4), 0u);
+    ASSERT_EQ(Align(0u, 256), 0u);
+    ASSERT_EQ(Align(0u, 512), 0u);
 
     // Multiples align to self
-    ASSERT_EQ(Align(8, 8), 8u);
-    ASSERT_EQ(Align(16, 8), 16u);
-    ASSERT_EQ(Align(24, 8), 24u);
-    ASSERT_EQ(Align(256, 256), 256u);
-    ASSERT_EQ(Align(512, 256), 512u);
-    ASSERT_EQ(Align(768, 256), 768u);
+    ASSERT_EQ(Align(8u, 8), 8u);
+    ASSERT_EQ(Align(16u, 8), 16u);
+    ASSERT_EQ(Align(24u, 8), 24u);
+    ASSERT_EQ(Align(256u, 256), 256u);
+    ASSERT_EQ(Align(512u, 256), 512u);
+    ASSERT_EQ(Align(768u, 256), 768u);
 
     // Alignment with 1 is self
     for (uint32_t i = 0; i < 128; ++i) {
@@ -157,6 +157,10 @@
     for (uint32_t i = 1; i <= 64; ++i) {
         ASSERT_EQ(Align(64 + i, 64), 128u);
     }
+
+    // Test extrema
+    ASSERT_EQ(Align(static_cast<uint64_t>(0xFFFFFFFF), 4), 0x100000000u);
+    ASSERT_EQ(Align(static_cast<uint64_t>(0xFFFFFFFFFFFFFFFF), 1), 0xFFFFFFFFFFFFFFFFull);
 }
 
 // Tests for IsPtrAligned
diff --git a/src/tests/white_box/D3D12SmallTextureTests.cpp b/src/tests/white_box/D3D12ResourceHeapTests.cpp
similarity index 66%
rename from src/tests/white_box/D3D12SmallTextureTests.cpp
rename to src/tests/white_box/D3D12ResourceHeapTests.cpp
index fdf5d81..450cb9e 100644
--- a/src/tests/white_box/D3D12SmallTextureTests.cpp
+++ b/src/tests/white_box/D3D12ResourceHeapTests.cpp
@@ -14,12 +14,18 @@
 
 #include "tests/DawnTest.h"
 
+#include "dawn_native/d3d12/BufferD3D12.h"
 #include "dawn_native/d3d12/TextureD3D12.h"
 
 using namespace dawn_native::d3d12;
 
-class D3D12SmallTextureTests : public DawnTest {
+class D3D12ResourceHeapTests : public DawnTest {
   protected:
+    void SetUp() override {
+        DawnTest::SetUp();
+        DAWN_SKIP_TEST_IF(UsesWire());
+    }
+
     std::vector<const char*> GetRequiredExtensions() override {
         mIsBCFormatSupported = SupportsExtensions({"texture_compression_bc"});
         if (!mIsBCFormatSupported) {
@@ -38,8 +44,7 @@
 };
 
 // Verify that creating a small compressed textures will be 4KB aligned.
-TEST_P(D3D12SmallTextureTests, AlignSmallCompressedTexture) {
-    DAWN_SKIP_TEST_IF(UsesWire());
+TEST_P(D3D12ResourceHeapTests, AlignSmallCompressedTexture) {
     DAWN_SKIP_TEST_IF(!IsBCFormatSupported());
 
     // TODO(http://crbug.com/dawn/282): Investigate GPU/driver rejections of small alignment.
@@ -73,4 +78,28 @@
               static_cast<uint64_t>(D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT));
 }
 
-DAWN_INSTANTIATE_TEST(D3D12SmallTextureTests, D3D12Backend());
+// Verify creating a UBO will always be 256B aligned.
+TEST_P(D3D12ResourceHeapTests, AlignUBO) {
+    // Create a small UBO
+    wgpu::BufferDescriptor descriptor;
+    descriptor.size = 4 * 1024;
+    descriptor.usage = wgpu::BufferUsage::Uniform;
+
+    wgpu::Buffer buffer = device.CreateBuffer(&descriptor);
+    Buffer* d3dBuffer = reinterpret_cast<Buffer*>(buffer.Get());
+
+    EXPECT_TRUE((d3dBuffer->GetD3D12Resource()->GetDesc().Width %
+                 static_cast<uint64_t>(D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT)) == 0u);
+
+    // Create a larger UBO
+    descriptor.size = (4 * 1024 * 1024) + 255;
+    descriptor.usage = wgpu::BufferUsage::Uniform;
+
+    buffer = device.CreateBuffer(&descriptor);
+    d3dBuffer = reinterpret_cast<Buffer*>(buffer.Get());
+
+    EXPECT_TRUE((d3dBuffer->GetD3D12Resource()->GetDesc().Width %
+                 static_cast<uint64_t>(D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT)) == 0u);
+}
+
+DAWN_INSTANTIATE_TEST(D3D12ResourceHeapTests, D3D12Backend());