Introduce SubresourceStorage (3/N): Inline data

This CL changes SubresourceStorage to have an inline storage for the
per-aspect compressed data and allocate the storage for decompressed
data lazily. This will avoid the large performance cost of allocations
in the happy case.

Bug: dawn:441

Change-Id: Iae1cab87b699cb0e60031abe7306cdff92fbd049
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/35521
Commit-Queue: Corentin Wallez <cwallez@chromium.org>
Reviewed-by: Austin Eng <enga@chromium.org>
Reviewed-by: Ben Clayton <bclayton@google.com>
diff --git a/src/dawn_native/SubresourceStorage.h b/src/dawn_native/SubresourceStorage.h
index a10aca7..4ae07fc 100644
--- a/src/dawn_native/SubresourceStorage.h
+++ b/src/dawn_native/SubresourceStorage.h
@@ -79,6 +79,12 @@
     // would be operations that touch all Nth mips of a 2D array texture without touching the
     // others.
     //
+    // There are several hot code paths that create new SubresourceStorage like the tracking of
+    // resource usage per-pass. We don't want to allocate a container for the decompressed data
+    // unless we have to because it would dramatically lower performance. Instead
+    // SubresourceStorage contains an inline array that contains the per-aspect compressed data
+    // and only allocates per-subresource storage on aspect decompression.
+    //
     // T must be a copyable type that supports equality comparison with ==.
     //
     // The implementation of functions in this file can have a lot of control flow and corner cases
@@ -94,8 +100,6 @@
            third_party/dawn/src/dawn_native
     */
     //
-    // TODO(cwallez@chromium.org): Inline the storage for aspects to avoid allocating when
-    // possible.
     // TODO(cwallez@chromium.org): Make the recompression optional, the calling code should know
     // if recompression can happen or not in Update() and Merge()
     template <typename T>
@@ -179,11 +183,17 @@
 
         SubresourceRange GetFullLayerRange(Aspect aspect, uint32_t layer) const;
 
+        // LayerCompressed should never be called when the aspect is compressed, otherwise it
+        // would need to check that mLayerCompressed is not null before indexing it.
         bool& LayerCompressed(uint32_t aspectIndex, uint32_t layerIndex);
         bool LayerCompressed(uint32_t aspectIndex, uint32_t layerIndex) const;
 
-        T& Data(uint32_t aspectIndex, uint32_t layerIndex = 0, uint32_t levelIndex = 0);
-        const T& Data(uint32_t aspectIndex, uint32_t layerIndex = 0, uint32_t levelIndex = 0) const;
+        // Return references to the data for a compressed plane / layer or subresource.
+        // Each variant must only be called at the matching compression level.
+        T& DataInline(uint32_t aspectIndex);
+        T& Data(uint32_t aspectIndex, uint32_t layer, uint32_t level = 0);
+        const T& DataInline(uint32_t aspectIndex) const;
+        const T& Data(uint32_t aspectIndex, uint32_t layer, uint32_t level = 0) const;
 
         Aspect mAspects;
         uint8_t mMipLevelCount;
@@ -193,6 +203,8 @@
         // compressed.
         static constexpr size_t kMaxAspects = 2;
         std::array<bool, kMaxAspects> mAspectCompressed;
+        std::array<T, kMaxAspects> mInlineAspectData;
+
         // Indexed as mLayerCompressed[aspectIndex * mArrayLayerCount + layer].
         std::unique_ptr<bool[]> mLayerCompressed;
 
@@ -214,16 +226,9 @@
         uint32_t aspectCount = GetAspectCount(aspects);
         ASSERT(aspectCount <= kMaxAspects);
 
-        mLayerCompressed = std::make_unique<bool[]>(aspectCount * mArrayLayerCount);
-        mData = std::make_unique<T[]>(aspectCount * mArrayLayerCount * mMipLevelCount);
-
         for (uint32_t aspectIndex = 0; aspectIndex < aspectCount; aspectIndex++) {
             mAspectCompressed[aspectIndex] = true;
-            Data(aspectIndex) = initialValue;
-        }
-
-        for (uint32_t layerIndex = 0; layerIndex < aspectCount * mArrayLayerCount; layerIndex++) {
-            mLayerCompressed[layerIndex] = true;
+            DataInline(aspectIndex) = initialValue;
         }
     }
 
@@ -243,7 +248,7 @@
                 if (fullAspects) {
                     SubresourceRange updateRange =
                         SubresourceRange::MakeFull(aspect, mArrayLayerCount, mMipLevelCount);
-                    updateFunc(updateRange, &Data(aspectIndex));
+                    updateFunc(updateRange, &DataInline(aspectIndex));
                     continue;
                 }
                 DecompressAspect(aspectIndex);
@@ -300,7 +305,7 @@
             // in `this` and can just iterate through it, merging with `other`'s constant value for
             // the aspect. For code simplicity this can be done with a call to Update().
             if (other.mAspectCompressed[aspectIndex]) {
-                const U& otherData = other.Data(aspectIndex);
+                const U& otherData = other.DataInline(aspectIndex);
                 Update(SubresourceRange::MakeFull(aspect, mArrayLayerCount, mMipLevelCount),
                        [&](const SubresourceRange& subrange, T* data) {
                            mergeFunc(subrange, data, otherData);
@@ -353,7 +358,7 @@
             if (mAspectCompressed[aspectIndex]) {
                 SubresourceRange range =
                     SubresourceRange::MakeFull(aspect, mArrayLayerCount, mMipLevelCount);
-                iterateFunc(range, Data(aspectIndex));
+                iterateFunc(range, DataInline(aspectIndex));
                 continue;
             }
 
@@ -386,7 +391,7 @@
         // Fastest path, the aspect is compressed!
         uint32_t dataIndex = aspectIndex * mArrayLayerCount * mMipLevelCount;
         if (mAspectCompressed[aspectIndex]) {
-            return Data(aspectIndex);
+            return DataInline(aspectIndex);
         }
 
         // Fast path, the array layer is compressed.
@@ -420,55 +425,80 @@
 
     template <typename T>
     bool SubresourceStorage<T>::IsLayerCompressedForTesting(Aspect aspect, uint32_t layer) const {
-        return mLayerCompressed[GetAspectIndex(aspect) * mArrayLayerCount + layer];
+        return mAspectCompressed[GetAspectIndex(aspect)] ||
+               mLayerCompressed[GetAspectIndex(aspect) * mArrayLayerCount + layer];
     }
 
     template <typename T>
     void SubresourceStorage<T>::DecompressAspect(uint32_t aspectIndex) {
         ASSERT(mAspectCompressed[aspectIndex]);
+        const T& aspectData = DataInline(aspectIndex);
+        mAspectCompressed[aspectIndex] = false;
 
-        ASSERT(LayerCompressed(aspectIndex, 0));
-        for (uint32_t layer = 1; layer < mArrayLayerCount; layer++) {
-            Data(aspectIndex, layer) = Data(aspectIndex);
-            ASSERT(LayerCompressed(aspectIndex, layer));
+        // Extra allocations are only needed when aspects are decompressed. Create them lazily.
+        if (mData == nullptr) {
+            ASSERT(mLayerCompressed == nullptr);
+
+            uint32_t aspectCount = GetAspectCount(mAspects);
+            mLayerCompressed = std::make_unique<bool[]>(aspectCount * mArrayLayerCount);
+            mData = std::make_unique<T[]>(aspectCount * mArrayLayerCount * mMipLevelCount);
+
+            for (uint32_t layerIndex = 0; layerIndex < aspectCount * mArrayLayerCount;
+                 layerIndex++) {
+                mLayerCompressed[layerIndex] = true;
+            }
         }
 
-        mAspectCompressed[aspectIndex] = false;
+        ASSERT(LayerCompressed(aspectIndex, 0));
+        for (uint32_t layer = 0; layer < mArrayLayerCount; layer++) {
+            Data(aspectIndex, layer) = aspectData;
+            ASSERT(LayerCompressed(aspectIndex, layer));
+        }
     }
 
     template <typename T>
     void SubresourceStorage<T>::RecompressAspect(uint32_t aspectIndex) {
         ASSERT(!mAspectCompressed[aspectIndex]);
+        // All layers of the aspect must be compressed for the aspect to possibly recompress.
+        for (uint32_t layer = 0; layer < mArrayLayerCount; layer++) {
+            if (!LayerCompressed(aspectIndex, layer)) {
+                return;
+            }
+        }
 
+        T layer0Data = Data(aspectIndex, 0);
         for (uint32_t layer = 1; layer < mArrayLayerCount; layer++) {
-            if (Data(aspectIndex, layer) != Data(aspectIndex) ||
-                !LayerCompressed(aspectIndex, layer)) {
+            if (Data(aspectIndex, layer) != layer0Data) {
                 return;
             }
         }
 
         mAspectCompressed[aspectIndex] = true;
+        DataInline(aspectIndex) = layer0Data;
     }
 
     template <typename T>
     void SubresourceStorage<T>::DecompressLayer(uint32_t aspectIndex, uint32_t layer) {
         ASSERT(LayerCompressed(aspectIndex, layer));
         ASSERT(!mAspectCompressed[aspectIndex]);
-
-        for (uint32_t level = 1; level < mMipLevelCount; level++) {
-            Data(aspectIndex, layer, level) = Data(aspectIndex, layer);
-        }
-
+        const T& layerData = Data(aspectIndex, layer);
         LayerCompressed(aspectIndex, layer) = false;
+
+        // We assume that (aspect, layer, 0) is stored at the same place as (aspect, layer) which
+        // allows starting the iteration at level 1.
+        for (uint32_t level = 1; level < mMipLevelCount; level++) {
+            Data(aspectIndex, layer, level) = layerData;
+        }
     }
 
     template <typename T>
     void SubresourceStorage<T>::RecompressLayer(uint32_t aspectIndex, uint32_t layer) {
         ASSERT(!LayerCompressed(aspectIndex, layer));
         ASSERT(!mAspectCompressed[aspectIndex]);
+        const T& level0Data = Data(aspectIndex, layer, 0);
 
         for (uint32_t level = 1; level < mMipLevelCount; level++) {
-            if (Data(aspectIndex, layer, level) != Data(aspectIndex, layer)) {
+            if (Data(aspectIndex, layer, level) != level0Data) {
                 return;
             }
         }
@@ -483,23 +513,38 @@
 
     template <typename T>
     bool& SubresourceStorage<T>::LayerCompressed(uint32_t aspectIndex, uint32_t layer) {
+        ASSERT(!mAspectCompressed[aspectIndex]);
         return mLayerCompressed[aspectIndex * mArrayLayerCount + layer];
     }
 
     template <typename T>
     bool SubresourceStorage<T>::LayerCompressed(uint32_t aspectIndex, uint32_t layer) const {
+        ASSERT(!mAspectCompressed[aspectIndex]);
         return mLayerCompressed[aspectIndex * mArrayLayerCount + layer];
     }
 
     template <typename T>
+    T& SubresourceStorage<T>::DataInline(uint32_t aspectIndex) {
+        ASSERT(mAspectCompressed[aspectIndex]);
+        return mInlineAspectData[aspectIndex];
+    }
+    template <typename T>
     T& SubresourceStorage<T>::Data(uint32_t aspectIndex, uint32_t layer, uint32_t level) {
+        ASSERT(level == 0 || !LayerCompressed(aspectIndex, layer));
+        ASSERT(!mAspectCompressed[aspectIndex]);
         return mData[(aspectIndex * mArrayLayerCount + layer) * mMipLevelCount + level];
     }
-
+    template <typename T>
+    const T& SubresourceStorage<T>::DataInline(uint32_t aspectIndex) const {
+        ASSERT(mAspectCompressed[aspectIndex]);
+        return mInlineAspectData[aspectIndex];
+    }
     template <typename T>
     const T& SubresourceStorage<T>::Data(uint32_t aspectIndex,
                                          uint32_t layer,
                                          uint32_t level) const {
+        ASSERT(level == 0 || !LayerCompressed(aspectIndex, layer));
+        ASSERT(!mAspectCompressed[aspectIndex]);
         return mData[(aspectIndex * mArrayLayerCount + layer) * mMipLevelCount + level];
     }
 
diff --git a/src/tests/unittests/SubresourceStorageTests.cpp b/src/tests/unittests/SubresourceStorageTests.cpp
index c128bed..b9b4fc3 100644
--- a/src/tests/unittests/SubresourceStorageTests.cpp
+++ b/src/tests/unittests/SubresourceStorageTests.cpp
@@ -143,8 +143,8 @@
                  layer < range.baseArrayLayer + range.layerCount; layer++) {
                 for (uint32_t level = range.baseMipLevel;
                      level < range.baseMipLevel + range.levelCount; level++) {
-                    ASSERT_EQ(data, Get(aspect, layer, level));
-                    ASSERT_EQ(data, real.Get(aspect, layer, level));
+                    EXPECT_EQ(data, Get(aspect, layer, level));
+                    EXPECT_EQ(data, real.Get(aspect, layer, level));
                 }
             }
         }
@@ -624,9 +624,54 @@
     CheckLayerCompressed(s, Aspect::Color, 2, false);
 }
 
+// Regression test for a missing check that layer 0 is compressed when recompressing.
+TEST(SubresourceStorageTest, Layer0NotCompressedBlocksAspectRecompression) {
+    const uint32_t kLayers = 2;
+    const uint32_t kLevels = 2;
+    SubresourceStorage<int> s(Aspect::Color, kLayers, kLevels);
+    FakeStorage<int> f(Aspect::Color, kLayers, kLevels);
+
+    // Set up s with zeros except (0, 1) which is garbage.
+    {
+        SubresourceRange range = SubresourceRange::MakeSingle(Aspect::Color, 0, 1);
+        CallUpdateOnBoth(&s, &f, range, [](const SubresourceRange&, int* data) { *data += 0xABC; });
+    }
+
+    // Other is 2x2 of zeroes
+    SubresourceStorage<int> other(Aspect::Color, kLayers, kLevels);
+
+    // Merge in `other`, which is fully compressed, using a no-op merge to trigger recompression.
+    CallMergeOnBoth(&s, &f, other, [](const SubresourceRange&, int*, int) {});
+
+    // The Color aspect should not have been recompressed.
+    CheckAspectCompressed(s, Aspect::Color, false);
+    CheckLayerCompressed(s, Aspect::Color, 0, false);
+}
+
+// Regression test for aspect decompression not copying to layer 0
+TEST(SubresourceStorageTest, AspectDecompressionUpdatesLayer0) {
+    const uint32_t kLayers = 2;
+    const uint32_t kLevels = 2;
+    SubresourceStorage<int> s(Aspect::Color, kLayers, kLevels, 3);
+    FakeStorage<int> f(Aspect::Color, kLayers, kLevels, 3);
+
+    // Cause decompression by writing to a single subresource.
+    {
+        SubresourceRange range = SubresourceRange::MakeSingle(Aspect::Color, 1, 1);
+        CallUpdateOnBoth(&s, &f, range, [](const SubresourceRange&, int* data) { *data += 0xABC; });
+    }
+
+    // Check that the aspect's value of 3 was correctly decompressed in layer 0.
+    CheckLayerCompressed(s, Aspect::Color, 0, true);
+    EXPECT_EQ(3, s.Get(Aspect::Color, 0, 0));
+    EXPECT_EQ(3, s.Get(Aspect::Color, 0, 1));
+}
+
 // Bugs found while testing:
 //  - mLayersCompressed not initialized to true.
 //  - DecompressLayer setting Compressed to true instead of false.
 //  - Get() checking for !compressed instead of compressed for the early exit.
 //  - ASSERT in RecompressLayers was inverted.
 //  - Two != being converted to == during a rework.
+//  - (with ASSERT) that RecompressAspect didn't check that layer 0 was compressed.
+//  - Missing decompression of layer 0 after introducing mInlineAspectData.