Load operations (#105)

* load ops: design + implementation (all backends)

* Animometer/glTFViewer: use just one subpass per frame
diff --git a/examples/Animometer.cpp b/examples/Animometer.cpp
index 7a27f34..d742ec1 100644
--- a/examples/Animometer.cpp
+++ b/examples/Animometer.cpp
@@ -138,17 +138,15 @@
 
     size_t i = 0;
 
-    std::vector<nxt::CommandBuffer> commands(50);
-    for (size_t j = 0; j < 50; j++) {
-
+    nxt::CommandBuffer commands;
+    {
         nxt::CommandBufferBuilder builder = device.CreateCommandBufferBuilder()
             .BeginRenderPass(renderpass, framebuffer)
             .BeginRenderSubpass()
             .SetRenderPipeline(pipeline)
             .Clone();
 
-        for (int k = 0; k < 200; k++) {
-
+        for (int k = 0; k < 10000; k++) {
             shaderData[i].time = f / 60.0f;
             builder.SetPushConstants(nxt::ShaderStageBit::Vertex, 0, 6, reinterpret_cast<uint32_t*>(&shaderData[i]))
                    .DrawArrays(3, 1, 0, 0);
@@ -157,10 +155,10 @@
 
         builder.EndRenderSubpass();
         builder.EndRenderPass();
-        commands[j] = builder.GetResult();
+        commands = builder.GetResult();
     }
 
-    queue.Submit(50, commands.data());
+    queue.Submit(1, &commands);
     backbuffer.TransitionUsage(nxt::TextureUsageBit::Present);
     swapchain.Present(backbuffer);
     DoFlush();
diff --git a/examples/SampleUtils.cpp b/examples/SampleUtils.cpp
index b666d3f..0760d87 100644
--- a/examples/SampleUtils.cpp
+++ b/examples/SampleUtils.cpp
@@ -126,7 +126,9 @@
     return device.CreateRenderPassBuilder()
         .SetAttachmentCount(2)
         .AttachmentSetFormat(0, nxt::TextureFormat::R8G8B8A8Unorm)
+        .AttachmentSetColorLoadOp(0, nxt::LoadOp::Clear)
         .AttachmentSetFormat(1, nxt::TextureFormat::D32FloatS8Uint)
+        .AttachmentSetDepthStencilLoadOps(1, nxt::LoadOp::Clear, nxt::LoadOp::Clear)
         .SetSubpassCount(1)
         .SubpassSetColorAttachment(0, 0, 0)
         .SubpassSetDepthStencilAttachment(0, 1)
diff --git a/examples/glTFViewer/glTFViewer.cpp b/examples/glTFViewer/glTFViewer.cpp
index 1c31d36..567bdad 100644
--- a/examples/glTFViewer/glTFViewer.cpp
+++ b/examples/glTFViewer/glTFViewer.cpp
@@ -80,7 +80,6 @@
 nxt::SwapChain swapchain;
 nxt::TextureView depthStencilView;
 nxt::RenderPass renderpass;
-nxt::Framebuffer lastFramebuffer;
 
 nxt::Buffer defaultBuffer;
 std::map<std::string, nxt::Buffer> buffers;
@@ -478,8 +477,7 @@
 
 // Drawing
 namespace {
-    void drawMesh(const tinygltf::Mesh& iMesh, const glm::mat4& model) {
-        nxt::CommandBufferBuilder cmd = device.CreateCommandBufferBuilder();
+    void drawMesh(nxt::CommandBufferBuilder& cmd, const tinygltf::Mesh& iMesh, const glm::mat4& model) {
         for (const auto& iPrim : iMesh.primitives) {
             if (iPrim.mode != gl::Triangles) {
                 fprintf(stderr, "unsupported primitive mode %d\n", iPrim.mode);
@@ -507,8 +505,6 @@
             material.uniformBuffer.SetSubData(0,
                     sizeof(u_transform_block) / sizeof(uint32_t),
                     reinterpret_cast<const uint32_t*>(&transforms));
-            cmd.BeginRenderPass(renderpass, lastFramebuffer);
-            cmd.BeginRenderSubpass();
             cmd.SetRenderPipeline(material.pipeline);
             cmd.TransitionBufferUsage(material.uniformBuffer, nxt::BufferUsageBit::Uniform);
             cmd.SetBindGroup(0, material.bindGroup0);
@@ -551,14 +547,10 @@
                 // DrawArrays
                 cmd.DrawArrays(vertexCount, 1, 0, 0);
             }
-            cmd.EndRenderSubpass();
-            cmd.EndRenderPass();
         }
-        auto commands = cmd.GetResult();
-        queue.Submit(1, &commands);
     }
 
-    void drawNode(const tinygltf::Node& node, const glm::mat4& parent = glm::mat4()) {
+    void drawNode(nxt::CommandBufferBuilder& cmd, const tinygltf::Node& node, const glm::mat4& parent = glm::mat4()) {
         glm::mat4 model;
         if (node.matrix.size() == 16) {
             model = glm::make_mat4(node.matrix.data());
@@ -579,22 +571,33 @@
         model = parent * model;
 
         for (const auto& meshID : node.meshes) {
-            drawMesh(scene.meshes[meshID], model);
+            drawMesh(cmd, scene.meshes[meshID], model);
         }
         for (const auto& child : node.children) {
-            drawNode(scene.nodes.at(child), model);
+            drawNode(cmd, scene.nodes.at(child), model);
         }
     }
 
     void frame() {
         nxt::Texture backbuffer;
-        GetNextFramebuffer(device, renderpass, swapchain, depthStencilView, &backbuffer, &lastFramebuffer);
+        nxt::Framebuffer framebuffer;
+        GetNextFramebuffer(device, renderpass, swapchain, depthStencilView, &backbuffer, &framebuffer);
+        framebuffer.AttachmentSetClearColor(0, 0.3f, 0.4f, 0.5f, 1);
 
         const auto& defaultSceneNodes = scene.scenes.at(scene.defaultScene);
+        nxt::CommandBufferBuilder cmd = device.CreateCommandBufferBuilder()
+            .BeginRenderPass(renderpass, framebuffer)
+            .BeginRenderSubpass()
+            .Clone();
         for (const auto& n : defaultSceneNodes) {
             const auto& node = scene.nodes.at(n);
-            drawNode(node);
+            drawNode(cmd, node);
         }
+        auto commands = cmd.EndRenderSubpass()
+            .EndRenderPass()
+            .GetResult();
+        queue.Submit(1, &commands);
+
         backbuffer.TransitionUsage(nxt::TextureUsageBit::Present);
         swapchain.Present(backbuffer);
         DoFlush();
diff --git a/next.json b/next.json
index 46edc52..c75bbf0 100644
--- a/next.json
+++ b/next.json
@@ -697,7 +697,29 @@
         "category": "native"
     },
     "framebuffer": {
-        "category": "object"
+        "category": "object",
+        "methods": [
+            {
+                "name": "attachment set clear color",
+                "TODO": "determine where to put these methods (probably BeginRenderPass?)",
+                "args": [
+                    {"name": "attachment slot", "type": "uint32_t"},
+                    {"name": "clear r", "type": "float"},
+                    {"name": "clear g", "type": "float"},
+                    {"name": "clear b", "type": "float"},
+                    {"name": "clear a", "type": "float"}
+                ]
+            },
+            {
+                "name": "attachment set clear depth stencil",
+                "TODO": "determine where to put these methods (probably BeginRenderPass?)",
+                "args": [
+                    {"name": "attachment slot", "type": "uint32_t"},
+                    {"name": "clear depth", "type": "float"},
+                    {"name": "clear stencil", "type": "uint32_t"}
+                ]
+            }
+        ]
     },
     "framebuffer builder": {
         "category": "object",
@@ -771,6 +793,13 @@
             {"value": 1, "name": "instance"}
         ]
     },
+    "load op": {
+        "category": "enum",
+        "values": [
+            {"value": 0, "name": "clear"},
+            {"value": 1, "name": "load"}
+        ]
+    },
     "pipeline layout": {
         "category": "object"
     },
@@ -847,6 +876,21 @@
                 ]
             },
             {
+                "name": "attachment set color load op",
+                "args": [
+                    {"name": "attachment slot", "type": "uint32_t"},
+                    {"name": "op", "type": "load op"}
+                ]
+            },
+            {
+                "name": "attachment set depth stencil load ops",
+                "args": [
+                    {"name": "attachment slot", "type": "uint32_t"},
+                    {"name": "depth op", "type": "load op"},
+                    {"name": "stencil op", "type": "load op"}
+                ]
+            },
+            {
                 "name": "set subpass count",
                 "args": [
                     {"name": "subpass count", "type": "uint32_t"}
diff --git a/src/backend/Framebuffer.cpp b/src/backend/Framebuffer.cpp
index bce4eaa..5f9cff8 100644
--- a/src/backend/Framebuffer.cpp
+++ b/src/backend/Framebuffer.cpp
@@ -25,16 +25,32 @@
     // Framebuffer
 
     FramebufferBase::FramebufferBase(FramebufferBuilder* builder)
-        : renderPass(std::move(builder->renderPass)), width(builder->width), height(builder->height), textureViews(std::move(builder->textureViews)) {
+        : device(builder->device), renderPass(std::move(builder->renderPass)),
+        width(builder->width), height(builder->height), textureViews(std::move(builder->textureViews)),
+        clearColors(textureViews.size()), clearDepthStencils(textureViews.size()) {
+    }
+
+    DeviceBase* FramebufferBase::GetDevice() {
+        return device;
     }
 
     RenderPassBase* FramebufferBase::GetRenderPass() {
         return renderPass.Get();
     }
 
-    TextureViewBase* FramebufferBase::GetTextureView(uint32_t index) {
-        ASSERT(index < textureViews.size());
-        return textureViews[index].Get();
+    TextureViewBase* FramebufferBase::GetTextureView(uint32_t attachmentSlot) {
+        ASSERT(attachmentSlot < textureViews.size());
+        return textureViews[attachmentSlot].Get();
+    }
+
+    FramebufferBase::ClearColor FramebufferBase::GetClearColor(uint32_t attachmentSlot) {
+        ASSERT(attachmentSlot < clearColors.size());
+        return clearColors[attachmentSlot];
+    }
+
+    FramebufferBase::ClearDepthStencil FramebufferBase::GetClearDepthStencil(uint32_t attachmentSlot) {
+        ASSERT(attachmentSlot < clearDepthStencils.size());
+        return clearDepthStencils[attachmentSlot];
     }
 
     uint32_t FramebufferBase::GetWidth() const {
@@ -45,6 +61,30 @@
         return height;
     }
 
+    // Stores the RGBA clear color for one attachment slot; an out-of-range slot is reported as a device error.
+    void FramebufferBase::AttachmentSetClearColor(uint32_t attachmentSlot, float clearR, float clearG, float clearB, float clearA) {
+        if (!(attachmentSlot < renderPass->GetAttachmentCount())) {
+            device->HandleError("Framebuffer attachment out of bounds");
+            return;
+        }
+        ASSERT(attachmentSlot < clearColors.size());
+        const float rgba[4] = {clearR, clearG, clearB, clearA};
+        for (int channel = 0; channel < 4; ++channel) {
+            clearColors[attachmentSlot].color[channel] = rgba[channel];
+        }
+    }
+
+    // Stores the depth/stencil clear values for one attachment slot; an out-of-range slot is a device error.
+    void FramebufferBase::AttachmentSetClearDepthStencil(uint32_t attachmentSlot, float clearDepth, uint32_t clearStencil) {
+        if (!(attachmentSlot < renderPass->GetAttachmentCount())) {
+            device->HandleError("Framebuffer attachment out of bounds");
+            return;
+        }
+        ASSERT(attachmentSlot < clearDepthStencils.size());
+        clearDepthStencils[attachmentSlot].depth = clearDepth;
+        clearDepthStencils[attachmentSlot].stencil = clearStencil;
+    }
+
     // FramebufferBuilder
 
     enum FramebufferSetProperties {
diff --git a/src/backend/Framebuffer.h b/src/backend/Framebuffer.h
index de1cba3..69f0af7 100644
--- a/src/backend/Framebuffer.h
+++ b/src/backend/Framebuffer.h
@@ -28,18 +28,37 @@
 
     class FramebufferBase : public RefCounted {
         public:
+            struct ClearColor {
+                float color[4] = {};
+            };
+
+            struct ClearDepthStencil {
+                float depth = 1.0f;
+                uint32_t stencil = 0;
+            };
+
             FramebufferBase(FramebufferBuilder* builder);
 
+            DeviceBase* GetDevice();
             RenderPassBase* GetRenderPass();
-            TextureViewBase* GetTextureView(uint32_t index);
+            TextureViewBase* GetTextureView(uint32_t attachmentSlot);
+            ClearColor GetClearColor(uint32_t attachmentSlot);
+            ClearDepthStencil GetClearDepthStencil(uint32_t attachmentSlot);
             uint32_t GetWidth() const;
             uint32_t GetHeight() const;
 
+            // NXT API
+            void AttachmentSetClearColor(uint32_t attachmentSlot, float clearR, float clearG, float clearB, float clearA);
+            void AttachmentSetClearDepthStencil(uint32_t attachmentSlot, float clearDepth, uint32_t clearStencil);
+
         private:
+            DeviceBase* device;
             Ref<RenderPassBase> renderPass;
             uint32_t width = 0;
             uint32_t height = 0;
             std::vector<Ref<TextureViewBase>> textureViews;
+            std::vector<ClearColor> clearColors;
+            std::vector<ClearDepthStencil> clearDepthStencils;
     };
 
     class FramebufferBuilder : public Builder<FramebufferBase> {
diff --git a/src/backend/RenderPass.cpp b/src/backend/RenderPass.cpp
index 1c5c277..b2e04a3 100644
--- a/src/backend/RenderPass.cpp
+++ b/src/backend/RenderPass.cpp
@@ -18,6 +18,7 @@
 #include "backend/Device.h"
 #include "backend/Texture.h"
 #include "common/Assert.h"
+#include "common/BitSetIterator.h"
 
 namespace backend {
 
@@ -25,6 +26,23 @@
 
     RenderPassBase::RenderPassBase(RenderPassBuilder* builder)
         : attachments(std::move(builder->attachments)), subpasses(std::move(builder->subpasses)) {
+        for (uint32_t s = 0; s < GetSubpassCount(); ++s) {
+            const auto& subpass = GetSubpassInfo(s);
+            for (auto location : IterateBitSet(subpass.colorAttachmentsSet)) {
+                auto attachmentSlot = subpass.colorAttachments[location];
+                auto& firstSubpass = attachments[attachmentSlot].firstSubpass;
+                if (firstSubpass == UINT32_MAX) {
+                    firstSubpass = s;
+                }
+            }
+            if (subpass.depthStencilAttachmentSet) {
+                auto attachmentSlot = subpass.depthStencilAttachment;
+                auto& firstSubpass = attachments[attachmentSlot].firstSubpass;
+                if (firstSubpass == UINT32_MAX) {
+                    firstSubpass = s;
+                }
+            }
+        }
     }
 
     uint32_t RenderPassBase::GetAttachmentCount() const {
@@ -95,8 +113,8 @@
             HandleError("Render pass attachment count not set yet");
             return;
         }
-        if (attachmentSlot > attachments.size()) {
-            HandleError("Render pass attachment index out of bounds");
+        if (attachmentSlot >= attachments.size()) {
+            HandleError("Render pass attachment slot out of bounds");
             return;
         }
         if (attachmentProperties[attachmentSlot][ATTACHMENT_PROPERTY_FORMAT]) {
@@ -108,6 +126,34 @@
         attachmentProperties[attachmentSlot].set(ATTACHMENT_PROPERTY_FORMAT);
     }
 
+    // Sets the color load op for one attachment; reports an error (and changes
+    // nothing) if the attachment count is unset or the slot is out of range.
+    void RenderPassBuilder::AttachmentSetColorLoadOp(uint32_t attachmentSlot, nxt::LoadOp op) {
+        const bool countSet = (propertiesSet & RENDERPASS_PROPERTY_ATTACHMENT_COUNT) != 0;
+        if (!countSet) {
+            HandleError("Render pass attachment count not set yet");
+        } else if (attachmentSlot >= attachments.size()) {
+            HandleError("Render pass attachment slot out of bounds");
+        } else {
+            attachments[attachmentSlot].colorLoadOp = op;
+        }
+    }
+
+    // Sets the depth and stencil load ops for one attachment; reports an error (and
+    // changes nothing) if the attachment count is unset or the slot is out of range.
+    void RenderPassBuilder::AttachmentSetDepthStencilLoadOps(uint32_t attachmentSlot, nxt::LoadOp depthOp, nxt::LoadOp stencilOp) {
+        const bool countSet = (propertiesSet & RENDERPASS_PROPERTY_ATTACHMENT_COUNT) != 0;
+        if (!countSet) {
+            HandleError("Render pass attachment count not set yet");
+        } else if (attachmentSlot >= attachments.size()) {
+            HandleError("Render pass attachment slot out of bounds");
+        } else {
+            attachments[attachmentSlot].depthLoadOp = depthOp;
+            attachments[attachmentSlot].stencilLoadOp = stencilOp;
+        }
+    }
+
+
     void RenderPassBuilder::SetSubpassCount(uint32_t subpassCount) {
         if ((propertiesSet & RENDERPASS_PROPERTY_SUBPASS_COUNT) != 0) {
             HandleError("Render pass subpass count property set multiple times");
diff --git a/src/backend/RenderPass.h b/src/backend/RenderPass.h
index 036619c..f3ec74c 100644
--- a/src/backend/RenderPass.h
+++ b/src/backend/RenderPass.h
@@ -34,10 +34,19 @@
 
             struct AttachmentInfo {
                 nxt::TextureFormat format;
+                nxt::LoadOp colorLoadOp = nxt::LoadOp::Load;
+                nxt::LoadOp depthLoadOp = nxt::LoadOp::Load;
+                nxt::LoadOp stencilLoadOp = nxt::LoadOp::Load;
+                // The first subpass that this attachment is used in.
+                // This is used to determine, for each subpass, whether each
+                // of its attachments is being used for the first time.
+                uint32_t firstSubpass = UINT32_MAX;
             };
 
             struct SubpassInfo {
+                // Set of locations which are set
                 std::bitset<kMaxColorAttachments> colorAttachmentsSet;
+                // Mapping from location to attachment slot
                 std::array<uint32_t, kMaxColorAttachments> colorAttachments;
                 bool depthStencilAttachmentSet = false;
                 uint32_t depthStencilAttachment = 0;
@@ -58,12 +67,12 @@
         public:
             RenderPassBuilder(DeviceBase* device);
 
-            bool WasConsumed() const;
-
             // NXT API
             RenderPassBase* GetResultImpl() override;
             void SetAttachmentCount(uint32_t attachmentCount);
             void AttachmentSetFormat(uint32_t attachmentSlot, nxt::TextureFormat format);
+            void AttachmentSetColorLoadOp(uint32_t attachmentSlot, nxt::LoadOp op);
+            void AttachmentSetDepthStencilLoadOps(uint32_t attachmentSlot, nxt::LoadOp depthOp, nxt::LoadOp stencilOp);
             void SetSubpassCount(uint32_t subpassCount);
             void SubpassSetColorAttachment(uint32_t subpass, uint32_t outputAttachmentLocation, uint32_t attachmentSlot);
             void SubpassSetDepthStencilAttachment(uint32_t subpass, uint32_t attachmentSlot);
diff --git a/src/backend/Texture.cpp b/src/backend/Texture.cpp
index bd6649d..efa5044 100644
--- a/src/backend/Texture.cpp
+++ b/src/backend/Texture.cpp
@@ -52,6 +52,17 @@
         }
     }
 
+    // Reports whether `format` is a depth/stencil format: true for D32FloatS8Uint,
+    // false for R8G8B8A8Unorm. Any other value hits UNREACHABLE().
+    bool TextureFormatHasDepthOrStencil(nxt::TextureFormat format) {
+        switch (format) {
+            case nxt::TextureFormat::D32FloatS8Uint: return true;
+            case nxt::TextureFormat::R8G8B8A8Unorm: return false;
+            default:
+                UNREACHABLE();
+        }
+    }
+
 
     // TextureBase
 
diff --git a/src/backend/Texture.h b/src/backend/Texture.h
index ca48e6d..1cf852b 100644
--- a/src/backend/Texture.h
+++ b/src/backend/Texture.h
@@ -26,6 +26,7 @@
     uint32_t TextureFormatPixelSize(nxt::TextureFormat format);
     bool TextureFormatHasDepth(nxt::TextureFormat format);
     bool TextureFormatHasStencil(nxt::TextureFormat format);
+    bool TextureFormatHasDepthOrStencil(nxt::TextureFormat format);
 
     class TextureBase : public RefCounted {
         public:
diff --git a/src/backend/d3d12/CommandBufferD3D12.cpp b/src/backend/d3d12/CommandBufferD3D12.cpp
index eec4db7..845d7f3 100644
--- a/src/backend/d3d12/CommandBufferD3D12.cpp
+++ b/src/backend/d3d12/CommandBufferD3D12.cpp
@@ -262,21 +262,59 @@
                 case Command::BeginRenderSubpass:
                     {
                         commands.NextCommand<BeginRenderSubpassCmd>();
-                        Framebuffer::OMSetRenderTargetArgs args = currentFramebuffer->GetSubpassOMSetRenderTargetArgs(currentSubpass);
+                        const auto& subpass = currentRenderPass->GetSubpassInfo(currentSubpass);
 
-                        // HACK(enga@google.com): Remove when clearing is implemented
-                        for (uint32_t index = 0; index < args.numRTVs; ++index) {
-                            static const float clearColor[] = { 0.0f, 0.0f, 0.0f, 0.0f };
-                            commandList->ClearRenderTargetView(args.RTVs[index], clearColor, 0, nullptr);
+                        Framebuffer::OMSetRenderTargetArgs args = currentFramebuffer->GetSubpassOMSetRenderTargetArgs(currentSubpass);
+                        if (args.dsv.ptr) {
+                            commandList->OMSetRenderTargets(args.numRTVs, args.RTVs.data(), FALSE, &args.dsv);
+                        } else {
+                            commandList->OMSetRenderTargets(args.numRTVs, args.RTVs.data(), FALSE, nullptr);
                         }
 
-                        if (args.dsv.ptr) {
-                            // HACK(enga@google.com): Remove when clearing is implemented
-                            commandList->ClearDepthStencilView(args.dsv, D3D12_CLEAR_FLAG_DEPTH | D3D12_CLEAR_FLAG_STENCIL, 1.0, 0, 0, nullptr);
+                        // Clear framebuffer attachments as needed
 
-                            commandList->OMSetRenderTargets(args.numRTVs, args.RTVs, FALSE, &args.dsv);
-                        } else {
-                            commandList->OMSetRenderTargets(args.numRTVs, args.RTVs, FALSE, nullptr);
+                        for (unsigned int location : IterateBitSet(subpass.colorAttachmentsSet)) {
+                            uint32_t attachmentSlot = subpass.colorAttachments[location];
+                            const auto& attachmentInfo = currentRenderPass->GetAttachmentInfo(attachmentSlot);
+
+                            // The load op applies only in the first subpass that uses the attachment
+                            if (attachmentInfo.firstSubpass == currentSubpass) {
+                                // Load op - color: the clear must target the attachment's RTV descriptor, not a DSV
+                                if (attachmentInfo.colorLoadOp == nxt::LoadOp::Clear) {
+                                    auto handle = currentFramebuffer->GetRTVDescriptor(attachmentSlot);
+                                    const auto& clear = currentFramebuffer->GetClearColor(attachmentSlot);
+                                    commandList->ClearRenderTargetView(handle, clear.color, 0, nullptr);
+                                }
+                            }
+                        }
+
+                        if (subpass.depthStencilAttachmentSet) {
+                            uint32_t attachmentSlot = subpass.depthStencilAttachment;
+                            const auto& attachmentInfo = currentRenderPass->GetAttachmentInfo(attachmentSlot);
+
+                            // Only perform load op on first use
+                            if (attachmentInfo.firstSubpass == currentSubpass) {
+                                // Load op - depth/stencil
+                                bool doDepthClear = TextureFormatHasDepth(attachmentInfo.format) &&
+                                    (attachmentInfo.depthLoadOp == nxt::LoadOp::Clear);
+                                bool doStencilClear = TextureFormatHasStencil(attachmentInfo.format) &&
+                                    (attachmentInfo.stencilLoadOp == nxt::LoadOp::Clear);
+
+                                D3D12_CLEAR_FLAGS clearFlags = {};
+                                if (doDepthClear) {
+                                    clearFlags |= D3D12_CLEAR_FLAG_DEPTH;
+                                }
+                                if (doStencilClear) {
+                                    clearFlags |= D3D12_CLEAR_FLAG_STENCIL;
+                                }
+                                if (clearFlags) {  // skip the call when neither depth nor stencil needs clearing
+                                    auto handle = currentFramebuffer->GetDSVDescriptor(attachmentSlot);  // DSV heap handle, not RTV
+                                    const auto& clear = currentFramebuffer->GetClearDepthStencil(attachmentSlot);
+                                    // TODO(kainino@chromium.org): investigate: should the NXT clear stencil type be uint8_t?
+                                    uint8_t clearStencil = static_cast<uint8_t>(clear.stencil);
+                                    commandList->ClearDepthStencilView(handle, clearFlags, clear.depth, clearStencil, 0, nullptr);
+                                }
+                            }
                         }
 
                         static constexpr std::array<float, 4> defaultBlendFactor = { 0, 0, 0, 0 };
diff --git a/src/backend/d3d12/FramebufferD3D12.cpp b/src/backend/d3d12/FramebufferD3D12.cpp
index 29a6dbb..06140a0 100644
--- a/src/backend/d3d12/FramebufferD3D12.cpp
+++ b/src/backend/d3d12/FramebufferD3D12.cpp
@@ -68,17 +68,26 @@
         const auto& subpassInfo = GetRenderPass()->GetSubpassInfo(subpassIndex);
         OMSetRenderTargetArgs args = {};
 
-        for (uint32_t index : IterateBitSet(subpassInfo.colorAttachmentsSet)) {
-            uint32_t heapIndex = attachmentHeapIndices[subpassInfo.colorAttachments[index]];
-            args.RTVs[args.numRTVs++] = rtvHeap.GetCPUHandle(heapIndex);
+        for (uint32_t location : IterateBitSet(subpassInfo.colorAttachmentsSet)) {
+            uint32_t slot = subpassInfo.colorAttachments[location];
+            args.RTVs[args.numRTVs] = GetRTVDescriptor(slot);
+            args.numRTVs++;
         }
         if (subpassInfo.depthStencilAttachmentSet) {
-            uint32_t heapIndex = attachmentHeapIndices[subpassInfo.depthStencilAttachment];
-            args.dsv = dsvHeap.GetCPUHandle(heapIndex);
+            uint32_t slot = subpassInfo.depthStencilAttachment;
+            args.dsv = GetDSVDescriptor(slot);
         }
 
         return args;
     }
 
+    D3D12_CPU_DESCRIPTOR_HANDLE Framebuffer::GetRTVDescriptor(uint32_t attachmentSlot) {
+        return rtvHeap.GetCPUHandle(attachmentHeapIndices[attachmentSlot]);
+    }
+
+    D3D12_CPU_DESCRIPTOR_HANDLE Framebuffer::GetDSVDescriptor(uint32_t attachmentSlot) {
+        return dsvHeap.GetCPUHandle(attachmentHeapIndices[attachmentSlot]);
+    }
+
 }
 }
diff --git a/src/backend/d3d12/FramebufferD3D12.h b/src/backend/d3d12/FramebufferD3D12.h
index 9f3c7c4..677b21b 100644
--- a/src/backend/d3d12/FramebufferD3D12.h
+++ b/src/backend/d3d12/FramebufferD3D12.h
@@ -21,6 +21,7 @@
 #include "backend/d3d12/d3d12_platform.h"
 #include "backend/d3d12/DescriptorHeapAllocator.h"
 
+#include <array>
 #include <vector>
 
 namespace backend {
@@ -32,12 +33,14 @@
         public:
             struct OMSetRenderTargetArgs {
                 unsigned int numRTVs = 0;
-                D3D12_CPU_DESCRIPTOR_HANDLE RTVs[kMaxColorAttachments] = {};
+                std::array<D3D12_CPU_DESCRIPTOR_HANDLE, kMaxColorAttachments> RTVs = {};
                 D3D12_CPU_DESCRIPTOR_HANDLE dsv = {};
             };
 
             Framebuffer(Device* device, FramebufferBuilder* builder);
             OMSetRenderTargetArgs GetSubpassOMSetRenderTargetArgs(uint32_t subpassIndex);
+            D3D12_CPU_DESCRIPTOR_HANDLE GetRTVDescriptor(uint32_t attachmentSlot);
+            D3D12_CPU_DESCRIPTOR_HANDLE GetDSVDescriptor(uint32_t attachmentSlot);
 
         private:
             Device* device = nullptr;
diff --git a/src/backend/metal/CommandBufferMTL.mm b/src/backend/metal/CommandBufferMTL.mm
index 0012e20..b68d40b 100644
--- a/src/backend/metal/CommandBufferMTL.mm
+++ b/src/backend/metal/CommandBufferMTL.mm
@@ -87,33 +87,61 @@
                 const auto& info = currentRenderPass->GetSubpassInfo(subpass);
 
                 MTLRenderPassDescriptor* descriptor = [MTLRenderPassDescriptor renderPassDescriptor];
-                for (uint32_t index = 0; index < info.colorAttachments.size(); ++index) {
-                    uint32_t attachment = info.colorAttachments[index];
+                for (unsigned int location : IterateBitSet(info.colorAttachmentsSet)) {
+                    uint32_t attachment = info.colorAttachments[location];
+                    const auto& attachmentInfo = currentRenderPass->GetAttachmentInfo(attachment);
 
                     auto textureView = currentFramebuffer->GetTextureView(attachment);
                     auto texture = ToBackend(textureView->GetTexture())->GetMTLTexture();
-                    descriptor.colorAttachments[index].texture = texture;
-                    descriptor.colorAttachments[index].loadAction = MTLLoadActionLoad;
-                    descriptor.colorAttachments[index].clearColor = MTLClearColorMake(0.0, 0.0, 0.0, 0.0);
-                    descriptor.colorAttachments[index].storeAction = MTLStoreActionStore;
+
+                    bool isFirstUse = attachmentInfo.firstSubpass == subpass;
+                    bool shouldClearOnFirstUse = attachmentInfo.colorLoadOp == nxt::LoadOp::Clear;
+                    if (isFirstUse && shouldClearOnFirstUse) {
+                        auto clearValue = currentFramebuffer->GetClearColor(attachment);  // indexed by attachment slot, not output location
+                        descriptor.colorAttachments[location].loadAction = MTLLoadActionClear;
+                        descriptor.colorAttachments[location].clearColor = MTLClearColorMake(clearValue.color[0], clearValue.color[1], clearValue.color[2], clearValue.color[3]);
+                    } else {
+                        descriptor.colorAttachments[location].loadAction = MTLLoadActionLoad;
+                    }
+
+                    descriptor.colorAttachments[location].texture = texture;
+                    descriptor.colorAttachments[location].storeAction = MTLStoreActionStore;
                 }
                 if (info.depthStencilAttachmentSet) {
                     uint32_t attachment = info.depthStencilAttachment;
+                    const auto& attachmentInfo = currentRenderPass->GetAttachmentInfo(attachment);
 
                     auto textureView = currentFramebuffer->GetTextureView(attachment);
                     id<MTLTexture> texture = ToBackend(textureView->GetTexture())->GetMTLTexture();
                     nxt::TextureFormat format = textureView->GetTexture()->GetFormat();
+
+                    bool isFirstUse = attachmentInfo.firstSubpass == subpass;
+                    const auto& clearValues = currentFramebuffer->GetClearDepthStencil(attachment);
+
                     if (TextureFormatHasDepth(format)) {
                         descriptor.depthAttachment.texture = texture;
-                        descriptor.depthAttachment.loadAction = MTLLoadActionClear;
-                        descriptor.depthAttachment.clearDepth = 1.0;
                         descriptor.depthAttachment.storeAction = MTLStoreActionStore;
+
+                        bool shouldClearDepthOnFirstUse = attachmentInfo.depthLoadOp == nxt::LoadOp::Clear;
+                        if (isFirstUse && shouldClearDepthOnFirstUse) {
+                            descriptor.depthAttachment.loadAction = MTLLoadActionClear;
+                            descriptor.depthAttachment.clearDepth = clearValues.depth;
+                        } else {
+                            descriptor.depthAttachment.loadAction = MTLLoadActionLoad;
+                        }
                     }
+
                     if (TextureFormatHasStencil(format)) {
                         descriptor.stencilAttachment.texture = texture;
-                        descriptor.stencilAttachment.loadAction = MTLLoadActionClear;
-                        descriptor.stencilAttachment.clearStencil = 0;
                         descriptor.stencilAttachment.storeAction = MTLStoreActionStore;
+
+                        bool shouldClearStencilOnFirstUse = attachmentInfo.stencilLoadOp == nxt::LoadOp::Clear;
+                        if (isFirstUse && shouldClearStencilOnFirstUse) {
+                            descriptor.stencilAttachment.loadAction = MTLLoadActionClear;
+                            descriptor.stencilAttachment.clearStencil = clearValues.stencil;
+                        } else {
+                            descriptor.stencilAttachment.loadAction = MTLLoadActionLoad;
+                        }
                     }
                 }
 
diff --git a/src/backend/opengl/CommandBufferGL.cpp b/src/backend/opengl/CommandBufferGL.cpp
index 2668f79..9d18e3f 100644
--- a/src/backend/opengl/CommandBufferGL.cpp
+++ b/src/backend/opengl/CommandBufferGL.cpp
@@ -96,6 +96,8 @@
                 case Command::BeginRenderSubpass:
                     {
                         commands.NextCommand<BeginRenderSubpassCmd>();
+
+                        glBindFramebuffer(GL_READ_FRAMEBUFFER, 0);
                         // TODO(kainino@chromium.org): possible future
                         // optimization: create these framebuffers at
                         // Framebuffer build time (or maybe CommandBuffer build
@@ -104,31 +106,42 @@
                         glGenFramebuffers(1, &currentFBO);
                         glBindFramebuffer(GL_DRAW_FRAMEBUFFER, currentFBO);
 
-                        const auto& info = currentRenderPass->GetSubpassInfo(currentSubpass);
+                        const auto& subpass = currentRenderPass->GetSubpassInfo(currentSubpass);
 
-                        std::array<GLenum, kMaxColorAttachments> drawBuffers;
-                        drawBuffers.fill(GL_NONE);
+                        // Mapping from subpass color location to GL framebuffer
+                        // attachment points. Defaults to zero (GL_NONE).
+                        std::array<GLenum, kMaxColorAttachments> drawBuffers = {};
+
+                        // Construct GL framebuffer
+
                         unsigned int attachmentCount = 0;
-                        for (unsigned int attachmentSlot : IterateBitSet(info.colorAttachmentsSet)) {
-                            uint32_t attachment = info.colorAttachments[attachmentSlot];
+                        for (unsigned int location : IterateBitSet(subpass.colorAttachmentsSet)) {
+                            uint32_t attachment = subpass.colorAttachments[location];
 
                             auto textureView = currentFramebuffer->GetTextureView(attachment);
                             GLuint texture = ToBackend(textureView->GetTexture())->GetHandle();
+
+                            // Attach color buffers.
                             glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER,
-                                GL_COLOR_ATTACHMENT0 + attachmentSlot,
-                                GL_TEXTURE_2D, texture, 0);
-                            drawBuffers[attachmentSlot] = GL_COLOR_ATTACHMENT0 + attachmentSlot;
-                            attachmentCount = attachmentSlot + 1;
+                                    GL_COLOR_ATTACHMENT0 + location,
+                                    GL_TEXTURE_2D, texture, 0);
+                            drawBuffers[location] = GL_COLOR_ATTACHMENT0 + location;
+                            attachmentCount = location + 1;
+
+                            // TODO(kainino@chromium.org): the color clears (later in
+                            // this function) may be undefined for other texture formats.
+                            ASSERT(textureView->GetTexture()->GetFormat() == nxt::TextureFormat::R8G8B8A8Unorm);
                         }
-                        glDrawBuffers(attachmentCount, &drawBuffers[0]);
+                        glDrawBuffers(attachmentCount, drawBuffers.data());
 
-                        if (info.depthStencilAttachmentSet) {
-                            uint32_t attachment = info.depthStencilAttachment;
+                        if (subpass.depthStencilAttachmentSet) {
+                            uint32_t attachmentSlot = subpass.depthStencilAttachment;
 
-                            auto textureView = currentFramebuffer->GetTextureView(attachment);
+                            auto textureView = currentFramebuffer->GetTextureView(attachmentSlot);
                             GLuint texture = ToBackend(textureView->GetTexture())->GetHandle();
                             nxt::TextureFormat format = textureView->GetTexture()->GetFormat();
 
+                            // Attach depth/stencil buffer.
                             GLenum glAttachment = 0;
                             // TODO(kainino@chromium.org): it may be valid to just always use GL_DEPTH_STENCIL_ATTACHMENT here.
                             if (TextureFormatHasDepth(format)) {
@@ -141,12 +154,50 @@
                                 glAttachment = GL_STENCIL_ATTACHMENT;
                             }
 
-                            glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER,
-                                    glAttachment, GL_TEXTURE_2D, texture, 0);
-                            // Load action
-                            glClearStencil(0);
-                            glClearDepth(1.0);
-                            glClear(GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT);
+                            glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, glAttachment, GL_TEXTURE_2D, texture, 0);
+
+                            // TODO(kainino@chromium.org): the depth/stencil clears (later in
+                            // this function) may be undefined for other texture formats.
+                            ASSERT(format == nxt::TextureFormat::D32FloatS8Uint);
+                        }
+
+                        // Clear framebuffer attachments as needed
+
+                        for (unsigned int location : IterateBitSet(subpass.colorAttachmentsSet)) {
+                            uint32_t attachmentSlot = subpass.colorAttachments[location];
+                            const auto& attachmentInfo = currentRenderPass->GetAttachmentInfo(attachmentSlot);
+
+                            // Only perform load op on the subpass that first uses this attachment
+                            if (attachmentInfo.firstSubpass == currentSubpass) {
+                                // Load op - color: clear value is looked up by attachment slot, draw buffer by location
+                                if (attachmentInfo.colorLoadOp == nxt::LoadOp::Clear) {
+                                    const auto& clear = currentFramebuffer->GetClearColor(attachmentSlot);
+                                    glClearBufferfv(GL_COLOR, location, clear.color);
+                                }
+                            }
+                        }
+
+                        if (subpass.depthStencilAttachmentSet) {
+                            uint32_t attachmentSlot = subpass.depthStencilAttachment;
+                            const auto& attachmentInfo = currentRenderPass->GetAttachmentInfo(attachmentSlot);
+
+                            // Only perform load op on first use
+                            if (attachmentInfo.firstSubpass == currentSubpass) {
+                                // Load op - depth/stencil
+                                const auto& clear = currentFramebuffer->GetClearDepthStencil(subpass.depthStencilAttachment);
+                                bool doDepthClear = TextureFormatHasDepth(attachmentInfo.format) &&
+                                    (attachmentInfo.depthLoadOp == nxt::LoadOp::Clear);
+                                bool doStencilClear = TextureFormatHasStencil(attachmentInfo.format) &&
+                                    (attachmentInfo.stencilLoadOp == nxt::LoadOp::Clear);
+                                if (doDepthClear && doStencilClear) {
+                                    glClearBufferfi(GL_DEPTH_STENCIL, 0, clear.depth, clear.stencil);
+                                } else if (doDepthClear) {
+                                    glClearBufferfv(GL_DEPTH, 0, &clear.depth);
+                                } else if (doStencilClear) {
+                                    const GLint clearStencil = clear.stencil;
+                                    glClearBufferiv(GL_STENCIL, 0, &clearStencil);
+                                }
+                            }
                         }
 
                         glBlendColor(0, 0, 0, 0);
diff --git a/src/backend/opengl/RenderPipelineGL.cpp b/src/backend/opengl/RenderPipelineGL.cpp
index a5c2104..46456df 100644
--- a/src/backend/opengl/RenderPipelineGL.cpp
+++ b/src/backend/opengl/RenderPipelineGL.cpp
@@ -16,8 +16,8 @@
 
 #include "backend/opengl/BlendStateGL.h"
 #include "backend/opengl/DepthStencilStateGL.h"
-#include "backend/opengl/PersistentPipelineStateGL.h"
 #include "backend/opengl/OpenGLBackend.h"
+#include "backend/opengl/PersistentPipelineStateGL.h"
 
 namespace backend {
 namespace opengl {
diff --git a/src/utils/MetalBinding.mm b/src/utils/MetalBinding.mm
index 283188e..027b2c0 100644
--- a/src/utils/MetalBinding.mm
+++ b/src/utils/MetalBinding.mm
@@ -103,21 +103,6 @@
                 currentTexture = currentDrawable.texture;
                 [currentTexture retain];
 
-                // Clear initial contents of the texture
-                {
-                    MTLRenderPassDescriptor* passDescriptor = [MTLRenderPassDescriptor renderPassDescriptor];
-                    passDescriptor.colorAttachments[0].texture = currentTexture;
-                    passDescriptor.colorAttachments[0].loadAction = MTLLoadActionClear;
-                    passDescriptor.colorAttachments[0].storeAction = MTLStoreActionStore;
-                    passDescriptor.colorAttachments[0].clearColor = MTLClearColorMake(0.0, 0.0, 0.0, 1.0);
-
-                    id<MTLCommandBuffer> commandBuffer = [commandQueue commandBuffer];
-                    id<MTLRenderCommandEncoder> commandEncoder = [commandBuffer
-                        renderCommandEncoderWithDescriptor:passDescriptor];
-                    [commandEncoder endEncoding];
-                    [commandBuffer commit];
-                }
-
                 nextTexture->texture = reinterpret_cast<void*>(currentTexture);
 
                 return NXT_SWAP_CHAIN_NO_ERROR;
diff --git a/src/utils/OpenGLBinding.cpp b/src/utils/OpenGLBinding.cpp
index 748d15f..1b23f3f 100644
--- a/src/utils/OpenGLBinding.cpp
+++ b/src/utils/OpenGLBinding.cpp
@@ -54,12 +54,6 @@
                 glDeleteFramebuffers(1, &backFBO);
             }
 
-            void HACKCLEAR() {
-                glBindFramebuffer(GL_DRAW_FRAMEBUFFER, backFBO);
-                glClearColor(0, 0, 0, 1);
-                glClear(GL_COLOR_BUFFER_BIT);
-            }
-
             // For GenerateSwapChainImplementation
             friend class SwapChainImpl;
 
@@ -89,8 +83,6 @@
                 // Reallocate the texture
                 glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, width, height, 0,
                         GL_RGBA, GL_UNSIGNED_BYTE, nullptr);
-                // Clear the newly (re-)allocated texture
-                HACKCLEAR();
 
                 return NXT_SWAP_CHAIN_NO_ERROR;
             }
@@ -106,7 +98,6 @@
                 glBlitFramebuffer(0, 0, cfgWidth, cfgHeight, 0, 0, cfgWidth, cfgHeight,
                         GL_COLOR_BUFFER_BIT, GL_NEAREST);
                 glfwSwapBuffers(window);
-                HACKCLEAR();
 
                 return NXT_SWAP_CHAIN_NO_ERROR;
             }