Pipeline cache D3D12 backend impl

Add D3D12 pipeline caching impl: store cachedPSO blob in cached blob.
Record root signature ID3DBlob in cache key together with
D3D_SHADER_BYTECODE, D3D12_GRAPHICS_PIPELINE_STATE_DESC or
D3D12_COMPUTE_PIPELINE_STATE_DESC.

Shader caching is not added.

Add some pipeline caching negative tests.

Bug: dawn:549
Change-Id: Id1cb560b49f1cf495860e2e0bcf92d8d988c5379
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/91180
Auto-Submit: Shrek Shao <shrekshao@google.com>
Kokoro: Kokoro <noreply+kokoro@google.com>
Reviewed-by: Austin Eng <enga@chromium.org>
Reviewed-by: Loko Kung <lokokung@google.com>
Commit-Queue: Austin Eng <enga@chromium.org>
diff --git a/.gitattributes b/.gitattributes
index 5333936..a0a0cf3 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -7,3 +7,5 @@
 *.sh     eol=lf
 *.spvasm eol=lf
 *.wgsl   eol=lf
+*.h      eol=lf
+*.cpp    eol=lf
\ No newline at end of file
diff --git a/src/dawn/native/BUILD.gn b/src/dawn/native/BUILD.gn
index b470336..4ac4e30 100644
--- a/src/dawn/native/BUILD.gn
+++ b/src/dawn/native/BUILD.gn
@@ -378,6 +378,7 @@
       "d3d12/BufferD3D12.h",
       "d3d12/CPUDescriptorHeapAllocationD3D12.cpp",
       "d3d12/CPUDescriptorHeapAllocationD3D12.h",
+      "d3d12/CacheKeyD3D12.cpp",
       "d3d12/CommandAllocatorManager.cpp",
       "d3d12/CommandAllocatorManager.h",
       "d3d12/CommandBufferD3D12.cpp",
@@ -406,6 +407,7 @@
       "d3d12/NativeSwapChainImplD3D12.h",
       "d3d12/PageableD3D12.cpp",
       "d3d12/PageableD3D12.h",
+      "d3d12/PipelineCacheD3D12.cpp",
       "d3d12/PipelineLayoutD3D12.cpp",
       "d3d12/PipelineLayoutD3D12.h",
       "d3d12/PlatformFunctions.cpp",
diff --git a/src/dawn/native/BlobCache.cpp b/src/dawn/native/BlobCache.cpp
index 878ded6..219fb1e 100644
--- a/src/dawn/native/BlobCache.cpp
+++ b/src/dawn/native/BlobCache.cpp
@@ -21,39 +21,47 @@
 
 namespace dawn::native {
 
-CachedBlob::CachedBlob(size_t size) {
-    if (size != 0) {
-        Reset(size);
+// static
+CachedBlob CachedBlob::Create(size_t size) {
+    if (size > 0) {
+        uint8_t* data = new uint8_t[size];
+        return CachedBlob(data, size, [=]() { delete[] data; });
+    } else {
+        return CachedBlob();
     }
 }
 
+CachedBlob::CachedBlob() : mData(nullptr), mSize(0), mDeleter({}) {}
+
+CachedBlob::CachedBlob(uint8_t* data, size_t size, std::function<void()> deleter)
+    : mData(data), mSize(size), mDeleter(deleter) {}
+
 CachedBlob::CachedBlob(CachedBlob&&) = default;
 
-CachedBlob::~CachedBlob() = default;
-
 CachedBlob& CachedBlob::operator=(CachedBlob&&) = default;
 
+CachedBlob::~CachedBlob() {
+    if (mDeleter) {
+        mDeleter();
+    }
+}
+
 bool CachedBlob::Empty() const {
     return mSize == 0;
 }
 
 const uint8_t* CachedBlob::Data() const {
-    return mData.get();
+    return mData;
 }
 
 uint8_t* CachedBlob::Data() {
-    return mData.get();
+    return mData;
 }
 
 size_t CachedBlob::Size() const {
     return mSize;
 }
 
-void CachedBlob::Reset(size_t size) {
-    mSize = size;
-    mData = std::make_unique<uint8_t[]>(size);
-}
-
 BlobCache::BlobCache(dawn::platform::CachingInterface* cachingInterface)
     : mCache(cachingInterface) {}
 
@@ -72,18 +80,19 @@
 }
 
 CachedBlob BlobCache::LoadInternal(const CacheKey& key) {
-    CachedBlob result;
     if (mCache == nullptr) {
-        return result;
+        return CachedBlob();
     }
     const size_t expectedSize = mCache->LoadData(key.data(), key.size(), nullptr, 0);
     if (expectedSize > 0) {
-        result.Reset(expectedSize);
+        // Need to put this inside to trigger copy elision.
+        CachedBlob result = CachedBlob::Create(expectedSize);
         const size_t actualSize =
             mCache->LoadData(key.data(), key.size(), result.Data(), expectedSize);
         ASSERT(expectedSize == actualSize);
+        return result;
     }
-    return result;
+    return CachedBlob();
 }
 
 void BlobCache::StoreInternal(const CacheKey& key, size_t valueSize, const void* value) {
diff --git a/src/dawn/native/BlobCache.h b/src/dawn/native/BlobCache.h
index b1c0a63..d491858 100644
--- a/src/dawn/native/BlobCache.h
+++ b/src/dawn/native/BlobCache.h
@@ -15,9 +15,16 @@
 #ifndef SRC_DAWN_NATIVE_BLOBCACHE_H_
 #define SRC_DAWN_NATIVE_BLOBCACHE_H_
 
+#include <functional>
 #include <memory>
 #include <mutex>
 
+#include "dawn/common/Platform.h"
+
+#if defined(DAWN_PLATFORM_WINDOWS)
+#include "dawn/native/d3d12/d3d12_platform.h"
+#endif  // DAWN_PLATFORM_WINDOWS
+
 namespace dawn::platform {
 class CachingInterface;
 }
@@ -30,21 +37,34 @@
 
 class CachedBlob {
   public:
-    explicit CachedBlob(size_t size = 0);
-    CachedBlob(CachedBlob&&);
-    ~CachedBlob();
+    static CachedBlob Create(size_t size);
 
+#if defined(DAWN_PLATFORM_WINDOWS)
+    static CachedBlob Create(Microsoft::WRL::ComPtr<ID3DBlob> blob);
+#endif  // DAWN_PLATFORM_WINDOWS
+
+    CachedBlob(const CachedBlob&) = delete;
+    CachedBlob& operator=(const CachedBlob&) = delete;
+
+    CachedBlob(CachedBlob&&);
     CachedBlob& operator=(CachedBlob&&);
 
+    ~CachedBlob();
+
     bool Empty() const;
     const uint8_t* Data() const;
     uint8_t* Data();
     size_t Size() const;
     void Reset(size_t size);
 
+    CachedBlob();
+
   private:
-    std::unique_ptr<uint8_t[]> mData = nullptr;
-    size_t mSize = 0;
+    explicit CachedBlob(uint8_t* data, size_t size, std::function<void()> deleter);
+
+    uint8_t* mData;
+    size_t mSize;
+    std::function<void()> mDeleter;
 };
 
 // This class should always be thread-safe because it may be called asynchronously. Its purpose
diff --git a/src/dawn/native/CMakeLists.txt b/src/dawn/native/CMakeLists.txt
index 2a03a18..0911e9d 100644
--- a/src/dawn/native/CMakeLists.txt
+++ b/src/dawn/native/CMakeLists.txt
@@ -247,6 +247,7 @@
         "d3d12/CPUDescriptorHeapAllocationD3D12.h"
         "d3d12/CommandAllocatorManager.cpp"
         "d3d12/CommandAllocatorManager.h"
+        "d3d12/CacheKeyD3D12.cpp"
         "d3d12/CommandBufferD3D12.cpp"
         "d3d12/CommandBufferD3D12.h"
         "d3d12/CommandRecordingContext.cpp"
@@ -273,6 +274,7 @@
         "d3d12/NativeSwapChainImplD3D12.h"
         "d3d12/PageableD3D12.cpp"
         "d3d12/PageableD3D12.h"
+        "d3d12/PipelineCacheD3D12.cpp"
         "d3d12/PipelineLayoutD3D12.cpp"
         "d3d12/PipelineLayoutD3D12.h"
         "d3d12/PlatformFunctions.cpp"
diff --git a/src/dawn/native/Device.cpp b/src/dawn/native/Device.cpp
index 79b94a4..dba1ed0 100644
--- a/src/dawn/native/Device.cpp
+++ b/src/dawn/native/Device.cpp
@@ -628,6 +628,23 @@
     return nullptr;
 }
 
+CachedBlob DeviceBase::LoadCachedBlob(const CacheKey& key) {
+    BlobCache* blobCache = GetBlobCache();
+    if (!blobCache) {
+        return CachedBlob();
+    }
+    return blobCache->Load(key);
+}
+
+void DeviceBase::StoreCachedBlob(const CacheKey& key, const CachedBlob& blob) {
+    if (!blob.Empty()) {
+        BlobCache* blobCache = GetBlobCache();
+        if (blobCache) {
+            blobCache->Store(key, blob);
+        }
+    }
+}
+
 MaybeError DeviceBase::ValidateObject(const ApiObjectBase* object) const {
     ASSERT(object != nullptr);
     DAWN_INVALID_IF(object->GetDevice() != this,
diff --git a/src/dawn/native/Device.h b/src/dawn/native/Device.h
index fd9ba57..ee791a4 100644
--- a/src/dawn/native/Device.h
+++ b/src/dawn/native/Device.h
@@ -22,6 +22,7 @@
 #include <utility>
 #include <vector>
 
+#include "dawn/native/BlobCache.h"
 #include "dawn/native/CacheKey.h"
 #include "dawn/native/Commands.h"
 #include "dawn/native/ComputePipeline.h"
@@ -47,7 +48,6 @@
 class AsyncTaskManager;
 class AttachmentState;
 class AttachmentStateBlueprint;
-class BlobCache;
 class CallbackTaskManager;
 class DynamicUploader;
 class ErrorScopeStack;
@@ -284,6 +284,8 @@
     MaybeError ValidateIsAlive() const;
 
     BlobCache* GetBlobCache();
+    CachedBlob LoadCachedBlob(const CacheKey& key);
+    void StoreCachedBlob(const CacheKey& key, const CachedBlob& blob);
 
     virtual ResultOrError<std::unique_ptr<StagingBufferBase>> CreateStagingBuffer(size_t size) = 0;
     virtual MaybeError CopyFromStagingToBuffer(StagingBufferBase* source,
diff --git a/src/dawn/native/PipelineCache.cpp b/src/dawn/native/PipelineCache.cpp
index 19b8b7b..7cb9081 100644
--- a/src/dawn/native/PipelineCache.cpp
+++ b/src/dawn/native/PipelineCache.cpp
@@ -38,7 +38,7 @@
     }
     // Try to write the data out to the persistent cache.
     CachedBlob blob;
-    DAWN_TRY_ASSIGN(blob, SerializeToBlobImpl());
+    DAWN_TRY(SerializeToBlobImpl(&blob));
     if (blob.Size() > 0) {
         // Using a simple heuristic to decide whether to write out the blob right now. May need
         // smarter tracking when we are dealing with monolithic caches.
diff --git a/src/dawn/native/PipelineCache.h b/src/dawn/native/PipelineCache.h
index e69386e..9b3011e 100644
--- a/src/dawn/native/PipelineCache.h
+++ b/src/dawn/native/PipelineCache.h
@@ -45,9 +45,10 @@
     CachedBlob Initialize();
 
   private:
-    // Backend implementation of serialization of the cache into a blob. Note that an empty
-    // blob may be returned.
-    virtual ResultOrError<CachedBlob> SerializeToBlobImpl() = 0;
+    // Backend implementation of serialization of the cache into a blob.
+    // Note: given that no local cached blob should be destructed and copy elision has strict
+    // requirement cached blob is passed in as a pointer to be assigned.
+    virtual MaybeError SerializeToBlobImpl(CachedBlob* blob) = 0;
 
     // The blob cache is owned by the Adapter and pipeline caches are owned/created by devices
     // or adapters. Since the device owns a reference to the Instance which owns the Adapter,
diff --git a/src/dawn/native/d3d12/CacheKeyD3D12.cpp b/src/dawn/native/d3d12/CacheKeyD3D12.cpp
new file mode 100644
index 0000000..0daf526
--- /dev/null
+++ b/src/dawn/native/d3d12/CacheKeyD3D12.cpp
@@ -0,0 +1,139 @@
+// Copyright 2022 The Dawn Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "dawn/common/Assert.h"
+#include "dawn/common/Constants.h"
+#include "dawn/native/CacheKey.h"
+#include "dawn/native/d3d12/d3d12_platform.h"
+
+namespace dawn::native {
+
+template <>
+void CacheKeySerializer<D3D12_COMPUTE_PIPELINE_STATE_DESC>::Serialize(
+    CacheKey* key,
+    const D3D12_COMPUTE_PIPELINE_STATE_DESC& t) {
+    // Don't record pRootSignature as we already record the signature blob in pipline layout.
+    key->Record(t.CS).Record(t.NodeMask).Record(t.Flags);
+}
+
+template <>
+void CacheKeySerializer<D3D12_RENDER_TARGET_BLEND_DESC>::Serialize(
+    CacheKey* key,
+    const D3D12_RENDER_TARGET_BLEND_DESC& t) {
+    key->Record(t.BlendEnable, t.LogicOpEnable, t.SrcBlend, t.DestBlend, t.BlendOp, t.SrcBlendAlpha,
+                t.DestBlendAlpha, t.BlendOpAlpha, t.LogicOp, t.RenderTargetWriteMask);
+}
+
+template <>
+void CacheKeySerializer<D3D12_BLEND_DESC>::Serialize(CacheKey* key, const D3D12_BLEND_DESC& t) {
+    key->Record(t.AlphaToCoverageEnable, t.IndependentBlendEnable).Record(t.RenderTarget);
+}
+
+template <>
+void CacheKeySerializer<D3D12_DEPTH_STENCILOP_DESC>::Serialize(
+    CacheKey* key,
+    const D3D12_DEPTH_STENCILOP_DESC& t) {
+    key->Record(t.StencilFailOp, t.StencilDepthFailOp, t.StencilPassOp, t.StencilFunc);
+}
+
+template <>
+void CacheKeySerializer<D3D12_DEPTH_STENCIL_DESC>::Serialize(CacheKey* key,
+                                                             const D3D12_DEPTH_STENCIL_DESC& t) {
+    key->Record(t.DepthEnable, t.DepthWriteMask, t.DepthFunc, t.StencilEnable, t.StencilReadMask,
+                t.StencilWriteMask, t.FrontFace, t.BackFace);
+}
+
+template <>
+void CacheKeySerializer<D3D12_RASTERIZER_DESC>::Serialize(CacheKey* key,
+                                                          const D3D12_RASTERIZER_DESC& t) {
+    key->Record(t.FillMode, t.CullMode, t.FrontCounterClockwise, t.DepthBias, t.DepthBiasClamp,
+                t.SlopeScaledDepthBias, t.DepthClipEnable, t.MultisampleEnable,
+                t.AntialiasedLineEnable, t.ForcedSampleCount, t.ConservativeRaster);
+}
+
+template <>
+void CacheKeySerializer<D3D12_INPUT_ELEMENT_DESC>::Serialize(CacheKey* key,
+                                                             const D3D12_INPUT_ELEMENT_DESC& t) {
+    key->Record(t.SemanticName, t.SemanticIndex, t.Format, t.InputSlot, t.AlignedByteOffset,
+                t.InputSlotClass, t.InstanceDataStepRate);
+}
+
+template <>
+void CacheKeySerializer<D3D12_INPUT_LAYOUT_DESC>::Serialize(CacheKey* key,
+                                                            const D3D12_INPUT_LAYOUT_DESC& t) {
+    key->RecordIterable(t.pInputElementDescs, t.NumElements);
+}
+
+template <>
+void CacheKeySerializer<D3D12_SO_DECLARATION_ENTRY>::Serialize(
+    CacheKey* key,
+    const D3D12_SO_DECLARATION_ENTRY& t) {
+    key->Record(t.Stream, t.SemanticName, t.SemanticIndex, t.StartComponent, t.ComponentCount,
+                t.OutputSlot);
+}
+
+template <>
+void CacheKeySerializer<D3D12_STREAM_OUTPUT_DESC>::Serialize(CacheKey* key,
+                                                             const D3D12_STREAM_OUTPUT_DESC& t) {
+    key->RecordIterable(t.pSODeclaration, t.NumEntries)
+        .RecordIterable(t.pBufferStrides, t.NumStrides)
+        .Record(t.RasterizedStream);
+}
+
+template <>
+void CacheKeySerializer<DXGI_SAMPLE_DESC>::Serialize(CacheKey* key, const DXGI_SAMPLE_DESC& t) {
+    key->Record(t.Count, t.Quality);
+}
+
+template <>
+void CacheKeySerializer<D3D12_SHADER_BYTECODE>::Serialize(CacheKey* key,
+                                                          const D3D12_SHADER_BYTECODE& t) {
+    key->RecordIterable(reinterpret_cast<const uint8_t*>(t.pShaderBytecode), t.BytecodeLength);
+}
+
+template <>
+void CacheKeySerializer<D3D12_GRAPHICS_PIPELINE_STATE_DESC>::Serialize(
+    CacheKey* key,
+    const D3D12_GRAPHICS_PIPELINE_STATE_DESC& t) {
+    // Don't record pRootSignature as we already record the signature blob in pipline layout.
+    // Don't record CachedPSO as it is in the cached blob.
+    key->Record(t.VS)
+        .Record(t.PS)
+        .Record(t.DS)
+        .Record(t.HS)
+        .Record(t.GS)
+        .Record(t.StreamOutput)
+        .Record(t.BlendState)
+        .Record(t.SampleMask)
+        .Record(t.RasterizerState)
+        .Record(t.DepthStencilState)
+        .Record(t.InputLayout)
+        .Record(t.IBStripCutValue)
+        .Record(t.PrimitiveTopologyType)
+        .RecordIterable(t.RTVFormats, t.NumRenderTargets)
+        .Record(t.DSVFormat)
+        .Record(t.SampleDesc)
+        .Record(t.NodeMask)
+        .Record(t.Flags);
+}
+
+template <>
+void CacheKeySerializer<ID3DBlob>::Serialize(CacheKey* key, const ID3DBlob& t) {
+    // Workaround: GetBufferPointer and GetbufferSize are not marked as const
+    ID3DBlob* pBlob = const_cast<ID3DBlob*>(&t);
+    key->RecordIterable(reinterpret_cast<uint8_t*>(pBlob->GetBufferPointer()),
+                        pBlob->GetBufferSize());
+}
+
+}  // namespace dawn::native
diff --git a/src/dawn/native/d3d12/ComputePipelineD3D12.cpp b/src/dawn/native/d3d12/ComputePipelineD3D12.cpp
index cad0ce5..2f34338 100644
--- a/src/dawn/native/d3d12/ComputePipelineD3D12.cpp
+++ b/src/dawn/native/d3d12/ComputePipelineD3D12.cpp
@@ -55,15 +55,36 @@
     D3D12_COMPUTE_PIPELINE_STATE_DESC d3dDesc = {};
     d3dDesc.pRootSignature = ToBackend(GetLayout())->GetRootSignature();
 
+    // TODO(dawn:549): Compile shader everytime before we implement compiled shader cache
     CompiledShader compiledShader;
     DAWN_TRY_ASSIGN(compiledShader, module->Compile(computeStage, SingleShaderStage::Compute,
                                                     ToBackend(GetLayout()), compileFlags));
     d3dDesc.CS = compiledShader.GetD3D12ShaderBytecode();
+
+    mCacheKey.Record(d3dDesc, ToBackend(GetLayout())->GetRootSignatureBlob());
+
+    // Try to see if we have anything in the blob cache.
+    CachedBlob blob = device->LoadCachedBlob(GetCacheKey());
+    const bool cacheHit = !blob.Empty();
+    if (cacheHit) {
+        // Cache hits, attach cached blob to descriptor.
+        d3dDesc.CachedPSO.pCachedBlob = blob.Data();
+        d3dDesc.CachedPSO.CachedBlobSizeInBytes = blob.Size();
+    }
+
     auto* d3d12Device = device->GetD3D12Device();
     DAWN_TRY(CheckHRESULT(
         d3d12Device->CreateComputePipelineState(&d3dDesc, IID_PPV_ARGS(&mPipelineState)),
         "D3D12 creating pipeline state"));
 
+    if (!cacheHit) {
+        // Cache misses, need to get pipeline cached blob and store.
+        ComPtr<ID3DBlob> d3dBlob;
+        DAWN_TRY(CheckHRESULT(GetPipelineState()->GetCachedBlob(&d3dBlob),
+                              "D3D12 compute pipeline state get cached blob"));
+        device->StoreCachedBlob(GetCacheKey(), CachedBlob::Create(std::move(d3dBlob)));
+    }
+
     SetLabelImpl();
 
     return {};
diff --git a/src/dawn/native/d3d12/DeviceD3D12.cpp b/src/dawn/native/d3d12/DeviceD3D12.cpp
index 90ddc30..1e00fe1 100644
--- a/src/dawn/native/d3d12/DeviceD3D12.cpp
+++ b/src/dawn/native/d3d12/DeviceD3D12.cpp
@@ -441,6 +441,9 @@
     const TextureViewDescriptor* descriptor) {
     return TextureView::Create(texture, descriptor);
 }
+Ref<PipelineCacheBase> Device::GetOrCreatePipelineCacheImpl(const CacheKey& key) {
+    UNREACHABLE();
+}
 void Device::InitializeComputePipelineAsyncImpl(Ref<ComputePipelineBase> computePipeline,
                                                 WGPUCreateComputePipelineAsyncCallback callback,
                                                 void* userdata) {
diff --git a/src/dawn/native/d3d12/DeviceD3D12.h b/src/dawn/native/d3d12/DeviceD3D12.h
index 99b03e3..776fce2 100644
--- a/src/dawn/native/d3d12/DeviceD3D12.h
+++ b/src/dawn/native/d3d12/DeviceD3D12.h
@@ -188,6 +188,7 @@
         const ComputePipelineDescriptor* descriptor) override;
     Ref<RenderPipelineBase> CreateUninitializedRenderPipelineImpl(
         const RenderPipelineDescriptor* descriptor) override;
+    Ref<PipelineCacheBase> GetOrCreatePipelineCacheImpl(const CacheKey& key) override;
     void InitializeComputePipelineAsyncImpl(Ref<ComputePipelineBase> computePipeline,
                                             WGPUCreateComputePipelineAsyncCallback callback,
                                             void* userdata) override;
diff --git a/src/dawn/native/d3d12/Forward.h b/src/dawn/native/d3d12/Forward.h
index ecb10aa..13f7c81 100644
--- a/src/dawn/native/d3d12/Forward.h
+++ b/src/dawn/native/d3d12/Forward.h
@@ -27,6 +27,7 @@
 class ComputePipeline;
 class Device;
 class Heap;
+class PipelineCache;
 class PipelineLayout;
 class QuerySet;
 class Queue;
@@ -46,6 +47,7 @@
     using CommandBufferType = CommandBuffer;
     using ComputePipelineType = ComputePipeline;
     using DeviceType = Device;
+    using PipelineCacheType = PipelineCache;
     using PipelineLayoutType = PipelineLayout;
     using QuerySetType = QuerySet;
     using QueueType = Queue;
diff --git a/src/dawn/native/d3d12/PipelineCacheD3D12.cpp b/src/dawn/native/d3d12/PipelineCacheD3D12.cpp
new file mode 100644
index 0000000..947f922
--- /dev/null
+++ b/src/dawn/native/d3d12/PipelineCacheD3D12.cpp
@@ -0,0 +1,33 @@
+// Copyright 2022 The Dawn Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "dawn/native/BlobCache.h"
+#include "dawn/native/d3d12/d3d12_platform.h"
+
+namespace dawn::native {
+
+// static
+CachedBlob CachedBlob::Create(ComPtr<ID3DBlob> blob) {
+    // Detach so the deleter callback can "own" the reference
+    ID3DBlob* ptr = blob.Detach();
+    return CachedBlob(reinterpret_cast<uint8_t*>(ptr->GetBufferPointer()), ptr->GetBufferSize(),
+                      [=]() {
+                          // Reattach and drop to delete it.
+                          ComPtr<ID3DBlob> b;
+                          b.Attach(ptr);
+                          b = nullptr;
+                      });
+}
+
+}  // namespace dawn::native
diff --git a/src/dawn/native/d3d12/PipelineLayoutD3D12.cpp b/src/dawn/native/d3d12/PipelineLayoutD3D12.cpp
index 91505c1..636fae2 100644
--- a/src/dawn/native/d3d12/PipelineLayoutD3D12.cpp
+++ b/src/dawn/native/d3d12/PipelineLayoutD3D12.cpp
@@ -252,10 +252,9 @@
     rootSignatureDescriptor.pStaticSamplers = nullptr;
     rootSignatureDescriptor.Flags = D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT;
 
-    ComPtr<ID3DBlob> signature;
     ComPtr<ID3DBlob> error;
     HRESULT hr = device->GetFunctions()->d3d12SerializeRootSignature(
-        &rootSignatureDescriptor, D3D_ROOT_SIGNATURE_VERSION_1, &signature, &error);
+        &rootSignatureDescriptor, D3D_ROOT_SIGNATURE_VERSION_1, &mRootSignatureBlob, &error);
     if (DAWN_UNLIKELY(FAILED(hr))) {
         std::ostringstream messageStream;
         if (error) {
@@ -269,9 +268,10 @@
         DAWN_TRY(CheckHRESULT(hr, messageStream.str().c_str()));
     }
     DAWN_TRY(CheckHRESULT(device->GetD3D12Device()->CreateRootSignature(
-                              0, signature->GetBufferPointer(), signature->GetBufferSize(),
-                              IID_PPV_ARGS(&mRootSignature)),
+                              0, mRootSignatureBlob->GetBufferPointer(),
+                              mRootSignatureBlob->GetBufferSize(), IID_PPV_ARGS(&mRootSignature)),
                           "D3D12 create root signature"));
+    mCacheKey.Record(mRootSignatureBlob.Get());
     return {};
 }
 
@@ -310,6 +310,10 @@
     return mRootSignature.Get();
 }
 
+ID3DBlob* PipelineLayout::GetRootSignatureBlob() const {
+    return mRootSignatureBlob.Get();
+}
+
 const PipelineLayout::DynamicStorageBufferLengthInfo&
 PipelineLayout::GetDynamicStorageBufferLengthInfo() const {
     return mDynamicStorageBufferLengthInfo;
diff --git a/src/dawn/native/d3d12/PipelineLayoutD3D12.h b/src/dawn/native/d3d12/PipelineLayoutD3D12.h
index 5e5360e..2047412 100644
--- a/src/dawn/native/d3d12/PipelineLayoutD3D12.h
+++ b/src/dawn/native/d3d12/PipelineLayoutD3D12.h
@@ -52,6 +52,8 @@
 
     ID3D12RootSignature* GetRootSignature() const;
 
+    ID3DBlob* GetRootSignatureBlob() const;
+
     ID3D12CommandSignature* GetDispatchIndirectCommandSignatureWithNumWorkgroups();
 
     ID3D12CommandSignature* GetDrawIndirectCommandSignatureWithInstanceVertexOffsets();
@@ -98,6 +100,8 @@
     uint32_t mNumWorkgroupsParameterIndex;
     uint32_t mDynamicStorageBufferLengthsParameterIndex;
     ComPtr<ID3D12RootSignature> mRootSignature;
+    // Store the root signature blob to put in pipeline cachekey
+    ComPtr<ID3DBlob> mRootSignatureBlob;
     ComPtr<ID3D12CommandSignature> mDispatchIndirectCommandSignatureWithNumWorkgroups;
     ComPtr<ID3D12CommandSignature> mDrawIndirectCommandSignatureWithInstanceVertexOffsets;
     ComPtr<ID3D12CommandSignature> mDrawIndexedIndirectCommandSignatureWithInstanceVertexOffsets;
diff --git a/src/dawn/native/d3d12/RenderPipelineD3D12.cpp b/src/dawn/native/d3d12/RenderPipelineD3D12.cpp
index 8980b30..ce7b347 100644
--- a/src/dawn/native/d3d12/RenderPipelineD3D12.cpp
+++ b/src/dawn/native/d3d12/RenderPipelineD3D12.cpp
@@ -429,10 +429,29 @@
 
     mD3d12PrimitiveTopology = D3D12PrimitiveTopology(GetPrimitiveTopology());
 
+    mCacheKey.Record(descriptorD3D12, *layout->GetRootSignatureBlob());
+
+    // Try to see if we have anything in the blob cache.
+    CachedBlob blob = device->LoadCachedBlob(GetCacheKey());
+    const bool cacheHit = !blob.Empty();
+    if (cacheHit) {
+        // Cache hits, attach cached blob to descriptor.
+        descriptorD3D12.CachedPSO.pCachedBlob = blob.Data();
+        descriptorD3D12.CachedPSO.CachedBlobSizeInBytes = blob.Size();
+    }
+
     DAWN_TRY(CheckHRESULT(device->GetD3D12Device()->CreateGraphicsPipelineState(
                               &descriptorD3D12, IID_PPV_ARGS(&mPipelineState)),
                           "D3D12 create graphics pipeline state"));
 
+    if (!cacheHit) {
+        // Cache misses, need to get pipeline cached blob and store.
+        ComPtr<ID3DBlob> d3dBlob;
+        DAWN_TRY(CheckHRESULT(GetPipelineState()->GetCachedBlob(&d3dBlob),
+                              "D3D12 render pipeline state get cached blob"));
+        device->StoreCachedBlob(GetCacheKey(), CachedBlob::Create(std::move(d3dBlob)));
+    }
+
     SetLabelImpl();
 
     return {};
diff --git a/src/dawn/native/metal/BufferMTL.mm b/src/dawn/native/metal/BufferMTL.mm
index 42f8b7e..e49fc2e 100644
--- a/src/dawn/native/metal/BufferMTL.mm
+++ b/src/dawn/native/metal/BufferMTL.mm
@@ -53,6 +53,8 @@
     if (@available(macOS 10.11, *)) {
         return 256 * 1024 * 1024;
     }
+    // 256Mb for other platform if any. (Need to have a return for all branches).
+    return 256 * 1024 * 1024;
 #else
     // macOS / tvOS: 256Mb limit in versions without [MTLDevice maxBufferLength]
     return 256 * 1024 * 1024;
diff --git a/src/dawn/native/vulkan/PipelineCacheVk.cpp b/src/dawn/native/vulkan/PipelineCacheVk.cpp
index 60617ec..2bb1039 100644
--- a/src/dawn/native/vulkan/PipelineCacheVk.cpp
+++ b/src/dawn/native/vulkan/PipelineCacheVk.cpp
@@ -50,10 +50,10 @@
     return mHandle;
 }
 
-ResultOrError<CachedBlob> PipelineCache::SerializeToBlobImpl() {
-    CachedBlob emptyBlob;
+MaybeError PipelineCache::SerializeToBlobImpl(CachedBlob* blob) {
     if (mHandle == VK_NULL_HANDLE) {
-        return emptyBlob;
+        // Pipeline cache isn't created successfully
+        return {};
     }
 
     size_t bufferSize;
@@ -61,12 +61,13 @@
     DAWN_TRY(CheckVkSuccess(
         device->fn.GetPipelineCacheData(device->GetVkDevice(), mHandle, &bufferSize, nullptr),
         "GetPipelineCacheData"));
-
-    CachedBlob blob(bufferSize);
-    DAWN_TRY(CheckVkSuccess(
-        device->fn.GetPipelineCacheData(device->GetVkDevice(), mHandle, &bufferSize, blob.Data()),
-        "GetPipelineCacheData"));
-    return blob;
+    if (bufferSize > 0) {
+        *blob = CachedBlob::Create(bufferSize);
+        DAWN_TRY(CheckVkSuccess(device->fn.GetPipelineCacheData(device->GetVkDevice(), mHandle,
+                                                                &bufferSize, blob->Data()),
+                                "GetPipelineCacheData"));
+    }
+    return {};
 }
 
 void PipelineCache::Initialize() {
diff --git a/src/dawn/native/vulkan/PipelineCacheVk.h b/src/dawn/native/vulkan/PipelineCacheVk.h
index 7e56175..85a8891 100644
--- a/src/dawn/native/vulkan/PipelineCacheVk.h
+++ b/src/dawn/native/vulkan/PipelineCacheVk.h
@@ -38,7 +38,7 @@
     ~PipelineCache() override;
 
     void Initialize();
-    ResultOrError<CachedBlob> SerializeToBlobImpl() override;
+    MaybeError SerializeToBlobImpl(CachedBlob* blob) override;
 
     DeviceBase* mDevice;
     VkPipelineCache mHandle = VK_NULL_HANDLE;
diff --git a/src/dawn/tests/end2end/PipelineCachingTests.cpp b/src/dawn/tests/end2end/PipelineCachingTests.cpp
index bdf6a1e..94d30c5 100644
--- a/src/dawn/tests/end2end/PipelineCachingTests.cpp
+++ b/src/dawn/tests/end2end/PipelineCachingTests.cpp
@@ -26,18 +26,49 @@
 
 // TODO(dawn:549) Add some sort of pipeline descriptor repository to test more caching.
 
-static constexpr std::string_view kComputeShader = R"(
+static constexpr std::string_view kComputeShaderDefault = R"(
         @stage(compute) @workgroup_size(1) fn main() {}
     )";
 
-static constexpr std::string_view kVertexShader = R"(
+static constexpr std::string_view kComputeShaderMultipleEntryPoints = R"(
+        @stage(compute) @workgroup_size(16) fn main() {}
+        @stage(compute) @workgroup_size(64) fn main2() {}
+    )";
+
+static constexpr std::string_view kVertexShaderDefault = R"(
         @stage(vertex) fn main() -> @builtin(position) vec4<f32> {
             return vec4<f32>(0.0, 0.0, 0.0, 0.0);
         }
     )";
 
-static constexpr std::string_view kFragmentShader = R"(
-        @stage(fragment) fn main() {}
+static constexpr std::string_view kVertexShaderMultipleEntryPoints = R"(
+        @stage(vertex) fn main() -> @builtin(position) vec4<f32> {
+            return vec4<f32>(1.0, 0.0, 0.0, 1.0);
+        }
+
+        @stage(vertex) fn main2() -> @builtin(position) vec4<f32> {
+            return vec4<f32>(0.5, 0.5, 0.5, 1.0);
+        }
+    )";
+
+static constexpr std::string_view kFragmentShaderDefault = R"(
+        @stage(fragment) fn main() -> @location(0) vec4<f32> {
+            return vec4<f32>(0.1, 0.2, 0.3, 0.4);
+        }
+    )";
+
+static constexpr std::string_view kFragmentShaderMultipleOutput = R"(
+        struct FragmentOut {
+            @location(0) fragColor0 : vec4<f32>,
+            @location(1) fragColor1 : vec4<f32>,
+        }
+
+        @stage(fragment) fn main() -> FragmentOut {
+            var output : FragmentOut;
+            output.fragColor0 = vec4<f32>(0.1, 0.2, 0.3, 0.4);
+            output.fragColor1 = vec4<f32>(0.5, 0.6, 0.7, 0.8);
+            return output;
+        }
     )";
 
 class PipelineCachingTests : public DawnTest {
@@ -62,7 +93,7 @@
     {
         wgpu::Device device = CreateDevice();
         wgpu::ComputePipelineDescriptor desc;
-        desc.compute.module = utils::CreateShaderModule(device, kComputeShader.data());
+        desc.compute.module = utils::CreateShaderModule(device, kComputeShaderDefault.data());
         desc.compute.entryPoint = "main";
         EXPECT_CACHE_HIT(mMockCache, 0u, device.CreateComputePipeline(&desc));
     }
@@ -72,7 +103,7 @@
     {
         wgpu::Device device = CreateDevice();
         wgpu::ComputePipelineDescriptor desc;
-        desc.compute.module = utils::CreateShaderModule(device, kComputeShader.data());
+        desc.compute.module = utils::CreateShaderModule(device, kComputeShaderDefault.data());
         desc.compute.entryPoint = "main";
         EXPECT_CACHE_HIT(mMockCache, 0u, device.CreateComputePipeline(&desc));
     }
@@ -82,7 +113,7 @@
 // Tests that pipeline creation on the same device uses frontend cache when possible.
 TEST_P(SinglePipelineCachingTests, ComputePipelineFrontedCache) {
     wgpu::ComputePipelineDescriptor desc;
-    desc.compute.module = utils::CreateShaderModule(device, kComputeShader.data());
+    desc.compute.module = utils::CreateShaderModule(device, kComputeShaderDefault.data());
     desc.compute.entryPoint = "main";
 
     // First creation should create a cache entry.
@@ -106,7 +137,7 @@
     {
         wgpu::Device device = CreateDevice();
         wgpu::ComputePipelineDescriptor desc;
-        desc.compute.module = utils::CreateShaderModule(device, kComputeShader.data());
+        desc.compute.module = utils::CreateShaderModule(device, kComputeShaderDefault.data());
         desc.compute.entryPoint = "main";
         EXPECT_CACHE_HIT(mMockCache, 0u, device.CreateComputePipeline(&desc));
     }
@@ -116,13 +147,74 @@
     {
         wgpu::Device device = CreateDevice();
         wgpu::ComputePipelineDescriptor desc;
-        desc.compute.module = utils::CreateShaderModule(device, kComputeShader.data());
+        desc.compute.module = utils::CreateShaderModule(device, kComputeShaderDefault.data());
         desc.compute.entryPoint = "main";
         EXPECT_CACHE_HIT(mMockCache, 1u, device.CreateComputePipeline(&desc));
     }
     EXPECT_EQ(mMockCache.GetNumEntries(), 1u);
 }
 
+// Tests that pipeline creation hits the cache when using the same pipeline but with explicit
+// layout.
+TEST_P(SinglePipelineCachingTests, ComputePipelineBlobCacheExplictLayout) {
+    // First time should create and write out to the cache.
+    {
+        wgpu::Device device = CreateDevice();
+        wgpu::ComputePipelineDescriptor desc;
+        desc.compute.module = utils::CreateShaderModule(device, kComputeShaderDefault.data());
+        desc.compute.entryPoint = "main";
+        EXPECT_CACHE_HIT(mMockCache, 0u, device.CreateComputePipeline(&desc));
+    }
+    EXPECT_EQ(mMockCache.GetNumEntries(), 1u);
+
+    // Cache should hit: use the same pipeline but with explicit pipeline layout.
+    {
+        wgpu::Device device = CreateDevice();
+        wgpu::ComputePipelineDescriptor desc;
+        desc.compute.module = utils::CreateShaderModule(device, kComputeShaderDefault.data());
+        desc.compute.entryPoint = "main";
+        desc.layout = utils::MakeBasicPipelineLayout(device, {});
+        EXPECT_CACHE_HIT(mMockCache, 1u, device.CreateComputePipeline(&desc));
+    }
+    EXPECT_EQ(mMockCache.GetNumEntries(), 1u);
+}
+
+// Tests that pipeline creation wouldn't hit the cache if the pipelines are not exactly the same.
+TEST_P(SinglePipelineCachingTests, ComputePipelineBlobCacheShaderNegativeCases) {
+    size_t numCacheEntries = 0u;
+    // First time should create and write out to the cache.
+    {
+        wgpu::Device device = CreateDevice();
+        wgpu::ComputePipelineDescriptor desc;
+        desc.compute.module = utils::CreateShaderModule(device, kComputeShaderDefault.data());
+        desc.compute.entryPoint = "main";
+        EXPECT_CACHE_HIT(mMockCache, 0u, device.CreateComputePipeline(&desc));
+    }
+    EXPECT_EQ(mMockCache.GetNumEntries(), ++numCacheEntries);
+
+    // Cache should not hit: different shader module.
+    {
+        wgpu::Device device = CreateDevice();
+        wgpu::ComputePipelineDescriptor desc;
+        desc.compute.module =
+            utils::CreateShaderModule(device, kComputeShaderMultipleEntryPoints.data());
+        desc.compute.entryPoint = "main";
+        EXPECT_CACHE_HIT(mMockCache, 0u, device.CreateComputePipeline(&desc));
+    }
+    EXPECT_EQ(mMockCache.GetNumEntries(), ++numCacheEntries);
+
+    // Cache should not hit: same shader module but different shader entry point.
+    {
+        wgpu::Device device = CreateDevice();
+        wgpu::ComputePipelineDescriptor desc;
+        desc.compute.module =
+            utils::CreateShaderModule(device, kComputeShaderMultipleEntryPoints.data());
+        desc.compute.entryPoint = "main2";
+        EXPECT_CACHE_HIT(mMockCache, 0u, device.CreateComputePipeline(&desc));
+    }
+    EXPECT_EQ(mMockCache.GetNumEntries(), ++numCacheEntries);
+}
+
 // Tests that pipeline creation does not hits the cache when it is enabled but we use different
 // isolation keys.
 TEST_P(SinglePipelineCachingTests, ComputePipelineBlobCacheIsolationKey) {
@@ -130,7 +222,7 @@
     {
         wgpu::Device device = CreateDevice("isolation key 1");
         wgpu::ComputePipelineDescriptor desc;
-        desc.compute.module = utils::CreateShaderModule(device, kComputeShader.data());
+        desc.compute.module = utils::CreateShaderModule(device, kComputeShaderDefault.data());
         desc.compute.entryPoint = "main";
         EXPECT_CACHE_HIT(mMockCache, 0u, device.CreateComputePipeline(&desc));
     }
@@ -140,7 +232,7 @@
     {
         wgpu::Device device = CreateDevice("isolation key 2");
         wgpu::ComputePipelineDescriptor desc;
-        desc.compute.module = utils::CreateShaderModule(device, kComputeShader.data());
+        desc.compute.module = utils::CreateShaderModule(device, kComputeShaderDefault.data());
         desc.compute.entryPoint = "main";
         EXPECT_CACHE_HIT(mMockCache, 0u, device.CreateComputePipeline(&desc));
     }
@@ -158,10 +250,9 @@
     {
         wgpu::Device device = CreateDevice();
         utils::ComboRenderPipelineDescriptor desc;
-        desc.cTargets[0].writeMask = wgpu::ColorWriteMask::None;
-        desc.vertex.module = utils::CreateShaderModule(device, kVertexShader.data());
+        desc.vertex.module = utils::CreateShaderModule(device, kVertexShaderDefault.data());
         desc.vertex.entryPoint = "main";
-        desc.cFragment.module = utils::CreateShaderModule(device, kFragmentShader.data());
+        desc.cFragment.module = utils::CreateShaderModule(device, kFragmentShaderDefault.data());
         desc.cFragment.entryPoint = "main";
         EXPECT_CACHE_HIT(mMockCache, 0u, device.CreateRenderPipeline(&desc));
     }
@@ -171,10 +262,9 @@
     {
         wgpu::Device device = CreateDevice();
         utils::ComboRenderPipelineDescriptor desc;
-        desc.cTargets[0].writeMask = wgpu::ColorWriteMask::None;
-        desc.vertex.module = utils::CreateShaderModule(device, kVertexShader.data());
+        desc.vertex.module = utils::CreateShaderModule(device, kVertexShaderDefault.data());
         desc.vertex.entryPoint = "main";
-        desc.cFragment.module = utils::CreateShaderModule(device, kFragmentShader.data());
+        desc.cFragment.module = utils::CreateShaderModule(device, kFragmentShaderDefault.data());
         desc.cFragment.entryPoint = "main";
         EXPECT_CACHE_HIT(mMockCache, 0u, device.CreateRenderPipeline(&desc));
     }
@@ -184,10 +274,9 @@
 // Tests that pipeline creation on the same device uses frontend cache when possible.
 TEST_P(SinglePipelineCachingTests, RenderPipelineFrontedCache) {
     utils::ComboRenderPipelineDescriptor desc;
-    desc.cTargets[0].writeMask = wgpu::ColorWriteMask::None;
-    desc.vertex.module = utils::CreateShaderModule(device, kVertexShader.data());
+    desc.vertex.module = utils::CreateShaderModule(device, kVertexShaderDefault.data());
     desc.vertex.entryPoint = "main";
-    desc.cFragment.module = utils::CreateShaderModule(device, kFragmentShader.data());
+    desc.cFragment.module = utils::CreateShaderModule(device, kFragmentShaderDefault.data());
     desc.cFragment.entryPoint = "main";
 
     // First creation should create a cache entry.
@@ -211,10 +300,9 @@
     {
         wgpu::Device device = CreateDevice();
         utils::ComboRenderPipelineDescriptor desc;
-        desc.cTargets[0].writeMask = wgpu::ColorWriteMask::None;
-        desc.vertex.module = utils::CreateShaderModule(device, kVertexShader.data());
+        desc.vertex.module = utils::CreateShaderModule(device, kVertexShaderDefault.data());
         desc.vertex.entryPoint = "main";
-        desc.cFragment.module = utils::CreateShaderModule(device, kFragmentShader.data());
+        desc.cFragment.module = utils::CreateShaderModule(device, kFragmentShaderDefault.data());
         desc.cFragment.entryPoint = "main";
         EXPECT_CACHE_HIT(mMockCache, 0u, device.CreateRenderPipeline(&desc));
     }
@@ -224,16 +312,172 @@
     {
         wgpu::Device device = CreateDevice();
         utils::ComboRenderPipelineDescriptor desc;
-        desc.cTargets[0].writeMask = wgpu::ColorWriteMask::None;
-        desc.vertex.module = utils::CreateShaderModule(device, kVertexShader.data());
+        desc.vertex.module = utils::CreateShaderModule(device, kVertexShaderDefault.data());
         desc.vertex.entryPoint = "main";
-        desc.cFragment.module = utils::CreateShaderModule(device, kFragmentShader.data());
+        desc.cFragment.module = utils::CreateShaderModule(device, kFragmentShaderDefault.data());
         desc.cFragment.entryPoint = "main";
         EXPECT_CACHE_HIT(mMockCache, 1u, device.CreateRenderPipeline(&desc));
     }
     EXPECT_EQ(mMockCache.GetNumEntries(), 1u);
 }
 
+// Tests that pipeline creation hits the cache when using the same pipeline but with explicit
+// layout.
+TEST_P(SinglePipelineCachingTests, RenderPipelineBlobCacheExplictLayout) {
+    // First time should create and write out to the cache.
+    {
+        wgpu::Device device = CreateDevice();
+        utils::ComboRenderPipelineDescriptor desc;
+        desc.vertex.module = utils::CreateShaderModule(device, kVertexShaderDefault.data());
+        desc.vertex.entryPoint = "main";
+        desc.cFragment.module = utils::CreateShaderModule(device, kFragmentShaderDefault.data());
+        desc.cFragment.entryPoint = "main";
+        EXPECT_CACHE_HIT(mMockCache, 0u, device.CreateRenderPipeline(&desc));
+    }
+    EXPECT_EQ(mMockCache.GetNumEntries(), 1u);
+
+    // Cache should hit: use the same pipeline but with explicit pipeline layout.
+    {
+        wgpu::Device device = CreateDevice();
+        utils::ComboRenderPipelineDescriptor desc;
+        desc.vertex.module = utils::CreateShaderModule(device, kVertexShaderDefault.data());
+        desc.vertex.entryPoint = "main";
+        desc.cFragment.module = utils::CreateShaderModule(device, kFragmentShaderDefault.data());
+        desc.cFragment.entryPoint = "main";
+        desc.layout = utils::MakeBasicPipelineLayout(device, {});
+        EXPECT_CACHE_HIT(mMockCache, 1u, device.CreateRenderPipeline(&desc));
+    }
+    EXPECT_EQ(mMockCache.GetNumEntries(), 1u);
+}
+
+// Tests that pipeline creation wouldn't hit the cache if the pipelines have different state set in
+// the descriptor.
+TEST_P(SinglePipelineCachingTests, RenderPipelineBlobCacheDescriptorNegativeCases) {
+    // First time should create and write out to the cache.
+    {
+        wgpu::Device device = CreateDevice();
+        utils::ComboRenderPipelineDescriptor desc;
+        desc.vertex.module = utils::CreateShaderModule(device, kVertexShaderDefault.data());
+        desc.vertex.entryPoint = "main";
+        desc.cFragment.module = utils::CreateShaderModule(device, kFragmentShaderDefault.data());
+        desc.cFragment.entryPoint = "main";
+        EXPECT_CACHE_HIT(mMockCache, 0u, device.CreateRenderPipeline(&desc));
+    }
+    EXPECT_EQ(mMockCache.GetNumEntries(), 1u);
+
+    // Cache should not hit: different pipeline descriptor state.
+    {
+        wgpu::Device device = CreateDevice();
+        utils::ComboRenderPipelineDescriptor desc;
+        desc.primitive.topology = wgpu::PrimitiveTopology::PointList;
+        desc.vertex.module = utils::CreateShaderModule(device, kVertexShaderDefault.data());
+        desc.vertex.entryPoint = "main";
+        desc.cFragment.module = utils::CreateShaderModule(device, kFragmentShaderDefault.data());
+        desc.cFragment.entryPoint = "main";
+        EXPECT_CACHE_HIT(mMockCache, 0u, device.CreateRenderPipeline(&desc));
+    }
+    EXPECT_EQ(mMockCache.GetNumEntries(), 2u);
+}
+
+// Tests that pipeline creation wouldn't hit the cache if the pipelines are not exactly the same in
+// terms of shader.
+TEST_P(SinglePipelineCachingTests, RenderPipelineBlobCacheShaderNegativeCases) {
+    size_t numCacheEntries = 0u;
+    // First time should create and write out to the cache.
+    {
+        wgpu::Device device = CreateDevice();
+        utils::ComboRenderPipelineDescriptor desc;
+        desc.vertex.module = utils::CreateShaderModule(device, kVertexShaderDefault.data());
+        desc.vertex.entryPoint = "main";
+        desc.cFragment.module = utils::CreateShaderModule(device, kFragmentShaderDefault.data());
+        desc.cFragment.entryPoint = "main";
+        EXPECT_CACHE_HIT(mMockCache, 0u, device.CreateRenderPipeline(&desc));
+    }
+    EXPECT_EQ(mMockCache.GetNumEntries(), ++numCacheEntries);
+
+    // Cache should not hit: different shader module.
+    {
+        wgpu::Device device = CreateDevice();
+        utils::ComboRenderPipelineDescriptor desc;
+        desc.vertex.module =
+            utils::CreateShaderModule(device, kVertexShaderMultipleEntryPoints.data());
+        desc.vertex.entryPoint = "main";
+        desc.cFragment.module = utils::CreateShaderModule(device, kFragmentShaderDefault.data());
+        desc.cFragment.entryPoint = "main";
+        EXPECT_CACHE_HIT(mMockCache, 0u, device.CreateRenderPipeline(&desc));
+    }
+    EXPECT_EQ(mMockCache.GetNumEntries(), ++numCacheEntries);
+
+    // Cache should not hit: same shader module but different shader entry point.
+    {
+        wgpu::Device device = CreateDevice();
+        utils::ComboRenderPipelineDescriptor desc;
+        desc.vertex.module =
+            utils::CreateShaderModule(device, kVertexShaderMultipleEntryPoints.data());
+        desc.vertex.entryPoint = "main2";
+        desc.cFragment.module = utils::CreateShaderModule(device, kFragmentShaderDefault.data());
+        desc.cFragment.entryPoint = "main";
+        EXPECT_CACHE_HIT(mMockCache, 0u, device.CreateRenderPipeline(&desc));
+    }
+    EXPECT_EQ(mMockCache.GetNumEntries(), ++numCacheEntries);
+}
+
+// Tests that pipeline creation wouldn't hit the cache if the pipelines are not exactly the same
+// (fragment color targets differences).
+TEST_P(SinglePipelineCachingTests, RenderPipelineBlobCacheNegativeCasesFragmentColorTargets) {
+    size_t numCacheEntries = 0u;
+    // First time should create and write out to the cache.
+    {
+        wgpu::Device device = CreateDevice();
+        utils::ComboRenderPipelineDescriptor desc;
+        desc.cFragment.targetCount = 2;
+        desc.cTargets[0].format = wgpu::TextureFormat::RGBA8Unorm;
+        desc.cTargets[1].writeMask = wgpu::ColorWriteMask::None;
+        desc.cTargets[1].format = wgpu::TextureFormat::RGBA8Unorm;
+        desc.vertex.module = utils::CreateShaderModule(device, kVertexShaderDefault.data());
+        desc.vertex.entryPoint = "main";
+        desc.cFragment.module =
+            utils::CreateShaderModule(device, kFragmentShaderMultipleOutput.data());
+        desc.cFragment.entryPoint = "main";
+        EXPECT_CACHE_HIT(mMockCache, 0u, device.CreateRenderPipeline(&desc));
+    }
+    EXPECT_EQ(mMockCache.GetNumEntries(), ++numCacheEntries);
+
+    // Cache should not hit: different fragment color target state (sparse).
+    {
+        wgpu::Device device = CreateDevice();
+        utils::ComboRenderPipelineDescriptor desc;
+        desc.cFragment.targetCount = 2;
+        desc.cTargets[0].format = wgpu::TextureFormat::Undefined;
+        desc.cTargets[1].writeMask = wgpu::ColorWriteMask::None;
+        desc.cTargets[1].format = wgpu::TextureFormat::RGBA8Unorm;
+        desc.vertex.module = utils::CreateShaderModule(device, kVertexShaderDefault.data());
+        desc.vertex.entryPoint = "main";
+        desc.cFragment.module =
+            utils::CreateShaderModule(device, kFragmentShaderMultipleOutput.data());
+        desc.cFragment.entryPoint = "main";
+        EXPECT_CACHE_HIT(mMockCache, 0u, device.CreateRenderPipeline(&desc));
+    }
+    EXPECT_EQ(mMockCache.GetNumEntries(), ++numCacheEntries);
+
+    // Cache should not hit: different fragment color target state (trailing empty).
+    {
+        wgpu::Device device = CreateDevice();
+        utils::ComboRenderPipelineDescriptor desc;
+        desc.cFragment.targetCount = 2;
+        desc.cTargets[0].format = wgpu::TextureFormat::RGBA8Unorm;
+        desc.cTargets[1].writeMask = wgpu::ColorWriteMask::None;
+        desc.cTargets[1].format = wgpu::TextureFormat::Undefined;
+        desc.vertex.module = utils::CreateShaderModule(device, kVertexShaderDefault.data());
+        desc.vertex.entryPoint = "main";
+        desc.cFragment.module =
+            utils::CreateShaderModule(device, kFragmentShaderMultipleOutput.data());
+        desc.cFragment.entryPoint = "main";
+        EXPECT_CACHE_HIT(mMockCache, 0u, device.CreateRenderPipeline(&desc));
+    }
+    EXPECT_EQ(mMockCache.GetNumEntries(), ++numCacheEntries);
+}
+
 // Tests that pipeline creation does not hits the cache when it is enabled but we use different
 // isolation keys.
 TEST_P(SinglePipelineCachingTests, RenderPipelineBlobCacheIsolationKey) {
@@ -241,10 +485,9 @@
     {
         wgpu::Device device = CreateDevice("isolation key 1");
         utils::ComboRenderPipelineDescriptor desc;
-        desc.cTargets[0].writeMask = wgpu::ColorWriteMask::None;
-        desc.vertex.module = utils::CreateShaderModule(device, kVertexShader.data());
+        desc.vertex.module = utils::CreateShaderModule(device, kVertexShaderDefault.data());
         desc.vertex.entryPoint = "main";
-        desc.cFragment.module = utils::CreateShaderModule(device, kFragmentShader.data());
+        desc.cFragment.module = utils::CreateShaderModule(device, kFragmentShaderDefault.data());
         desc.cFragment.entryPoint = "main";
         EXPECT_CACHE_HIT(mMockCache, 0u, device.CreateRenderPipeline(&desc));
     }
@@ -254,16 +497,17 @@
     {
         wgpu::Device device = CreateDevice("isolation key 2");
         utils::ComboRenderPipelineDescriptor desc;
-        desc.cTargets[0].writeMask = wgpu::ColorWriteMask::None;
-        desc.vertex.module = utils::CreateShaderModule(device, kVertexShader.data());
+        desc.vertex.module = utils::CreateShaderModule(device, kVertexShaderDefault.data());
         desc.vertex.entryPoint = "main";
-        desc.cFragment.module = utils::CreateShaderModule(device, kFragmentShader.data());
+        desc.cFragment.module = utils::CreateShaderModule(device, kFragmentShaderDefault.data());
         desc.cFragment.entryPoint = "main";
         EXPECT_CACHE_HIT(mMockCache, 0u, device.CreateRenderPipeline(&desc));
     }
     EXPECT_EQ(mMockCache.GetNumEntries(), 2u);
 }
 
-DAWN_INSTANTIATE_TEST(SinglePipelineCachingTests, VulkanBackend({"enable_blob_cache"}));
+DAWN_INSTANTIATE_TEST(SinglePipelineCachingTests,
+                      VulkanBackend({"enable_blob_cache"}),
+                      D3D12Backend({"enable_blob_cache"}));
 
 }  // namespace