From 0f4027ff431caf25d9e9289c2b40cbc150889ecf Mon Sep 17 00:00:00 2001
From: Nicolette Prevost <nicolettep@google.com>
Date: Wed, 13 May 2026 14:59:55 -0400
Subject: [PATCH] [graphite] Cache single-buffer BindGroups on DawnBuffer

* This follows the Vulkan backend in caching single-buffer bind groups on to Buffer implementations. This allows us to remove DawnResourceProvider's special caching of single-texture bindgroups and findOrCreateSingleTextureSamplerBindGroup(...).

* The DawnCommandBuffer instead defines the bind group entries for single-texture groups and uses the generic createBindGroup(...) method.

Bug: https://issues.chromium.org/issues/500305404
Change-Id: Iaf86d7751815806cba1353f74ee2ce3e49042b9d
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/1232757
Commit-Queue: Nicolette Prevost <nicolettep@google.com>
Reviewed-by: Thomas Smith <thomsmit@google.com>
Reviewed-by: Michael Ludwig <michaelludwig@google.com>
---
 src/gpu/graphite/dawn/DawnBuffer.cpp          |  13 ++
 src/gpu/graphite/dawn/DawnBuffer.h            |   7 +
 src/gpu/graphite/dawn/DawnCommandBuffer.cpp   |  71 ++++++----
 src/gpu/graphite/dawn/DawnCommandBuffer.h     |   2 +-
 src/gpu/graphite/dawn/DawnGraphicsPipeline.h  |   6 +-
 .../graphite/dawn/DawnResourceProvider.cpp    | 130 +++++++-----------
 src/gpu/graphite/dawn/DawnResourceProvider.h  |  22 +--
 src/gpu/graphite/dawn/DawnSharedContext.cpp   |   2 +-
 8 files changed, 127 insertions(+), 126 deletions(-)

--- a/src/gpu/graphite/dawn/DawnBuffer.cpp
+++ b/src/gpu/graphite/dawn/DawnBuffer.cpp
@@ -364,4 +364,17 @@
     }
 }
 
+const wgpu::BindGroup* DawnBuffer::getCachedSingleBufferBindGroup(size_t bindingSize) const {
+    for (auto& cachedGroup : fCachedSingleBufferBindGroups) {
+        if (cachedGroup.first == bindingSize) {
+            return &cachedGroup.second;
+        }
+    }
+    return nullptr;
+}
+void DawnBuffer::addCachedSingleBufferBindGroup(wgpu::BindGroup bindGroup,
+                                                size_t bindingSize) const {
+    fCachedSingleBufferBindGroups.push_back({bindingSize, bindGroup});
+}
+
 } // namespace skgpu::graphite
--- a/src/gpu/graphite/dawn/DawnBuffer.h
+++ b/src/gpu/graphite/dawn/DawnBuffer.h
@@ -30,6 +30,9 @@
 
     const wgpu::Buffer& dawnBuffer() const { return fBuffer; }
 
+    const wgpu::BindGroup* getCachedSingleBufferBindGroup(size_t bindingSize) const;
+    void addCachedSingleBufferBindGroup(wgpu::BindGroup, size_t bindingSize) const;
+
 private:
     DawnBuffer(const DawnSharedContext*, size_t size, wgpu::Buffer, void* mapAtCreationPtr);
 
@@ -54,6 +57,10 @@
     wgpu::Buffer fBuffer;
     SkMutex fAsyncMutex;
     skia_private::STArray<1, AutoCallback> fAsyncMapCallbacks SK_GUARDED_BY(fAsyncMutex);
+
+    // By the time the command buffer requests a bind group, the provided Buffer pointer is const
+    // so this attribute must be mutable to avoid a const_cast.
+    mutable skia_private::TArray<std::pair<size_t, wgpu::BindGroup>> fCachedSingleBufferBindGroups;
 };
 
 } // namespace skgpu::graphite
--- a/src/gpu/graphite/dawn/DawnCommandBuffer.cpp
+++ b/src/gpu/graphite/dawn/DawnCommandBuffer.cpp
@@ -932,43 +932,66 @@
 }
 
 void DawnCommandBuffer::syncUniformBuffers() {
-    static constexpr int kNumBuffers = DawnGraphicsPipeline::kNumUniformBuffers;
-
-    if (fBoundUniformBuffersDirty) {
-        fBoundUniformBuffersDirty = false;
+    if (!fBoundUniformBuffersDirty) {
+        return;
+    }
+    fBoundUniformBuffersDirty = false;
 
-        std::array<uint32_t, kNumBuffers> dynamicOffsets;
-        std::array<std::pair<const DawnBuffer*, uint32_t>, kNumBuffers> boundBuffersAndSizes;
+    bool usePushConstants = fSharedContext->dawnCaps()->
+            resourceBindingRequirements().fUsePushConstantsForIntrinsicConstants;
 
-        std::array<bool, kNumBuffers> enabled = {
-                !fSharedContext->dawnCaps()
-                         ->resourceBindingRequirements()
-                         .fUsePushConstantsForIntrinsicConstants,  // intrinsic uniforms
-                fActiveGraphicsPipeline->hasCombinedUniforms(),    // paint AND renderstep uniforms!
-                fActiveGraphicsPipeline->hasGradientBuffer(),      // gradient SSBO
+    // We expect to have up to 3 uniforms in this bind group.
+    static constexpr int kMaxUniformsInGroup = 3;
+    // Until/unless uniform bind group structure gets reorganized, this should be equivalent to the
+    // size of our bound uniform array.
+    SkASSERT(kMaxUniformsInGroup == fBoundUniforms.size());
+
+    wgpu::BindGroup bindGroup;
+    std::array<uint32_t, kMaxUniformsInGroup> dynamicOffsets {0};
+    // Check if we can use an optimized route for single-uniform buffer bind groups:
+    if (usePushConstants &&
+        !fActiveGraphicsPipeline->hasGradientBuffer() &&
+        fActiveGraphicsPipeline->hasCombinedUniforms()) {
+        const BindBufferInfo& bufferInfo =
+                fBoundUniforms[DawnGraphicsPipeline::kCombinedUniformIndex];
+        bindGroup = fResourceProvider->findOrCreateSingleUniformBindGroup(bufferInfo);
+        dynamicOffsets[DawnGraphicsPipeline::kCombinedUniformIndex] = bufferInfo.fOffset;
+    } else {
+        std::array<bool, kMaxUniformsInGroup> enabled = {
+                !usePushConstants,                              // intrinsic uniforms
+                fActiveGraphicsPipeline->hasCombinedUniforms(), // paint AND renderstep uniforms!
+                fActiveGraphicsPipeline->hasGradientBuffer(),   // gradient SSBO
+        };
+        constexpr uint32_t kBindingIndices[] = {
+            DawnGraphicsPipeline::kIntrinsicUniformBufferIndex,
+            DawnGraphicsPipeline::kCombinedUniformIndex,
+            DawnGraphicsPipeline::kGradientBufferIndex,
         };
 
-        for (int i = 0; i < kNumBuffers; ++i) {
+        std::array<wgpu::BindGroupEntry, kMaxUniformsInGroup> bindGroupEntries {};
+        for (int i = 0; i < kMaxUniformsInGroup; ++i) {
+            bindGroupEntries[i].binding = kBindingIndices[i];
             if (enabled[i] && fBoundUniforms[i]) {
-                boundBuffersAndSizes[i].first =
-                        static_cast<const DawnBuffer*>(fBoundUniforms[i].fBuffer);
-                boundBuffersAndSizes[i].second = fBoundUniforms[i].fSize;
+                bindGroupEntries[i].size = fBoundUniforms[i].fSize;
+                bindGroupEntries[i].buffer =
+                        static_cast<const DawnBuffer*>(fBoundUniforms[i].fBuffer)->dawnBuffer();
                 dynamicOffsets[i] = fBoundUniforms[i].fOffset;
             } else {
                 // Unused or null binding
-                boundBuffersAndSizes[i].first = nullptr;
-                dynamicOffsets[i] = 0;
+                bindGroupEntries[i].buffer = fResourceProvider->getOrCreateNullBuffer();
             }
         }
 
-        auto bindGroup =
-                fResourceProvider->findOrCreateUniformBuffersBindGroup(boundBuffersAndSizes);
-
-        fActiveRenderPassEncoder.SetBindGroup(DawnGraphicsPipeline::kUniformBufferBindGroupIndex,
-                                              bindGroup,
-                                              dynamicOffsets.size(),
-                                              dynamicOffsets.data());
+        const auto& groupLayouts = fActiveGraphicsPipeline->dawnGroupLayouts();
+        bindGroup = fResourceProvider->createBindGroup(
+                bindGroupEntries,
+                groupLayouts[DawnGraphicsPipeline::kUniformBufferBindGroupIndex]);
     }
+
+    fActiveRenderPassEncoder.SetBindGroup(DawnGraphicsPipeline::kUniformBufferBindGroupIndex,
+                                          bindGroup,
+                                          dynamicOffsets.size(),
+                                          dynamicOffsets.data());
 }
 
 void DawnCommandBuffer::setScissor(const Scissor& scissor) {
--- a/src/gpu/graphite/dawn/DawnCommandBuffer.h
+++ b/src/gpu/graphite/dawn/DawnCommandBuffer.h
@@ -164,7 +164,7 @@
 
     bool fBoundUniformBuffersDirty = false;
 
-    std::array<BindBufferInfo, DawnGraphicsPipeline::kNumUniformBuffers> fBoundUniforms;
+    std::array<BindBufferInfo, DawnGraphicsPipeline::kMaxNumUniformBuffers> fBoundUniforms;
 
     wgpu::CommandEncoder fCommandEncoder;
     wgpu::RenderPassEncoder fActiveRenderPassEncoder;
--- a/src/gpu/graphite/dawn/DawnGraphicsPipeline.h
+++ b/src/gpu/graphite/dawn/DawnGraphicsPipeline.h
@@ -45,10 +45,14 @@
     inline static constexpr unsigned int kTextureBindGroupIndex = 1;
     inline static constexpr unsigned int kBindGroupCount = 2;
 
+    // TODO(b/512814646): WASM does not support push constant usage, meaning that we often have 2
+    // uniform buffers within one BindGroup. This is unideal since we can store single-uniform
+    // BindGroups on DawnBuffers. Consider reorganizing uniform buffers such that we can more often
+    // only have one uniform buffer per BindGroup.
     inline static constexpr unsigned int kIntrinsicUniformBufferIndex = 0;
     inline static constexpr unsigned int kCombinedUniformIndex = 1;
     inline static constexpr unsigned int kGradientBufferIndex = 2;
-    inline static constexpr unsigned int kNumUniformBuffers = 3;
+    inline static constexpr unsigned int kMaxNumUniformBuffers = 3;
 
     inline static constexpr unsigned int kIntrinsicUniformSize = 32;
 
--- a/src/gpu/graphite/dawn/DawnResourceProvider.cpp
+++ b/src/gpu/graphite/dawn/DawnResourceProvider.cpp
@@ -31,7 +31,6 @@
 namespace {
 
 constexpr uint32_t kBufferBindingSizeAlignment = 16;
-constexpr int kMaxNumberOfCachedBufferBindGroups = 1024;
 constexpr int kMaxNumberOfCachedTextureBindGroups = 4096;
 
 wgpu::ShaderModule create_shader_module(const wgpu::Device& device, const char* source) {
@@ -113,36 +112,6 @@
 using BindGroupKey = typename DawnResourceProvider::BindGroupKey<NumEntries>;
 using UniformBindGroupKey = BindGroupKey<DawnResourceProvider::kNumUniformEntries>;
 
-UniformBindGroupKey make_ubo_bind_group_key(
-        const std::array<std::pair<const DawnBuffer*, uint32_t>,
-                         DawnResourceProvider::kNumUniformEntries>& boundBuffersAndSizes) {
-    UniformBindGroupKey uniqueKey;
-    {
-        // Each entry in the bind group needs 2 uint32_t in the key:
-        //  - buffer's unique ID: 32 bits.
-        //  - buffer's binding size: 32 bits.
-        // We need total of 4 entries in the uniform buffer bind group.
-        // Unused entries will be assigned zero values.
-        UniformBindGroupKey::Builder builder(&uniqueKey);
-
-        for (uint32_t i = 0; i < boundBuffersAndSizes.size(); ++i) {
-            const DawnBuffer* boundBuffer = boundBuffersAndSizes[i].first;
-            const uint32_t bindingSize = boundBuffersAndSizes[i].second;
-            if (boundBuffer) {
-                builder[2 * i] = boundBuffer->uniqueID().asUInt();
-                builder[2 * i + 1] = bindingSize;
-            } else {
-                builder[2 * i] = 0;
-                builder[2 * i + 1] = 0;
-            }
-        }
-
-        builder.finish();
-    }
-
-    return uniqueKey;
-}
-
 BindGroupKey<1> make_texture_bind_group_key(const DawnSampler* sampler,
                                             const DawnTexture* texture) {
     BindGroupKey<1> uniqueKey;
@@ -409,7 +378,6 @@
                                            uint32_t recorderID,
                                            size_t resourceBudget)
         : ResourceProvider(sharedContext, singleOwner, recorderID, resourceBudget)
-        , fUniformBufferBindGroupCache(kMaxNumberOfCachedBufferBindGroups)
         , fSingleTextureSamplerBindGroups(kMaxNumberOfCachedTextureBindGroups)
         , fSingleOwner(singleOwner) {
     fIntrinsicConstantsManager = std::make_unique<IntrinsicConstantsManager>(this);
@@ -637,53 +605,52 @@
     return fNullBuffer;
 }
 
-const wgpu::BindGroup& DawnResourceProvider::findOrCreateUniformBuffersBindGroup(
-        const std::array<std::pair<const DawnBuffer*, uint32_t>, kNumUniformEntries>&
-                boundBuffersAndSizes) {
+wgpu::BindGroup DawnResourceProvider::findOrCreateSingleUniformBindGroup(
+        const BindBufferInfo& bufferInfo) {
     SKGPU_ASSERT_SINGLE_OWNER(fSingleOwner)
-
-    auto key = make_ubo_bind_group_key(boundBuffersAndSizes);
-    auto* existingBindGroup = fUniformBufferBindGroupCache.find(key);
-    if (existingBindGroup) {
-        // cache hit.
-        return *existingBindGroup;
-    }
-
-    // Translate to wgpu::BindGroupDescriptor
-    std::array<wgpu::BindGroupEntry, kNumUniformEntries> entries;
-
-    constexpr uint32_t kBindingIndices[] = {
-        DawnGraphicsPipeline::kIntrinsicUniformBufferIndex,
-        DawnGraphicsPipeline::kCombinedUniformIndex,
-        DawnGraphicsPipeline::kGradientBufferIndex,
+    // We should only hit the single-uniform case if push constant usage is supported for intrinsic
+    // constants.
+    SkASSERT(this->dawnSharedContext()->dawnCaps()->
+            resourceBindingRequirements().fUsePushConstantsForIntrinsicConstants);
+
+    auto buffer = static_cast<const DawnBuffer*>(bufferInfo.fBuffer);
+
+    if (auto cachedBindGroup = buffer->getCachedSingleBufferBindGroup(bufferInfo.fSize)) {
+        return *cachedBindGroup;
+    }
+
+    // We should be able to assume that if we only have one uniform that it is the combined uniform
+    // buffer. Construct a list of bind group entries to represent the single combined uniform
+    // buffer case.
+    wgpu::BindGroupEntry intrinsicConstantNullEntry;
+    intrinsicConstantNullEntry.binding = DawnGraphicsPipeline::kIntrinsicUniformBufferIndex;
+    intrinsicConstantNullEntry.buffer  = this->getOrCreateNullBuffer();
+
+    wgpu::BindGroupEntry combinedUniformEntry;
+    combinedUniformEntry.binding = DawnGraphicsPipeline::kCombinedUniformIndex;
+    combinedUniformEntry.offset  = 0; // Use dynamic offsets; ignore bufferInfo.fOffset
+    combinedUniformEntry.buffer  = buffer->dawnBuffer();
+    combinedUniformEntry.size    = SkAlignTo(bufferInfo.fSize, kBufferBindingSizeAlignment);
+
+    wgpu::BindGroupEntry gradientBufferNullEntry;
+    gradientBufferNullEntry.binding = DawnGraphicsPipeline::kGradientBufferIndex;
+    gradientBufferNullEntry.buffer  = this->getOrCreateNullBuffer();
+
+    std::array<wgpu::BindGroupEntry, kNumUniformEntries> entries = {
+        intrinsicConstantNullEntry,
+        combinedUniformEntry,
+        gradientBufferNullEntry
     };
 
-    for (uint32_t i = 0; i < boundBuffersAndSizes.size(); ++i) {
-        const DawnBuffer* boundBuffer = boundBuffersAndSizes[i].first;
-        const uint32_t bindingSize = boundBuffersAndSizes[i].second;
-
-        entries[i].binding = kBindingIndices[i];
-        entries[i].offset = 0;
-        if (boundBuffer) {
-            entries[i].buffer = boundBuffer->dawnBuffer();
-            entries[i].size = SkAlignTo(bindingSize, kBufferBindingSizeAlignment);
-        } else {
-            entries[i].buffer = this->getOrCreateNullBuffer();
-            entries[i].size = wgpu::kWholeSize;
-        }
-    }
-
-    wgpu::BindGroupDescriptor desc;
-    desc.layout = this->dawnSharedContext()->getUniformBuffersBindGroupLayout();
-    desc.entryCount = entries.size();
-    desc.entries = entries.data();
+    wgpu::BindGroup bindGroup = this->createBindGroup(
+            entries, this->dawnSharedContext()->getUniformBuffersBindGroupLayout());
 
-    const auto& device = this->dawnSharedContext()->device();
-    auto bindGroup = device.CreateBindGroup(&desc);
+    buffer->addCachedSingleBufferBindGroup(bindGroup, bufferInfo.fSize);
 
-    return *fUniformBufferBindGroupCache.insert(key, bindGroup);
+    return bindGroup;
 }
 
+
 const wgpu::BindGroup& DawnResourceProvider::findOrCreateSingleTextureSamplerBindGroup(
         const DawnSampler* sampler, const DawnTexture* texture) {
     SKGPU_ASSERT_SINGLE_OWNER(fSingleOwner)
@@ -721,7 +688,6 @@
     // when the DawnTexture and DawnBuffer is destroyed, but removing the bind groups themselves
     // helps reduce CPU memory periodically.
     fSingleTextureSamplerBindGroups.reset();
-    fUniformBufferBindGroupCache.reset();
 }
 
 void DawnResourceProvider::onPurgeResourcesNotUsedSince(StdSteadyClock::time_point purgeTime) {
--- a/src/gpu/graphite/dawn/DawnResourceProvider.h
+++ b/src/gpu/graphite/dawn/DawnResourceProvider.h
@@ -65,14 +65,8 @@
                                              AccessPattern,
                                              std::string_view label);
 
-    // Find the cached bind group or create a new one based on the bound buffers and their
-    // binding sizes (boundBuffersAndSizes) for these uniforms (in order):
-    // - Intrinsic constants.
-    // - Render step uniforms.
-    // - Paint uniforms.
-    const wgpu::BindGroup& findOrCreateUniformBuffersBindGroup(
-            const std::array<std::pair<const DawnBuffer*, uint32_t>, kNumUniformEntries>&
-                    boundBuffersAndSizes);
+    // Find or create a bind group containing the given buffer.
+    wgpu::BindGroup findOrCreateSingleUniformBindGroup(const BindBufferInfo&);
 
     // Find or create a bind group containing the given sampler & texture.
     const wgpu::BindGroup& findOrCreateSingleTextureSamplerBindGroup(const DawnSampler* sampler,
@@ -82,6 +76,10 @@
     BindBufferInfo findOrCreateIntrinsicBindBufferInfo(DawnCommandBuffer* cb,
                                                        UniformDataBlock intrinsicValues);
 
+    // For BindGroupEntries, using this method to get a null buffer pointer rather than simply using
+    // nullptr allows for assigning a label (when enabled in Caps) for more clear debugging.
+    const wgpu::Buffer& getOrCreateNullBuffer();
+
 private:
     sk_sp<ComputePipeline> createComputePipeline(const ComputePipelineDesc&) override;
 
@@ -95,8 +93,6 @@
     BackendTexture onCreateBackendTexture(SkISize dimensions, const TextureInfo&) override;
     void onDeleteBackendTexture(const BackendTexture&) override;
 
-    const wgpu::Buffer& getOrCreateNullBuffer();
-
     DawnSharedContext* dawnSharedContext() const;
 
     void onFreeGpuResources() override;
@@ -111,7 +107,6 @@
                                       wgpu::BindGroup,
                                       typename BindGroupKey<NumEntries>::Hash>;
 
-    BindGroupCache<kNumUniformEntries> fUniformBufferBindGroupCache;
     BindGroupCache<1> fSingleTextureSamplerBindGroups;
 
     class IntrinsicBuffer;
--- a/src/gpu/graphite/dawn/DawnSharedContext.cpp
+++ b/src/gpu/graphite/dawn/DawnSharedContext.cpp
@@ -111,7 +111,7 @@
 void DawnSharedContext::createUniformBuffersBindGroupLayout() {
     const Caps* caps = this->caps();
 
-    std::array<wgpu::BindGroupLayoutEntry, DawnGraphicsPipeline::kNumUniformBuffers> entries;
+    std::array<wgpu::BindGroupLayoutEntry, DawnGraphicsPipeline::kMaxNumUniformBuffers> entries;
     entries[0].binding = DawnGraphicsPipeline::kIntrinsicUniformBufferIndex;
     entries[0].visibility = wgpu::ShaderStage::Vertex | wgpu::ShaderStage::Fragment;
     entries[0].buffer.type = wgpu::BufferBindingType::Uniform;
