/*
 * Copyright (c) Contributors to the Open 3D Engine Project.
 * For complete copyright and license terms please see the LICENSE at the root of this distribution.
 *
 * SPDX-License-Identifier: Apache-2.0 OR MIT
 *
 */

// NOTE(review): the first two include directives below have no target in this copy of the file.
// The angle-bracket paths appear to have been stripped; restore them from version control.
#include
#include
#include "NewDepthOfFieldCommon.azsli"

ShaderResourceGroup PassSrg : SRG_PerPass
{
    // For this shader we're only interested in the CoC values, which are in the alpha channel
    // NOTE(review): any element-type argument on the two textures below appears to have been
    // stripped along with the include paths. m_minMaxCoC is written with a float2 in MainCS.
    Texture2D m_colorAndCoC;

    // Tiled min/max CoC values (one texel per thread group: x = min, y = max)
    RWTexture2D m_minMaxCoC;

    // Texture dimensions. XY channels are width and height and ZW channels are 1 / width and 1 / height
    // Auto-filled by the pass system when "ShaderImageDimensionsConstant" is specified in the .pass file
    float4 m_inputDimensions;
    float4 m_outputDimensions;

    Sampler LinearSampler
    {
        MinFilter = Linear;
        MagFilter = Linear;
        MipFilter = Linear;
        AddressU = Clamp;
        AddressV = Clamp;
        AddressW = Clamp;
    };
}

// Per-column partial results, one slot per group_thread_id.x.
// Values are stored as the uint bit pattern of (CoC + 1) so that integer atomics can be used.
groupshared uint LDS_MIN_COC[8];
groupshared uint LDS_MAX_COC[8];

// Calculates the min and max CoC (Circle of Confusion) for 16x16 pixel tiles.
//
// Each of the 8x8 threads gathers a 2x2 footprint of CoC values, so one thread group covers
// 16x16 input pixels and writes a single float2(min, max) texel at m_minMaxCoC[group_id.xy].
//
// The reduction runs in two stages through groupshared memory:
//   1. every thread atomically folds its local min/max into the slot of its column (x)
//   2. the threads of row 0 atomically fold the eight column slots into slot 0
//
// All three group barriers are executed by every thread of the group. Work that only some
// threads should perform is predicated with 'if' blocks instead of returning early, because
// a group-wide barrier must not be placed in divergent control flow.
[numthreads(8, 8, 1)]
void MainCS(uint3 group_thread_id : SV_GroupThreadID, uint3 group_id : SV_GroupID, uint3 dispatch_id: SV_DispatchThreadID, uint linear_id : SV_GroupIndex)
{
    const uint column = group_thread_id.x;
    const bool isFirstRow = (group_thread_id.y == 0);

    // Initialize groupshared mem for atomic min/max operations (one thread per column slot)
    if (isFirstRow)
    {
        LDS_MIN_COC[column] = 0xFFFFFFFF;
        LDS_MAX_COC[column] = 0;
    }

    // Sync LDS
    GroupMemoryBarrierWithGroupSync();

    // We use gather to get 2x2 values at once, so thread samples are spaced 2 pixels apart
    // (+1 so the sample position is in between the four pixels)
    float2 samplePos = float2(dispatch_id.xy) * 2 + float2(1, 1);
    float2 sampleUV = samplePos * PassSrg::m_inputDimensions.zw;

    // Gather CoC
    float4 cocGather = PassSrg::m_colorAndCoC.GatherAlpha(PassSrg::LinearSampler, sampleUV);
    float cocMin = min4(cocGather);
    float cocMax = max4(cocGather);

    // For atomic min/max to work with uints, floating point values should be positive
    // (non-negative floats order the same way as their bit patterns do when read as uints).
    // Map from [-1, 1] range to [0, 2] and cast as uint
    InterlockedMin( LDS_MIN_COC[column], asuint(cocMin + 1) );
    InterlockedMax( LDS_MAX_COC[column], asuint(cocMax + 1) );

    // Sync LDS
    GroupMemoryBarrierWithGroupSync();

    // Min the mins and max the maxs: row 0 folds the per-column slots into slot 0.
    // This is predicated rather than implemented as an early return for the other rows,
    // so that the barrier below is reached by the entire group.
    if (isFirstRow)
    {
        InterlockedMin( LDS_MIN_COC[0], LDS_MIN_COC[column] );
        InterlockedMax( LDS_MAX_COC[0], LDS_MAX_COC[column] );
    }

    // Sync LDS
    GroupMemoryBarrierWithGroupSync();

    // Each group writes to just one pixel. Only the first thread of the group (0, 0) writes out
    if (isFirstRow && column == 0)
    {
        // Unpack uints (undo the +1 bias applied before the atomics)
        cocMin = asfloat(LDS_MIN_COC[0]) - 1;
        cocMax = asfloat(LDS_MAX_COC[0]) - 1;

        // Output min/max coc of 16x16 region
        PassSrg::m_minMaxCoC[group_id.xy] = float2(cocMin, cocMax);
    }
}