You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
o3de/Gems/Atom/Feature/Common/Assets/Shaders/PostProcessing/NewDepthOfFieldTileReduce.azsl

94 lines
3.1 KiB
Plaintext

/*
* Copyright (c) Contributors to the Open 3D Engine Project.
* For complete copyright and license terms please see the LICENSE at the root of this distribution.
*
* SPDX-License-Identifier: Apache-2.0 OR MIT
*
*/
#include <Atom/Features/SrgSemantics.azsli>
#include <Atom/RPI/Math.azsli>
#include "NewDepthOfFieldCommon.azsli"
// Per-pass resources for the depth of field tile reduce compute shader.
ShaderResourceGroup PassSrg : SRG_PerPass
{
// Input texture. For this shader we're only interested in the CoC (Circle of Confusion) values,
// which are in the alpha channel (MainCS reads it with GatherAlpha)
Texture2D<float4> m_colorAndCoC;
// Output texture of tiled min/max CoC values. MainCS writes one texel per thread group:
// X channel is the min CoC and Y channel is the max CoC of the tile
RWTexture2D<float2> m_minMaxCoC;
// Texture dimensions. XY channels are width and height and ZW channels are 1 / width and 1 / height
// Auto-filled by the pass system when "ShaderImageDimensionsConstant" is specified in the .pass file
// m_inputDimensions.zw is used by MainCS to convert pixel positions into UVs for m_colorAndCoC
float4 m_inputDimensions;
// NOTE(review): m_outputDimensions is not referenced by the shader code visible in this file
float4 m_outputDimensions;
// Clamped sampler used for the gather on m_colorAndCoC
Sampler LinearSampler
{
MinFilter = Linear;
MagFilter = Linear;
MipFilter = Linear;
AddressU = Clamp;
AddressV = Clamp;
AddressW = Clamp;
};
}
// Groupshared scratch space for the min/max CoC reduction in MainCS.
// There is one slot per thread column (indexed by group_thread_id.x, 8 columns per group).
// Values are stored as the uint bit pattern of (CoC + 1) so that integer atomic min/max can be used;
// slot 0 holds the final result for the whole group once the reduction has finished.
groupshared uint LDS_MIN_COC[8];
groupshared uint LDS_MAX_COC[8];
// Calculates the min and max CoC (Circle of Confusion) for 16x16 pixel tiles
//
// Each thread group is 8x8 threads and each thread gathers a 2x2 pixel footprint, so one group
// covers a 16x16 pixel region of m_colorAndCoC and writes a single (min, max) texel to m_minMaxCoC.
//
// The reduction runs in two stages in groupshared memory:
//   1. Every thread folds its 2x2 min/max into the LDS slot of its column (group_thread_id.x)
//   2. The first row of threads (group_thread_id.y == 0) folds the 8 column slots into slot 0
//
// group_thread_id : thread coordinate inside the group, selects the LDS slot
// group_id        : group coordinate, used as the output texel coordinate
// dispatch_id     : global thread coordinate, used to derive the sample position
// linear_id       : flattened thread index inside the group (currently unused)
[numthreads(8, 8, 1)]
void MainCS(uint3 group_thread_id : SV_GroupThreadID, uint3 group_id : SV_GroupID, uint3 dispatch_id: SV_DispatchThreadID, uint linear_id : SV_GroupIndex)
{
    // Only the first row of threads initializes and later collapses the per-column LDS slots
    bool isFirstRow = (group_thread_id.y == 0);

    // Initialize groupshared mem for atomic min/max operations
    if (isFirstRow)
    {
        LDS_MIN_COC[group_thread_id.x] = 0xFFFFFFFF;
        LDS_MAX_COC[group_thread_id.x] = 0;
    }

    // Sync LDS
    GroupMemoryBarrierWithGroupSync();

    // We use gather to get 2x2 values at once, so thread samples are spaced 2 pixels apart (+1 so the sample position is in between the four pixels)
    float2 samplePos = float2(dispatch_id.xy) * 2 + float2(1, 1);
    float2 sampleUV = samplePos * PassSrg::m_inputDimensions.zw;

    // Gather CoC
    float4 cocGather = PassSrg::m_colorAndCoC.GatherAlpha(PassSrg::LinearSampler, sampleUV);
    float cocMin = min4(cocGather);
    float cocMax = max4(cocGather);

    // For atomic min/max to work with uints, floating point values should be positive
    // (the bit patterns of non-negative floats sort in the same order as the floats themselves)
    // Map from [-1, 1] range to [0, 2] and cast as uint
    InterlockedMin( LDS_MIN_COC[group_thread_id.x], asuint(cocMin + 1) );
    InterlockedMax( LDS_MAX_COC[group_thread_id.x], asuint(cocMax + 1) );

    // Sync LDS
    GroupMemoryBarrierWithGroupSync();

    // Min the mins and max the maxs
    // This is predicated instead of returning early for the other rows: every thread in the group
    // has to reach the barrier below, a barrier in divergent control flow is not allowed
    if (isFirstRow)
    {
        InterlockedMin( LDS_MIN_COC[0], LDS_MIN_COC[group_thread_id.x] );
        InterlockedMax( LDS_MAX_COC[0], LDS_MAX_COC[group_thread_id.x] );
    }

    // Sync LDS
    GroupMemoryBarrierWithGroupSync();

    // Each group writes to just one pixel. Only the first thread in the group (0, 0) writes out
    if (isFirstRow && group_thread_id.x == 0)
    {
        // Unpack uints (undo the +1 offset applied before the atomics)
        cocMin = asfloat(LDS_MIN_COC[0]) - 1;
        cocMax = asfloat(LDS_MAX_COC[0]) - 1;

        // Output min/max coc of 16x16 region
        PassSrg::m_minMaxCoC[group_id.xy] = float2(cocMin, cocMax);
    }
}