// Source path: o3de/Gems/Atom/Feature/Common/Assets/Shaders/PostProcessing/BloomDownsampleCS.azsl

/*
* Copyright (c) Contributors to the Open 3D Engine Project.
* For complete copyright and license terms please see the LICENSE at the root of this distribution.
*
* SPDX-License-Identifier: Apache-2.0 OR MIT
*
*/
#include <Atom/Features/SrgSemantics.azsli>
#include <Atom/Features/ColorManagement/TransformColor.azsli>
ShaderResourceGroup PassSrg : SRG_PerPass
{
    // Image that MainCS samples (through LinearSampler, mip level 0) before thresholding.
    Texture2D<float4> m_sourceTexture;

    // Declared as part of this SRG; not referenced by MainCS below.
    RWTexture2D<float4> m_targetTexture;

    // Destinations of the downsample chain. MainCS writes level N at dID.xy >> N,
    // i.e. each level has half the resolution of the previous one.
    RWTexture2D<float4> m_targetMipLevel0;
    RWTexture2D<float4> m_targetMipLevel1;
    RWTexture2D<float4> m_targetMipLevel2;
    RWTexture2D<float4> m_targetMipLevel3;
    RWTexture2D<float4> m_targetMipLevel4;

    // Bilinear, edge-clamped sampler used for the initial 2x2 tap on the source image.
    Sampler LinearSampler
    {
        AddressU = Clamp;
        AddressV = Clamp;
        MinFilter = Linear;
        MagFilter = Linear;
        MipFilter = Linear;
    };

    // 1.0 / source image size
    float2 m_sourceImageTexelSize;

    // Precomputed threshold / soft-knee terms:
    //   x -> threshold
    //   y -> (knee - 1) * threshold
    //   z -> 2 * knee * threshold
    //   w -> 1 / (4 * threshold * knee + EPSILON(1e-5))
    float4 m_thresholdConstants;

    // Per-thread-group scratch storage. MainCS alternates between the two arrays
    // from one downsample level to the next.
    groupshared float3 smColor1[256];
    groupshared float3 smColor2[64];

    // Scales `color` so that only the part of its luminance above the threshold
    // survives, with a quadratic soft transition around the threshold.
    //   color   : input color; its luminance is evaluated with ColorSpaceId::ACEScg.
    //   returns : `color` multiplied by a non-negative weight.
    float3 Threshold(float3 color)
    {
        float threshold  = m_thresholdConstants.x;
        float kneeOffset = m_thresholdConstants.y;
        float kneeRange  = m_thresholdConstants.z;
        float kneeScale  = m_thresholdConstants.w;

        float luma = CalculateLuminance(color, ColorSpaceId::ACEScg);

        // Quadratic response inside the knee region.
        float kneeInput = clamp(luma + kneeOffset, 0.0, kneeRange);
        float softResponse = kneeInput * kneeInput * kneeScale;

        // Linear response above the threshold.
        float hardResponse = luma - threshold;

        // Normalize by luminance (guarded against division by zero) so the result
        // is a scale factor applied to the original color.
        float scale = max(softResponse, hardResponse) / max(luma, 1e-5);
        return scale * color;
    }
}
// Maps a 2D texel coordinate to a shared-memory slot so that the four texels of
// every 2x2 block land in four consecutive slots:
//
//   -+---+---+---+---+-
//    | 0 | 1 | 2 | 3 |           -+---+---+---+---+---+---+---+---+-
//   -+---+---+---+---+-   ===>    | 0 | 1 | 4 | 5 | 2 | 3 | 6 | 7 |
//    | 4 | 5 | 6 | 7 |           -+---+---+---+---+---+---+---+---+-
//   -+---+---+---+---+-
//
//   xy       : texel coordinate inside the grid.
//   gridSize : width of the grid in texels (one pair of rows spans 2 * gridSize slots).
//   returns  : slot index; the top-left texel of a 2x2 block gets a slot that is a
//              multiple of 4 within its row pair, followed by its three neighbours.
uint FlatID(uint2 xy, uint gridSize)
{
    // First slot of the pair of rows that contains xy.y.
    uint rowPairStart = (xy.y & 0xFFFFFFFE) * gridSize;

    // Each 2x2 block along the row pair takes 4 slots.
    uint blockStart = (xy.x >> 1) << 2;

    // Position inside the block: bit 1 = odd row, bit 0 = odd column.
    uint slotInBlock = ((xy.y & 1) << 1) | (xy.x & 1);

    return rowPairStart + blockStart + slotInBlock;
}
// Thresholds the source image and produces five successively halved mip levels
// in a single dispatch. Each 16x16 thread group reduces its own tile through
// group-shared memory: 16x16 -> 8x8 -> 4x4 -> 2x2 -> 1x1.
//   dID  : dispatch thread id; also the texel coordinate written in mip level 0.
//   gtID : thread id inside the 16x16 group.
[numthreads(16, 16, 1)]
void MainCS(uint3 dID : SV_DispatchThreadID, uint3 gtID : SV_GroupThreadID)
{
    // Group thread IDs on the coarser grids used by downsampling levels 2, 3 and 4
    // (8x8, 4x4 and 2x2 texels per thread group respectively).
    uint2 gtID2 = gtID.xy >> 1;
    uint2 gtID3 = gtID.xy >> 2;
    uint2 gtID4 = gtID.xy >> 3;

    // UV of the sampled point in the source image, offset to the center of a 2x2 tap
    // so that the bilinear fetch averages four source texels.
    float2 uv = (dID.xy * 2 + 1) * PassSrg::m_sourceImageTexelSize;

    //------------------ Mip 0 (1 / 2 downscale, 16x16 tap for each thread group) -----------------
    // Apply threshold to extract bright pixels
    float3 colorDownsample1 =
        PassSrg::Threshold(PassSrg::m_sourceTexture.SampleLevel(PassSrg::LinearSampler, uv, 0).rgb);

    // Cache 16x16 tap in each thread group for following downsampling steps
    PassSrg::smColor1[FlatID(gtID.xy, 16)] = colorDownsample1;

    PassSrg::m_targetMipLevel0[dID.xy] = float4(colorDownsample1, 1.0);

    // Wait until the whole group has filled smColor1
    GroupMemoryBarrierWithGroupSync();
    //---------------------------------------------------------------------------------------------

    // The low k bits of (x | y) are zero only when both x and y are multiples of 2^k,
    // which selects the top-left thread of every 2^k x 2^k block.
    uint parity = dID.x | dID.y;

    //------------------ Mip 1 (1 / 4 downscale, 8x8 tap for each thread group) -------------------
    // Zero-initialized so threads that skip this level never carry an undefined value;
    // later levels only read it on threads that also executed this branch.
    float3 colorDownsample2 = float3(0.0, 0.0, 0.0);
    if ((parity & 1) == 0)
    {
        // FlatID stores each 2x2 block in 4 consecutive slots, so the three
        // neighbours of this thread's texel are at index + 1 .. index + 3.
        uint index = FlatID(gtID.xy, 16);
        colorDownsample2 = (colorDownsample1 + PassSrg::smColor1[index + 1] +
            PassSrg::smColor1[index + 2] + PassSrg::smColor1[index + 3]) * 0.25;

        PassSrg::smColor2[FlatID(gtID2, 8)] = colorDownsample2;
        PassSrg::m_targetMipLevel1[dID.xy >> 1] = float4(colorDownsample2, 1.0);
    }
    // Publishes smColor2 and guarantees all reads of smColor1 are finished before
    // the next level overwrites it.
    GroupMemoryBarrierWithGroupSync();
    //---------------------------------------------------------------------------------------------

    //------------------ Mip 2 (1 / 8 downscale, 4x4 tap for each thread group) -------------------
    float3 colorDownsample3 = float3(0.0, 0.0, 0.0);
    if ((parity & 3) == 0)
    {
        uint index = FlatID(gtID2, 8);
        colorDownsample3 = (colorDownsample2 + PassSrg::smColor2[index + 1] +
            PassSrg::smColor2[index + 2] + PassSrg::smColor2[index + 3]) * 0.25;

        // smColor1 is reused as scratch space for the 4x4 grid.
        PassSrg::smColor1[FlatID(gtID3, 4)] = colorDownsample3;
        PassSrg::m_targetMipLevel2[dID.xy >> 2] = float4(colorDownsample3, 1.0);
    }
    // Publishes smColor1 and guarantees all reads of smColor2 are finished before
    // the next level overwrites it.
    GroupMemoryBarrierWithGroupSync();
    //---------------------------------------------------------------------------------------------

    //------------------ Mip 3 (1 / 16 downscale, 2x2 tap for each thread group) ------------------
    float3 colorDownsample4 = float3(0.0, 0.0, 0.0);
    if ((parity & 7) == 0)
    {
        uint index = FlatID(gtID3, 4);
        colorDownsample4 = (colorDownsample3 + PassSrg::smColor1[index + 1] +
            PassSrg::smColor1[index + 2] + PassSrg::smColor1[index + 3]) * 0.25;

        // smColor2 is reused as scratch space for the 2x2 grid.
        PassSrg::smColor2[FlatID(gtID4, 2)] = colorDownsample4;
        PassSrg::m_targetMipLevel3[dID.xy >> 3] = float4(colorDownsample4, 1.0);
    }
    // Publishes the four 2x2 results in smColor2[0..3] for the final reduction.
    GroupMemoryBarrierWithGroupSync();
    //---------------------------------------------------------------------------------------------

    //------------------ Mip 4 (1 / 32 downscale, 1x1 tap for each thread group) ------------------
    // Only the first thread of the group performs the last reduction; its own
    // colorDownsample4 is the value stored in smColor2[0].
    if ((gtID.x | gtID.y) == 0)
    {
        float3 colorDownsample5 = (colorDownsample4 + PassSrg::smColor2[1] +
            PassSrg::smColor2[2] + PassSrg::smColor2[3]) * 0.25;

        PassSrg::m_targetMipLevel4[dID.xy >> 4] = float4(colorDownsample5, 1.0);
    }
    //---------------------------------------------------------------------------------------------
}