You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
o3de/Gems/Atom/Feature/Common/Assets/Shaders/LightCulling/LightCulling.azsl

706 lines
28 KiB
Plaintext

/*
* Copyright (c) Contributors to the Open 3D Engine Project.
* For complete copyright and license terms please see the LICENSE at the root of this distribution.
*
* SPDX-License-Identifier: Apache-2.0 OR MIT
*
*/
#include <scenesrg.srgi>
// Perform light culling on a compute shader
#include <Atom/RPI/Math.azsli>
#include <Atom/Features/LightCulling/LightCullingShared.azsli>
// Bit flags stored in QuadLight::m_flags.
// CullQuadLights tests EmitsBothDirections to choose between single-sided and double-sided culling.
enum QuadLightFlag // Copied from QuadLight.azsli. See ATOM-3731
{
None = 0x00,
EmitsBothDirections = 0x01, // 1 << 0, // Quad should emit light from both sides
UseFastApproximation = 0x02, // 1 << 1, // Use a fast approximation instead of linearly transformed cosines.
};
// Bit flags stored in DiskLight::m_flags.
enum DiskLightFlag
{
UseConeAngle = 1, // When set, CullDiskLights culls the light as a cone (TestSphereVsCone) instead of as a sphere.
};
// Per-pass resources for the light culling compute shader: source light/decal buffers and their counts,
// the per-tile depth data used for culling, and the output light list / light count targets.
ShaderResourceGroup PassSrg : SRG_PerPass
{
// Figure out how to remove duplicate struct definitions.
// These are also defined in View.srg
// ATOM-3731
struct SimplePointLight
{
float3 m_position;
float m_invAttenuationRadiusSquared; // For a radius at which this light no longer has an effect, 1 / radius^2.
float3 m_rgbIntensityCandelas;
float m_padding; // explicit padding.
};
struct SimpleSpotLight
{
float3 m_position;
float m_invAttenuationRadiusSquared; // For a radius at which this light no longer has an effect, 1 / radius^2.
float3 m_direction;
float m_cosInnerConeAngle; // cosine of the inner cone angle
float3 m_rgbIntensityCandelas;
float m_cosOuterConeAngle; // cosine of the outer cone angle (the one used for culling in CullSimpleSpotLights)
};
struct PointLight
{
float3 m_position;
float m_invAttenuationRadiusSquared; // For a radius at which this light no longer has an effect, 1 / radius^2.
float3 m_rgbIntensityCandelas;
float m_bulbRadius;
uint3 m_shadowIndices;
uint m_padding;
};
struct DiskLight
{
float3 m_position;
float m_invAttenuationRadiusSquared; // For a radius at which this light no longer has an effect, 1 / radius^2.
float3 m_rgbIntensityCandelas;
float m_diskRadius;
float3 m_direction;
uint m_flags; // See DiskLightFlag
float m_cosInnerConeAngle;
float m_cosOuterConeAngle;
float m_bulbPositionOffset; // Culling moves the light position back along m_direction by this amount (see CullDiskLights).
uint m_shadowIndex;
};
struct CapsuleLight
{
float3 m_startPoint; // One of the end points of the capsule
float m_radius; // Radius of the capsule, ie distance from line segment to surface.
float3 m_direction; // normalized vector from m_startPoint towards the other end point.
float m_length; // length of the line segment making up the inside of the capsule. Doesn't include caps (0 length capsule == sphere)
float3 m_rgbIntensityCandelas; // total rgb luminous intensity of the capsule in candela
float m_invAttenuationRadiusSquared; // Inverse of the distance at which this light no longer has an effect, squared. Also used for falloff calculations.
};
struct QuadLight
{
float3 m_position;
float m_invAttenuationRadiusSquared; // For a radius at which this light no longer has an effect, 1 / radius^2.
float3 m_leftDir; // Direction from center of quad to the left edge
float m_halfWidth; // Half the width of the quad. m_leftDir * m_halfWidth is a vector from the center to the left edge.
float3 m_upDir; // Direction from center of quad to the top edge
float m_halfHeight; // Half the height of the quad. m_upDir * m_halfHeight is a vector from the center to the top edge.
float3 m_rgbIntensityNits;
uint m_flags; // See QuadLightFlag
};
struct LightCullingConstants
{
float4x4 m_worldToView; // Applied to light/decal positions and directions before culling.
float4 m_screenUVToRay; // xy is a scale and zw an offset applied to a screen UV (see ScreenUvToRay).
float2 m_gridPixel; // Multiplied by the tile id to get the UV of a tile's corner (see ComputeScreenRays).
float2 m_gridHalfPixel; // Added to the tile corner UV to get the tile center UV.
uint m_gridWidth; // Passed to GetLightListIndex when addressing m_lightList.
uint m_padding0;
uint m_padding1;
uint m_padding2;
};
LightCullingConstants m_constantData;
// Source light data
StructuredBuffer<SimplePointLight> m_simplePointLights;
StructuredBuffer<SimpleSpotLight> m_simpleSpotLights;
StructuredBuffer<PointLight> m_pointLights;
StructuredBuffer<DiskLight> m_diskLights;
StructuredBuffer<CapsuleLight> m_capsuleLights;
StructuredBuffer<QuadLight> m_quadLights;
// Number of elements to process in each of the buffers above
uint m_simplePointLightCount;
uint m_simpleSpotLightCount;
uint m_pointLightCount;
uint m_diskLightCount;
uint m_capsuleLightCount;
uint m_quadLightCount;
// Produced by the LightCullingTilePrepare pass. Contains depth min/max and mask data (a bit set for each location where opaque geo was found)
Texture2D<uint4> m_tileLightData;
// Destination light data
RWStructuredBuffer<uint> m_lightList;
RWTexture2D<uint> m_lightCount;
struct Decal
{
float3 m_position;
float m_opacity;
float4 m_quaternion;
float3 m_halfSize;
float m_angleAttenuation;
uint m_sortKeyPacked; // The low 8 bits are the sort key used by SortDecals (see GetSortKey).
uint m_textureArrayIndex;
uint m_textureIndex;
uint m_padding[1];
};
StructuredBuffer<Decal> m_decals;
uint m_decalCount;
}
// Number of entries recorded for the category (decals or one light type) currently being culled for this tile.
// It is incremented atomically in MarkLightAsVisibleInSharedMemory and can exceed the capacity of shared_lightIndices.
groupshared uint shared_lightCount;
// Packed entries (see PackLightIndexWithBinMask) found visible in this tile; one slot per thread in the group.
groupshared uint shared_lightIndices[TILE_DIM_X * TILE_DIM_Y];
// Returns true when the z component of dir, scaled by RH_COORD_SYSTEM_REVERSE, is negative.
bool IsVectorPointingTowardsEye(const float3 dir)
{
    const float signedZ = dir.z * RH_COORD_SYSTEM_REVERSE;
    return signedZ < 0;
}
// Transforms a position by m_worldToView. The point is extended with w = 1 so the translation is applied.
float3 WorldToView_Point(float3 p)
{
    const float4 homogeneousPoint = float4(p, 1.0);
    return mul(PassSrg::m_constantData.m_worldToView, homogeneousPoint).xyz;
}
// Transforms a direction by the upper-left 3x3 of m_worldToView (no translation is applied).
float3 WorldToView_Vector(float3 v)
{
    const float3x3 worldToViewNoTranslation = (float3x3)PassSrg::m_constantData.m_worldToView;
    return mul(worldToViewNoTranslation, v);
}
// Sphere vs AABB overlap test where the sphere is described by 1 / radius^2.
// Returns true when the squared distance from the sphere center to the box is strictly less than radius^2.
bool TestSphereVsAabbInvSqrt(float3 sphereCenter, float invSphereRadiusSq, float3 aabbCenter, float3 aabbHalfSize)
{
    // Per-axis distance from the sphere center to the box surface; zero on axes where the center is inside the slab.
    const float3 centerOffset = abs(aabbCenter - sphereCenter);
    const float3 outsideDistance = max(float3(0.0, 0.0, 0.0), centerOffset - aabbHalfSize);
    const float distanceSq = dot(outsideDistance, outsideDistance);
    return distanceSq * invSphereRadiusSq < 1.0f;
}
// Sphere vs AABB overlap test where the sphere is described by its squared radius.
// Returns true when the squared distance from the sphere center to the box is strictly less than sphereRadiusSq.
bool TestSphereVsAabb(float3 sphereCenter, float sphereRadiusSq, float3 aabbCenter, float3 aabbHalfSize)
{
    // Per-axis distance from the sphere center to the box surface; zero on axes where the center is inside the slab.
    const float3 centerOffset = abs(aabbCenter - sphereCenter);
    const float3 outsideDistance = max(float3(0.0, 0.0, 0.0), centerOffset - aabbHalfSize);
    const float distanceSq = dot(outsideDistance, outsideDistance);
    return distanceSq < sphereRadiusSq;
}
// Conservative sphere vs cone test. It is not precise: it can report false positives because it is simplified for speed.
// Function origin and description: https://bartwronski.com/2017/04/13/cull-that-cone/
// spherePos / sphereRadius describe the sphere; origin / forward / cosa / size describe the cone
// (apex, axis, cosine of the cone angle, and extent along the axis).
bool TestSphereVsCone(float3 spherePos, float sphereRadius, float3 origin, float3 forward, float cosa, float size)
{
    const float3 toSphere = spherePos - origin;
    const float distanceAlongAxis = dot(toSphere, forward);

    // 1 / sin(angle), derived from the cosine.
    const float invSinAngle = rsqrt(1 - cosa * cosa);
    const float toSphereLengthSq = dot(toSphere, toSphere);
    const float distanceFromAxis = sqrt(max(0.0, toSphereLengthSq - distanceAlongAxis * distanceAlongAxis));
    const float distanceClosestPoint = invSinAngle * cosa * distanceFromAxis - distanceAlongAxis;

    const bool withinAngle = distanceClosestPoint <= sphereRadius * invSinAngle;
    const bool notBehindApex = distanceAlongAxis >= -sphereRadius;
    const bool notBeyondEnd = distanceAlongAxis <= sphereRadius + size;
    return withinAngle && notBehindApex && notBeyondEnd;
}
// Maps a screen UV to a ray xy value using the scale (xy) and offset (zw) stored in m_screenUVToRay.
float2 ScreenUvToRay(float2 uv)
{
    const float2 rayScale = PassSrg::m_constantData.m_screenUVToRay.xy;
    const float2 rayOffset = PassSrg::m_constantData.m_screenUVToRay.zw;
    return uv * rayScale + rayOffset;
}
// Returns the screen rays bounding a tile:
//   xy holds the top left corner
//   zw holds the bottom right corner
// The rays are constructed assuming the distance to them along z is 1.0, so multiplying them by an actual
// depth value gives a position in the tile in view space.
// tileCenterUv receives the 2D screen coordinates of the center of the tile.
float4 ComputeScreenRays(uint2 tileId, out float2 tileCenterUv)
{
    const float2 tileSizeUv = PassSrg::m_constantData.m_gridPixel;
    const float2 topLeftUv = float2(tileId) * tileSizeUv;
    const float2 bottomRightUv = topLeftUv + tileSizeUv;

    tileCenterUv = topLeftUv + PassSrg::m_constantData.m_gridHalfPixel;
    return float4(ScreenUvToRay(topLeftUv), ScreenUvToRay(bottomRightUv));
}
// Returns the smallest power of two that is greater than or equal to x.
// Inputs of 0 and 1 both return 1.
uint NextPowerTwo(uint x)
{
    // firstbithigh(0) returns -1, which would turn the shift below into a shift by a negative amount.
    // Handle the inputs that lead to that case explicitly instead of relying on how the shift wraps.
    if (x <= 1)
    {
        return 1;
    }
    // https://wickedengine.net/2018/01/05/next-power-of-two-in-hlsl/
    return 2 << firstbithigh(x - 1);
}
// Extracts the decal sort key, which is stored in the low 8 bits of m_sortKeyPacked.
uint GetSortKey(PassSrg::Decal decal)
{
    const uint sortKeyMask = 0xFF;
    return decal.m_sortKeyPacked & sortKeyMask;
}
// Returns true when the decal referenced by packedIndexLeft should come after the one referenced by packedIndexRight.
// Decals are ordered by sort key first; decals with equal sort keys are ordered by their index in the decal buffer.
bool AreDecalsOutofOrder(uint packedIndexLeft, uint packedIndexRight)
{
    const uint lhsDecalIndex = Light_GetIndex(packedIndexLeft);
    const uint rhsDecalIndex = Light_GetIndex(packedIndexRight);

    const uint lhsSortKey = GetSortKey(PassSrg::m_decals[lhsDecalIndex]);
    const uint rhsSortKey = GetSortKey(PassSrg::m_decals[rhsDecalIndex]);

    if (lhsSortKey != rhsSortKey)
    {
        return lhsSortKey > rhsSortKey;
    }
    // Tie on the sort key: fall back to the buffer index.
    return lhsDecalIndex > rhsDecalIndex;
}
// Sorts the decal entries currently held in shared_lightIndices in place, using the ordering defined by AreDecalsOutofOrder.
// groupIndex is the calling thread's index within the group; each thread handles at most one compare/swap per step.
// The function contains GroupMemoryBarrierWithGroupSync calls inside its loops. The loop bounds depend only on
// shared_lightCount, so every thread in the group runs the same number of iterations.
void SortDecals(uint groupIndex)
{
// Note that shared_lightCount can exceed the array size if too many decals intersect the tile, so clamp it here.
uint numArray = min(TILE_DIM_X * TILE_DIM_Y, shared_lightCount);
uint numArrayPowerOfTwo = NextPowerTwo(numArray);
// Bitonic sort code from AMD: https://github.com/GPUOpen-LibrariesAndSDKs/GPUParticles11
// AMD / MIT License is contained in the same directory as this file
// subArraySize = 2,4,8,16,etc...
for (uint subArraySize = 2; subArraySize <= numArrayPowerOfTwo; subArraySize = subArraySize * 2)
{
// compareDist = (subArraySize / 2), (subArraySize / 4), ... 32, 16, 8, 4, 2, 1
for (uint compareDist = subArraySize >> 1; compareDist > 0; compareDist = compareDist >> 1)
{
// This code from AMD very cleverly computes the locations of two different array indices to compare.
// The pattern that is produced from this is identical to: https://en.wikipedia.org/wiki/Bitonic_sorter#Alternative_representation
// Essentially creating larger monotonic sequences and then merging them together. (Two monotonic sequences together is a bitonic)
uint index_low = groupIndex & (compareDist - 1);
uint index_high = 2 * (groupIndex - index_low);
uint index0 = index_high + index_low;
// The first step of each sub-array size compares mirrored positions; the remaining steps compare positions compareDist apart.
uint index1 = compareDist == subArraySize >> 1 ? index_high + (2 * compareDist - 1) - index_low : index_high + compareDist + index_low;
// Pairs that reach past the real element count (the padding up to the next power of two) are skipped.
if (index0 < numArray && index1 < numArray)
{
bool areDecalsOutOrder = AreDecalsOutofOrder(shared_lightIndices[index0], shared_lightIndices[index1]);
if (areDecalsOutOrder)
{
uint uTemp = shared_lightIndices[index0];
shared_lightIndices[index0] = shared_lightIndices[index1];
shared_lightIndices[index1] = uTemp;
}
}
// Wait for every thread's swap to land before the next step reads shared_lightIndices again.
GroupMemoryBarrierWithGroupSync();
}
}
}
// Appends a light (or decal) to the group-shared visible list.
// lightIndex and inside are packed together with PackLightIndexWithBinMask before being stored.
// shared_lightCount always grows; once it passes NVLC_MAX_POSSIBLE_LIGHTS_PER_BIN, further entries overwrite the last slot.
void MarkLightAsVisibleInSharedMemory(uint lightIndex, uint inside)
{
    uint reservedSlot;
    InterlockedAdd(shared_lightCount, 1, reservedSlot);

    const uint lastSlot = NVLC_MAX_POSSIBLE_LIGHTS_PER_BIN - 1;
    const uint clampedSlot = min(reservedSlot, lastSlot);
    shared_lightIndices[clampedSlot] = PackLightIndexWithBinMask(lightIndex, inside);
}
// Copies this thread's entry of the group-shared list (if it has one) into m_lightList.
// lightCount is the number of entries already written for this tile; the destination offset is clamped to the
// last slot of the tile's bin.
void CopySharedLightsToMainMemory(uint lightCount, uint groupIndex, uint3 groupID)
{
    const bool threadHasEntry = groupIndex < shared_lightCount;
    if (threadHasEntry)
    {
        const uint lastSlot = NVLC_MAX_POSSIBLE_LIGHTS_PER_BIN - 1;
        const uint slotInBin = min(lightCount + groupIndex, lastSlot);
        const uint destination = GetLightListIndex(groupID, PassSrg::m_constantData.m_gridWidth, slotInBin);
        PassSrg::m_lightList[destination] = shared_lightIndices[groupIndex];
    }
}
// Returns the z range covered by a sphere of the given radius around lightPosition:
// lightPosition.z offset by -radius (x) and +radius (y), with the offsets scaled by RH_COORD_SYSTEM_REVERSE.
float2 ComputePointLightMinMaxZ(float lightRadius, float3 lightPosition)
{
    const float2 signedExtent = lightRadius * float2(-1,1) * RH_COORD_SYSTEM_REVERSE;
    return lightPosition.z + signedExtent;
}
// Returns the z range of a simple spot light, treating it as a sphere with the light's attenuation radius.
float2 ComputeSimpleSpotLightMinMax(PassSrg::SimpleSpotLight light, float3 lightPosition)
{
    const float attenuationRadius = rsqrt(light.m_invAttenuationRadiusSquared);
    const float2 signedExtent = attenuationRadius * float2(-1, 1) * RH_COORD_SYSTEM_REVERSE;
    return lightPosition.z + signedExtent;
}
// Returns the z range of a double sided quad light, treating it as a sphere with the light's attenuation radius.
float2 ComputeQuadLightMinMaxZ_DoubleSided(PassSrg::QuadLight light, float3 lightPosition)
{
    const float attenuationRadius = rsqrt(light.m_invAttenuationRadiusSquared);
    const float2 signedExtent = attenuationRadius * float2(-1,1) * RH_COORD_SYSTEM_REVERSE;
    return lightPosition.z + signedExtent;
}
// Returns the z range of a single sided quad light.
// lightDirection is not used yet, so the result is the same as for a double sided light.
float2 ComputeQuadLightMinMaxZ_SingleSided(PassSrg::QuadLight light, float3 lightPosition, float3 lightDirection)
{
    // [GFX TODO][ATOM-6170] We can compute a tighter bounds with single sided lights by bringing in one of bounds
    const float2 doubleSidedRange = ComputeQuadLightMinMaxZ_DoubleSided(light, lightPosition);
    return doubleSidedRange;
}
// Returns the z range of a disk light, treating it as a sphere whose radius is the attenuation radius
// plus the bulb position offset.
float2 ComputeDiskLightMinMax(PassSrg::DiskLight light, float3 lightPosition)
{
    const float boundingRadius = rsqrt(light.m_invAttenuationRadiusSquared) + light.m_bulbPositionOffset;
    const float2 signedExtent = boundingRadius * float2(-1, 1) * RH_COORD_SYSTEM_REVERSE;
    return lightPosition.z + signedExtent;
}
// Returns the z range of a capsule light.
// lightPosition is the transformed middle of the capsule; the extent along z is half the capsule's segment
// projected onto z, plus the falloff radius.
float2 ComputeCapsuleLightMinMax(PassSrg::CapsuleLight light, float3 lightPosition, float lightFalloffRadius)
{
    const float axisZ = WorldToView_Vector(light.m_direction).z;
    const float extentZ = abs(axisZ * light.m_length * 0.5f) + lightFalloffRadius;
    return float2(
        lightPosition.z - extentZ * RH_COORD_SYSTEM_REVERSE,
        lightPosition.z + extentZ * RH_COORD_SYSTEM_REVERSE);
}
// Culls decals against the tile. Each thread starts at decal groupIndex and advances by the number of threads
// in the group, so the group as a whole visits every decal once.
// tile_center_uv is accepted but not used by the current implementation.
void CullDecals(uint groupIndex, TileLightData tileLightData, float3 aabb_center, float3 aabb_extents, float2 tile_center_uv)
{
    for (uint decalIndex = groupIndex; decalIndex < PassSrg::m_decalCount; decalIndex += TILE_DIM_X * TILE_DIM_Y)
    {
        const PassSrg::Decal decal = PassSrg::m_decals[decalIndex];
        const float3 decalViewPosition = WorldToView_Point(decal.m_position);

        // just wrapping a bounding sphere around a cube for now to get a minor perf boost. i.e. the sphere radius is sqrt(x*x + y*y + z*z)
        // ATOM-4224 - try AABB-AABB
        const float sphereRadiusSq = dot(decal.m_halfSize, decal.m_halfSize);
        if (!TestSphereVsAabb(decalViewPosition, sphereRadiusSq, aabb_center, aabb_extents))
        {
            continue;
        }

        uint inside = 0;
        const float2 depthRange = ComputePointLightMinMaxZ(sqrt(sphereRadiusSq), decalViewPosition);
        if (IsObjectInsideTile(tileLightData, depthRange, inside))
        {
            MarkLightAsVisibleInSharedMemory(decalIndex, inside);
        }
    }
}
// Culls one spherical light against the tile and records it in shared memory when it is visible.
// lightPosition is the untransformed position; it is transformed with WorldToView_Point here.
// Despite its name, invLightRadius is used as 1 / radius^2: callers pass m_invAttenuationRadiusSquared,
// and the radius is recovered with rsqrt.
void CullPointLight(uint lightIndex, float3 lightPosition, float invLightRadius, TileLightData tileLightData, float3 aabb_center, float3 aabb_extents)
{
    const float3 viewPosition = WorldToView_Point(lightPosition);
    if (TestSphereVsAabbInvSqrt(viewPosition, invLightRadius, aabb_center, aabb_extents))
    {
        // Implement and profile fine-grained light culling testing
        // ATOM-3732
        const float lightRadius = rsqrt(invLightRadius);
        const float2 depthRange = ComputePointLightMinMaxZ(lightRadius, viewPosition);
        uint inside = 0;
        if (IsObjectInsideTile(tileLightData, depthRange, inside))
        {
            MarkLightAsVisibleInSharedMemory(lightIndex, inside);
        }
    }
}
// Culls every simple point light against the tile. Each thread starts at light groupIndex and advances by the
// number of threads in the group.
void CullSimplePointLights(uint groupIndex, TileLightData tileLightData, float3 aabb_center, float3 aabb_extents)
{
    uint lightIndex = groupIndex;
    while (lightIndex < PassSrg::m_simplePointLightCount)
    {
        const PassSrg::SimplePointLight simplePointLight = PassSrg::m_simplePointLights[lightIndex];
        CullPointLight(
            lightIndex,
            simplePointLight.m_position,
            simplePointLight.m_invAttenuationRadiusSquared,
            tileLightData,
            aabb_center,
            aabb_extents);
        lightIndex += TILE_DIM_X * TILE_DIM_Y;
    }
}
// Culls every point light against the tile. Each thread starts at light groupIndex and advances by the
// number of threads in the group.
void CullPointLights(uint groupIndex, TileLightData tileLightData, float3 aabb_center, float3 aabb_extents)
{
    uint lightIndex = groupIndex;
    while (lightIndex < PassSrg::m_pointLightCount)
    {
        const PassSrg::PointLight pointLight = PassSrg::m_pointLights[lightIndex];
        CullPointLight(
            lightIndex,
            pointLight.m_position,
            pointLight.m_invAttenuationRadiusSquared,
            tileLightData,
            aabb_center,
            aabb_extents);
        lightIndex += TILE_DIM_X * TILE_DIM_Y;
    }
}
// Culls every simple spot light against the tile. The tile AABB is approximated by its bounding sphere and tested
// against the light's outer cone, whose length is the attenuation radius.
void CullSimpleSpotLights(uint groupIndex, TileLightData tileLightData, float3 aabb_center, float3 aabb_extents)
{
    for (uint lightIndex = groupIndex; lightIndex < PassSrg::m_simpleSpotLightCount; lightIndex += TILE_DIM_X * TILE_DIM_Y)
    {
        const PassSrg::SimpleSpotLight spotLight = PassSrg::m_simpleSpotLights[lightIndex];
        const float3 viewPosition = WorldToView_Point(spotLight.m_position);
        const float3 viewDirection = WorldToView_Vector(spotLight.m_direction);

        const float tileBoundingRadius = length(aabb_extents);
        const float coneLength = rsqrt(spotLight.m_invAttenuationRadiusSquared);
        if (!TestSphereVsCone(aabb_center, tileBoundingRadius, viewPosition, viewDirection, spotLight.m_cosOuterConeAngle, coneLength))
        {
            continue;
        }

        // Implement and profile fine-grained light culling testing
        // ATOM-3732
        uint inside = 0;
        const float2 depthRange = ComputeSimpleSpotLightMinMax(spotLight, viewPosition);
        if (IsObjectInsideTile(tileLightData, depthRange, inside))
        {
            MarkLightAsVisibleInSharedMemory(lightIndex, inside);
        }
    }
}
// Culls every disk light against the tile. Each thread starts at light groupIndex and advances by the number of
// threads in the group.
// Lights with DiskLightFlag::UseConeAngle set are culled as a cone (outer cone angle); all other disk lights are
// culled as a sphere restricted to the hemisphere the disk faces.
void CullDiskLights(uint groupIndex, TileLightData tileLightData, float3 aabb_center, float3 aabb_extents)
{
    // Radius of the sphere bounding the tile AABB. It does not depend on the light, so compute it once.
    const float aabbRadius = length(aabb_extents);

    for (uint lightIndex = groupIndex; lightIndex < PassSrg::m_diskLightCount; lightIndex += TILE_DIM_X * TILE_DIM_Y)
    {
        PassSrg::DiskLight light = PassSrg::m_diskLights[lightIndex];

        // The culling position is the light position moved back along its direction by the bulb position offset.
        float3 lightPosition = WorldToView_Point(light.m_position - light.m_bulbPositionOffset * light.m_direction);
        float3 lightDirection = WorldToView_Vector(light.m_direction);
        float attenuationRadius = rsqrt(light.m_invAttenuationRadiusSquared);

        bool potentiallyIntersects;
        // The parentheses matter: '>' binds tighter than '&', so the unparenthesized form
        // 'm_flags & UseConeAngle > 0' would test 'm_flags & (UseConeAngle > 0)' instead of the flag bit.
        if ((light.m_flags & DiskLightFlag::UseConeAngle) != 0)
        {
            potentiallyIntersects = TestSphereVsCone(aabb_center, aabbRadius, lightPosition, lightDirection, light.m_cosOuterConeAngle, attenuationRadius + light.m_bulbPositionOffset);
        }
        else
        {
            float lightRadius = attenuationRadius + light.m_diskRadius;
            float lightRadiusSqr = lightRadius * lightRadius;
            potentiallyIntersects = TestSphereVsAabb(lightPosition, lightRadiusSqr, aabb_center, aabb_extents);
            if (potentiallyIntersects)
            {
                // Only one side is visible, check that we are above the hemisphere
                float3 toAABBCenter = aabb_center - lightPosition;
                float distanceToLightPlane = dot(lightDirection, toAABBCenter);
                potentiallyIntersects = distanceToLightPlane >= -aabbRadius;
            }
        }

        if (potentiallyIntersects)
        {
            // Implement and profile fine-grained light culling testing
            // ATOM-3732
            uint inside = 0;
            float2 minmax = ComputeDiskLightMinMax(light, lightPosition);
            if (IsObjectInsideTile(tileLightData, minmax, inside))
            {
                MarkLightAsVisibleInSharedMemory(lightIndex, inside);
            }
        }
    }
}
// Culls every capsule light against the tile. A capsule is bounded conservatively by a sphere centered on the
// middle of its segment, with radius = falloff radius + half the segment length.
void CullCapsuleLights(uint groupIndex, TileLightData tileLightData, float3 aabb_center, float3 aabb_extents)
{
    for (uint lightIndex = groupIndex; lightIndex < PassSrg::m_capsuleLightCount; lightIndex += TILE_DIM_X * TILE_DIM_Y)
    {
        const PassSrg::CapsuleLight capsule = PassSrg::m_capsuleLights[lightIndex];

        const float3 middleWorld = capsule.m_startPoint + capsule.m_direction * capsule.m_length * 0.5f;
        const float3 middleView = WorldToView_Point(middleWorld);

        const float falloffRadius = rsqrt(capsule.m_invAttenuationRadiusSquared);
        const float boundingRadius = falloffRadius + capsule.m_length * 0.5f;
        if (!TestSphereVsAabb(middleView, boundingRadius * boundingRadius, aabb_center, aabb_extents))
        {
            continue;
        }

        // Implement and profile fine-grained light culling testing
        // ATOM-3732
        uint inside = 0;
        const float2 depthRange = ComputeCapsuleLightMinMax(capsule, middleView, falloffRadius);
        if (IsObjectInsideTile(tileLightData, depthRange, inside))
        {
            MarkLightAsVisibleInSharedMemory(lightIndex, inside);
        }
    }
}
// Culls every quad light against the tile. Each light is bounded by a sphere with its attenuation radius.
// Single sided lights (QuadLightFlag::EmitsBothDirections not set) are additionally rejected when the tile lies
// entirely behind the plane of the quad.
void CullQuadLights(uint groupIndex, TileLightData tileLightData, float3 aabb_center, float3 aabb_extents)
{
    // Implement and profile fine-grained light culling testing
    // ATOM-3732
    for (uint lightIndex = groupIndex; lightIndex < PassSrg::m_quadLightCount; lightIndex += TILE_DIM_X * TILE_DIM_Y)
    {
        const PassSrg::QuadLight quad = PassSrg::m_quadLights[lightIndex];
        const float3 quadViewPosition = WorldToView_Point(quad.m_position);

        if (!TestSphereVsAabbInvSqrt(quadViewPosition, quad.m_invAttenuationRadiusSquared, aabb_center, aabb_extents))
        {
            continue;
        }

        float2 depthRange;
        const bool emitsBothDirections = (quad.m_flags & QuadLightFlag::EmitsBothDirections) != 0;
        if (emitsBothDirections)
        {
            depthRange = ComputeQuadLightMinMaxZ_DoubleSided(quad, quadViewPosition);
        }
        else
        {
            // Only one side is visible, check that we are above the hemisphere
            const float3 quadNormal = WorldToView_Vector(cross(quad.m_leftDir, quad.m_upDir));
            const float distanceToQuadPlane = dot(quadNormal, aabb_center - quadViewPosition);
            const float tileBoundingRadius = length(aabb_extents);
            if (!(distanceToQuadPlane >= -tileBoundingRadius))
            {
                continue;
            }
            depthRange = ComputeQuadLightMinMaxZ_SingleSided(quad, quadViewPosition, quadNormal);
        }

        uint inside = 0;
        if (IsObjectInsideTile(tileLightData, depthRange, inside))
        {
            MarkLightAsVisibleInSharedMemory(lightIndex, inside);
        }
    }
}
// Writes the end-of-group marker after the entries of the current category and returns the new running count
// (previous count + shared_lightCount + 1 for the marker).
// The write offset is clamped to the last slot of the tile's bin; the returned count is not clamped.
uint WriteEndOfGroup(uint lightCount, uint3 groupID)
{
    const uint countAfterCategory = lightCount + shared_lightCount;
    const uint lastSlot = NVLC_MAX_POSSIBLE_LIGHTS_PER_BIN - 1;
    const uint markerSlot = min(countAfterCategory, lastSlot);
    const uint destination = GetLightListIndex(groupID, PassSrg::m_constantData.m_gridWidth, markerSlot);

    PassSrg::m_lightList[destination] = PackLightIndexWithBinMask(NVLC_END_OF_GROUP, NVLC_ALL_BIN_BITS);
    return countAfterCategory + 1;
}
// Resets shared_lightCount to zero. Only the first thread of the group (groupIndex 0) performs the write.
// No barrier is issued here; callers are responsible for synchronization.
void ClearSharedLightCount(uint groupIndex)
{
    const bool isFirstThreadInGroup = (groupIndex == 0);
    if (isFirstThreadInGroup)
    {
        shared_lightCount = 0;
    }
}
// Resets shared_lightCount between two group barriers: the first barrier precedes the reset and the second
// follows it, so the reset is fenced on both sides.
void ClearSharedLightCountWithDoubleBarrier(uint groupIndex)
{
    GroupMemoryBarrierWithGroupSync();
    if (groupIndex == 0)
    {
        shared_lightCount = 0;
    }
    GroupMemoryBarrierWithGroupSync();
}
// Reads the xy channels of the tile's m_tileLightData entry and reinterprets their bits as two floats.
float2 ReadDepthCloseFar(uint3 groupID)
{
    const uint2 packedDepth = PassSrg::m_tileLightData[groupID.xy].xy;
    return asfloat(packedDepth);
}
// Loads the tile's packed entry from m_tileLightData and unpacks it with Tile_UnpackData.
TileLightData ReadTileLightData(uint3 groupID)
{
    return Tile_UnpackData(PassSrg::m_tileLightData[groupID.xy]);
}
// Flushes the current category from shared memory to m_lightList and appends the end-of-group marker.
// Starts with a group barrier so every thread's shared-memory writes are complete before they are copied.
// Returns the updated running entry count for the tile.
uint WriteCullingDataToMainMemory(uint lightCount, uint groupIndex, uint3 groupID)
{
    GroupMemoryBarrierWithGroupSync();
    CopySharedLightsToMainMemory(lightCount, groupIndex, groupID);
    const uint countIncludingEndMarker = WriteEndOfGroup(lightCount, groupID);
    return countIncludingEndMarker;
}
// This shader is invoked with one thread-group per on-screen tile
// e.g. if the screen resolution is 1920x1080, with 16x16 tiles, there will be 120x68 tiles (and 120x68 thread groups)
// Each thread-group is dedicated to culling all lights and decals against that screen-tile.
// It might be worth splitting this compute shader into several shaders, one per light type.
// Each thread will read one light, determine if it is visible, write it to shared memory, then move onto the next light until
// all lights are processed
// After the visibility of one category has been computed, it is written back from shared memory to GPU memory
// This will write out one group per category, in this order:
// decals, simple point lights, simple spot lights, point lights, disk lights, capsule lights, quad lights.
// Each group has the following layout:
// index << 16 | bitmask contains which bits the light is present in
// index << 16 | bitmask contains which bits the light is present in
// index << 16 | bitmask contains which bits the light is present in
// ...
// End of Group
// i.e. for each 32-bit UINT, it contains the 16 bit light index + 16-bit binning information
// Note! This isn't consumed by the forward shader. This light list will be further processed by the LightCullingRemap shader, producing a LightListRemapped buffer
// that is more optimal for consumption by the forward shader.
[numthreads(TILE_DIM_X, TILE_DIM_Y, 1)]
void MainCS(
uint3 dispatchThreadID : SV_DispatchThreadID,
uint3 groupID : SV_GroupID,
uint groupIndex : SV_GroupIndex)
{
ClearSharedLightCount(groupIndex);
// Running number of entries (including end-of-group markers) written to m_lightList for this tile.
uint lightCount = 0;
TileLightData tileLightData = ReadTileLightData(groupID);
float2 tileCenterUv;
float4 tileRect = ComputeScreenRays(groupID.xy, tileCenterUv);
float3 aabb_center, aabb_extents;
BuildAabb(tileRect, tileLightData, aabb_center, aabb_extents);
// Make the cleared shared_lightCount visible to the whole group before any thread appends to the list.
GroupMemoryBarrierWithGroupSync();
CullDecals(groupIndex, tileLightData, aabb_center, aabb_extents, tileCenterUv);
// All decals must be in shared memory before they are sorted.
GroupMemoryBarrierWithGroupSync();
SortDecals(groupIndex);
lightCount = WriteCullingDataToMainMemory(lightCount, groupIndex, groupID );
// Every light type below follows the same pattern: reset the shared list, cull, then flush to m_lightList.
ClearSharedLightCountWithDoubleBarrier(groupIndex);
CullSimplePointLights(groupIndex, tileLightData, aabb_center, aabb_extents);
lightCount = WriteCullingDataToMainMemory(lightCount, groupIndex, groupID );
ClearSharedLightCountWithDoubleBarrier(groupIndex);
CullSimpleSpotLights(groupIndex, tileLightData, aabb_center, aabb_extents);
lightCount = WriteCullingDataToMainMemory(lightCount, groupIndex, groupID );
ClearSharedLightCountWithDoubleBarrier(groupIndex);
CullPointLights(groupIndex, tileLightData, aabb_center, aabb_extents);
lightCount = WriteCullingDataToMainMemory(lightCount, groupIndex, groupID );
ClearSharedLightCountWithDoubleBarrier(groupIndex);
CullDiskLights(groupIndex, tileLightData, aabb_center, aabb_extents);
lightCount = WriteCullingDataToMainMemory(lightCount, groupIndex, groupID );
ClearSharedLightCountWithDoubleBarrier(groupIndex);
CullCapsuleLights(groupIndex, tileLightData, aabb_center, aabb_extents);
lightCount = WriteCullingDataToMainMemory(lightCount, groupIndex, groupID );
ClearSharedLightCountWithDoubleBarrier(groupIndex);
CullQuadLights(groupIndex, tileLightData, aabb_center, aabb_extents);
lightCount = WriteCullingDataToMainMemory(lightCount, groupIndex, groupID );
// A single thread publishes the final entry count for this tile.
if (groupIndex == 0)
{
PassSrg::m_lightCount[groupID.xy] = lightCount;
}
}