/*
 * Copyright (c) Contributors to the Open 3D Engine Project.
 * For complete copyright and license terms please see the LICENSE at the root of this distribution.
 *
 * SPDX-License-Identifier: Apache-2.0 OR MIT
 *
 */

// NOTE(review): the #include directives below carry no target in this copy of the file.
// The targets look like they were stripped during extraction - restore them from upstream.
#include

// Perform light culling on a compute shader

#include
#include

enum QuadLightFlag // Copied from QuadLight.azsli. See ATOM-3731
{
    None = 0x00,
    EmitsBothDirections = 0x01, // 1 << 0, // Quad should emit light from both sides
    UseFastApproximation = 0x02, // 1 << 1, // Use a fast approximation instead of linearly transformed cosines.
};

// Bit flags stored in DiskLight::m_flags.
enum DiskLightFlag
{
    UseConeAngle = 1,
};

// Per-pass inputs and outputs of the light culling compute shader.
ShaderResourceGroup PassSrg : SRG_PerPass
{
    // Figure out how to remove duplicate struct definitions.
    // These are also defined in View.srg
    // ATOM-3731

    struct SimplePointLight
    {
        float3 m_position;
        float m_invAttenuationRadiusSquared; // For a radius at which this light no longer has an effect, 1 / radius^2.
        float3 m_rgbIntensityCandelas;
        float m_padding; // explicit padding.
    };

    struct SimpleSpotLight
    {
        float3 m_position;
        float m_invAttenuationRadiusSquared; // For a radius at which this light no longer has an effect, 1 / radius^2.
        float3 m_direction;
        float m_cosInnerConeAngle; // cosine of the inner cone angle
        float3 m_rgbIntensityCandelas;
        float m_cosOuterConeAngle; // cosine of the outer cone angle
    };

    struct PointLight
    {
        float3 m_position;
        float m_invAttenuationRadiusSquared; // For a radius at which this light no longer has an effect, 1 / radius^2.
        float3 m_rgbIntensityCandelas;
        float m_bulbRadius;
        uint3 m_shadowIndices;
        uint m_padding;
    };

    struct DiskLight
    {
        float3 m_position;
        float m_invAttenuationRadiusSquared; // For a radius at which this light no longer has an effect, 1 / radius^2.
        float3 m_rgbIntensityCandelas;
        float m_diskRadius;
        float3 m_direction;
        uint m_flags; // See DiskLightFlag
        float m_cosInnerConeAngle;
        float m_cosOuterConeAngle;
        float m_bulbPositionOffset;
        uint m_shadowIndex;
    };

    struct CapsuleLight
    {
        float3 m_startPoint; // One of the end points of the capsule
        float m_radius; // Radius of the capsule, ie distance from line segment to surface.
        float3 m_direction; // normalized vector from m_startPoint towards the other end point.
        float m_length; // length of the line segment making up the inside of the capsule. Doesn't include caps (0 length capsule == sphere)
        float3 m_rgbIntensityCandelas; // total rgb luminous intensity of the capsule in candela
        float m_invAttenuationRadiusSquared; // Inverse of the distance at which this light no longer has an effect, squared. Also used for falloff calculations.
    };

    struct QuadLight
    {
        float3 m_position;
        float m_invAttenuationRadiusSquared; // For a radius at which this light no longer has an effect, 1 / radius^2.
        float3 m_leftDir; // Direction from center of quad to the left edge
        float m_halfWidth; // Half the width of the quad. m_leftDir * m_halfWidth is a vector from the center to the left edge.
        float3 m_upDir; // Direction from center of quad to the top edge
        float m_halfHeight; // Half the height of the quad. m_upDir * m_halfHeight is a vector from the center to the top edge.
        float3 m_rgbIntensityNits;
        uint m_flags; // See QuadLightFlag
    };

    struct LightCullingConstants
    {
        float4x4 m_worldToView;  // Applied by WorldToView_Point / WorldToView_Vector
        float4 m_screenUVToRay;  // xy = scale, zw = bias applied to a screen UV (see ScreenUvToRay)
        float2 m_gridPixel;      // Multiplied by the tile id to get the tile's corner UV (see ComputeScreenRays)
        float2 m_gridHalfPixel;  // Added to the tile's corner UV to get the tile's center UV
        uint m_gridWidth;        // Forwarded to GetLightListIndex when addressing m_lightList
        uint m_padding0;
        uint m_padding1;
        uint m_padding2;
    };

    LightCullingConstants m_constantData;

    // Source light data
    // NOTE(review): the element types of the buffers/textures below are missing in this copy of the file
    // (presumably stripped together with the include targets) - restore them from upstream.
    StructuredBuffer m_simplePointLights;
    StructuredBuffer m_simpleSpotLights;
    StructuredBuffer m_pointLights;
    StructuredBuffer m_diskLights;
    StructuredBuffer m_capsuleLights;
    StructuredBuffer m_quadLights;

    uint m_simplePointLightCount;
    uint m_simpleSpotLightCount;
    uint m_pointLightCount;
    uint m_diskLightCount;
    uint m_capsuleLightCount;
    uint m_quadLightCount;

    // Produced by the LightCullingTilePrepare pass. Contains depth min/max and mask data (a bit set for each location where opaque geo was found)
    Texture2D m_tileLightData;

    // Destination light data
    RWStructuredBuffer m_lightList;
    RWTexture2D m_lightCount;

    struct Decal
    {
        float3 m_position;
        float m_opacity;
        float4 m_quaternion;
        float3 m_halfSize;
        float m_angleAttenuation;
        uint m_sortKeyPacked; // The low 8 bits are the sort key (see GetSortKey)
        uint m_textureArrayIndex;
        uint m_textureIndex;
        uint m_padding[1];
    };

    StructuredBuffer m_decals;
    uint m_decalCount;
}

// Number of entries appended to shared_lightIndices for the object type currently being culled.
// Incremented atomically; it keeps counting even after the index array is full.
groupshared uint shared_lightCount;
// Packed (index + bin mask) entries for the objects that passed culling for this tile.
groupshared uint shared_lightIndices[TILE_DIM_X * TILE_DIM_Y];

// Returns true when dir.z, scaled by RH_COORD_SYSTEM_REVERSE (defined outside this file), is negative.
bool IsVectorPointingTowardsEye(const float3 dir)
{
    return (dir.z * RH_COORD_SYSTEM_REVERSE) < 0;
}

// Transforms a world space position by m_worldToView (w = 1, so translation is applied).
float3 WorldToView_Point(float3 p)
{
    float3 result = mul(PassSrg::m_constantData.m_worldToView, float4(p, 1.0)).xyz;
    return result;
}

// Transforms a world space direction by the upper-left 3x3 of m_worldToView (no translation).
float3 WorldToView_Vector(float3 v)
{
    float3 result = mul((float3x3)PassSrg::m_constantData.m_worldToView, v);
    return result;
}

// Sphere vs. AABB overlap test taking the sphere size as 1 / radius^2.
// Returns true when the squared distance from the sphere center to the box is less than radius^2.
bool TestSphereVsAabbInvSqrt(float3 sphereCenter, float invSphereRadiusSq, float3 aabbCenter, float3 aabbHalfSize)
{
    // Per-axis distance from the sphere center to the box surface; zero on axes where the center is inside the slab.
    float3 delta = max(float3(0.0, 0.0, 0.0), abs(aabbCenter - sphereCenter) - aabbHalfSize);
    float d2 = dot(delta, delta);
    return d2 * invSphereRadiusSq < 1.0f;
}

// Sphere vs. AABB overlap test taking the sphere size as radius^2.
// Returns true when the squared distance from the sphere center to the box is less than sphereRadiusSq.
bool TestSphereVsAabb(float3 sphereCenter, float sphereRadiusSq, float3 aabbCenter, float3 aabbHalfSize)
{
    // Per-axis distance from the sphere center to the box surface; zero on axes where the center is inside the slab.
    float3 delta = max(float3(0.0, 0.0, 0.0), abs(aabbCenter - sphereCenter) - aabbHalfSize);
    float d2 = dot(delta, delta);
    return d2 < sphereRadiusSq;
}

// Note that this function isn't precise. It will have false positives due to being simplified for speed.
// Function origin and description: https://bartwronski.com/2017/04/13/cull-that-cone/
//
// spherePos / sphereRadius : bounding sphere being tested
// origin / forward         : apex and axis of the cone
// cosa                     : cosine of the cone angle
// size                     : extent of the cone along its axis
bool TestSphereVsCone(float3 spherePos, float sphereRadius, float3 origin, float3 forward, float cosa, float size)
{
    float3 V = spherePos - origin;
    // Signed distance of the sphere center along the cone axis
    float V1len = dot(V, forward);
    bool backOk = V1len >= -sphereRadius;
    bool frontOk = V1len <= sphereRadius + size;
    // 1 / sin(angle). The angle test below is written with both sides divided by sin(angle).
    // NOTE(review): this is not finite when cosa is exactly +/-1 - confirm callers never pass a zero-angle cone.
    float rsina = rsqrt(1 - cosa * cosa);
    float VlenSq = dot(V, V);
    // max() guards against a slightly negative value reaching sqrt()
    float distanceClosestPoint = rsina * cosa * sqrt(max(0.0, VlenSq - V1len * V1len)) - V1len;
    bool angleOk = distanceClosestPoint <= sphereRadius* rsina;
    return angleOk && backOk && frontOk;
}

// Applies the scale (xy) and bias (zw) stored in m_screenUVToRay to a screen UV.
float2 ScreenUvToRay(float2 uv)
{
    return uv * PassSrg::m_constantData.m_screenUVToRay.xy + PassSrg::m_constantData.m_screenUVToRay.zw;
}

// Returns screen rays
// xy contains the top left corner
// zw contains the bottom right corner
// These rays are constructed assuming distance to them along z is 1.0
// This lets us simply multiply the numbers by an actual depth value to get a position in the tile in view space
// we also return: tileCenterUv which is 2D screenCoordinates of the center of the tile
float4 ComputeScreenRays(uint2 tileId, out float2 tileCenterUv)
{
    float2 tile_uv = float2(tileId) * PassSrg::m_constantData.m_gridPixel;
    tileCenterUv = tile_uv + PassSrg::m_constantData.m_gridHalfPixel;
    float4 tileRect;
    tileRect.xy = ScreenUvToRay(tile_uv);
    tileRect.zw = ScreenUvToRay(tile_uv + PassSrg::m_constantData.m_gridPixel);
    return tileRect;
}

// Rounds x up to a power of two (values that already are a power of two are returned unchanged).
// NOTE(review): for x <= 1 the argument passed to firstbithigh is 0; confirm what firstbithigh(0) and the
// resulting shift produce on the target compilers. The only caller in this file is SortDecals.
uint NextPowerTwo(uint x)
{
    // https://wickedengine.net/2018/01/05/next-power-of-two-in-hlsl/
    return 2 << firstbithigh(max(1, x) - 1);
}

// Extracts the sort key, which is stored in the low 8 bits of m_sortKeyPacked.
uint GetSortKey(PassSrg::Decal decal)
{
    return (decal.m_sortKeyPacked & 0xFF);
}

// Comparison used by SortDecals. Both arguments are packed entries from shared_lightIndices.
// Returns true when the left decal must come after the right one: a larger sort key, or on equal sort keys
// a larger decal index (so the resulting order is deterministic).
bool AreDecalsOutofOrder(uint packedIndexLeft, uint packedIndexRight)
{
    // Light_GetIndex is defined outside this file; it extracts the index from a packed entry.
    uint leftIndex = Light_GetIndex(packedIndexLeft);
    uint rightIndex = Light_GetIndex(packedIndexRight);
    PassSrg::Decal leftDecal = PassSrg::m_decals[leftIndex];
    PassSrg::Decal rightDecal = PassSrg::m_decals[rightIndex];
    uint leftSortIndex = GetSortKey(leftDecal);
    uint rightSortIndex = GetSortKey(rightDecal);
    if (leftSortIndex == rightSortIndex)
    {
        return leftIndex > rightIndex;
    }
    else
    {
        return leftSortIndex > rightSortIndex;
    }
}

// Sorts the decal entries currently held in shared_lightIndices using the order defined by AreDecalsOutofOrder.
// Every thread of the group has to call this function: the loops contain a group barrier and their bounds
// depend only on the groupshared shared_lightCount.
void SortDecals(uint groupIndex)
{
    // Note that shared_lightCount can exceed the array size if too many decals intersect the tile, so clamp it here.
    uint numArray = min(TILE_DIM_X * TILE_DIM_Y, shared_lightCount);
    uint numArrayPowerOfTwo = NextPowerTwo(numArray);

    // Bitonic sort code from AMD: https://github.com/GPUOpen-LibrariesAndSDKs/GPUParticles11
    // AMD / MIT License is contained in the same directory as this file

    // subArraySize = 2,4,8,16,etc...
    for (uint subArraySize = 2; subArraySize <= numArrayPowerOfTwo; subArraySize = subArraySize * 2)
    {
        // compareDist = (subArraySize / 2), (subArraySize / 4), ... 32, 16, 8, 4, 2, 1
        for (uint compareDist = subArraySize >> 1; compareDist > 0; compareDist = compareDist >> 1)
        {
            // This code from AMD very cleverly computes the locations of two different array indices to compare.
            // The pattern that is produced from this is identical to: https://en.wikipedia.org/wiki/Bitonic_sorter#Alternative_representation
            // Essentially creating larger monotonic sequences and then merging them together. (Two monotonic sequences together is a bitonic)
            uint index_low = groupIndex & (compareDist - 1);
            uint index_high = 2 * (groupIndex - index_low);
            uint index0 = index_high + index_low;
            uint index1 = compareDist == subArraySize >> 1 ? index_high + (2 * compareDist - 1) - index_low : index_high + compareDist + index_low;

            // Elements past numArray are treated as missing: pairs touching them are simply not compared.
            if (index0 < numArray && index1 < numArray)
            {
                bool areDecalsOutOrder = AreDecalsOutofOrder(shared_lightIndices[index0], shared_lightIndices[index1]);
                if (areDecalsOutOrder)
                {
                    uint uTemp = shared_lightIndices[index0];
                    shared_lightIndices[index0] = shared_lightIndices[index1];
                    shared_lightIndices[index1] = uTemp;
                }
            }
            // All swaps of this step must be visible before the next step reads the array.
            GroupMemoryBarrierWithGroupSync();
        }
    }
}

// Appends one packed (index, bin mask) entry to shared_lightIndices.
// The slot is reserved with an atomic add, so this can be called concurrently by all threads of the group.
// Once the slot index reaches the clamp limit, further entries overwrite the last slot while
// shared_lightCount keeps increasing.
// NOTE(review): the clamp uses NVLC_MAX_POSSIBLE_LIGHTS_PER_BIN but the array holds TILE_DIM_X * TILE_DIM_Y
// entries - this assumes NVLC_MAX_POSSIBLE_LIGHTS_PER_BIN <= TILE_DIM_X * TILE_DIM_Y; confirm.
void MarkLightAsVisibleInSharedMemory(uint lightIndex, uint inside)
{
    uint sharedLightIndex;
    InterlockedAdd(shared_lightCount, 1, sharedLightIndex);
    sharedLightIndex = min(sharedLightIndex, NVLC_MAX_POSSIBLE_LIGHTS_PER_BIN - 1);
    shared_lightIndices[sharedLightIndex] = PackLightIndexWithBinMask(lightIndex, inside);
}

// Copies shared_lightIndices into this tile's section of m_lightList, one entry per thread.
// lightCount is the number of entries this tile has already written; it is the offset of the first copied entry.
void CopySharedLightsToMainMemory(uint lightCount, uint groupIndex, uint3 groupID)
{
    if( groupIndex < shared_lightCount )
    {
        // Clamp so that an overfull tile overwrites its last slot rather than writing past it.
        uint offset = min(lightCount + groupIndex, NVLC_MAX_POSSIBLE_LIGHTS_PER_BIN - 1);
        uint index = GetLightListIndex(groupID, PassSrg::m_constantData.m_gridWidth, offset);
        PassSrg::m_lightList[index] = shared_lightIndices[groupIndex];
    }
}

// Return the minz and maxz of this light in view space
// lightPosition is expected in view space; the result is lightPosition.z -/+ lightRadius, with the sign
// of the offset flipped by RH_COORD_SYSTEM_REVERSE.
float2 ComputePointLightMinMaxZ(float lightRadius, float3 lightPosition)
{
    float2 minmax = lightPosition.z + lightRadius * float2(-1,1) * RH_COORD_SYSTEM_REVERSE;
    return minmax;
}

// Same bounds as ComputePointLightMinMaxZ, using the spot light's attenuation radius.
// The cone direction is not taken into account.
float2 ComputeSimpleSpotLightMinMax(PassSrg::SimpleSpotLight light, float3 lightPosition)
{
    float lightRadius = rsqrt(light.m_invAttenuationRadiusSquared);
    float2 minmax = lightPosition.z + lightRadius * float2(-1, 1) * RH_COORD_SYSTEM_REVERSE;
    return minmax;
}

// Return the minz and maxz of this quad light in view space
// Quad light must be double sided
float2 ComputeQuadLightMinMaxZ_DoubleSided(PassSrg::QuadLight light, float3 lightPosition)
{
    const float lightRadius = rsqrt(light.m_invAttenuationRadiusSquared);
    const float2 minmax = lightPosition.z + lightRadius * float2(-1,1) * RH_COORD_SYSTEM_REVERSE;
    return minmax;
}

// Return
// the minz and maxz of this quad light in view space
// Quad light must be single sided
// lightDirection is currently unused; the double sided bounds are returned unchanged.
float2 ComputeQuadLightMinMaxZ_SingleSided(PassSrg::QuadLight light, float3 lightPosition, float3 lightDirection)
{
    // [GFX TODO][ATOM-6170] We can compute a tighter bounds with single sided lights by bringing in one of bounds
    return ComputeQuadLightMinMaxZ_DoubleSided(light, lightPosition);
}

// Return the minz and maxz of a disk light in view space.
// The radius used is the attenuation radius plus m_bulbPositionOffset.
float2 ComputeDiskLightMinMax(PassSrg::DiskLight light, float3 lightPosition)
{
    float lightRadius = rsqrt(light.m_invAttenuationRadiusSquared) + light.m_bulbPositionOffset;
    float2 minmax = lightPosition.z + lightRadius * float2(-1, 1) * RH_COORD_SYSTEM_REVERSE;
    return minmax;
}

// Return the minz and maxz of a capsule light in view space.
// lightPosition is the view space middle of the capsule's line segment.
float2 ComputeCapsuleLightMinMax(PassSrg::CapsuleLight light, float3 lightPosition, float lightFalloffRadius)
{
    // Half of the segment projected onto the view z axis, grown by the falloff radius
    float offsetZ = abs(WorldToView_Vector(light.m_direction).z * light.m_length * 0.5f) + lightFalloffRadius;
    float nearZ = lightPosition.z - offsetZ * RH_COORD_SYSTEM_REVERSE;
    float farZ = lightPosition.z + offsetZ * RH_COORD_SYSTEM_REVERSE;
    return float2(nearZ, farZ);
}

// Culls all decals against the tile. The threads of the group stride over the decal list together,
// each thread starting at its own groupIndex. Decals that pass are appended to shared memory.
// tile_center_uv is currently unused.
void CullDecals(uint groupIndex, TileLightData tileLightData, float3 aabb_center, float3 aabb_extents, float2 tile_center_uv)
{
    for (uint decalIndex = groupIndex ; decalIndex < PassSrg::m_decalCount ; decalIndex += TILE_DIM_X * TILE_DIM_Y)
    {
        PassSrg::Decal decal = PassSrg::m_decals[decalIndex];
        float3 decalPosition = WorldToView_Point(decal.m_position);

        // just wrapping a bounding sphere around a cube for now to get a minor perf boost. i.e. the sphere radius is sqrt(x*x + y*y + z*z)
        // ATOM-4224 - try AABB-AABB
        float boundingSphereRadiusSqr = dot(decal.m_halfSize, decal.m_halfSize);

        bool potentiallyIntersects = TestSphereVsAabb(decalPosition, boundingSphereRadiusSqr, aabb_center, aabb_extents);
        if (potentiallyIntersects)
        {
            uint inside = 0;
            float2 minmax = ComputePointLightMinMaxZ(sqrt(boundingSphereRadiusSqr), decalPosition);
            if (IsObjectInsideTile(tileLightData, minmax, inside))
            {
                MarkLightAsVisibleInSharedMemory(decalIndex, inside);
            }
        }
    }
}

// Culls one point light against the tile and appends it to shared memory when it passes.
// lightPosition is in world space.
// Despite its name, invLightRadius is 1 / radius^2: callers pass m_invAttenuationRadiusSquared.
void CullPointLight(uint lightIndex, float3 lightPosition, float invLightRadius, TileLightData tileLightData, float3 aabb_center, float3 aabb_extents)
{
    lightPosition = WorldToView_Point(lightPosition);

    bool potentiallyIntersects = TestSphereVsAabbInvSqrt(lightPosition, invLightRadius, aabb_center, aabb_extents);
    if (potentiallyIntersects)
    {
        // Implement and profile fine-grained light culling testing
        // ATOM-3732

        uint inside = 0;
        // rsqrt(1 / radius^2) == radius
        float2 minmax = ComputePointLightMinMaxZ(rsqrt(invLightRadius), lightPosition);
        if (IsObjectInsideTile(tileLightData, minmax, inside))
        {
            MarkLightAsVisibleInSharedMemory(lightIndex, inside);
        }
    }
}

// Culls all simple point lights against the tile.
void CullSimplePointLights(uint groupIndex, TileLightData tileLightData, float3 aabb_center, float3 aabb_extents)
{
    for (uint lightIndex = groupIndex ; lightIndex < PassSrg::m_simplePointLightCount ; lightIndex += TILE_DIM_X * TILE_DIM_Y)
    {
        PassSrg::SimplePointLight light = PassSrg::m_simplePointLights[lightIndex];
        CullPointLight(lightIndex, light.m_position, light.m_invAttenuationRadiusSquared, tileLightData, aabb_center, aabb_extents);
    }
}

// Culls all point lights against the tile.
void CullPointLights(uint groupIndex, TileLightData tileLightData, float3 aabb_center, float3 aabb_extents)
{
    for (uint lightIndex = groupIndex ; lightIndex < PassSrg::m_pointLightCount ; lightIndex += TILE_DIM_X * TILE_DIM_Y)
    {
        PassSrg::PointLight light = PassSrg::m_pointLights[lightIndex];
        CullPointLight(lightIndex, light.m_position, light.m_invAttenuationRadiusSquared, tileLightData, aabb_center, aabb_extents);
    }
}

// Culls all simple spot lights against the tile.
// The tile AABB is approximated by its bounding sphere and tested against the light's outer cone.
void CullSimpleSpotLights(uint groupIndex, TileLightData tileLightData, float3 aabb_center, float3 aabb_extents)
{
    for (uint lightIndex = groupIndex ; lightIndex < PassSrg::m_simpleSpotLightCount ; lightIndex += TILE_DIM_X * TILE_DIM_Y)
    {
        PassSrg::SimpleSpotLight light = PassSrg::m_simpleSpotLights[lightIndex];
        float3 lightPosition = WorldToView_Point(light.m_position);
        float3 lightDirection = WorldToView_Vector(light.m_direction);

        bool potentiallyIntersects = TestSphereVsCone(aabb_center, length(aabb_extents), lightPosition, lightDirection, light.m_cosOuterConeAngle, rsqrt(light.m_invAttenuationRadiusSquared));
        if (potentiallyIntersects)
        {
            // Implement and profile fine-grained light culling testing
            // ATOM-3732

            uint inside = 0;
            float2 minmax = ComputeSimpleSpotLightMinMax(light, lightPosition);
            if (IsObjectInsideTile(tileLightData, minmax, inside))
            {
                MarkLightAsVisibleInSharedMemory(lightIndex, inside);
            }
        }
    }
}

// Culls all disk lights against the tile.
// Lights with DiskLightFlag::UseConeAngle set are tested as a cone, all others as a hemisphere
// (sphere test followed by a plane test).
void CullDiskLights(uint groupIndex, TileLightData tileLightData, float3 aabb_center, float3 aabb_extents)
{
    for (uint lightIndex = groupIndex ; lightIndex < PassSrg::m_diskLightCount ; lightIndex += TILE_DIM_X * TILE_DIM_Y)
    {
        PassSrg::DiskLight light = PassSrg::m_diskLights[lightIndex];
        // The tests are done from the light position moved back along its direction by m_bulbPositionOffset
        float3 lightPosition = WorldToView_Point(light.m_position - light.m_bulbPositionOffset * light.m_direction);
        float lightRadius = rsqrt(light.m_invAttenuationRadiusSquared) + light.m_diskRadius;
        float lightRadiusSqr = lightRadius * lightRadius;
        float aabbRadius = length(aabb_extents);
        float3 lightDirection = WorldToView_Vector(light.m_direction);

        bool potentiallyIntersects;

        // The parentheses are required: '>' and '!=' bind tighter than '&', so the unparenthesized form
        // would evaluate m_flags & (UseConeAngle > 0), which only works while the flag is bit 0.
        if ((light.m_flags & DiskLightFlag::UseConeAngle) != 0)
        {
            potentiallyIntersects = TestSphereVsCone(aabb_center, aabbRadius, lightPosition, lightDirection, light.m_cosOuterConeAngle, rsqrt(light.m_invAttenuationRadiusSquared) + light.m_bulbPositionOffset);
        }
        else
        {
            potentiallyIntersects = TestSphereVsAabb(lightPosition, lightRadiusSqr, aabb_center, aabb_extents);

            if (potentiallyIntersects)
            {
                // Only one side is visible, check that we are above the hemisphere
                float3 toAABBCenter = aabb_center - lightPosition;
                float distanceToLightPlane = dot(lightDirection, toAABBCenter);
                potentiallyIntersects = distanceToLightPlane >= -aabbRadius;
            }
        }

        if (potentiallyIntersects)
        {
            // Implement and profile fine-grained light culling testing
            // ATOM-3732

            uint inside = 0;
            float2 minmax = ComputeDiskLightMinMax(light, lightPosition);
            if (IsObjectInsideTile(tileLightData, minmax, inside))
            {
                MarkLightAsVisibleInSharedMemory(lightIndex, inside);
            }
        }
    }
}

// Culls all capsule lights against the tile.
// The capsule is bounded by a sphere around the middle of its segment with a radius of
// falloff radius + half the segment length.
void CullCapsuleLights(uint groupIndex, TileLightData tileLightData, float3 aabb_center, float3 aabb_extents)
{
    for (uint lightIndex = groupIndex ; lightIndex < PassSrg::m_capsuleLightCount ; lightIndex += TILE_DIM_X * TILE_DIM_Y)
    {
        PassSrg::CapsuleLight light = PassSrg::m_capsuleLights[lightIndex];
        float3 lightMiddleWorld = light.m_startPoint + light.m_direction * light.m_length * 0.5f;
        float3 lightMiddleView = WorldToView_Point(lightMiddleWorld);
        float lightFalloffRadius = rsqrt(light.m_invAttenuationRadiusSquared);
        float lightConservativeBoundingRadius = lightFalloffRadius + light.m_length * 0.5f;

        bool potentiallyIntersects = TestSphereVsAabb(lightMiddleView, lightConservativeBoundingRadius * lightConservativeBoundingRadius, aabb_center, aabb_extents);
        if (potentiallyIntersects)
        {
            // Implement and profile fine-grained light culling testing
            // ATOM-3732

            uint inside = 0;
            float2 minmax = ComputeCapsuleLightMinMax(light, lightMiddleView, lightFalloffRadius);
            if (IsObjectInsideTile(tileLightData, minmax, inside))
            {
                MarkLightAsVisibleInSharedMemory(lightIndex, inside);
            }
        }
    }
}

// Culls all quad lights against the tile.
// Single sided quads (EmitsBothDirections not set) are additionally rejected when the tile lies
// entirely behind the plane of the quad.
void CullQuadLights(uint groupIndex, TileLightData tileLightData, float3 aabb_center, float3 aabb_extents)
{
    // Implement and profile fine-grained light culling testing
    // ATOM-3732

    for (uint lightIndex = groupIndex ; lightIndex < PassSrg::m_quadLightCount ; lightIndex += TILE_DIM_X * TILE_DIM_Y)
    {
        const PassSrg::QuadLight light = PassSrg::m_quadLights[lightIndex];
        const float3 lightPosition = WorldToView_Point(light.m_position);

        bool potentiallyIntersects = TestSphereVsAabbInvSqrt(lightPosition, light.m_invAttenuationRadiusSquared, aabb_center, aabb_extents);
        if (potentiallyIntersects)
        {
            // Only read below when potentiallyIntersects is still true, in which case one of the branches has set it
            float2 minmaxz;
            const bool singleSided = (light.m_flags & QuadLightFlag::EmitsBothDirections) == 0;
            if (singleSided)
            {
                // Only one side is visible, check that we are above the hemisphere
                const float3 leftDir = light.m_leftDir;
                const float3 upDir = light.m_upDir;
                const float3 lightDirection = WorldToView_Vector(cross(leftDir, upDir));
                const float3 toAABBCenter = aabb_center - lightPosition;
                const float distanceToLightPlane = dot(lightDirection, toAABBCenter);
                const float aabbRadius = length(aabb_extents);
                const bool aboveHemisphere = distanceToLightPlane >= -aabbRadius;
                if (aboveHemisphere)
                {
                    minmaxz = ComputeQuadLightMinMaxZ_SingleSided(light, lightPosition, lightDirection);
                }
                else
                {
                    potentiallyIntersects = false;
                }
            }
            else
            {
                minmaxz = ComputeQuadLightMinMaxZ_DoubleSided(light, lightPosition);
            }

            uint inside = 0;
            if (potentiallyIntersects && IsObjectInsideTile(tileLightData, minmaxz, inside))
            {
                MarkLightAsVisibleInSharedMemory(lightIndex, inside);
            }
        }
    }
}

// Writes the end-of-group marker after the entries of the current object type.
// Returns the updated number of entries written for this tile: the previous count, plus
// shared_lightCount, plus one for the marker.
uint WriteEndOfGroup(uint lightCount, uint3 groupID)
{
    uint lightsAfter = lightCount + shared_lightCount;
    uint end = PackLightIndexWithBinMask(NVLC_END_OF_GROUP, NVLC_ALL_BIN_BITS);
    // Clamp so that an overfull tile still ends with a marker in its last slot
    uint offset = min(lightsAfter, NVLC_MAX_POSSIBLE_LIGHTS_PER_BIN - 1);
    uint index = GetLightListIndex(groupID, PassSrg::m_constantData.m_gridWidth, offset);
    PassSrg::m_lightList[index] = end;
    lightsAfter++;
    return lightsAfter;
}

// Resets shared_lightCount. Only the first thread of the group writes; callers are responsible
// for the barriers around this call.
void ClearSharedLightCount(uint groupIndex)
{
    if( groupIndex == 0 )
    {
        shared_lightCount = 0;
    }
}

// Resets shared_lightCount between two object types.
// The first barrier makes sure every thread has finished reading the previous count, the second
// makes the reset visible before any thread appends new entries.
void ClearSharedLightCountWithDoubleBarrier(uint groupIndex)
{
    GroupMemoryBarrierWithGroupSync();
    ClearSharedLightCount(groupIndex);
    GroupMemoryBarrierWithGroupSync();
}

// Reads the first two components of this tile's entry in m_tileLightData, reinterpreted as floats.
float2 ReadDepthCloseFar(uint3 groupID)
{
    float2 depthCloseFar = asfloat(PassSrg::m_tileLightData[groupID.xy].xy);
    return depthCloseFar;
}

// Reads and unpacks this tile's entry in m_tileLightData.
TileLightData ReadTileLightData(uint3 groupID)
{
    uint4 packedData = PassSrg::m_tileLightData[groupID.xy];
    return Tile_UnpackData(packedData);
}

// Flushes the entries gathered in shared memory for the current object type to m_lightList and
// terminates them with an end-of-group marker. Returns the updated per-tile entry count.
// Contains a group barrier, so every thread of the group has to call it.
uint WriteCullingDataToMainMemory(uint lightCount, uint groupIndex, uint3 groupID)
{
    // Wait until every thread has finished appending to shared memory
    GroupMemoryBarrierWithGroupSync();
    CopySharedLightsToMainMemory(lightCount, groupIndex, groupID );
    lightCount = WriteEndOfGroup(lightCount, groupID);
    return lightCount;
}

// This shader is invoked with one thread-group per on-screen tile
// e.g. if the screen resolution is 1920x1080, with 16x16 tiles, there will be 120x68 tiles (and 120x68 thread groups)
// Each thread-group is dedicated to culling all lights against that screen-tile.
// It might be worth splitting this compute shader into several shaders, one per light type.
// Each thread will read one light, determine if it is visible, write it to shared memory, then move onto the next light until
// all lights are processed
// After all lights visibility is computed, it will write them back from shared memory to GPU memory
// This will write out the following:

// Point light index << 16 | bitmask contains which bits the light is present in
// Point light index << 16 | bitmask contains which bits the light is present in
// Point light index << 16 | bitmask contains which bits the light is present in
// ...
// End of Group
// Disk light index << 16 | bitmask contains which bits the light is present in
// Disk light index << 16 | bitmask contains which bits the light is present in
// Disk light index << 16 | bitmask contains which bits the light is present in
// ...
// End of Group

// i.e. for each 32-bit UINT, it contains the 16 bit light index + 16-bit binning information
// (other light types and decals to come)

// Note! This isn't consumed by the forward shader.
// This light list will be further processed by the LightCullingRemap shader, producing a LightListRemapped buffer
// that is more optimal for consumption by the forward shader.
//
// Entry point. One thread group handles one tile (groupID.xy is used as the tile id) and groupIndex is the
// thread's flat index inside the group. dispatchThreadID is not used.
// Object types are culled one after another, each followed by an end-of-group marker, in this order:
// decals, simple point lights, simple spot lights, point lights, disk lights, capsule lights, quad lights.
// The order of the calls and barriers below must be preserved: the shared memory list is reused for every type.
[numthreads(TILE_DIM_X, TILE_DIM_Y, 1)]
void MainCS(
    uint3 dispatchThreadID : SV_DispatchThreadID,
    uint3 groupID : SV_GroupID,
    uint groupIndex : SV_GroupIndex)
{
    ClearSharedLightCount(groupIndex);

    // Running number of entries (including end-of-group markers) written to this tile's light list
    uint lightCount = 0;

    TileLightData tileLightData = ReadTileLightData(groupID);

    float2 tileCenterUv;
    float4 tileRect = ComputeScreenRays(groupID.xy, tileCenterUv);

    // BuildAabb is defined outside this file; it fills in the tile bounds that every Cull* function tests against
    float3 aabb_center, aabb_extents;
    BuildAabb(tileRect, tileLightData, aabb_center, aabb_extents);

    // Make the cleared shared_lightCount visible to the whole group before anything is appended
    GroupMemoryBarrierWithGroupSync();

    CullDecals(groupIndex, tileLightData, aabb_center, aabb_extents, tileCenterUv);
    // All decals must be in shared memory before they are sorted
    GroupMemoryBarrierWithGroupSync();
    SortDecals(groupIndex);
    lightCount = WriteCullingDataToMainMemory(lightCount, groupIndex, groupID );

    ClearSharedLightCountWithDoubleBarrier(groupIndex);
    CullSimplePointLights(groupIndex, tileLightData, aabb_center, aabb_extents);
    lightCount = WriteCullingDataToMainMemory(lightCount, groupIndex, groupID );

    ClearSharedLightCountWithDoubleBarrier(groupIndex);
    CullSimpleSpotLights(groupIndex, tileLightData, aabb_center, aabb_extents);
    lightCount = WriteCullingDataToMainMemory(lightCount, groupIndex, groupID );

    ClearSharedLightCountWithDoubleBarrier(groupIndex);
    CullPointLights(groupIndex, tileLightData, aabb_center, aabb_extents);
    lightCount = WriteCullingDataToMainMemory(lightCount, groupIndex, groupID );

    ClearSharedLightCountWithDoubleBarrier(groupIndex);
    CullDiskLights(groupIndex, tileLightData, aabb_center, aabb_extents);
    lightCount = WriteCullingDataToMainMemory(lightCount, groupIndex, groupID );

    ClearSharedLightCountWithDoubleBarrier(groupIndex);
    CullCapsuleLights(groupIndex, tileLightData, aabb_center, aabb_extents);
    lightCount = WriteCullingDataToMainMemory(lightCount, groupIndex, groupID );

    ClearSharedLightCountWithDoubleBarrier(groupIndex);
    CullQuadLights(groupIndex, tileLightData, aabb_center, aabb_extents);
    lightCount = WriteCullingDataToMainMemory(lightCount, groupIndex, groupID );

    // Every thread holds the same lightCount; a single thread stores it for the tile
    if (groupIndex == 0)
    {
        PassSrg::m_lightCount[groupID.xy] = lightCount;
    }
}