You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
706 lines
28 KiB
Plaintext
706 lines
28 KiB
Plaintext
/*
|
|
* Copyright (c) Contributors to the Open 3D Engine Project.
|
|
* For complete copyright and license terms please see the LICENSE at the root of this distribution.
|
|
*
|
|
* SPDX-License-Identifier: Apache-2.0 OR MIT
|
|
*
|
|
*/
|
|
|
|
#include <scenesrg.srgi>
|
|
|
|
// Perform light culling on a compute shader
|
|
|
|
#include <Atom/RPI/Math.azsli>
|
|
#include <Atom/Features/LightCulling/LightCullingShared.azsli>
|
|
|
|
// Bit flags carried in QuadLight::m_flags.
// Copied from QuadLight.azsli. See ATOM-3731
enum QuadLightFlag
{
    None                 = 0x00,
    EmitsBothDirections  = 0x01, // 1 << 0: quad should emit light from both sides
    UseFastApproximation = 0x02, // 1 << 1: use a fast approximation instead of linearly transformed cosines
};
|
|
|
|
// Bit flags tested against DiskLight::m_flags.
enum DiskLightFlag
{
    UseConeAngle = 1, // bit 0: CullDiskLights culls the light as a cone instead of a sphere
};
|
|
|
|
// Per-pass resources for the light culling compute shader: the source light and decal
// buffers, the per-tile depth data that is read, and the light list / count that are written.
ShaderResourceGroup PassSrg : SRG_PerPass
{
    // Figure out how to remove duplicate struct definitions.
    // These are also defined in View.srg
    // ATOM-3731

    struct SimplePointLight
    {
        float3 m_position;
        float m_invAttenuationRadiusSquared; // For a radius at which this light no longer has an effect, 1 / radius^2.
        float3 m_rgbIntensityCandelas;
        float m_padding; // explicit padding.
    };

    struct SimpleSpotLight
    {
        float3 m_position;
        float m_invAttenuationRadiusSquared; // For a radius at which this light no longer has an effect, 1 / radius^2.
        float3 m_direction;
        float m_cosInnerConeAngle; // cosine of the inner cone angle
        float3 m_rgbIntensityCandelas;
        float m_cosOuterConeAngle; // cosine of the outer cone angle (the one used for culling)
    };

    struct PointLight
    {
        float3 m_position;
        float m_invAttenuationRadiusSquared; // For a radius at which this light no longer has an effect, 1 / radius^2.
        float3 m_rgbIntensityCandelas;
        float m_bulbRadius;
        uint3 m_shadowIndices;
        uint m_padding;
    };

    struct DiskLight
    {
        float3 m_position;
        float m_invAttenuationRadiusSquared; // For a radius at which this light no longer has an effect, 1 / radius^2.
        float3 m_rgbIntensityCandelas;
        float m_diskRadius;
        float3 m_direction;
        uint m_flags; // See DiskLightFlag
        float m_cosInnerConeAngle;
        float m_cosOuterConeAngle;
        float m_bulbPositionOffset; // Culling uses m_position - m_bulbPositionOffset * m_direction as the cone origin.
        uint m_shadowIndex;
    };

    struct CapsuleLight
    {
        float3 m_startPoint;   // One of the end points of the capsule
        float m_radius;        // Radius of the capsule, ie distance from line segment to surface.
        float3 m_direction;    // normalized vector from m_startPoint towards the other end point.
        float m_length;        // length of the line segment making up the inside of the capsule. Doesn't include caps (0 length capsule == sphere)
        float3 m_rgbIntensityCandelas; // total rgb luminous intensity of the capsule in candela
        float m_invAttenuationRadiusSquared; // Inverse of the distance at which this light no longer has an effect, squared. Also used for falloff calculations.
    };

    struct QuadLight
    {
        float3 m_position;
        float m_invAttenuationRadiusSquared; // For a radius at which this light no longer has an effect, 1 / radius^2.
        float3 m_leftDir;   // Direction from center of quad to the left edge
        float m_halfWidth;  // Half the width of the quad. m_leftDir * m_halfWidth is a vector from the center to the left edge.
        float3 m_upDir;     // Direction from center of quad to the top edge
        float m_halfHeight; // Half the height of the quad. m_upDir * m_halfHeight is a vector from the center to the top edge.
        float3 m_rgbIntensityNits;
        uint m_flags;       // See QuadLightFlag
    };

    struct LightCullingConstants
    {
        float4x4 m_worldToView;
        float4 m_screenUVToRay;  // xy scales a screen UV, zw is added afterwards (see ScreenUvToRay)
        float2 m_gridPixel;      // UV step from one tile to the next (see ComputeScreenRays)
        float2 m_gridHalfPixel;  // UV offset from a tile corner to its center
        uint m_gridWidth;        // forwarded to GetLightListIndex
        uint m_padding0;
        uint m_padding1;
        uint m_padding2;
    };
    LightCullingConstants m_constantData;

    // Source light data
    StructuredBuffer<SimplePointLight> m_simplePointLights;
    StructuredBuffer<SimpleSpotLight> m_simpleSpotLights;
    StructuredBuffer<PointLight> m_pointLights;
    StructuredBuffer<DiskLight> m_diskLights;
    StructuredBuffer<CapsuleLight> m_capsuleLights;
    StructuredBuffer<QuadLight> m_quadLights;
    uint m_simplePointLightCount;
    uint m_simpleSpotLightCount;
    uint m_pointLightCount;
    uint m_diskLightCount;
    uint m_capsuleLightCount;
    uint m_quadLightCount;

    // Produced by the LightCullingTilePrepare pass. Contains depth min/max and mask data (a bit set for each location where opaque geo was found)
    Texture2D<uint4> m_tileLightData;

    // Destination light data
    RWStructuredBuffer<uint> m_lightList;
    RWTexture2D<uint> m_lightCount;

    struct Decal
    {
        float3 m_position;
        float m_opacity;
        float4 m_quaternion;
        float3 m_halfSize;
        float m_angleAttenuation;
        uint m_sortKeyPacked; // the low 8 bits are the sort key (see GetSortKey)
        uint m_textureArrayIndex;
        uint m_textureIndex;
        uint m_padding[1];
    };

    StructuredBuffer<Decal> m_decals;
    uint m_decalCount;
}
|
|
|
|
// Number of lights (or decals) of the type currently being culled that this thread group
// has marked visible. It is bumped atomically once per visible item, so it can grow past
// the capacity of shared_lightIndices.
groupshared uint shared_lightCount;

// Packed entries (see PackLightIndexWithBinMask) for the visible items of the current type.
groupshared uint shared_lightIndices[TILE_DIM_X * TILE_DIM_Y];
|
|
|
|
// Returns true when dir.z, scaled by RH_COORD_SYSTEM_REVERSE, is negative.
// NOTE(review): RH_COORD_SYSTEM_REVERSE is defined outside this file; presumably a +/-1
// handedness factor - confirm.
bool IsVectorPointingTowardsEye(const float3 dir)
{
    const float signedZ = dir.z * RH_COORD_SYSTEM_REVERSE;
    return signedZ < 0;
}
|
|
|
|
// Transforms a position by the pass's world-to-view matrix (w = 1, so translation applies).
float3 WorldToView_Point(float3 p)
{
    const float4 homogeneousPoint = float4(p, 1.0);
    return mul(PassSrg::m_constantData.m_worldToView, homogeneousPoint).xyz;
}
|
|
|
|
// Transforms a direction by the upper-left 3x3 of the world-to-view matrix (no translation).
float3 WorldToView_Vector(float3 v)
{
    const float3x3 upper3x3 = (float3x3)PassSrg::m_constantData.m_worldToView;
    return mul(upper3x3, v);
}
|
|
|
|
// Sphere-vs-AABB overlap test for callers that only have 1 / radius^2.
//   sphereCenter      - center of the sphere
//   invSphereRadiusSq - 1 / (sphere radius)^2
//   aabbCenter        - center of the box
//   aabbHalfSize      - half extents of the box along each axis
// Returns true when the squared distance from the sphere center to the box is below radius^2.
bool TestSphereVsAabbInvSqrt(float3 sphereCenter, float invSphereRadiusSq, float3 aabbCenter, float3 aabbHalfSize)
{
    // Per-axis distance from the sphere center to the box surface (0 on axes where it is inside).
    const float3 centerOffset = abs(aabbCenter - sphereCenter);
    const float3 outsideBox = max(float3(0.0, 0.0, 0.0), centerOffset - aabbHalfSize);
    const float distanceSq = dot(outsideBox, outsideBox);

    // distance^2 / radius^2 < 1, written without a division.
    return distanceSq * invSphereRadiusSq < 1.0f;
}
|
|
|
|
// Sphere-vs-AABB overlap test.
//   sphereCenter   - center of the sphere
//   sphereRadiusSq - squared radius of the sphere
//   aabbCenter     - center of the box
//   aabbHalfSize   - half extents of the box along each axis
// Returns true when the squared distance from the sphere center to the box is below sphereRadiusSq.
bool TestSphereVsAabb(float3 sphereCenter, float sphereRadiusSq, float3 aabbCenter, float3 aabbHalfSize)
{
    // Per-axis distance from the sphere center to the box surface (0 on axes where it is inside).
    const float3 centerOffset = abs(aabbCenter - sphereCenter);
    const float3 outsideBox = max(float3(0.0, 0.0, 0.0), centerOffset - aabbHalfSize);
    const float distanceSq = dot(outsideBox, outsideBox);

    return distanceSq < sphereRadiusSq;
}
|
|
|
|
// Approximate sphere-vs-cone overlap test.
// Note that this function isn't precise. It will have false positives due to being simplified for speed.
// Function origin and description: https://bartwronski.com/2017/04/13/cull-that-cone/
//   spherePos, sphereRadius - the sphere being tested
//   origin, forward         - apex and axis direction of the cone
//   cosa                    - cosine of the cone angle
//   size                    - length of the cone along its axis
// NOTE(review): when cosa is exactly +/-1 the rsqrt argument is 0; confirm callers never pass a degenerate cone.
bool TestSphereVsCone(float3 spherePos, float sphereRadius, float3 origin, float3 forward, float cosa, float size)
{
    const float3 toSphere = spherePos - origin;
    const float alongAxis = dot(toSphere, forward);
    const float toSphereLenSq = dot(toSphere, toSphere);

    // 1 / sin(angle); the angle test below is the article's test divided through by sin(angle).
    const float rsina = rsqrt(1 - cosa * cosa);
    const float perpendicular = sqrt(max(0.0, toSphereLenSq - alongAxis * alongAxis));
    const float distanceClosestPoint = rsina * cosa * perpendicular - alongAxis;

    const bool withinAngle = distanceClosestPoint <= sphereRadius * rsina;
    const bool notBehindApex = alongAxis >= -sphereRadius;
    const bool notPastEnd = alongAxis <= sphereRadius + size;

    return withinAngle && notBehindApex && notPastEnd;
}
|
|
|
|
|
|
// Maps a screen UV to a ray xy using the scale (xy) and offset (zw) packed in m_screenUVToRay.
float2 ScreenUvToRay(float2 uv)
{
    const float4 scaleAndOffset = PassSrg::m_constantData.m_screenUVToRay;
    return uv * scaleAndOffset.xy + scaleAndOffset.zw;
}
|
|
|
|
// Returns the screen rays bounding a tile:
//   xy contains the top left corner
//   zw contains the bottom right corner
// These rays are constructed assuming distance to them along z is 1.0.
// This lets us simply multiply the numbers by an actual depth value to get a position in the tile in view space.
//
// Also outputs tileCenterUv, the 2D screen coordinates of the center of the tile.
float4 ComputeScreenRays(uint2 tileId, out float2 tileCenterUv)
{
    const float2 tileSizeUv = PassSrg::m_constantData.m_gridPixel;
    const float2 tileMinUv = float2(tileId) * tileSizeUv;
    const float2 tileMaxUv = tileMinUv + tileSizeUv;

    tileCenterUv = tileMinUv + PassSrg::m_constantData.m_gridHalfPixel;

    return float4(ScreenUvToRay(tileMinUv), ScreenUvToRay(tileMaxUv));
}
|
|
|
|
// Returns the smallest power of two that is >= x (1 for x == 0 and x == 1).
uint NextPowerTwo(uint x)
{
    // firstbithigh(0) reports "no bit set" as -1, which would turn the shift below into an
    // out-of-range shift, so the x <= 1 case is answered directly.
    if (x <= 1)
    {
        return 1;
    }

    // https://wickedengine.net/2018/01/05/next-power-of-two-in-hlsl/
    return 2 << firstbithigh(x - 1);
}
|
|
|
|
// Extracts the decal's sort key, which occupies the low 8 bits of m_sortKeyPacked.
uint GetSortKey(PassSrg::Decal decal)
{
    const uint sortKeyMask = 0xFF;
    return decal.m_sortKeyPacked & sortKeyMask;
}
|
|
|
|
// Returns true when the decal referenced by packedIndexLeft must come after the one referenced
// by packedIndexRight, i.e. the pair needs to be swapped to be in ascending order.
// Both arguments are packed shared-memory entries; Light_GetIndex recovers the decal index.
bool AreDecalsOutofOrder(uint packedIndexLeft, uint packedIndexRight)
{
    const uint decalIndexL = Light_GetIndex(packedIndexLeft);
    const uint decalIndexR = Light_GetIndex(packedIndexRight);

    const uint sortKeyL = GetSortKey(PassSrg::m_decals[decalIndexL]);
    const uint sortKeyR = GetSortKey(PassSrg::m_decals[decalIndexR]);

    // Primary order is the sort key; equal keys fall back to the decal index so the result is deterministic.
    return (sortKeyL == sortKeyR) ? (decalIndexL > decalIndexR) : (sortKeyL > sortKeyR);
}
|
|
|
|
// Sorts the decal entries currently held in shared_lightIndices into the order defined by
// AreDecalsOutofOrder. Contains group barriers, so every thread of the group must call it.
//   groupIndex - flattened index of this thread within its group
void SortDecals(uint groupIndex)
{
    // Note that shared_lightCount can exceed the array size if too many decals intersect the tile, so clamp it here.
    const uint decalCount = min(TILE_DIM_X * TILE_DIM_Y, shared_lightCount);
    const uint paddedCount = NextPowerTwo(decalCount);

    // Bitonic sort code from AMD: https://github.com/GPUOpen-LibrariesAndSDKs/GPUParticles11
    // AMD / MIT License is contained in the same directory as this file
    // mergeSize = 2,4,8,16,etc...
    for (uint mergeSize = 2; mergeSize <= paddedCount; mergeSize *= 2)
    {
        const uint firstStride = mergeSize >> 1;

        // stride = (mergeSize / 2), (mergeSize / 4), ... 32, 16, 8, 4, 2, 1
        for (uint stride = firstStride; stride > 0; stride >>= 1)
        {
            // This code from AMD very cleverly computes the locations of two different array indices to compare.
            // The pattern that is produced from this is identical to: https://en.wikipedia.org/wiki/Bitonic_sorter#Alternative_representation
            // Essentially creating larger monotonic sequences and then merging them together. (Two monotonic sequences together is a bitonic)
            const uint offsetInHalf = groupIndex & (stride - 1);
            const uint pairBase = 2 * (groupIndex - offsetInHalf);
            const uint lowerSlot = pairBase + offsetInHalf;

            uint upperSlot;
            if (stride == firstStride)
            {
                // First step of a merge: compare against the mirrored element of the other half.
                upperSlot = pairBase + (2 * stride - 1) - offsetInHalf;
            }
            else
            {
                // Remaining steps: compare against the element one stride away.
                upperSlot = pairBase + stride + offsetInHalf;
            }

            // Pairs that reach into the power-of-two padding are skipped.
            if (lowerSlot < decalCount && upperSlot < decalCount)
            {
                const uint lowerEntry = shared_lightIndices[lowerSlot];
                const uint upperEntry = shared_lightIndices[upperSlot];
                if (AreDecalsOutofOrder(lowerEntry, upperEntry))
                {
                    shared_lightIndices[lowerSlot] = upperEntry;
                    shared_lightIndices[upperSlot] = lowerEntry;
                }
            }

            // All swaps of this step must land before any thread starts the next step.
            GroupMemoryBarrierWithGroupSync();
        }
    }
}
|
|
|
|
// Appends one visible light/decal to the group's shared list.
//   lightIndex - index of the light or decal in its source buffer
//   inside     - bin mask produced by IsObjectInsideTile; packed together with the index
void MarkLightAsVisibleInSharedMemory(uint lightIndex, uint inside)
{
    // Reserve a slot; the atomic returns the count before the increment.
    uint sharedLightIndex;
    InterlockedAdd(shared_lightCount, 1, sharedLightIndex);

    // shared_lightCount keeps growing even when the list is full, so overflowing entries are
    // redirected to the last slot. The limit is the smaller of the per-bin maximum and the
    // actual capacity of shared_lightIndices, so the write can never land outside the array.
    const uint lastValidSlot = min(TILE_DIM_X * TILE_DIM_Y, NVLC_MAX_POSSIBLE_LIGHTS_PER_BIN) - 1;
    sharedLightIndex = min(sharedLightIndex, lastValidSlot);

    shared_lightIndices[sharedLightIndex] = PackLightIndexWithBinMask(lightIndex, inside);
}
|
|
|
|
// Copies this thread's entry of the shared list (if it has one) into the tile's light list.
//   lightCount - number of entries already written to the tile's list by earlier light types
//   groupIndex - flattened thread index; each thread copies shared_lightIndices[groupIndex]
//   groupID    - tile coordinates, forwarded to GetLightListIndex
void CopySharedLightsToMainMemory(uint lightCount, uint groupIndex, uint3 groupID)
{
    const bool hasEntryForThisThread = groupIndex < shared_lightCount;
    if (hasEntryForThisThread)
    {
        // Entries past the per-bin maximum all collapse onto the last slot.
        const uint slotInTile = min(lightCount + groupIndex, NVLC_MAX_POSSIBLE_LIGHTS_PER_BIN - 1);
        const uint destination = GetLightListIndex(groupID, PassSrg::m_constantData.m_gridWidth, slotInTile);
        PassSrg::m_lightList[destination] = shared_lightIndices[groupIndex];
    }
}
|
|
|
|
// Return the minz and maxz of this light in view space: the light's z offset by -/+ the radius,
// with the radius term scaled by RH_COORD_SYSTEM_REVERSE.
float2 ComputePointLightMinMaxZ(float lightRadius, float3 lightPosition)
{
    const float2 signedExtent = lightRadius * float2(-1, 1) * RH_COORD_SYSTEM_REVERSE;
    return lightPosition.z + signedExtent;
}
|
|
|
|
// View-space z range of a simple spot light, taken from the sphere of its attenuation radius
// around lightPosition (the cone shape is not used to tighten the range).
float2 ComputeSimpleSpotLightMinMax(PassSrg::SimpleSpotLight light, float3 lightPosition)
{
    // m_invAttenuationRadiusSquared is 1 / radius^2, so rsqrt recovers the radius.
    const float attenuationRadius = rsqrt(light.m_invAttenuationRadiusSquared);
    return ComputePointLightMinMaxZ(attenuationRadius, lightPosition);
}
|
|
|
|
// Return the minz and maxz of this quad light in view space.
// Quad light must be double sided.
// The range is that of the sphere of the light's attenuation radius around lightPosition.
float2 ComputeQuadLightMinMaxZ_DoubleSided(PassSrg::QuadLight light, float3 lightPosition)
{
    // m_invAttenuationRadiusSquared is 1 / radius^2, so rsqrt recovers the radius.
    const float attenuationRadius = rsqrt(light.m_invAttenuationRadiusSquared);
    return ComputePointLightMinMaxZ(attenuationRadius, lightPosition);
}
|
|
|
|
// Return the minz and maxz of this quad light in view space.
// Quad light must be single sided.
// lightDirection is currently unused: the double-sided bounds are returned as a conservative answer.
float2 ComputeQuadLightMinMaxZ_SingleSided(PassSrg::QuadLight light, float3 lightPosition, float3 lightDirection)
{
    // [GFX TODO][ATOM-6170] We can compute tighter bounds for single sided lights by bringing in one of the bounds
    const float2 conservativeMinMax = ComputeQuadLightMinMaxZ_DoubleSided(light, lightPosition);
    return conservativeMinMax;
}
|
|
|
|
// View-space z range of a disk light: a sphere around lightPosition whose radius is the
// attenuation radius plus the bulb position offset.
float2 ComputeDiskLightMinMax(PassSrg::DiskLight light, float3 lightPosition)
{
    const float boundingRadius = rsqrt(light.m_invAttenuationRadiusSquared) + light.m_bulbPositionOffset;
    return ComputePointLightMinMaxZ(boundingRadius, lightPosition);
}
|
|
|
|
// View-space z range of a capsule light.
//   light              - the capsule (direction and length are read)
//   lightPosition      - view-space position of the capsule's midpoint
//   lightFalloffRadius - attenuation radius added around the segment
// Returns float2(nearZ, farZ).
float2 ComputeCapsuleLightMinMax(PassSrg::CapsuleLight light, float3 lightPosition, float lightFalloffRadius)
{
    // Half of the segment's extent along view z, plus the falloff radius.
    const float axisZ = WorldToView_Vector(light.m_direction).z;
    const float halfSegmentZ = abs(axisZ * light.m_length * 0.5f);
    const float signedOffset = (halfSegmentZ + lightFalloffRadius) * RH_COORD_SYSTEM_REVERSE;

    return float2(lightPosition.z - signedOffset, lightPosition.z + signedOffset);
}
|
|
|
|
// Tests every decal against the tile and records the ones that may touch it.
// The group's threads split the decals between them: thread groupIndex handles decals
// groupIndex, groupIndex + TILE_DIM_X * TILE_DIM_Y, and so on.
//   tileLightData             - per-tile depth data, forwarded to IsObjectInsideTile
//   aabb_center, aabb_extents - view-space box around the tile
//   tile_center_uv            - currently unused
void CullDecals(uint groupIndex, TileLightData tileLightData, float3 aabb_center, float3 aabb_extents, float2 tile_center_uv)
{
    const uint threadsPerGroup = TILE_DIM_X * TILE_DIM_Y;

    for (uint decalIndex = groupIndex; decalIndex < PassSrg::m_decalCount; decalIndex += threadsPerGroup)
    {
        const PassSrg::Decal decal = PassSrg::m_decals[decalIndex];
        const float3 viewPosition = WorldToView_Point(decal.m_position);

        // just wrapping a bounding sphere around a cube for now to get a minor perf boost. i.e. the sphere radius is sqrt(x*x + y*y + z*z)
        // ATOM-4224 - try AABB-AABB
        const float radiusSq = dot(decal.m_halfSize, decal.m_halfSize);

        if (!TestSphereVsAabb(viewPosition, radiusSq, aabb_center, aabb_extents))
        {
            continue;
        }

        uint inside = 0;
        float2 minmax = ComputePointLightMinMaxZ(sqrt(radiusSq), viewPosition);
        if (IsObjectInsideTile(tileLightData, minmax, inside))
        {
            MarkLightAsVisibleInSharedMemory(decalIndex, inside);
        }
    }
}
|
|
|
|
// Culls one sphere-shaped light against the tile and records it when it may touch the tile.
//   lightIndex     - index stored in the light list when the light is visible
//   lightPosition  - world-space position of the light
//   invLightRadius - despite the name, callers pass 1 / radius^2 (m_invAttenuationRadiusSquared)
//   tileLightData  - per-tile depth data, forwarded to IsObjectInsideTile
//   aabb_center, aabb_extents - view-space box around the tile
void CullPointLight(uint lightIndex, float3 lightPosition, float invLightRadius, TileLightData tileLightData, float3 aabb_center, float3 aabb_extents)
{
    const float3 viewPosition = WorldToView_Point(lightPosition);

    if (TestSphereVsAabbInvSqrt(viewPosition, invLightRadius, aabb_center, aabb_extents))
    {
        // Implement and profile fine-grained light culling testing
        // ATOM-3732

        uint inside = 0;
        float2 minmax = ComputePointLightMinMaxZ(rsqrt(invLightRadius), viewPosition);
        if (IsObjectInsideTile(tileLightData, minmax, inside))
        {
            MarkLightAsVisibleInSharedMemory(lightIndex, inside);
        }
    }
}
|
|
|
|
// Culls all simple point lights against the tile; the group's threads stride over the buffer.
void CullSimplePointLights(uint groupIndex, TileLightData tileLightData, float3 aabb_center, float3 aabb_extents)
{
    const uint threadsPerGroup = TILE_DIM_X * TILE_DIM_Y;
    const uint totalLights = PassSrg::m_simplePointLightCount;

    for (uint lightIndex = groupIndex; lightIndex < totalLights; lightIndex += threadsPerGroup)
    {
        const float3 worldPosition = PassSrg::m_simplePointLights[lightIndex].m_position;
        const float invRadiusSq = PassSrg::m_simplePointLights[lightIndex].m_invAttenuationRadiusSquared;
        CullPointLight(lightIndex, worldPosition, invRadiusSq, tileLightData, aabb_center, aabb_extents);
    }
}
|
|
|
|
// Culls all point lights against the tile; the group's threads stride over the buffer.
void CullPointLights(uint groupIndex, TileLightData tileLightData, float3 aabb_center, float3 aabb_extents)
{
    const uint threadsPerGroup = TILE_DIM_X * TILE_DIM_Y;
    const uint totalLights = PassSrg::m_pointLightCount;

    for (uint lightIndex = groupIndex; lightIndex < totalLights; lightIndex += threadsPerGroup)
    {
        const float3 worldPosition = PassSrg::m_pointLights[lightIndex].m_position;
        const float invRadiusSq = PassSrg::m_pointLights[lightIndex].m_invAttenuationRadiusSquared;
        CullPointLight(lightIndex, worldPosition, invRadiusSq, tileLightData, aabb_center, aabb_extents);
    }
}
|
|
|
|
// Culls all simple spot lights against the tile; the group's threads stride over the buffer.
// Each light is treated as a cone (outer cone angle, attenuation radius as length) and tested
// against the sphere that encloses the tile's AABB.
void CullSimpleSpotLights(uint groupIndex, TileLightData tileLightData, float3 aabb_center, float3 aabb_extents)
{
    const uint threadsPerGroup = TILE_DIM_X * TILE_DIM_Y;

    // Radius of the sphere around the tile AABB; the same for every light.
    const float tileBoundingRadius = length(aabb_extents);

    for (uint lightIndex = groupIndex; lightIndex < PassSrg::m_simpleSpotLightCount; lightIndex += threadsPerGroup)
    {
        const PassSrg::SimpleSpotLight light = PassSrg::m_simpleSpotLights[lightIndex];
        const float3 viewPosition = WorldToView_Point(light.m_position);
        const float3 viewDirection = WorldToView_Vector(light.m_direction);
        const float coneLength = rsqrt(light.m_invAttenuationRadiusSquared);

        if (!TestSphereVsCone(aabb_center, tileBoundingRadius, viewPosition, viewDirection, light.m_cosOuterConeAngle, coneLength))
        {
            continue;
        }

        // Implement and profile fine-grained light culling testing
        // ATOM-3732

        uint inside = 0;
        float2 minmax = ComputeSimpleSpotLightMinMax(light, viewPosition);
        if (IsObjectInsideTile(tileLightData, minmax, inside))
        {
            MarkLightAsVisibleInSharedMemory(lightIndex, inside);
        }
    }
}
|
|
|
|
// Culls all disk lights against the tile; the group's threads stride over the buffer.
// Lights flagged with DiskLightFlag::UseConeAngle are tested as a cone, all others as a
// sphere restricted to the half space in front of the disk.
//   tileLightData             - per-tile depth data, forwarded to IsObjectInsideTile
//   aabb_center, aabb_extents - view-space box around the tile
void CullDiskLights(uint groupIndex, TileLightData tileLightData, float3 aabb_center, float3 aabb_extents)
{
    // Radius of the sphere around the tile AABB; the same for every light.
    const float aabbRadius = length(aabb_extents);

    for (uint lightIndex = groupIndex; lightIndex < PassSrg::m_diskLightCount; lightIndex += TILE_DIM_X * TILE_DIM_Y)
    {
        PassSrg::DiskLight light = PassSrg::m_diskLights[lightIndex];

        // The culling origin sits m_bulbPositionOffset behind the disk along its direction.
        const float3 lightPosition = WorldToView_Point(light.m_position - light.m_bulbPositionOffset * light.m_direction);
        const float3 lightDirection = WorldToView_Vector(light.m_direction);
        const float attenuationRadius = rsqrt(light.m_invAttenuationRadiusSquared);

        bool potentiallyIntersects;

        // The parentheses matter: '>' / '!=' bind tighter than '&', so without them the flag
        // would be compared first and the result masked against m_flags.
        if ((light.m_flags & DiskLightFlag::UseConeAngle) != 0)
        {
            potentiallyIntersects = TestSphereVsCone(aabb_center, aabbRadius, lightPosition, lightDirection, light.m_cosOuterConeAngle, attenuationRadius + light.m_bulbPositionOffset);
        }
        else
        {
            const float lightRadius = attenuationRadius + light.m_diskRadius;
            const float lightRadiusSqr = lightRadius * lightRadius;
            potentiallyIntersects = TestSphereVsAabb(lightPosition, lightRadiusSqr, aabb_center, aabb_extents);

            if (potentiallyIntersects)
            {
                // Only one side is visible, check that we are above the hemisphere
                const float3 toAABBCenter = aabb_center - lightPosition;
                const float distanceToLightPlane = dot(lightDirection, toAABBCenter);

                potentiallyIntersects = distanceToLightPlane >= -aabbRadius;
            }
        }

        if (potentiallyIntersects)
        {
            // Implement and profile fine-grained light culling testing
            // ATOM-3732

            uint inside = 0;
            float2 minmax = ComputeDiskLightMinMax(light, lightPosition);
            if (IsObjectInsideTile(tileLightData, minmax, inside))
            {
                MarkLightAsVisibleInSharedMemory(lightIndex, inside);
            }
        }
    }
}
|
|
|
|
// Culls all capsule lights against the tile; the group's threads stride over the buffer.
// Each capsule is bounded by a sphere centered on the segment's midpoint whose radius is the
// attenuation radius plus half the segment length.
void CullCapsuleLights(uint groupIndex, TileLightData tileLightData, float3 aabb_center, float3 aabb_extents)
{
    const uint threadsPerGroup = TILE_DIM_X * TILE_DIM_Y;

    for (uint lightIndex = groupIndex; lightIndex < PassSrg::m_capsuleLightCount; lightIndex += threadsPerGroup)
    {
        const PassSrg::CapsuleLight light = PassSrg::m_capsuleLights[lightIndex];

        const float3 midpointWorld = light.m_startPoint + light.m_direction * light.m_length * 0.5f;
        const float3 midpointView = WorldToView_Point(midpointWorld);

        const float falloffRadius = rsqrt(light.m_invAttenuationRadiusSquared);
        const float boundingRadius = falloffRadius + light.m_length * 0.5f;

        if (!TestSphereVsAabb(midpointView, boundingRadius * boundingRadius, aabb_center, aabb_extents))
        {
            continue;
        }

        // Implement and profile fine-grained light culling testing
        // ATOM-3732

        uint inside = 0;
        float2 minmax = ComputeCapsuleLightMinMax(light, midpointView, falloffRadius);
        if (IsObjectInsideTile(tileLightData, minmax, inside))
        {
            MarkLightAsVisibleInSharedMemory(lightIndex, inside);
        }
    }
}
|
|
|
|
// Culls all quad lights against the tile; the group's threads stride over the buffer.
// Every quad is first tested as a sphere of its attenuation radius. Single-sided quads are
// additionally rejected when the tile lies entirely behind the quad's plane.
void CullQuadLights(uint groupIndex, TileLightData tileLightData, float3 aabb_center, float3 aabb_extents)
{
    // Implement and profile fine-grained light culling testing
    // ATOM-3732

    const uint threadsPerGroup = TILE_DIM_X * TILE_DIM_Y;

    for (uint lightIndex = groupIndex; lightIndex < PassSrg::m_quadLightCount; lightIndex += threadsPerGroup)
    {
        const PassSrg::QuadLight light = PassSrg::m_quadLights[lightIndex];
        const float3 lightPosition = WorldToView_Point(light.m_position);

        if (!TestSphereVsAabbInvSqrt(lightPosition, light.m_invAttenuationRadiusSquared, aabb_center, aabb_extents))
        {
            continue;
        }

        const bool emitsBothDirections = (light.m_flags & QuadLightFlag::EmitsBothDirections) != 0;

        bool tileCanBeLit = true;
        float2 minmaxz = float2(0.0, 0.0); // only read when tileCanBeLit stays true

        if (emitsBothDirections)
        {
            minmaxz = ComputeQuadLightMinMaxZ_DoubleSided(light, lightPosition);
        }
        else
        {
            // Only one side is visible, check that we are above the hemisphere
            const float3 lightDirection = WorldToView_Vector(cross(light.m_leftDir, light.m_upDir));
            const float3 toAABBCenter = aabb_center - lightPosition;
            const float distanceToLightPlane = dot(lightDirection, toAABBCenter);
            const float aabbRadius = length(aabb_extents);

            tileCanBeLit = distanceToLightPlane >= -aabbRadius;
            if (tileCanBeLit)
            {
                minmaxz = ComputeQuadLightMinMaxZ_SingleSided(light, lightPosition, lightDirection);
            }
        }

        uint inside = 0;
        if (tileCanBeLit && IsObjectInsideTile(tileLightData, minmaxz, inside))
        {
            MarkLightAsVisibleInSharedMemory(lightIndex, inside);
        }
    }
}
|
|
|
|
// Writes the end-of-group marker after the entries of the light type that was just copied out.
//   lightCount - number of list entries written for this tile before the current light type
//   groupID    - tile coordinates, forwarded to GetLightListIndex
// Returns the new entry count: lightCount + shared_lightCount + 1 (the marker itself).
uint WriteEndOfGroup(uint lightCount, uint3 groupID)
{
    const uint entriesBeforeMarker = lightCount + shared_lightCount;

    // The write position is clamped to the last slot of the bin; the returned count is not.
    const uint slotInTile = min(entriesBeforeMarker, NVLC_MAX_POSSIBLE_LIGHTS_PER_BIN - 1);
    const uint destination = GetLightListIndex(groupID, PassSrg::m_constantData.m_gridWidth, slotInTile);

    PassSrg::m_lightList[destination] = PackLightIndexWithBinMask(NVLC_END_OF_GROUP, NVLC_ALL_BIN_BITS);

    return entriesBeforeMarker + 1;
}
|
|
|
|
// Resets the group's shared visible-light counter. Only the first thread of the group writes.
// No barrier is issued here; the caller is responsible for synchronizing.
void ClearSharedLightCount(uint groupIndex)
{
    const bool isFirstThreadInGroup = (groupIndex == 0);
    if (isFirstThreadInGroup)
    {
        shared_lightCount = 0;
    }
}
|
|
|
|
// Resets the group's shared visible-light counter between two group barriers:
// the first lets every thread finish with the old value before it is cleared,
// the second makes the cleared value visible before any thread continues.
void ClearSharedLightCountWithDoubleBarrier(uint groupIndex)
{
    GroupMemoryBarrierWithGroupSync();

    if (groupIndex == 0)
    {
        shared_lightCount = 0;
    }

    GroupMemoryBarrierWithGroupSync();
}
|
|
|
|
// Reads the tile's entry of m_tileLightData and reinterprets its x and y components as floats.
float2 ReadDepthCloseFar(uint3 groupID)
{
    const uint2 packedDepths = PassSrg::m_tileLightData[groupID.xy].xy;
    return asfloat(packedDepths);
}
|
|
|
|
// Loads the tile's packed entry of m_tileLightData and unpacks it with Tile_UnpackData.
TileLightData ReadTileLightData(uint3 groupID)
{
    return Tile_UnpackData(PassSrg::m_tileLightData[groupID.xy]);
}
|
|
|
|
// Flushes the shared list of the light type that was just culled to the tile's light list and
// terminates it with an end-of-group marker. Contains a group barrier, so every thread of the
// group must call it.
//   lightCount - number of entries written for this tile so far
// Returns the entry count after this group and its marker have been added.
uint WriteCullingDataToMainMemory(uint lightCount, uint groupIndex, uint3 groupID)
{
    // Wait until every thread has finished adding to the shared list.
    GroupMemoryBarrierWithGroupSync();

    CopySharedLightsToMainMemory(lightCount, groupIndex, groupID);
    return WriteEndOfGroup(lightCount, groupID);
}
|
|
|
|
// This shader is invoked with one thread-group per on-screen tile
|
|
// e.g. if the screen resolution is 1920x1080, with 16x16 tiles, there will be 120x68 tiles (and 120x68 thread groups)
|
|
// Each thread-group is dedicated to culling all lights against that screen-tile.
|
|
// It might be worth splitting this compute shader into several shaders, one per light type.
|
|
|
|
// Each thread will read one light, determine if it is visible, write it to shared memory, then move onto the next light until
|
|
// all lights are processed
|
|
// After all lights visibility is computed, it will write them back from shared memory to GPU memory
|
|
|
|
// This will write out the following:
|
|
|
|
// Point light index << 16 | bitmask contains which bits the light is present in
|
|
// Point light index << 16 | bitmask contains which bits the light is present in
|
|
// Point light index << 16 | bitmask contains which bits the light is present in
|
|
// ...
|
|
// End of Group
|
|
// Disk light index << 16 | bitmask contains which bits the light is present in
|
|
// Disk light index << 16 | bitmask contains which bits the light is present in
|
|
// Disk light index << 16 | bitmask contains which bits the light is present in
|
|
// ...
|
|
// End of Group
|
|
// i.e. for each 32-bit UINT, it contains the 16 bit light index + 16-bit binning information
|
|
// (other light types and decals to come)
|
|
// Note! This isn't consumed by the forward shader. This light list will be further processed by the LightCullingRemap shader, producing a LightListRemapped buffer
|
|
// that is more optimal for consumption by the forward shader.
|
|
|
|
// Entry point. One thread group per tile (groupID.xy selects the tile).
// For each category in turn - decals, simple point, simple spot, point, disk, capsule and quad
// lights - the group culls into shared memory, flushes the result plus an end-of-group marker
// to m_lightList, and resets the shared counter. The total entry count is stored in m_lightCount.
[numthreads(TILE_DIM_X, TILE_DIM_Y, 1)]
void MainCS(
    uint3 dispatchThreadID : SV_DispatchThreadID,
    uint3 groupID : SV_GroupID,
    uint groupIndex : SV_GroupIndex)
{
    ClearSharedLightCount(groupIndex);

    // Running number of entries written to this tile's light list.
    uint entriesWritten = 0;

    TileLightData tileLightData = ReadTileLightData(groupID);

    // View-space bounding box of the tile, built from its corner rays and depth data.
    float2 tileCenterUv;
    float4 tileCornerRays = ComputeScreenRays(groupID.xy, tileCenterUv);
    float3 tileAabbCenter;
    float3 tileAabbExtents;
    BuildAabb(tileCornerRays, tileLightData, tileAabbCenter, tileAabbExtents);

    // Make the cleared counter visible to the whole group before culling starts.
    GroupMemoryBarrierWithGroupSync();

    // Decals (the only category that is sorted before being written out)
    CullDecals(groupIndex, tileLightData, tileAabbCenter, tileAabbExtents, tileCenterUv);
    GroupMemoryBarrierWithGroupSync();
    SortDecals(groupIndex);
    entriesWritten = WriteCullingDataToMainMemory(entriesWritten, groupIndex, groupID);
    ClearSharedLightCountWithDoubleBarrier(groupIndex);

    // Simple point lights
    CullSimplePointLights(groupIndex, tileLightData, tileAabbCenter, tileAabbExtents);
    entriesWritten = WriteCullingDataToMainMemory(entriesWritten, groupIndex, groupID);
    ClearSharedLightCountWithDoubleBarrier(groupIndex);

    // Simple spot lights
    CullSimpleSpotLights(groupIndex, tileLightData, tileAabbCenter, tileAabbExtents);
    entriesWritten = WriteCullingDataToMainMemory(entriesWritten, groupIndex, groupID);
    ClearSharedLightCountWithDoubleBarrier(groupIndex);

    // Point lights
    CullPointLights(groupIndex, tileLightData, tileAabbCenter, tileAabbExtents);
    entriesWritten = WriteCullingDataToMainMemory(entriesWritten, groupIndex, groupID);
    ClearSharedLightCountWithDoubleBarrier(groupIndex);

    // Disk lights
    CullDiskLights(groupIndex, tileLightData, tileAabbCenter, tileAabbExtents);
    entriesWritten = WriteCullingDataToMainMemory(entriesWritten, groupIndex, groupID);
    ClearSharedLightCountWithDoubleBarrier(groupIndex);

    // Capsule lights
    CullCapsuleLights(groupIndex, tileLightData, tileAabbCenter, tileAabbExtents);
    entriesWritten = WriteCullingDataToMainMemory(entriesWritten, groupIndex, groupID);
    ClearSharedLightCountWithDoubleBarrier(groupIndex);

    // Quad lights (last category, so the shared counter is not reset afterwards)
    CullQuadLights(groupIndex, tileLightData, tileAabbCenter, tileAabbExtents);
    entriesWritten = WriteCullingDataToMainMemory(entriesWritten, groupIndex, groupID);

    // One thread publishes the tile's total entry count.
    if (groupIndex == 0)
    {
        PassSrg::m_lightCount[groupID.xy] = entriesWritten;
    }
}
|