You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
245 lines
12 KiB
Plaintext
245 lines
12 KiB
Plaintext
/*
|
|
* Copyright (c) Contributors to the Open 3D Engine Project.
|
|
* For complete copyright and license terms please see the LICENSE at the root of this distribution.
|
|
*
|
|
* SPDX-License-Identifier: Apache-2.0 OR MIT
|
|
*
|
|
*/
|
|
#include "LinearSkinningPassSRG.azsli"
|
|
#include <Atom/Features/MorphTargets/MorphTargetCompression.azsli>
|
|
#include <Atom/Features/MatrixUtility.azsli>
|
|
#include <Atom/RPI/Math.azsli>
|
|
|
|
|
|
// Shader option choosing the skinning path taken in MainCS: SkinVertexLinear or SkinVertexDualQuaternion.
option enum class SkinningMethod { LinearSkinning, DualQuaternion } o_skinningMethod = SkinningMethod::LinearSkinning;
|
|
// Shader option: when true, MainCS applies the accumulated morph target deltas to position, normal, tangent and bitangent before skinning.
option bool o_applyMorphTargets = false;
|
|
// Shader option: when true, MainCS applies the accumulated color morph target delta to the source color and writes the result to the target color stream.
option bool o_applyColorMorphTargets = false;
|
|
|
|
// Fetches one float3 from a buffer of tightly packed floats.
// Element 'index' occupies the three consecutive floats starting at index * 3.
float3 ReadFloat3FromFloatBuffer(Buffer<float> buffer, uint index)
{
    const uint firstComponent = index * 3;
    return float3(buffer[firstComponent], buffer[firstComponent + 1], buffer[firstComponent + 2]);
}
|
|
|
|
// Apply a morph target delta with three components.
// streamOffset:  start of the delta stream inside PassSrg::m_skinnedMeshOutputStream
// vertexIndex:   vertex whose delta is consumed (three elements per vertex)
// modifiedValue: value the decoded delta is added to
// The consumed elements are cleared so that the accumulation starts from 0 again for the next frame.
void ApplyMorphTargetDelta(uint streamOffset, uint vertexIndex, inout float3 modifiedValue)
{
    // First element of this vertex's delta
    const uint deltaStart = streamOffset + vertexIndex * 3;

    // The stream elements hold integer-encoded deltas, so reinterpret their bits instead of converting
    const uint3 encodedDelta = uint3(
        asint(PassSrg::m_skinnedMeshOutputStream[deltaStart]),
        asint(PassSrg::m_skinnedMeshOutputStream[deltaStart + 1]),
        asint(PassSrg::m_skinnedMeshOutputStream[deltaStart + 2]));

    // Everything has been read, so clear the accumulation for the next frame
    [unroll]
    for (uint component = 0; component < 3; ++component)
    {
        PassSrg::m_skinnedMeshOutputStream[deltaStart + component] = asfloat(0);
    }

    // Decode and accumulate onto the caller's value
    modifiedValue += DecodeIntsToFloats(encodedDelta, InstanceSrg::m_morphTargetDeltaInverseIntegerEncoding);
}
|
|
|
|
// Apply a morph target delta with four components.
// streamOffset:  start of the delta stream inside PassSrg::m_skinnedMeshOutputStream
// vertexIndex:   vertex whose delta is consumed (four elements per vertex)
// modifiedValue: value the decoded delta is added to
// The consumed elements are cleared so that the accumulation starts from 0 again for the next frame.
void ApplyMorphTargetDelta(uint streamOffset, uint vertexIndex, inout float4 modifiedValue)
{
    // First element of this vertex's delta
    const uint deltaStart = streamOffset + vertexIndex * 4;

    // The stream elements hold integer-encoded deltas, so reinterpret their bits instead of converting
    const uint4 encodedDelta = uint4(
        asint(PassSrg::m_skinnedMeshOutputStream[deltaStart]),
        asint(PassSrg::m_skinnedMeshOutputStream[deltaStart + 1]),
        asint(PassSrg::m_skinnedMeshOutputStream[deltaStart + 2]),
        asint(PassSrg::m_skinnedMeshOutputStream[deltaStart + 3]));

    // Everything has been read, so clear the accumulation for the next frame
    [unroll]
    for (uint component = 0; component < 4; ++component)
    {
        PassSrg::m_skinnedMeshOutputStream[deltaStart + component] = asfloat(0);
    }

    // Decode and accumulate onto the caller's value
    modifiedValue += DecodeIntsToFloats(encodedDelta, InstanceSrg::m_morphTargetDeltaInverseIntegerEncoding);
}
|
|
|
|
|
|
//! Transforms a vertex normal, tangent and bitangent into world space following MikkT conventions.
//! Mirrors ConstructTBN from TangentSpace.azsli, except that localToWorld is a float3x3.
//! The normal goes through localToWorldInverseTranspose; tangent and bitangent go through localToWorld.
//! vertexTangent.w is not used to flip the bitangent: the incoming bitangent is assumed to be oriented correctly already.
//! All three outputs are normalized.
void ConstructTBN(float3 vertexNormal, float4 vertexTangent, float3 vertexBitangent, float3x3 localToWorld, float3x3 localToWorldInverseTranspose, out float3 normalWS, out float3 tangentWS, out float3 bitangentWS)
{
    const float3 transformedNormal = mul(localToWorldInverseTranspose, vertexNormal);
    const float3 transformedTangent = mul(localToWorld, vertexTangent.xyz);
    const float3 transformedBitangent = mul(localToWorld, vertexBitangent);

    normalWS = normalize(transformedNormal);
    tangentWS = normalize(transformedTangent);
    bitangentWS = normalize(transformedBitangent);
}
|
|
|
|
// Transforms normal, tangent.xyz and bitangent in place by the given skinning matrix.
// tangent.w is passed through untouched.
void SkinTBN(float3x3 skinToWorldMatrix, inout float3 normal, inout float4 tangent, inout float3 bitangent)
{
    // The normal has to go through the inverse transpose so that non-uniform scaling is handled correctly
    const float3x3 normalMatrix = InverseTransposeMatrixFast(skinToWorldMatrix);

    ConstructTBN(
        normal, tangent, bitangent,
        skinToWorldMatrix, normalMatrix,
        normal, tangent.xyz, bitangent);
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////////////////////////
|
|
// Skinning support
|
|
// Linear blend skinning of a single vertex.
// Blends the four matrices from InstanceSrg::m_boneTransformsLinear selected by 'indices' using 'weights',
// then transforms position, normal, tangent and bitangent in place with the blended matrix.
void SkinVertexLinear(int4 indices, float4 weights, inout float3 position, inout float3 normal, inout float4 tangent, inout float3 bitangent)
{
    // Weighted sum of the four influencing bone transforms
    float3x4 blendedTransform = (float3x4)0;

    [unroll]
    for(uint influence = 0; influence < 4; ++influence)
    {
        const float3x4 boneTransform = InstanceSrg::m_boneTransformsLinear[ indices[influence] ];
        blendedTransform += boneTransform * weights[influence];
    }

    // Positions are points, so use w = 1 to pick up the translation column
    position = mul(blendedTransform, float4(position, 1.0));

    // The TBN only needs rotation and scale, so drop the translation column
    SkinTBN((float3x3)blendedTransform, normal, tangent, bitangent);
}
|
|
|
|
// Accumulates rhs * weight into lhs.
// The contribution is negated when row 0 of rhs has a negative dot product with row 0 of the accumulator
// (presumably to keep the blended quaternions in the same hemisphere).
void AddWeightedDualQuaternion(inout float2x4 lhs, float2x4 rhs, float weight)
{
    float sign = 1.0f;
    if (dot(lhs[0], rhs[0]) < 0.0f)
    {
        sign = -1.0f;
    }

    lhs += (rhs * weight) * sign;
}
|
|
|
|
// Scales both rows of the dual quaternion by the inverse length of row 0.
// There is no guard against row 0 having zero length.
void NormalizeDualQuaternion(inout float2x4 dualQuaternion)
{
    const float4 firstRow = dualQuaternion[0];
    const float lengthSquared = dot(firstRow, firstRow);
    dualQuaternion *= rsqrt(lengthSquared);
}
|
|
|
|
// Dual quaternion skinning of a single vertex.
// Blends the four dual quaternions from InstanceSrg::m_boneTransformsDualQuaternion selected by 'indices'
// using 'weights', normalizes the result and uses it to transform position, normal, tangent.xyz and bitangent in place.
// tangent.w is left unchanged.
void SkinVertexDualQuaternion(int4 indices, float4 weights, inout float3 position, inout float3 normal, inout float4 tangent, inout float3 bitangent)
{
    // Weighted blend of the four influencing bones
    float2x4 blended = (float2x4)0;

    [unroll]
    for(uint influence = 0; influence < 4; ++influence)
    {
        AddWeightedDualQuaternion(blended, InstanceSrg::m_boneTransformsDualQuaternion[ indices[influence] ], weights[influence]);
    }

    NormalizeDualQuaternion(blended);

    // Row 0 is used as the rotation quaternion, row 1 as the translation part
    const float4 rotation = blended[0];
    const float4 translation = blended[1];

    // Offset recovered from the rotation and translation rows
    const float3 positionOffset =
        (rotation.w * translation.xyz - translation.w * rotation.xyz + cross(rotation.xyz, translation.xyz)) * 2;

    position.xyz = RotateVectorByQuaternion(position.xyz, rotation) + positionOffset;

    // Direction vectors are only rotated, then re-normalized
    normal = normalize(RotateVectorByQuaternion(normal, rotation));
    tangent.xyz = normalize(RotateVectorByQuaternion(tangent.xyz, rotation));
    bitangent = normalize(RotateVectorByQuaternion(bitangent, rotation));
}
|
|
|
|
// Compute shader entry point: skins one vertex per thread.
// For its vertex, each thread
//   1. copies the position written last frame into the history slot (m_numVertices elements further on),
//   2. returns early if all four blend weights are zero,
//   3. reads the source streams and unpacks the four 16-bit blend indices,
//   4. optionally applies morph target deltas (o_applyMorphTargets / o_applyColorMorphTargets),
//   5. skins with the method selected by o_skinningMethod,
//   6. writes position, normal, tangent and bitangent to PassSrg::m_skinnedMeshOutputStream.
[numthreads(256,1,1)]
void MainCS(uint3 thread_id: SV_DispatchThreadID)
{
    // Each thread is responsible for one vertex
    // The total number of threads in a per-ActorInstance dispatch item matches the total number of vertices in the skinned mesh

    // The thread id for each dimension is limited to uint16_t max, so to support more than 65535 vertices we get the real index from both the x and y dimensions
    const uint i = (thread_id.x) + InstanceSrg::m_totalNumberOfThreadsX * (thread_id.y);
    if(i >= InstanceSrg::m_numVertices)
    {
        // Nothing to do for threads beyond the last vertex
        return;
    }

    // Offsets of this vertex inside the shared output stream
    const uint positionOffset = InstanceSrg::m_targetPositions + i * 3;
    const uint previousPositionOffset = InstanceSrg::m_targetPositions + (InstanceSrg::m_numVertices + i) * 3;
    const uint normalOffset = InstanceSrg::m_targetNormals + i * 3;
    const uint tangentOffset = InstanceSrg::m_targetTangents + i * 4;
    const uint bitangentOffset = InstanceSrg::m_targetBiTangents + i * 3;

    // Moving current vertex position updated last frame to a predefined location to maintain a vertex history between two frames.
    // This is done exactly once, and before the zero-weight early return below, so the history is also kept for vertices that skip skinning.
    PassSrg::m_skinnedMeshOutputStream[previousPositionOffset] = PassSrg::m_skinnedMeshOutputStream[positionOffset];
    PassSrg::m_skinnedMeshOutputStream[previousPositionOffset + 1] = PassSrg::m_skinnedMeshOutputStream[positionOffset + 1];
    PassSrg::m_skinnedMeshOutputStream[previousPositionOffset + 2] = PassSrg::m_skinnedMeshOutputStream[positionOffset + 2];

    float4 blendWeights;
    blendWeights.x = InstanceSrg::m_sourceBlendWeights[i * 4];
    blendWeights.y = InstanceSrg::m_sourceBlendWeights[i * 4 + 1];
    blendWeights.z = InstanceSrg::m_sourceBlendWeights[i * 4 + 2];
    blendWeights.w = InstanceSrg::m_sourceBlendWeights[i * 4 + 3];

    // [TODO ATOM-15288]
    // Temporary workaround. When all the blend weights of a vertex are zero it means its data is set by the CPU directly
    // and skinning must be skipped to not overwrite it (e.g. cloth simulation).
    if(!any(blendWeights))
    {
        return;
    }

    float3 position = ReadFloat3FromFloatBuffer(InstanceSrg::m_sourcePositions, i);
    float3 normal = ReadFloat3FromFloatBuffer(InstanceSrg::m_sourceNormals, i);
    float4 tangent = InstanceSrg::m_sourceTangents[i];
    float3 bitangent = ReadFloat3FromFloatBuffer(InstanceSrg::m_sourceBiTangents, i);

    // Four indices, 16-bits each, stored in 2 32-bit uints
    uint2 rawIndices = InstanceSrg::m_sourceBlendIndices.Load2(i * 8);

    uint4 blendIndices;
    // Although the first index in each 32-bit pair is in the most significant bits in the cpu buffer,
    // the indices get swapped within the 32-bit uint when they are loaded so we treat them
    // as if the first index is in the least significant bits
    // NOTE(review): the code below takes the first index of each pair from the UPPER 16 bits, which does not
    // match the "least significant bits" wording above. Confirm against the CPU-side packing before changing either.
    blendIndices.x = rawIndices.x >> 16;
    blendIndices.y = rawIndices.x & 0x0000FFFF;
    blendIndices.z = rawIndices.y >> 16;
    blendIndices.w = rawIndices.y & 0x0000FFFF;

    if(o_applyMorphTargets)
    {
        ApplyMorphTargetDelta(InstanceSrg::m_morphTargetPositionDeltaOffset, i, position);
        ApplyMorphTargetDelta(InstanceSrg::m_morphTargetNormalDeltaOffset, i, normal);
        ApplyMorphTargetDelta(InstanceSrg::m_morphTargetTangentDeltaOffset, i, tangent.xyz);
        ApplyMorphTargetDelta(InstanceSrg::m_morphTargetBitangentDeltaOffset, i, bitangent);
    }

    if (o_applyColorMorphTargets)
    {
        // Colors are not skinned, so the morphed color is written out right away
        float4 color = InstanceSrg::m_sourceColors[i];
        ApplyMorphTargetDelta(InstanceSrg::m_morphTargetColorDeltaOffset, i, color);

        const uint colorOffset = InstanceSrg::m_targetColors + i * 4;
        PassSrg::m_skinnedMeshOutputStream[colorOffset] = color.r;
        PassSrg::m_skinnedMeshOutputStream[colorOffset + 1] = color.g;
        PassSrg::m_skinnedMeshOutputStream[colorOffset + 2] = color.b;
        PassSrg::m_skinnedMeshOutputStream[colorOffset + 3] = color.a;
    }

    switch(o_skinningMethod)
    {
    case SkinningMethod::LinearSkinning:
        SkinVertexLinear(blendIndices, blendWeights, position, normal, tangent, bitangent);
        break;
    case SkinningMethod::DualQuaternion:
        SkinVertexDualQuaternion(blendIndices, blendWeights, position, normal, tangent, bitangent);
        break;
    }

    // Write the skinned vertex to the output stream
    PassSrg::m_skinnedMeshOutputStream[positionOffset] = position.x;
    PassSrg::m_skinnedMeshOutputStream[positionOffset + 1] = position.y;
    PassSrg::m_skinnedMeshOutputStream[positionOffset + 2] = position.z;

    PassSrg::m_skinnedMeshOutputStream[normalOffset] = normal.x;
    PassSrg::m_skinnedMeshOutputStream[normalOffset + 1] = normal.y;
    PassSrg::m_skinnedMeshOutputStream[normalOffset + 2] = normal.z;

    PassSrg::m_skinnedMeshOutputStream[tangentOffset] = tangent.x;
    PassSrg::m_skinnedMeshOutputStream[tangentOffset + 1] = tangent.y;
    PassSrg::m_skinnedMeshOutputStream[tangentOffset + 2] = tangent.z;
    PassSrg::m_skinnedMeshOutputStream[tangentOffset + 3] = tangent.w;

    PassSrg::m_skinnedMeshOutputStream[bitangentOffset] = bitangent.x;
    PassSrg::m_skinnedMeshOutputStream[bitangentOffset + 1] = bitangent.y;
    PassSrg::m_skinnedMeshOutputStream[bitangentOffset + 2] = bitangent.z;
}
|