You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
450 lines
17 KiB
C++
450 lines
17 KiB
C++
/*
|
|
* All or portions of this file Copyright (c) Amazon.com, Inc. or its affiliates or
|
|
* its licensors.
|
|
*
|
|
* For complete copyright and license terms please see the LICENSE at the root of this
|
|
* distribution (the "License"). All use of this software is governed by the License,
|
|
* or, if provided, by the license below or the license accompanying this file. Do not
|
|
* remove or modify any license notices. This file is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
*
|
|
*/
|
|
// Original file Copyright Crytek GMBH or its affiliates, used under license.
|
|
|
|
// Description : Predictors for index frame compression
|
|
|
|
|
|
#ifndef CRYINCLUDE_CRY3DENGINE_GEOMCACHEPREDICTORS_H
|
|
#define CRYINCLUDE_CRY3DENGINE_GEOMCACHEPREDICTORS_H
|
|
#pragma once
|
|
|
|
#include "GeomCacheFileFormat.h"
|
|
#include "Cry3DEngineTraits.h"
|
|
|
|
namespace GeomCachePredictors
|
|
{
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// Index frame prediction
|
|
//////////////////////////////////////////////////////////////////////////
|
|
|
|
template<class T, bool kbEncode>
|
|
void ParallelogramPredictor(const uint numValues, T* pIn, T* pOut, const std::vector<uint16>& predictorData)
|
|
{
|
|
T* pAbsoluteValues = kbEncode ? pIn : pOut;
|
|
uint outPosition = 0;
|
|
|
|
for (uint i = 0, predictorDataPos = 0; i < numValues; ++i)
|
|
{
|
|
const uint16 uDist = predictorData[predictorDataPos++];
|
|
T predictedValue;
|
|
|
|
if (uDist == 0xFFFF)
|
|
{
|
|
if (i == 0)
|
|
{
|
|
// There is no previous value, so we just pass through
|
|
pOut[outPosition++] = pIn[i];
|
|
continue;
|
|
}
|
|
|
|
// No neighbour triangle, just use previous value for prediction
|
|
predictedValue = pAbsoluteValues[i - 1];
|
|
}
|
|
else
|
|
{
|
|
// Parallelogram prediction
|
|
const uint16 vDist = predictorData[predictorDataPos++];
|
|
const uint16 wDist = predictorData[predictorDataPos++];
|
|
|
|
const T& u = pAbsoluteValues[i - uDist];
|
|
const T& v = pAbsoluteValues[i - vDist];
|
|
const T& w = pAbsoluteValues[i - wDist];
|
|
|
|
predictedValue = u + v - w;
|
|
}
|
|
|
|
if (kbEncode)
|
|
{
|
|
const T realValue = pIn[i];
|
|
const T delta = realValue - predictedValue;
|
|
pOut[outPosition++] = delta;
|
|
}
|
|
else
|
|
{
|
|
const T delta = pIn[i];
|
|
const T realValue = delta + predictedValue;
|
|
pOut[outPosition++] = realValue;
|
|
}
|
|
}
|
|
}
|
|
|
|
template<bool kbEncode>
|
|
void QTangentPredictor(const uint numValues, const GeomCacheFile::QTangent* pIn, GeomCacheFile::QTangent* pOut, const std::vector<uint16>& predictorData)
|
|
{
|
|
const GeomCacheFile::QTangent* pAbsoluteValues = kbEncode ? pIn : pOut;
|
|
uint outPosition = 0;
|
|
|
|
for (uint i = 0, predictorDataPos = 0; i < numValues; ++i)
|
|
{
|
|
const uint16 uDist = predictorData[predictorDataPos++];
|
|
Vec4_tpl<int32> predictedValue;
|
|
|
|
if (uDist == 0xFFFF)
|
|
{
|
|
if (i == 0)
|
|
{
|
|
// There is no previous value, so we just pass through
|
|
pOut[outPosition++] = pIn[i];
|
|
continue;
|
|
}
|
|
|
|
// No neighbour triangle, just use previous value for prediction
|
|
predictedValue = pAbsoluteValues[i - 1];
|
|
}
|
|
else
|
|
{
|
|
// Average value of two nearest vertices of adjancent triangle
|
|
const uint16 vDist = predictorData[predictorDataPos++];
|
|
++predictorDataPos;
|
|
|
|
const GeomCacheFile::QTangent& u = pAbsoluteValues[i - uDist];
|
|
const GeomCacheFile::QTangent& v = pAbsoluteValues[i - vDist];
|
|
predictedValue = Vec4_tpl<int32>(u) + Vec4_tpl<int32>(v);
|
|
|
|
// Vec4_tpl defines division in a way that only works for floats
|
|
predictedValue.x /= 2;
|
|
predictedValue.y /= 2;
|
|
predictedValue.z /= 2;
|
|
predictedValue.w /= 2;
|
|
}
|
|
|
|
if (kbEncode)
|
|
{
|
|
const GeomCacheFile::QTangent& realValue = pIn[i];
|
|
const GeomCacheFile::QTangent delta = realValue - predictedValue;
|
|
pOut[outPosition++] = delta;
|
|
}
|
|
else
|
|
{
|
|
const GeomCacheFile::QTangent delta = pIn[i];
|
|
const GeomCacheFile::QTangent realValue = delta + predictedValue;
|
|
pOut[outPosition++] = realValue;
|
|
}
|
|
}
|
|
}
|
|
|
|
template<bool kbEncode>
|
|
inline void ColorPredictor(const uint numValues, const GeomCacheFile::Color* pIn, GeomCacheFile::Color* pOut, const std::vector<uint16>& predictorData)
|
|
{
|
|
const GeomCacheFile::Color* pAbsoluteValues = kbEncode ? pIn : pOut;
|
|
uint outPosition = 0;
|
|
|
|
for (uint i = 0, predictorDataPos = 0; i < numValues; ++i)
|
|
{
|
|
const uint16 uDist = predictorData[predictorDataPos++];
|
|
int32 predictedValue;
|
|
|
|
if (uDist == 0xFFFF)
|
|
{
|
|
if (i == 0)
|
|
{
|
|
// There is no previous value, so we just pass through
|
|
pOut[outPosition++] = pIn[i];
|
|
continue;
|
|
}
|
|
|
|
// No neighbour triangle, just use previous value for prediction
|
|
predictedValue = pAbsoluteValues[i - 1];
|
|
}
|
|
else
|
|
{
|
|
// Average value of two nearest vertices of adjancent triangle
|
|
const uint16 vDist = predictorData[predictorDataPos++];
|
|
++predictorDataPos;
|
|
|
|
const GeomCacheFile::Color& u = pAbsoluteValues[i - uDist];
|
|
const GeomCacheFile::Color& v = pAbsoluteValues[i - vDist];
|
|
predictedValue = (int32(u) + int32(v)) / 2;
|
|
}
|
|
|
|
if (kbEncode)
|
|
{
|
|
const GeomCacheFile::Color& realValue = pIn[i];
|
|
const GeomCacheFile::Color delta = realValue - predictedValue;
|
|
pOut[outPosition++] = delta;
|
|
}
|
|
else
|
|
{
|
|
const GeomCacheFile::Color delta = pIn[i];
|
|
const GeomCacheFile::Color realValue = delta + predictedValue;
|
|
pOut[outPosition++] = realValue;
|
|
}
|
|
}
|
|
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// Temporal prediction
|
|
//////////////////////////////////////////////////////////////////////////
|
|
|
|
// Motion predictor input data
|
|
template<class T>
|
|
struct STemporalPredictorData
|
|
{
|
|
uint m_numElements;
|
|
const T* m_pPrevFrames[2];
|
|
const T* m_pFloorFrame;
|
|
const T* m_pCeilFrame;
|
|
};
|
|
|
|
template<class T>
|
|
Vec2_tpl<T> operator>>(const Vec2_tpl<T>& v, uint shift)
|
|
{
|
|
Vec2_tpl<T> result = v;
|
|
result.x >>= shift;
|
|
result.y >>= shift;
|
|
return result;
|
|
}
|
|
|
|
template<class T>
|
|
Vec3_tpl<T> operator>>(const Vec3_tpl<T>& v, uint shift)
|
|
{
|
|
Vec3_tpl<T> result = v;
|
|
result.x >>= shift;
|
|
result.y >>= shift;
|
|
result.z >>= shift;
|
|
return result;
|
|
}
|
|
|
|
template<class T>
|
|
Vec4_tpl<T> operator>>(const Vec4_tpl<T>& v, uint shift)
|
|
{
|
|
Vec4_tpl<T> result = v;
|
|
result.x >>= shift;
|
|
result.y >>= shift;
|
|
result.z >>= shift;
|
|
result.w >>= shift;
|
|
return result;
|
|
}
|
|
|
|
template<class I, class T>
|
|
void InterpolateDeltaEncode(const uint numValues, const uint8 lerpFactor, const T* pFloorFrame, const T* pCeilFrame, const T* pIn, T* pOut)
|
|
{
|
|
for (uint i = 0; i < numValues; ++i)
|
|
{
|
|
const I floorValue = I(pFloorFrame[i]);
|
|
const I ceilValue = I(pCeilFrame[i]);
|
|
const T predictedValue = T(floorValue + (((ceilValue - floorValue) * lerpFactor) >> 8));
|
|
|
|
const T& realValue = pIn[i];
|
|
const T delta = realValue - predictedValue;
|
|
pOut[i] = delta;
|
|
}
|
|
}
|
|
|
|
template<class I, class T>
|
|
void MotionDeltaEncode(const uint numValues, const uint8 acceleration, const T* const pPrevFrames[2], const T* pIn, T* pOut)
|
|
{
|
|
for (uint i = 0; i < numValues; ++i)
|
|
{
|
|
const I prevPrevFrameValue = I(pPrevFrames[0][i]);
|
|
const I prevFrameValue = I(pPrevFrames[1][i]);
|
|
const T predictedValue = T(prevFrameValue + (((prevFrameValue - prevPrevFrameValue) * acceleration) >> 7));
|
|
|
|
const T& realValue = pIn[i];
|
|
const T delta = realValue - predictedValue;
|
|
pOut[i] = delta;
|
|
}
|
|
}
|
|
|
|
template<class I, class T, bool kbEncode>
|
|
void InterpolateMotionDeltaPredictor(const GeomCacheFile::STemporalPredictorControl& controlIn, const STemporalPredictorData<T>& data, const T* pIn, T* pOut)
|
|
{
|
|
const T* pFloorFrame = data.m_pFloorFrame;
|
|
const T* const pCeilFrame = data.m_pCeilFrame;
|
|
const T* const* pPrevFrames = data.m_pPrevFrames;
|
|
|
|
const uint8& lerpFactor = controlIn.m_indexFrameLerpFactor;
|
|
const uint8& acceleration = controlIn.m_acceleration;
|
|
const uint8 combineFactor = controlIn.m_combineFactor;
|
|
|
|
const uint numElements = data.m_numElements;
|
|
for (uint i = 0; i < numElements; ++i)
|
|
{
|
|
const I prevPrevFrameValue = I(pPrevFrames[0][i]);
|
|
const I prevFrameValue = I(pPrevFrames[1][i]);
|
|
const I floorValue = I(pFloorFrame[i]);
|
|
const I ceilValue = I(pCeilFrame[i]);
|
|
|
|
const I interpolatePredictedValue = T(floorValue + (((ceilValue - floorValue) * lerpFactor) >> 8));
|
|
const I motionPredictedValue = T(prevFrameValue + (((prevFrameValue - prevPrevFrameValue) * acceleration) >> 7));
|
|
const T predictedValue = (interpolatePredictedValue + (((motionPredictedValue - interpolatePredictedValue) * combineFactor) >> 7));
|
|
|
|
if (kbEncode)
|
|
{
|
|
const T realValue = pIn[i];
|
|
const T delta = realValue - predictedValue;
|
|
pOut[i] = delta;
|
|
}
|
|
else
|
|
{
|
|
const T delta = pIn[i];
|
|
const T realValue = delta + predictedValue;
|
|
pOut[i] = realValue;
|
|
}
|
|
}
|
|
}
|
|
|
|
#if AZ_LEGACY_3DENGINE_TRAIT_DEFINE_MM_MULLO_EPI32_EMU
|
|
ILINE __m128i _mm_mullo_epi32_emu(const __m128i& a, const __m128i& b)
|
|
{
|
|
#if AZ_LEGACY_3DENGINE_TRAIT_HAS_MM_MULLO_EPI32
|
|
return _mm_mullo_epi32(a, b);
|
|
#else
|
|
__m128i tmp1 = _mm_mul_epu32(a, b);
|
|
__m128i tmp2 = _mm_mul_epu32(_mm_srli_si128(a, 4), _mm_srli_si128(b, 4));
|
|
return _mm_unpacklo_epi32(_mm_shuffle_epi32(tmp1, _MM_SHUFFLE(0, 0, 2, 0)), _mm_shuffle_epi32(tmp2, _MM_SHUFFLE(0, 0, 2, 0)));
|
|
#endif
|
|
}
|
|
|
|
ILINE __m128i _mm_packus_epi32_emu(__m128i& a, __m128i& b)
|
|
{
|
|
#if AZ_LEGACY_3DENGINE_TRAIT_HAS_MM_PACKUS_EPI32
|
|
return _mm_packus_epi32(a, b);
|
|
#else
|
|
a = _mm_slli_epi32(a, 16);
|
|
b = _mm_slli_epi32(b, 16);
|
|
a = _mm_srai_epi32(a, 16);
|
|
b = _mm_srai_epi32(b, 16);
|
|
return _mm_packs_epi32(a, b);
|
|
#endif
|
|
}
|
|
|
|
ILINE __m128i Interpolate(__m128i a, __m128i b, __m128i c, const uint32 factor, const int shiftFactor)
|
|
{
|
|
const __m128i zero = _mm_setzero_si128();
|
|
const __m128i truncate = _mm_set_epi16(0, -1, 0, -1, 0, -1, 0, -1);
|
|
|
|
// Unpack to 2x4 32 bit integers
|
|
__m128i factors = _mm_set1_epi32(factor);
|
|
|
|
__m128i aLo = _mm_unpacklo_epi16(a, zero);
|
|
__m128i aHi = _mm_unpackhi_epi16(a, zero);
|
|
__m128i bLo = _mm_unpacklo_epi16(b, zero);
|
|
__m128i bHi = _mm_unpackhi_epi16(b, zero);
|
|
|
|
// Interpolate and pack again
|
|
__m128i lerpLo = _mm_sub_epi32(bLo, aLo);
|
|
lerpLo = _mm_mullo_epi32_emu(lerpLo, factors);
|
|
lerpLo = _mm_srli_epi32(lerpLo, shiftFactor);
|
|
lerpLo = _mm_and_si128(lerpLo, truncate);
|
|
|
|
__m128i lerpHi = _mm_sub_epi32(bHi, aHi);
|
|
lerpHi = _mm_mullo_epi32_emu(lerpHi, factors);
|
|
lerpHi = _mm_srli_epi32(lerpHi, shiftFactor);
|
|
lerpHi = _mm_and_si128(lerpHi, truncate);
|
|
|
|
__m128i lerp = _mm_packus_epi32_emu(lerpLo, lerpHi);
|
|
__m128i result = _mm_add_epi16(lerp, c);
|
|
|
|
return result;
|
|
}
|
|
|
|
template<>
|
|
void InterpolateMotionDeltaPredictor<uint32, uint16, false>
|
|
(const GeomCacheFile::STemporalPredictorControl& controlIn, const STemporalPredictorData<uint16>& data, const uint16* pIn, uint16* pOut)
|
|
{
|
|
__m128i* pRawIn = (__m128i*)pIn;
|
|
__m128i* pRawOut = (__m128i*)pOut;
|
|
__m128i* pFloorFrame = (__m128i*)data.m_pFloorFrame;
|
|
__m128i* pCeilFrame = (__m128i*)data.m_pCeilFrame;
|
|
__m128i* pPrevFrames[2] = { (__m128i*)data.m_pPrevFrames[0], (__m128i*)data.m_pPrevFrames[1] };
|
|
|
|
const uint8 lerpFactor = controlIn.m_indexFrameLerpFactor;
|
|
const uint8 acceleration = controlIn.m_acceleration;
|
|
const uint8 combineFactor = controlIn.m_combineFactor;
|
|
|
|
// vector store as much as possible, but account for cases where the output buffer
|
|
// size doesn't divide evenly by 8
|
|
const uint remainingElements = data.m_numElements % 8;
|
|
const uint numElementsPadded = data.m_numElements / 8 + (remainingElements != 0);
|
|
const uint lastElement = numElementsPadded - 1;
|
|
for (uint i = 0; i < numElementsPadded; ++i)
|
|
{
|
|
// Load 8 floor & ceil values
|
|
__m128i floorValues = _mm_load_si128(pFloorFrame + i);
|
|
__m128i ceilValues = _mm_load_si128(pCeilFrame + i);
|
|
|
|
// Load 8 prep prev & prev frame values
|
|
__m128i prevPrevFrameValues = _mm_load_si128(pPrevFrames[0] + i);
|
|
__m128i prevFrameValues = _mm_load_si128(pPrevFrames[1] + i);
|
|
|
|
// Calculate prediction
|
|
__m128i lerp = Interpolate(floorValues, ceilValues, floorValues, lerpFactor, 8);
|
|
__m128i motion = Interpolate(prevPrevFrameValues, prevFrameValues, prevFrameValues, acceleration, 7);
|
|
__m128i predictedValues = Interpolate(lerp, motion, lerp, combineFactor, 7);
|
|
|
|
__m128i delta = _mm_load_si128(pRawIn + i);
|
|
__m128i realValues = _mm_add_epi16(delta, predictedValues);
|
|
if (i == lastElement)
|
|
{
|
|
memcpy(pOut + (i * 8), &realValues, remainingElements * sizeof(uint16));
|
|
}
|
|
else
|
|
{
|
|
_mm_store_si128(pRawOut + i, realValues);
|
|
}
|
|
}
|
|
}
|
|
|
|
template<>
|
|
void InterpolateMotionDeltaPredictor<Vec2_tpl<uint32>, Vec2_tpl<uint16>, false>
|
|
(const GeomCacheFile::STemporalPredictorControl& controlIn, const STemporalPredictorData<Vec2_tpl<uint16> >& data,
|
|
const Vec2_tpl<uint16>* pIn, Vec2_tpl<uint16>* pOut)
|
|
{
|
|
STemporalPredictorData<uint16> uInt16Data;
|
|
|
|
uInt16Data.m_pFloorFrame = (uint16*)data.m_pFloorFrame;
|
|
uInt16Data.m_pCeilFrame = (uint16*)data.m_pCeilFrame;
|
|
uInt16Data.m_pPrevFrames[0] = (uint16*)data.m_pPrevFrames[0];
|
|
uInt16Data.m_pPrevFrames[1] = (uint16*)data.m_pPrevFrames[1];
|
|
uInt16Data.m_numElements = data.m_numElements * 2;
|
|
|
|
InterpolateMotionDeltaPredictor<uint32, uint16, false>(controlIn, uInt16Data, (uint16*)pIn, (uint16*)pOut);
|
|
}
|
|
|
|
template<>
|
|
void InterpolateMotionDeltaPredictor<Vec3_tpl<uint32>, Vec3_tpl<uint16>, false>
|
|
(const GeomCacheFile::STemporalPredictorControl& controlIn, const STemporalPredictorData<Vec3_tpl<uint16> >& data,
|
|
const Vec3_tpl<uint16>* pIn, Vec3_tpl<uint16>* pOut)
|
|
{
|
|
STemporalPredictorData<uint16> uInt16Data;
|
|
|
|
uInt16Data.m_pFloorFrame = (uint16*)data.m_pFloorFrame;
|
|
uInt16Data.m_pCeilFrame = (uint16*)data.m_pCeilFrame;
|
|
uInt16Data.m_pPrevFrames[0] = (uint16*)data.m_pPrevFrames[0];
|
|
uInt16Data.m_pPrevFrames[1] = (uint16*)data.m_pPrevFrames[1];
|
|
uInt16Data.m_numElements = data.m_numElements * 3;
|
|
|
|
InterpolateMotionDeltaPredictor<uint32, uint16, false>(controlIn, uInt16Data, (uint16*)pIn, (uint16*)pOut);
|
|
}
|
|
|
|
template<>
|
|
void InterpolateMotionDeltaPredictor<Vec4_tpl<uint32>, Vec4_tpl<uint16>, false>
|
|
(const GeomCacheFile::STemporalPredictorControl& controlIn, const STemporalPredictorData<Vec4_tpl<uint16> >& data,
|
|
const Vec4_tpl<uint16>* pIn, Vec4_tpl<uint16>* pOut)
|
|
{
|
|
STemporalPredictorData<uint16> uInt16Data;
|
|
|
|
uInt16Data.m_pFloorFrame = (uint16*)data.m_pFloorFrame;
|
|
uInt16Data.m_pCeilFrame = (uint16*)data.m_pCeilFrame;
|
|
uInt16Data.m_pPrevFrames[0] = (uint16*)data.m_pPrevFrames[0];
|
|
uInt16Data.m_pPrevFrames[1] = (uint16*)data.m_pPrevFrames[1];
|
|
uInt16Data.m_numElements = data.m_numElements * 4;
|
|
|
|
InterpolateMotionDeltaPredictor<uint32, uint16, false>(controlIn, uInt16Data, (uint16*)pIn, (uint16*)pOut);
|
|
}
|
|
#endif
|
|
}
|
|
|
|
#endif // CRYINCLUDE_CRY3DENGINE_GEOMCACHEPREDICTORS_H
|