You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
o3de/Code/Legacy/CryCommon/Cry_XOptimise.h

566 lines
16 KiB
C++

/*
* Copyright (c) Contributors to the Open 3D Engine Project
*
* SPDX-License-Identifier: Apache-2.0 OR MIT
*
*/
// Description : Misc mathematical functions
#ifndef CRYINCLUDE_CRYCOMMON_CRY_XOPTIMISE_H
#define CRYINCLUDE_CRYCOMMON_CRY_XOPTIMISE_H
#pragma once
#include <platform.h>
inline float AngleMod(float a)
{
a = (float)((360.0 / 65536) * ((int)(a * (65536 / 360.0)) & 65535));
return a;
}
inline float AngleModRad(float a)
{
a = (float)((gf_PI2 / 65536) * ((int)(a * (65536 / gf_PI2)) & 65535));
return a;
}
inline unsigned short Degr2Word(float f)
{
return (unsigned short)(AngleMod(f) / 360.0f * 65536.0f);
}
inline float Word2Degr(unsigned short s)
{
return (float)s / 65536.0f * 360.0f;
}
#if defined(_CPU_X86)
ILINE float __fastcall Ffabs(float f)
{
*((unsigned*) &f) &= ~0x80000000;
return (f);
}
#else
inline float Ffabs(float x) { return fabsf(x); }
#endif
#define mathMatrixRotationZ(pOut, angle) (*(Matrix44*)pOut) = GetTransposed44(Matrix44(Matrix34::CreateRotationZ(angle)))
#define mathMatrixRotationY(pOut, angle) (*(Matrix44*)pOut) = GetTransposed44(Matrix44(Matrix34::CreateRotationY(angle)))
#define mathMatrixRotationX(pOut, angle) (*(Matrix44*)pOut) = GetTransposed44(Matrix44(Matrix34::CreateRotationX(angle)))
#define mathMatrixTranslation(pOut, x, y, z) (*(Matrix44*)pOut) = GetTransposed44(Matrix44(Matrix34::CreateTranslationMat(Vec3(x, y, z))))
#define mathMatrixScaling(pOut, sx, sy, sz) (*(Matrix44*)pOut) = GetTransposed44(Matrix44(Matrix34::CreateScale(Vec3(sx, sy, sz))))
template <class T>
inline void ExchangeVals(T& X, T& Y)
{
const T Tmp = X;
X = Y;
Y = Tmp;
}
inline void mathMatrixPerspectiveFov(Matrix44A* pMatr, f32 fovY, f32 Aspect, f32 zn, f32 zf)
{
f32 yScale = 1.0f / tan_tpl(fovY / 2.0f);
f32 xScale = yScale / Aspect;
f32 m22 = f32(f64(zf) / (f64(zn) - f64(zf)));
f32 m32 = f32(f64(zn) * f64(zf) / (f64(zn) - f64(zf)));
(*pMatr)(0, 0) = xScale;
(*pMatr)(0, 1) = 0;
(*pMatr)(0, 2) = 0;
(*pMatr)(0, 3) = 0;
(*pMatr)(1, 0) = 0;
(*pMatr)(1, 1) = yScale;
(*pMatr)(1, 2) = 0;
(*pMatr)(1, 3) = 0;
(*pMatr)(2, 0) = 0;
(*pMatr)(2, 1) = 0;
(*pMatr)(2, 2) = m22;
(*pMatr)(2, 3) = -1.0f;
(*pMatr)(3, 0) = 0;
(*pMatr)(3, 1) = 0;
(*pMatr)(3, 2) = m32;
(*pMatr)(3, 3) = 0;
}
inline void mathMatrixOrtho(Matrix44A* pMatr, f32 w, f32 h, f32 zn, f32 zf)
{
f32 m22 = f32(1.0 / (f64(zn) - f64(zf)));
f32 m32 = f32(f64(zn) / (f64(zn) - f64(zf)));
(*pMatr)(0, 0) = 2.0f / w;
(*pMatr)(0, 1) = 0;
(*pMatr)(0, 2) = 0;
(*pMatr)(0, 3) = 0;
(*pMatr)(1, 0) = 0;
(*pMatr)(1, 1) = 2.0f / h;
(*pMatr)(1, 2) = 0;
(*pMatr)(1, 3) = 0;
(*pMatr)(2, 0) = 0;
(*pMatr)(2, 1) = 0;
(*pMatr)(2, 2) = m22;
(*pMatr)(2, 3) = 0;
(*pMatr)(3, 0) = 0;
(*pMatr)(3, 1) = 0;
(*pMatr)(3, 2) = m32;
(*pMatr)(3, 3) = 1;
}
inline void mathMatrixOrthoOffCenter(Matrix44A* pMatr, f32 l, f32 r, f32 b, f32 t, f32 zn, f32 zf)
{
f32 m22 = f32(1.0 / (f64(zn) - f64(zf)));
f32 m32 = f32(f64(zn) / (f64(zn) - f64(zf)));
(*pMatr)(0, 0) = 2.0f / (r - l);
(*pMatr)(0, 1) = 0;
(*pMatr)(0, 2) = 0;
(*pMatr)(0, 3) = 0;
(*pMatr)(1, 0) = 0;
(*pMatr)(1, 1) = 2.0f / (t - b);
(*pMatr)(1, 2) = 0;
(*pMatr)(1, 3) = 0;
(*pMatr)(2, 0) = 0;
(*pMatr)(2, 1) = 0;
(*pMatr)(2, 2) = m22;
(*pMatr)(2, 3) = 0;
(*pMatr)(3, 0) = (l + r) / (l - r);
(*pMatr)(3, 1) = (t + b) / (b - t);
(*pMatr)(3, 2) = m32;
(*pMatr)(3, 3) = 1.0f;
}
inline void mathMatrixOrthoOffCenterLH(Matrix44A* pMatr, f32 l, f32 r, f32 b, f32 t, f32 zn, f32 zf)
{
f32 m22 = f32(1.0 / (f64(zf) - f64(zn)));
f32 m32 = f32(f64(zn) / (f64(zn) - f64(zf)));
(*pMatr)(0, 0) = 2.0f / (r - l);
(*pMatr)(0, 1) = 0;
(*pMatr)(0, 2) = 0;
(*pMatr)(0, 3) = 0;
(*pMatr)(1, 0) = 0;
(*pMatr)(1, 1) = 2.0f / (t - b);
(*pMatr)(1, 2) = 0;
(*pMatr)(1, 3) = 0;
(*pMatr)(2, 0) = 0;
(*pMatr)(2, 1) = 0;
(*pMatr)(2, 2) = m22;
(*pMatr)(2, 3) = 0;
(*pMatr)(3, 0) = (l + r) / (l - r);
(*pMatr)(3, 1) = (t + b) / (b - t);
(*pMatr)(3, 2) = m32;
(*pMatr)(3, 3) = 1.0f;
}
inline void mathMatrixPerspectiveOffCenter(Matrix44A* pMatr, f32 l, f32 r, f32 b, f32 t, f32 zn, f32 zf)
{
f32 m22 = f32(f64(zf) / (f64(zn) - f64(zf)));
f32 m32 = f32(f64(zn) * f64(zf) / (f64(zn) - f64(zf)));
(*pMatr)(0, 0) = 2 * zn / (r - l);
(*pMatr)(0, 1) = 0;
(*pMatr)(0, 2) = 0;
(*pMatr)(0, 3) = 0;
(*pMatr)(1, 0) = 0;
(*pMatr)(1, 1) = 2 * zn / (t - b);
(*pMatr)(1, 2) = 0;
(*pMatr)(1, 3) = 0;
(*pMatr)(2, 0) = (l + r) / (r - l);
(*pMatr)(2, 1) = (t + b) / (t - b);
(*pMatr)(2, 2) = m22;
(*pMatr)(2, 3) = -1;
(*pMatr)(3, 0) = 0;
(*pMatr)(3, 1) = 0;
(*pMatr)(3, 2) = m32;
(*pMatr)(3, 3) = 0;
}
inline void mathMatrixPerspectiveOffCenterReverseDepth(Matrix44A* pMatr, f32 l, f32 r, f32 b, f32 t, f32 zn, f32 zf)
{
f32 m22 = f32(-f64(zn) / (f64(zn) - f64(zf)));
f32 m32 = f32(-f64(zn) * f64(zf) / (f64(zn) - f64(zf)));
(*pMatr)(0, 0) = 2 * zn / (r - l);
(*pMatr)(0, 1) = 0;
(*pMatr)(0, 2) = 0;
(*pMatr)(0, 3) = 0;
(*pMatr)(1, 0) = 0;
(*pMatr)(1, 1) = 2 * zn / (t - b);
(*pMatr)(1, 2) = 0;
(*pMatr)(1, 3) = 0;
(*pMatr)(2, 0) = (l + r) / (r - l);
(*pMatr)(2, 1) = (t + b) / (t - b);
(*pMatr)(2, 2) = m22;
(*pMatr)(2, 3) = -1;
(*pMatr)(3, 0) = 0;
(*pMatr)(3, 1) = 0;
(*pMatr)(3, 2) = m32;
(*pMatr)(3, 3) = 0;
}
//RH
inline void mathMatrixLookAt(Matrix44A* pMatr, const Vec3& Eye, const Vec3& At, const Vec3& Up)
{
Vec3 vLightDir = (Eye - At);
Vec3 zaxis = vLightDir.GetNormalized();
Vec3 xaxis = (Up.Cross(zaxis)).GetNormalized();
Vec3 yaxis = zaxis.Cross(xaxis);
(*pMatr)(0, 0) = xaxis.x;
(*pMatr)(0, 1) = yaxis.x;
(*pMatr)(0, 2) = zaxis.x;
(*pMatr)(0, 3) = 0;
(*pMatr)(1, 0) = xaxis.y;
(*pMatr)(1, 1) = yaxis.y;
(*pMatr)(1, 2) = zaxis.y;
(*pMatr)(1, 3) = 0;
(*pMatr)(2, 0) = xaxis.z;
(*pMatr)(2, 1) = yaxis.z;
(*pMatr)(2, 2) = zaxis.z;
(*pMatr)(2, 3) = 0;
(*pMatr)(3, 0) = -xaxis.Dot(Eye);
(*pMatr)(3, 1) = -yaxis.Dot(Eye);
(*pMatr)(3, 2) = -zaxis.Dot(Eye);
(*pMatr)(3, 3) = 1;
}
inline bool mathMatrixPerspectiveFovInverse(Matrix44_tpl<f64>* pResult, const Matrix44A* pProjFov)
{
if ((*pProjFov)(0, 1) == 0.0f && (*pProjFov)(0, 2) == 0.0f && (*pProjFov)(0, 3) == 0.0f &&
(*pProjFov)(1, 0) == 0.0f && (*pProjFov)(1, 2) == 0.0f && (*pProjFov)(1, 3) == 0.0f &&
(*pProjFov)(3, 0) == 0.0f && (*pProjFov)(3, 1) == 0.0f && (*pProjFov)(3, 2) != 0.0f)
{
(*pResult)(0, 0) = 1.0 / (*pProjFov).m00;
(*pResult)(0, 1) = 0;
(*pResult)(0, 2) = 0;
(*pResult)(0, 3) = 0;
(*pResult)(1, 0) = 0;
(*pResult)(1, 1) = 1.0 / (*pProjFov).m11;
(*pResult)(1, 2) = 0;
(*pResult)(1, 3) = 0;
(*pResult)(2, 0) = 0;
(*pResult)(2, 1) = 0;
(*pResult)(2, 2) = 0;
(*pResult)(2, 3) = 1.0 / (*pProjFov).m32;
(*pResult)(3, 0) = (*pProjFov).m20 / (*pProjFov).m00;
(*pResult)(3, 1) = (*pProjFov).m21 / (*pProjFov).m11;
(*pResult)(3, 2) = -1;
(*pResult)(3, 3) = (*pProjFov).m22 / (*pProjFov).m32;
return true;
}
return false;
}
template<class T_out, class T_in>
inline void mathMatrixLookAtInverse(Matrix44_tpl<T_out>* pResult, const Matrix44_tpl<T_in>* pLookAt)
{
(*pResult)(0, 0) = (*pLookAt).m00;
(*pResult)(0, 1) = (*pLookAt).m10;
(*pResult)(0, 2) = (*pLookAt).m20;
(*pResult)(0, 3) = (*pLookAt).m03;
(*pResult)(1, 0) = (*pLookAt).m01;
(*pResult)(1, 1) = (*pLookAt).m11;
(*pResult)(1, 2) = (*pLookAt).m21;
(*pResult)(1, 3) = (*pLookAt).m13;
(*pResult)(2, 0) = (*pLookAt).m02;
(*pResult)(2, 1) = (*pLookAt).m12;
(*pResult)(2, 2) = (*pLookAt).m22;
(*pResult)(2, 3) = (*pLookAt).m23;
(*pResult)(3, 0) = T_out(-(f64((*pLookAt).m00) * f64((*pLookAt).m30) + f64((*pLookAt).m01) * f64((*pLookAt).m31) + f64((*pLookAt).m02) * f64((*pLookAt).m32)));
(*pResult)(3, 1) = T_out(-(f64((*pLookAt).m10) * f64((*pLookAt).m30) + f64((*pLookAt).m11) * f64((*pLookAt).m31) + f64((*pLookAt).m12) * f64((*pLookAt).m32)));
(*pResult)(3, 2) = T_out(-(f64((*pLookAt).m20) * f64((*pLookAt).m30) + f64((*pLookAt).m21) * f64((*pLookAt).m31) + f64((*pLookAt).m22) * f64((*pLookAt).m32)));
(*pResult)(3, 3) = (*pLookAt).m33;
};
inline void mathVec4Transform(f32 out[4], const f32 m[16], const f32 in[4])
{
#define M(row, col) m[col * 4 + row]
out[0] = M(0, 0) * in[0] + M(0, 1) * in[1] + M(0, 2) * in[2] + M(0, 3) * in[3];
out[1] = M(1, 0) * in[0] + M(1, 1) * in[1] + M(1, 2) * in[2] + M(1, 3) * in[3];
out[2] = M(2, 0) * in[0] + M(2, 1) * in[1] + M(2, 2) * in[2] + M(2, 3) * in[3];
out[3] = M(3, 0) * in[0] + M(3, 1) * in[1] + M(3, 2) * in[2] + M(3, 3) * in[3];
#undef M
}
//fix: replace by 3x4 Matrix transformation and move to crymath
inline void mathVec3Transform(f32 out[4], const f32 m[16], const f32 in[3])
{
#define M(row, col) m[col * 4 + row]
out[0] = M(0, 0) * in[0] + M(0, 1) * in[1] + M(0, 2) * in[2] + M(0, 3) * 1.0f;
out[1] = M(1, 0) * in[0] + M(1, 1) * in[1] + M(1, 2) * in[2] + M(1, 3) * 1.0f;
out[2] = M(2, 0) * in[0] + M(2, 1) * in[1] + M(2, 2) * in[2] + M(2, 3) * 1.0f;
out[3] = M(3, 0) * in[0] + M(3, 1) * in[1] + M(3, 2) * in[2] + M(3, 3) * 1.0f;
#undef M
}
#define mathVec3TransformF(pOut, pV, pM) mathVec3Transform((f32*)pOut, (const f32*)pM, (f32*)pV)
#define mathVec4TransformF(pOut, pV, pM) mathVec4Transform((f32*)pOut, (const f32*)pM, (f32*)pV)
#define mathVec3NormalizeF(pOut, pV) (*(Vec3*)pOut) = (((Vec3*)pV)->GetNormalizedSafe())
#define mathVec2NormalizeF(pOut, pV) (*(Vec2*)pOut) = (((Vec2*)pV)->GetNormalizedSafe())
//fix replace viewport by int16 array
//fix for d3d viewport
inline f32 mathVec3Project(Vec3* pvWin, const Vec3* pvObj, const int32 pViewport[4], const Matrix44A* pProjection, const Matrix44A* pView, const Matrix44A* pWorld)
{
Vec4 in, out;
in.x = pvObj->x;
in.y = pvObj->y;
in.z = pvObj->z;
in.w = 1.0f;
mathVec4Transform((f32*)&out, (f32*)pWorld, (f32*)&in);
mathVec4Transform((f32*)&in, (f32*)pView, (f32*)&out);
mathVec4Transform((f32*)&out, (f32*)pProjection, (f32*)&in);
if (out.w == 0.0f)
{
return 0.f;
}
out.x /= out.w;
out.y /= out.w;
out.z /= out.w;
//output coords
pvWin->x = pViewport[0] + (1 + out.x) * pViewport[2] / 2;
pvWin->y = pViewport[1] + (1 - out.y) * pViewport[3] / 2; //flip coords for y axis
//FIX: update fViewportMinZ fViewportMaxZ support for Viewport everywhere
float fViewportMinZ = 0, fViewportMaxZ = 1.0f;
pvWin->z = fViewportMinZ + out.z * (fViewportMaxZ - fViewportMinZ);
return out.w;
}
inline Vec3* mathVec3UnProject(Vec3* pvObj, const Vec3* pvWin, const int32 pViewport[4], const Matrix44A* pProjection, const Matrix44A* pView, const Matrix44A* pWorld, [[maybe_unused]] int32 OptFlags)
{
Matrix44A m, mA;
Vec4 in, out;
//FIX: update fViewportMinZ fViewportMaxZ support for Viewport everywhere
float fViewportMinZ = 0, fViewportMaxZ = 1.0f;
in.x = (pvWin->x - pViewport[0]) * 2 / pViewport[2] - 1.0f;
in.y = 1.0f - ((pvWin->y - pViewport[1]) * 2 / pViewport[3]); //flip coords for y axis
in.z = (pvWin->z - fViewportMinZ) / (fViewportMaxZ - fViewportMinZ);
in.w = 1.0f;
//prepare inverse projection matrix
mA = ((*pWorld) * (*pView)) * (*pProjection);
m = mA.GetInverted();
mathVec4Transform((f32*)&out, m.GetData(), (f32*)&in);
if (out.w == 0.0f)
{
return NULL;
}
pvObj->x = out.x / out.w;
pvObj->y = out.y / out.w;
pvObj->z = out.z / out.w;
return pvObj;
}
inline Vec3* mathVec3ProjectArray(Vec3* pOut, uint32 OutStride, const Vec3* pV, uint32 VStride, const int32 pViewport[4], const Matrix44A* pProjection, const Matrix44A* pView, const Matrix44A* pWorld, uint32 n, int32)
{
Matrix44A m;
Vec4 in, out;
int8* pOutT = (int8*)pOut;
int8* pInT = (int8*)pV;
Vec3* pvWin;
Vec3* pvObj;
//FIX: update fViewportMinZ fViewportMaxZ support for Viewport everywhere
float fViewportMinZ = 0, fViewportMaxZ = 1.0f;
m = ((*pWorld) * (*pView)) * (*pProjection);
for (uint32 i = 0; i < n; i++)
{
pvObj = (Vec3*)pInT;
pvWin = (Vec3*)pOutT;
in.x = pvObj->x;
in.y = pvObj->y;
in.z = pvObj->z;
in.w = 1.0f;
mathVec4Transform((f32*)&out, m.GetData(), (f32*)&in);
if (out.w == 0.0f)
{
return NULL;
}
float fInvW = 1.0f / out.w;
out.x *= fInvW;
out.y *= fInvW;
out.z *= fInvW;
//output coords
pvWin->x = pViewport[0] + (1 + out.x) * pViewport[2] / 2;
pvWin->y = pViewport[1] + (1 - out.y) * pViewport[3] / 2; //flip coords for y axis
pvWin->z = fViewportMinZ + out.z * (fViewportMaxZ - fViewportMinZ);
pOutT += OutStride;
pInT += VStride;
}
return pOut;
}
inline Vec3* mathVec3UnprojectArray(Vec3* pOut, uint32 OutStride, const Vec3* pV, uint32 VStride, const int32 pViewport[4], const Matrix44* pProjection, const Matrix44* pView, const Matrix44* pWorld, uint32 n, [[maybe_unused]] int32 OptFlags)
{
Vec4 in, out;
Matrix44 m, mA;
int8* pOutT = (int8*)pOut;
int8* pInT = (int8*)pV;
Vec3* pvWin;
Vec3* pvObj;
//FIX: update fViewportMinZ fViewportMaxZ support for Viewport everywhere
float fViewportMinZ = 0, fViewportMaxZ = 1.0f;
mA = ((*pWorld) * (*pView)) * (*pProjection);
m = mA.GetInverted();
for (uint32 i = 0; i < n; i++)
{
pvWin = (Vec3*)pInT;
pvObj = (Vec3*)pOutT;
in.x = (pvWin->x - pViewport[0]) * 2 / pViewport[2] - 1.0f;
in.y = 1.0f - ((pvWin->y - pViewport[1]) * 2 / pViewport[3]); //flip coords for y axis
in.z = (pvWin->z - fViewportMinZ) / (fViewportMaxZ - fViewportMinZ);
in.w = 1.0f;
mathVec4Transform((f32*)&out, m.GetData(), (f32*)&in);
assert(out.w != 0.0f);
if (out.w == 0.0f)
{
return NULL;
}
pvObj->x = out.x / out.w;
pvObj->y = out.y / out.w;
pvObj->z = out.z / out.w;
pOutT += OutStride;
pInT += VStride;
}
return pOut;
}
/*****************************************************
MISC FUNCTIONS
*****************************************************/
//////////////////////////////////////////////////////////////////////////
#if defined(_CPU_X86)
inline int fastftol_positive(float f)
{
int i;
f -= 0.5f;
#if defined(_MSC_VER)
__asm fld [f]
__asm fistp [i]
#elif defined(__GNUC__)
__asm__ ("fld %[f]\n fistpl %[i]" : [i] "+m" (i) : [f] "m" (f));
#else
#error
#endif
return i;
}
#else
inline int fastftol_positive (float f)
{
assert(f >= 0.f);
return (int)floorf(f);
}
#endif
//////////////////////////////////////////////////////////////////////////
#if defined(_CPU_X86)
inline int fastround_positive(float f)
{
int i;
assert(f >= 0.f);
#if defined(_MSC_VER)
__asm fld [f]
__asm fistp [i]
#elif defined(__GNUC__)
__asm__ ("fld %[f]\n fistpl %[i]" : [i] "+m" (i) : [f] "m" (f));
#else
#error
#endif
return i;
}
#else
inline int fastround_positive(float f)
{
assert(f >= 0.f);
return (int) (f + 0.5f);
}
#endif
//////////////////////////////////////////////////////////////////////////
#if defined(_CPU_X86)
ILINE int __fastcall FtoI(float x)
{
int t;
#if defined(_MSC_VER)
__asm
{
fld x
fistp t
}
#elif defined(__GNUC__)
__asm__ ("fld %[x]\n fistpl %[t]" : [t] "+m" (t) : [x] "m" (x));
#else
#error
#endif
return t;
}
#else
inline int FtoI(float x) { return (int)x; }
#endif
#endif // CRYINCLUDE_CRYCOMMON_CRY_XOPTIMISE_H