You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
594 lines
16 KiB
C++
594 lines
16 KiB
C++
/*
|
|
* Copyright (c) Contributors to the Open 3D Engine Project
|
|
*
|
|
* SPDX-License-Identifier: Apache-2.0 OR MIT
|
|
*
|
|
*/
|
|
|
|
|
|
// Description : various integer bit fiddling hacks
|
|
|
|
|
|
#pragma once
|
|
|
|
#include "CompileTimeAssert.h"
|
|
#include <AzCore/Casting/numeric_cast.h>
|
|
|
|
// Section dictionary
|
|
#if defined(AZ_RESTRICTED_PLATFORM)
|
|
#define BITFIDDLING_H_SECTION_TRAITS 1
|
|
#define BITFIDDLING_H_SECTION_INTEGERLOG2 2
|
|
#endif
|
|
|
|
// Traits
|
|
#if defined(AZ_RESTRICTED_PLATFORM)
|
|
#define AZ_RESTRICTED_SECTION BITFIDDLING_H_SECTION_TRAITS
|
|
#include AZ_RESTRICTED_FILE(BitFiddling_h)
|
|
#elif defined(LINUX) || defined(APPLE)
|
|
#define BITFIDDLING_H_TRAIT_HAS_COUNT_LEADING_ZEROS 1
|
|
#endif
|
|
|
|
#if BITFIDDLING_H_TRAIT_HAS_COUNT_LEADING_ZEROS
|
|
#define countLeadingZeros32(x) __builtin_clz(x)
|
|
#else // Windows implementation
|
|
ILINE uint32 countLeadingZeros32(uint32 x)
|
|
{
|
|
DWORD result = 32 ^ 31; // assumes result is unmodified if _BitScanReverse returns 0
|
|
_BitScanReverse(&result, x);
|
|
PREFAST_SUPPRESS_WARNING(6102);
|
|
result ^= 31; // needed because the index is from LSB (whereas all other implementations are from MSB)
|
|
return result;
|
|
}
|
|
#endif
|
|
|
|
inline uint32 circularShift(uint32 nbits, uint32 i)
|
|
{
|
|
return (i << nbits) | (i >> (32 - nbits));
|
|
}
|
|
|
|
template <typename T>
|
|
inline size_t countTrailingZeroes(T v)
|
|
{
|
|
size_t n = 0;
|
|
|
|
v = ~v & (v - 1);
|
|
while (v)
|
|
{
|
|
++n;
|
|
v >>= 1;
|
|
}
|
|
|
|
return n;
|
|
}
|
|
|
|
// this function returns the integer logarithm of various numbers without branching
|
|
#define IL2VAL(mask, shift) \
|
|
c |= ((x & mask) != 0) * shift; \
|
|
x >>= ((x & mask) != 0) * shift
|
|
|
|
template <typename TInteger>
|
|
inline bool IsPowerOfTwo(TInteger x)
|
|
{
|
|
return (x & (x - 1)) == 0;
|
|
}
|
|
|
|
// compile time version of IsPowerOfTwo, useful for STATIC_CHECK
|
|
template <int nValue>
|
|
struct IsPowerOfTwoCompileTime
|
|
{
|
|
enum
|
|
{
|
|
IsPowerOfTwo = ((nValue & (nValue - 1)) == 0)
|
|
};
|
|
};
|
|
|
|
inline uint32 NextPower2(uint32 n)
|
|
{
|
|
n--;
|
|
n |= n >> 1;
|
|
n |= n >> 2;
|
|
n |= n >> 4;
|
|
n |= n >> 8;
|
|
n |= n >> 16;
|
|
n++;
|
|
return n;
|
|
}
|
|
|
|
inline uint8 IntegerLog2(uint8 x)
|
|
{
|
|
uint8 c = 0;
|
|
IL2VAL(0xf0, 4);
|
|
IL2VAL(0xc, 2);
|
|
IL2VAL(0x2, 1);
|
|
return c;
|
|
}
|
|
inline uint16 IntegerLog2(uint16 x)
|
|
{
|
|
uint16 c = 0;
|
|
IL2VAL(0xff00, 8);
|
|
IL2VAL(0xf0, 4);
|
|
IL2VAL(0xc, 2);
|
|
IL2VAL(0x2, 1);
|
|
return c;
|
|
}
|
|
|
|
inline uint32 IntegerLog2(uint32 x)
|
|
{
|
|
return 31 - countLeadingZeros32(x);
|
|
}
|
|
|
|
inline uint64 IntegerLog2(uint64 x)
|
|
{
|
|
uint64 c = 0;
|
|
IL2VAL(0xffffffff00000000ull, 32);
|
|
IL2VAL(0xffff0000u, 16);
|
|
IL2VAL(0xff00, 8);
|
|
IL2VAL(0xf0, 4);
|
|
IL2VAL(0xc, 2);
|
|
IL2VAL(0x2, 1);
|
|
return c;
|
|
}
|
|
|
|
#if defined(APPLE) || defined(LINUX)
|
|
inline unsigned long int IntegerLog2(unsigned long int x)
|
|
{
|
|
#if defined(PLATFORM_64BIT)
|
|
return IntegerLog2((uint64)x);
|
|
#else
|
|
return IntegerLog2((uint32)x);
|
|
#endif
|
|
}
|
|
#endif
|
|
#undef IL2VAL
|
|
|
|
#if defined(AZ_RESTRICTED_PLATFORM)
|
|
#define AZ_RESTRICTED_SECTION BITFIDDLING_H_SECTION_INTEGERLOG2
|
|
#include AZ_RESTRICTED_FILE(BitFiddling_h)
|
|
#endif
|
|
|
|
template <typename TInteger>
|
|
inline TInteger IntegerLog2_RoundUp(TInteger x)
|
|
{
|
|
return 1 + IntegerLog2(x - 1);
|
|
}
|
|
|
|
static ILINE uint8 BitIndex(uint8 v)
|
|
{
|
|
uint32 vv = v;
|
|
return aznumeric_caster(31 - countLeadingZeros32(vv));
|
|
}
|
|
|
|
static ILINE uint8 BitIndex(uint16 v)
|
|
{
|
|
uint32 vv = v;
|
|
return aznumeric_caster(31 - countLeadingZeros32(vv));
|
|
}
|
|
|
|
static ILINE uint8 BitIndex(uint32 v)
|
|
{
|
|
return aznumeric_caster(31 - countLeadingZeros32(v));
|
|
}
|
|
|
|
static ILINE uint8 CountBits(uint8 v)
|
|
{
|
|
uint8 c = v;
|
|
c = ((c >> 1) & 0x55) + (c & 0x55);
|
|
c = ((c >> 2) & 0x33) + (c & 0x33);
|
|
c = ((c >> 4) & 0x0f) + (c & 0x0f);
|
|
return c;
|
|
}
|
|
|
|
static ILINE uint8 CountBits(uint16 v)
|
|
{
|
|
return CountBits((uint8)(v & 0xff)) +
|
|
CountBits((uint8)((v >> 8) & 0xff));
|
|
}
|
|
|
|
static ILINE uint8 CountBits(uint32 v)
|
|
{
|
|
return CountBits((uint8)(v & 0xff)) +
|
|
CountBits((uint8)((v >> 8) & 0xff)) +
|
|
CountBits((uint8)((v >> 16) & 0xff)) +
|
|
CountBits((uint8)((v >> 24) & 0xff));
|
|
}
|
|
|
|
// Branchless version of return v < 0 ? alt : v;
|
|
ILINE int32 Isel32(int32 v, int32 alt)
|
|
{
|
|
return ((static_cast<int32>(v) >> 31) & alt) | ((static_cast<int32>(~v) >> 31) & v);
|
|
}
|
|
|
|
template <uint32 ILOG>
|
|
struct CompileTimeIntegerLog2
|
|
{
|
|
static const uint32 result = 1 + CompileTimeIntegerLog2<(ILOG >> 1)>::result;
|
|
};
|
|
template <>
|
|
struct CompileTimeIntegerLog2<1>
|
|
{
|
|
static const uint32 result = 0;
|
|
};
|
|
template <>
|
|
struct CompileTimeIntegerLog2<0>; // keep it undefined, we cannot represent "minus infinity" result
|
|
|
|
COMPILE_TIME_ASSERT(CompileTimeIntegerLog2<1>::result == 0);
|
|
COMPILE_TIME_ASSERT(CompileTimeIntegerLog2<2>::result == 1);
|
|
COMPILE_TIME_ASSERT(CompileTimeIntegerLog2<3>::result == 1);
|
|
COMPILE_TIME_ASSERT(CompileTimeIntegerLog2<4>::result == 2);
|
|
COMPILE_TIME_ASSERT(CompileTimeIntegerLog2<5>::result == 2);
|
|
COMPILE_TIME_ASSERT(CompileTimeIntegerLog2<255>::result == 7);
|
|
COMPILE_TIME_ASSERT(CompileTimeIntegerLog2<256>::result == 8);
|
|
COMPILE_TIME_ASSERT(CompileTimeIntegerLog2<257>::result == 8);
|
|
|
|
template <uint32 ILOG>
|
|
struct CompileTimeIntegerLog2_RoundUp
|
|
{
|
|
static const uint32 result = CompileTimeIntegerLog2<ILOG>::result + ((ILOG & (ILOG - 1)) != 0);
|
|
};
|
|
template <>
|
|
struct CompileTimeIntegerLog2_RoundUp<0>; // we can return 0, but let's keep it undefined (same as CompileTimeIntegerLog2<0>)
|
|
|
|
COMPILE_TIME_ASSERT(CompileTimeIntegerLog2_RoundUp<1>::result == 0);
|
|
COMPILE_TIME_ASSERT(CompileTimeIntegerLog2_RoundUp<2>::result == 1);
|
|
COMPILE_TIME_ASSERT(CompileTimeIntegerLog2_RoundUp<3>::result == 2);
|
|
COMPILE_TIME_ASSERT(CompileTimeIntegerLog2_RoundUp<4>::result == 2);
|
|
COMPILE_TIME_ASSERT(CompileTimeIntegerLog2_RoundUp<5>::result == 3);
|
|
COMPILE_TIME_ASSERT(CompileTimeIntegerLog2_RoundUp<255>::result == 8);
|
|
COMPILE_TIME_ASSERT(CompileTimeIntegerLog2_RoundUp<256>::result == 8);
|
|
COMPILE_TIME_ASSERT(CompileTimeIntegerLog2_RoundUp<257>::result == 9);
|
|
|
|
// Character-to-bitfield mapping
|
|
|
|
inline uint32 AlphaBit(char c)
|
|
{
|
|
return c >= 'a' && c <= 'z' ? 1 << (c - 'z' + 31) : 0;
|
|
}
|
|
|
|
inline uint64 AlphaBit64(char c)
|
|
{
|
|
return (c >= 'a' && c <= 'z' ? 1U << (c - 'z' + 31) : 0) |
|
|
(c >= 'A' && c <= 'Z' ? 1LL << (c - 'Z' + 63) : 0);
|
|
}
|
|
|
|
inline uint32 AlphaBits(uint32 wc)
|
|
{
|
|
// Handle wide multi-char constants, can be evaluated at compile-time.
|
|
return AlphaBit((char)wc)
|
|
| AlphaBit((char)(wc >> 8))
|
|
| AlphaBit((char)(wc >> 16))
|
|
| AlphaBit((char)(wc >> 24));
|
|
}
|
|
|
|
inline uint32 AlphaBits(const char* s)
|
|
{
|
|
// Handle string of any length.
|
|
uint32 n = 0;
|
|
while (*s)
|
|
{
|
|
n |= AlphaBit(*s++);
|
|
}
|
|
return n;
|
|
}
|
|
|
|
inline uint64 AlphaBits64(const char* s)
|
|
{
|
|
// Handle string of any length.
|
|
uint64 n = 0;
|
|
while (*s)
|
|
{
|
|
n |= AlphaBit64(*s++);
|
|
}
|
|
return n;
|
|
}
|
|
|
|
// s should point to a buffer at least 65 chars long
|
|
inline void BitsAlpha64(uint64 n, char* s)
|
|
{
|
|
for (int i = 0; n != 0; n >>= 1, i++)
|
|
{
|
|
if (n & 1)
|
|
{
|
|
*s++ = i < 32 ? static_cast<char>(i + 'z' - 31) : static_cast<char>(i + 'Z' - 63);
|
|
}
|
|
}
|
|
*s++ = '\0';
|
|
}
|
|
|
|
|
|
// if hardware doesn't support 3Dc we can convert to DXT5 (different channels are used)
|
|
// with almost the same quality but the same memory requirements
|
|
inline void ConvertBlock3DcToDXT5(uint8 pDstBlock[16], const uint8 pSrcBlock[16])
|
|
{
|
|
assert(pDstBlock != pSrcBlock); // does not work in place
|
|
|
|
// 4x4 block requires 8 bytes in DXT5 or 3DC
|
|
|
|
// DXT5: 8 bit alpha0, 8 bit alpha1, 16*3 bit alpha lerp
|
|
// 16bit col0, 16 bit col1 (R5G6B5 low byte then high byte), 16*2 bit color lerp
|
|
|
|
// 3DC: 8 bit x0, 8 bit x1, 16*3 bit x lerp
|
|
// 8 bit y0, 8 bit y1, 16*3 bit y lerp
|
|
|
|
for (uint32 dwK = 0; dwK < 8; ++dwK)
|
|
{
|
|
pDstBlock[dwK] = pSrcBlock[dwK];
|
|
}
|
|
for (uint32 dwK = 8; dwK < 16; ++dwK)
|
|
{
|
|
pDstBlock[dwK] = 0;
|
|
}
|
|
|
|
// 6 bit green channel (highest bits)
|
|
// by using all 3 channels with a slight offset we can get more precision but then a dot product would be needed in PS
|
|
// because of bilinear filter we cannot just distribute bits to get perfect result
|
|
uint16 colDst0 = (((uint16)pSrcBlock[8] + 2) >> 2) << 5;
|
|
uint16 colDst1 = (((uint16)pSrcBlock[9] + 2) >> 2) << 5;
|
|
|
|
bool bFlip = colDst0 <= colDst1;
|
|
|
|
if (bFlip)
|
|
{
|
|
uint16 help = colDst0;
|
|
colDst0 = colDst1;
|
|
colDst1 = help;
|
|
}
|
|
|
|
bool bEqual = colDst0 == colDst1;
|
|
|
|
// distribute bytes by hand to not have problems with endianess
|
|
pDstBlock[8 + 0] = (uint8)colDst0;
|
|
pDstBlock[8 + 1] = (uint8)(colDst0 >> 8);
|
|
pDstBlock[8 + 2] = (uint8)colDst1;
|
|
pDstBlock[8 + 3] = (uint8)(colDst1 >> 8);
|
|
|
|
uint16* pSrcBlock16 = (uint16*)(pSrcBlock + 10);
|
|
uint16* pDstBlock16 = (uint16*)(pDstBlock + 12);
|
|
|
|
// distribute 16 3 bit values to 16 2 bit values (loosing LSB)
|
|
for (uint32 dwK = 0; dwK < 16; ++dwK)
|
|
{
|
|
uint32 dwBit0 = dwK * 3 + 0;
|
|
uint32 dwBit1 = dwK * 3 + 1;
|
|
uint32 dwBit2 = dwK * 3 + 2;
|
|
|
|
uint8 hexDataIn = (((pSrcBlock16[(dwBit2 >> 4)] >> (dwBit2 & 0xf)) & 1) << 2) // get HSB
|
|
| (((pSrcBlock16[(dwBit1 >> 4)] >> (dwBit1 & 0xf)) & 1) << 1)
|
|
| ((pSrcBlock16[(dwBit0 >> 4)] >> (dwBit0 & 0xf)) & 1); // get LSB
|
|
|
|
uint8 hexDataOut = 0;
|
|
|
|
switch (hexDataIn)
|
|
{
|
|
case 0:
|
|
hexDataOut = 0;
|
|
break; // color 0
|
|
case 1:
|
|
hexDataOut = 1;
|
|
break; // color 1
|
|
|
|
case 2:
|
|
hexDataOut = 0;
|
|
break; // mostly color 0
|
|
case 3:
|
|
hexDataOut = 2;
|
|
break;
|
|
case 4:
|
|
hexDataOut = 2;
|
|
break;
|
|
case 5:
|
|
hexDataOut = 3;
|
|
break;
|
|
case 6:
|
|
hexDataOut = 3;
|
|
break;
|
|
case 7:
|
|
hexDataOut = 1;
|
|
break; // mostly color 1
|
|
|
|
default:
|
|
assert(0);
|
|
}
|
|
|
|
if (bFlip)
|
|
{
|
|
if (hexDataOut < 2)
|
|
{
|
|
hexDataOut = 1 - hexDataOut; // 0<->1
|
|
}
|
|
else
|
|
{
|
|
hexDataOut = 5 - hexDataOut; // 2<->3
|
|
}
|
|
}
|
|
|
|
if (bEqual)
|
|
{
|
|
if (hexDataOut == 3)
|
|
{
|
|
hexDataOut = 1;
|
|
}
|
|
}
|
|
|
|
pDstBlock16[(dwK >> 3)] |= (hexDataOut << ((dwK & 0x7) << 1));
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
// is a bit on in a new bit field, but off in an old bit field
|
|
static ILINE bool TurnedOnBit(unsigned bit, unsigned oldBits, unsigned newBits)
|
|
{
|
|
return (newBits & bit) != 0 && (oldBits & bit) == 0;
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
inline uint32 cellUtilCountLeadingZero(uint32 x)
|
|
{
|
|
uint32 y;
|
|
uint32 n = 32;
|
|
|
|
y = x >> 16;
|
|
if (y != 0)
|
|
{
|
|
n = n - 16;
|
|
x = y;
|
|
}
|
|
y = x >> 8;
|
|
if (y != 0)
|
|
{
|
|
n = n - 8;
|
|
x = y;
|
|
}
|
|
y = x >> 4;
|
|
if (y != 0)
|
|
{
|
|
n = n - 4;
|
|
x = y;
|
|
}
|
|
y = x >> 2;
|
|
if (y != 0)
|
|
{
|
|
n = n - 2;
|
|
x = y;
|
|
}
|
|
y = x >> 1;
|
|
if (y != 0)
|
|
{
|
|
return n - 2;
|
|
}
|
|
return n - x;
|
|
}
|
|
|
|
inline uint32 cellUtilLog2(uint32 x)
|
|
{
|
|
return 31 - cellUtilCountLeadingZero(x);
|
|
}
|
|
|
|
|
|
|
|
|
|
inline void convertSwizzle(uint8*& dst, const uint8*& src,
|
|
const uint32 SrcPitch, const uint32 depth,
|
|
const uint32 xpos, const uint32 ypos,
|
|
const uint32 SciX1, const uint32 SciY1,
|
|
const uint32 SciX2, const uint32 SciY2,
|
|
const uint32 level)
|
|
{
|
|
if (level == 1)
|
|
{
|
|
switch (depth)
|
|
{
|
|
case 16:
|
|
if (xpos >= SciX1 && xpos < SciX2 && ypos >= SciY1 && ypos < SciY2)
|
|
{
|
|
// *((uint32*&)dst)++ = ((uint32*)src)[ypos * width + xpos];
|
|
// *((uint32*&)dst)++ = ((uint32*)src)[ypos * width + xpos+1];
|
|
// *((uint32*&)dst)++ = ((uint32*)src)[ypos * width + xpos+2];
|
|
// *((uint32*&)dst)++ = ((uint32*)src)[ypos * width + xpos+3];
|
|
*((uint32*&)dst)++ = *((uint32*)(src + (ypos * SrcPitch + xpos * 16)));
|
|
*((uint32*&)dst)++ = *((uint32*)(src + (ypos * SrcPitch + xpos * 16 + 4)));
|
|
*((uint32*&)dst)++ = *((uint32*)(src + (ypos * SrcPitch + xpos * 16 + 8)));
|
|
*((uint32*&)dst)++ = *((uint32*)(src + (ypos * SrcPitch + xpos * 16 + 12)));
|
|
}
|
|
else
|
|
{
|
|
((uint32*&)dst) += 4;
|
|
}
|
|
break;
|
|
case 8:
|
|
if (xpos >= SciX1 && xpos < SciX2 && ypos >= SciY1 && ypos < SciY2)
|
|
{
|
|
*((uint32*&)dst)++ = *((uint32*)(src + (ypos * SrcPitch + xpos * 8)));
|
|
*((uint32*&)dst)++ = *((uint32*)(src + (ypos * SrcPitch + xpos * 8 + 4)));
|
|
}
|
|
else
|
|
{
|
|
((uint32*&)dst) += 2;
|
|
}
|
|
break;
|
|
case 4:
|
|
if (xpos >= SciX1 && xpos < SciX2 && ypos >= SciY1 && ypos < SciY2)
|
|
{
|
|
*((uint32*&)dst) = *((uint32*)(src + (ypos * SrcPitch + xpos * 4)));
|
|
}
|
|
dst += 4;
|
|
break;
|
|
case 3:
|
|
if (xpos >= SciX1 && xpos < SciX2 && ypos >= SciY1 && ypos < SciY2)
|
|
{
|
|
*dst++ = src[ypos * SrcPitch + xpos * depth];
|
|
*dst++ = src[ypos * SrcPitch + xpos * depth + 1];
|
|
*dst++ = src[ypos * SrcPitch + xpos * depth + 2];
|
|
}
|
|
else
|
|
{
|
|
dst += 3;
|
|
}
|
|
break;
|
|
case 1:
|
|
if (xpos >= SciX1 && xpos < SciX2 && ypos >= SciY1 && ypos < SciY2)
|
|
{
|
|
*dst++ = src[ypos * SrcPitch + xpos * depth];
|
|
}
|
|
else
|
|
{
|
|
dst++;
|
|
}
|
|
break;
|
|
default:
|
|
assert(0);
|
|
}
|
|
return;
|
|
}
|
|
else
|
|
{
|
|
convertSwizzle(dst, src, SrcPitch, depth, xpos, ypos, SciX1, SciY1, SciX2, SciY2, level - 1);
|
|
convertSwizzle(dst, src, SrcPitch, depth, xpos + (1U << (level - 2)), ypos, SciX1, SciY1, SciX2, SciY2, level - 1);
|
|
convertSwizzle(dst, src, SrcPitch, depth, xpos, ypos + (1U << (level - 2)), SciX1, SciY1, SciX2, SciY2, level - 1);
|
|
convertSwizzle(dst, src, SrcPitch, depth, xpos + (1U << (level - 2)), ypos + (1U << (level - 2)), SciX1, SciY1, SciX2, SciY2, level - 1);
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
inline void Linear2Swizzle(uint8* dst,
|
|
const uint8* src,
|
|
const uint32 SrcPitch,
|
|
const uint32 width,
|
|
const uint32 height,
|
|
const uint32 depth,
|
|
const uint32 SciX1, const uint32 SciY1,
|
|
const uint32 SciX2, const uint32 SciY2)
|
|
{
|
|
src -= SciY1 * SrcPitch + SciX1 * depth;
|
|
if (width == height)
|
|
{
|
|
convertSwizzle(dst, src, SrcPitch, depth, 0, 0, SciX1, SciY1, SciX2, SciY2, cellUtilLog2(width) + 1);
|
|
}
|
|
else
|
|
if (width > height)
|
|
{
|
|
uint32 baseLevel = cellUtilLog2(width) - (cellUtilLog2(width) - cellUtilLog2(height));
|
|
for (uint32 i = 0; i < (1UL << (cellUtilLog2(width) - cellUtilLog2(height))); i++)
|
|
{
|
|
convertSwizzle(dst, src, SrcPitch, depth, (1U << baseLevel) * i, 0, SciX1, SciY1, SciX2, SciY2, baseLevel + 1);
|
|
}
|
|
}
|
|
else
|
|
// if (width < height)//wtf
|
|
{
|
|
uint32 baseLevel = cellUtilLog2(height) - (cellUtilLog2(height) - cellUtilLog2(width));
|
|
for (uint32 i = 0; i < (1UL << (cellUtilLog2(height) - cellUtilLog2(width))); i++)
|
|
{
|
|
convertSwizzle(dst, src, SrcPitch, depth, 0, (1U << baseLevel) * i, SciX1, SciY1, SciX2, SciY2, baseLevel + 1);
|
|
}
|
|
}
|
|
}
|