//============================================================================= // (C) 2005 ATI Research, Inc., All rights reserved. //============================================================================= // Modified from original #include "CCubeMapProcessor.h" #include #include #define CP_PI 3.14159265358979323846f namespace ImageProcessingAtom { //------------------------------------------------------------------------------ // D3D cube map face specification // mapping from 3D x,y,z cube map lookup coordinates // to 2D within face u,v coordinates // // --------------------> U direction // | (within-face texture space) // | _____ // | | | // | | +Y | // | _____|_____|_____ _____ // | | | | | | // | | -X | +Z | +X | -Z | // | |_____|_____|_____|_____| // | | | // | | -Y | // | |_____| // | // v V direction // (within-face texture space) //------------------------------------------------------------------------------ //Information about neighbors and how texture coorrdinates change across faces // in ORDER of left, right, top, bottom (e.g. edges corresponding to u=0, // u=1, v=0, v=1 in the 2D coordinate system of the particular face. //Note this currently assumes the D3D cube face ordering and orientation CPCubeMapNeighbor sg_CubeNgh[6][4] = { //XPOS face {{CP_FACE_Z_POS, CP_EDGE_RIGHT }, {CP_FACE_Z_NEG, CP_EDGE_LEFT }, {CP_FACE_Y_POS, CP_EDGE_RIGHT }, {CP_FACE_Y_NEG, CP_EDGE_RIGHT }}, //XNEG face {{CP_FACE_Z_NEG, CP_EDGE_RIGHT }, {CP_FACE_Z_POS, CP_EDGE_LEFT }, {CP_FACE_Y_POS, CP_EDGE_LEFT }, {CP_FACE_Y_NEG, CP_EDGE_LEFT }}, //YPOS face {{CP_FACE_X_NEG, CP_EDGE_TOP }, {CP_FACE_X_POS, CP_EDGE_TOP }, {CP_FACE_Z_NEG, CP_EDGE_TOP }, {CP_FACE_Z_POS, CP_EDGE_TOP }}, //YNEG face {{CP_FACE_X_NEG, CP_EDGE_BOTTOM}, {CP_FACE_X_POS, CP_EDGE_BOTTOM}, {CP_FACE_Z_POS, CP_EDGE_BOTTOM}, {CP_FACE_Z_NEG, CP_EDGE_BOTTOM}}, //ZPOS face {{CP_FACE_X_NEG, CP_EDGE_RIGHT }, {CP_FACE_X_POS, CP_EDGE_LEFT }, {CP_FACE_Y_POS, CP_EDGE_BOTTOM }, {CP_FACE_Y_NEG, CP_EDGE_TOP }}, //ZNEG face {{CP_FACE_X_POS, CP_EDGE_RIGHT }, {CP_FACE_X_NEG, CP_EDGE_LEFT }, {CP_FACE_Y_POS, CP_EDGE_TOP }, {CP_FACE_Y_NEG, CP_EDGE_BOTTOM }} }; //3x2 matrices that map cube map indexing vectors in 3d // (after face selection and divide through by the // _ABSOLUTE VALUE_ of the max coord) // into NVC space //Note this currently assumes the D3D cube face ordering and orientation #define CP_UDIR 0 #define CP_VDIR 1 #define CP_FACEAXIS 2 float sgFace2DMapping[6][3][3] = { //XPOS face {{ 0, 0, -1}, //u towards negative Z { 0, -1, 0}, //v towards negative Y {1, 0, 0}}, //pos X axis //XNEG face {{0, 0, 1}, //u towards positive Z {0, -1, 0}, //v towards negative Y {-1, 0, 0}}, //neg X axis //YPOS face {{1, 0, 0}, //u towards positive X {0, 0, 1}, //v towards positive Z {0, 1 , 0}}, //pos Y axis //YNEG face {{1, 0, 0}, //u towards positive X {0, 0 , -1}, //v towards negative Z {0, -1 , 0}}, //neg Y axis //ZPOS face {{1, 0, 0}, //u towards positive X {0, -1, 0}, //v towards negative Y {0, 0, 1}}, //pos Z axis //ZNEG face {{-1, 0, 0}, //u towards negative X {0, -1, 0}, //v towards negative Y {0, 0, -1}}, //neg Z axis }; //The 12 edges of the cubemap, (entries are used to index into the neighbor table) // this table is used to average over the edges. int32 sg_CubeEdgeList[12][2] = { {CP_FACE_X_POS, CP_EDGE_LEFT}, {CP_FACE_X_POS, CP_EDGE_RIGHT}, {CP_FACE_X_POS, CP_EDGE_TOP}, {CP_FACE_X_POS, CP_EDGE_BOTTOM}, {CP_FACE_X_NEG, CP_EDGE_LEFT}, {CP_FACE_X_NEG, CP_EDGE_RIGHT}, {CP_FACE_X_NEG, CP_EDGE_TOP}, {CP_FACE_X_NEG, CP_EDGE_BOTTOM}, {CP_FACE_Z_POS, CP_EDGE_TOP}, {CP_FACE_Z_POS, CP_EDGE_BOTTOM}, {CP_FACE_Z_NEG, CP_EDGE_TOP}, {CP_FACE_Z_NEG, CP_EDGE_BOTTOM} }; //Information about which of the 8 cube corners are correspond to the // the 4 corners in each cube face // the order is upper left, upper right, lower left, lower right int32 sg_CubeCornerList[6][4] = { { CP_CORNER_PPP, CP_CORNER_PPN, CP_CORNER_PNP, CP_CORNER_PNN }, // XPOS face { CP_CORNER_NPN, CP_CORNER_NPP, CP_CORNER_NNN, CP_CORNER_NNP }, // XNEG face { CP_CORNER_NPN, CP_CORNER_PPN, CP_CORNER_NPP, CP_CORNER_PPP }, // YPOS face { CP_CORNER_NNP, CP_CORNER_PNP, CP_CORNER_NNN, CP_CORNER_PNN }, // YNEG face { CP_CORNER_NPP, CP_CORNER_PPP, CP_CORNER_NNP, CP_CORNER_PNP }, // ZPOS face { CP_CORNER_PPN, CP_CORNER_NPN, CP_CORNER_PNN, CP_CORNER_NNN } // ZNEG face }; //-------------------------------------------------------------------------------------- // Convert cubemap face texel coordinates and face idx to 3D vector // note the U and V coords are integer coords and range from 0 to size-1 // this routine can be used to generate a normalizer cube map //-------------------------------------------------------------------------------------- void TexelCoordToVect(int32 a_FaceIdx, float a_U, float a_V, int32 a_Size, float *a_XYZ) { float nvcU, nvcV; float tempVec[3]; //scale up to [-1, 1] range (inclusive) nvcU = (2.0f * ((float)a_U + 0.5f) / a_Size ) - 1.0f; nvcV = (2.0f * ((float)a_V + 0.5f) / a_Size ) - 1.0f; //generate x,y,z vector (xform 2d NVC coord to 3D vector) //U contribution VM_SCALE3(a_XYZ, sgFace2DMapping[a_FaceIdx][CP_UDIR], nvcU); //V contribution VM_SCALE3(tempVec, sgFace2DMapping[a_FaceIdx][CP_VDIR], nvcV); VM_ADD3(a_XYZ, tempVec, a_XYZ); //add face axis VM_ADD3(a_XYZ, sgFace2DMapping[a_FaceIdx][CP_FACEAXIS], a_XYZ); //normalize vector VM_NORM3(a_XYZ, a_XYZ); } //-------------------------------------------------------------------------------------- // Convert 3D vector to cubemap face texel coordinates and face idx // note the U and V coords are integer coords and range from 0 to size-1 // this routine can be used to generate a normalizer cube map // // returns face IDX and texel coords //-------------------------------------------------------------------------------------- void VectToTexelCoord(float *a_XYZ, int32 a_Size, int32 *a_FaceIdx, float *a_U, float *a_V ) { float nvcU, nvcV; float absXYZ[3]; float maxCoord; float onFaceXYZ[3]; int32 faceIdx; float u, v; //absolute value 3 VM_ABS3(absXYZ, a_XYZ); if( (absXYZ[0] >= absXYZ[1]) && (absXYZ[0] >= absXYZ[2]) ) { maxCoord = absXYZ[0]; if(a_XYZ[0] >= 0) //face = XPOS { faceIdx = CP_FACE_X_POS; } else { faceIdx = CP_FACE_X_NEG; } } else if ( (absXYZ[1] >= absXYZ[0]) && (absXYZ[1] >= absXYZ[2]) ) { maxCoord = absXYZ[1]; if(a_XYZ[1] >= 0) //face = XPOS { faceIdx = CP_FACE_Y_POS; } else { faceIdx = CP_FACE_Y_NEG; } } else // if( (absXYZ[2] > absXYZ[0]) && (absXYZ[2] > absXYZ[1]) ) { maxCoord = absXYZ[2]; if(a_XYZ[2] >= 0) //face = XPOS { faceIdx = CP_FACE_Z_POS; } else { faceIdx = CP_FACE_Z_NEG; } } //divide through by max coord so face vector lies on cube face VM_SCALE3(onFaceXYZ, a_XYZ, 1.0f/maxCoord); nvcU = VM_DOTPROD3(sgFace2DMapping[ faceIdx ][CP_UDIR], onFaceXYZ ); nvcV = VM_DOTPROD3(sgFace2DMapping[ faceIdx ][CP_VDIR], onFaceXYZ ); u = (a_Size - 1.0f) * 0.5f * (nvcU + 1.0f); v = (a_Size - 1.0f) * 0.5f * (nvcV + 1.0f); *a_FaceIdx = faceIdx; *a_U = u; *a_V = v; } //-------------------------------------------------------------------------------------- // gets texel ptr in a cube map given a direction vector, and an array of // CImageSurfaces that represent the cube faces. // //-------------------------------------------------------------------------------------- CP_ITYPE *GetCubeMapTexelPtr(float *a_XYZ, CImageSurface *a_Surface) { float u, v; int32 faceIdx; //get face idx and u, v texel coordinate in face VectToTexelCoord(a_XYZ, a_Surface[0].m_Width, &faceIdx, &u, &v ); u = static_cast(VM_MIN((int32)u, a_Surface[0].m_Width - 1)); v = static_cast(VM_MIN((int32)v, a_Surface[0].m_Width - 1)); return( a_Surface[faceIdx].GetSurfaceTexelPtr(static_cast(u), static_cast(v)) ); } //-------------------------------------------------------------------------------------- // returns a bilinear filtered texel value // //-------------------------------------------------------------------------------------- void GetCubeMapTexelBilinear(float *a_XYZ, CImageSurface *a_Surface, CP_ITYPE* result, int32 numChannels) { float u, v; int32 faceIdx; //get face idx and u, v texel coordinate in face VectToTexelCoord(a_XYZ, a_Surface[0].m_Width, &faceIdx, &u, &v); //sample the four points in the quad around this point int32 uPoint = (int32)u; int32 vPoint = (int32)v; //top Left int32 uQuad = (int32)uPoint; int32 vQuad = (int32)vPoint; CP_ITYPE* sampleTL = (a_Surface[faceIdx].GetSurfaceTexelPtr(uQuad, vQuad)); //top right uQuad = VM_MIN(uPoint + 1, a_Surface[0].m_Width - 1); vQuad = vPoint; CP_ITYPE* sampleTR = (a_Surface[faceIdx].GetSurfaceTexelPtr(uQuad, vQuad)); //bottom left uQuad = uPoint; vQuad = VM_MIN(vPoint + 1, a_Surface[0].m_Width - 1); CP_ITYPE* sampleBL = (a_Surface[faceIdx].GetSurfaceTexelPtr(uQuad, vQuad)); //bottom right uQuad = VM_MIN(uPoint + 1, a_Surface[0].m_Width - 1); vQuad = VM_MIN(vPoint + 1, a_Surface[0].m_Width - 1); CP_ITYPE* sampleBR = (a_Surface[faceIdx].GetSurfaceTexelPtr(uQuad, vQuad)); //compute interpolated value float uDelta = u - uPoint; float vDelta = v - vPoint; for (uint32 i = 0; i < (uint32)numChannels; i++) { float topValue = sampleTL[i] * (1.0f - uDelta) + sampleTR[i] * uDelta; float bottomValue = sampleBL[i] * (1.0f - uDelta) + sampleBR[i] * uDelta; result[i] = topValue * (1.0f - vDelta) + bottomValue * vDelta; } } //-------------------------------------------------------------------------------------- // Compute solid angle of given texel in cubemap face for weighting taps in the // kernel by the area they project to on the unit sphere. // // Note that this code uses an approximation to the solid angle, by treating the // two triangles that make up the quad comprising the texel as planar. If more // accuracy is required, the solid angle per triangle lying on the sphere can be // computed using the sum of the interior angles - PI. // //-------------------------------------------------------------------------------------- float TexelCoordSolidAngle(int32 a_FaceIdx, float a_U, float a_V, int32 a_Size) { float cornerVect[4][3]; double cornerVect64[4][3]; float halfTexelStep = 0.5f; //note u, and v are in texel coords (where each texel is one unit) double edgeVect0[3]; double edgeVect1[3]; double xProdVect[3]; double texelArea; //compute 4 corner vectors of texel TexelCoordToVect(a_FaceIdx, a_U - halfTexelStep, a_V - halfTexelStep, a_Size, cornerVect[0] ); TexelCoordToVect(a_FaceIdx, a_U - halfTexelStep, a_V + halfTexelStep, a_Size, cornerVect[1] ); TexelCoordToVect(a_FaceIdx, a_U + halfTexelStep, a_V - halfTexelStep, a_Size, cornerVect[2] ); TexelCoordToVect(a_FaceIdx, a_U + halfTexelStep, a_V + halfTexelStep, a_Size, cornerVect[3] ); VM_NORM3_UNTYPED(cornerVect64[0], cornerVect[0] ); VM_NORM3_UNTYPED(cornerVect64[1], cornerVect[1] ); VM_NORM3_UNTYPED(cornerVect64[2], cornerVect[2] ); VM_NORM3_UNTYPED(cornerVect64[3], cornerVect[3] ); //area of triangle defined by corners 0, 1, and 2 VM_SUB3_UNTYPED(edgeVect0, cornerVect64[1], cornerVect64[0] ); VM_SUB3_UNTYPED(edgeVect1, cornerVect64[2], cornerVect64[0] ); VM_XPROD3_UNTYPED(xProdVect, edgeVect0, edgeVect1 ); texelArea = 0.5f * sqrt( VM_DOTPROD3_UNTYPED(xProdVect, xProdVect ) ); //area of triangle defined by corners 1, 2, and 3 VM_SUB3_UNTYPED(edgeVect0, cornerVect64[2], cornerVect64[1] ); VM_SUB3_UNTYPED(edgeVect1, cornerVect64[3], cornerVect64[1] ); VM_XPROD3_UNTYPED(xProdVect, edgeVect0, edgeVect1 ); texelArea += 0.5f * sqrt( VM_DOTPROD3_UNTYPED(xProdVect, xProdVect ) ); return static_cast(texelArea); } //-------------------------------------------------------------------------------------- //Builds a normalizer cubemap // // Takes in a cube face size, and an array of 6 surfaces to write the cube faces into // // Note that this normalizer cube map stores the vectors in unbiased -1 to 1 range. // if _bx2 style scaled and biased vectors are needed, uncomment the SCALE and BIAS // below //-------------------------------------------------------------------------------------- void CCubeMapProcessor::BuildNormalizerCubemap(int32 a_Size, CImageSurface *a_Surface ) { int32 iCubeFace, u, v; //iterate over cube faces for(iCubeFace=0; iCubeFace<6; iCubeFace++) { a_Surface[iCubeFace].Clear(); a_Surface[iCubeFace].Init(a_Size, a_Size, 3); //fast texture walk, build normalizer cube map CP_ITYPE *texelPtr = a_Surface[iCubeFace].m_ImgData; for(v=0; v < a_Surface[iCubeFace].m_Height; v++) { for(u=0; u < a_Surface[iCubeFace].m_Width; u++) { TexelCoordToVect(iCubeFace, (float)u, (float)v, a_Size, texelPtr); //VM_SCALE3(texelPtr, texelPtr, 0.5f); //VM_BIAS3(texelPtr, texelPtr, 0.5f); texelPtr += a_Surface[iCubeFace].m_NumChannels; } } } } //-------------------------------------------------------------------------------------- //Builds a normalizer cubemap, with the texels solid angle stored in the fourth component // //Takes in a cube face size, and an array of 6 surfaces to write the cube faces into // //Note that this normalizer cube map stores the vectors in unbiased -1 to 1 range. // if _bx2 style scaled and biased vectors are needed, uncomment the SCALE and BIAS // below //-------------------------------------------------------------------------------------- void CCubeMapProcessor::BuildNormalizerSolidAngleCubemap(int32 a_Size, CImageSurface *a_Surface ) { //iterate over cube faces for(int32 iCubeFace=0; iCubeFace<6; iCubeFace++) { a_Surface[iCubeFace].Clear(); a_Surface[iCubeFace].Init(a_Size, a_Size, 4); //First three channels for norm cube, and last channel for solid angle } //iterate over cube faces for(int32 iCubeFace=0; iCubeFace<6; iCubeFace++) { const int32 height = a_Surface[iCubeFace].m_Height; const int32 width = a_Surface[iCubeFace].m_Width; for(int32 v=0; v 0) { neighborFace = sg_CubeNgh[faceIdx][i].m_Face; neighborEdge = sg_CubeNgh[faceIdx][i].m_Edge; //For certain types of edge abutments, the bleedOverBBoxMin, and bleedOverBBoxMax need to // be flipped: the cases are // if a left edge mates with a left or bottom edge on the neighbor // if a top edge mates with a top or right edge on the neighbor // if a right edge mates with a right or top edge on the neighbor // if a bottom edge mates with a bottom or left edge on the neighbor //Seeing as the edges are enumerated as follows // left =0 // right =1 // top =2 // bottom =3 // // so if the edge enums are the same, or the sum of the enums == 3, // the bbox needs to be flipped if( (i == neighborEdge) || ((i+neighborEdge) == 3) ) { bleedOverBBoxMin[i] = (a_SrcSize-1) - bleedOverBBoxMin[i]; bleedOverBBoxMax[i] = (a_SrcSize-1) - bleedOverBBoxMax[i]; } //The way the bounding box is extended onto the neighboring face // depends on which edge of neighboring face abuts with this one switch(sg_CubeNgh[faceIdx][i].m_Edge) { case CP_EDGE_LEFT: a_FilterExtents[neighborFace].Augment(0, bleedOverBBoxMin[i], 0); a_FilterExtents[neighborFace].Augment(bleedOverAmount[i], bleedOverBBoxMax[i], 0); break; case CP_EDGE_RIGHT: a_FilterExtents[neighborFace].Augment( (a_SrcSize-1), bleedOverBBoxMin[i], 0); a_FilterExtents[neighborFace].Augment( (a_SrcSize-1) - bleedOverAmount[i], bleedOverBBoxMax[i], 0); break; case CP_EDGE_TOP: a_FilterExtents[neighborFace].Augment(bleedOverBBoxMin[i], 0, 0); a_FilterExtents[neighborFace].Augment(bleedOverBBoxMax[i], bleedOverAmount[i], 0); break; case CP_EDGE_BOTTOM: a_FilterExtents[neighborFace].Augment(bleedOverBBoxMin[i], (a_SrcSize-1), 0); a_FilterExtents[neighborFace].Augment(bleedOverBBoxMax[i], (a_SrcSize-1) - bleedOverAmount[i], 0); break; } //clamp filter extents in non-center tap faces to remain within surface a_FilterExtents[neighborFace].ClampMin(0, 0, 0); a_FilterExtents[neighborFace].ClampMax(a_SrcSize-1, a_SrcSize-1, 0); } //If the bleed over amount bleeds past the adjacent face onto the opposite face // from the center tap face, then process the opposite face entirely for now. //Note that the cases in which this happens, what usually happens is that // more than one edge bleeds onto the opposite face, and the bounding box // encompasses the entire cube map face. if(bleedOverAmount[i] > a_SrcSize) { uint32 oppositeFaceIdx; //determine opposite face switch(faceIdx) { case CP_FACE_X_POS: oppositeFaceIdx = CP_FACE_X_NEG; break; case CP_FACE_X_NEG: oppositeFaceIdx = CP_FACE_X_POS; break; case CP_FACE_Y_POS: oppositeFaceIdx = CP_FACE_Y_NEG; break; case CP_FACE_Y_NEG: oppositeFaceIdx = CP_FACE_Y_POS; break; case CP_FACE_Z_POS: oppositeFaceIdx = CP_FACE_Z_NEG; break; default: // CP_FACE_Z_NEG: oppositeFaceIdx = CP_FACE_Z_POS; break; } //just encompass entire face for now a_FilterExtents[oppositeFaceIdx].Augment(0, 0, 0); a_FilterExtents[oppositeFaceIdx].Augment((a_SrcSize-1), (a_SrcSize-1), 0); } } } //-------------------------------------------------------------------------------------- //ProcessFilterExtents // Process bounding box in each cube face // //-------------------------------------------------------------------------------------- void CCubeMapProcessor::ProcessFilterExtents(float *a_CenterTapDir, float a_DotProdThresh, CBBoxInt32 *a_FilterExtents, CImageSurface *a_NormCubeMap, CImageSurface *a_SrcCubeMap, CP_ITYPE *a_DstVal, uint32 a_FilterType, bool a_bUseSolidAngleWeighting, float a_SpecularPower) { //accumulators are 64-bit floats in order to have the precision needed // over a summation of a large number of pixels double dstAccumFace[6][4]; double weightAccumFace[6]; const int32 nSrcChannels = a_SrcCubeMap[0].m_NumChannels; //norm cube map and srcCubeMap have same face width const int32 faceWidth = a_NormCubeMap[0].m_Width; //amount to add to pointer to move to next scanline in images const int32 normCubePitch = faceWidth * a_NormCubeMap[0].m_NumChannels; const int32 srcCubePitch = faceWidth * a_SrcCubeMap[0].m_NumChannels; //iterate over cubefaces for(int32 iFaceIdx=0; iFaceIdx<6; iFaceIdx++ ) { //dest accum for(int32 k=0; k= a_DotProdThresh ) { CP_ITYPE weight; //for now just weight all taps equally, but ideally // weight should be proportional to the solid angle of the tap if(a_bUseSolidAngleWeighting == true) { //solid angle stored in 4th channel of normalizer/solid angle cube map weight = *(texelVect+3); } else { //all taps equally weighted weight = 1.0f; } switch(a_FilterType) { case CP_FILTER_TYPE_COSINE_POWER: { if(tapDotProd > 0.0f) { weight *= pow(tapDotProd, a_SpecularPower) * tapDotProd; } else { weight = 0; } } break; case CP_FILTER_TYPE_CONE: case CP_FILTER_TYPE_ANGULAR_GAUSSIAN: { //weights are in same lookup table for both of these filter types weight *= m_FilterLUT[(int32)(tapDotProd * (m_NumFilterLUTEntries - 1))]; } break; case CP_FILTER_TYPE_COSINE: { if(tapDotProd > 0.0f) { weight *= tapDotProd; } else { weight = 0.0f; } } break; case CP_FILTER_TYPE_DISC: default: break; } //iterate over channels for(int32 k=0; k>= 1; //terminate if mip chain becomes too small if(mipLevelSize == 0) { return; } } } //-------------------------------------------------------------------------------------- //Copy and convert cube map face data from an external image/surface into this object // // a_FaceIdx = a value 0 to 5 speciying which face to copy into (one of the CP_FACE_? ) // a_Level = mip level to copy into // a_SrcType = data type of image being copyed from (one of the CP_TYPE_? types) // a_SrcNumChannels = number of channels of the image being copied from (usually 1 to 4) // a_SrcPitch = number of bytes per row of the source image being copied from // a_SrcDataPtr = pointer to the image data to copy from // a_Degamma = original gamma level of input image to undo by degamma // a_Scale = scale to apply to pixel values after degamma (in linear space) //-------------------------------------------------------------------------------------- void CCubeMapProcessor::SetInputFaceData(int32 a_FaceIdx, int32 a_MipIdx, int32 a_SrcType, int32 a_SrcNumChannels, int32 a_SrcPitch, void *a_SrcDataPtr, float a_MaxClamp, float a_Degamma, float a_Scale) { //since input is being modified, terminate any active filtering threads TerminateActiveThreads(); m_InputSurface[a_MipIdx][a_FaceIdx].SetImageDataClampDegammaScale( a_SrcType, a_SrcNumChannels, a_SrcPitch, a_SrcDataPtr, a_MaxClamp, a_Degamma, a_Scale ); } //-------------------------------------------------------------------------------------- //Copy and convert cube map face data from this object into an external image/surface // // a_FaceIdx = a value 0 to 5 speciying which face to copy into (one of the CP_FACE_? ) // a_Level = mip level to copy into // a_DstType = data type of image to copy to (one of the CP_TYPE_? types) // a_DstNumChannels = number of channels of the image to copy to (usually 1 to 4) // a_DstPitch = number of bytes per row of the dest image to copy to // a_DstDataPtr = pointer to the image data to copy to // a_Scale = scale to apply to pixel values (in linear space) before gamma for output // a_Gamma = gamma level to apply to pixels after scaling //-------------------------------------------------------------------------------------- void CCubeMapProcessor::GetInputFaceData(int32 a_FaceIdx, int32 a_MipIdx, int32 a_DstType, int32 a_DstNumChannels, int32 a_DstPitch, void *a_DstDataPtr, float a_Scale, float a_Gamma) { m_InputSurface[a_MipIdx][a_FaceIdx].GetImageDataScaleGamma( a_DstType, a_DstNumChannels, a_DstPitch, a_DstDataPtr, a_Scale, a_Gamma ); } //-------------------------------------------------------------------------------------- //ChannelSwapInputFaceData // swizzle data in first 4 channels for input faces // //-------------------------------------------------------------------------------------- void CCubeMapProcessor::ChannelSwapInputFaceData(int32 a_Channel0Src, int32 a_Channel1Src, int32 a_Channel2Src, int32 a_Channel3Src ) { int32 iMip, iFace, u, v, k; int32 size; CP_ITYPE texelData[4]; int32 channelSrcArray[4]; //since input is being modified, terminate any active filtering threads TerminateActiveThreads(); size = m_InputSize; channelSrcArray[0] = a_Channel0Src; channelSrcArray[1] = a_Channel1Src; channelSrcArray[2] = a_Channel2Src; channelSrcArray[3] = a_Channel3Src; //Iterate over mips and faces for input images for (iMip = 0; iMip < m_NumMipLevels; iMip++) { for (iFace = 0; iFace < 6; iFace++) { for (v = 0; v < size; v++) { for (u = 0; u < size; u++) { //get channel data for (k = 0; k < m_NumChannels; k++) { texelData[k] = *(m_InputSurface[iMip][iFace].GetSurfaceTexelPtr(u, v) + k); } //repack channel data accoring to swizzle information for (k = 0; k < m_NumChannels; k++) { *(m_InputSurface[iMip][iFace].GetSurfaceTexelPtr(u, v) + k) = texelData[channelSrcArray[k]]; } } } } // prepare size for next mip level size >>= 1; } } //-------------------------------------------------------------------------------------- //ChannelSwapOutputFaceData // swizzle data in first 4 channels for input faces // //-------------------------------------------------------------------------------------- void CCubeMapProcessor::ChannelSwapOutputFaceData(int32 a_Channel0Src, int32 a_Channel1Src, int32 a_Channel2Src, int32 a_Channel3Src ) { int32 iFace, iMipLevel, u, v, k; int32 size; CP_ITYPE texelData[4]; int32 channelSrcArray[4]; //since output is being modified, terminate any active filtering threads TerminateActiveThreads(); size = m_OutputSize; channelSrcArray[0] = a_Channel0Src; channelSrcArray[1] = a_Channel1Src; channelSrcArray[2] = a_Channel2Src; channelSrcArray[3] = a_Channel3Src; //Iterate over faces for input images for(iMipLevel=0; iMipLevel> 16u); bits = ((bits & 0x55555555u) << 1u) | ((bits & 0xAAAAAAAAu) >> 1u); bits = ((bits & 0x33333333u) << 2u) | ((bits & 0xCCCCCCCCu) >> 2u); bits = ((bits & 0x0F0F0F0Fu) << 4u) | ((bits & 0xF0F0F0F0u) >> 4u); bits = ((bits & 0x00FF00FFu) << 8u) | ((bits & 0xFF00FF00u) >> 8u); return float(bits) * 2.3283064365386963e-10f; // float(bits) * 2^-32 } inline void HammersleySequence(uint32 sampleIndex, uint32 sampleCount, float* vXi) { vXi[0] = float(sampleIndex) / float(sampleCount); vXi[1] = RadicalInverse2(sampleIndex); } void ImportanceSampleGGX(float* vXi, float alphaRoughnessSqr, float* vNormal, float* vOut) { float phi = 2.0f * CP_PI * vXi[0]; float cosTheta = sqrtf((1.0f - vXi[1]) / ( 1.0f + (alphaRoughnessSqr - 1.0f) * vXi[1])); float sinTheta = sqrtf(1.0f - cosTheta * cosTheta); float vH[3]; vH[0] = sinTheta * cosf(phi); vH[1] = sinTheta * sinf(phi); vH[2] = cosTheta; float vUpVectorX[3] = {1.0f, 0.0f, 0.0f}; float vUpVectorZ[3] = {0.0f, 0.0f, 1.0f}; float vTangentX[3]; float vTangentY[3]; float vTempVec[3]; // Build local frame VM_XPROD3(vTempVec, fabs(vNormal[2]) < 0.999f ? vUpVectorZ : vUpVectorX, vNormal); VM_NORM3(vTangentX, vTempVec); VM_XPROD3(vTangentY, vNormal, vTangentX); // Convert from tangent to world space vOut[0] = vTangentX[0] * vH[0] + vTangentY[0] * vH[1] + vNormal[0] * vH[2]; vOut[1] = vTangentX[1] * vH[0] + vTangentY[1] * vH[1] + vNormal[1] * vH[2]; vOut[2] = vTangentX[2] * vH[0] + vTangentY[2] * vH[1] + vNormal[2] * vH[2]; } void CCubeMapProcessor::FilterCubeSurfacesGGX(int32 a_MipIdx, int32 a_SampleCount, int32 a_FaceIdxStart, int32 a_FaceIdxEnd, int32 a_ThreadIdx) { // we don't want to convolve mip0 as it's theoretically a perfect mirror with zero roughness AZ_Assert(a_MipIdx > 0, "FilterCubeSurfacesGGX called for mip 0"); CImageSurface* dstCubeMap = m_OutputSurface[a_MipIdx]; const uint32 numChannels = VM_MIN(m_NumChannels, 4); const int32 dstSize = dstCubeMap[0].m_Width; const uint32 maxMipIndex = (uint32)m_NumMipLevels - 1; // Convert smoothness to roughness (needs to match shader code) // The roughness value in microfacet calculations (called "alpha" in the literature) does not give perceptually // linear results. Disney found that squaring the roughness value before using it in microfacet equations causes // the user-provided roughness parameter to be more perceptually linear. // See Burley's Disney PBR: https://pdfs.semanticscholar.org/eeee/3b125c09044d3e2f58ed0e4b1b66a677886d.pdf float smoothness = VM_MAX(1.0f - ((float)a_MipIdx / maxMipIndex), 0.0f); float perceptualRoughness = 1.0f - smoothness; float alphaRoughness = perceptualRoughness * perceptualRoughness; float alphaRoughnessSqr = alphaRoughness * alphaRoughness; //thread progress m_ThreadProgress[a_ThreadIdx].m_StartFace = a_FaceIdxStart; m_ThreadProgress[a_ThreadIdx].m_EndFace = a_FaceIdxEnd; CP_ITYPE* sourceTexelA = new CP_ITYPE[numChannels]; CP_ITYPE* sourceTexelB = new CP_ITYPE[numChannels]; //process required faces for(int32 iCubeFace = a_FaceIdxStart; iCubeFace <= a_FaceIdxEnd && !m_shutdownWorkerThreadSignal; iCubeFace++) { //iterate over dst cube map face texel for(int32 v = 0; v < dstSize && !m_shutdownWorkerThreadSignal; v++) { CP_ITYPE *texelPtr = dstCubeMap[iCubeFace].m_ImgData + v * dstCubeMap[iCubeFace].m_NumChannels * dstSize; m_ThreadProgress[a_ThreadIdx].m_CurrentFace = iCubeFace; m_ThreadProgress[a_ThreadIdx].m_CurrentRow = v; for (int32 u = 0; u < dstSize && !m_shutdownWorkerThreadSignal; u++) { float color[4] = { 0 }; float totalWeight = 0; float vH[3]; float vL[3]; //assume normal and view vector to be vCenterTapDir float vCenterTapDir[3]; TexelCoordToVect(iCubeFace, (float)u, (float)v, dstSize, vCenterTapDir); for (uint32 i = 0; i < (uint32)a_SampleCount && !m_shutdownWorkerThreadSignal; i++) { float vXi[2]; HammersleySequence(i, a_SampleCount, vXi); ImportanceSampleGGX(vXi, alphaRoughnessSqr, vCenterTapDir, vH); float fVdotH = VM_DOTPROD3(vCenterTapDir, vH); vL[0] = 2 * fVdotH * vH[0] - vCenterTapDir[0]; vL[1] = 2 * fVdotH * vH[1] - vCenterTapDir[1]; vL[2] = 2 * fVdotH * vH[2] - vCenterTapDir[2]; float fNdotL = VM_DOTPROD3(vCenterTapDir, vL); if (fNdotL > 0) { //compute specular D term (must match shader BRDF) float dh = alphaRoughnessSqr / (CP_PI * powf(fVdotH * fVdotH * (alphaRoughnessSqr - 1.0f) + 1.0f, 2.0f)); //calculate the PDF (probability distribution) of the sample to determine the best mip level. //lower probability sample directions use a smaller mip so they cover a larger sample area, which will //blend the sample values and reduce artifacts float pdf = dh * fVdotH / (4.0f * fVdotH); float solidAngleTexel = 4.0f * CP_PI / (6.0f * m_InputSurface[0][0].m_Width * m_InputSurface[0][0].m_Width); float solidAngleSample = 1.0f / (a_SampleCount * pdf); float mip = 0.5f * log2f(solidAngleSample / solidAngleTexel) + 1.0f; //determine surrounding mip levels uint32 mipA = static_cast(floor(mip)); uint32 mipB = mipA + 1; float lerp = 0.0f; VM_CLAMP(lerp, mip - mipA, 0.0f, 1.0f); if (mipA >= maxMipIndex) { mipA = mipB = maxMipIndex; lerp = 0.0f; } //retrieve bilinear filtered texel from each mip GetCubeMapTexelBilinear(vL, m_InputSurface[mipA], sourceTexelA, numChannels); GetCubeMapTexelBilinear(vL, m_InputSurface[mipB], sourceTexelB, numChannels); //interpolate each channel value from the two bilinear mip samples for trilinear filtering for (uint32 k = 0; k < numChannels; k++) { color[k] += (((1.0f - lerp) * sourceTexelA[k]) + (lerp * sourceTexelB[k])) * fNdotL; } totalWeight += fNdotL; } } for (uint32 k = 0; k < numChannels; k++) { texelPtr[k] = color[k] / totalWeight; } texelPtr += dstCubeMap[iCubeFace].m_NumChannels; } } } delete[] sourceTexelA; delete[] sourceTexelB; } void CCubeMapProcessor::FilterCubeMapMipChain(float a_BaseFilterAngle, float a_InitialMipAngle, float a_MipAnglePerLevelScale, int32 a_FilterType, int32 a_FixupType, int32 a_FixupWidth, bool a_bUseSolidAngle, float a_GlossScale, float a_GlossBias, int32 a_SampleCountGGX) { int32 i; float coneAngle; if(a_FilterType == CP_FILTER_TYPE_COSINE_POWER || a_FilterType == CP_FILTER_TYPE_GGX) { // Don't filter top mipmap a_BaseFilterAngle = 0; } //Build filter lookup tables based on the source miplevel size PrecomputeFilterLookupTables(a_FilterType, m_InputSurface[0][0].m_Width, a_BaseFilterAngle); //initialize thread progress m_ThreadProgress[0].m_CurrentMipLevel = 0; m_ThreadProgress[0].m_CurrentRow = 0; m_ThreadProgress[0].m_CurrentFace = 0; //Filter the top mip level (initial filtering used for diffuse or blurred specular lighting ) FilterCubeSurfaces(m_InputSurface[0], m_OutputSurface[0], a_BaseFilterAngle, a_FilterType, a_bUseSolidAngle, 0, //start at face 0 5, //end at face 5 0); //thread 0 is processing m_ThreadProgress[0].m_CurrentMipLevel = 1; m_ThreadProgress[0].m_CurrentRow = 0; m_ThreadProgress[0].m_CurrentFace = 0; FixupCubeEdges(m_OutputSurface[0], a_FixupType, a_FixupWidth); //Cone angle start (for generating subsequent mip levels) coneAngle = a_InitialMipAngle; //generate subsequent mip levels for(i=0; i<(m_NumMipLevels-1) && !m_shutdownWorkerThreadSignal; i++) { m_ThreadProgress[0].m_CurrentMipLevel = i+1; m_ThreadProgress[0].m_CurrentRow = 0; m_ThreadProgress[0].m_CurrentFace = 0; if (a_FilterType == CP_FILTER_TYPE_GGX) { FilterCubeSurfacesGGX(i + 1, a_SampleCountGGX, 0, //start at face 0 5, //end at face 5 0 //thread 0 is processing ); } else { CImageSurface* srcCubeImage = m_OutputSurface[i]; float specPow = 1.0f; if(a_FilterType == CP_FILTER_TYPE_COSINE_POWER) { uint32 numMipsForGloss = m_NumMipLevels - 2; // Lowest used mip is 4x4 float gloss = VM_MAX(1.0f - (float)(i + 1) / (float)(numMipsForGloss - 1), 0.0f); // Compute specular power (this must match shader code) specPow = pow(2.0f, a_GlossScale * gloss + a_GlossBias); // Blinn to Phong approximation: (R.E)^p == (N.H)^(4*p) specPow /= 4.0f; coneAngle = ComputeBaseFilterAngle(specPow); srcCubeImage = m_InputSurface[0]; } //Build filter lookup tables based on the source miplevel size PrecomputeFilterLookupTables(a_FilterType, srcCubeImage->m_Width, coneAngle); //filter cube surfaces FilterCubeSurfaces(srcCubeImage, m_OutputSurface[i+1], coneAngle, a_FilterType, a_bUseSolidAngle, 0, //start at face 0 5, //end at face 5 0, //thread 0 is processing specPow); } m_ThreadProgress[0].m_CurrentMipLevel = i+2; m_ThreadProgress[0].m_CurrentRow = 0; m_ThreadProgress[0].m_CurrentFace = 0; FixupCubeEdges(m_OutputSurface[i+1], a_FixupType, a_FixupWidth); coneAngle = coneAngle * a_MipAnglePerLevelScale; } m_Status = CP_STATUS_FILTER_COMPLETED; } //-------------------------------------------------------------------------------------- //Builds the following lookup tables prior to filtering: // -normalizer cube map // -tap weight lookup table // //-------------------------------------------------------------------------------------- void CCubeMapProcessor::PrecomputeFilterLookupTables(uint32 a_FilterType, int32 a_SrcCubeMapWidth, float a_FilterConeAngle) { float srcTexelAngle; int32 iCubeFace; //angle about center tap that defines filter cone float filterAngle; //min angle a src texel can cover (in degrees) srcTexelAngle = (180.0f / CP_PI) * atan2f(1.0f, (float)a_SrcCubeMapWidth); //filter angle is 1/2 the cone angle filterAngle = a_FilterConeAngle / 2.0f; //ensure filter angle is larger than a texel if(filterAngle < srcTexelAngle) { filterAngle = srcTexelAngle; } //ensure filter cone is always smaller than the hemisphere if(filterAngle > 90.0f) { filterAngle = 90.0f; } //build lookup table for tap weights based on angle between current tap and center tap BuildAngleWeightLUT(a_SrcCubeMapWidth * 2, a_FilterType, filterAngle); //clear pre-existing normalizer cube map for(iCubeFace=0; iCubeFace<6; iCubeFace++) { m_NormCubeMap[iCubeFace].Clear(); } //Normalized vectors per cubeface and per-texel solid angle BuildNormalizerSolidAngleCubemap(a_SrcCubeMapWidth, m_NormCubeMap); } //-------------------------------------------------------------------------------------- //The key to the speed of these filtering routines is to quickly define a per-face // bounding box of pixels which enclose all the taps in the filter kernel efficiently. // Later these pixels are selectively processed based on their dot products to see if // they reside within the filtering cone. // //This is done by computing the smallest per-texel angle to get a conservative estimate // of the number of texels needed to be covered in width and height order to filter the // region. the bounding box for the center taps face is defined first, and if the // filtereing region bleeds onto the other faces, bounding boxes for the other faces are // defined next //-------------------------------------------------------------------------------------- void CCubeMapProcessor::FilterCubeSurfaces(CImageSurface *a_SrcCubeMap, CImageSurface *a_DstCubeMap, float a_FilterConeAngle, int32 a_FilterType, bool a_bUseSolidAngle, int32 a_FaceIdxStart, int32 a_FaceIdxEnd, int32 a_ThreadIdx, float a_SpecularPower) { const int32 srcSize = a_SrcCubeMap[0].m_Width; const int32 dstSize = a_DstCubeMap[0].m_Width; //min angle a src texel can cover (in degrees) const float srcTexelAngle = (180.0f / CP_PI) * atan2f(1.0f, (float)srcSize); //angle about center tap to define filter cone float filterAngle; //filter angle is 1/2 the cone angle filterAngle = a_FilterConeAngle / 2.0f; //ensure filter angle is larger than a texel if(filterAngle < srcTexelAngle) { filterAngle = srcTexelAngle; } //ensure filter cone is always smaller than the hemisphere if(filterAngle > 90.0f) { filterAngle = 90.0f; } //the maximum number of texels in 1D the filter cone angle will cover // used to determine bounding box size for filter extents //ensure conservative region always covers at least one texel const int32 filterSize = AZ::GetMax((int32)ceil(filterAngle / srcTexelAngle), 1); //dotProdThresh threshold based on cone angle to determine whether or not taps // reside within the cone angle const float dotProdThresh = cosf( (CP_PI / 180.0f) * filterAngle ); //thread progress m_ThreadProgress[a_ThreadIdx].m_StartFace = a_FaceIdxStart; m_ThreadProgress[a_ThreadIdx].m_EndFace = a_FaceIdxEnd; //process required faces for(int32 iCubeFace = a_FaceIdxStart; iCubeFace <= a_FaceIdxEnd && !m_shutdownWorkerThreadSignal; iCubeFace++) { //iterate over dst cube map face texel for(int32 v = 0; v < dstSize && !m_shutdownWorkerThreadSignal; v++) { CP_ITYPE *texelPtr = a_DstCubeMap[iCubeFace].m_ImgData + v * a_DstCubeMap[iCubeFace].m_NumChannels * dstSize; m_ThreadProgress[a_ThreadIdx].m_CurrentFace = iCubeFace; m_ThreadProgress[a_ThreadIdx].m_CurrentRow = v; for(int32 u=0; u 0.0f) { totalMipComputation = pow(m_InputSize * m_BaseFilterAngle , 2.0f) * (m_OutputSize * m_OutputSize); } else { totalMipComputation = pow(m_InputSize * 0.01f , 2.0f) * (m_OutputSize * m_OutputSize); } progressMipComputation = 0.0f; if(a_FilterProgress->m_CurrentMipLevel > 0) { progressMipComputation = totalMipComputation; } //filtering angle for this miplevel filterAngle = m_InitialMipAngle; dstSize = m_OutputSize; //computation for entire base mip level (if current level is base level) if(a_FilterProgress->m_CurrentMipLevel == 0) { currentMipComputation = totalMipComputation; currentMipSize = dstSize; } //compuatation to generate subsequent mip levels for(i=1; i 180) { filterAngle = 180; } //note src size is dstSize*2 since miplevels are generated from the subsequent level computation = pow(dstSize * 2 * filterAngle, 2.0f) * (dstSize * dstSize); totalMipComputation += computation; //accumulate computation for completed mip levels if(a_FilterProgress->m_CurrentMipLevel > i) { progressMipComputation = totalMipComputation; } //computation for entire current mip level if(a_FilterProgress->m_CurrentMipLevel == i) { currentMipComputation = computation; currentMipSize = dstSize; } } //fraction of compuation time processing the entire current mip level will take currentMipComputation /= totalMipComputation; progressMipComputation /= totalMipComputation; progressFaceComputation = currentMipComputation * (float)(a_FilterProgress->m_CurrentFace - a_FilterProgress->m_StartFace) / (float)(1 + a_FilterProgress->m_EndFace - a_FilterProgress->m_StartFace); currentFaceComputation = currentMipComputation * 1.0f / (1 + a_FilterProgress->m_EndFace - a_FilterProgress->m_StartFace); progressRowComputation = currentFaceComputation * ((float)a_FilterProgress->m_CurrentRow / (float)currentMipSize); //progress completed a_FilterProgress->m_FractionCompleted = progressMipComputation + progressFaceComputation + progressRowComputation; if( a_FilterProgress->m_CurrentFace < 0) { a_FilterProgress->m_CurrentFace = 0; } if( a_FilterProgress->m_CurrentMipLevel < 0) { a_FilterProgress->m_CurrentMipLevel = 0; } if( a_FilterProgress->m_CurrentRow < 0) { a_FilterProgress->m_CurrentRow = 0; } } //-------------------------------------------------------------------------------------- // Return string describing the current status of the cubemap processing threads // //-------------------------------------------------------------------------------------- WCHAR *CCubeMapProcessor::GetFilterProgressString(void) { WCHAR threadProgressString[CP_MAX_FILTER_THREADS][CP_MAX_PROGRESS_STRING]; int32 i; for(i=0; i