#include "common.h"

//--------------------------------------------------------------------------------------
// HDAO compute shader: gather pattern variant.
//
// NOTE(review): this file had lost its line breaks, so every `//` comment swallowed the
// code that followed it on the same physical line. The line structure is restored here.
//--------------------------------------------------------------------------------------

//=================================================================================================================================
// The constant buffer
//=================================================================================================================================

// Render-target size, taken from pos_decompression_params2.xy (declared outside this file).
#define g_f2RTSize ( pos_decompression_params2.xy )

//=================================================================================================================================
// Textures, Buffers & Samplers
//=================================================================================================================================

// CS Output buffers
// NOTE(review): the element type was missing in SOURCE (`RWTexture2D g_ResultTexture`).
// `float` is assumed because ComputeHDAO() produces a single float - confirm against
// the UAV format bound to u0.
RWTexture2D<float> g_ResultTexture : register( u0 );

// Depth is read from s_position in both configurations; only the normal source differs.
#define g_txDepth s_position
#ifdef GBUFFER_OPTIMIZATION
#define g_txNormal s_position
#else
#define g_txNormal s_normal
#endif

// Samplers
#define g_SamplePoint smp_nofilter

//=================================================================================================================================
// Hard coded HDAO params
//=================================================================================================================================

// Camera Z differences must fall between the accept and reject radius to be
// considered as a valley (see ComputeHDAO).
static float g_fHDAORejectRadius = 0.43f;
static float g_fHDAOAcceptRadius = 0.0001f;

// Simple scaling factor to control the intensity of the occlusion, chosen per quality level.
#if SSAO_QUALITY == 3
static float g_fHDAOIntensity = 0.5f;
#elif SSAO_QUALITY == 2
static float g_fHDAOIntensity = 0.4f;
#elif SSAO_QUALITY == 1
static float g_fHDAOIntensity = 0.3f;
#else
// Without this, g_fHDAOIntensity is simply never declared and the build fails later
// with an unrelated-looking "undeclared identifier" at its point of use.
#error SSAO_QUALITY must be 1, 2 or 3 to select g_fHDAOIntensity
#endif

static float g_fHDAONormalScale = 0.10f; // Scaling factor to control the effect the normals have
static float g_fAcceptAngle = 0.98f;     // Used by the ValleyAngle function to determine shallow valleys

//=================================================================================================================================
// Thread / Group Defines
//=================================================================================================================================

// Group Defines
#define GROUP_TEXEL_DIM ( 56 )     // Equals ALU_DIM + 2 * GROUP_TEXEL_OVERLAP
#define GROUP_THREAD_DIM ( 32 )    // 32 * 32 = 1024 threads
#define GROUP_TEXEL_OVERLAP ( 12 ) // Border texels on each side of the ALU region

// Texture Op Defines
#define GATHER_THREADS ( 784 )        // Equals GATHER_THREADS_PER_ROW * GATHER_THREADS_PER_ROW
#define GATHER_THREADS_PER_ROW ( 28 ) // Equals GROUP_TEXEL_DIM / 2 (main() steps columns and rows by 2)
#define GATHER_PER_THREAD ( 1 )

// ALU Op Defines
#define ALU_DIM ( 32 )

//=============================================================================================================================
// Group shared memory (LDS)
// One camera Z value per texel of the GROUP_TEXEL_DIM x GROUP_TEXEL_DIM group footprint.
//=============================================================================================================================
groupshared struct
{
    float fCameraZ[GROUP_TEXEL_DIM][GROUP_TEXEL_DIM];
}g_LDS;

//=============================================================================================================================
// Helper function to load data from the LDS, given texel coord
// u2Texel: texel coordinate inside the group footprint; .y selects the row, .x the column.
// NOTE: X and Y are swapped around to ensure horizontal reading across threads, this avoids
// LDS memory bank conflicts
//=============================================================================================================================
float LoadFromLDS( uint2 u2Texel )
{
    return g_LDS.fCameraZ[u2Texel.y][u2Texel.x];
}

//=============================================================================================================================
// Helper function to store data to the LDS, given texel coord
// NOTE: X and Y are swapped around to ensure horizontal writing across threads, this avoids
// LDS memory bank conflicts
//=============================================================================================================================
// Writes fValue into the group-shared camera Z array at texel u2Texel
// (.y selects the row, .x the column - the same layout LoadFromLDS reads).
//
// NOTE(review): this file had lost its line breaks; this whole physical line began with
// `//`, so the function and tables below were commented out. Line structure is restored.
void StoreToLDS( float fValue, uint2 u2Texel )
{
    g_LDS.fCameraZ[u2Texel.y][u2Texel.x] = fValue;
}

//=================================================================================================================================
// HDAO sample pattern
// Each entry is a texel offset from the centre texel; ComputeHDAO samples the offset and
// its negation as a pair. The largest magnitudes are 10 in x and 12 in y.
//=================================================================================================================================
#define NUM_VALLEYS (48)

static const int2 g_i2HDAOSamplePattern[NUM_VALLEYS] =
{
    {  0, -11 }, {  2, -10 }, {  0,  -9 }, {  5,  -9 },
    {  2,  -8 }, {  7,  -8 }, {  0,  -7 }, {  5,  -7 },
    {  2,  -6 }, {  7,  -6 }, {  8,  -6 }, {  0,  -5 },
    {  5,  -5 }, { 10,  -5 }, {  2,  -4 }, {  7,  -4 },
    {  0,  -3 }, {  5,  -3 }, { 10,  -3 }, {  2,  -2 },
    {  7,  -2 }, {  0,  -1 }, {  5,  -1 }, { 10,  -1 },
    {  2,   0 }, {  7,   0 }, {  5,   1 }, { 10,   1 },
    {  2,   2 }, {  7,   2 }, {  5,   3 }, { 10,   3 },
    {  2,   4 }, {  7,   4 }, {  5,   5 }, { 10,   5 },
    {  2,   6 }, {  7,   6 }, {  5,   7 }, {  6,   7 },
    { 10,   7 }, {  2,   8 }, {  7,   8 }, {  5,   9 },
    {  2,  10 }, {  7,  10 }, {  5,  11 }, {  2,  12 },
};

// Per-valley weight, indexed in step with g_i2HDAOSamplePattern.
static const float g_fHDAOSampleWeights[NUM_VALLEYS] =
{
    0.1538, 0.2155, 0.3077, 0.2080,
    0.3657, 0.1823, 0.4615, 0.3383,
    0.5135, 0.2908, 0.2308, 0.6154,
    0.4561, 0.1400, 0.6560, 0.3798,
    0.7692, 0.5515, 0.1969, 0.7824,
    0.4400, 0.9231, 0.6078, 0.2269,
    0.8462, 0.4615, 0.6078, 0.2269,
    0.7824, 0.4400, 0.5515, 0.1969,
    0.6560, 0.3798, 0.4561, 0.1400,
    0.5135, 0.2908, 0.3383, 0.2908,
    0.0610, 0.3657, 0.1823, 0.2080,
    0.2155, 0.0610, 0.0705, 0.0642,
};

// Normalisation divisor for the accumulated occlusion; matches the sum of the
// 48 weights above (18.4197) to within rounding.
static float g_fWeightTotal = 18.4198;

// Used by the valley angle function
#define NUM_NORMAL_LOADS (4)

static const int2 g_i2NormalLoadPattern[NUM_NORMAL_LOADS] =
{
    {  0, -9 },
    {  6, -6 },
    { 10,  0 },
    {  8,  9 },
};

//=================================================================================================================================
// Computes the general valley angle
//=================================================================================================================================
// u2ScreenCoord: screen texel whose surrounding normals are examined.
// Returns a scale factor applied to the occlusion in ComputeHDAO(): NUM_NORMAL_LOADS
// offset/mirror pairs of normals are compared against the centre normal N, and samples
// whose dot product with N exceeds g_fAcceptAngle contribute nothing.
//
// NOTE(review): this file had lost its line breaks, so `//` comments swallowed the code
// after them; line structure is restored here.
float ValleyAngle( uint2 u2ScreenCoord )
{
    float3 f3N1;
    float3 f3N2;
    float fDot;
    float fSummedDot = 0.0f;
    int2 i2MirrorPattern;
    int2 i2OffsetScreenCoord;
    int2 i2MirrorOffsetScreenCoord;

    // Largest valid texel coordinate; loop-invariant, so computed once.
    const int2 i2MaxCoord = int2( g_f2RTSize - float2( 1.0f, 1.0f ) );

#ifdef GBUFFER_OPTIMIZATION
    float3 N = gbuf_unpack_normal( g_txNormal.Load( int3( u2ScreenCoord, 0), 0).xy );
#else
    float3 N = g_txNormal.Load( int3( u2ScreenCoord, 0), 0).xyz;
#endif

    for( int iNormal = 0; iNormal < NUM_NORMAL_LOADS; iNormal++ )
    {
        // NOTE(review): in SOURCE the text between `iNormal` and the first clamp was
        // missing (the loop read `for( int iNormal=0; iNormal ( g_f2RTSize - ...`), which
        // left both offset coordinates uninitialised. The loop bound and the three
        // assignments below are a reconstruction - confirm against the upstream shader.
        i2MirrorPattern = ( g_i2NormalLoadPattern[iNormal] + int2( 1, 1 ) ) * int2( -1, -1 );
        i2OffsetScreenCoord = int2( u2ScreenCoord ) + g_i2NormalLoadPattern[iNormal];
        i2MirrorOffsetScreenCoord = int2( u2ScreenCoord ) + i2MirrorPattern;

        // Clamp both taps to the screen: upper bound first, then lower bound,
        // in the same order as the original per-component selects.
        i2OffsetScreenCoord = max( min( i2OffsetScreenCoord, i2MaxCoord ), int2( 0, 0 ) );
        i2MirrorOffsetScreenCoord = max( min( i2MirrorOffsetScreenCoord, i2MaxCoord ), int2( 0, 0 ) );

#ifdef GBUFFER_OPTIMIZATION
        f3N1.xy = g_txNormal.Load( int3( i2OffsetScreenCoord, 0), 0).xy;
        f3N1.xyz = gbuf_unpack_normal( f3N1.xy );
        f3N2.xy = g_txNormal.Load( int3( i2MirrorOffsetScreenCoord, 0), 0).xy;
        f3N2.xyz = gbuf_unpack_normal( f3N2.xy );
#else
        f3N1.xyz = g_txNormal.Load( int3( i2OffsetScreenCoord, 0), 0 ).xyz;
        f3N2.xyz = g_txNormal.Load( int3( i2MirrorOffsetScreenCoord, 0), 0 ).xyz;
#endif

        // A normal whose dot with N is above g_fAcceptAngle adds nothing;
        // otherwise it adds 1 - |dot| / 4.
        fDot = dot( f3N1, N );
        fSummedDot += ( fDot > g_fAcceptAngle ) ? ( 0.0f ) : ( 1.0f - ( abs( fDot ) * 0.25f ) );

        fDot = dot( f3N2, N );
        fSummedDot += ( fDot > g_fAcceptAngle ) ? ( 0.0f ) : ( 1.0f - ( abs( fDot ) * 0.25f ) );
    }

    // Average over all taps (two per pattern entry; 8 with NUM_NORMAL_LOADS == 4), then bias by 0.5.
    fSummedDot /= ( 2.0f * NUM_NORMAL_LOADS );
    fSummedDot += 0.5f;

    // If the biased value did not rise above 0.5, cut it by a further factor of 10.
    fSummedDot = ( fSummedDot <= 0.5f ) ? ( fSummedDot / 10.0f ) : ( fSummedDot );

    return fSummedDot;
}

//=================================================================================================================================
// Computes the HDAO term for one texel.
// u2CenterTexel: texel coordinate inside the group LDS footprint (used with LoadFromLDS).
// u2ScreenPos:   the same texel in screen coordinates (used for the normal lookups).
// Returns 1 - saturate( occlusion ), i.e. 1.0 when nothing occludes.
//=================================================================================================================================
float ComputeHDAO( uint2 u2CenterTexel, uint2 u2ScreenPos )
{
    // Locals
    float fCenterZ;
    float fOcclusion = 0.0f;
    float2 f2SampledZ;
    float2 f2Diff;
    float2 f2Compare;
    float fDot;

    // Get the general valley angle, to scale the result by
    fDot = ValleyAngle( u2ScreenPos );

    // Sample center texel
    fCenterZ = LoadFromLDS( u2CenterTexel );

    // Loop through each valley
    [unroll]
    for( uint uValley = 0; uValley < NUM_VALLEYS; uValley++ )
    {
        // Sample the pattern offset (.x) and its mirror through the centre (.y)
        f2SampledZ.x = LoadFromLDS( u2CenterTexel + g_i2HDAOSamplePattern[uValley] );
        f2SampledZ.y = LoadFromLDS( u2CenterTexel - g_i2HDAOSamplePattern[uValley] );

        // Valley detect: a sample passes when accept < ( centre - sample ) < reject
        f2Diff = fCenterZ.xx - f2SampledZ;
        f2Compare = ( f2Diff < g_fHDAORejectRadius.xx ) ? ( 1.0f ) : ( 0.0f );
        f2Compare *= ( f2Diff > g_fHDAOAcceptRadius.xx ) ? ( 1.0f ) : ( 0.0f );

        // Weight occlusion; counts only when BOTH sides of the pair pass
        fOcclusion += ( f2Compare.x * f2Compare.y * g_fHDAOSampleWeights[uValley] );
    }

    // Finally calculate the HDAO occlusion value
    fOcclusion /= g_fWeightTotal;
    fOcclusion *= g_fHDAOIntensity * fDot;

    // Depth fade: zero for fCenterZ <= 0.5, ramping linearly to one at fCenterZ = 1.5.
    // Same result as the original `fCenterZ < 0.5f ? 0.0f : lerp( 0.0f, 1.0f, saturate( ... ) )`,
    // because lerp( 0, 1, t ) is t and saturate() already yields 0 below the threshold.
    fOcclusion *= saturate( fCenterZ - 0.5f );

    fOcclusion = 1.0f - saturate( fOcclusion );

    return fOcclusion;
}

//=============================================================================================================================
// HDAO CS: Performs valley detection in Camera Z space, and offsets by the Z
// component of the camera space normal
//
// NOTE(review): SOURCE is cut off inside the gather loop header at the end of this block.
// The visible part of main() is reproduced unchanged (only re-lined); the remainder is
// expected to follow directly after the last line.
//=============================================================================================================================
[numthreads( GROUP_THREAD_DIM, GROUP_THREAD_DIM, 1 )]
void main( uint3 Gid : SV_GroupID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
{
    // Locals
    float2 f2ScreenCoord;
    float2 f2Coord;
    float2 f2InvTextureSize = 1.0f / g_f2RTSize;
    float4 f4Depth;
    float4 f4Normal;
    float4 f4LDSValue;
    uint uColumn, uRow;

    if( GI < GATHER_THREADS )
    {
        // Get the screen position for this threads TEX ops
        uColumn = ( GI % GATHER_THREADS_PER_ROW ) * GATHER_PER_THREAD * 2;
        uRow = ( GI / GATHER_THREADS_PER_ROW ) * 2;
        f2ScreenCoord = float2( ( float2( Gid.x, Gid.y ) * float2( ALU_DIM, ALU_DIM ) ) - float2( GROUP_TEXEL_OVERLAP, GROUP_TEXEL_OVERLAP ) ) + float2( uColumn, uRow );

        // Offset for the use of gather4
        f2ScreenCoord += float2( 1.0f, 1.0f );

        // Gather from input textures and lay down in the LDS
        [unroll]
        for( uint uGather=0; uGather