Skip to content

Commit

Permalink
Add last minute tweaks to v3.1.0
Browse files Browse the repository at this point in the history
* Add ComputeTangents compute kernel to update tangents for all vertices
(guide + follow). Previously we computed them only for guide hairs and copied
them to the follow hairs. This seems to help reduce the flashing problem around
the bear's head.
* Fix hair density in the skinning-only mode (i.e. when simulation is
disabled).
* Change default ShortCut config to 3 layers + deterministic. Looks much
better.
  • Loading branch information
khillesl-AMD authored and jstewart-amd committed May 19, 2016
1 parent ee890c5 commit 87705f1
Show file tree
Hide file tree
Showing 15 changed files with 4,280 additions and 4,323 deletions.
4 changes: 2 additions & 2 deletions amd_tressfx/src/Shaders/TressFXRender.hlsl
Original file line number Diff line number Diff line change
Expand Up @@ -66,13 +66,13 @@
#define SHORTCUT_INITIAL_DEPTH 0x3f800000

// Number of depth layers to use. 2 or 3 supported.
#define SHORTCUT_NUM_DEPTHS 2
#define SHORTCUT_NUM_DEPTHS 3

// Compute source color as weighted average of front fragments, vs blending in order.
#define SHORTCUT_WEIGHTED_AVERAGE 1

// Output color deterministically when fragments have the same depth. Requires additional clear of colors resource.
#define SHORTCUT_DETERMINISTIC 0
#define SHORTCUT_DETERMINISTIC 1


//--------------------------------------------------------------------------------------
Expand Down
156 changes: 90 additions & 66 deletions amd_tressfx/src/Shaders/TressFXSimulation.hlsl
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,6 @@ StructuredBuffer<HairToTriangleMapping> g_HairToMeshMapping : register(t
#define THREAD_GROUP_SIZE 64

groupshared float4 sharedPos[THREAD_GROUP_SIZE];
groupshared float4 sharedTangent[THREAD_GROUP_SIZE];
groupshared float sharedLength[THREAD_GROUP_SIZE];

//--------------------------------------------------------------------------------------
Expand Down Expand Up @@ -314,10 +313,10 @@ bool CapsuleCollision(float4 curPosition, float4 oldPosition, inout float3 newPo
return false;

float3 segment = cc.p2.xyz - cc.p1.xyz;
float3 delta1 = curPosition.xyz - cc.p1.xyz;
float3 delta2 = cc.p2.xyz - curPosition.xyz;
float3 delta1 = curPosition.xyz - cc.p1.xyz;
float3 delta2 = cc.p2.xyz - curPosition.xyz;

float dist1 = dot(delta1, segment);
float dist1 = dot(delta1, segment);
float dist2 = dot(delta2, segment);

// colliding with sphere 1
Expand All @@ -326,7 +325,7 @@ bool CapsuleCollision(float4 curPosition, float4 oldPosition, inout float3 newPo
if ( dot(delta1, delta1) < radius2 )
{
float3 n = normalize(delta1);
newPosition = radius * n + cc.p1.xyz;
newPosition = radius * n + cc.p1.xyz;
return true;
}

Expand All @@ -342,7 +341,7 @@ bool CapsuleCollision(float4 curPosition, float4 oldPosition, inout float3 newPo
if ( dot(delta2, delta2) < radius2 )
{
float3 n = normalize(-delta2);
newPosition = radius * n + cc.p2.xyz;
newPosition = radius * n + cc.p2.xyz;
return true;
}

Expand All @@ -356,11 +355,11 @@ bool CapsuleCollision(float4 curPosition, float4 oldPosition, inout float3 newPo
if ( dot(delta, delta) < radius2 )
{
float3 n = normalize(delta);
float3 vec = curPosition.xyz - oldPosition.xyz;
float3 segN = normalize(segment);
float3 vecTangent = dot(vec, segN) * segN;
float3 vecNormal = vec - vecTangent;
newPosition = oldPosition.xyz + friction * vecTangent + (vecNormal + radius * n - delta);
float3 vec = curPosition.xyz - oldPosition.xyz;
float3 segN = normalize(segment);
float3 vecTangent = dot(vec, segN) * segN;
float3 vecNormal = vec - vecTangent;
newPosition = oldPosition.xyz + friction * vecTangent + (vecNormal + radius * n - delta);
return true;
}

Expand Down Expand Up @@ -541,7 +540,7 @@ void LocalShapeConstraints(uint GIndex : SV_GroupIndex,
//--------------------------------------------
{
float4 pos = g_HairVertexPositions[globalRootVertexIndex + 1];
float4 pos_plus_one;
float4 pos_plus_one;
uint globalVertexIndex = 0;
float4 rotGlobal = g_GlobalRotations[globalRootVertexIndex];

Expand All @@ -561,12 +560,12 @@ void LocalShapeConstraints(uint GIndex : SV_GroupIndex,
rotGlobalWorld = MultQuaternionAndQuaternion(g_Transforms[globalStrandIndex].quat, rotGlobal);

float3 orgPos_i_plus_1_InLocalFrame_i = g_HairRefVecsInLocalFrame[globalVertexIndex + 1].xyz;
float3 orgPos_i_plus_1_InGlobalFrame = MultQuaternionAndVector(rotGlobalWorld, orgPos_i_plus_1_InLocalFrame_i) + pos.xyz;
float3 orgPos_i_plus_1_InGlobalFrame = MultQuaternionAndVector(rotGlobalWorld, orgPos_i_plus_1_InLocalFrame_i) + pos.xyz;

float3 del = stiffnessForLocalShapeMatching * (orgPos_i_plus_1_InGlobalFrame - pos_plus_one.xyz).xyz;
float3 del = stiffnessForLocalShapeMatching * (orgPos_i_plus_1_InGlobalFrame - pos_plus_one.xyz).xyz;

if ( IsMovable(pos) )
pos.xyz -= del.xyz;
if ( IsMovable(pos) )
pos.xyz -= del.xyz;

if ( IsMovable(pos_plus_one) )
pos_plus_one.xyz += del.xyz;
Expand All @@ -575,20 +574,20 @@ void LocalShapeConstraints(uint GIndex : SV_GroupIndex,
// Update local/global frames
//---------------------------
float4 invRotGlobalWorld = InverseQuaternion(rotGlobalWorld);
float3 vec = normalize(pos_plus_one.xyz - pos.xyz);
float3 vec = normalize(pos_plus_one.xyz - pos.xyz);

float3 x_i_plus_1_frame_i = normalize(MultQuaternionAndVector(invRotGlobalWorld, vec));
float3 e = float3(1.0f, 0, 0);
float3 rotAxis = cross(e, x_i_plus_1_frame_i);
float3 x_i_plus_1_frame_i = normalize(MultQuaternionAndVector(invRotGlobalWorld, vec));
float3 e = float3(1.0f, 0, 0);
float3 rotAxis = cross(e, x_i_plus_1_frame_i);

if ( length(rotAxis) > 0.001 )
{
float angle_radian = acos(dot(e, x_i_plus_1_frame_i));
rotAxis = normalize(rotAxis);
if ( length(rotAxis) > 0.001 )
{
float angle_radian = acos(dot(e, x_i_plus_1_frame_i));
rotAxis = normalize(rotAxis);

float4 localRot = MakeQuaternion(angle_radian, rotAxis);
rotGlobal = MultQuaternionAndQuaternion(rotGlobal, localRot);
}
float4 localRot = MakeQuaternion(angle_radian, rotAxis);
rotGlobal = MultQuaternionAndQuaternion(rotGlobal, localRot);
}

g_HairVertexPositions[globalVertexIndex].xyz = pos.xyz;
g_HairVertexPositions[globalVertexIndex + 1].xyz = pos_plus_one.xyz;
Expand Down Expand Up @@ -659,23 +658,23 @@ void LocalShapeConstraintsWithIteration(uint GIndex : SV_GroupIndex,
globalVertexIndex = globalRootVertexIndex + localVertexIndex;
float4 pos_plus_one = sharedStrandPos[localVertexIndex + 1];

//--------------------------------
// Update position i and i_plus_1
//--------------------------------
float4 rotGlobalWorld;
//--------------------------------
// Update position i and i_plus_1
//--------------------------------
float4 rotGlobalWorld;

if ( g_bSingleHeadTransform )
rotGlobalWorld = MultQuaternionAndQuaternion(g_ModelRotateForHead, rotGlobal);
else
rotGlobalWorld = MultQuaternionAndQuaternion(g_Transforms[globalStrandIndex].quat, rotGlobal);

float3 orgPos_i_plus_1_InLocalFrame_i = g_HairRefVecsInLocalFrame[globalVertexIndex + 1].xyz;
float3 orgPos_i_plus_1_InGlobalFrame = MultQuaternionAndVector(rotGlobalWorld, orgPos_i_plus_1_InLocalFrame_i) + pos.xyz;
float3 orgPos_i_plus_1_InGlobalFrame = MultQuaternionAndVector(rotGlobalWorld, orgPos_i_plus_1_InLocalFrame_i) + pos.xyz;

float3 del = stiffnessForLocalShapeMatching * (orgPos_i_plus_1_InGlobalFrame - pos_plus_one.xyz).xyz;
float3 del = stiffnessForLocalShapeMatching * (orgPos_i_plus_1_InGlobalFrame - pos_plus_one.xyz).xyz;

if ( IsMovable(pos) )
pos.xyz -= del.xyz;
if ( IsMovable(pos) )
pos.xyz -= del.xyz;

if ( IsMovable(pos_plus_one) )
pos_plus_one.xyz += del.xyz;
Expand All @@ -684,20 +683,20 @@ void LocalShapeConstraintsWithIteration(uint GIndex : SV_GroupIndex,
// Update local/global frames
//---------------------------
float4 invRotGlobalWorld = InverseQuaternion(rotGlobalWorld);
float3 vec = normalize(pos_plus_one.xyz - pos.xyz);
float3 vec = normalize(pos_plus_one.xyz - pos.xyz);

float3 x_i_plus_1_frame_i = normalize(MultQuaternionAndVector(invRotGlobalWorld, vec));
float3 e = float3(1.0f, 0, 0);
float3 rotAxis = cross(e, x_i_plus_1_frame_i);
float3 x_i_plus_1_frame_i = normalize(MultQuaternionAndVector(invRotGlobalWorld, vec));
float3 e = float3(1.0f, 0, 0);
float3 rotAxis = cross(e, x_i_plus_1_frame_i);

if ( length(rotAxis) > 0.001 )
{
float angle_radian = acos(dot(e, x_i_plus_1_frame_i));
rotAxis = normalize(rotAxis);
if ( length(rotAxis) > 0.001 )
{
float angle_radian = acos(dot(e, x_i_plus_1_frame_i));
rotAxis = normalize(rotAxis);

float4 localRot = MakeQuaternion(angle_radian, rotAxis);
rotGlobal = MultQuaternionAndQuaternion(rotGlobal, localRot);
}
float4 localRot = MakeQuaternion(angle_radian, rotAxis);
rotGlobal = MultQuaternionAndQuaternion(rotGlobal, localRot);
}

sharedStrandPos[localVertexIndex].xyz = pos.xyz;
sharedStrandPos[localVertexIndex + 1].xyz = pos_plus_one.xyz;
Expand Down Expand Up @@ -748,20 +747,18 @@ void LengthConstriantsWindAndCollision(uint GIndex : SV_GroupIndex,
if ( g_Wind.x != 0 || g_Wind.y != 0 || g_Wind.z != 0 )
{
float4 force = float4(0, 0, 0, 0);

float frame = g_Wind.w;
float frame = g_Wind.w;

if ( localVertexIndex >= 2 && localVertexIndex < numVerticesInTheStrand - 1 )
{
// combining four winds.
float a = ((float)(globalStrandIndex % 20)) / 20.0f;
float3 w = a*g_Wind.xyz + (1.0f - a)*g_Wind1.xyz + a*g_Wind2.xyz + (1.0f - a)*g_Wind3.xyz;

uint sharedIndex = localVertexIndex * numOfStrandsPerThreadGroup + localStrandIndex;
uint sharedIndex = localVertexIndex * numOfStrandsPerThreadGroup + localStrandIndex;

float3 v = sharedPos[sharedIndex].xyz - sharedPos[sharedIndex + numOfStrandsPerThreadGroup].xyz;
float3 force = -cross(cross(v, w), v);
sharedPos[sharedIndex].xyz += force*g_TimeStep*g_TimeStep;
float3 force = -cross(cross(v, w), v);
sharedPos[sharedIndex].xyz += force*g_TimeStep*g_TimeStep;
}
}

Expand Down Expand Up @@ -808,9 +805,9 @@ void LengthConstriantsWindAndCollision(uint GIndex : SV_GroupIndex,

{
float3 center1 = g_cc0_center1AndRadius.xyz;
center1 = mul(float4(center1.xyz, 1), xf).xyz;
center1 = mul(float4(center1.xyz, 1), xf).xyz;
float3 center2 = g_cc0_center2AndRadiusSquared.xyz;
center2 = mul(float4(center2.xyz, 1), xf).xyz;
center2 = mul(float4(center2.xyz, 1), xf).xyz;

CollisionCapsule cc;
cc.p1.xyz = center1;
Expand All @@ -827,9 +824,9 @@ void LengthConstriantsWindAndCollision(uint GIndex : SV_GroupIndex,

{
float3 center1 = g_cc1_center1AndRadius.xyz;
center1 = mul(float4(center1.xyz, 1), xf).xyz;
center1 = mul(float4(center1.xyz, 1), xf).xyz;
float3 center2 = g_cc1_center2AndRadiusSquared.xyz;
center2 = mul(float4(center2.xyz, 1), xf).xyz;
center2 = mul(float4(center2.xyz, 1), xf).xyz;

CollisionCapsule cc;
cc.p1.xyz = center1;
Expand All @@ -841,14 +838,15 @@ void LengthConstriantsWindAndCollision(uint GIndex : SV_GroupIndex,

if ( bColDetected )
sharedPos[indexForSharedMem].xyz = newPos;

bAnyColDetected = bAnyColDetected || bColDetected;
}

{
float3 center1 = g_cc2_center1AndRadius.xyz;
center1 = mul(float4(center1.xyz, 1), xf).xyz;
center1 = mul(float4(center1.xyz, 1), xf).xyz;
float3 center2 = g_cc2_center2AndRadiusSquared.xyz;
center2 = mul(float4(center2.xyz, 1), xf).xyz;
center2 = mul(float4(center2.xyz, 1), xf).xyz;

CollisionCapsule cc;
cc.p1.xyz = center1;
Expand All @@ -860,18 +858,13 @@ void LengthConstriantsWindAndCollision(uint GIndex : SV_GroupIndex,

if ( bColDetected )
sharedPos[indexForSharedMem].xyz = newPos;

bAnyColDetected = bAnyColDetected || bColDetected;
}
}

GroupMemoryBarrierWithGroupSync();

//-------------------
// Compute tangent
//-------------------
float3 tangent = sharedPos[indexForSharedMem + numOfStrandsPerThreadGroup].xyz - sharedPos[indexForSharedMem].xyz;
g_HairVertexTangents[globalVertexIndex].xyz = normalize(tangent);

//---------------------------------------
// update global position buffers
//---------------------------------------
Expand All @@ -883,6 +876,39 @@ void LengthConstriantsWindAndCollision(uint GIndex : SV_GroupIndex,
return;
}

// One thread computes one total vertex (guide + follow)
//
// ComputeTangents: writes a normalized tangent into g_HairVertexTangents for
// the vertex handled by this thread, using the positions currently stored in
// g_HairVertexPositions.
//
// Parameters:
//   GIndex - flattened thread index inside the group (SV_GroupIndex); passed to the index helper.
//   GId    - thread group id (SV_GroupID); only GId.x is used.
//   DTid   - dispatch thread id (SV_DispatchThreadID); not referenced in the body.
//
// Tangent definition used below:
//   vertex 0        : forward difference  (vertex 1 - vertex 0)
//   vertex 1 .. n-1 : backward difference (vertex i - vertex i-1)
//
// NOTE(review): the result of normalize() is unreliable if two adjacent
// vertices coincide (zero-length difference) — confirm the simulation keeps
// neighbouring vertices apart.
[numthreads(THREAD_GROUP_SIZE, 1, 1)]
void ComputeTangents(uint GIndex : SV_GroupIndex,
uint3 GId : SV_GroupID,
uint3 DTid : SV_DispatchThreadID)
{
// All index outputs are filled in by CalcIndicesInVertexLevelTotal (defined elsewhere in this file).
// globalStrandIndex, localStrandIndex, numVerticesInTheStrand and strandType are only
// referenced by the disabled block at the bottom or not at all.
uint globalStrandIndex, localStrandIndex, globalVertexIndex, localVertexIndex, numVerticesInTheStrand, indexForSharedMem, strandType;
CalcIndicesInVertexLevelTotal(GIndex, GId.x, globalStrandIndex, localStrandIndex, globalVertexIndex, localVertexIndex, numVerticesInTheStrand, indexForSharedMem, strandType);

// Stage this thread's vertex position in groupshared memory, then wait for the whole
// group so the neighbouring vertices read below have been written by their threads.
sharedPos[indexForSharedMem] = g_HairVertexPositions[globalVertexIndex];
GroupMemoryBarrierWithGroupSync();

// Offset between consecutive vertices of one strand inside sharedPos.
// Presumably vertices are interleaved across the strands of the group, as in the
// wind code (localVertexIndex * numOfStrandsPerThreadGroup + localStrandIndex) —
// TODO confirm against CalcIndicesInVertexLevelTotal.
uint numOfStrandsPerThreadGroup = g_NumOfStrandsPerThreadGroup;

if ( localVertexIndex == 0 ) // vertex 0
{
// No previous vertex exists, so use the direction towards the next vertex.
float3 tangent = sharedPos[indexForSharedMem + numOfStrandsPerThreadGroup].xyz - sharedPos[indexForSharedMem].xyz;
// Only .xyz is written; the w component of the tangent entry is left as it was.
g_HairVertexTangents[globalVertexIndex].xyz = normalize(tangent);
}
else // vertex 1 through n-1
{
// Direction from the previous vertex to this one; this also covers the last
// vertex of the strand, which has no next vertex.
float3 vert_i_minus_1 = sharedPos[indexForSharedMem - numOfStrandsPerThreadGroup].xyz;
float3 vert_i = sharedPos[indexForSharedMem].xyz;
g_HairVertexTangents[globalVertexIndex].xyz = normalize(vert_i - vert_i_minus_1);
}

// Disabled alternative: forward difference for every vertex except the last one,
// which would leave the last vertex's tangent unwritten by this kernel.
/*if ( localVertexIndex < numVerticesInTheStrand - 1 )
{
float3 tangent = sharedPos[indexForSharedMem + numOfStrandsPerThreadGroup].xyz - sharedPos[indexForSharedMem].xyz;
g_HairVertexTangents[globalVertexIndex].xyz = normalize(tangent);
}*/
}

// One thread computes one vertex.
[numthreads(THREAD_GROUP_SIZE, 1, 1)]
void UpdateFollowHairVertices(uint GIndex : SV_GroupIndex,
Expand All @@ -893,7 +919,6 @@ void UpdateFollowHairVertices(uint GIndex : SV_GroupIndex,
CalcIndicesInVertexLevelMaster(GIndex, GId.x, globalStrandIndex, localStrandIndex, globalVertexIndex, localVertexIndex, numVerticesInTheStrand, indexForSharedMem, strandType);

sharedPos[indexForSharedMem] = g_HairVertexPositions[globalVertexIndex];
sharedTangent[indexForSharedMem] = g_HairVertexTangents[globalVertexIndex];
GroupMemoryBarrierWithGroupSync();

for ( uint i = 0; i < g_NumFollowHairsPerGuideHair; i++ )
Expand All @@ -903,7 +928,6 @@ void UpdateFollowHairVertices(uint GIndex : SV_GroupIndex,
float factor = g_TipSeparationFactor*((float)localVertexIndex / (float)numVerticesInTheStrand) + 1.0f;
float3 followPos = sharedPos[indexForSharedMem].xyz + factor*g_FollowHairRootOffset[globalFollowStrandIndex].xyz;
g_HairVertexPositions[globalFollowVertexIndex].xyz = followPos;
g_HairVertexTangents[globalFollowVertexIndex] = sharedTangent[indexForSharedMem];
}

return;
Expand Down
1 change: 1 addition & 0 deletions amd_tressfx/src/Shaders/build/fxc_compile_tressfx_all.bat
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ REM TressFXRender Pixel Shaders
REM TressFXSimulation Compute Shaders
"%fxc_exe%" /nologo /T cs_5_0 /E IntegrationAndGlobalShapeConstraints /O1 /Fh ..\inc\IntegrationAndGlobalShapeConstraints.inc /Vn IntegrationAndGlobalShapeConstraints_Data ..\TressFXSimulation.hlsl
"%fxc_exe%" /nologo /T cs_5_0 /E ApplyHairTransformGlobally /O1 /Fh ..\inc\ApplyHairTransformGlobally.inc /Vn ApplyHairTransformGlobally_Data ..\TressFXSimulation.hlsl
"%fxc_exe%" /nologo /T cs_5_0 /E ComputeTangents /O1 /Fh ..\inc\ComputeTangents.inc /Vn ComputeTangents_Data ..\TressFXSimulation.hlsl
"%fxc_exe%" /nologo /T cs_5_0 /E LocalShapeConstraints /O1 /Fh ..\inc\LocalShapeConstraints.inc /Vn LocalShapeConstraints_Data ..\TressFXSimulation.hlsl
"%fxc_exe%" /nologo /T cs_5_0 /E LocalShapeConstraintsWithIteration /O1 /Fh ..\inc\LocalShapeConstraintsWithIteration.inc /Vn LocalShapeConstraintsWithIteration_Data ..\TressFXSimulation.hlsl
"%fxc_exe%" /nologo /T cs_5_0 /E LengthConstriantsWindAndCollision /O1 /Fh ..\inc\LengthConstriantsWindAndCollision.inc /Vn LengthConstriantsWindAndCollision_Data ..\TressFXSimulation.hlsl
Expand Down
Loading

0 comments on commit 87705f1

Please sign in to comment.