Dx11 spherical harmonics decoding ( 15 ALU )

//--------------------------------------------------------------------------------------
//
// Dx11SH.hlsl                  ( hanecci 2012/05/30 )
//
//--------------------------------------------------------------------------------------

// Per-vertex input read by MainVS.
struct VS_INPUT
{
    // Vertex position; MainVS multiplies it by mWorldViewProjectionMatrix.
    float4 vPosition    : POSITION;
    // Vertex normal; MainVS multiplies it by the 3x3 part of mWorldMatrix.
    float3 vNormal      : NORMAL;
};

//--------------------------------------------------------------------------------------

// Values produced by MainVS.
// PS_INPUT declares the same two semantics in the same order; keep the two
// structs in step if either one is edited.
struct VS_OUTPUT
{
    // Normal after the mWorldMatrix 3x3 transform (MainVS does not re-normalize it).
    float3 vNormal      : NORMAL;
    // Position after the mWorldViewProjectionMatrix transform.
    float4 vPosition    : SV_POSITION;
};

//--------------------------------------------------------------------------------------

// Vertex shader entry point.
//
// input  : per-vertex position and normal (see VS_INPUT).
// return : position multiplied by mWorldViewProjectionMatrix and normal
//          multiplied by the upper-left 3x3 of mWorldMatrix.
//
// Both matrices are declared outside this block.
VS_OUTPUT MainVS( VS_INPUT input )
{
    VS_OUTPUT result;

    // Only the 3x3 part of the world matrix is applied, so any translation
    // stored in it does not affect the normal.
    // NOTE(review): presumably mWorldMatrix carries no non-uniform scale;
    // otherwise an inverse-transpose matrix would be needed here - confirm.
    result.vNormal   = mul( input.vNormal, (float3x3) mWorldMatrix );

    result.vPosition = mul( input.vPosition, mWorldViewProjectionMatrix );

    return result;
}

//--------------------------------------------------------------------------------------

// Input to MainPS. Declares the same semantics, in the same order, as VS_OUTPUT.
struct PS_INPUT
{
    // Normal written by the vertex stage; MainPS normalizes it before use.
    float3 vNormal     : NORMAL;
    // Not read by MainPS.
    float4 vPositionSS : SV_Position;
};

//--------------------------------------------------------------------------------------

// Evaluates n^T * M * n, where row0..row3 are the four rows of the 4x4
// matrix M and n is a 4-component vector.
float
evalSHQuadraticForm( float4 row0, float4 row1, float4 row2, float4 row3, float4 n )
{
    // M * n : one dot product per matrix row.
    float4 m_times_n = float4( dot( row0, n ),
                               dot( row1, n ),
                               dot( row2, n ),
                               dot( row3, n ) );

    // n^T * ( M * n )
    return dot( n, m_times_n );
}

// Decodes the SH matrices into an RGB value for the given direction.
//
// normal : 4-component direction vector. The compiled listing in this file
//          feeds it ( normalized normal, 1.0 ).
// return : one quadratic form per colour channel, using the mSH_r_vec*,
//          mSH_g_vec* and mSH_b_vec* constants as the rows of the red,
//          green and blue matrices respectively.
float3
getIrradianceBySHMatrix( float4 normal )
{
    float red   = evalSHQuadraticForm( mSH_r_vec0, mSH_r_vec1, mSH_r_vec2, mSH_r_vec3, normal );
    float green = evalSHQuadraticForm( mSH_g_vec0, mSH_g_vec1, mSH_g_vec2, mSH_g_vec3, normal );
    float blue  = evalSHQuadraticForm( mSH_b_vec0, mSH_b_vec1, mSH_b_vec2, mSH_b_vec3, normal );

    return float3( red, green, blue );
}

//--------------------------------------------------------------------------------------

// Pixel shader entry point.
//
// input  : normal from the vertex stage (see PS_INPUT); vPositionSS is unused.
// return : RGB from getIrradianceBySHMatrix for the normalized normal, alpha = 1.
float4 MainPS( PS_INPUT input ) : SV_TARGET
{
    // The incoming normal is not guaranteed to be unit length, so normalize it.
    float3 normal   = normalize( input.vNormal.xyz );

    // getIrradianceBySHMatrix takes a float4 and HLSL does not implicitly widen
    // a float3 to a float4, so the fourth component is supplied explicitly.
    // w = 1 is the value the compiled listing in this file uses
    // ( mov r0.w, l(1.000000) ); it is what lets the fourth row and column of
    // each SH matrix contribute their linear and constant terms.
    float4 normal4  = float4( normal, 1.0f );

    // NOTE(review): the compiled listing in this file writes the colour with
    // dp4_sat, but no saturate() appears in this source - confirm whether the
    // result is meant to be clamped to [0,1] here.
    float3 color3 = getIrradianceBySHMatrix( normal4 );

    return float4( color3, 1.0f );
}

//--------------------------------------------------------------------------------------
//--------------------------------------------------------------------------------------
//
// Generated by Microsoft (R) HLSL Shader Compiler 9.29.952.3111
//
//
//   fxc Dx11SH.hlsl /Fc
//    shaders\Dx11SH.fx_ps_asm /nologo /Tps_5_0 /O3 /EMainPS
//
//
// Buffer Definitions: 
//
// cbuffer cbSHMatrix
// {
//
//   float4 mSH_r_vec0;                 // Offset:    0 Size:    16
//   float4 mSH_r_vec1;                 // Offset:   16 Size:    16
//   float4 mSH_r_vec2;                 // Offset:   32 Size:    16
//   float4 mSH_r_vec3;                 // Offset:   48 Size:    16
//   float4 mSH_g_vec0;                 // Offset:   64 Size:    16
//   float4 mSH_g_vec1;                 // Offset:   80 Size:    16
//   float4 mSH_g_vec2;                 // Offset:   96 Size:    16
//   float4 mSH_g_vec3;                 // Offset:  112 Size:    16
//   float4 mSH_b_vec0;                 // Offset:  128 Size:    16
//   float4 mSH_b_vec1;                 // Offset:  144 Size:    16
//   float4 mSH_b_vec2;                 // Offset:  160 Size:    16
//   float4 mSH_b_vec3;                 // Offset:  176 Size:    16
//
// }
//
//
// Resource Bindings:
//
// Name                                 Type  Format         Dim Slot Elements
// ------------------------------ ---------- ------- ----------- ---- --------
// cbSHMatrix                        cbuffer      NA          NA    6        1
//
//
//
// Input signature:
//
// Name                 Index   Mask Register SysValue Format   Used
// -------------------- ----- ------ -------- -------- ------ ------
// NORMAL                   0   xyz         0     NONE  float   xyz 
// SV_Position              0   xyzw        1      POS  float       
// COLOR                    0   xyzw        2     NONE  float       
//
//
// Output signature:
//
// Name                 Index   Mask Register SysValue Format   Used
// -------------------- ----- ------ -------- -------- ------ ------
// SV_TARGET                0   xyzw        0   TARGET  float   xyzw
//
// Compiled form of MainPS (pixel shader model 5.0). Generated output: the
// comments were added by hand, the instructions are unchanged.
ps_5_0
dcl_globalFlags refactoringAllowed
// cb6[0..11]: the twelve float4 constants of cbSHMatrix (slot 6), in the
// order r_vec0-3, g_vec0-3, b_vec0-3 given by the offsets listed above.
dcl_constantbuffer cb6[12], immediateIndexed
// v0.xyz: the NORMAL input (register 0 in the input signature).
dcl_input_ps linear v0.xyz
// o0: the SV_TARGET output.
dcl_output o0.xyzw
dcl_temps 2
// Output alpha = 1.
mov o0.w, l(1.000000)
// r0.xyz = v0.xyz * rsqrt( dot( v0.xyz, v0.xyz ) ), i.e. the normalized normal.
dp3 r0.x, v0.xyzx, v0.xyzx
rsq r0.x, r0.x
mul r0.xyz, r0.xxxx, v0.xyzx
// r0.w = 1, giving the 4-component vector used by every dp4 below.
mov r0.w, l(1.000000)

// SH decoding ( 15 ALU )
// Red: r1 = the four row dot products of cb6[0..3] with r0, then
// o0.x = dot( r0, r1 ) with the _sat modifier applied to the result.
dp4 r1.x, cb6[0].xyzw, r0.xyzw
dp4 r1.y, cb6[1].xyzw, r0.xyzw
dp4 r1.z, cb6[2].xyzw, r0.xyzw
dp4 r1.w, cb6[3].xyzw, r0.xyzw
dp4_sat o0.x, r0.xyzw, r1.xyzw
// Green: same pattern with cb6[4..7]; r1 is reused as scratch.
dp4 r1.x, cb6[4].xyzw, r0.xyzw
dp4 r1.y, cb6[5].xyzw, r0.xyzw
dp4 r1.z, cb6[6].xyzw, r0.xyzw
dp4 r1.w, cb6[7].xyzw, r0.xyzw
dp4_sat o0.y, r0.xyzw, r1.xyzw
// Blue: same pattern with cb6[8..11].
dp4 r1.x, cb6[8].xyzw, r0.xyzw
dp4 r1.y, cb6[9].xyzw, r0.xyzw
dp4 r1.z, cb6[10].xyzw, r0.xyzw
dp4 r1.w, cb6[11].xyzw, r0.xyzw
dp4_sat o0.z, r0.xyzw, r1.xyzw

ret 
// Approximately 21 instruction slots used

//--------------------------------------------------------------------------------------