// Tile clipping on car shadows in snow level
//
// Generated by Microsoft (R) HLSL Shader Compiler 6.3.9600.16384
//
//   using 3Dmigoto v1.3.15 on Wed Mar 06 06:24:55 2019
//
//
// Buffer Definitions:
//
// cbuffer $Globals
// {
//
//   float4 mWV[3];                     // Offset:    0 Size:    48
//   uint nBox;                         // Offset:   48 Size:     4
//   uint2 vScrnSize;                   // Offset:   52 Size:     8
//   row_major float4x4 mP2V;           // Offset:   64 Size:    64
//   float fClstZ[17];                  // Offset:  128 Size:   260
//
// }
//
//
// Resource Bindings:
//
// Name                                 Type  Format         Dim Slot Elements
// ------------------------------ ---------- ------- ----------- ---- --------
// tBoxes                            texture  float4         buf    0        1
// tActiveFlags                      texture    uint         buf    2        1
// uBoxIndices                           UAV    uint         buf    0        1
// uCountBuffer                          UAV    uint         buf    1        1
// $Globals                          cbuffer      NA          NA    0        1
//
//
//
// Input signature:
//
// Name                 Index   Mask Register SysValue  Format   Used
// -------------------- ----- ------ -------- -------- ------- ------
// no Input
//
// Output signature:
//
// Name                 Index   Mask Register SysValue  Format   Used
// -------------------- ----- ------ -------- -------- ------- ------
// no Output
// Compute shader, shader model 5.0.
cs_5_0
dcl_globalFlags refactoringAllowed
// cb0 is $Globals. Register layout, from the buffer definition in the header:
//   cb0[0..2]     mWV[0..2]
//   cb0[3].x      nBox
//   cb0[3].yz     vScrnSize
//   cb0[4..7]     mP2V rows (row_major)
//   cb0[8..24].x  fClstZ[0..16]
// dynamicIndexed because fClstZ is addressed with vThreadGroupID.z at runtime.
dcl_constantbuffer cb0[25], dynamicIndexed
// t0 = tBoxes (float4 buffer), t2 = tActiveFlags (uint buffer).
dcl_resource_buffer (float,float,float,float) t0
dcl_resource_buffer (uint,uint,uint,uint) t2
// u0 = uBoxIndices, u1 = uCountBuffer (typed uint buffers).
dcl_uav_typed_buffer (uint,uint,uint,uint) u0
dcl_uav_typed_buffer (uint,uint,uint,uint) u1
dcl_input vThreadGroupID.xyz
dcl_input vThreadIDInGroup.x
// r0-r9 are used by the culling code; r10 only holds the value read from t121.
dcl_temps 11
// Group-shared memory:
//   g0  one uint: number of boxes this thread group has accepted
//   g1  one uint: value returned by the atomic add on u1, used as the base
//       index when the group's list is copied into u0
//   g2  126 uints: indices of the accepted boxes
dcl_tgsm_raw g0, 4
dcl_tgsm_raw g1, 4
dcl_tgsm_structured g2, 4, 126
// 64 threads per group; the box loops below step by 64 to match.
dcl_thread_group 64, 1, 1

// Texel (0, 0) of t121 is read into r10. Only r10.x is used afterwards, where
// the accompanying comment treats it as the stereo separation.
// NOTE(review): t121 is presumably bound by the 3Dmigoto d3dx.ini - confirm
// which resource is bound there and what its .x component carries.
dcl_resource_texture2d (float,float,float,float) t121
ld_indexable(texture2d)(float,float,float,float) r10.xyzw, l(0, 0, 0, 0), t121.xyzw

// Thread 0 clears the group's hit counter (g0). The barrier makes the zero
// visible to the whole group before any thread increments the counter.
if_z vThreadIDInGroup.x
  store_raw g0.x, l(0), l(0)
endif
sync_g_t
// Pixel bounds of this group's 64x64 tile, clamped to the screen size:
//   r0.x = 64 * groupX
//   r0.y = min(64 * groupX + 64, vScrnSize.x)
//   r0.z = min(64 * groupY + 64, vScrnSize.y)
ishl r0.xy, vThreadGroupID.xyxx, l(6, 6, 0, 0)
iadd r0.yz, r0.xxyx, l(0, 64, 64, 0)
umin r0.yz, r0.yyzy, cb0[3].yyzy
// r1.xy = 2 / vScrnSize, so "pixel * r1 - 1" maps [0, size] onto [-1, 1].
utof r1.xy, cb0[3].yzyy
div r1.xy, l(2.000000, 2.000000, 0.000000, 0.000000), r1.xyxx
utof r0.xy, r0.xyxx
// r0.x = lower x edge of the tile in [-1, 1].
mad r0.x, r0.x, r1.x, l(-1.000000)
// y is flipped: r0.w is built from (vScrnSize.y - 64 * groupY), i.e. from the
// tile's first pixel row, and so is the larger of the two y values.
imad r0.w, l(-64), vThreadGroupID.y, cb0[3].z
utof r0.w, r0.w
mad r0.w, r0.w, r1.y, l(-1.000000)
// r0.y = upper x edge of the tile in [-1, 1].
mad r0.y, r0.y, r1.x, l(-1.000000)
// r0.z is built from (vScrnSize.y - clamped end row): the smaller y value.
iadd r0.z, -r0.z, cb0[3].z
utof r0.z, r0.z
mad r0.z, r0.z, r1.y, l(-1.000000)

// The below matrix multiplies appear to be setting min & max tiles that the
// decal can cover. It doesn't look like we have valid depth info here for a
// proper stereo correction, and we have stereo compute shaders disabled due to
// a plant physics desync issue. Let's expand the min/max tiles out by
// separation so that the decal will be completely covered so long as it is
// inside the screen (if someone sets convergence very high for large popout
// they will see the clipping). Use absolute values so it should (hopefully)
// keep working as is if we re-enable stereo compute shaders later
add r0.x, r0.x, -|r10.x|
add r0.y, r0.y, |r10.x|

// First corner: r1 = (r0.x, r0.w, 1, 1) * mP2V, written out as a sum of the
// matrix rows in cb0[4..7].
mul r1.xyzw, r0.wwww, cb0[5].xyzw
mad r1.xyzw, r0.xxxx, cb0[4].xyzw, r1.xyzw
add r1.xyzw, r1.xyzw, cb0[6].xyzw
add r1.xyzw, r1.xyzw, cb0[7].xyzw
// Divide by w, storing the result rotated: r2.x = z/w, r2.y = x/w, r2.z = y/w.
div r2.xyz, r1.zxyz, r1.wwww
// Second corner: only the x and y of (r0.y, r0.z, 1, 1) * mP2V are computed,
// and they are divided by the FIRST corner's w (r1.w).
// Result: r0.z = x/w, r0.w = y/w.
mul r0.xz, r0.zzzz, cb0[5].xxyx
mad r0.xy, r0.yyyy, cb0[4].xyxx, r0.xzxx
add r0.xy, r0.xyxx, cb0[6].xyxx
add r0.xy, r0.xyxx, cb0[7].xyxx
div r0.zw, r0.xxxy, r1.wwww
// Four normalised 2-D vectors follow, each perpendicular to (corner coord, z).
// The box loop dots them with (y, z) or (x, z) of view-space positions.
// NOTE(review): presumably these are the normals of the four side planes of
// the tile's view-space frustum, all passing through the origin.
mov r0.y, -r2.x
mov r2.w, -r2.z
// r1.xy = normalize(z, -y1)    y1 = first corner y; dotted with (y, z).
dp2 r0.x, r2.xwxx, r2.xwxx
rsq r0.x, r0.x
mul r1.xy, r0.xxxx, r2.xwxx
// r0.xy = normalize(-z, y2)    y2 = second corner y; dotted with (y, z).
dp2 r0.x, r0.ywyy, r0.ywyy
rsq r0.x, r0.x
mul r0.xy, r0.xxxx, r0.ywyy
// r1.zw = normalize(-z, x1)    x1 = first corner x; dotted with (x, z).
mul r1.zw, r2.xxxy, l(0.000000, 0.000000, -1.000000, 1.000000)
dp2 r0.w, r1.zwzz, r1.zwzz
rsq r0.w, r0.w
mul r1.zw, r0.wwww, r1.zzzw
// r0.zw = normalize(z, -x2)    x2 = second corner x; dotted with (x, z).
mov r2.y, -r0.z
dp2 r0.z, r2.xyxx, r2.xyxx
rsq r0.z, r0.z
mul r0.zw, r0.zzzz, r2.xxxy
// r2.w = 1 so r2 can act as (x, y, z, 1) in the dp4s inside the box loop.
mov r2.w, l(1.000000)
// Box loop. r3.x is the current box index: it starts at the thread's index
// within the group and advances by 64 (the thread group size) per iteration.
// Registers set up before the loop and only read here:
//   r1.xy, r0.xy  2-D unit vectors dotted with (y, z)
//   r0.zw, r1.zw  2-D unit vectors dotted with (x, z)
//   r2.w          1.0
mov r3.x, vThreadIDInGroup.x
loop
  // Stop once the index reaches nBox (cb0[3].x).
  uge r3.y, r3.x, cb0[3].x
  breakc_nz r3.y
  // tActiveFlags packs one bit per box, 32 boxes per uint: fetch word
  // (index >> 5) and test bit (1 << index). ishl only uses the low 5 bits of
  // the shift count, so no explicit "& 31" is needed.
  ushr r3.y, r3.x, l(5)
  ld_indexable(buffer)(uint,uint,uint,uint) r3.y, r3.yyyy, t2.yxzw
  ishl r3.z, l(1), r3.x
  and r3.y, r3.z, r3.y
  if_nz r3.y
    // Each box uses three consecutive float4 entries of tBoxes.
    // Entry 0: xyz is transformed by mWV into r4.xyz; w stays in r4.w and is
    // used below as a radius around that point.
    // NOTE(review): presumably box centre + bounding-sphere radius - confirm.
    imul null, r3.y, r3.x, l(3)
    ld_indexable(buffer)(float,float,float,float) r4.xyzw, r3.yyyy, t0.xyzw
    mov r2.xyz, r4.xyzx
    dp4 r4.x, cb0[0].xyzw, r2.xyzw
    dp4 r4.y, cb0[1].xyzw, r2.xyzw
    dp4 r4.z, cb0[2].xyzw, r2.xyzw
    // Coarse test; r2.x ends up non-zero when the box is rejected.
    // r2.y = groupZ selects the depth range: cb0[8 + k].x is fClstZ[k], so
    // cb0[r2.y + 8] is fClstZ[groupZ] and cb0[r2.y + 9] is fClstZ[groupZ + 1].
    // Reject when -(z + radius) >= fClstZ[groupZ + 1].
    add r2.x, r4.w, r4.z
    mov r2.y, vThreadGroupID.z
    ge r2.x, -r2.x, cb0[r2.y + 9].x
    if_z r2.x
      // Reject when fClstZ[groupZ] >= -(z - radius).
      add r2.z, -r4.w, r4.z
      ge r2.z, cb0[r2.y + 8].x, -r2.z
      if_z r2.z
        // Reject when the point lies more than one radius on the negative
        // side of any of the four 2-D vectors (dot < -radius).
        dp2 r3.y, r1.xyxx, r4.yzyy
        dp2 r3.z, r0.xyxx, r4.yzyy
        dp2 r3.w, r0.zwzz, r4.xzxx
        lt r3.yzw, r3.yyzw, -r4.wwww
        dp2 r5.x, r1.zwzz, r4.xzxx
        // r4.w is overwritten with the comparison result here; the radius is
        // not read again in this iteration.
        lt r4.w, r5.x, -r4.w
        or r3.w, r3.w, r4.w
        or r3.z, r3.z, r3.w
        or r3.y, r3.y, r3.z
      else
        mov r3.y, l(-1)
      endif
      or r2.z, r2.z, r3.y
    else
      mov r2.z, l(-1)
    endif
    or r2.x, r2.x, r2.z
    if_z r2.x
      // Finer test using three axis vectors A, B, C added to the point r4.xyz:
      //   A = entry 1 xyz (r5.xyz)
      //   B = entry 2 xyz (r3.yzw)
      //   C = cross(A, B) * entry 1 w (r6.xyz)
      // The axes are rotated with the xyz parts of cb0[0..2] only (dp3).
      // NOTE(review): presumably A and B are half-extent axes of an oriented
      // box and entry 1's w scales the third axis - confirm with the producer
      // of tBoxes.
      imad r2.xz, l(3, 0, 3, 0), r3.xxxx, l(1, 0, 2, 0)
      ld_indexable(buffer)(float,float,float,float) r5.xyzw, r2.xxxx, t0.xyzw
      ld_indexable(buffer)(float,float,float,float) r3.yzw, r2.zzzz, t0.wxyz
      mul r6.xyz, r3.zwyz, r5.zxyz
      mad r6.xyz, r5.yzxy, r3.wyzw, -r6.xyzx
      mul r6.xyz, r5.wwww, r6.xyzx
      // z components of the rotated axes go to r7.z / r8.z / r9.z.
      dp3 r7.z, cb0[2].xyzx, r5.xyzx
      dp3 r8.z, cb0[2].xyzx, r3.yzwy
      dp3 r9.z, cb0[2].xyzx, r6.xyzx
      // Continue only if -(z + |A.z| + |B.z| + |C.z|) < fClstZ[groupZ + 1].
      // r2.y still holds groupZ at this point.
      add r2.x, r4.z, |r7.z|
      add r2.x, |r8.z|, r2.x
      add r2.x, |r9.z|, r2.x
      lt r2.x, -r2.x, cb0[r2.y + 9].x
      if_nz r2.x
        // Continue only if fClstZ[groupZ] < -(z - |A.z| - |B.z| - |C.z|).
        add r2.x, r4.z, -|r7.z|
        add r2.x, -|r8.z|, r2.x
        add r2.x, -|r9.z|, r2.x
        lt r2.x, cb0[r2.y + 8].x, -r2.x
        if_nz r2.x
          // y components of the rotated axes go to r7.y / r8.y / r9.y.
          dp3 r7.y, cb0[1].xyzx, r5.xyzx
          dp3 r8.y, cb0[1].xyzx, r3.yzwy
          dp3 r9.y, cb0[1].xyzx, r6.xyzx
          // Test against n = r1.xy in the (y, z) plane. Each axis is added
          // with sign +1 if dot(n, axis) > 0, else -1, which gives the box
          // corner furthest along n. Continue only if dot(n, corner) >= 0.
          // The next three tests repeat this with the other vectors.
          dp2 r2.x, r1.xyxx, r7.yzyy
          lt r2.x, l(0.000000), r2.x
          movc r2.x, r2.x, l(1.000000), l(-1.000000)
          mad r2.xy, r7.yzyy, r2.xxxx, r4.yzyy
          dp2 r2.z, r1.xyxx, r8.yzyy
          lt r2.z, l(0.000000), r2.z
          movc r2.z, r2.z, l(1.000000), l(-1.000000)
          mad r2.xy, r8.yzyy, r2.zzzz, r2.xyxx
          dp2 r2.z, r1.xyxx, r9.yzyy
          lt r2.z, l(0.000000), r2.z
          movc r2.z, r2.z, l(1.000000), l(-1.000000)
          mad r2.xy, r9.yzyy, r2.zzzz, r2.xyxx
          dp2 r2.x, r1.xyxx, r2.xyxx
          ge r2.x, r2.x, l(0.000000)
          if_nz r2.x
            // Same test with n = r0.xy, (y, z) plane.
            dp2 r2.x, r0.xyxx, r7.yzyy
            lt r2.x, l(0.000000), r2.x
            movc r2.x, r2.x, l(1.000000), l(-1.000000)
            mad r2.xy, r7.yzyy, r2.xxxx, r4.yzyy
            dp2 r2.z, r0.xyxx, r8.yzyy
            lt r2.z, l(0.000000), r2.z
            movc r2.z, r2.z, l(1.000000), l(-1.000000)
            mad r2.xy, r8.yzyy, r2.zzzz, r2.xyxx
            dp2 r2.z, r0.xyxx, r9.yzyy
            lt r2.z, l(0.000000), r2.z
            movc r2.z, r2.z, l(1.000000), l(-1.000000)
            mad r2.xy, r9.yzyy, r2.zzzz, r2.xyxx
            dp2 r2.x, r0.xyxx, r2.xyxx
            ge r2.x, r2.x, l(0.000000)
            if_nz r2.x
              // x components of the rotated axes go to r7.x / r8.x / r9.x.
              dp3 r7.x, cb0[0].xyzx, r5.xyzx
              dp3 r8.x, cb0[0].xyzx, r3.yzwy
              dp3 r9.x, cb0[0].xyzx, r6.xyzx
              // Same test with n = r0.zw, (x, z) plane.
              dp2 r2.x, r0.zwzz, r7.xzxx
              lt r2.x, l(0.000000), r2.x
              movc r2.x, r2.x, l(1.000000), l(-1.000000)
              mad r2.xy, r7.xzxx, r2.xxxx, r4.xzxx
              dp2 r2.z, r0.zwzz, r8.xzxx
              lt r2.z, l(0.000000), r2.z
              movc r2.z, r2.z, l(1.000000), l(-1.000000)
              mad r2.xy, r8.xzxx, r2.zzzz, r2.xyxx
              dp2 r2.z, r0.zwzz, r9.xzxx
              lt r2.z, l(0.000000), r2.z
              movc r2.z, r2.z, l(1.000000), l(-1.000000)
              mad r2.xy, r9.xzxx, r2.zzzz, r2.xyxx
              dp2 r2.x, r0.zwzz, r2.xyxx
              ge r2.x, r2.x, l(0.000000)
              if_nz r2.x
                // Same test with n = r1.zw, (x, z) plane.
                dp2 r2.x, r1.zwzz, r7.xzxx
                lt r2.x, l(0.000000), r2.x
                movc r2.x, r2.x, l(1.000000), l(-1.000000)
                mad r2.xy, r7.xzxx, r2.xxxx, r4.xzxx
                dp2 r2.z, r1.zwzz, r8.xzxx
                lt r2.z, l(0.000000), r2.z
                movc r2.z, r2.z, l(1.000000), l(-1.000000)
                mad r2.xy, r8.xzxx, r2.zzzz, r2.xyxx
                dp2 r2.z, r1.zwzz, r9.xzxx
                lt r2.z, l(0.000000), r2.z
                movc r2.z, r2.z, l(1.000000), l(-1.000000)
                mad r2.xy, r9.xzxx, r2.zzzz, r2.xyxx
                dp2 r2.x, r1.zwzz, r2.xyxx
                ge r2.x, r2.x, l(0.000000)
                if_nz r2.x
                  // Box accepted: atomically bump the counter in g0 and use
                  // the previous value as the slot in g2. g2 has 126 slots;
                  // later hits are still counted but not stored.
                  imm_atomic_iadd r4.x, g0, l(0), l(1)
                  ult r2.x, r4.x, l(126)
                  if_nz r2.x
                    store_structured g2.x, r4.x, l(0), r3.x
                  endif
                endif
              endif
            endif
          endif
        endif
      endif
    endif
  endif
  // Next box for this thread.
  iadd r3.x, r3.x, l(64)
endloop
// Wait until every thread has finished appending to g0 / g2.
sync_g_t
// r0.x = number of stored entries: the counter in g0 clamped to the 126
// slots that g2 provides.
ld_raw r0.x, l(0), g0.xxxx
umin r0.x, r0.x, l(126)
// Thread 0 reserves output space and writes this group's header.
if_z vThreadIDInGroup.x
  // Reserve r0.x entries by atomically adding to uCountBuffer[0]; r1.x gets
  // the value before the add. The atomic is skipped (r1.x = 0) if the list is
  // empty.
  if_nz r0.x
    imm_atomic_iadd r1.x, u1, l(0), r0.x
  else
    mov r1.x, l(0)
  endif
  // Linear group index:
  //   tilesX = (vScrnSize.x + 63) >> 6,  tilesY = (vScrnSize.y + 63) >> 6
  //   r0.y   = groupZ * tilesX * tilesY + groupY * tilesX + groupX
  iadd r0.yz, cb0[3].yyzy, l(0, 63, 63, 0)
  ushr r0.yz, r0.yyzy, l(0, 6, 6, 0)
  imul null, r0.z, r0.z, r0.y
  imul null, r0.y, r0.y, vThreadGroupID.y
  imad r0.y, r0.z, vThreadGroupID.z, r0.y
  iadd r0.y, r0.y, vThreadGroupID.x
  // Header = three uints in uBoxIndices starting at 3 * index:
  //   [3i + 0] low 16 bits of the reserved offset
  //   [3i + 1] reserved offset >> 16
  //   [3i + 2] entry count
  // NOTE(review): headers and lists share u0; presumably uCountBuffer is
  // seeded so list offsets start past the header region - confirm on the
  // host side.
  imul null, r0.z, r0.y, l(3)
  and r0.w, r1.x, l(0x0000ffff)
  store_uav_typed u0.xyzw, r0.zzzz, r0.wwww
  imad r0.yw, l(0, 3, 0, 3), r0.yyyy, l(0, 1, 0, 2)
  ushr r1.y, r1.x, l(16)
  store_uav_typed u0.xyzw, r0.yyyy, r1.yyyy
  store_uav_typed u0.xyzw, r0.wwww, r0.xxxx
  // Publish the reserved offset to the other threads through g1.
  store_raw g1.x, l(0), r1.x
endif
sync_g_t
// All threads copy g2[0 .. count) to u0[offset + i], 64 entries per pass.
// r0.y = reserved offset, r0.z = entry index i.
ld_raw r0.y, l(0), g1.xxxx
mov r0.z, vThreadIDInGroup.x
loop
  uge r0.w, r0.z, r0.x
  breakc_nz r0.w
  ld_structured r0.w, r0.z, l(0), g2.xxxx
  iadd r1.x, r0.z, r0.y
  store_uav_typed u0.xyzw, r1.xxxx, r0.wwww
  iadd r0.z, r0.z, l(64)
endloop
ret
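// Approximately 221 instruction slots used (the compiler's original count; the
// body holds 224 once the t121 load and the two r10.x adds are included)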
