// Decal tile indices
//
// Generated by Microsoft (R) HLSL Shader Compiler 6.3.9600.16384
//
//   using 3Dmigoto v1.3.15 on Wed Mar 06 06:24:56 2019
//
//
// Buffer Definitions:
//
// cbuffer $Globals
// {
//
//   float4 mWV[3];                     // Offset:    0 Size:    48
//   uint2 vScrnSize;                   // Offset:   48 Size:     8
//   row_major float4x4 mP2V;           // Offset:   64 Size:    64
//   float fClstZ[17];                  // Offset:  128 Size:   260
//
// }
//
//
// Resource Bindings:
//
// Name                                 Type  Format         Dim Slot Elements
// ------------------------------ ---------- ------- ----------- ---- --------
// tBoxes                            texture  float4         buf    0        1
// tIntermediateIndices              texture    uint         buf    1        1
// tPriority                         texture    uint         buf    3        1
// uBoxIndices                           UAV    uint         buf    0        1
// uCountBuffer                          UAV    uint         buf    1        1
// $Globals                          cbuffer      NA          NA    0        1
//
//
//
// Input signature:
//
// Name                 Index   Mask Register SysValue  Format   Used
// -------------------- ----- ------ -------- -------- ------- ------
// no Input
//
// Output signature:
//
// Name                 Index   Mask Register SysValue  Format   Used
// -------------------- ----- ------ -------- -------- ------- ------
// no Output
// Compute shader, Shader Model 5.0.
cs_5_0
dcl_globalFlags refactoringAllowed
// $Globals layout, from the byte offsets in the header above (16 bytes per
// register):
//   cb0[0..2]  = mWV[3]      (three float4 rows, used with dp4/dp3 below)
//   cb0[3].xy  = vScrnSize   (screen size in pixels)
//   cb0[4..7]  = mP2V        (row_major float4x4)
//   cb0[8..24] = fClstZ[17]  (one float per register, in .x; indexed
//                             dynamically, hence dynamicIndexed)
dcl_constantbuffer cb0[25], dynamicIndexed
// t0 = tBoxes, t1 = tIntermediateIndices, t3 = tPriority (see Resource
// Bindings in the header).
dcl_resource_buffer (float,float,float,float) t0
dcl_resource_buffer (uint,uint,uint,uint) t1
dcl_resource_buffer (uint,uint,uint,uint) t3
// u0 = uBoxIndices (output list), u1 = uCountBuffer (global allocation counter).
dcl_uav_typed_buffer (uint,uint,uint,uint) u0
dcl_uav_typed_buffer (uint,uint,uint,uint) u1
dcl_input vThreadGroupID.xyz
dcl_input vThreadIDInGroup.x
// r0-r10 are used by the original shader; r11 holds the texel loaded from t121
// below.
dcl_temps 12
// Thread-group shared memory:
//   g0 = one uint: append counter while culling, later reused as the
//        "a swap happened" flag of the sort
//   g1 = one uint: base offset of this group's range in u0
//   g2 = 126 uints: the group's list of (priority << 16) | index keys
dcl_tgsm_raw g0, 4
dcl_tgsm_raw g1, 4
dcl_tgsm_structured g2, 4, 126
// 64 threads per group, laid out along x only.
dcl_thread_group 64, 1, 1

// Not part of the compiled shader (t121 is absent from the Resource Bindings
// header): fetch texel (0,0) of t121 into r11. r11.x is used further down as
// the stereo separation.
// NOTE(review): presumably this is 3Dmigoto's StereoParams texture bound to
// slot 121 by the fix's d3dx.ini - confirm the slot matches the ini.
dcl_resource_texture2d (float,float,float,float) t121
ld_indexable(texture2d)(float,float,float,float) r11.xyzw, l(0, 0, 0, 0), t121.xyzw

// Thread 0 zeroes the shared append counter g0; the barrier makes the zero
// visible to the whole group before any thread appends.
if_z vThreadIDInGroup.x
  store_raw g0.x, l(0), l(0)
endif
sync_g_t
// Tile bounds in pixels. Tiles are 64x64 (shift by 6):
//   r0.x = x0 = groupID.x * 64
//   r0.y = x1 = min(x0 + 64, vScrnSize.x)
//   r0.z = y1 = min(groupID.y * 64 + 64, vScrnSize.y)
ishl r0.xy, vThreadGroupID.xyxx, l(6, 6, 0, 0)
iadd r0.yz, r0.xxyx, l(0, 64, 64, 0)
umin r0.yz, r0.yyzy, cb0[3].xxyx
// r1.xy = 2 / vScrnSize: scale from pixels to a [-1, 1] range.
utof r1.xy, cb0[3].xyxx
div r1.xy, l(2.000000, 2.000000, 0.000000, 0.000000), r1.xyxx
utof r0.xy, r0.xyxx
// r0.x = x0 * 2/width - 1
mad r0.x, r0.x, r1.x, l(-1.000000)
// r0.w = (height - groupID.y * 64) * 2/height - 1, i.e. y0 with the Y axis
// flipped. y0 is recomputed here because r0.y was overwritten with x1 above.
imad r0.w, l(-64), vThreadGroupID.y, cb0[3].y
utof r0.w, r0.w
mad r0.w, r0.w, r1.y, l(-1.000000)
// r0.y = x1 * 2/width - 1
mad r0.y, r0.y, r1.x, l(-1.000000)
// r0.z = (height - y1) * 2/height - 1, i.e. y1 with the Y axis flipped.
iadd r0.z, -r0.z, cb0[3].y
utof r0.z, r0.z
mad r0.z, r0.z, r1.y, l(-1.000000)

// The below matrix multiplies appear to be setting min & max tiles that the
// decal can cover. It doesn't look like we have valid depth info here for a
// proper stereo correction, and we have stereo compute shaders disabled due to
// a plant physics desync issue. Let's expand the min/max tiles out by
// separation so that the decal will be completely covered so long as it is
// inside the screen (if someone sets convergence very high for large popout
// they will see the clipping). Use absolute values so it should (hopefully)
// keep working as is if we re-enable stereo compute shaders later
add r0.x, r0.x, -|r11.x|
add r0.y, r0.y, |r11.x|

// mP2V:
// First corner: r1 = (r0.x, r0.w, 1, 1) * mP2V, with rows cb0[4..7].
// After the divide: r2.x = z/w, r2.y = x/w, r2.z = y/w.
mul r1.xyzw, r0.wwww, cb0[5].xyzw
mad r1.xyzw, r0.xxxx, cb0[4].xyzw, r1.xyzw
add r1.xyzw, r1.xyzw, cb0[6].xyzw
add r1.xyzw, r1.xyzw, cb0[7].xyzw
div r2.xyz, r1.zxyz, r1.wwww

// Second corner: only x and y of (r0.y, r0.z, 1, 1) * mP2V are computed.
// They are divided by r1.w from the FIRST corner; the second corner's own w
// is never evaluated. Result: r0.z = x/w, r0.w = y/w.
mul r0.xz, r0.zzzz, cb0[5].xxyx
mad r0.xy, r0.yyyy, cb0[4].xyxx, r0.xzxx
add r0.xy, r0.xyxx, cb0[6].xyxx
add r0.xy, r0.xyxx, cb0[7].xyxx
div r0.zw, r0.xxxy, r1.wwww

// Build four normalized 2-D plane normals from the two corners. Each is later
// dotted with either the (y, z) or the (x, z) components of a view-space
// point:
//   r1.xy = normalize( z, -y_first )   -> used against .yz
//   r0.xy = normalize(-z,  y_second)   -> used against .yz
//   r1.zw = normalize(-z,  x_first )   -> used against .xz
//   r0.zw = normalize( z, -x_second)   -> used against .xz
// where z = r2.x (first corner's z/w) in all four.
mov r0.y, -r2.x
mov r2.w, -r2.z
dp2 r0.x, r2.xwxx, r2.xwxx
rsq r0.x, r0.x
mul r1.xy, r0.xxxx, r2.xwxx
dp2 r0.x, r0.ywyy, r0.ywyy
rsq r0.x, r0.x
mul r0.xy, r0.xxxx, r0.ywyy
mul r1.zw, r2.xxxy, l(0.000000, 0.000000, -1.000000, 1.000000)
dp2 r0.w, r1.zwzz, r1.zwzz
rsq r0.w, r0.w
mul r1.zw, r0.wwww, r1.zzzw
// r0.z (second corner x) is saved into r2.y here because r0.z is reused as a
// scratch register on the next line.
mov r2.y, -r0.z
dp2 r0.z, r2.xyxx, r2.xyxx
rsq r0.z, r0.z
mul r0.zw, r0.zzzz, r2.xxxy
// Culling loop. t1[0] holds the number of candidate entries; entries follow at
// t1[1..]. Each of the 64 threads starts at its own thread id and advances by
// 64, testing candidates against this tile (plane normals r1.xy, r0.xy,
// r0.zw, r1.zw computed above) and the depth range selected by groupID.z.
// Survivors are appended to the shared list g2.
ld_indexable(buffer)(uint,uint,uint,uint) r2.x, l(0, 0, 0, 0), t1.xyzw
// r3.w = 1 so that (r3.xyz, r3.w) can be used as a homogeneous point in dp4.
mov r3.w, l(1.000000)
// r2.y = loop index i, starting at the thread id.
mov r2.y, vThreadIDInGroup.x
loop
  // Stop once i >= entry count.
  uge r2.w, r2.y, r2.x
  breakc_nz r2.w
  // r2.y = i + 64 (this thread's next index); r2.z = i + 1 (slot in t1, which
  // skips the count stored at t1[0]).
  iadd r2.yz, r2.yyyy, l(0, 64, 1, 0)
  // r2.z = box index read from t1[i + 1].
  ld_indexable(buffer)(uint,uint,uint,uint) r2.z, r2.zzzz, t1.yzxw
  // Each box occupies three consecutive float4 elements of t0.
  imul null, r2.w, r2.z, l(3)
  // Element 0: xyz = position, w = a distance used as a bounding radius by the
  // tests below.
  ld_indexable(buffer)(float,float,float,float) r4.xyzw, r2.wwww, t0.xyzw
  mov r3.xyz, r4.xyzx
  // r4.xyz = mWV * (position, 1); r4.w keeps the radius.
  dp4 r4.x, cb0[0].xyzw, r3.xyzw
  dp4 r4.y, cb0[1].xyzw, r3.xyzw
  dp4 r4.z, cb0[2].xyzw, r3.xyzw
  // --- Coarse test using only the centre and radius ---
  // Culled when -(z + radius) >= fClstZ[groupID.z + 1]
  // (cb0[8] is fClstZ[0], so cb0[r3.x + 9] is fClstZ[groupID.z + 1]).
  add r2.w, r4.w, r4.z
  mov r3.x, vThreadGroupID.z
  ge r2.w, -r2.w, cb0[r3.x + 9].x
  if_z r2.w
    // Culled when fClstZ[groupID.z] >= -(z - radius).
    add r3.y, -r4.w, r4.z
    ge r3.y, cb0[r3.x + 8].x, -r3.y
    if_z r3.y
      // Culled when the centre lies further than `radius` behind any of the
      // four side planes: dot(normal, centre) < -radius.
      dp2 r3.z, r1.xyxx, r4.yzyy
      lt r3.z, r3.z, -r4.w
      dp2 r5.x, r0.xyxx, r4.yzyy
      dp2 r5.y, r0.zwzz, r4.xzxx
      lt r5.xy, r5.xyxx, -r4.wwww
      dp2 r5.z, r1.zwzz, r4.xzxx
      // r4.w (the radius) is overwritten here; it is not read again in this
      // iteration.
      lt r4.w, r5.z, -r4.w
      or r4.w, r5.y, r4.w
      or r4.w, r5.x, r4.w
      or r3.z, r3.z, r4.w
    else
      mov r3.z, l(-1)
    endif
    or r3.y, r3.y, r3.z
  else
    mov r3.y, l(-1)
  endif
  // r2.w != 0 means the coarse test rejected this box.
  or r2.w, r2.w, r3.y
  if_z r2.w
    // --- Finer test using the box axes ---
    // Elements 1 and 2 of the box: two axis vectors (r5.xyz, r6.xyz).
    imad r3.yz, l(0, 3, 3, 0), r2.zzzz, l(0, 1, 2, 0)
    ld_indexable(buffer)(float,float,float,float) r5.xyzw, r3.yyyy, t0.xyzw
    ld_indexable(buffer)(float,float,float,float) r6.xyz, r3.zzzz, t0.xyzw
    // Third axis r7 = cross(r5.xyz, r6.xyz) * r5.w.
    mul r7.xyz, r5.zxyz, r6.yzxy
    mad r7.xyz, r5.yzxy, r6.zxyz, -r7.xyzx
    mul r7.xyz, r5.wwww, r7.xyzx
    // r8/r9/r10 hold the three axes rotated by mWV (dp3, so no translation).
    // Components are computed lazily: z here, y and x only if still needed.
    dp3 r8.z, cb0[2].xyzx, r5.xyzx
    dp3 r9.z, cb0[2].xyzx, r6.xyzx
    dp3 r10.z, cb0[2].xyzx, r7.xyzx
    // Depth extent of the box is centre.z +/- (|a.z| + |b.z| + |c.z|).
    // Continue only if -(z + extent) < fClstZ[groupID.z + 1] ...
    add r2.w, r4.z, |r8.z|
    add r2.w, |r9.z|, r2.w
    add r2.w, |r10.z|, r2.w
    lt r2.w, -r2.w, cb0[r3.x + 9].x
    if_nz r2.w
      // ... and fClstZ[groupID.z] < -(z - extent).
      add r2.w, r4.z, -|r8.z|
      add r2.w, -|r9.z|, r2.w
      add r2.w, -|r10.z|, r2.w
      lt r2.w, cb0[r3.x + 8].x, -r2.w
      if_nz r2.w
        dp3 r8.y, cb0[1].xyzx, r5.xyzx
        dp3 r9.y, cb0[1].xyzx, r6.xyzx
        dp3 r10.y, cb0[1].xyzx, r7.xyzx
        // Plane r1.xy (against .yz). For each axis pick the sign that moves
        // along the normal, giving in r3.xy the box point furthest along the
        // normal: centre + sum(sign(dot(n, axis)) * axis). The box passes this
        // plane when dot(n, that point) >= 0. The same pattern repeats for
        // the other three planes below.
        dp2 r2.w, r1.xyxx, r8.yzyy
        lt r2.w, l(0.000000), r2.w
        movc r2.w, r2.w, l(1.000000), l(-1.000000)
        mad r3.xy, r8.yzyy, r2.wwww, r4.yzyy
        dp2 r2.w, r1.xyxx, r9.yzyy
        lt r2.w, l(0.000000), r2.w
        movc r2.w, r2.w, l(1.000000), l(-1.000000)
        mad r3.xy, r9.yzyy, r2.wwww, r3.xyxx
        dp2 r2.w, r1.xyxx, r10.yzyy
        lt r2.w, l(0.000000), r2.w
        movc r2.w, r2.w, l(1.000000), l(-1.000000)
        mad r3.xy, r10.yzyy, r2.wwww, r3.xyxx
        dp2 r2.w, r1.xyxx, r3.xyxx
        ge r2.w, r2.w, l(0.000000)
        if_nz r2.w
          // Plane r0.xy (against .yz).
          dp2 r2.w, r0.xyxx, r8.yzyy
          lt r2.w, l(0.000000), r2.w
          movc r2.w, r2.w, l(1.000000), l(-1.000000)
          mad r3.xy, r8.yzyy, r2.wwww, r4.yzyy
          dp2 r2.w, r0.xyxx, r9.yzyy
          lt r2.w, l(0.000000), r2.w
          movc r2.w, r2.w, l(1.000000), l(-1.000000)
          mad r3.xy, r9.yzyy, r2.wwww, r3.xyxx
          dp2 r2.w, r0.xyxx, r10.yzyy
          lt r2.w, l(0.000000), r2.w
          movc r2.w, r2.w, l(1.000000), l(-1.000000)
          mad r3.xy, r10.yzyy, r2.wwww, r3.xyxx
          dp2 r2.w, r0.xyxx, r3.xyxx
          ge r2.w, r2.w, l(0.000000)
          if_nz r2.w
            dp3 r8.x, cb0[0].xyzx, r5.xyzx
            dp3 r9.x, cb0[0].xyzx, r6.xyzx
            dp3 r10.x, cb0[0].xyzx, r7.xyzx
            // Plane r0.zw (against .xz).
            dp2 r2.w, r0.zwzz, r8.xzxx
            lt r2.w, l(0.000000), r2.w
            movc r2.w, r2.w, l(1.000000), l(-1.000000)
            mad r3.xy, r8.xzxx, r2.wwww, r4.xzxx
            dp2 r2.w, r0.zwzz, r9.xzxx
            lt r2.w, l(0.000000), r2.w
            movc r2.w, r2.w, l(1.000000), l(-1.000000)
            mad r3.xy, r9.xzxx, r2.wwww, r3.xyxx
            dp2 r2.w, r0.zwzz, r10.xzxx
            lt r2.w, l(0.000000), r2.w
            movc r2.w, r2.w, l(1.000000), l(-1.000000)
            mad r3.xy, r10.xzxx, r2.wwww, r3.xyxx
            dp2 r2.w, r0.zwzz, r3.xyxx
            ge r2.w, r2.w, l(0.000000)
            if_nz r2.w
              // Plane r1.zw (against .xz).
              dp2 r2.w, r1.zwzz, r8.xzxx
              lt r2.w, l(0.000000), r2.w
              movc r2.w, r2.w, l(1.000000), l(-1.000000)
              mad r3.xy, r8.xzxx, r2.wwww, r4.xzxx
              dp2 r2.w, r1.zwzz, r9.xzxx
              lt r2.w, l(0.000000), r2.w
              movc r2.w, r2.w, l(1.000000), l(-1.000000)
              mad r3.xy, r9.xzxx, r2.wwww, r3.xyxx
              dp2 r2.w, r1.zwzz, r10.xzxx
              lt r2.w, l(0.000000), r2.w
              movc r2.w, r2.w, l(1.000000), l(-1.000000)
              mad r3.xy, r10.xzxx, r2.wwww, r3.xyxx
              dp2 r2.w, r1.zwzz, r3.xyxx
              ge r2.w, r2.w, l(0.000000)
              if_nz r2.w
                // Box survived every test: reserve a slot (r4.x = previous
                // value of g0). g0 keeps counting past 126, but only the first
                // 126 survivors are stored; the rest are dropped.
                imm_atomic_iadd r4.x, g0, l(0), l(1)
                ult r2.w, r4.x, l(126)
                if_nz r2.w
                  // Stored key = (tPriority[box] << 16) | box index, so an
                  // ascending sort orders by priority first, then by index.
                  ld_indexable(buffer)(uint,uint,uint,uint) r2.w, r2.zzzz, t3.yzwx
                  ishl r2.w, r2.w, l(16)
                  or r2.z, r2.w, r2.z
                  store_structured g2.x, r4.x, l(0), r2.z
                endif
              endif
            endif
          endif
        endif
      endif
    endif
  endif
endloop
// Wait until every thread has finished appending to g2 / g0.
sync_g_t
// r0.x = number of entries actually stored = min(g0, 126). g0 itself may be
// larger because the append counter is not clamped.
ld_raw r0.x, l(0), g0.xxxx
umin r0.x, r0.x, l(126)
// Thread 0 alone reserves output space and writes this group's header.
if_z vThreadIDInGroup.x
  // r1.x = base offset for this group's entries: the previous value of u1[0],
  // which is atomically advanced by the entry count. The atomic is skipped
  // when there is nothing to write, and the base is then 0.
  if_nz r0.x
    imm_atomic_iadd r1.x, u1, l(0), r0.x
  else
    mov r1.x, l(0)
  endif
  // Linear group index:
  //   tilesX = (width + 63) >> 6, tilesY = (height + 63) >> 6
  //   r0.y   = groupID.z * tilesX * tilesY + groupID.y * tilesX + groupID.x
  iadd r0.yz, cb0[3].xxyx, l(0, 63, 63, 0)
  ushr r0.yz, r0.yyzy, l(0, 6, 6, 0)
  imul null, r0.z, r0.z, r0.y
  imul null, r0.y, r0.y, vThreadGroupID.y
  imad r0.y, r0.z, vThreadGroupID.z, r0.y
  iadd r0.y, r0.y, vThreadGroupID.x
  // Header is three uints at u0[3 * index + 0..2]:
  //   +0 = low 16 bits of the base offset
  //   +1 = high 16 bits of the base offset
  //   +2 = entry count
  imul null, r0.z, r0.y, l(3)
  and r0.w, r1.x, l(0x0000ffff)
  store_uav_typed u0.xyzw, r0.zzzz, r0.wwww
  imad r0.yw, l(0, 3, 0, 3), r0.yyyy, l(0, 1, 0, 2)
  ushr r1.y, r1.x, l(16)
  store_uav_typed u0.xyzw, r0.yyyy, r1.yyyy
  store_uav_typed u0.xyzw, r0.wwww, r0.xxxx
  // Publish the base offset to the other threads through g1.
  store_raw g1.x, l(0), r1.x
endif
// Makes g1 visible to the whole group before it is read back later.
sync_g_t
// Parallel ascending sort of the keys in g2, alternating an even-pair pass and
// an odd-pair pass until a full round performs no swap. r0.x still holds the
// clamped entry count computed after the culling loop.
//   r0.y = 2 * tid      first pair start for the even pass
//   r0.z = 2 * tid + 1  first pair start for the odd pass
//   r0.w = count - 1    last valid pair start is count - 2
ishl r0.y, vThreadIDInGroup.x, l(1)
imad r0.z, vThreadIDInGroup.x, l(2), l(1)
iadd r0.w, r0.x, l(-1)
loop
  // g0 doubles as the loop condition. On the first round it still contains the
  // append counter (so fewer than 2 entries means nothing to sort); on later
  // rounds it is 0, or 2 if the previous round swapped something.
  ld_raw r1.x, l(0), g0.xxxx
  ult r1.x, r1.x, l(2)
  breakc_nz r1.x
  // Barrier so every thread has read g0 before thread 0 clears it, then a
  // second barrier so the clear lands before any pass can set it again.
  sync_g_t
  if_z vThreadIDInGroup.x
    store_raw g0.x, l(0), l(0)
  endif
  sync_g_t
  // Even pass: pairs (j, j + 1) for j = 2 * tid, stepping by 128
  // (64 threads * 2 elements each).
  mov r1.x, r0.y
  loop
    uge r1.y, r1.x, r0.w
    breakc_nz r1.y
    // r1.y = j + 1 (partner), r1.z = j + 128 (next pair for this thread).
    iadd r1.yz, r1.xxxx, l(0, 1, 128, 0)
    ld_structured r1.w, r1.x, l(0), g2.xxxx
    ld_structured r2.x, r1.y, l(0), g2.xxxx
    // Swap when the pair is out of order, and flag that a swap happened.
    ult r2.y, r2.x, r1.w
    if_nz r2.y
      store_structured g2.x, r1.x, l(0), r2.x
      store_structured g2.x, r1.y, l(0), r1.w
      store_raw g0.x, l(0), l(2)
    endif
    mov r1.x, r1.z
  endloop
  sync_g_t
  // Odd pass: same as above with pairs starting at j = 2 * tid + 1.
  mov r1.x, r0.z
  loop
    uge r1.y, r1.x, r0.w
    breakc_nz r1.y
    iadd r1.yz, r1.xxxx, l(0, 1, 128, 0)
    ld_structured r1.w, r1.x, l(0), g2.xxxx
    ld_structured r2.x, r1.y, l(0), g2.xxxx
    ult r2.y, r2.x, r1.w
    if_nz r2.y
      store_structured g2.x, r1.x, l(0), r2.x
      store_structured g2.x, r1.y, l(0), r1.w
      store_raw g0.x, l(0), l(2)
    endif
    mov r1.x, r1.z
  endloop
  sync_g_t
endloop
// Copy the sorted list from shared memory to the output buffer.
//   r0.y = base offset that thread 0 stored in g1
//   r0.x = entry count (clamped to 126 earlier)
// Each thread handles entries tid, tid + 64, ...
ld_raw r0.y, l(0), g1.xxxx
mov r0.z, vThreadIDInGroup.x
loop
  uge r0.w, r0.z, r0.x
  breakc_nz r0.w
  ld_structured r0.w, r0.z, l(0), g2.xxxx
  // Drop the priority stored in the upper 16 bits of the sort key, leaving
  // only the low 16 bits of the box index.
  and r0.w, r0.w, l(0x0000ffff)
  // u0[base + i] = box index
  iadd r1.x, r0.z, r0.y
  store_uav_typed u0.xyzw, r1.xxxx, r0.wwww
  iadd r0.z, r0.z, l(64)
endloop
ret
// Approximately 267 instruction slots used
