NIR (from SPIR-V) for MESA_SHADER_COMPUTE shader: shader: MESA_SHADER_COMPUTE local-size: 65, 1, 1 shared-size: 1 inputs: 0 outputs: 0 uniforms: 0 shared: 0 decl_var system INTERP_MODE_NONE uvec3 gl_GlobalInvocationID decl_function main (0 params) impl main { block block_0: /* preds: */ vec1 32 ssa_5 = load_const (0x00000014 /* 0.000000 */) vec1 32 ssa_0 = deref_var &gl_GlobalInvocationID (system uvec3) vec3 32 ssa_3 = intrinsic load_deref (ssa_0) () vec1 32 ssa_4 = imov ssa_3.x vec1 32 ssa_6 = uge ssa_4, ssa_5 /* succs: block_1 block_2 */ if ssa_6 { block block_1: /* preds: block_0 */ return /* succs: block_4 */ } else { block block_2: /* preds: block_0 */ /* succs: block_3 */ } block block_3: /* preds: block_2 */ intrinsic barrier () () intrinsic group_memory_barrier () () return /* succs: block_4 */ block block_4: } NIR (SSA form) for compute shader: shader: MESA_SHADER_COMPUTE local-size: 65, 1, 1 shared-size: 1 inputs: 0 outputs: 0 uniforms: 12 shared: 0 decl_function main (0 params) impl main { block block_0: /* preds: */ vec1 32 ssa_0 = load_const (0x00000014 /* 0.000000 */) vec3 32 ssa_1 = load_const (0x00000041 /* 0.000000 */, 0x00000001 /* 0.000000 */, 0x00000001 /* 0.000000 */) vec3 32 ssa_2 = intrinsic load_work_group_id () () vec1 32 ssa_3 = load_const (0x00000000 /* 0.000000 */) vec3 32 ssa_4 = intrinsic load_uniform (ssa_3) (0, 12) /* base=0 */ /* range=12 */ vec1 32 ssa_5 = iadd ssa_2.x, ssa_4.x vec1 32 ssa_6 = intrinsic load_subgroup_id () () vec1 32 ssa_7 = load_const (0x00000003 /* 0.000000 */) vec1 32 ssa_8 = ishl ssa_6, ssa_7 vec1 32 ssa_9 = intrinsic load_subgroup_invocation () () vec1 32 ssa_10 = iadd ssa_9, ssa_8 vec1 32 ssa_11 = umod ssa_10, ssa_1.x vec1 32 ssa_12 = imul ssa_5, ssa_1.x vec1 32 ssa_13 = iadd ssa_12, ssa_11 vec1 32 ssa_14 = ult ssa_13, ssa_0 /* succs: block_1 block_2 */ if ssa_14 { block block_1: /* preds: block_0 */ intrinsic barrier () () intrinsic group_memory_barrier () () /* succs: block_3 */ } else { block block_2: /* preds: block_0 */ /* succs: block_3 */ } block block_3: /* preds: block_1 block_2 */ /* succs: block_4 */ block block_4: } NIR (final form) for compute shader: shader: MESA_SHADER_COMPUTE local-size: 65, 1, 1 shared-size: 1 inputs: 0 outputs: 0 uniforms: 12 shared: 0 decl_function main (0 params) impl main { block block_0: /* preds: */ vec1 32 ssa_0 = load_const (0x00000014 /* 0.000000 */) vec3 32 ssa_1 = load_const (0x00000041 /* 0.000000 */, 0x00000001 /* 0.000000 */, 0x00000001 /* 0.000000 */) vec3 32 ssa_2 = intrinsic load_work_group_id () () vec1 32 ssa_3 = load_const (0x00000000 /* 0.000000 */) vec3 32 ssa_4 = intrinsic load_uniform (ssa_3) (0, 12) /* base=0 */ /* range=12 */ vec1 32 ssa_5 = iadd ssa_2.x, ssa_4.x vec1 32 ssa_6 = intrinsic load_subgroup_id () () vec1 32 ssa_7 = load_const (0x00000003 /* 0.000000 */) vec1 32 ssa_8 = ishl ssa_6, ssa_7 vec1 32 ssa_9 = intrinsic load_subgroup_invocation () () vec1 32 ssa_10 = iadd ssa_9, ssa_8 vec1 32 ssa_11 = umod ssa_10, ssa_1.x vec1 32 ssa_12 = imul ssa_5, ssa_1.x vec1 32 ssa_13 = iadd ssa_12, ssa_11 vec1 32 ssa_14 = ult ssa_13, ssa_0 /* succs: block_1 block_2 */ if ssa_14 { block block_1: /* preds: block_0 */ intrinsic barrier () () intrinsic group_memory_barrier () () /* succs: block_3 */ } else { block block_2: /* preds: block_0 */ /* succs: block_3 */ } block block_3: /* preds: block_1 block_2 */ /* succs: block_4 */ block block_4: } NIR (SSA form) for compute shader: shader: MESA_SHADER_COMPUTE local-size: 65, 1, 1 shared-size: 1 inputs: 0 outputs: 0 uniforms: 12 shared: 0 decl_function main (0 params) impl main { block block_0: /* preds: */ vec1 32 ssa_0 = load_const (0x00000014 /* 0.000000 */) vec3 32 ssa_1 = load_const (0x00000041 /* 0.000000 */, 0x00000001 /* 0.000000 */, 0x00000001 /* 0.000000 */) vec3 32 ssa_2 = intrinsic load_work_group_id () () vec1 32 ssa_3 = load_const (0x00000000 /* 0.000000 */) vec3 32 ssa_4 = intrinsic load_uniform (ssa_3) (0, 12) /* base=0 */ /* range=12 */ vec1 32 ssa_5 = iadd ssa_2.x, ssa_4.x vec1 32 ssa_6 = intrinsic load_subgroup_id () () vec1 32 ssa_7 = load_const (0x00000004 /* 0.000000 */) vec1 32 ssa_8 = ishl ssa_6, ssa_7 vec1 32 ssa_9 = intrinsic load_subgroup_invocation () () vec1 32 ssa_10 = iadd ssa_9, ssa_8 vec1 32 ssa_11 = umod ssa_10, ssa_1.x vec1 32 ssa_12 = imul ssa_5, ssa_1.x vec1 32 ssa_13 = iadd ssa_12, ssa_11 vec1 32 ssa_14 = ult ssa_13, ssa_0 /* succs: block_1 block_2 */ if ssa_14 { block block_1: /* preds: block_0 */ intrinsic barrier () () intrinsic group_memory_barrier () () /* succs: block_3 */ } else { block block_2: /* preds: block_0 */ /* succs: block_3 */ } block block_3: /* preds: block_1 block_2 */ /* succs: block_4 */ block block_4: } NIR (final form) for compute shader: shader: MESA_SHADER_COMPUTE local-size: 65, 1, 1 shared-size: 1 inputs: 0 outputs: 0 uniforms: 12 shared: 0 decl_function main (0 params) impl main { block block_0: /* preds: */ vec1 32 ssa_0 = load_const (0x00000014 /* 0.000000 */) vec3 32 ssa_1 = load_const (0x00000041 /* 0.000000 */, 0x00000001 /* 0.000000 */, 0x00000001 /* 0.000000 */) vec3 32 ssa_2 = intrinsic load_work_group_id () () vec1 32 ssa_3 = load_const (0x00000000 /* 0.000000 */) vec3 32 ssa_4 = intrinsic load_uniform (ssa_3) (0, 12) /* base=0 */ /* range=12 */ vec1 32 ssa_5 = iadd ssa_2.x, ssa_4.x vec1 32 ssa_6 = intrinsic load_subgroup_id () () vec1 32 ssa_7 = load_const (0x00000004 /* 0.000000 */) vec1 32 ssa_8 = ishl ssa_6, ssa_7 vec1 32 ssa_9 = intrinsic load_subgroup_invocation () () vec1 32 ssa_10 = iadd ssa_9, ssa_8 vec1 32 ssa_11 = umod ssa_10, ssa_1.x vec1 32 ssa_12 = imul ssa_5, ssa_1.x vec1 32 ssa_13 = iadd ssa_12, ssa_11 vec1 32 ssa_14 = ult ssa_13, ssa_0 /* succs: block_1 block_2 */ if ssa_14 { block block_1: /* preds: block_0 */ intrinsic barrier () () intrinsic group_memory_barrier () () /* succs: block_3 */ } else { block block_2: /* preds: block_0 */ /* succs: block_3 */ } block block_3: /* preds: block_1 block_2 */ /* succs: block_4 */ block block_4: } Native code for unnamed compute shader (null) SIMD16 shader: 21 instructions. 0 loops. 188 cycles. 0:0 spills:fills. Promoted 0 constants. Compacted 336 to 256 bytes (24%) START B0 (126 cycles) mov(8) g2<1>UW 0x76543210V { align1 WE_all 1Q }; mov(16) g7<1>UD g0.1<0,1,0>UD { align1 1H compacted }; shl(16) g5<1>D g1.3<0,1,0>D 0x00000004UD { align1 1H }; add(8) g2.8<1>UW g2<8,8,1>UW 0x0008UW { align1 WE_all 1Q }; add(16) g3<1>D g7<8,8,1>D g1<0,1,0>D { align1 1H compacted }; mov(16) g7<1>D g2<8,8,1>UW { align1 1H }; mul(16) g13<1>D g3<8,8,1>D 65D { align1 1H compacted }; add(16) g9<1>D g7<8,8,1>D g5<8,8,1>D { align1 1H compacted }; math intmod(8) g11<1>UD g9<8,8,1>UD 0x00000041UD { align1 1Q compacted }; math intmod(8) g12<1>UD g10<8,8,1>UD 0x00000041UD { align1 2Q compacted }; add(16) g15<1>D g13<8,8,1>D g11<8,8,1>D { align1 1H compacted }; cmp.l.f0(16) null<1>UD g15<8,8,1>UD 0x00000014UD { align1 1H compacted }; (+f0) if(16) JIP: 88 UIP: 88 { align1 1H }; END B0 ->B1 ->B2 START B1 <-B0 (38 cycles) mov(8) g1<1>UD 0x00000000UD { align1 WE_all 1Q compacted }; and(1) g1.2<1>UD g0.2<0,1,0>UD 0x8f000000UD { align1 WE_all 1N }; send(16) null<1>UW g1<0,1,0>UD 0x02008004 gateway MsgDesc: (barrier msg) mlen 1 rlen 0 { align1 WE_all 1H }; wait(1) n0<0,1,0>UD { align1 WE_all 1N }; send(1) g1<1>UW g1<0,1,0>UW 0x0209c000 data MsgDesc: ( DC mfence, 0, 0) mlen 1 rlen 0 { align1 WE_all 1N }; END B1 ->B2 START B2 <-B0 <-B1 (24 cycles) endif(16) JIP: 16 { align1 1H }; mov(8) g127<1>UD g0<8,8,1>UD { align1 WE_all 1Q compacted }; send(16) null<1>UW g127<8,8,1>UW 0x82000010 thread_spawner MsgDesc: mlen 1 rlen 0 { align1 WE_all 1H EOT }; END B2 INTEL-MESA: error: ../mesa-18.3.1/src/intel/vulkan/anv_device.c:2091: GPU hung on one of our command buffers (VK_ERROR_DEVICE_LOST)