radeonsi: Compiling shader 1 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target triple = "amdgcn--" define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([17 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32, float, float, float, float) #0 { main_body: %27 = bitcast float %5 to i32 %28 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %27, 10 %29 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %28, float %23, 11 %30 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %29, float %24, 12 %31 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %30, float %25, 13 %32 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %31, float %26, 14 %33 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %32, float %21, 24 ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %33 } attributes #0 = { "InitialPSInputAddr"="36983" } FRAG DCL IN[0], GENERIC[0], LINEAR DCL SAMP[0] DCL SVIEW[0], RECT, FLOAT DCL OUT[0], COLOR DCL TEMP[0] 0: TEX TEMP[0], IN[0], SAMP[0], RECT 1: MOV OUT[0], TEMP[0].xxxx 2: END radeonsi: Compiling shader 2 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target triple = "amdgcn--" define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([17 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { main_body: %23 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0, !amdgpu.uniform !0 %24 = load <8 x i32>, <8 x i32> addrspace(2)* %23, align 32, !invariant.load !0 %25 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %26 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %25, i64 0, i64 3, !amdgpu.uniform !0 %27 = load <4 x i32>, <4 x i32> addrspace(2)* %26, align 16, !invariant.load !0 %28 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %12) %29 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %12) %30 = bitcast float %28 to i32 %31 = bitcast float %29 to i32 %32 = insertelement <2 x i32> undef, i32 %30, i32 0 %33 = insertelement <2 x i32> %32, i32 %31, i32 1 %34 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %33, <8 x i32> %24, <4 x i32> %27, i32 15, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %35 = extractelement <4 x float> %34, i32 0 %36 = bitcast float %5 to i32 %37 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %36, 10 %38 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %37, float %35, 11 %39 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %38, float %35, 12 %40 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %39, float %35, 13 %41 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %40, float %35, 14 %42 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %41, float %21, 24 ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %42 } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 attributes #0 = { "InitialPSInputAddr"="36983" } attributes #1 = { nounwind readnone } !0 = !{} VERT DCL IN[0..1] DCL OUT[0], POSITION DCL OUT[1], COLOR DCL OUT[2], GENERIC[0] DCL CONST[0..2] DCL TEMP[0] IMM[0] FLT32 { -1.0000, 0.0000, 0.0000, 1.0000} 0: MAD TEMP[0].xy, IN[0], CONST[2].xyyy, CONST[1].zwww 1: MAD OUT[0].xy, TEMP[0], CONST[1].xyyy, IMM[0].xxxx 2: MOV OUT[0].zw, IMM[0] 3: MOV OUT[1], CONST[0] 4: MOV OUT[2], IN[1] 5: END radeonsi: Compiling shader 3 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target triple = "amdgcn--" define amdgpu_vs <{ float, float, float }> @main([17 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32) { main_body: %16 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0, !amdgpu.uniform !0 %17 = load <16 x i8>, <16 x i8> addrspace(2)* %16, align 16, !invariant.load !0 %18 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %17, i32 0, i32 %14) %19 = extractelement <4 x float> %18, i32 0 %20 = extractelement <4 x float> %18, i32 1 %21 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1, !amdgpu.uniform !0 %22 = load <16 x i8>, <16 x i8> addrspace(2)* %21, align 16, !invariant.load !0 %23 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %22, i32 0, i32 %15) %24 = extractelement <4 x float> %23, i32 0 %25 = extractelement <4 x float> %23, i32 1 %26 = extractelement <4 x float> %23, i32 2 %27 = extractelement <4 x float> %23, i32 3 %28 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %29 = load <16 x i8>, <16 x i8> addrspace(2)* %28, align 16, !invariant.load !0 %30 = call float @llvm.SI.load.const(<16 x i8> %29, i32 32) %31 = call float @llvm.SI.load.const(<16 x i8> %29, i32 24) %32 = fmul float %19, %30 %33 = fadd float %32, %31 %34 = call float @llvm.SI.load.const(<16 x i8> %29, i32 36) %35 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %36 = load <16 x i8>, <16 x i8> addrspace(2)* %35, align 16, !invariant.load !0 %37 = call float @llvm.SI.load.const(<16 x i8> %36, i32 28) %38 = fmul float %20, %34 %39 = fadd float %38, %37 %40 = call float @llvm.SI.load.const(<16 x i8> %36, i32 16) %41 = fmul float %33, %40 %42 = fadd float %41, -1.000000e+00 %43 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %44 = load <16 x i8>, <16 x i8> addrspace(2)* %43, align 16, !invariant.load !0 %45 = call float @llvm.SI.load.const(<16 x i8> %44, i32 20) %46 = fmul float %39, %45 %47 = fadd float %46, -1.000000e+00 %48 = call float @llvm.SI.load.const(<16 x i8> %44, i32 0) %49 = call float @llvm.SI.load.const(<16 x i8> %44, i32 4) %50 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %51 = load <16 x i8>, <16 x i8> addrspace(2)* %50, align 16, !invariant.load !0 %52 = call float @llvm.SI.load.const(<16 x i8> %51, i32 8) %53 = call float @llvm.SI.load.const(<16 x i8> %51, i32 12) %54 = and i32 %9, 1 %55 = icmp eq i32 %54, 0 br i1 %55, label %endif-block, label %if-true-block if-true-block: ; preds = %main_body %56 = call float @llvm.AMDGPU.clamp.(float %48, float 0.000000e+00, float 1.000000e+00) %57 = call float @llvm.AMDGPU.clamp.(float %49, float 0.000000e+00, float 1.000000e+00) %58 = call float @llvm.AMDGPU.clamp.(float %52, float 0.000000e+00, float 1.000000e+00) %59 = call float @llvm.AMDGPU.clamp.(float %53, float 0.000000e+00, float 1.000000e+00) br label %endif-block endif-block: ; preds = %main_body, %if-true-block %.03 = phi float [ %59, %if-true-block ], [ %53, %main_body ] %.02 = phi float [ %58, %if-true-block ], [ %52, %main_body ] %.01 = phi float [ %57, %if-true-block ], [ %49, %main_body ] %.0 = phi float [ %56, %if-true-block ], [ %48, %main_body ] %60 = bitcast i32 %12 to float %61 = insertvalue <{ float, float, float }> undef, float %60, 2 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %.0, float %.01, float %.02, float %.03) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %24, float %25, float %26, float %27) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %42, float %47, float 0.000000e+00, float 1.000000e+00) ret <{ float, float, float }> %61 } ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #0 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.clamp.(float, float, float) #0 ; Function Attrs: nounwind declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) #1 attributes #0 = { nounwind readnone } attributes #1 = { nounwind } !0 = !{} Talos: /home/vedranm/workspace/llvm/include/llvm/ADT/ilist_iterator.h:126: llvm::ilist_iterator::reference llvm::ilist_iterator::operator*() const [with OptionsT = llvm::ilist_detail::node_options; bool IsReverse = false; bool IsConst = true; llvm::ilist_iterator::reference = const llvm::MachineInstr&]: Assertion `!NodePtr->isKnownSentinel()' failed.