radeonsi: Compiling shader 1
TGSI shader LLVM IR:

; ModuleID = 'tgsi'
source_filename = "tgsi"
target triple = "amdgcn--"

define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([17 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32, float, float, float, float) #0 {
main_body:
  %27 = bitcast float %5 to i32
  %28 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %27, 10
  %29 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %28, float %23, 11
  %30 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %29, float %24, 12
  %31 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %30, float %25, 13
  %32 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %31, float %26, 14
  %33 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %32, float %21, 24
  ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %33
}

attributes #0 = { "InitialPSInputAddr"="36983" }

FRAG
DCL IN[0], GENERIC[0], LINEAR
DCL SAMP[0]
DCL SVIEW[0], RECT, FLOAT
DCL OUT[0], COLOR
DCL TEMP[0]
  0: TEX TEMP[0], IN[0], SAMP[0], RECT
  1: MOV OUT[0], TEMP[0].xxxx
  2: END
radeonsi: Compiling shader 2
TGSI shader LLVM IR:

; ModuleID = 'tgsi'
source_filename = "tgsi"
target triple = "amdgcn--"

define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([17 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 {
main_body:
  %23 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0, !amdgpu.uniform !0
  %24 = load <8 x i32>, <8 x i32> addrspace(2)* %23, align 32, !invariant.load !0
  %25 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)*
  %26 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %25, i64 0, i64 3, !amdgpu.uniform !0
  %27 = load <4 x i32>, <4 x i32> addrspace(2)* %26, align 16, !invariant.load !0
  %28 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %12)
  %29 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %12)
  %30 = bitcast float %28 to i32
  %31 = bitcast float %29 to i32
  %32 = insertelement <2 x i32> undef, i32 %30, i32 0
  %33 = insertelement <2 x i32> %32, i32 %31, i32 1
  %34 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %33, <8 x i32> %24, <4 x i32> %27, i32 15, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
  %35 = extractelement <4 x float> %34, i32 0
  %36 = bitcast float %5 to i32
  %37 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %36, 10
  %38 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %37, float %35, 11
  %39 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %38, float %35, 12
  %40 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %39, float %35, 13
  %41 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %40, float %35, 14
  %42 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %41, float %21, 24
  ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %42
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1

attributes #0 = { "InitialPSInputAddr"="36983" }
attributes #1 = { nounwind readnone }

!0 = !{}

VERT
DCL IN[0..1]
DCL OUT[0], POSITION
DCL OUT[1], COLOR
DCL OUT[2], GENERIC[0]
DCL CONST[0..2]
DCL TEMP[0]
IMM[0] FLT32 {   -1.0000,     0.0000,     0.0000,     1.0000}
  0: MAD TEMP[0].xy, IN[0], CONST[2].xyyy, CONST[1].zwww
  1: MAD OUT[0].xy, TEMP[0], CONST[1].xyyy, IMM[0].xxxx
  2: MOV OUT[0].zw, IMM[0]
  3: MOV OUT[1], CONST[0]
  4: MOV OUT[2], IN[1]
  5: END
radeonsi: Compiling shader 3
TGSI shader LLVM IR:

; ModuleID = 'tgsi'
source_filename = "tgsi"
target triple = "amdgcn--"

define amdgpu_vs <{ float, float, float }> @main([17 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32) {
main_body:
  %16 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0, !amdgpu.uniform !0
  %17 = load <16 x i8>, <16 x i8> addrspace(2)* %16, align 16, !invariant.load !0
  %18 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %17, i32 0, i32 %14)
  %19 = extractelement <4 x float> %18, i32 0
  %20 = extractelement <4 x float> %18, i32 1
  %21 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1, !amdgpu.uniform !0
  %22 = load <16 x i8>, <16 x i8> addrspace(2)* %21, align 16, !invariant.load !0
  %23 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %22, i32 0, i32 %15)
  %24 = extractelement <4 x float> %23, i32 0
  %25 = extractelement <4 x float> %23, i32 1
  %26 = extractelement <4 x float> %23, i32 2
  %27 = extractelement <4 x float> %23, i32 3
  %28 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0
  %29 = load <16 x i8>, <16 x i8> addrspace(2)* %28, align 16, !invariant.load !0
  %30 = call float @llvm.SI.load.const(<16 x i8> %29, i32 32)
  %31 = call float @llvm.SI.load.const(<16 x i8> %29, i32 24)
  %32 = fmul float %19, %30
  %33 = fadd float %32, %31
  %34 = call float @llvm.SI.load.const(<16 x i8> %29, i32 36)
  %35 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0
  %36 = load <16 x i8>, <16 x i8> addrspace(2)* %35, align 16, !invariant.load !0
  %37 = call float @llvm.SI.load.const(<16 x i8> %36, i32 28)
  %38 = fmul float %20, %34
  %39 = fadd float %38, %37
  %40 = call float @llvm.SI.load.const(<16 x i8> %36, i32 16)
  %41 = fmul float %33, %40
  %42 = fadd float %41, -1.000000e+00
  %43 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0
  %44 = load <16 x i8>, <16 x i8> addrspace(2)* %43, align 16, !invariant.load !0
  %45 = call float @llvm.SI.load.const(<16 x i8> %44, i32 20)
  %46 = fmul float %39, %45
  %47 = fadd float %46, -1.000000e+00
  %48 = call float @llvm.SI.load.const(<16 x i8> %44, i32 0)
  %49 = call float @llvm.SI.load.const(<16 x i8> %44, i32 4)
  %50 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0
  %51 = load <16 x i8>, <16 x i8> addrspace(2)* %50, align 16, !invariant.load !0
  %52 = call float @llvm.SI.load.const(<16 x i8> %51, i32 8)
  %53 = call float @llvm.SI.load.const(<16 x i8> %51, i32 12)
  %54 = and i32 %9, 1
  %55 = icmp eq i32 %54, 0
  br i1 %55, label %endif-block, label %if-true-block

if-true-block:                                    ; preds = %main_body
  %56 = call float @llvm.AMDGPU.clamp.(float %48, float 0.000000e+00, float 1.000000e+00)
  %57 = call float @llvm.AMDGPU.clamp.(float %49, float 0.000000e+00, float 1.000000e+00)
  %58 = call float @llvm.AMDGPU.clamp.(float %52, float 0.000000e+00, float 1.000000e+00)
  %59 = call float @llvm.AMDGPU.clamp.(float %53, float 0.000000e+00, float 1.000000e+00)
  br label %endif-block

endif-block:                                      ; preds = %main_body, %if-true-block
  %.03 = phi float [ %59, %if-true-block ], [ %53, %main_body ]
  %.02 = phi float [ %58, %if-true-block ], [ %52, %main_body ]
  %.01 = phi float [ %57, %if-true-block ], [ %49, %main_body ]
  %.0 = phi float [ %56, %if-true-block ], [ %48, %main_body ]
  %60 = bitcast i32 %12 to float
  %61 = insertvalue <{ float, float, float }> undef, float %60, 2
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %.0, float %.01, float %.02, float %.03)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %24, float %25, float %26, float %27)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %42, float %47, float 0.000000e+00, float 1.000000e+00)
  ret <{ float, float, float }> %61
}

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #0

; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.clamp.(float, float, float) #0

; Function Attrs: nounwind
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) #1

attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }

!0 = !{}

Talos: /home/vedranm/workspace/llvm/include/llvm/ADT/ilist_iterator.h:126: llvm::ilist_iterator<OptionsT, IsReverse, IsConst>::reference llvm::ilist_iterator<OptionsT, IsReverse, IsConst>::operator*() const [with OptionsT = llvm::ilist_detail::node_options<llvm::MachineInstr, true, true, void>; bool IsReverse = false; bool IsConst = true; llvm::ilist_iterator<OptionsT, IsReverse, IsConst>::reference = const llvm::MachineInstr&]: Assertion `!NodePtr->isKnownSentinel()' failed.