[llvm] backport patches from 3.5 to enable GL3.3 on radeonsi
Dave Airlie
airlied at fedoraproject.org
Wed Mar 19 06:13:40 UTC 2014
commit 3f23b322d6ebcdb18c58c8d3ff9ae74ed1fb26af
Author: Dave Airlie <airlied at redhat.com>
Date: Wed Mar 19 06:10:53 2014 +0000
backport patches from 3.5 to enable GL3.3 on radeonsi
llvm-3.4-radeonsi-backport.patch | 459 ++++++++++++++++++++++++++++++++++++++
llvm.spec | 9 +-
2 files changed, 467 insertions(+), 1 deletion(-)
---
diff --git a/llvm-3.4-radeonsi-backport.patch b/llvm-3.4-radeonsi-backport.patch
new file mode 100644
index 0000000..bbee9af
--- /dev/null
+++ b/llvm-3.4-radeonsi-backport.patch
@@ -0,0 +1,459 @@
+diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
+index 99e1377..7105879 100644
+--- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
++++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
+@@ -316,6 +316,37 @@ void AMDGPUInstPrinter::printKCache(const MCInst *MI, unsigned OpNo,
+ }
+ }
+
++void AMDGPUInstPrinter::printSendMsg(const MCInst *MI, unsigned OpNo,
++ raw_ostream &O) {
++ unsigned SImm16 = MI->getOperand(OpNo).getImm();
++ unsigned Msg = SImm16 & 0xF;
++ if (Msg == 2 || Msg == 3) {
++ unsigned Op = (SImm16 >> 4) & 0xF;
++ if (Msg == 3)
++ O << "Gs_done(";
++ else
++ O << "Gs(";
++ if (Op == 0) {
++ O << "nop";
++ } else {
++ unsigned Stream = (SImm16 >> 8) & 0x3;
++ if (Op == 1)
++ O << "cut";
++ else if (Op == 2)
++ O << "emit";
++ else if (Op == 3)
++ O << "emit-cut";
++ O << " stream " << Stream;
++ }
++ O << "), [m0] ";
++ } else if (Msg == 1)
++ O << "interrupt ";
++ else if (Msg == 15)
++ O << "system ";
++ else
++ O << "unknown(" << Msg << ") ";
++}
++
+ void AMDGPUInstPrinter::printWaitFlag(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ // Note: Mask values are taken from SIInsertWaits.cpp and not from ISA docs
+diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
+index 77af942..2876dd2 100644
+--- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
++++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
+@@ -53,6 +53,7 @@ private:
+ void printRSel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printCT(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printKCache(const MCInst *MI, unsigned OpNo, raw_ostream &O);
++ void printSendMsg(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printWaitFlag(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ };
+
+diff --git a/lib/Target/R600/SIInsertWaits.cpp b/lib/Target/R600/SIInsertWaits.cpp
+index 7ef662e..695ec40 100644
+--- a/lib/Target/R600/SIInsertWaits.cpp
++++ b/lib/Target/R600/SIInsertWaits.cpp
+@@ -314,6 +314,12 @@ Counters SIInsertWaits::handleOperands(MachineInstr &MI) {
+
+ Counters Result = ZeroCounts;
+
++ // S_SENDMSG implicitly waits for all outstanding LGKM transfers to finish,
++ // but we also want to wait for any other outstanding transfers before
++ // signalling other hardware blocks.
++ if (MI.getOpcode() == AMDGPU::S_SENDMSG)
++ return LastIssued;
++
+ // For each register affected by this
+ // instruction increase the result sequence
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
+index 4cd0daa..19d2171 100644
+--- a/lib/Target/R600/SIInstrInfo.td
++++ b/lib/Target/R600/SIInstrInfo.td
+@@ -425,26 +425,48 @@ class MTBUF_Store_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBU
+
+ multiclass MUBUF_Load_Helper <bits<7> op, string asm, RegisterClass regClass> {
+
+- let glc = 0, lds = 0, slc = 0, tfe = 0, soffset = 128 /* ZERO */,
+- mayLoad = 1 in {
+-
+- let offen = 1, idxen = 0, addr64 = 0, offset = 0 in {
+- def _OFFEN : MUBUF <op, (outs regClass:$vdata),
+- (ins SReg_128:$srsrc, VReg_32:$vaddr),
+- asm#" $vdata, $srsrc + $vaddr", []>;
+- }
+-
+- let offen = 0, idxen = 1, addr64 = 0 in {
+- def _IDXEN : MUBUF <op, (outs regClass:$vdata),
+- (ins SReg_128:$srsrc, VReg_32:$vaddr, i16imm:$offset),
+- asm#" $vdata, $srsrc[$vaddr] + $offset", []>;
+- }
++ let lds = 0, mayLoad = 1 in {
++
++ let addr64 = 0 in {
++
++ let offen = 0, idxen = 0 in {
++ def _OFFSET : MUBUF <op, (outs regClass:$vdata),
++ (ins SReg_128:$srsrc, VReg_32:$vaddr,
++ i16imm:$offset, SSrc_32:$soffset, i1imm:$glc,
++ i1imm:$slc, i1imm:$tfe),
++ asm#" $vdata, $srsrc + $offset + $soffset, glc=$glc, slc=$slc, tfe=$tfe", []>;
++ }
++
++ let offen = 1, idxen = 0, offset = 0 in {
++ def _OFFEN : MUBUF <op, (outs regClass:$vdata),
++ (ins SReg_128:$srsrc, VReg_32:$vaddr,
++ SSrc_32:$soffset, i1imm:$glc, i1imm:$slc,
++ i1imm:$tfe),
++ asm#" $vdata, $srsrc + $vaddr + $soffset, glc=$glc, slc=$slc, tfe=$tfe", []>;
++ }
++
++ let offen = 0, idxen = 1 in {
++ def _IDXEN : MUBUF <op, (outs regClass:$vdata),
++ (ins SReg_128:$srsrc, VReg_32:$vaddr,
++ i16imm:$offset, SSrc_32:$soffset, i1imm:$glc,
++ i1imm:$slc, i1imm:$tfe),
++ asm#" $vdata, $srsrc[$vaddr] + $offset + $soffset, glc=$glc, slc=$slc, tfe=$tfe", []>;
++ }
++
++ let offen = 1, idxen = 1 in {
++ def _BOTHEN : MUBUF <op, (outs regClass:$vdata),
++ (ins SReg_128:$srsrc, VReg_64:$vaddr,
++ SSrc_32:$soffset, i1imm:$glc,
++ i1imm:$slc, i1imm:$tfe),
++ asm#" $vdata, $srsrc[$vaddr[0]] + $vaddr[1] + $soffset, glc=$glc, slc=$slc, tfe=$tfe", []>;
++ }
++ }
+
+- let offen = 0, idxen = 0, addr64 = 1 in {
+- def _ADDR64 : MUBUF <op, (outs regClass:$vdata),
+- (ins SReg_128:$srsrc, VReg_64:$vaddr, i16imm:$offset),
+- asm#" $vdata, $srsrc + $vaddr + $offset", []>;
+- }
++ let offen = 0, idxen = 0, addr64 = 1, glc = 0, slc = 0, tfe = 0, soffset = 128 /* ZERO */ in {
++ def _ADDR64 : MUBUF <op, (outs regClass:$vdata),
++ (ins SReg_128:$srsrc, VReg_64:$vaddr, i16imm:$offset),
++ asm#" $vdata, $srsrc + $vaddr + $offset", []>;
++ }
+ }
+ }
+
+diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
+index 76f05eb..9acb9b6 100644
+--- a/lib/Target/R600/SIInstructions.td
++++ b/lib/Target/R600/SIInstructions.td
+@@ -22,6 +22,10 @@ def InterpSlot : Operand<i32> {
+ let PrintMethod = "printInterpSlot";
+ }
+
++def SendMsgImm : Operand<i32> {
++ let PrintMethod = "printSendMsg";
++}
++
+ def isSI : Predicate<"Subtarget.getGeneration() "
+ ">= AMDGPUSubtarget::SOUTHERN_ISLANDS">;
+
+@@ -826,17 +830,25 @@ def S_BARRIER : SOPP <0x0000000a, (ins), "S_BARRIER",
+ def S_WAITCNT : SOPP <0x0000000c, (ins WAIT_FLAG:$simm16), "S_WAITCNT $simm16",
+ []
+ >;
+-} // End hasSideEffects
+ //def S_SETHALT : SOPP_ <0x0000000d, "S_SETHALT", []>;
+ //def S_SLEEP : SOPP_ <0x0000000e, "S_SLEEP", []>;
+ //def S_SETPRIO : SOPP_ <0x0000000f, "S_SETPRIO", []>;
+-//def S_SENDMSG : SOPP_ <0x00000010, "S_SENDMSG", []>;
++
++let Uses = [EXEC] in {
++ def S_SENDMSG : SOPP <0x00000010, (ins SendMsgImm:$simm16, M0Reg:$m0), "S_SENDMSG $simm16",
++ [(int_SI_sendmsg imm:$simm16, M0Reg:$m0)]
++ > {
++ let DisableEncoding = "$m0";
++ }
++} // End Uses = [EXEC]
++
+ //def S_SENDMSGHALT : SOPP_ <0x00000011, "S_SENDMSGHALT", []>;
+ //def S_TRAP : SOPP_ <0x00000012, "S_TRAP", []>;
+ //def S_ICACHE_INV : SOPP_ <0x00000013, "S_ICACHE_INV", []>;
+ //def S_INCPERFLEVEL : SOPP_ <0x00000014, "S_INCPERFLEVEL", []>;
+ //def S_DECPERFLEVEL : SOPP_ <0x00000015, "S_DECPERFLEVEL", []>;
+ //def S_TTRACEDATA : SOPP_ <0x00000016, "S_TTRACEDATA", []>;
++} // End hasSideEffects
+
+ def V_CNDMASK_B32_e32 : VOP2 <0x00000000, (outs VReg_32:$dst),
+ (ins VSrc_32:$src0, VReg_32:$src1, VCCReg:$vcc),
+@@ -1305,8 +1317,8 @@ def SI_END_CF : InstSI <
+
+ def SI_KILL : InstSI <
+ (outs),
+- (ins VReg_32:$src),
+- "SI_KIL $src",
++ (ins VSrc_32:$src),
++ "SI_KILL $src",
+ [(int_AMDGPU_kill f32:$src)]
+ >;
+
+@@ -1397,13 +1409,13 @@ def : Pat<
+
+ def : Pat <
+ (int_AMDGPU_kilp),
+- (SI_KILL (V_MOV_B32_e32 0xbf800000))
++ (SI_KILL 0xbf800000)
+ >;
+
+ /* int_SI_vs_load_input */
+ def : Pat<
+ (SIload_input i128:$tlst, IMM12bit:$attr_offset, i32:$buf_idx_vgpr),
+- (BUFFER_LOAD_FORMAT_XYZW_IDXEN $tlst, $buf_idx_vgpr, imm:$attr_offset)
++ (BUFFER_LOAD_FORMAT_XYZW_IDXEN $tlst, $buf_idx_vgpr, imm:$attr_offset, 0, 0, 0, 0)
+ >;
+
+ /* int_SI_export */
+@@ -1809,7 +1821,7 @@ def : Pat <
+ // 3. Offset in an 32Bit VGPR
+ def : Pat <
+ (SIload_constant i128:$sbase, i32:$voff),
+- (BUFFER_LOAD_DWORD_OFFEN $sbase, $voff)
++ (BUFFER_LOAD_DWORD_OFFEN $sbase, $voff, 0, 0, 0, 0)
+ >;
+
+ // The multiplication scales from [0,1] to the unsigned integer range
+@@ -1970,6 +1982,50 @@ defm : MUBUFStore_Pattern <BUFFER_STORE_DWORDX2, i64, global_store>;
+ defm : MUBUFStore_Pattern <BUFFER_STORE_DWORDX2, v2i32, global_store>;
+ defm : MUBUFStore_Pattern <BUFFER_STORE_DWORDX4, v4i32, global_store>;
+
++// BUFFER_LOAD_DWORD*, addr64=0
++multiclass MUBUF_Load_Dword <ValueType vt, MUBUF offset, MUBUF offen, MUBUF idxen,
++ MUBUF bothen> {
++
++ def : Pat <
++ (vt (int_SI_buffer_load_dword i128:$rsrc, i32:$vaddr, i32:$soffset,
++ imm:$offset, 0, 0, imm:$glc, imm:$slc,
++ imm:$tfe)),
++ (offset $rsrc, $vaddr, (as_i16imm $offset), $soffset, (as_i1imm $glc),
++ (as_i1imm $slc), (as_i1imm $tfe))
++ >;
++
++ def : Pat <
++ (vt (int_SI_buffer_load_dword i128:$rsrc, i32:$vaddr, i32:$soffset,
++ imm, 1, 0, imm:$glc, imm:$slc,
++ imm:$tfe)),
++ (offen $rsrc, $vaddr, $soffset, (as_i1imm $glc), (as_i1imm $slc),
++ (as_i1imm $tfe))
++ >;
++
++ def : Pat <
++ (vt (int_SI_buffer_load_dword i128:$rsrc, i32:$vaddr, i32:$soffset,
++ imm:$offset, 0, 1, imm:$glc, imm:$slc,
++ imm:$tfe)),
++ (idxen $rsrc, $vaddr, (as_i16imm $offset), $soffset, (as_i1imm $glc),
++ (as_i1imm $slc), (as_i1imm $tfe))
++ >;
++
++ def : Pat <
++ (vt (int_SI_buffer_load_dword i128:$rsrc, v2i32:$vaddr, i32:$soffset,
++ imm, 1, 1, imm:$glc, imm:$slc,
++ imm:$tfe)),
++ (bothen $rsrc, $vaddr, $soffset, (as_i1imm $glc), (as_i1imm $slc),
++ (as_i1imm $tfe))
++ >;
++}
++
++defm : MUBUF_Load_Dword <i32, BUFFER_LOAD_DWORD_OFFSET, BUFFER_LOAD_DWORD_OFFEN,
++ BUFFER_LOAD_DWORD_IDXEN, BUFFER_LOAD_DWORD_BOTHEN>;
++defm : MUBUF_Load_Dword <v2i32, BUFFER_LOAD_DWORDX2_OFFSET, BUFFER_LOAD_DWORDX2_OFFEN,
++ BUFFER_LOAD_DWORDX2_IDXEN, BUFFER_LOAD_DWORDX2_BOTHEN>;
++defm : MUBUF_Load_Dword <v4i32, BUFFER_LOAD_DWORDX4_OFFSET, BUFFER_LOAD_DWORDX4_OFFEN,
++ BUFFER_LOAD_DWORDX4_IDXEN, BUFFER_LOAD_DWORDX4_BOTHEN>;
++
+ //===----------------------------------------------------------------------===//
+ // MTBUF Patterns
+ //===----------------------------------------------------------------------===//
+diff --git a/lib/Target/R600/SIIntrinsics.td b/lib/Target/R600/SIIntrinsics.td
+index 7fcc964..00e32c0 100644
+--- a/lib/Target/R600/SIIntrinsics.td
++++ b/lib/Target/R600/SIIntrinsics.td
+@@ -38,6 +38,22 @@ let TargetPrefix = "SI", isTarget = 1 in {
+ llvm_i32_ty], // tfe(imm)
+ []>;
+
++ // Fully-flexible BUFFER_LOAD_DWORD_* except for the ADDR64 bit, which is not exposed
++ def int_SI_buffer_load_dword : Intrinsic <
++ [llvm_anyint_ty], // vdata(VGPR), overloaded for types i32, v2i32, v4i32
++ [llvm_anyint_ty, // rsrc(SGPR)
++ llvm_anyint_ty, // vaddr(VGPR)
++ llvm_i32_ty, // soffset(SGPR)
++ llvm_i32_ty, // inst_offset(imm)
++ llvm_i32_ty, // offen(imm)
++ llvm_i32_ty, // idxen(imm)
++ llvm_i32_ty, // glc(imm)
++ llvm_i32_ty, // slc(imm)
++ llvm_i32_ty], // tfe(imm)
++ [IntrReadArgMem]>;
++
++ def int_SI_sendmsg : Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
++
+ class Sample : Intrinsic <[llvm_v4f32_ty], [llvm_anyvector_ty, llvm_v32i8_ty, llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>;
+
+ def int_SI_sample : Sample;
+diff --git a/lib/Target/R600/SILowerControlFlow.cpp b/lib/Target/R600/SILowerControlFlow.cpp
+index 958763d..254f3a6 100644
+--- a/lib/Target/R600/SILowerControlFlow.cpp
++++ b/lib/Target/R600/SILowerControlFlow.cpp
+@@ -55,6 +55,7 @@
+ #include "llvm/CodeGen/MachineFunctionPass.h"
+ #include "llvm/CodeGen/MachineInstrBuilder.h"
+ #include "llvm/CodeGen/MachineRegisterInfo.h"
++#include "llvm/IR/Constants.h"
+
+ using namespace llvm;
+
+@@ -145,7 +146,9 @@ void SILowerControlFlowPass::SkipIfDead(MachineInstr &MI) {
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+
+- if (!shouldSkip(&MBB, &MBB.getParent()->back()))
++ if (MBB.getParent()->getInfo<SIMachineFunctionInfo>()->ShaderType !=
++ ShaderType::PIXEL ||
++ !shouldSkip(&MBB, &MBB.getParent()->back()))
+ return;
+
+ MachineBasicBlock::iterator Insert = &MI;
+@@ -295,15 +298,27 @@ void SILowerControlFlowPass::Kill(MachineInstr &MI) {
+
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
++ const MachineOperand &Op = MI.getOperand(0);
+
+- // Kill is only allowed in pixel shaders
++ // Kill is only allowed in pixel / geometry shaders
+ assert(MBB.getParent()->getInfo<SIMachineFunctionInfo>()->ShaderType ==
+- ShaderType::PIXEL);
+-
+- // Clear this pixel from the exec mask if the operand is negative
+- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMPX_LE_F32_e32), AMDGPU::VCC)
+- .addImm(0)
+- .addOperand(MI.getOperand(0));
++ ShaderType::PIXEL ||
++ MBB.getParent()->getInfo<SIMachineFunctionInfo>()->ShaderType ==
++ ShaderType::GEOMETRY);
++
++ // Clear this thread from the exec mask if the operand is negative
++ if ((Op.isImm() || Op.isFPImm())) {
++ // Constant operand: Set exec mask to 0 or do nothing
++ if (Op.isImm() ? (Op.getImm() & 0x80000000) :
++ Op.getFPImm()->isNegative()) {
++ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
++ .addImm(0);
++ }
++ } else {
++ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMPX_LE_F32_e32), AMDGPU::VCC)
++ .addImm(0)
++ .addOperand(Op);
++ }
+
+ MI.eraseFromParent();
+ }
+diff --git a/test/CodeGen/R600/llvm.AMDGPU.kill.ll b/test/CodeGen/R600/llvm.AMDGPU.kill.ll
+new file mode 100644
+index 0000000..4ab6a8a
+--- /dev/null
++++ b/test/CodeGen/R600/llvm.AMDGPU.kill.ll
+@@ -0,0 +1,22 @@
++; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI %s
++
++; SI-LABEL: @kill_gs_const
++; SI-NOT: V_CMPX_LE_F32
++; SI: S_MOV_B64 exec, 0
++
++define void @kill_gs_const() #0 {
++main_body:
++ %0 = icmp ule i32 0, 3
++ %1 = select i1 %0, float 1.000000e+00, float -1.000000e+00
++ call void @llvm.AMDGPU.kill(float %1)
++ %2 = icmp ule i32 3, 0
++ %3 = select i1 %2, float 1.000000e+00, float -1.000000e+00
++ call void @llvm.AMDGPU.kill(float %3)
++ ret void
++}
++
++declare void @llvm.AMDGPU.kill(float)
++
++attributes #0 = { "ShaderType"="2" }
++
++!0 = metadata !{metadata !"const", null, i32 1}
+diff --git a/test/CodeGen/R600/llvm.SI.load.dword.ll b/test/CodeGen/R600/llvm.SI.load.dword.ll
+new file mode 100644
+index 0000000..a622775
+--- /dev/null
++++ b/test/CodeGen/R600/llvm.SI.load.dword.ll
+@@ -0,0 +1,40 @@
++;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
++
++; Example of a simple geometry shader loading vertex attributes from the
++; ESGS ring buffer
++
++; CHECK-LABEL: @main
++; CHECK: BUFFER_LOAD_DWORD
++; CHECK: BUFFER_LOAD_DWORD
++; CHECK: BUFFER_LOAD_DWORD
++; CHECK: BUFFER_LOAD_DWORD
++
++define void @main([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, [2 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* inreg, [17 x <16 x i8>] addrspace(2)* inreg, i32, i32, i32, i32) #0 {
++main_body:
++ %10 = getelementptr [2 x <16 x i8>] addrspace(2)* %3, i64 0, i32 1
++ %11 = load <16 x i8> addrspace(2)* %10, !tbaa !0
++ %12 = shl i32 %6, 2
++ %13 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %11, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 0)
++ %14 = bitcast i32 %13 to float
++ %15 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %11, i32 %12, i32 0, i32 0, i32 1, i32 0, i32 1, i32 1, i32 0)
++ %16 = bitcast i32 %15 to float
++ %17 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %11, i32 %12, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 0)
++ %18 = bitcast i32 %17 to float
++ %19 = call i32 @llvm.SI.buffer.load.dword.i32.v2i32(<16 x i8> %11, <2 x i32> <i32 0, i32 0>, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 0)
++ %20 = bitcast i32 %19 to float
++ call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %14, float %16, float %18, float %20)
++ ret void
++}
++
++; Function Attrs: nounwind readonly
++declare i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
++
++; Function Attrs: nounwind readonly
++declare i32 @llvm.SI.buffer.load.dword.i32.v2i32(<16 x i8>, <2 x i32>, i32, i32, i32, i32, i32, i32, i32) #1
++
++declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
++
++attributes #0 = { "ShaderType"="1" }
++attributes #1 = { nounwind readonly }
++
++!0 = metadata !{metadata !"const", null, i32 1}
+diff --git a/test/CodeGen/R600/llvm.SI.sendmsg.ll b/test/CodeGen/R600/llvm.SI.sendmsg.ll
+new file mode 100644
+index 0000000..581d422
+--- /dev/null
++++ b/test/CodeGen/R600/llvm.SI.sendmsg.ll
+@@ -0,0 +1,21 @@
++;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
++
++; CHECK-LABEL: @main
++; CHECK: S_SENDMSG Gs(emit stream 0)
++; CHECK: S_SENDMSG Gs(cut stream 1)
++; CHECK: S_SENDMSG Gs(emit-cut stream 2)
++; CHECK: S_SENDMSG Gs_done(nop)
++
++define void @main() {
++main_body:
++ call void @llvm.SI.sendmsg(i32 34, i32 0);
++ call void @llvm.SI.sendmsg(i32 274, i32 0);
++ call void @llvm.SI.sendmsg(i32 562, i32 0);
++ call void @llvm.SI.sendmsg(i32 3, i32 0);
++ ret void
++}
++
++; Function Attrs: nounwind
++declare void @llvm.SI.sendmsg(i32, i32) #0
++
++attributes #0 = { nounwind }
diff --git a/llvm.spec b/llvm.spec
index 5b9d293..96f6feb 100644
--- a/llvm.spec
+++ b/llvm.spec
@@ -36,7 +36,7 @@
Name: llvm
Version: 3.4
-Release: 4%{?dist}
+Release: 5%{?dist}
Summary: The Low Level Virtual Machine
Group: Development/Languages
@@ -59,6 +59,9 @@ Source11: llvm-Config-llvm-config.h
Patch1: 0001-data-install-preserve-timestamps.patch
Patch2: 0002-linker-flags-speedup-memory.patch
+# radeonsi GL 3.3 backport
+Patch3: llvm-3.4-radeonsi-backport.patch
+
BuildRequires: bison
BuildRequires: chrpath
BuildRequires: flex
@@ -284,6 +287,7 @@ mv lldb-%{version} tools/lldb
%patch1 -p1
%patch2 -p1
+%patch3 -p1
# fix library paths
sed -i 's|/lib /usr/lib $lt_ld_extra|%{_libdir} $lt_ld_extra|' ./configure
@@ -649,6 +653,9 @@ exit 0
%endif
%changelog
+* Wed Mar 19 2014 Dave Airlie <airlied at redhat.com> 3.4-5
+- backport patches from 3.5 to enable GL3.3 on radeonsi
+
* Fri Jan 31 2014 Kyle McMartin <kyle at redhat.com> 3.4-4
- Disable lldb on everything but x86_64, and i686. It hasn't been ported
beyond those platforms so far.
More information about the scm-commits mailing list