jistone pushed to llvm11 (f34). "Update to 11.1.0 final (..more)"

Wednesday, 31 March 2021

Notification time stamped 2021-03-31 17:26:13 UTC

From 76f7a9988645561fe95dd5a220962db700d452cd Mon Sep 17 00:00:00 2001
From: Josh Stone <jistone(a)redhat.com>
Date: Mar 31 2021 01:05:25 +0000
Subject: Update to 11.1.0 final


Also add two fixes for rustc codegen.

---

diff --git a/.gitignore b/.gitignore
index 3180aca..5b213c4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,4 @@
 /llvm-11.1.0rc2.src.tar.xz
 /llvm-11.1.0rc2.src.tar.xz.sig
+/llvm-11.1.0.src.tar.xz
+/llvm-11.1.0.src.tar.xz.sig
diff --git a/0001-MemCpyOpt-Correctly-merge-alias-scopes-during-call-s.patch
b/0001-MemCpyOpt-Correctly-merge-alias-scopes-during-call-s.patch
new file mode 100644
index 0000000..411d94f
--- /dev/null
+++ b/0001-MemCpyOpt-Correctly-merge-alias-scopes-during-call-s.patch
@@ -0,0 +1,373 @@
+From e804574cad8efa1b7a660848ef7adc871a7f850e Mon Sep 17 00:00:00 2001
+From: modimo <modimo(a)fb.com>
+Date: Thu, 3 Dec 2020 09:23:37 -0800
+Subject: [PATCH] [MemCpyOpt] Correctly merge alias scopes during call slot
+ optimization
+
+When MemCpyOpt performs call slot optimization it will concatenate the `alias.scope` metadata between the function call and the memcpy. However, scoped AA relies on the domains in metadata to be maintained in a caller-callee relationship. Naive concatenation breaks this assumption leading to bad AA results.
+
+The fix is to take the intersection of domains then union the scopes within those domains.
+
+The original bug came from a case of rust bad codegen which uses this bad aliasing to perform additional memcpy optimizations. As show in the added test case `%src` got forwarded past its lifetime leading to a dereference of garbage data.
+
+Testing
+ninja check-llvm
+
+Reviewed By: jeroen.dobbelaere
+
+Differential Revision: https://reviews.llvm.org/D91576
+
+(cherry picked from commit 18603319321a6c1b158800bcc60035ee01549516)
+---
+ llvm/include/llvm/Analysis/ScopedNoAliasAA.h  | 21 ++++++++++
+ llvm/lib/Analysis/ScopedNoAliasAA.cpp         | 25 ------------
+ llvm/lib/IR/Metadata.cpp                      | 28 ++++++++++++-
+ .../ScopedNoAliasAA/alias-scope-merging.ll    | 37 ++++++++++++++++++
+ llvm/test/Transforms/GVN/noalias.ll           | 29 +++++++-------
+ .../InstCombine/fold-phi-load-metadata.ll     |  4 +-
+ .../Transforms/MemCpyOpt/callslot_badaa.ll    | 39 +++++++++++++++++++
+ llvm/test/Transforms/NewGVN/noalias.ll        | 29 +++++++-------
+ 8 files changed, 156 insertions(+), 56 deletions(-)
+ create mode 100644 llvm/test/Analysis/ScopedNoAliasAA/alias-scope-merging.ll
+ create mode 100644 llvm/test/Transforms/MemCpyOpt/callslot_badaa.ll
+
+diff --git a/llvm/include/llvm/Analysis/ScopedNoAliasAA.h
b/llvm/include/llvm/Analysis/ScopedNoAliasAA.h
+index c55228eace4b..562640647918 100644
+--- a/llvm/include/llvm/Analysis/ScopedNoAliasAA.h
++++ b/llvm/include/llvm/Analysis/ScopedNoAliasAA.h
+@@ -25,6 +25,27 @@ class Function;
+ class MDNode;
+ class MemoryLocation;
+ 
++/// This is a simple wrapper around an MDNode which provides a higher-level
++/// interface by hiding the details of how alias analysis information is encoded
++/// in its operands.
++class AliasScopeNode {
++  const MDNode *Node = nullptr;
++
++public:
++  AliasScopeNode() = default;
++  explicit AliasScopeNode(const MDNode *N) : Node(N) {}
++
++  /// Get the MDNode for this AliasScopeNode.
++  const MDNode *getNode() const { return Node; }
++
++  /// Get the MDNode for this AliasScopeNode's domain.
++  const MDNode *getDomain() const {
++    if (Node->getNumOperands() < 2)
++      return nullptr;
++    return dyn_cast_or_null<MDNode>(Node->getOperand(1));
++  }
++};
++
+ /// A simple AA result which uses scoped-noalias metadata to answer queries.
+ class ScopedNoAliasAAResult : public AAResultBase<ScopedNoAliasAAResult> {
+   friend AAResultBase<ScopedNoAliasAAResult>;
+diff --git a/llvm/lib/Analysis/ScopedNoAliasAA.cpp
b/llvm/lib/Analysis/ScopedNoAliasAA.cpp
+index 8928678d6ab2..22e0501b28f4 100644
+--- a/llvm/lib/Analysis/ScopedNoAliasAA.cpp
++++ b/llvm/lib/Analysis/ScopedNoAliasAA.cpp
+@@ -50,31 +50,6 @@ using namespace llvm;
+ static cl::opt<bool> EnableScopedNoAlias("enable-scoped-noalias",
+                                          cl::init(true), cl::Hidden);
+ 
+-namespace {
+-
+-/// This is a simple wrapper around an MDNode which provides a higher-level
+-/// interface by hiding the details of how alias analysis information is encoded
+-/// in its operands.
+-class AliasScopeNode {
+-  const MDNode *Node = nullptr;
+-
+-public:
+-  AliasScopeNode() = default;
+-  explicit AliasScopeNode(const MDNode *N) : Node(N) {}
+-
+-  /// Get the MDNode for this AliasScopeNode.
+-  const MDNode *getNode() const { return Node; }
+-
+-  /// Get the MDNode for this AliasScopeNode's domain.
+-  const MDNode *getDomain() const {
+-    if (Node->getNumOperands() < 2)
+-      return nullptr;
+-    return dyn_cast_or_null<MDNode>(Node->getOperand(1));
+-  }
+-};
+-
+-} // end anonymous namespace
+-
+ AliasResult ScopedNoAliasAAResult::alias(const MemoryLocation &LocA,
+                                          const MemoryLocation &LocB,
+                                          AAQueryInfo &AAQI) {
+diff --git a/llvm/lib/IR/Metadata.cpp b/llvm/lib/IR/Metadata.cpp
+index ce89009e86eb..5826464206d6 100644
+--- a/llvm/lib/IR/Metadata.cpp
++++ b/llvm/lib/IR/Metadata.cpp
+@@ -26,6 +26,7 @@
+ #include "llvm/ADT/StringMap.h"
+ #include "llvm/ADT/StringRef.h"
+ #include "llvm/ADT/Twine.h"
++#include "llvm/Analysis/ScopedNoAliasAA.h"
+ #include "llvm/IR/Argument.h"
+ #include "llvm/IR/BasicBlock.h"
+ #include "llvm/IR/Constant.h"
+@@ -925,7 +926,32 @@ MDNode *MDNode::getMostGenericAliasScope(MDNode *A, MDNode *B) {
+   if (!A || !B)
+     return nullptr;
+ 
+-  return concatenate(A, B);
++  // Take the intersection of domains then union the scopes
++  // within those domains
++  SmallPtrSet<const MDNode *, 16> ADomains;
++  SmallPtrSet<const MDNode *, 16> IntersectDomains;
++  SmallSetVector<Metadata *, 4> MDs;
++  for (const MDOperand &MDOp : A->operands())
++    if (const MDNode *NAMD = dyn_cast<MDNode>(MDOp))
++      if (const MDNode *Domain = AliasScopeNode(NAMD).getDomain())
++        ADomains.insert(Domain);
++
++  for (const MDOperand &MDOp : B->operands())
++    if (const MDNode *NAMD = dyn_cast<MDNode>(MDOp))
++      if (const MDNode *Domain = AliasScopeNode(NAMD).getDomain())
++        if (ADomains.contains(Domain)) {
++          IntersectDomains.insert(Domain);
++          MDs.insert(MDOp);
++        }
++
++  for (const MDOperand &MDOp : A->operands())
++    if (const MDNode *NAMD = dyn_cast<MDNode>(MDOp))
++      if (const MDNode *Domain = AliasScopeNode(NAMD).getDomain())
++        if (IntersectDomains.contains(Domain))
++          MDs.insert(MDOp);
++
++  return MDs.empty() ? nullptr
++                     : getOrSelfReference(A->getContext(), MDs.getArrayRef());
+ }
+ 
+ MDNode *MDNode::getMostGenericFPMath(MDNode *A, MDNode *B) {
+diff --git a/llvm/test/Analysis/ScopedNoAliasAA/alias-scope-merging.ll
b/llvm/test/Analysis/ScopedNoAliasAA/alias-scope-merging.ll
+new file mode 100644
+index 000000000000..4c8369d30adb
+--- /dev/null
++++ b/llvm/test/Analysis/ScopedNoAliasAA/alias-scope-merging.ll
+@@ -0,0 +1,37 @@
++; RUN: opt < %s -S -memcpyopt | FileCheck --match-full-lines %s
++
++; Alias scopes are merged by taking the intersection of domains, then the union of the
scopes within those domains
++define i8 @test(i8 %input) {
++  %tmp = alloca i8
++  %dst = alloca i8
++  %src = alloca i8
++; CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %dst, i8* align 8 %src, i64
1, i1 false), !alias.scope ![[SCOPE:[0-9]+]]
++  call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull %src), !noalias !4
++  store i8 %input, i8* %src
++  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %tmp, i8* align 8 %src, i64 1, i1
false), !alias.scope !0
++  call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %src), !noalias !4
++  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %dst, i8* align 8 %tmp, i64 1, i1
false), !alias.scope !4
++  %ret_value = load i8, i8* %dst
++  ret i8 %ret_value
++}
++
++; Merged scope contains "callee0: %a" and "callee0 : %b"
++; CHECK-DAG: ![[CALLEE0_A:[0-9]+]] = distinct !{!{{[0-9]+}}, !{{[0-9]+}},
!"callee0: %a"}
++; CHECK-DAG: ![[CALLEE0_B:[0-9]+]] = distinct !{!{{[0-9]+}}, !{{[0-9]+}},
!"callee0: %b"}
++; CHECK-DAG: ![[SCOPE]] = !{![[CALLEE0_A]], ![[CALLEE0_B]]}
++
++declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture)
++declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture)
++declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i1)
++
++!0 = !{!1, !7}
++!1 = distinct !{!1, !3, !"callee0: %a"}
++!2 = distinct !{!2, !3, !"callee0: %b"}
++!3 = distinct !{!3, !"callee0"}
++
++!4 = !{!2, !5}
++!5 = distinct !{!5, !6, !"callee1: %a"}
++!6 = distinct !{!6, !"callee1"}
++
++!7 = distinct !{!7, !8, !"callee2: %a"}
++!8 = distinct !{!8, !"callee2"}
+diff --git a/llvm/test/Transforms/GVN/noalias.ll b/llvm/test/Transforms/GVN/noalias.ll
+index 69c21f110b5e..67d48d768a91 100644
+--- a/llvm/test/Transforms/GVN/noalias.ll
++++ b/llvm/test/Transforms/GVN/noalias.ll
+@@ -5,7 +5,7 @@ define i32 @test1(i32* %p, i32* %q) {
+ ; CHECK: load i32, i32* %p
+ ; CHECK-NOT: noalias
+ ; CHECK: %c = add i32 %a, %a
+-  %a = load i32, i32* %p, !noalias !0
++  %a = load i32, i32* %p, !noalias !3
+   %b = load i32, i32* %p
+   %c = add i32 %a, %b
+   ret i32 %c
+@@ -13,31 +13,32 @@ define i32 @test1(i32* %p, i32* %q) {
+ 
+ define i32 @test2(i32* %p, i32* %q) {
+ ; CHECK-LABEL: @test2(i32* %p, i32* %q)
+-; CHECK: load i32, i32* %p, align 4, !alias.scope !0
++; CHECK: load i32, i32* %p, align 4, !alias.scope ![[SCOPE1:[0-9]+]]
+ ; CHECK: %c = add i32 %a, %a
+-  %a = load i32, i32* %p, !alias.scope !0
+-  %b = load i32, i32* %p, !alias.scope !0
++  %a = load i32, i32* %p, !alias.scope !3
++  %b = load i32, i32* %p, !alias.scope !3
+   %c = add i32 %a, %b
+   ret i32 %c
+ }
+ 
+-; FIXME: In this case we can do better than intersecting the scopes, and can
+-; concatenate them instead. Both loads are in the same basic block, the first
+-; makes the second safe to speculatively execute, and there are no calls that may
+-; throw in between.
+ define i32 @test3(i32* %p, i32* %q) {
+ ; CHECK-LABEL: @test3(i32* %p, i32* %q)
+-; CHECK: load i32, i32* %p, align 4, !alias.scope !1
++; CHECK: load i32, i32* %p, align 4, !alias.scope ![[SCOPE2:[0-9]+]]
+ ; CHECK: %c = add i32 %a, %a
+-  %a = load i32, i32* %p, !alias.scope !1
+-  %b = load i32, i32* %p, !alias.scope !2
++  %a = load i32, i32* %p, !alias.scope !4
++  %b = load i32, i32* %p, !alias.scope !5
+   %c = add i32 %a, %b
+   ret i32 %c
+ }
+ 
++; CHECK:   ![[SCOPE1]] = !{!{{[0-9]+}}}
++; CHECK:   ![[SCOPE2]] = !{!{{[0-9]+}}, !{{[0-9]+}}}
+ declare i32 @foo(i32*) readonly
+ 
+-!0 = !{!0}
+-!1 = !{!1}
+-!2 = !{!0, !1}
++!0 = distinct !{!0, !2, !"callee0: %a"}
++!1 = distinct !{!1, !2, !"callee0: %b"}
++!2 = distinct !{!2, !"callee0"}
+ 
++!3 = !{!0}
++!4 = !{!1}
++!5 = !{!0, !1}
+diff --git a/llvm/test/Transforms/InstCombine/fold-phi-load-metadata.ll
b/llvm/test/Transforms/InstCombine/fold-phi-load-metadata.ll
+index e5a1aa7362a5..7fa26b46e25d 100644
+--- a/llvm/test/Transforms/InstCombine/fold-phi-load-metadata.ll
++++ b/llvm/test/Transforms/InstCombine/fold-phi-load-metadata.ll
+@@ -40,10 +40,10 @@ return:                                           ; preds = %if.end,
%if.then
+ ; CHECK: ![[TBAA]] = !{![[TAG1:[0-9]+]], ![[TAG1]], i64 0}
+ ; CHECK: ![[TAG1]] = !{!"int", !{{[0-9]+}}, i64 0}
+ ; CHECK: ![[RANGE]] = !{i32 10, i32 25}
+-; CHECK: ![[ALIAS_SCOPE]] = !{![[SCOPE0:[0-9]+]], ![[SCOPE2:[0-9]+]],
![[SCOPE1:[0-9]+]]}
++; CHECK: ![[ALIAS_SCOPE]] = !{![[SCOPE0:[0-9]+]], ![[SCOPE1:[0-9]+]],
![[SCOPE2:[0-9]+]]}
+ ; CHECK: ![[SCOPE0]] = distinct !{![[SCOPE0]], !{{[0-9]+}}, !"scope0"}
+-; CHECK: ![[SCOPE2]] = distinct !{![[SCOPE2]], !{{[0-9]+}}, !"scope2"}
+ ; CHECK: ![[SCOPE1]] = distinct !{![[SCOPE1]], !{{[0-9]+}}, !"scope1"}
++; CHECK: ![[SCOPE2]] = distinct !{![[SCOPE2]], !{{[0-9]+}}, !"scope2"}
+ ; CHECK: ![[NOALIAS]] = !{![[SCOPE3:[0-9]+]]}
+ ; CHECK: ![[SCOPE3]] = distinct !{![[SCOPE3]], !{{[0-9]+}}, !"scope3"}
+ 
+diff --git a/llvm/test/Transforms/MemCpyOpt/callslot_badaa.ll
b/llvm/test/Transforms/MemCpyOpt/callslot_badaa.ll
+new file mode 100644
+index 000000000000..346546f72c4c
+--- /dev/null
++++ b/llvm/test/Transforms/MemCpyOpt/callslot_badaa.ll
+@@ -0,0 +1,39 @@
++; RUN: opt < %s -S -memcpyopt | FileCheck --match-full-lines %s
++
++; Make sure callslot optimization merges alias.scope metadata correctly when it merges
instructions.
++; Merging here naively generates:
++;  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %dst, i8* align 8 %src, i64 1, i1
false), !alias.scope !3
++;  call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %src), !noalias !0
++;   ...
++;  !0 = !{!1}
++;  !1 = distinct !{!1, !2, !"callee1: %a"}
++;  !2 = distinct !{!2, !"callee1"}
++;  !3 = !{!1, !4}
++;  !4 = distinct !{!4, !5, !"callee0: %a"}
++;  !5 = distinct !{!5, !"callee0"}
++; Which is incorrect because the lifetime.end of %src will now "noalias" the
above memcpy.
++define i8 @test(i8 %input) {
++  %tmp = alloca i8
++  %dst = alloca i8
++  %src = alloca i8
++; NOTE: we're matching the full line and looking for the lack of !alias.scope here
++; CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %dst, i8* align 8 %src, i64
1, i1 false)
++  call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull %src), !noalias !3
++  store i8 %input, i8* %src
++  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %tmp, i8* align 8 %src, i64 1, i1
false), !alias.scope !0
++  call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %src), !noalias !3
++  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %dst, i8* align 8 %tmp, i64 1, i1
false), !alias.scope !3
++  %ret_value = load i8, i8* %dst
++  ret i8 %ret_value
++}
++
++declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture)
++declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture)
++declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i1)
++
++!0 = !{!1}
++!1 = distinct !{!1, !2, !"callee0: %a"}
++!2 = distinct !{!2, !"callee0"}
++!3 = !{!4}
++!4 = distinct !{!4, !5, !"callee1: %a"}
++!5 = distinct !{!5, !"callee1"}
+diff --git a/llvm/test/Transforms/NewGVN/noalias.ll
b/llvm/test/Transforms/NewGVN/noalias.ll
+index c5f23bfad89a..2d90dc84d90b 100644
+--- a/llvm/test/Transforms/NewGVN/noalias.ll
++++ b/llvm/test/Transforms/NewGVN/noalias.ll
+@@ -5,7 +5,7 @@ define i32 @test1(i32* %p, i32* %q) {
+ ; CHECK: load i32, i32* %p
+ ; CHECK-NOT: noalias
+ ; CHECK: %c = add i32 %a, %a
+-  %a = load i32, i32* %p, !noalias !0
++  %a = load i32, i32* %p, !noalias !3
+   %b = load i32, i32* %p
+   %c = add i32 %a, %b
+   ret i32 %c
+@@ -13,31 +13,32 @@ define i32 @test1(i32* %p, i32* %q) {
+ 
+ define i32 @test2(i32* %p, i32* %q) {
+ ; CHECK-LABEL: @test2(i32* %p, i32* %q)
+-; CHECK: load i32, i32* %p, align 4, !alias.scope !0
++; CHECK: load i32, i32* %p, align 4, !alias.scope ![[SCOPE1:[0-9]+]]
+ ; CHECK: %c = add i32 %a, %a
+-  %a = load i32, i32* %p, !alias.scope !0
+-  %b = load i32, i32* %p, !alias.scope !0
++  %a = load i32, i32* %p, !alias.scope !3
++  %b = load i32, i32* %p, !alias.scope !3
+   %c = add i32 %a, %b
+   ret i32 %c
+ }
+ 
+-; FIXME: In this case we can do better than intersecting the scopes, and can
+-; concatenate them instead. Both loads are in the same basic block, the first
+-; makes the second safe to speculatively execute, and there are no calls that may
+-; throw in between.
+ define i32 @test3(i32* %p, i32* %q) {
+ ; CHECK-LABEL: @test3(i32* %p, i32* %q)
+-; CHECK: load i32, i32* %p, align 4, !alias.scope !1
++; CHECK: load i32, i32* %p, align 4, !alias.scope ![[SCOPE2:[0-9]+]]
+ ; CHECK: %c = add i32 %a, %a
+-  %a = load i32, i32* %p, !alias.scope !1
+-  %b = load i32, i32* %p, !alias.scope !2
++  %a = load i32, i32* %p, !alias.scope !4
++  %b = load i32, i32* %p, !alias.scope !5
+   %c = add i32 %a, %b
+   ret i32 %c
+ }
+ 
++; CHECK:   ![[SCOPE1]] = !{!{{[0-9]+}}}
++; CHECK:   ![[SCOPE2]] = !{!{{[0-9]+}}, !{{[0-9]+}}}
+ declare i32 @foo(i32*) readonly
+ 
+-!0 = !{!0}
+-!1 = !{!1}
+-!2 = !{!0, !1}
++!0 = distinct !{!0, !2, !"callee0: %a"}
++!1 = distinct !{!1, !2, !"callee0: %b"}
++!2 = distinct !{!2, !"callee0"}
+ 
++!3 = !{!0}
++!4 = !{!1}
++!5 = !{!0, !1}
+-- 
+2.30.2
+
diff --git a/0001-SystemZ-Assign-the-full-space-for-promoted-and-split.patch
b/0001-SystemZ-Assign-the-full-space-for-promoted-and-split.patch
new file mode 100644
index 0000000..db1b5e2
--- /dev/null
+++ b/0001-SystemZ-Assign-the-full-space-for-promoted-and-split.patch
@@ -0,0 +1,157 @@
+From c6f9d6db7b0c4677d1aae8977505fe6340a3aae2 Mon Sep 17 00:00:00 2001
+From: Josh Stone <cuviper(a)gmail.com>
+Date: Wed, 10 Mar 2021 15:52:27 -0800
+Subject: [PATCH] [SystemZ]  Assign the full space for promoted and split
+ outgoing args. (#95)
+
+When a large "irregular" (e.g. i96) integer call argument is converted to
+indirect, 64-bit parts are stored to the stack. The full stack space
+(e.g. i128) was not allocated prior to this patch, but rather just the exact
+space of the original type. This caused neighboring values on the stack to be
+overwritten.
+
+Thanks to Josh Stone for reporting this.
+
+Review: Ulrich Weigand
+Fixes https://bugs.llvm.org/show_bug.cgi?id=49322
+Differential Revision: https://reviews.llvm.org/D97514
+
+(cherry picked from commit 52bbbf4d4459239e0f461bc302ada89e2c5d07fc)
+
+Co-authored-by: Jonas Paulsson <paulsson(a)linux.vnet.ibm.com>
+---
+ .../Target/SystemZ/SystemZISelLowering.cpp    | 22 ++++++--
+ llvm/test/CodeGen/SystemZ/args-11.ll          | 54 +++++++++++++++++++
+ 2 files changed, 72 insertions(+), 4 deletions(-)
+ create mode 100644 llvm/test/CodeGen/SystemZ/args-11.ll
+
+diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+index eb1e51341ec4..faf7b3eaef3c 100644
+--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
++++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+@@ -1543,6 +1543,7 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
+   bool IsVarArg = CLI.IsVarArg;
+   MachineFunction &MF = DAG.getMachineFunction();
+   EVT PtrVT = getPointerTy(MF.getDataLayout());
++  LLVMContext &Ctx = *DAG.getContext();
+ 
+   // Detect unsupported vector argument and return types.
+   if (Subtarget.hasVector()) {
+@@ -1552,7 +1553,7 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
+ 
+   // Analyze the operands of the call, assigning locations to each operand.
+   SmallVector<CCValAssign, 16> ArgLocs;
+-  SystemZCCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
++  SystemZCCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, Ctx);
+   ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ);
+ 
+   // We don't support GuaranteedTailCallOpt, only automatically-detected
+@@ -1577,14 +1578,25 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
+ 
+     if (VA.getLocInfo() == CCValAssign::Indirect) {
+       // Store the argument in a stack slot and pass its address.
+-      SDValue SpillSlot = DAG.CreateStackTemporary(Outs[I].ArgVT);
++      unsigned ArgIndex = Outs[I].OrigArgIndex;
++      EVT SlotVT;
++      if (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
++        // Allocate the full stack space for a promoted (and split) argument.
++        Type *OrigArgType = CLI.Args[Outs[I].OrigArgIndex].Ty;
++        EVT OrigArgVT = getValueType(MF.getDataLayout(), OrigArgType);
++        MVT PartVT = getRegisterTypeForCallingConv(Ctx, CLI.CallConv, OrigArgVT);
++        unsigned N = getNumRegistersForCallingConv(Ctx, CLI.CallConv, OrigArgVT);
++        SlotVT = EVT::getIntegerVT(Ctx, PartVT.getSizeInBits() * N);
++      } else {
++        SlotVT = Outs[I].ArgVT;
++      }
++      SDValue SpillSlot = DAG.CreateStackTemporary(SlotVT);
+       int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
+       MemOpChains.push_back(
+           DAG.getStore(Chain, DL, ArgValue, SpillSlot,
+                        MachinePointerInfo::getFixedStack(MF, FI)));
+       // If the original argument was split (e.g. i128), we need
+       // to store all parts of it here (and pass just one address).
+-      unsigned ArgIndex = Outs[I].OrigArgIndex;
+       assert (Outs[I].PartOffset == 0);
+       while (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
+         SDValue PartValue = OutVals[I + 1];
+@@ -1594,6 +1606,8 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
+         MemOpChains.push_back(
+             DAG.getStore(Chain, DL, PartValue, Address,
+                          MachinePointerInfo::getFixedStack(MF, FI)));
++        assert((PartOffset + PartValue.getValueType().getStoreSize() <=
++                SlotVT.getStoreSize()) && "Not enough space for argument
part!");
+         ++I;
+       }
+       ArgValue = SpillSlot;
+@@ -1687,7 +1701,7 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
+ 
+   // Assign locations to each value returned by this call.
+   SmallVector<CCValAssign, 16> RetLocs;
+-  CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
++  CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, Ctx);
+   RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ);
+ 
+   // Copy all of the result registers out of their specified physreg.
+diff --git a/llvm/test/CodeGen/SystemZ/args-11.ll b/llvm/test/CodeGen/SystemZ/args-11.ll
+new file mode 100644
+index 000000000000..b355f9d6da15
+--- /dev/null
++++ b/llvm/test/CodeGen/SystemZ/args-11.ll
+@@ -0,0 +1,54 @@
++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
++; Test outgoing promoted arguments that are split (and passed by reference).
++;
++; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
++
++; The i96 arg is promoted to i128 and should get the full stack space.
++declare void @fn1(i96)
++define i32 @fn2() {
++; CHECK-LABEL: fn2:
++; CHECK:       # %bb.0:
++; CHECK-NEXT:    stmg %r14, %r15, 112(%r15)
++; CHECK-NEXT:    .cfi_offset %r14, -48
++; CHECK-NEXT:    .cfi_offset %r15, -40
++; CHECK-NEXT:    aghi %r15, -184
++; CHECK-NEXT:    .cfi_def_cfa_offset 344
++; CHECK-NEXT:    mvhi 180(%r15), -1
++; CHECK-NEXT:    mvghi 168(%r15), 0
++; CHECK-NEXT:    la %r2, 160(%r15)
++; CHECK-NEXT:    mvghi 160(%r15), 0
++; CHECK-NEXT:    brasl %r14, fn1@PLT
++; CHECK-NEXT:    l %r2, 180(%r15)
++; CHECK-NEXT:    lmg %r14, %r15, 296(%r15)
++; CHECK-NEXT:    br %r14
++  %1 = alloca i32
++  store i32 -1, i32* %1
++  call void @fn1(i96 0)
++  %2 = load i32, i32* %1
++  ret i32 %2
++}
++
++declare void @fn3(i136)
++define i32 @fn4() {
++; CHECK-LABEL: fn4:
++; CHECK:       # %bb.0:
++; CHECK-NEXT:    stmg %r14, %r15, 112(%r15)
++; CHECK-NEXT:    .cfi_offset %r14, -48
++; CHECK-NEXT:    .cfi_offset %r15, -40
++; CHECK-NEXT:    aghi %r15, -192
++; CHECK-NEXT:    .cfi_def_cfa_offset 352
++; CHECK-NEXT:    mvhi 188(%r15), -1
++; CHECK-NEXT:    mvghi 176(%r15), 0
++; CHECK-NEXT:    mvghi 168(%r15), 0
++; CHECK-NEXT:    la %r2, 160(%r15)
++; CHECK-NEXT:    mvghi 160(%r15), 0
++; CHECK-NEXT:    brasl %r14, fn3@PLT
++; CHECK-NEXT:    l %r2, 188(%r15)
++; CHECK-NEXT:    lmg %r14, %r15, 304(%r15)
++; CHECK-NEXT:    br %r14
++  %1 = alloca i32
++  store i32 -1, i32* %1
++  call void @fn3(i136 0)
++  %2 = load i32, i32* %1
++  ret i32 %2
++}
+-- 
+2.30.2
+
diff --git a/llvm11.spec b/llvm11.spec
index 9d5673a..2614808 100644
--- a/llvm11.spec
+++ b/llvm11.spec
@@ -10,7 +10,7 @@
 
 %global llvm_libdir %{_libdir}/%{name}
 %global build_llvm_libdir %{buildroot}%{llvm_libdir}
-%global rc_ver 2
+#global rc_ver 2
 %global baserelease 1
 %global llvm_srcdir llvm-%{version}%{?rc_ver:rc%{rc_ver}}.src
 %global maj_ver 11
@@ -59,6 +59,8 @@ Source4:	lit.fedora.cfg.py
 # Fix coreos-installer test crash on s390x (rhbz#1883457),
https://reviews.llvm.org/D89034
 Patch1:		0001-SystemZ-Use-LA-instead-of-AGR-in-eliminateFrameIndex.patch
 Patch2:     0001-gcc11.patch
+Patch3:		0001-SystemZ-Assign-the-full-space-for-promoted-and-split.patch
+Patch4:		0001-MemCpyOpt-Correctly-merge-alias-scopes-during-call-s.patch
 
 BuildRequires:	gcc
 BuildRequires:	gcc-c++
@@ -420,7 +422,7 @@ rm test/tools/llvm-readobj/ELF/dependent-libraries.test
 rm test/tools/dsymutil/X86/swift-interface.test
 
 # FIXME: use %%cmake_build instead of %%__ninja
-LD_LIBRARY_PATH=%{buildroot}/%{_libdir}  %{__ninja} check-all -C %{_vpath_builddir}
+LD_LIBRARY_PATH=%{buildroot}/%{pkg_libdir}  %{__ninja} check-all -C %{_vpath_builddir}
 
 %ldconfig_scriptlets libs
 
@@ -543,5 +545,9 @@ fi
 %endif
 
 %changelog
+* Tue Mar 23 2021 Josh Stone <jistone(a)redhat.com> - 11.1.0-1
+- Update to 11.1.0 final
+- Add fixes for rustc codegen
+
 * Wed Feb 03 2021 Serge Guelton - 11.1.0-0.1.rc2
 - 11.1.0-rc2 release
diff --git a/sources b/sources
index 8030fde..e8fbf5f 100644
--- a/sources
+++ b/sources
@@ -1,2 +1,2 @@
-SHA512 (llvm-11.1.0rc2.src.tar.xz) =
4293bedabfacc3de5384b5567eb69d4ae19095540c31cf1f46b8e841db36b28215353aace2e55ccc15a069a63ba2954b2c969ad6337bebaa8877248a2dca024b
-SHA512 (llvm-11.1.0rc2.src.tar.xz.sig) =
a6465924e10cf8778c23c7d25c83ac3240611fc1045b55651a2f33aa1636357e86cc4df020a5603c3ae07a0185f769df9d348e8b8321e0db7eada81497327dd1
+SHA512 (llvm-11.1.0.src.tar.xz) =
07bf9973384151a18d5cc2892103e5f28a88c632e8e49662fde56d123632f2ed1b3710fa7a87b6b821955d0ec44160ff36f2aa4f233e389e14d628e9bf8dc764
+SHA512 (llvm-11.1.0.src.tar.xz.sig) =
b9db91294a3297d7b37081389e91ef50a91d119fa7afc54f1da12545ea43686c18f1bb587ac10a492ce2145cea24c4d85dbda51fd02afcc9be3d8bc1e69b8d75


	https://src.fedoraproject.org/rpms/llvm11/c/76f7a9988645561fe95dd5a220962...
    

2024

2023

2022

2021

2020

2019

2018

2017

2016

2015

2014

2013

2012

2011

2010

2009

2008

2007

2006

2005

2004