[atlas] patching for Power8 to pass performance tunings and tests on P8 builders
Jaromír Cápík
jcapik at fedoraproject.org
Thu Oct 30 17:30:49 UTC 2014
commit f7d47dc9a324fb0342f0f3cf274ceadc636fdb75
Author: Jaromir Capik <jcapik at redhat.com>
Date: Thu Oct 30 18:29:59 2014 +0100
patching for Power8 to pass performance tunings and tests on P8 builders
atlas.spec | 32 +++++++++++-
getdoublearr.stripwhite.patch | 50 ++++++++++++++++++
initialize_malloc_memory.invtrsm.wms.oct23.patch | 10 ++++
p8-mem-barrier.patch | 12 ++++
ppc64le-abiv2.patch | 60 ++++++++++++++++++++++
ppc64le-remove-vsx.patch | 39 ++++++++++++++
xlf.command.not.found.patch | 24 +++++++++
7 files changed, 226 insertions(+), 1 deletions(-)
---
diff --git a/atlas.spec b/atlas.spec
index 1eb9553..bbbd85a 100644
--- a/atlas.spec
+++ b/atlas.spec
@@ -5,7 +5,7 @@ Version: 3.10.1
%if "%{?enable_native_atlas}" != "0"
%define dist .native
%endif
-Release: 16%{?dist}
+Release: 17%{?dist}
Summary: Automatically Tuned Linear Algebra Software
Group: System Environment/Libraries
@@ -44,6 +44,14 @@ Patch6: atlas-affinity.patch
Patch7: atlas-aarch64port.patch
Patch8: atlas-genparse.patch
+# ppc64le patches
+Patch95: initialize_malloc_memory.invtrsm.wms.oct23.patch
+Patch96: xlf.command.not.found.patch
+Patch98: getdoublearr.stripwhite.patch
+Patch99: ppc64le-remove-vsx.patch
+Patch100: ppc64le-abiv2.patch
+Patch110: p8-mem-barrier.patch
+
BuildRequires: gcc-gfortran
Provides: bundled(lapack)
@@ -332,6 +340,16 @@ cp %{SOURCE13} CONFIG/ARCHS/
cp %{SOURCE14} CONFIG/ARCHS/
#cp %{SOURCE8} CONFIG/ARCHS/
#cp %{SOURCE9} CONFIG/ARCHS/
+
+%ifarch ppc64le
+%patch99 -p2
+%patch98 -p2
+%patch96 -p2
+%patch95 -p2
+%patch100 -p2
+%patch110 -p1
+%endif
+
%ifarch %{arm}
# Set arm flags in atlcomp.txt
sed -i -e 's,-mfpu=vfpv3,-mfpu=neon,' CONFIG/src/atlcomp.txt
@@ -342,6 +360,7 @@ sed -i -e 's,-mfpu=vfpv3,,' tune/blas/gemm/CASES/*.flg
# Debug
#sed -i -e 's,> \(.*\)/ptsanity.out,> \1/ptsanity.out || cat \1/ptsanity.out \&\& exit 1,' makes/Make.*
+
%build
for type in %{types}; do
if [ "$type" = "base" ]; then
@@ -467,6 +486,14 @@ for type in %{types}; do
sed -i 's#-m64#-m32#g' Make.inc
%endif
+%ifarch ppc64le
+ sed -i 's#-mvsx##g' Make.inc
+ sed -i 's#-DATL_VSX##g' Make.inc
+ sed -i 's#-DATL_AltiVec##g' Make.inc
+ sed -i 's#-maltivec##g' Make.inc
+ sed -i 's#ARCH =.*#ARCH = POWER464#' Make.inc
+%endif
+
%endif
make build
cd lib
@@ -783,6 +810,9 @@ fi
%endif
%changelog
+* Thu Oct 30 2014 Jaromir Capik <jcapik at redhat.com> - 3.10.1-17
+- patching for Power8 to pass performance tunings and tests on P8 builders
+
* Fri Oct 24 2014 Orion Poplawski <orion at cora.nwra.com> - 3.10.1-16
- Fix alternatives install
diff --git a/getdoublearr.stripwhite.patch b/getdoublearr.stripwhite.patch
new file mode 100644
index 0000000..e1dc84d
--- /dev/null
+++ b/getdoublearr.stripwhite.patch
@@ -0,0 +1,50 @@
+Subject: getdoublearr.stripwhite
+From: Michel Normand <normand at fr.ibm.com>
+
+GetDoubleArr must only handle the comma delimited list at string head
+and ignore anything after the first blank character.
+
+Signed-off-by: Michel Normand <normand at fr.ibm.com>
+---
+ ATLAS/include/atlas_genparse.h | 16 ++++++++++++++--
+ 1 file changed, 14 insertions(+), 2 deletions(-)
+
+Index: atlas/ATLAS/include/atlas_genparse.h
+===================================================================
+--- atlas.orig/ATLAS/include/atlas_genparse.h
++++ atlas/ATLAS/include/atlas_genparse.h
+@@ -149,13 +149,24 @@ static int asmNames2bitfield(char *str)
+ }
+
+ /* procedure 7 */
+-static int GetDoubleArr(char *str, int N, double *d)
++static int GetDoubleArr(char *callerstr, int N, double *d)
+ /*
+ * Reads in a list with form "%le,%le...,%le"; N-length d recieves doubles.
+ * RETURNS: the number of doubles found, or N, whichever is less
+ */
+ {
+- int i=1;
++ int i;
++ char *dupstr = DupString(callerstr);
++ char *str = dupstr;
++ /* strip the string to end on first white space */
++ for (i=0; dupstr[i]; i++)
++ {
++ if (isspace(dupstr[i])) {
++ dupstr[i] = '\0';
++ break;
++ }
++ }
++ i = 1;
+ assert(sscanf(str, "%le", d) == 1);
+ while (i < N)
+ {
+@@ -166,6 +177,7 @@ static int GetDoubleArr(char *str, int N
+ break;
+ i++;
+ }
++ free(dupstr);
+ return(i);
+ }
+
diff --git a/initialize_malloc_memory.invtrsm.wms.oct23.patch b/initialize_malloc_memory.invtrsm.wms.oct23.patch
new file mode 100644
index 0000000..f57a9e4
--- /dev/null
+++ b/initialize_malloc_memory.invtrsm.wms.oct23.patch
@@ -0,0 +1,10 @@
+--- ./ATLAS.first/tune/blas/level3/invtrsm.c 2013-10-22 19:35:03.000000000 +0000
++++ ./ATLAS/tune/blas/level3/invtrsm.c 2013-10-23 21:24:01.000000000 +0000
+@@ -525,6 +525,7 @@
+ a = A = malloc(i * ATL_MulBySize(incA));
+ if (A)
+ {
++ memset(A,0,i*ATL_MulBySize(incA)); /* wms (!!) malloc call above returns non-initialized memory. */
+ if (Uplo == TestGE)
+ for (i=0; i < k; i++)
+ Mjoin(PATL,gegen)(N, N, A+i*incA, lda, N+lda);
diff --git a/p8-mem-barrier.patch b/p8-mem-barrier.patch
new file mode 100644
index 0000000..15d7b8a
--- /dev/null
+++ b/p8-mem-barrier.patch
@@ -0,0 +1,12 @@
+diff -Naur ATLAS.orig/include/atlas_pca.h ATLAS/include/atlas_pca.h
+--- ATLAS.orig/include/atlas_pca.h 2013-01-08 19:15:40.000000000 +0100
++++ ATLAS/include/atlas_pca.h 2014-10-23 13:45:36.956698637 +0200
+@@ -26,7 +26,7 @@
+ #endif
+ #elif defined(ATL_ARCH_POWER3) || defined(ATL_ARCH_POWER4) || \
+ defined(ATL_ARCH_POWER5) || defined(ATL_ARCH_POWER6) || \
+- defined(ATL_ARCH_POWER7)
++ defined(ATL_ARCH_POWER7) || 1
+ #ifdef __GNUC__
+ #define ATL_membarrier __asm__ __volatile__ ("dcs")
+ /* #define ATL_USEPCA 1 */
diff --git a/ppc64le-abiv2.patch b/ppc64le-abiv2.patch
new file mode 100644
index 0000000..556dd04
--- /dev/null
+++ b/ppc64le-abiv2.patch
@@ -0,0 +1,60 @@
+--- atlas/ATLAS/tune/blas/gemm/CASES/ATL_dmm4x4x80_ppc.c 2013-12-05 19:19:57.000000000 +0100
++++ atlas/ATLAS/tune/blas/gemm/CASES/ATL_dmm4x4x80_ppc.c.new 2013-12-06 16:29:57.000000000 +0100
+@@ -170,13 +170,21 @@ void ATL_USERMM(const int M, const int N
+ const TYPE beta, TYPE *C, const int ldc)
+ (r10) 8(r1)
+ *******************************************************************************
+-64 bit ABIs:
++64 bit ABIv1s:
+ r3 r4 r5 r6/f1
+ void ATL_USERMM(const int M, const int N, const int K, const TYPE alpha,
+ r7 r8 r9 r10
+ const TYPE *A, const int lda, const TYPE *B, const int ldb,
+ f2 120(r1) 128(r1)
+ const TYPE beta, TYPE *C, const int ldc)
++
++64 bit ABIv2s:
++ r3 r4 r5 r6/f1
++void ATL_USERMM(const int M, const int N, const int K, const TYPE alpha,
++ r7 r8 r9 r10
++ const TYPE *A, const int lda, const TYPE *B, const int ldb,
++ f2 104(r1) 112(r1)
++ const TYPE beta, TYPE *C, const int ldc)
+ #endif
+ #ifdef ATL_AS_AIX_PPC
+ .csect .text[PR]
+@@ -202,7 +210,7 @@ Mjoin(.,ATL_USERMM):
+ .globl Mjoin(_,ATL_USERMM)
+ Mjoin(_,ATL_USERMM):
+ #else
+- #if defined(ATL_USE64BITS)
++ #if defined(ATL_USE64BITS) && _CALL_ELF != 2
+ /*
+ * Official Program Descripter section, seg fault w/o it on Linux/PPC64
+ */
+@@ -217,6 +225,7 @@ ATL_USERMM:
+ .globl Mjoin(.,ATL_USERMM)
+ Mjoin(.,ATL_USERMM):
+ #else
++/* ppc64 have no longer function descriptors in ABIv2 */
+ .globl ATL_USERMM
+ ATL_USERMM:
+ #endif
+@@ -257,9 +266,17 @@ ATL_USERMM:
+ #endif
+ #endif
+
++
+ #if defined (ATL_USE64BITS)
++#if _CALL_ELF == 2
++/* ABIv2 */
++ ld pC0, 104(r1)
++ ld incCn, 112(r1)
++#else
++/* ABIv1 */
+ ld pC0, 120(r1)
+ ld incCn, 128(r1)
++#endif
+ #elif defined(ATL_AS_OSX_PPC) || defined(ATL_AS_AIX_PPC)
+ lwz pC0, 68(r1)
+ lwz incCn, 72(r1)
diff --git a/ppc64le-remove-vsx.patch b/ppc64le-remove-vsx.patch
new file mode 100644
index 0000000..a79bea6
--- /dev/null
+++ b/ppc64le-remove-vsx.patch
@@ -0,0 +1,39 @@
+Subject: ppc64le remove vsx
+From: Michel Normand <normand at fr.ibm.com>
+
+Temporarily remove the VSX-related flags
+for as long as VSX is not supported on ppc64le.
+Note that this also forces the build to be treated as power4.
+
+Signed-off-by: Michel Normand <normand at fr.ibm.com>
+Index: atlas/ATLAS/CONFIG/src/atlcomp.txt
+===================================================================
+--- atlas.orig/ATLAS/CONFIG/src/atlcomp.txt
++++ atlas/ATLAS/CONFIG/src/atlcomp.txt
+@@ -187,9 +187,9 @@ MACH=PPCG5 OS=ALL LVL=1000 COMPS=dmc,icc
+ MACH=PPCG5 OS=ALL LVL=1000 COMPS=skc
+ 'gcc' '-mpowerpc64 -maltivec -mabi=altivec -mcpu=970 -mtune=970 -O2 -mvrsave'
+ MACH=POWER7 OS=ALL LVL=1010 COMPS=icc,smc,dmc,skc,dkc,xcc,gcc
+- 'gcc' '-O2 -mvsx -mcpu=power7 -mtune=power7 -m64 -mvrsave -funroll-all-loops'
++ 'gcc' '-O2 -m64 -mvrsave -funroll-all-loops'
+ MACH=POWER7 OS=ALL LVL=1010 COMPS=f77
+- 'gfortran' '-O2 -mvsx -mcpu=power7 -mtune=power7 -m64 -mvrsave -funroll-all-loops'
++ 'gfortran' '-O2 -m64 -mvrsave -funroll-all-loops'
+ MACH=POWER6 OS=ALL LVL=1010 COMPS=icc,smc,dmc,skc,dkc,xcc,gcc
+ 'gcc' '-mcpu=power6 -mtune=power6 -maltivec -O3 -fno-schedule-insns -fschedule-insns2 -minsert-sched-nops=2'
+ MACH=POWER5 OS=ALL LVL=1010 COMPS=icc,smc,dmc,skc,dkc,xcc,gcc
+Index: atlas/ATLAS/CONFIG/src/probe_comp.c
+===================================================================
+--- atlas.orig/ATLAS/CONFIG/src/probe_comp.c
++++ atlas/ATLAS/CONFIG/src/probe_comp.c
+@@ -446,8 +446,8 @@ COMPNODE **GetDefaultComps(enum OSTYPE O
+
+ if ((vecexts & (1<<ISA_AVXFMA4)) && arch == AmdDozer)
+ vp = "-msse4.2 -mfma4";
+- else if (vecexts & (1<<ISA_VSX))
+- vp = "-mvsx";
++ /*else if (vecexts & (1<<ISA_VSX))
++ vp = "-mvsx";*/
+ else if (vecexts & (1<<ISA_AV))
+ vp = "-maltivec";
+ else if (vecexts & (1<<ISA_AVX))
diff --git a/xlf.command.not.found.patch b/xlf.command.not.found.patch
new file mode 100644
index 0000000..83f8896
--- /dev/null
+++ b/xlf.command.not.found.patch
@@ -0,0 +1,24 @@
+Subject: xlf.command.not.found
+From: Michel Normand <normand at fr.ibm.com>
+
+Try to bypass the following error when building for ppc64le:
+"make[2]: xlf: Command not found"
+
+Signed-off-by: Michel Normand <normand at fr.ibm.com>
+---
+ ATLAS/CONFIG/src/atlcomp.txt | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+Index: atlas/ATLAS/CONFIG/src/atlcomp.txt
+===================================================================
+--- atlas.orig/ATLAS/CONFIG/src/atlcomp.txt
++++ atlas/ATLAS/CONFIG/src/atlcomp.txt
+@@ -199,7 +199,7 @@ MACH=POWER6 OS=ALL LVL=1010 COMPS=f77
+ MACH=POWER5 OS=ALL LVL=1010 COMPS=f77
+ 'gfortran' '-mcpu=power5 -mtune=power5 -O3 -fno-schedule-insns -fno-rerun-loop-opt'
+ MACH=POWER7 OS=ALL LVL=1010 COMPS=f77
+- 'xlf' '-qtune=pwr7 -qarch=pwr7 -O3 -qmaxmem=-1 -qfloat=hsflt'
++ 'gfortran' '-O2 -m64 -mvrsave -funroll-all-loops'
+ MACH=POWER5 OS=ALL LVL=1010 COMPS=f77
+ 'xlf' '-qtune=pwr5 -qarch=pwr5 -O3 -qmaxmem=-1 -qfloat=hsflt'
+ MACH=POWER4 OS=ALL LVL=1010 COMPS=icc,dmc,smc,dkc,skc,xcc,gcc
More information about the scm-commits
mailing list