[atlas] patching for Power8 to pass performance tunings and tests on P8 builders

Jaromír Cápík jcapik at fedoraproject.org
Thu Oct 30 17:30:49 UTC 2014


commit f7d47dc9a324fb0342f0f3cf274ceadc636fdb75
Author: Jaromir Capik <jcapik at redhat.com>
Date:   Thu Oct 30 18:29:59 2014 +0100

    patching for Power8 to pass performance tunings and tests on P8 builders

 atlas.spec                                       |   32 +++++++++++-
 getdoublearr.stripwhite.patch                    |   50 ++++++++++++++++++
 initialize_malloc_memory.invtrsm.wms.oct23.patch |   10 ++++
 p8-mem-barrier.patch                             |   12 ++++
 ppc64le-abiv2.patch                              |   60 ++++++++++++++++++++++
 ppc64le-remove-vsx.patch                         |   39 ++++++++++++++
 xlf.command.not.found.patch                      |   24 +++++++++
 7 files changed, 226 insertions(+), 1 deletions(-)
---
diff --git a/atlas.spec b/atlas.spec
index 1eb9553..bbbd85a 100644
--- a/atlas.spec
+++ b/atlas.spec
@@ -5,7 +5,7 @@ Version:        3.10.1
 %if "%{?enable_native_atlas}" != "0"
 %define dist .native
 %endif
-Release:        16%{?dist}
+Release:        17%{?dist}
 Summary:        Automatically Tuned Linear Algebra Software
 
 Group:          System Environment/Libraries
@@ -44,6 +44,14 @@ Patch6:		atlas-affinity.patch
 Patch7:		atlas-aarch64port.patch
 Patch8:		atlas-genparse.patch
 
+# ppc64le patches
+Patch95:	initialize_malloc_memory.invtrsm.wms.oct23.patch
+Patch96:	xlf.command.not.found.patch
+Patch98:	getdoublearr.stripwhite.patch
+Patch99:	ppc64le-remove-vsx.patch
+Patch100:	ppc64le-abiv2.patch
+Patch110:	p8-mem-barrier.patch
+
 BuildRequires:  gcc-gfortran
 
 Provides: bundled(lapack)
@@ -332,6 +340,16 @@ cp %{SOURCE13} CONFIG/ARCHS/
 cp %{SOURCE14} CONFIG/ARCHS/
 #cp %{SOURCE8} CONFIG/ARCHS/
 #cp %{SOURCE9} CONFIG/ARCHS/
+
+%ifarch ppc64le
+%patch99 -p2
+%patch98 -p2
+%patch96 -p2
+%patch95 -p2
+%patch100 -p2
+%patch110 -p1
+%endif
+
 %ifarch %{arm}
 # Set arm flags in atlcomp.txt
 sed -i -e 's,-mfpu=vfpv3,-mfpu=neon,' CONFIG/src/atlcomp.txt
@@ -342,6 +360,7 @@ sed -i -e 's,-mfpu=vfpv3,,' tune/blas/gemm/CASES/*.flg
 # Debug
 #sed -i -e 's,> \(.*\)/ptsanity.out,> \1/ptsanity.out || cat \1/ptsanity.out \&\& exit 1,' makes/Make.*
 
+
 %build
 for type in %{types}; do
 	if [ "$type" = "base" ]; then
@@ -467,6 +486,14 @@ for type in %{types}; do
 	sed -i 's#-m64#-m32#g' Make.inc
 %endif
 
+%ifarch ppc64le
+	sed -i 's#-mvsx##g' Make.inc
+	sed -i 's#-DATL_VSX##g' Make.inc
+	sed -i 's#-DATL_AltiVec##g' Make.inc
+	sed -i 's#-maltivec##g' Make.inc
+	sed -i 's#ARCH =.*#ARCH = POWER464#' Make.inc
+%endif
+
 %endif
 	make build
 	cd lib
@@ -783,6 +810,9 @@ fi
 %endif
 
 %changelog
+* Thu Oct 30 2014 Jaromir Capik <jcapik at redhat.com> - 3.10.1-17
+- patching for Power8 to pass performance tunings and tests on P8 builders
+
 * Fri Oct 24 2014 Orion Poplawski <orion at cora.nwra.com> - 3.10.1-16
 - Fix alternatives install
 
diff --git a/getdoublearr.stripwhite.patch b/getdoublearr.stripwhite.patch
new file mode 100644
index 0000000..e1dc84d
--- /dev/null
+++ b/getdoublearr.stripwhite.patch
@@ -0,0 +1,50 @@
+Subject: getdoublearr.stripwhite
+From: Michel Normand <normand at fr.ibm.com>
+
+GetDoubleArr must only parse the comma-delimited list at the head of the
+string and ignore anything after the first whitespace character.
+
+Signed-off-by: Michel Normand <normand at fr.ibm.com>
+---
+ ATLAS/include/atlas_genparse.h |   16 ++++++++++++++--
+ 1 file changed, 14 insertions(+), 2 deletions(-)
+
+Index: atlas/ATLAS/include/atlas_genparse.h
+===================================================================
+--- atlas.orig/ATLAS/include/atlas_genparse.h
++++ atlas/ATLAS/include/atlas_genparse.h
+@@ -149,13 +149,24 @@ static int asmNames2bitfield(char *str)
+ }
+ 
+ /* procedure 7 */
+-static int GetDoubleArr(char *str, int N, double *d)
++static int GetDoubleArr(char *callerstr, int N, double *d)
+ /*
+  * Reads in a list with form "%le,%le...,%le"; N-length d recieves doubles.
+  * RETURNS: the number of doubles found, or N, whichever is less
+  */
+ {
+-   int i=1;
++   int i;
++   char *dupstr = DupString(callerstr);
++   char *str = dupstr;
++   /* strip the string to end on first white space */
++   for (i=0; dupstr[i]; i++)
++   {
++	if (isspace(dupstr[i])) {
++		dupstr[i] = '\0';
++		break;
++	}
++   }
++   i = 1;
+    assert(sscanf(str, "%le", d) == 1);
+    while (i < N)
+    {
+@@ -166,6 +177,7 @@ static int GetDoubleArr(char *str, int N
+	break;
+       i++;
+    }
++   free(dupstr);
+    return(i);
+ }
+ 
diff --git a/initialize_malloc_memory.invtrsm.wms.oct23.patch b/initialize_malloc_memory.invtrsm.wms.oct23.patch
new file mode 100644
index 0000000..f57a9e4
--- /dev/null
+++ b/initialize_malloc_memory.invtrsm.wms.oct23.patch
@@ -0,0 +1,10 @@
+--- ./ATLAS.first/tune/blas/level3/invtrsm.c	2013-10-22 19:35:03.000000000 +0000
++++ ./ATLAS/tune/blas/level3/invtrsm.c	2013-10-23 21:24:01.000000000 +0000
+@@ -525,6 +525,7 @@
+    a = A = malloc(i * ATL_MulBySize(incA));
+    if (A)
+    {
++     memset(A,0,i*ATL_MulBySize(incA)); /* wms  (!!) malloc call above returns non-initialized memory. */
+       if (Uplo == TestGE)
+          for (i=0; i < k; i++)
+             Mjoin(PATL,gegen)(N, N, A+i*incA, lda, N+lda);
diff --git a/p8-mem-barrier.patch b/p8-mem-barrier.patch
new file mode 100644
index 0000000..15d7b8a
--- /dev/null
+++ b/p8-mem-barrier.patch
@@ -0,0 +1,12 @@
+diff -Naur ATLAS.orig/include/atlas_pca.h ATLAS/include/atlas_pca.h
+--- ATLAS.orig/include/atlas_pca.h	2013-01-08 19:15:40.000000000 +0100
++++ ATLAS/include/atlas_pca.h	2014-10-23 13:45:36.956698637 +0200
+@@ -26,7 +26,7 @@
+    #endif
+ #elif defined(ATL_ARCH_POWER3) || defined(ATL_ARCH_POWER4) || \
+       defined(ATL_ARCH_POWER5) || defined(ATL_ARCH_POWER6) || \
+-      defined(ATL_ARCH_POWER7)
++      defined(ATL_ARCH_POWER7) || 1
+    #ifdef __GNUC__
+       #define ATL_membarrier __asm__ __volatile__ ("dcs")
+ /*      #define ATL_USEPCA 1 */
diff --git a/ppc64le-abiv2.patch b/ppc64le-abiv2.patch
new file mode 100644
index 0000000..556dd04
--- /dev/null
+++ b/ppc64le-abiv2.patch
@@ -0,0 +1,60 @@
+--- atlas/ATLAS/tune/blas/gemm/CASES/ATL_dmm4x4x80_ppc.c	2013-12-05 19:19:57.000000000 +0100
++++ atlas/ATLAS/tune/blas/gemm/CASES/ATL_dmm4x4x80_ppc.c.new	2013-12-06 16:29:57.000000000 +0100
+@@ -170,13 +170,21 @@ void ATL_USERMM(const int M, const int N
+                 const TYPE beta, TYPE *C, const int ldc)
+                                   (r10)    8(r1)
+ *******************************************************************************
+-64 bit ABIs:
++64 bit ABIv1s:
+                          r3           r4           r5             r6/f1
+ void ATL_USERMM(const int M, const int N, const int K, const TYPE alpha,
+                            r7             r8             r9            r10
+                 const TYPE *A, const int lda, const TYPE *B, const int ldb,
+                              f2   120(r1)        128(r1)
+                 const TYPE beta, TYPE *C, const int ldc)
++
++64 bit ABIv2s:
++                         r3           r4           r5             r6/f1
++void ATL_USERMM(const int M, const int N, const int K, const TYPE alpha,
++                           r7             r8             r9            r10
++                const TYPE *A, const int lda, const TYPE *B, const int ldb,
++                             f2   104(r1)        112(r1)
++                const TYPE beta, TYPE *C, const int ldc)
+ #endif
+ #ifdef ATL_AS_AIX_PPC
+         .csect .text[PR]
+@@ -202,7 +210,7 @@ Mjoin(.,ATL_USERMM):
+ 	.globl  Mjoin(_,ATL_USERMM)
+ Mjoin(_,ATL_USERMM):
+    #else
+-      #if defined(ATL_USE64BITS)
++      #if defined(ATL_USE64BITS) && _CALL_ELF != 2
+ /*
+  *      Official Program Descripter section, seg fault w/o it on Linux/PPC64
+  */
+@@ -217,6 +225,7 @@ ATL_USERMM:
+ 	.globl  Mjoin(.,ATL_USERMM)
+ Mjoin(.,ATL_USERMM):
+       #else
++/* ppc64 no longer has function descriptors in ABIv2 */
+ 	.globl  ATL_USERMM
+ ATL_USERMM:
+       #endif
+@@ -257,9 +266,17 @@ ATL_USERMM:
+    #endif
+ #endif
+ 
++
+ #if defined (ATL_USE64BITS)
++#if _CALL_ELF == 2
++/* ABIv2 */
++        ld      pC0, 104(r1)
++        ld      incCn, 112(r1)
++#else
++/* ABIv1 */
+         ld      pC0, 120(r1)
+         ld      incCn, 128(r1)
++#endif
+ #elif defined(ATL_AS_OSX_PPC) || defined(ATL_AS_AIX_PPC)
+         lwz     pC0, 68(r1)
+         lwz     incCn,  72(r1)
diff --git a/ppc64le-remove-vsx.patch b/ppc64le-remove-vsx.patch
new file mode 100644
index 0000000..a79bea6
--- /dev/null
+++ b/ppc64le-remove-vsx.patch
@@ -0,0 +1,39 @@
+Subject: ppc64le remove vsx
+From: Michel Normand <normand at fr.ibm.com>
+
+Temporarily remove the VSX-related flags
+for as long as VSX is not supported on ppc64le.
+Note that the build is also forced to POWER4.
+
+Signed-off-by: Michel Normand <normand at fr.ibm.com>
+Index: atlas/ATLAS/CONFIG/src/atlcomp.txt
+===================================================================
+--- atlas.orig/ATLAS/CONFIG/src/atlcomp.txt
++++ atlas/ATLAS/CONFIG/src/atlcomp.txt
+@@ -187,9 +187,9 @@ MACH=PPCG5 OS=ALL LVL=1000 COMPS=dmc,icc
+ MACH=PPCG5 OS=ALL LVL=1000 COMPS=skc
+    'gcc' '-mpowerpc64 -maltivec -mabi=altivec -mcpu=970 -mtune=970 -O2 -mvrsave'
+ MACH=POWER7 OS=ALL LVL=1010 COMPS=icc,smc,dmc,skc,dkc,xcc,gcc
+-   'gcc' '-O2 -mvsx -mcpu=power7 -mtune=power7 -m64 -mvrsave -funroll-all-loops'
++   'gcc' '-O2 -m64 -mvrsave -funroll-all-loops'
+ MACH=POWER7 OS=ALL LVL=1010 COMPS=f77
+-   'gfortran' '-O2 -mvsx -mcpu=power7 -mtune=power7 -m64 -mvrsave -funroll-all-loops'
++   'gfortran' '-O2 -m64 -mvrsave -funroll-all-loops'
+ MACH=POWER6 OS=ALL LVL=1010 COMPS=icc,smc,dmc,skc,dkc,xcc,gcc
+    'gcc' '-mcpu=power6 -mtune=power6 -maltivec -O3 -fno-schedule-insns -fschedule-insns2 -minsert-sched-nops=2'
+ MACH=POWER5 OS=ALL LVL=1010 COMPS=icc,smc,dmc,skc,dkc,xcc,gcc
+Index: atlas/ATLAS/CONFIG/src/probe_comp.c
+===================================================================
+--- atlas.orig/ATLAS/CONFIG/src/probe_comp.c
++++ atlas/ATLAS/CONFIG/src/probe_comp.c
+@@ -446,8 +446,8 @@ COMPNODE **GetDefaultComps(enum OSTYPE O
+ 
+    if ((vecexts & (1<<ISA_AVXFMA4)) && arch == AmdDozer)
+       vp = "-msse4.2 -mfma4";
+-   else if (vecexts & (1<<ISA_VSX))
+-      vp = "-mvsx";
++   /*else if (vecexts & (1<<ISA_VSX))
++      vp = "-mvsx";*/
+    else if (vecexts & (1<<ISA_AV))
+       vp = "-maltivec";
+    else if (vecexts & (1<<ISA_AVX))
diff --git a/xlf.command.not.found.patch b/xlf.command.not.found.patch
new file mode 100644
index 0000000..83f8896
--- /dev/null
+++ b/xlf.command.not.found.patch
@@ -0,0 +1,24 @@
+Subject: xlf.command.not.found
+From: Michel Normand <normand at fr.ibm.com>
+
+Try to bypass the following error when building for ppc64le:
+"make[2]: xlf: Command not found"
+
+Signed-off-by: Michel Normand <normand at fr.ibm.com>
+---
+ ATLAS/CONFIG/src/atlcomp.txt |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+Index: atlas/ATLAS/CONFIG/src/atlcomp.txt
+===================================================================
+--- atlas.orig/ATLAS/CONFIG/src/atlcomp.txt
++++ atlas/ATLAS/CONFIG/src/atlcomp.txt
+@@ -199,7 +199,7 @@ MACH=POWER6 OS=ALL LVL=1010 COMPS=f77
+ MACH=POWER5 OS=ALL LVL=1010 COMPS=f77
+    'gfortran' '-mcpu=power5 -mtune=power5 -O3 -fno-schedule-insns -fno-rerun-loop-opt'
+ MACH=POWER7 OS=ALL LVL=1010 COMPS=f77
+-   'xlf' '-qtune=pwr7 -qarch=pwr7 -O3 -qmaxmem=-1 -qfloat=hsflt'
++   'gfortran' '-O2 -m64 -mvrsave -funroll-all-loops'
+ MACH=POWER5 OS=ALL LVL=1010 COMPS=f77
+    'xlf' '-qtune=pwr5 -qarch=pwr5 -O3 -qmaxmem=-1 -qfloat=hsflt'
+ MACH=POWER4 OS=ALL LVL=1010 COMPS=icc,dmc,smc,dkc,skc,xcc,gcc


More information about the scm-commits mailing list