[mingw-openssl] Sync with native openssl-1.0.1i-3.fc21
Marc-André Lureau
elmarco at fedoraproject.org
Thu Aug 21 17:22:14 UTC 2014
commit 9edef96106c6e9e85a935a81137e0ea76d129f2d
Author: Marc-André Lureau <marcandre.lureau at gmail.com>
Date: Thu Aug 21 16:14:47 2014 +0200
Sync with native openssl-1.0.1i-3.fc21
Fixes various flaws (RHBZ#1096234 and RHBZ#1127705)
CVE-2014-3505 CVE-2014-3506 CVE-2014-3507 CVE-2014-3511
CVE-2014-3510 CVE-2014-3508 CVE-2014-3509 CVE-2014-0221
CVE-2014-0198 CVE-2014-0224 CVE-2014-0195 CVE-2010-5298
CVE-2014-3470
.gitignore | 1 +
mingw-openssl.spec | 66 +-
openssl-1.0.0c-fips-md5-allow.patch | 20 -
openssl-1.0.0e-doc-noeof.patch | 23 -
openssl-1.0.1-beta2-ssl-op-all.patch | 21 -
openssl-1.0.1e-3des-strength.patch | 171 -
openssl-1.0.1e-backports.patch | 345 -
openssl-1.0.1e-bad-mac.patch | 33 -
openssl-1.0.1e-cve-2013-4353.patch | 21 -
openssl-1.0.1e-cve-2013-6449.patch | 111 -
openssl-1.0.1e-cve-2013-6450.patch | 85 -
openssl-1.0.1e-fips-ec.patch | 2 +-
openssl-1.0.1e-manfix.patch | 555 --
openssl-1.0.1e-ppc-asm-update.patch | 6664 ++++++++++++++++++++
openssl-1.0.1e-ppc64le-target.patch | 10 -
openssl-1.0.1e-req-keylen.patch | 38 -
...rpmbuild.patch => openssl-1.0.1e-rpmbuild.patch | 22 +-
...-1.0.1e-fips.patch => openssl-1.0.1g-fips.patch | 797 ++--
openssl-1.0.1h-disable-sslv2v3.patch | 13 +
...v6-apps.patch => openssl-1.0.1h-ipv6-apps.patch | 59 +-
openssl-1.0.1h-system-cipherlist.patch | 289 +
...algo-doc.patch => openssl-1.0.1i-algo-doc.patch | 18 +-
openssl-1.0.1i-manfix.patch | 86 +
...eqs.patch => openssl-1.0.1i-new-fips-reqs.patch | 627 +--
openssl-1.0.1i-ppc-asm-update.patch | 6636 +++++++++++++++++++
...rst.patch => openssl-1.0.1i-trusted-first.patch | 134 +-
openssl.git-96db902.patch | 108 -
sources | 2 +-
28 files changed, 14373 insertions(+), 2584 deletions(-)
---
diff --git a/.gitignore b/.gitignore
index b9433ec..f36a459 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,3 +3,4 @@ openssl-1.0.0a-usa.tar.bz2
/openssl-1.0.1c-usa.tar.xz
/openssl-1.0.1e-usa.tar.xz
/openssl-1.0.1e-hobbled.tar.xz
+/openssl-1.0.1i-hobbled.tar.xz
diff --git a/mingw-openssl.spec b/mingw-openssl.spec
index 2b67928..c089dbc 100644
--- a/mingw-openssl.spec
+++ b/mingw-openssl.spec
@@ -23,8 +23,8 @@
%global thread_test_threads %{?threads:%{threads}}%{!?threads:1}
Name: mingw-openssl
-Version: 1.0.1e
-Release: 7%{?dist}
+Version: 1.0.1i
+Release: 1%{?dist}
Summary: MinGW port of the OpenSSL toolkit
License: OpenSSL
@@ -48,7 +48,7 @@ Source12: ec_curve.c
Source13: ectest.c
# Build changes
-Patch1: openssl-1.0.1-beta2-rpmbuild.patch
+Patch1: openssl-1.0.1e-rpmbuild.patch
Patch2: openssl-1.0.1e-defaults.patch
Patch4: openssl-1.0.0-beta5-enginesdir.patch
Patch5: openssl-0.9.8a-no-rpath.patch
@@ -56,7 +56,8 @@ Patch6: openssl-0.9.8b-test-use-localhost.patch
Patch7: openssl-1.0.0-timezone.patch
Patch8: openssl-1.0.1c-perlfind.patch
Patch9: openssl-1.0.1c-aliasing.patch
-Patch10: openssl-1.0.1e-ppc64le-target.patch
+# This patch must be applied first
+Patch10: openssl-1.0.1i-ppc-asm-update.patch
# Bug fixes
Patch23: openssl-1.0.1c-default-paths.patch
Patch24: openssl-1.0.1e-issuer-hash.patch
@@ -64,13 +65,11 @@ Patch24: openssl-1.0.1e-issuer-hash.patch
Patch33: openssl-1.0.0-beta4-ca-dir.patch
Patch34: openssl-0.9.6-x509.patch
Patch35: openssl-0.9.8j-version-add-engines.patch
-Patch36: openssl-1.0.0e-doc-noeof.patch
-Patch38: openssl-1.0.1-beta2-ssl-op-all.patch
-Patch39: openssl-1.0.1c-ipv6-apps.patch
-Patch40: openssl-1.0.1e-fips.patch
+Patch39: openssl-1.0.1h-ipv6-apps.patch
+Patch40: openssl-1.0.1g-fips.patch
Patch45: openssl-1.0.1e-env-zlib.patch
Patch47: openssl-1.0.0-beta5-readme-warning.patch
-Patch49: openssl-1.0.1a-algo-doc.patch
+Patch49: openssl-1.0.1i-algo-doc.patch
Patch50: openssl-1.0.1-beta2-dtls1-abi.patch
Patch51: openssl-1.0.1e-version.patch
Patch56: openssl-1.0.0c-rsa-x931.patch
@@ -82,28 +81,21 @@ Patch66: openssl-1.0.1-pkgconfig-krb5.patch
Patch68: openssl-1.0.1e-secure-getenv.patch
Patch69: openssl-1.0.1c-dh-1024.patch
Patch70: openssl-1.0.1e-fips-ec.patch
-Patch71: openssl-1.0.1e-manfix.patch
+Patch71: openssl-1.0.1i-manfix.patch
Patch72: openssl-1.0.1e-fips-ctor.patch
Patch73: openssl-1.0.1e-ecc-suiteb.patch
Patch74: openssl-1.0.1e-no-md5-verify.patch
Patch75: openssl-1.0.1e-compat-symbols.patch
-Patch76: openssl-1.0.1e-new-fips-reqs.patch
+Patch76: openssl-1.0.1i-new-fips-reqs.patch
Patch77: openssl-1.0.1e-weak-ciphers.patch
-Patch78: openssl-1.0.1e-3des-strength.patch
-Patch79: openssl-1.0.1e-req-keylen.patch
+Patch90: openssl-1.0.1e-enc-fail.patch
+Patch92: openssl-1.0.1h-system-cipherlist.patch
+Patch93: openssl-1.0.1h-disable-sslv2v3.patch
# Backported fixes including security fixes
Patch81: openssl-1.0.1-beta2-padlock64.patch
-Patch82: openssl-1.0.1e-backports.patch
-Patch83: openssl-1.0.1e-bad-mac.patch
-Patch84: openssl-1.0.1e-trusted-first.patch
+Patch84: openssl-1.0.1i-trusted-first.patch
Patch85: openssl-1.0.1e-arm-use-elf-auxv-caps.patch
-Patch86: openssl-1.0.1e-cve-2013-6449.patch
-Patch87: openssl-1.0.1e-cve-2013-6450.patch
-Patch88: openssl-1.0.1e-cve-2013-4353.patch
Patch89: openssl-1.0.1e-ephemeral-key-size.patch
-Patch90: openssl-1.0.1e-enc-fail.patch
-# upstream patch for CVE-2014-0160
-Patch100: openssl.git-96db902.patch
# MinGW-specific patches.
# Rename *eay32.dll to lib*.dll
@@ -217,6 +209,7 @@ Static version of the MinGW port of the OpenSSL toolkit.
cp %{SOURCE12} %{SOURCE13} crypto/ec/
+%patch10 -p1 -b .ppc-asm
%patch1 -p1 -b .rpmbuild
%patch2 -p1 -b .defaults
%patch4 -p1 -b .enginesdir %{?_rawbuild}
@@ -225,7 +218,6 @@ cp %{SOURCE12} %{SOURCE13} crypto/ec/
%patch7 -p1 -b .timezone
%patch8 -p1 -b .perlfind %{?_rawbuild}
%patch9 -p1 -b .aliasing
-%patch10 -p1 -b .ppc64le
%patch23 -p1 -b .default-paths
%patch24 -p1 -b .issuer-hash
@@ -233,8 +225,6 @@ cp %{SOURCE12} %{SOURCE13} crypto/ec/
%patch33 -p1 -b .ca-dir
%patch34 -p1 -b .x509
%patch35 -p1 -b .version-add-engines
-%patch36 -p1 -b .doc-noeof
-%patch38 -p1 -b .op-all
#patch39 -p1 -b .ipv6-apps
%patch40 -p1 -b .fips
%patch45 -p1 -b .env-zlib
@@ -251,27 +241,21 @@ cp %{SOURCE12} %{SOURCE13} crypto/ec/
#patch68 -p1 -b .secure-getenv
%patch69 -p1 -b .dh1024
#patch70 -p1 -b .fips-ec
+%patch71 -p1 -b .manfix
#patch72 -p1 -b .fips-ctor
%patch73 -p1 -b .suiteb
#patch74 -p1 -b .no-md5-verify
%patch75 -p1 -b .compat
#patch76 -p1 -b .fips-reqs
%patch77 -p1 -b .weak-ciphers
-%patch78 -p1 -b .3des-strength
-%patch79 -p1 -b .keylen
+%patch90 -p1 -b .enc-fail
+%patch92 -p1 -b .system
+%patch93 -p1 -b .v2v3
%patch81 -p1 -b .padlock64
-%patch82 -p1 -b .backports
-%patch71 -p1 -b .manfix
-%patch83 -p1 -b .bad-mac
%patch84 -p1 -b .trusted-first
%patch85 -p1 -b .armcap
-%patch86 -p1 -b .hash-crash
-%patch87 -p1 -b .dtls1-mitm
-%patch88 -p1 -b .handshake-crash
-#%patch89 -p1 -b .ephemeral
-%patch90 -p1 -b .enc-fail
-%patch100 -p1 -b .CVE-2014-0160
+#patch89 -p1 -b .ephemeral
# MinGW specific patches
%patch101 -p1 -b .mingw-libversion
@@ -516,6 +500,14 @@ mkdir -m700 $RPM_BUILD_ROOT%{mingw64_sysconfdir}/pki/CA/private
%changelog
+* Thu Aug 21 2014 Marc-André Lureau <marcandre.lureau at redhat.com> - 1.0.1i-1
+- Synced with native openssl-1.0.1i-3.fc21
+- Fixes various flaws (RHBZ#1096234 and RHBZ#1127705)
+ CVE-2014-3505 CVE-2014-3506 CVE-2014-3507 CVE-2014-3511
+ CVE-2014-3510 CVE-2014-3508 CVE-2014-3509 CVE-2014-0221
+ CVE-2014-0198 CVE-2014-0224 CVE-2014-0195 CVE-2010-5298
+ CVE-2014-3470
+
* Sat Jun 07 2014 Fedora Release Engineering <rel-eng at lists.fedoraproject.org> - 1.0.1e-7
- Rebuilt for https://fedoraproject.org/wiki/Fedora_21_Mass_Rebuild
@@ -612,7 +604,7 @@ mkdir -m700 $RPM_BUILD_ROOT%{mingw64_sysconfdir}/pki/CA/private
* Sun Aug 30 2009 Erik van Pienbroek <epienbro at fedoraproject.org> - 1.0.0-0.2.beta3
- Fixed invalid RPM Provides
-
+
* Fri Aug 28 2009 Erik van Pienbroek <epienbro at fedoraproject.org> - 1.0.0-0.1.beta3
- Update to version 1.0.0 beta 3
- Use %%global instead of %%define
diff --git a/openssl-1.0.1e-fips-ec.patch b/openssl-1.0.1e-fips-ec.patch
index 7287dae..e1f648c 100644
--- a/openssl-1.0.1e-fips-ec.patch
+++ b/openssl-1.0.1e-fips-ec.patch
@@ -241,7 +241,7 @@ diff -up openssl-1.0.1e/crypto/ec/ec_key.c.fips-ec openssl-1.0.1e/crypto/ec/ec_k
+
+ EVP_PKEY_set1_EC_KEY(pk, key);
+
-+ if (fips_pkey_signature_test(pk, tbs, 0, NULL, 0, NULL, 0, NULL))
++ if (fips_pkey_signature_test(pk, tbs, -1, NULL, 0, NULL, 0, NULL))
+ ret = 1;
+
+ err:
diff --git a/openssl-1.0.1e-ppc-asm-update.patch b/openssl-1.0.1e-ppc-asm-update.patch
new file mode 100644
index 0000000..caa92ec
--- /dev/null
+++ b/openssl-1.0.1e-ppc-asm-update.patch
@@ -0,0 +1,6664 @@
+diff --git a/Configure b/Configure
+index 9c803dc..5a5c2d8 100755
+--- a/Configure
++++ b/Configure
+@@ -139,8 +139,8 @@ my $s390x_asm="s390xcap.o s390xcpuid.o:bn-s390x.o s390x-mont.o s390x-gf2m.o::aes
+ my $armv4_asm="armcap.o armv4cpuid.o:bn_asm.o armv4-mont.o armv4-gf2m.o::aes_cbc.o aes-armv4.o:::sha1-armv4-large.o sha256-armv4.o sha512-armv4.o:::::::ghash-armv4.o::void";
+ my $parisc11_asm="pariscid.o:bn_asm.o parisc-mont.o::aes_core.o aes_cbc.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash-parisc.o::32";
+ my $parisc20_asm="pariscid.o:pa-risc2W.o parisc-mont.o::aes_core.o aes_cbc.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash-parisc.o::64";
+-my $ppc32_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.o aes_cbc.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o::::::::";
+-my $ppc64_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.o aes_cbc.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o sha512-ppc.o::::::::";
++my $ppc64_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.o aes_cbc.o aes-ppc.o vpaes-ppc.o aesp8-ppc.o:::sha1-ppc.o sha256-ppc.o sha512-ppc.o sha256p8-ppc.o sha512p8-ppc.o:::::::ghashp8-ppc.o:";
++my $ppc32_asm=$ppc64_asm;
+ my $no_asm=":::::::::::::::void";
+
+ # As for $BSDthreads. Idea is to maintain "collective" set of flags,
+@@ -357,6 +357,7 @@ my %table=(
+ ####
+ "linux-generic64","gcc:-DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
+ "linux-ppc64", "gcc:-m64 -DB_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL:${ppc64_asm}:linux64:dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::64",
++"linux-ppc64le","gcc:-m64 -DL_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL:$ppc64_asm:linux64le:dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::",
+ "linux-ia64", "gcc:-DL_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_UNROLL DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
+ "linux-ia64-ecc","ecc:-DL_ENDIAN -DTERMIO -O2 -Wall -no_cpprt::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
+ "linux-ia64-icc","icc:-DL_ENDIAN -DTERMIO -O2 -Wall -no_cpprt::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_RISC1 DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
+@@ -462,8 +463,8 @@ my %table=(
+
+ #### IBM's AIX.
+ "aix3-cc", "cc:-O -DB_ENDIAN -qmaxmem=16384::(unknown):AIX::BN_LLONG RC4_CHAR:::",
+-"aix-gcc", "gcc:-O -DB_ENDIAN::-pthread:AIX::BN_LLONG RC4_CHAR:${ppc32_asm}:aix32:dlfcn:aix-shared::-shared -Wl,-G:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)::-X32",
+-"aix64-gcc","gcc:-maix64 -O -DB_ENDIAN::-pthread:AIX::SIXTY_FOUR_BIT_LONG RC4_CHAR:${ppc64_asm}:aix64:dlfcn:aix-shared::-maix64 -shared -Wl,-G:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)::-X64",
++"aix-gcc", "gcc:-O -DB_ENDIAN::-pthread:AIX::BN_LLONG RC4_CHAR:$ppc32_asm:aix32:dlfcn:aix-shared::-shared -Wl,-G:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)::-X32",
++"aix64-gcc","gcc:-maix64 -O -DB_ENDIAN::-pthread:AIX::SIXTY_FOUR_BIT_LONG RC4_CHAR:$ppc64_asm:aix64:dlfcn:aix-shared::-maix64 -shared -Wl,-G:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)::-X64",
+ # Below targets assume AIX 5. Idea is to effectively disregard $OBJECT_MODE
+ # at build time. $OBJECT_MODE is respected at ./config stage!
+ "aix-cc", "cc:-q32 -O -DB_ENDIAN -qmaxmem=16384 -qro -qroconst::-qthreaded -D_THREAD_SAFE:AIX::BN_LLONG RC4_CHAR:${ppc32_asm}:aix32:dlfcn:aix-shared::-q32 -G:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)::-X 32",
+@@ -1525,7 +1526,7 @@ else {
+ $wp_obj="wp_block.o";
+ }
+ $cmll_obj=$cmll_enc unless ($cmll_obj =~ /.o$/);
+-if ($modes_obj =~ /ghash/)
++if ($modes_obj =~ /ghash\-/)
+ {
+ $cflags.=" -DGHASH_ASM";
+ }
+diff --git a/config b/config
+index 88b9bc6..8b80802 100755
+--- a/config
++++ b/config
+@@ -587,13 +587,20 @@ case "$GUESSOS" in
+ fi
+ ;;
+ ppc64-*-linux2)
+- echo "WARNING! If you wish to build 64-bit library, then you have to"
+- echo " invoke './Configure linux-ppc64' *manually*."
+- if [ "$TEST" = "false" -a -t 1 ]; then
+- echo " You have about 5 seconds to press Ctrl-C to abort."
+- (trap "stty `stty -g`" 2 0; stty -icanon min 0 time 50; read waste) <&1
++ if [ -z "$KERNEL_BITS" ]; then
++ echo "WARNING! If you wish to build 64-bit library, then you have to"
++ echo " invoke './Configure linux-ppc64' *manually*."
++ if [ "$TEST" = "false" -a -t 1 ]; then
++ echo " You have about 5 seconds to press Ctrl-C to abort."
++ (trap "stty `stty -g`" 2 0; stty -icanon min 0 time 50; read waste) <&1
++ fi
++ fi
++ if [ "$KERNEL_BITS" = "64" ]; then
++ OUT="linux-ppc64"
++ else
++ OUT="linux-ppc"
++ (echo "__LP64__" | gcc -E -x c - 2>/dev/null | grep "^__LP64__" 2>&1 > /dev/null) || options="$options -m32"
+ fi
+- OUT="linux-ppc"
+ ;;
+ ppc-*-linux2) OUT="linux-ppc" ;;
+ ppc60x-*-vxworks*) OUT="vxworks-ppc60x" ;;
+diff --git a/crypto/aes/Makefile b/crypto/aes/Makefile
+index 45ede0a..847f4ee 100644
+--- a/crypto/aes/Makefile
++++ b/crypto/aes/Makefile
+@@ -71,6 +71,10 @@ aes-sparcv9.s: asm/aes-sparcv9.pl
+
+ aes-ppc.s: asm/aes-ppc.pl
+ $(PERL) asm/aes-ppc.pl $(PERLASM_SCHEME) $@
++vpaes-ppc.s: asm/vpaes-ppc.pl
++ $(PERL) asm/vpaes-ppc.pl $(PERLASM_SCHEME) $@
++aesp8-ppc.s: asm/aesp8-ppc.pl
++ $(PERL) asm/aesp8-ppc.pl $(PERLASM_SCHEME) $@
+
+ aes-parisc.s: asm/aes-parisc.pl
+ $(PERL) asm/aes-parisc.pl $(PERLASM_SCHEME) $@
+diff --git a/crypto/aes/asm/aes-ppc.pl b/crypto/aes/asm/aes-ppc.pl
+index 7c52cbe..7a99fc3 100644
+--- a/crypto/aes/asm/aes-ppc.pl
++++ b/crypto/aes/asm/aes-ppc.pl
+@@ -45,6 +45,8 @@ if ($flavour =~ /64/) {
+ $PUSH ="stw";
+ } else { die "nonsense $flavour"; }
+
++$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;
++
+ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+ ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
+ ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
+@@ -68,7 +70,7 @@ $key="r5";
+ $Tbl0="r3";
+ $Tbl1="r6";
+ $Tbl2="r7";
+-$Tbl3="r2";
++$Tbl3=$out; # stay away from "r2"; $out is offloaded to stack
+
+ $s0="r8";
+ $s1="r9";
+@@ -76,7 +78,7 @@ $s2="r10";
+ $s3="r11";
+
+ $t0="r12";
+-$t1="r13";
++$t1="r0"; # stay away from "r13";
+ $t2="r14";
+ $t3="r15";
+
+@@ -100,9 +102,6 @@ $acc13="r29";
+ $acc14="r30";
+ $acc15="r31";
+
+-# stay away from TLS pointer
+-if ($SIZE_T==8) { die if ($t1 ne "r13"); $t1="r0"; }
+-else { die if ($Tbl3 ne "r2"); $Tbl3=$t0; $t0="r0"; }
+ $mask80=$Tbl2;
+ $mask1b=$Tbl3;
+
+@@ -337,8 +336,7 @@ $code.=<<___;
+ $STU $sp,-$FRAME($sp)
+ mflr r0
+
+- $PUSH $toc,`$FRAME-$SIZE_T*20`($sp)
+- $PUSH r13,`$FRAME-$SIZE_T*19`($sp)
++ $PUSH $out,`$FRAME-$SIZE_T*19`($sp)
+ $PUSH r14,`$FRAME-$SIZE_T*18`($sp)
+ $PUSH r15,`$FRAME-$SIZE_T*17`($sp)
+ $PUSH r16,`$FRAME-$SIZE_T*16`($sp)
+@@ -365,16 +363,61 @@ $code.=<<___;
+ bne Lenc_unaligned
+
+ Lenc_unaligned_ok:
++___
++$code.=<<___ if (!$LITTLE_ENDIAN);
+ lwz $s0,0($inp)
+ lwz $s1,4($inp)
+ lwz $s2,8($inp)
+ lwz $s3,12($inp)
++___
++$code.=<<___ if ($LITTLE_ENDIAN);
++ lwz $t0,0($inp)
++ lwz $t1,4($inp)
++ lwz $t2,8($inp)
++ lwz $t3,12($inp)
++ rotlwi $s0,$t0,8
++ rotlwi $s1,$t1,8
++ rotlwi $s2,$t2,8
++ rotlwi $s3,$t3,8
++ rlwimi $s0,$t0,24,0,7
++ rlwimi $s1,$t1,24,0,7
++ rlwimi $s2,$t2,24,0,7
++ rlwimi $s3,$t3,24,0,7
++ rlwimi $s0,$t0,24,16,23
++ rlwimi $s1,$t1,24,16,23
++ rlwimi $s2,$t2,24,16,23
++ rlwimi $s3,$t3,24,16,23
++___
++$code.=<<___;
+ bl LAES_Te
+ bl Lppc_AES_encrypt_compact
++ $POP $out,`$FRAME-$SIZE_T*19`($sp)
++___
++$code.=<<___ if ($LITTLE_ENDIAN);
++ rotlwi $t0,$s0,8
++ rotlwi $t1,$s1,8
++ rotlwi $t2,$s2,8
++ rotlwi $t3,$s3,8
++ rlwimi $t0,$s0,24,0,7
++ rlwimi $t1,$s1,24,0,7
++ rlwimi $t2,$s2,24,0,7
++ rlwimi $t3,$s3,24,0,7
++ rlwimi $t0,$s0,24,16,23
++ rlwimi $t1,$s1,24,16,23
++ rlwimi $t2,$s2,24,16,23
++ rlwimi $t3,$s3,24,16,23
++ stw $t0,0($out)
++ stw $t1,4($out)
++ stw $t2,8($out)
++ stw $t3,12($out)
++___
++$code.=<<___ if (!$LITTLE_ENDIAN);
+ stw $s0,0($out)
+ stw $s1,4($out)
+ stw $s2,8($out)
+ stw $s3,12($out)
++___
++$code.=<<___;
+ b Lenc_done
+
+ Lenc_unaligned:
+@@ -417,6 +460,7 @@ Lenc_xpage:
+
+ bl LAES_Te
+ bl Lppc_AES_encrypt_compact
++ $POP $out,`$FRAME-$SIZE_T*19`($sp)
+
+ extrwi $acc00,$s0,8,0
+ extrwi $acc01,$s0,8,8
+@@ -449,8 +493,6 @@ Lenc_xpage:
+
+ Lenc_done:
+ $POP r0,`$FRAME+$LRSAVE`($sp)
+- $POP $toc,`$FRAME-$SIZE_T*20`($sp)
+- $POP r13,`$FRAME-$SIZE_T*19`($sp)
+ $POP r14,`$FRAME-$SIZE_T*18`($sp)
+ $POP r15,`$FRAME-$SIZE_T*17`($sp)
+ $POP r16,`$FRAME-$SIZE_T*16`($sp)
+@@ -764,6 +806,7 @@ Lenc_compact_done:
+ blr
+ .long 0
+ .byte 0,12,0x14,0,0,0,0,0
++.size .AES_encrypt,.-.AES_encrypt
+
+ .globl .AES_decrypt
+ .align 7
+@@ -771,8 +814,7 @@ Lenc_compact_done:
+ $STU $sp,-$FRAME($sp)
+ mflr r0
+
+- $PUSH $toc,`$FRAME-$SIZE_T*20`($sp)
+- $PUSH r13,`$FRAME-$SIZE_T*19`($sp)
++ $PUSH $out,`$FRAME-$SIZE_T*19`($sp)
+ $PUSH r14,`$FRAME-$SIZE_T*18`($sp)
+ $PUSH r15,`$FRAME-$SIZE_T*17`($sp)
+ $PUSH r16,`$FRAME-$SIZE_T*16`($sp)
+@@ -799,16 +841,61 @@ Lenc_compact_done:
+ bne Ldec_unaligned
+
+ Ldec_unaligned_ok:
++___
++$code.=<<___ if (!$LITTLE_ENDIAN);
+ lwz $s0,0($inp)
+ lwz $s1,4($inp)
+ lwz $s2,8($inp)
+ lwz $s3,12($inp)
++___
++$code.=<<___ if ($LITTLE_ENDIAN);
++ lwz $t0,0($inp)
++ lwz $t1,4($inp)
++ lwz $t2,8($inp)
++ lwz $t3,12($inp)
++ rotlwi $s0,$t0,8
++ rotlwi $s1,$t1,8
++ rotlwi $s2,$t2,8
++ rotlwi $s3,$t3,8
++ rlwimi $s0,$t0,24,0,7
++ rlwimi $s1,$t1,24,0,7
++ rlwimi $s2,$t2,24,0,7
++ rlwimi $s3,$t3,24,0,7
++ rlwimi $s0,$t0,24,16,23
++ rlwimi $s1,$t1,24,16,23
++ rlwimi $s2,$t2,24,16,23
++ rlwimi $s3,$t3,24,16,23
++___
++$code.=<<___;
+ bl LAES_Td
+ bl Lppc_AES_decrypt_compact
++ $POP $out,`$FRAME-$SIZE_T*19`($sp)
++___
++$code.=<<___ if ($LITTLE_ENDIAN);
++ rotlwi $t0,$s0,8
++ rotlwi $t1,$s1,8
++ rotlwi $t2,$s2,8
++ rotlwi $t3,$s3,8
++ rlwimi $t0,$s0,24,0,7
++ rlwimi $t1,$s1,24,0,7
++ rlwimi $t2,$s2,24,0,7
++ rlwimi $t3,$s3,24,0,7
++ rlwimi $t0,$s0,24,16,23
++ rlwimi $t1,$s1,24,16,23
++ rlwimi $t2,$s2,24,16,23
++ rlwimi $t3,$s3,24,16,23
++ stw $t0,0($out)
++ stw $t1,4($out)
++ stw $t2,8($out)
++ stw $t3,12($out)
++___
++$code.=<<___ if (!$LITTLE_ENDIAN);
+ stw $s0,0($out)
+ stw $s1,4($out)
+ stw $s2,8($out)
+ stw $s3,12($out)
++___
++$code.=<<___;
+ b Ldec_done
+
+ Ldec_unaligned:
+@@ -851,6 +938,7 @@ Ldec_xpage:
+
+ bl LAES_Td
+ bl Lppc_AES_decrypt_compact
++ $POP $out,`$FRAME-$SIZE_T*19`($sp)
+
+ extrwi $acc00,$s0,8,0
+ extrwi $acc01,$s0,8,8
+@@ -883,8 +971,6 @@ Ldec_xpage:
+
+ Ldec_done:
+ $POP r0,`$FRAME+$LRSAVE`($sp)
+- $POP $toc,`$FRAME-$SIZE_T*20`($sp)
+- $POP r13,`$FRAME-$SIZE_T*19`($sp)
+ $POP r14,`$FRAME-$SIZE_T*18`($sp)
+ $POP r15,`$FRAME-$SIZE_T*17`($sp)
+ $POP r16,`$FRAME-$SIZE_T*16`($sp)
+@@ -1355,6 +1441,7 @@ Ldec_compact_done:
+ blr
+ .long 0
+ .byte 0,12,0x14,0,0,0,0,0
++.size .AES_decrypt,.-.AES_decrypt
+
+ .asciz "AES for PPC, CRYPTOGAMS by <appro\@openssl.org>"
+ .align 7
+diff --git a/crypto/aes/asm/aesp8-ppc.pl b/crypto/aes/asm/aesp8-ppc.pl
+new file mode 100755
+index 0000000..3ee8979
+--- /dev/null
++++ b/crypto/aes/asm/aesp8-ppc.pl
+@@ -0,0 +1,1940 @@
++#!/usr/bin/env perl
++#
++# ====================================================================
++# Written by Andy Polyakov <appro at openssl.org> for the OpenSSL
++# project. The module is, however, dual licensed under OpenSSL and
++# CRYPTOGAMS licenses depending on where you obtain it. For further
++# details see http://www.openssl.org/~appro/cryptogams/.
++# ====================================================================
++#
++# This module implements support for AES instructions as per PowerISA
++# specification version 2.07, first implemented by POWER8 processor.
++# The module is endian-agnostic in sense that it supports both big-
++# and little-endian cases. Data alignment in parallelizable modes is
++# handled with VSX loads and stores, which implies MSR.VSX flag being
++# set. It should also be noted that ISA specification doesn't prohibit
++# alignment exceptions for these instructions on page boundaries.
++# Initially alignment was handled in pure AltiVec/VMX way [when data
++# is aligned programmatically, which in turn guarantees exception-
++# free execution], but it turned to hamper performance when vcipher
++# instructions are interleaved. It's reckoned that eventual
++# misalignment penalties at page boundaries are in average lower
++# than additional overhead in pure AltiVec approach.
++
++$flavour = shift;
++
++if ($flavour =~ /64/) {
++ $SIZE_T =8;
++ $LRSAVE =2*$SIZE_T;
++ $STU ="stdu";
++ $POP ="ld";
++ $PUSH ="std";
++ $UCMP ="cmpld";
++ $SHL ="sldi";
++} elsif ($flavour =~ /32/) {
++ $SIZE_T =4;
++ $LRSAVE =$SIZE_T;
++ $STU ="stwu";
++ $POP ="lwz";
++ $PUSH ="stw";
++ $UCMP ="cmplw";
++ $SHL ="slwi";
++} else { die "nonsense $flavour"; }
++
++$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;
++
++$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
++( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
++( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
++die "can't locate ppc-xlate.pl";
++
++open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!";
++
++$FRAME=8*$SIZE_T;
++$prefix="aes_p8";
++
++$sp="r1";
++$vrsave="r12";
++
++#########################################################################
++{{{ # Key setup procedures #
++my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
++my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
++my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));
++
++$code.=<<___;
++.machine "any"
++
++.text
++
++.align 7
++rcon:
++.long 0x01000000, 0x01000000, 0x01000000, 0x01000000 ?rev
++.long 0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000 ?rev
++.long 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c ?rev
++.long 0,0,0,0 ?asis
++Lconsts:
++ mflr r0
++ bcl 20,31,\$+4
++ mflr $ptr #vvvvv "distance between . and rcon
++ addi $ptr,$ptr,-0x48
++ mtlr r0
++ blr
++ .long 0
++ .byte 0,12,0x14,0,0,0,0,0
++.asciz "AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
++
++.globl .${prefix}_set_encrypt_key
++.align 5
++.${prefix}_set_encrypt_key:
++Lset_encrypt_key:
++ mflr r11
++ $PUSH r11,$LRSAVE($sp)
++
++ li $ptr,-1
++ ${UCMP}i $inp,0
++ beq- Lenc_key_abort # if ($inp==0) return -1;
++ ${UCMP}i $out,0
++ beq- Lenc_key_abort # if ($out==0) return -1;
++ li $ptr,-2
++ cmpwi $bits,128
++ blt- Lenc_key_abort
++ cmpwi $bits,256
++ bgt- Lenc_key_abort
++ andi. r0,$bits,0x3f
++ bne- Lenc_key_abort
++
++ lis r0,0xfff0
++ mfspr $vrsave,256
++ mtspr 256,r0
++
++ bl Lconsts
++ mtlr r11
++
++ neg r9,$inp
++ lvx $in0,0,$inp
++ addi $inp,$inp,15 # 15 is not typo
++ lvsr $key,0,r9 # borrow $key
++ li r8,0x20
++ cmpwi $bits,192
++ lvx $in1,0,$inp
++ le?vspltisb $mask,0x0f # borrow $mask
++ lvx $rcon,0,$ptr
++ le?vxor $key,$key,$mask # adjust for byte swap
++ lvx $mask,r8,$ptr
++ addi $ptr,$ptr,0x10
++ vperm $in0,$in0,$in1,$key # align [and byte swap in LE]
++ li $cnt,8
++ vxor $zero,$zero,$zero
++ mtctr $cnt
++
++ ?lvsr $outperm,0,$out
++ vspltisb $outmask,-1
++ lvx $outhead,0,$out
++ ?vperm $outmask,$zero,$outmask,$outperm
++
++ blt Loop128
++ addi $inp,$inp,8
++ beq L192
++ addi $inp,$inp,8
++ b L256
++
++.align 4
++Loop128:
++ vperm $key,$in0,$in0,$mask # rotate-n-splat
++ vsldoi $tmp,$zero,$in0,12 # >>32
++ vperm $outtail,$in0,$in0,$outperm # rotate
++ vsel $stage,$outhead,$outtail,$outmask
++ vmr $outhead,$outtail
++ vcipherlast $key,$key,$rcon
++ stvx $stage,0,$out
++ addi $out,$out,16
++
++ vxor $in0,$in0,$tmp
++ vsldoi $tmp,$zero,$tmp,12 # >>32
++ vxor $in0,$in0,$tmp
++ vsldoi $tmp,$zero,$tmp,12 # >>32
++ vxor $in0,$in0,$tmp
++ vadduwm $rcon,$rcon,$rcon
++ vxor $in0,$in0,$key
++ bdnz Loop128
++
++ lvx $rcon,0,$ptr # last two round keys
++
++ vperm $key,$in0,$in0,$mask # rotate-n-splat
++ vsldoi $tmp,$zero,$in0,12 # >>32
++ vperm $outtail,$in0,$in0,$outperm # rotate
++ vsel $stage,$outhead,$outtail,$outmask
++ vmr $outhead,$outtail
++ vcipherlast $key,$key,$rcon
++ stvx $stage,0,$out
++ addi $out,$out,16
++
++ vxor $in0,$in0,$tmp
++ vsldoi $tmp,$zero,$tmp,12 # >>32
++ vxor $in0,$in0,$tmp
++ vsldoi $tmp,$zero,$tmp,12 # >>32
++ vxor $in0,$in0,$tmp
++ vadduwm $rcon,$rcon,$rcon
++ vxor $in0,$in0,$key
++
++ vperm $key,$in0,$in0,$mask # rotate-n-splat
++ vsldoi $tmp,$zero,$in0,12 # >>32
++ vperm $outtail,$in0,$in0,$outperm # rotate
++ vsel $stage,$outhead,$outtail,$outmask
++ vmr $outhead,$outtail
++ vcipherlast $key,$key,$rcon
++ stvx $stage,0,$out
++ addi $out,$out,16
++
++ vxor $in0,$in0,$tmp
++ vsldoi $tmp,$zero,$tmp,12 # >>32
++ vxor $in0,$in0,$tmp
++ vsldoi $tmp,$zero,$tmp,12 # >>32
++ vxor $in0,$in0,$tmp
++ vxor $in0,$in0,$key
++ vperm $outtail,$in0,$in0,$outperm # rotate
++ vsel $stage,$outhead,$outtail,$outmask
++ vmr $outhead,$outtail
++ stvx $stage,0,$out
++
++ addi $inp,$out,15 # 15 is not typo
++ addi $out,$out,0x50
++
++ li $rounds,10
++ b Ldone
++
++.align 4
++L192:
++ lvx $tmp,0,$inp
++ li $cnt,4
++ vperm $outtail,$in0,$in0,$outperm # rotate
++ vsel $stage,$outhead,$outtail,$outmask
++ vmr $outhead,$outtail
++ stvx $stage,0,$out
++ addi $out,$out,16
++ vperm $in1,$in1,$tmp,$key # align [and byte swap in LE]
++ vspltisb $key,8 # borrow $key
++ mtctr $cnt
++ vsububm $mask,$mask,$key # adjust the mask
++
++Loop192:
++ vperm $key,$in1,$in1,$mask # roate-n-splat
++ vsldoi $tmp,$zero,$in0,12 # >>32
++ vcipherlast $key,$key,$rcon
++
++ vxor $in0,$in0,$tmp
++ vsldoi $tmp,$zero,$tmp,12 # >>32
++ vxor $in0,$in0,$tmp
++ vsldoi $tmp,$zero,$tmp,12 # >>32
++ vxor $in0,$in0,$tmp
++
++ vsldoi $stage,$zero,$in1,8
++ vspltw $tmp,$in0,3
++ vxor $tmp,$tmp,$in1
++ vsldoi $in1,$zero,$in1,12 # >>32
++ vadduwm $rcon,$rcon,$rcon
++ vxor $in1,$in1,$tmp
++ vxor $in0,$in0,$key
++ vxor $in1,$in1,$key
++ vsldoi $stage,$stage,$in0,8
++
++ vperm $key,$in1,$in1,$mask # rotate-n-splat
++ vsldoi $tmp,$zero,$in0,12 # >>32
++ vperm $outtail,$stage,$stage,$outperm # rotate
++ vsel $stage,$outhead,$outtail,$outmask
++ vmr $outhead,$outtail
++ vcipherlast $key,$key,$rcon
++ stvx $stage,0,$out
++ addi $out,$out,16
++
++ vsldoi $stage,$in0,$in1,8
++ vxor $in0,$in0,$tmp
++ vsldoi $tmp,$zero,$tmp,12 # >>32
++ vperm $outtail,$stage,$stage,$outperm # rotate
++ vsel $stage,$outhead,$outtail,$outmask
++ vmr $outhead,$outtail
++ vxor $in0,$in0,$tmp
++ vsldoi $tmp,$zero,$tmp,12 # >>32
++ vxor $in0,$in0,$tmp
++ stvx $stage,0,$out
++ addi $out,$out,16
++
++ vspltw $tmp,$in0,3
++ vxor $tmp,$tmp,$in1
++ vsldoi $in1,$zero,$in1,12 # >>32
++ vadduwm $rcon,$rcon,$rcon
++ vxor $in1,$in1,$tmp
++ vxor $in0,$in0,$key
++ vxor $in1,$in1,$key
++ vperm $outtail,$in0,$in0,$outperm # rotate
++ vsel $stage,$outhead,$outtail,$outmask
++ vmr $outhead,$outtail
++ stvx $stage,0,$out
++ addi $inp,$out,15 # 15 is not typo
++ addi $out,$out,16
++ bdnz Loop192
++
++ li $rounds,12
++ addi $out,$out,0x20
++ b Ldone
++
++.align 4
++L256:
++ lvx $tmp,0,$inp
++ li $cnt,7
++ li $rounds,14
++ vperm $outtail,$in0,$in0,$outperm # rotate
++ vsel $stage,$outhead,$outtail,$outmask
++ vmr $outhead,$outtail
++ stvx $stage,0,$out
++ addi $out,$out,16
++ vperm $in1,$in1,$tmp,$key # align [and byte swap in LE]
++ mtctr $cnt
++
++Loop256:
++ vperm $key,$in1,$in1,$mask # rotate-n-splat
++ vsldoi $tmp,$zero,$in0,12 # >>32
++ vperm $outtail,$in1,$in1,$outperm # rotate
++ vsel $stage,$outhead,$outtail,$outmask
++ vmr $outhead,$outtail
++ vcipherlast $key,$key,$rcon
++ stvx $stage,0,$out
++ addi $out,$out,16
++
++ vxor $in0,$in0,$tmp
++ vsldoi $tmp,$zero,$tmp,12 # >>32
++ vxor $in0,$in0,$tmp
++ vsldoi $tmp,$zero,$tmp,12 # >>32
++ vxor $in0,$in0,$tmp
++ vadduwm $rcon,$rcon,$rcon
++ vxor $in0,$in0,$key
++ vperm $outtail,$in0,$in0,$outperm # rotate
++ vsel $stage,$outhead,$outtail,$outmask
++ vmr $outhead,$outtail
++ stvx $stage,0,$out
++ addi $inp,$out,15 # 15 is not typo
++ addi $out,$out,16
++ bdz Ldone
++
++ vspltw $key,$in0,3 # just splat
++ vsldoi $tmp,$zero,$in1,12 # >>32
++ vsbox $key,$key
++
++ vxor $in1,$in1,$tmp
++ vsldoi $tmp,$zero,$tmp,12 # >>32
++ vxor $in1,$in1,$tmp
++ vsldoi $tmp,$zero,$tmp,12 # >>32
++ vxor $in1,$in1,$tmp
++
++ vxor $in1,$in1,$key
++ b Loop256
++
++.align 4
++Ldone:
++ lvx $in1,0,$inp # redundant in aligned case
++ vsel $in1,$outhead,$in1,$outmask
++ stvx $in1,0,$inp
++ li $ptr,0
++ mtspr 256,$vrsave
++ stw $rounds,0($out)
++
++Lenc_key_abort:
++ mr r3,$ptr
++ blr
++ .long 0
++ .byte 0,12,0x14,1,0,0,3,0
++ .long 0
++.size .${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key
++
++.globl .${prefix}_set_decrypt_key
++.align 5
++.${prefix}_set_decrypt_key:
++ $STU $sp,-$FRAME($sp)
++ mflr r10
++ $PUSH r10,$FRAME+$LRSAVE($sp)
++ bl Lset_encrypt_key
++ mtlr r10
++
++ cmpwi r3,0
++ bne- Ldec_key_abort
++
++ slwi $cnt,$rounds,4
++ subi $inp,$out,240 # first round key
++ srwi $rounds,$rounds,1
++ add $out,$inp,$cnt # last round key
++ mtctr $rounds
++
++Ldeckey:
++ lwz r0, 0($inp)
++ lwz r6, 4($inp)
++ lwz r7, 8($inp)
++ lwz r8, 12($inp)
++ addi $inp,$inp,16
++ lwz r9, 0($out)
++ lwz r10,4($out)
++ lwz r11,8($out)
++ lwz r12,12($out)
++ stw r0, 0($out)
++ stw r6, 4($out)
++ stw r7, 8($out)
++ stw r8, 12($out)
++ subi $out,$out,16
++ stw r9, -16($inp)
++ stw r10,-12($inp)
++ stw r11,-8($inp)
++ stw r12,-4($inp)
++ bdnz Ldeckey
++
++ xor r3,r3,r3 # return value
++Ldec_key_abort:
++ addi $sp,$sp,$FRAME
++ blr
++ .long 0
++ .byte 0,12,4,1,0x80,0,3,0
++ .long 0
++.size .${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
++___
++}}}
++#########################################################################
++{{{ # Single block en- and decrypt procedures #
++sub gen_block () {
++my $dir = shift;
++my $n = $dir eq "de" ? "n" : "";
++my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));
++
++$code.=<<___;
++.globl .${prefix}_${dir}crypt
++.align 5
++.${prefix}_${dir}crypt:
++ lwz $rounds,240($key)
++ lis r0,0xfc00
++ mfspr $vrsave,256
++ li $idx,15 # 15 is not typo
++ mtspr 256,r0
++
++ lvx v0,0,$inp
++ neg r11,$out
++ lvx v1,$idx,$inp
++ lvsl v2,0,$inp # inpperm
++ le?vspltisb v4,0x0f
++ ?lvsl v3,0,r11 # outperm
++ le?vxor v2,v2,v4
++ li $idx,16
++ vperm v0,v0,v1,v2 # align [and byte swap in LE]
++ lvx v1,0,$key
++ ?lvsl v5,0,$key # keyperm
++ srwi $rounds,$rounds,1
++ lvx v2,$idx,$key
++ addi $idx,$idx,16
++ subi $rounds,$rounds,1
++ ?vperm v1,v1,v2,v5 # align round key
++
++ vxor v0,v0,v1
++ lvx v1,$idx,$key
++ addi $idx,$idx,16
++ mtctr $rounds
++
++Loop_${dir}c:
++ ?vperm v2,v2,v1,v5
++ v${n}cipher v0,v0,v2
++ lvx v2,$idx,$key
++ addi $idx,$idx,16
++ ?vperm v1,v1,v2,v5
++ v${n}cipher v0,v0,v1
++ lvx v1,$idx,$key
++ addi $idx,$idx,16
++ bdnz Loop_${dir}c
++
++ ?vperm v2,v2,v1,v5
++ v${n}cipher v0,v0,v2
++ lvx v2,$idx,$key
++ ?vperm v1,v1,v2,v5
++ v${n}cipherlast v0,v0,v1
++
++ vspltisb v2,-1
++ vxor v1,v1,v1
++ li $idx,15 # 15 is not typo
++ ?vperm v2,v1,v2,v3 # outmask
++ le?vxor v3,v3,v4
++ lvx v1,0,$out # outhead
++ vperm v0,v0,v0,v3 # rotate [and byte swap in LE]
++ vsel v1,v1,v0,v2
++ lvx v4,$idx,$out
++ stvx v1,0,$out
++ vsel v0,v0,v4,v2
++ stvx v0,$idx,$out
++
++ mtspr 256,$vrsave
++ blr
++ .long 0
++ .byte 0,12,0x14,0,0,0,3,0
++ .long 0
++.size .${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
++___
++}
++&gen_block("en");
++&gen_block("de");
++}}}
++#########################################################################
++{{{ # CBC en- and decrypt procedures #
++my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10));
++my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3));
++my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=
++ map("v$_",(4..10));
++$code.=<<___;
++.globl .${prefix}_cbc_encrypt
++.align 5
++.${prefix}_cbc_encrypt:
++ ${UCMP}i $len,16
++ bltlr-
++
++ cmpwi $enc,0 # test direction
++ lis r0,0xffe0
++ mfspr $vrsave,256
++ mtspr 256,r0
++
++ li $idx,15
++ vxor $rndkey0,$rndkey0,$rndkey0
++ le?vspltisb $tmp,0x0f
++
++ lvx $ivec,0,$ivp # load [unaligned] iv
++ lvsl $inpperm,0,$ivp
++ lvx $inptail,$idx,$ivp
++ le?vxor $inpperm,$inpperm,$tmp
++ vperm $ivec,$ivec,$inptail,$inpperm
++
++ neg r11,$inp
++ ?lvsl $keyperm,0,$key # prepare for unaligned key
++ lwz $rounds,240($key)
++
++ lvsr $inpperm,0,r11 # prepare for unaligned load
++ lvx $inptail,0,$inp
++ addi $inp,$inp,15 # 15 is not typo
++ le?vxor $inpperm,$inpperm,$tmp
++
++ ?lvsr $outperm,0,$out # prepare for unaligned store
++ vspltisb $outmask,-1
++ lvx $outhead,0,$out
++ ?vperm $outmask,$rndkey0,$outmask,$outperm
++ le?vxor $outperm,$outperm,$tmp
++
++ srwi $rounds,$rounds,1
++ li $idx,16
++ subi $rounds,$rounds,1
++ beq Lcbc_dec
++
++Lcbc_enc:
++ vmr $inout,$inptail
++ lvx $inptail,0,$inp
++ addi $inp,$inp,16
++ mtctr $rounds
++ subi $len,$len,16 # len-=16
++
++ lvx $rndkey0,0,$key
++ vperm $inout,$inout,$inptail,$inpperm
++ lvx $rndkey1,$idx,$key
++ addi $idx,$idx,16
++ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
++ vxor $inout,$inout,$rndkey0
++ lvx $rndkey0,$idx,$key
++ addi $idx,$idx,16
++ vxor $inout,$inout,$ivec
++
++Loop_cbc_enc:
++ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
++ vcipher $inout,$inout,$rndkey1
++ lvx $rndkey1,$idx,$key
++ addi $idx,$idx,16
++ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
++ vcipher $inout,$inout,$rndkey0
++ lvx $rndkey0,$idx,$key
++ addi $idx,$idx,16
++ bdnz Loop_cbc_enc
++
++ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
++ vcipher $inout,$inout,$rndkey1
++ lvx $rndkey1,$idx,$key
++ li $idx,16
++ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
++ vcipherlast $ivec,$inout,$rndkey0
++ ${UCMP}i $len,16
++
++ vperm $tmp,$ivec,$ivec,$outperm
++ vsel $inout,$outhead,$tmp,$outmask
++ vmr $outhead,$tmp
++ stvx $inout,0,$out
++ addi $out,$out,16
++ bge Lcbc_enc
++
++ b Lcbc_done
++
++.align 4
++Lcbc_dec:
++ ${UCMP}i $len,128
++ bge _aesp8_cbc_decrypt8x
++ vmr $tmp,$inptail
++ lvx $inptail,0,$inp
++ addi $inp,$inp,16
++ mtctr $rounds
++ subi $len,$len,16 # len-=16
++
++ lvx $rndkey0,0,$key
++ vperm $tmp,$tmp,$inptail,$inpperm
++ lvx $rndkey1,$idx,$key
++ addi $idx,$idx,16
++ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
++ vxor $inout,$tmp,$rndkey0
++ lvx $rndkey0,$idx,$key
++ addi $idx,$idx,16
++
++Loop_cbc_dec:
++ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
++ vncipher $inout,$inout,$rndkey1
++ lvx $rndkey1,$idx,$key
++ addi $idx,$idx,16
++ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
++ vncipher $inout,$inout,$rndkey0
++ lvx $rndkey0,$idx,$key
++ addi $idx,$idx,16
++ bdnz Loop_cbc_dec
++
++ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
++ vncipher $inout,$inout,$rndkey1
++ lvx $rndkey1,$idx,$key
++ li $idx,16
++ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
++ vncipherlast $inout,$inout,$rndkey0
++ ${UCMP}i $len,16
++
++ vxor $inout,$inout,$ivec
++ vmr $ivec,$tmp
++ vperm $tmp,$inout,$inout,$outperm
++ vsel $inout,$outhead,$tmp,$outmask
++ vmr $outhead,$tmp
++ stvx $inout,0,$out
++ addi $out,$out,16
++ bge Lcbc_dec
++
++Lcbc_done:
++ addi $out,$out,-1
++ lvx $inout,0,$out # redundant in aligned case
++ vsel $inout,$outhead,$inout,$outmask
++ stvx $inout,0,$out
++
++ neg $enc,$ivp # write [unaligned] iv
++ li $idx,15 # 15 is not typo
++ vxor $rndkey0,$rndkey0,$rndkey0
++ vspltisb $outmask,-1
++ le?vspltisb $tmp,0x0f
++ ?lvsl $outperm,0,$enc
++ ?vperm $outmask,$rndkey0,$outmask,$outperm
++ le?vxor $outperm,$outperm,$tmp
++ lvx $outhead,0,$ivp
++ vperm $ivec,$ivec,$ivec,$outperm
++ vsel $inout,$outhead,$ivec,$outmask
++ lvx $inptail,$idx,$ivp
++ stvx $inout,0,$ivp
++ vsel $inout,$ivec,$inptail,$outmask
++ stvx $inout,$idx,$ivp
++
++ mtspr 256,$vrsave
++ blr
++ .long 0
++ .byte 0,12,0x14,0,0,0,6,0
++ .long 0
++___
++#########################################################################
++{{ # Optimized CBC decrypt procedure #
++my $key_="r11";
++my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
++my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));
++my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));
++my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys
++ # v26-v31 last 6 round keys
++my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
++
++$code.=<<___;
++.align 5
++_aesp8_cbc_decrypt8x:
++ $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
++ li r10,`$FRAME+8*16+15`
++ li r11,`$FRAME+8*16+31`
++ stvx v20,r10,$sp # ABI says so
++ addi r10,r10,32
++ stvx v21,r11,$sp
++ addi r11,r11,32
++ stvx v22,r10,$sp
++ addi r10,r10,32
++ stvx v23,r11,$sp
++ addi r11,r11,32
++ stvx v24,r10,$sp
++ addi r10,r10,32
++ stvx v25,r11,$sp
++ addi r11,r11,32
++ stvx v26,r10,$sp
++ addi r10,r10,32
++ stvx v27,r11,$sp
++ addi r11,r11,32
++ stvx v28,r10,$sp
++ addi r10,r10,32
++ stvx v29,r11,$sp
++ addi r11,r11,32
++ stvx v30,r10,$sp
++ stvx v31,r11,$sp
++ li r0,-1
++ stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
++ li $x10,0x10
++ $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
++ li $x20,0x20
++ $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
++ li $x30,0x30
++ $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
++ li $x40,0x40
++ $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
++ li $x50,0x50
++ $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
++ li $x60,0x60
++ $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
++ li $x70,0x70
++ mtspr 256,r0
++
++ subi $rounds,$rounds,3 # -4 in total
++ subi $len,$len,128 # bias
++
++ lvx $rndkey0,$x00,$key # load key schedule
++ lvx v30,$x10,$key
++ addi $key,$key,0x20
++ lvx v31,$x00,$key
++ ?vperm $rndkey0,$rndkey0,v30,$keyperm
++ addi $key_,$sp,$FRAME+15
++ mtctr $rounds
++
++Load_cbc_dec_key:
++ ?vperm v24,v30,v31,$keyperm
++ lvx v30,$x10,$key
++ addi $key,$key,0x20
++ stvx v24,$x00,$key_ # off-load round[1]
++ ?vperm v25,v31,v30,$keyperm
++ lvx v31,$x00,$key
++ stvx v25,$x10,$key_ # off-load round[2]
++ addi $key_,$key_,0x20
++ bdnz Load_cbc_dec_key
++
++ lvx v26,$x10,$key
++ ?vperm v24,v30,v31,$keyperm
++ lvx v27,$x20,$key
++ stvx v24,$x00,$key_ # off-load round[3]
++ ?vperm v25,v31,v26,$keyperm
++ lvx v28,$x30,$key
++ stvx v25,$x10,$key_ # off-load round[4]
++ addi $key_,$sp,$FRAME+15 # rewind $key_
++ ?vperm v26,v26,v27,$keyperm
++ lvx v29,$x40,$key
++ ?vperm v27,v27,v28,$keyperm
++ lvx v30,$x50,$key
++ ?vperm v28,v28,v29,$keyperm
++ lvx v31,$x60,$key
++ ?vperm v29,v29,v30,$keyperm
++ lvx $out0,$x70,$key # borrow $out0
++ ?vperm v30,v30,v31,$keyperm
++ lvx v24,$x00,$key_ # pre-load round[1]
++ ?vperm v31,v31,$out0,$keyperm
++ lvx v25,$x10,$key_ # pre-load round[2]
++
++ #lvx $inptail,0,$inp # "caller" already did this
++ #addi $inp,$inp,15 # 15 is not typo
++ subi $inp,$inp,15 # undo "caller"
++
++ le?li $idx,8
++ lvx_u $in0,$x00,$inp # load first 8 "words"
++ le?lvsl $inpperm,0,$idx
++ le?vspltisb $tmp,0x0f
++ lvx_u $in1,$x10,$inp
++ le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
++ lvx_u $in2,$x20,$inp
++ le?vperm $in0,$in0,$in0,$inpperm
++ lvx_u $in3,$x30,$inp
++ le?vperm $in1,$in1,$in1,$inpperm
++ lvx_u $in4,$x40,$inp
++ le?vperm $in2,$in2,$in2,$inpperm
++ vxor $out0,$in0,$rndkey0
++ lvx_u $in5,$x50,$inp
++ le?vperm $in3,$in3,$in3,$inpperm
++ vxor $out1,$in1,$rndkey0
++ lvx_u $in6,$x60,$inp
++ le?vperm $in4,$in4,$in4,$inpperm
++ vxor $out2,$in2,$rndkey0
++ lvx_u $in7,$x70,$inp
++ addi $inp,$inp,0x80
++ le?vperm $in5,$in5,$in5,$inpperm
++ vxor $out3,$in3,$rndkey0
++ le?vperm $in6,$in6,$in6,$inpperm
++ vxor $out4,$in4,$rndkey0
++ le?vperm $in7,$in7,$in7,$inpperm
++ vxor $out5,$in5,$rndkey0
++ vxor $out6,$in6,$rndkey0
++ vxor $out7,$in7,$rndkey0
++
++ mtctr $rounds
++ b Loop_cbc_dec8x
++.align 5
++Loop_cbc_dec8x:
++ vncipher $out0,$out0,v24
++ vncipher $out1,$out1,v24
++ vncipher $out2,$out2,v24
++ vncipher $out3,$out3,v24
++ vncipher $out4,$out4,v24
++ vncipher $out5,$out5,v24
++ vncipher $out6,$out6,v24
++ vncipher $out7,$out7,v24
++ lvx v24,$x20,$key_ # round[3]
++ addi $key_,$key_,0x20
++
++ vncipher $out0,$out0,v25
++ vncipher $out1,$out1,v25
++ vncipher $out2,$out2,v25
++ vncipher $out3,$out3,v25
++ vncipher $out4,$out4,v25
++ vncipher $out5,$out5,v25
++ vncipher $out6,$out6,v25
++ vncipher $out7,$out7,v25
++ lvx v25,$x10,$key_ # round[4]
++ bdnz Loop_cbc_dec8x
++
++ subic $len,$len,128 # $len-=128
++ vncipher $out0,$out0,v24
++ vncipher $out1,$out1,v24
++ vncipher $out2,$out2,v24
++ vncipher $out3,$out3,v24
++ vncipher $out4,$out4,v24
++ vncipher $out5,$out5,v24
++ vncipher $out6,$out6,v24
++ vncipher $out7,$out7,v24
++
++ subfe. r0,r0,r0 # borrow?-1:0
++ vncipher $out0,$out0,v25
++ vncipher $out1,$out1,v25
++ vncipher $out2,$out2,v25
++ vncipher $out3,$out3,v25
++ vncipher $out4,$out4,v25
++ vncipher $out5,$out5,v25
++ vncipher $out6,$out6,v25
++ vncipher $out7,$out7,v25
++
++ and r0,r0,$len
++ vncipher $out0,$out0,v26
++ vncipher $out1,$out1,v26
++ vncipher $out2,$out2,v26
++ vncipher $out3,$out3,v26
++ vncipher $out4,$out4,v26
++ vncipher $out5,$out5,v26
++ vncipher $out6,$out6,v26
++ vncipher $out7,$out7,v26
++
++ add $inp,$inp,r0 # $inp is adjusted in such
++ # way that at exit from the
++ # loop inX-in7 are loaded
++ # with last "words"
++ vncipher $out0,$out0,v27
++ vncipher $out1,$out1,v27
++ vncipher $out2,$out2,v27
++ vncipher $out3,$out3,v27
++ vncipher $out4,$out4,v27
++ vncipher $out5,$out5,v27
++ vncipher $out6,$out6,v27
++ vncipher $out7,$out7,v27
++
++ addi $key_,$sp,$FRAME+15 # rewind $key_
++ vncipher $out0,$out0,v28
++ vncipher $out1,$out1,v28
++ vncipher $out2,$out2,v28
++ vncipher $out3,$out3,v28
++ vncipher $out4,$out4,v28
++ vncipher $out5,$out5,v28
++ vncipher $out6,$out6,v28
++ vncipher $out7,$out7,v28
++ lvx v24,$x00,$key_ # re-pre-load round[1]
++
++ vncipher $out0,$out0,v29
++ vncipher $out1,$out1,v29
++ vncipher $out2,$out2,v29
++ vncipher $out3,$out3,v29
++ vncipher $out4,$out4,v29
++ vncipher $out5,$out5,v29
++ vncipher $out6,$out6,v29
++ vncipher $out7,$out7,v29
++ lvx v25,$x10,$key_ # re-pre-load round[2]
++
++ vncipher $out0,$out0,v30
++ vxor $ivec,$ivec,v31 # xor with last round key
++ vncipher $out1,$out1,v30
++ vxor $in0,$in0,v31
++ vncipher $out2,$out2,v30
++ vxor $in1,$in1,v31
++ vncipher $out3,$out3,v30
++ vxor $in2,$in2,v31
++ vncipher $out4,$out4,v30
++ vxor $in3,$in3,v31
++ vncipher $out5,$out5,v30
++ vxor $in4,$in4,v31
++ vncipher $out6,$out6,v30
++ vxor $in5,$in5,v31
++ vncipher $out7,$out7,v30
++ vxor $in6,$in6,v31
++
++ vncipherlast $out0,$out0,$ivec
++ vncipherlast $out1,$out1,$in0
++ lvx_u $in0,$x00,$inp # load next input block
++ vncipherlast $out2,$out2,$in1
++ lvx_u $in1,$x10,$inp
++ vncipherlast $out3,$out3,$in2
++ le?vperm $in0,$in0,$in0,$inpperm
++ lvx_u $in2,$x20,$inp
++ vncipherlast $out4,$out4,$in3
++ le?vperm $in1,$in1,$in1,$inpperm
++ lvx_u $in3,$x30,$inp
++ vncipherlast $out5,$out5,$in4
++ le?vperm $in2,$in2,$in2,$inpperm
++ lvx_u $in4,$x40,$inp
++ vncipherlast $out6,$out6,$in5
++ le?vperm $in3,$in3,$in3,$inpperm
++ lvx_u $in5,$x50,$inp
++ vncipherlast $out7,$out7,$in6
++ le?vperm $in4,$in4,$in4,$inpperm
++ lvx_u $in6,$x60,$inp
++ vmr $ivec,$in7
++ le?vperm $in5,$in5,$in5,$inpperm
++ lvx_u $in7,$x70,$inp
++ addi $inp,$inp,0x80
++
++ le?vperm $out0,$out0,$out0,$inpperm
++ le?vperm $out1,$out1,$out1,$inpperm
++ stvx_u $out0,$x00,$out
++ le?vperm $in6,$in6,$in6,$inpperm
++ vxor $out0,$in0,$rndkey0
++ le?vperm $out2,$out2,$out2,$inpperm
++ stvx_u $out1,$x10,$out
++ le?vperm $in7,$in7,$in7,$inpperm
++ vxor $out1,$in1,$rndkey0
++ le?vperm $out3,$out3,$out3,$inpperm
++ stvx_u $out2,$x20,$out
++ vxor $out2,$in2,$rndkey0
++ le?vperm $out4,$out4,$out4,$inpperm
++ stvx_u $out3,$x30,$out
++ vxor $out3,$in3,$rndkey0
++ le?vperm $out5,$out5,$out5,$inpperm
++ stvx_u $out4,$x40,$out
++ vxor $out4,$in4,$rndkey0
++ le?vperm $out6,$out6,$out6,$inpperm
++ stvx_u $out5,$x50,$out
++ vxor $out5,$in5,$rndkey0
++ le?vperm $out7,$out7,$out7,$inpperm
++ stvx_u $out6,$x60,$out
++ vxor $out6,$in6,$rndkey0
++ stvx_u $out7,$x70,$out
++ addi $out,$out,0x80
++ vxor $out7,$in7,$rndkey0
++
++ mtctr $rounds
++ beq Loop_cbc_dec8x # did $len-=128 borrow?
++
++ addic. $len,$len,128
++ beq Lcbc_dec8x_done
++ nop
++ nop
++
++Loop_cbc_dec8x_tail: # up to 7 "words" tail...
++ vncipher $out1,$out1,v24
++ vncipher $out2,$out2,v24
++ vncipher $out3,$out3,v24
++ vncipher $out4,$out4,v24
++ vncipher $out5,$out5,v24
++ vncipher $out6,$out6,v24
++ vncipher $out7,$out7,v24
++ lvx v24,$x20,$key_ # round[3]
++ addi $key_,$key_,0x20
++
++ vncipher $out1,$out1,v25
++ vncipher $out2,$out2,v25
++ vncipher $out3,$out3,v25
++ vncipher $out4,$out4,v25
++ vncipher $out5,$out5,v25
++ vncipher $out6,$out6,v25
++ vncipher $out7,$out7,v25
++ lvx v25,$x10,$key_ # round[4]
++ bdnz Loop_cbc_dec8x_tail
++
++ vncipher $out1,$out1,v24
++ vncipher $out2,$out2,v24
++ vncipher $out3,$out3,v24
++ vncipher $out4,$out4,v24
++ vncipher $out5,$out5,v24
++ vncipher $out6,$out6,v24
++ vncipher $out7,$out7,v24
++
++ vncipher $out1,$out1,v25
++ vncipher $out2,$out2,v25
++ vncipher $out3,$out3,v25
++ vncipher $out4,$out4,v25
++ vncipher $out5,$out5,v25
++ vncipher $out6,$out6,v25
++ vncipher $out7,$out7,v25
++
++ vncipher $out1,$out1,v26
++ vncipher $out2,$out2,v26
++ vncipher $out3,$out3,v26
++ vncipher $out4,$out4,v26
++ vncipher $out5,$out5,v26
++ vncipher $out6,$out6,v26
++ vncipher $out7,$out7,v26
++
++ vncipher $out1,$out1,v27
++ vncipher $out2,$out2,v27
++ vncipher $out3,$out3,v27
++ vncipher $out4,$out4,v27
++ vncipher $out5,$out5,v27
++ vncipher $out6,$out6,v27
++ vncipher $out7,$out7,v27
++
++ vncipher $out1,$out1,v28
++ vncipher $out2,$out2,v28
++ vncipher $out3,$out3,v28
++ vncipher $out4,$out4,v28
++ vncipher $out5,$out5,v28
++ vncipher $out6,$out6,v28
++ vncipher $out7,$out7,v28
++
++ vncipher $out1,$out1,v29
++ vncipher $out2,$out2,v29
++ vncipher $out3,$out3,v29
++ vncipher $out4,$out4,v29
++ vncipher $out5,$out5,v29
++ vncipher $out6,$out6,v29
++ vncipher $out7,$out7,v29
++
++ vncipher $out1,$out1,v30
++ vxor $ivec,$ivec,v31 # last round key
++ vncipher $out2,$out2,v30
++ vxor $in1,$in1,v31
++ vncipher $out3,$out3,v30
++ vxor $in2,$in2,v31
++ vncipher $out4,$out4,v30
++ vxor $in3,$in3,v31
++ vncipher $out5,$out5,v30
++ vxor $in4,$in4,v31
++ vncipher $out6,$out6,v30
++ vxor $in5,$in5,v31
++ vncipher $out7,$out7,v30
++ vxor $in6,$in6,v31
++
++ cmplwi $len,32 # switch($len)
++ blt Lcbc_dec8x_one
++ nop
++ beq Lcbc_dec8x_two
++ cmplwi $len,64
++ blt Lcbc_dec8x_three
++ nop
++ beq Lcbc_dec8x_four
++ cmplwi $len,96
++ blt Lcbc_dec8x_five
++ nop
++ beq Lcbc_dec8x_six
++
++Lcbc_dec8x_seven:
++ vncipherlast $out1,$out1,$ivec
++ vncipherlast $out2,$out2,$in1
++ vncipherlast $out3,$out3,$in2
++ vncipherlast $out4,$out4,$in3
++ vncipherlast $out5,$out5,$in4
++ vncipherlast $out6,$out6,$in5
++ vncipherlast $out7,$out7,$in6
++ vmr $ivec,$in7
++
++ le?vperm $out1,$out1,$out1,$inpperm
++ le?vperm $out2,$out2,$out2,$inpperm
++ stvx_u $out1,$x00,$out
++ le?vperm $out3,$out3,$out3,$inpperm
++ stvx_u $out2,$x10,$out
++ le?vperm $out4,$out4,$out4,$inpperm
++ stvx_u $out3,$x20,$out
++ le?vperm $out5,$out5,$out5,$inpperm
++ stvx_u $out4,$x30,$out
++ le?vperm $out6,$out6,$out6,$inpperm
++ stvx_u $out5,$x40,$out
++ le?vperm $out7,$out7,$out7,$inpperm
++ stvx_u $out6,$x50,$out
++ stvx_u $out7,$x60,$out
++ addi $out,$out,0x70
++ b Lcbc_dec8x_done
++
++.align 5
++Lcbc_dec8x_six:
++ vncipherlast $out2,$out2,$ivec
++ vncipherlast $out3,$out3,$in2
++ vncipherlast $out4,$out4,$in3
++ vncipherlast $out5,$out5,$in4
++ vncipherlast $out6,$out6,$in5
++ vncipherlast $out7,$out7,$in6
++ vmr $ivec,$in7
++
++ le?vperm $out2,$out2,$out2,$inpperm
++ le?vperm $out3,$out3,$out3,$inpperm
++ stvx_u $out2,$x00,$out
++ le?vperm $out4,$out4,$out4,$inpperm
++ stvx_u $out3,$x10,$out
++ le?vperm $out5,$out5,$out5,$inpperm
++ stvx_u $out4,$x20,$out
++ le?vperm $out6,$out6,$out6,$inpperm
++ stvx_u $out5,$x30,$out
++ le?vperm $out7,$out7,$out7,$inpperm
++ stvx_u $out6,$x40,$out
++ stvx_u $out7,$x50,$out
++ addi $out,$out,0x60
++ b Lcbc_dec8x_done
++
++.align 5
++Lcbc_dec8x_five:
++ vncipherlast $out3,$out3,$ivec
++ vncipherlast $out4,$out4,$in3
++ vncipherlast $out5,$out5,$in4
++ vncipherlast $out6,$out6,$in5
++ vncipherlast $out7,$out7,$in6
++ vmr $ivec,$in7
++
++ le?vperm $out3,$out3,$out3,$inpperm
++ le?vperm $out4,$out4,$out4,$inpperm
++ stvx_u $out3,$x00,$out
++ le?vperm $out5,$out5,$out5,$inpperm
++ stvx_u $out4,$x10,$out
++ le?vperm $out6,$out6,$out6,$inpperm
++ stvx_u $out5,$x20,$out
++ le?vperm $out7,$out7,$out7,$inpperm
++ stvx_u $out6,$x30,$out
++ stvx_u $out7,$x40,$out
++ addi $out,$out,0x50
++ b Lcbc_dec8x_done
++
++.align 5
++Lcbc_dec8x_four:
++ vncipherlast $out4,$out4,$ivec
++ vncipherlast $out5,$out5,$in4
++ vncipherlast $out6,$out6,$in5
++ vncipherlast $out7,$out7,$in6
++ vmr $ivec,$in7
++
++ le?vperm $out4,$out4,$out4,$inpperm
++ le?vperm $out5,$out5,$out5,$inpperm
++ stvx_u $out4,$x00,$out
++ le?vperm $out6,$out6,$out6,$inpperm
++ stvx_u $out5,$x10,$out
++ le?vperm $out7,$out7,$out7,$inpperm
++ stvx_u $out6,$x20,$out
++ stvx_u $out7,$x30,$out
++ addi $out,$out,0x40
++ b Lcbc_dec8x_done
++
++.align 5
++Lcbc_dec8x_three:
++ vncipherlast $out5,$out5,$ivec
++ vncipherlast $out6,$out6,$in5
++ vncipherlast $out7,$out7,$in6
++ vmr $ivec,$in7
++
++ le?vperm $out5,$out5,$out5,$inpperm
++ le?vperm $out6,$out6,$out6,$inpperm
++ stvx_u $out5,$x00,$out
++ le?vperm $out7,$out7,$out7,$inpperm
++ stvx_u $out6,$x10,$out
++ stvx_u $out7,$x20,$out
++ addi $out,$out,0x30
++ b Lcbc_dec8x_done
++
++.align 5
++Lcbc_dec8x_two:
++ vncipherlast $out6,$out6,$ivec
++ vncipherlast $out7,$out7,$in6
++ vmr $ivec,$in7
++
++ le?vperm $out6,$out6,$out6,$inpperm
++ le?vperm $out7,$out7,$out7,$inpperm
++ stvx_u $out6,$x00,$out
++ stvx_u $out7,$x10,$out
++ addi $out,$out,0x20
++ b Lcbc_dec8x_done
++
++.align 5
++Lcbc_dec8x_one:
++ vncipherlast $out7,$out7,$ivec
++ vmr $ivec,$in7
++
++ le?vperm $out7,$out7,$out7,$inpperm
++ stvx_u $out7,0,$out
++ addi $out,$out,0x10
++
++Lcbc_dec8x_done:
++ le?vperm $ivec,$ivec,$ivec,$inpperm
++ stvx_u $ivec,0,$ivp # write [unaligned] iv
++
++ li r10,`$FRAME+15`
++ li r11,`$FRAME+31`
++ stvx $inpperm,r10,$sp # wipe copies of round keys
++ addi r10,r10,32
++ stvx $inpperm,r11,$sp
++ addi r11,r11,32
++ stvx $inpperm,r10,$sp
++ addi r10,r10,32
++ stvx $inpperm,r11,$sp
++ addi r11,r11,32
++ stvx $inpperm,r10,$sp
++ addi r10,r10,32
++ stvx $inpperm,r11,$sp
++ addi r11,r11,32
++ stvx $inpperm,r10,$sp
++ addi r10,r10,32
++ stvx $inpperm,r11,$sp
++ addi r11,r11,32
++
++ mtspr 256,$vrsave
++ lvx v20,r10,$sp # ABI says so
++ addi r10,r10,32
++ lvx v21,r11,$sp
++ addi r11,r11,32
++ lvx v22,r10,$sp
++ addi r10,r10,32
++ lvx v23,r11,$sp
++ addi r11,r11,32
++ lvx v24,r10,$sp
++ addi r10,r10,32
++ lvx v25,r11,$sp
++ addi r11,r11,32
++ lvx v26,r10,$sp
++ addi r10,r10,32
++ lvx v27,r11,$sp
++ addi r11,r11,32
++ lvx v28,r10,$sp
++ addi r10,r10,32
++ lvx v29,r11,$sp
++ addi r11,r11,32
++ lvx v30,r10,$sp
++ lvx v31,r11,$sp
++ $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
++ $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
++ $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
++ $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
++ $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
++ $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
++ addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
++ blr
++ .long 0
++ .byte 0,12,0x14,0,0x80,6,6,0
++ .long 0
++.size .${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt
++___
++}} }}}
++
++#########################################################################
++{{{ # CTR procedure[s] #
++my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10));
++my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3));
++my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)=
++ map("v$_",(4..11));
++my $dat=$tmp;
++
++$code.=<<___;
++.globl .${prefix}_ctr32_encrypt_blocks
++.align 5
++.${prefix}_ctr32_encrypt_blocks:
++ ${UCMP}i $len,1
++ bltlr-
++
++ lis r0,0xfff0
++ mfspr $vrsave,256
++ mtspr 256,r0
++
++ li $idx,15
++ vxor $rndkey0,$rndkey0,$rndkey0
++ le?vspltisb $tmp,0x0f
++
++ lvx $ivec,0,$ivp # load [unaligned] iv
++ lvsl $inpperm,0,$ivp
++ lvx $inptail,$idx,$ivp
++ vspltisb $one,1
++ le?vxor $inpperm,$inpperm,$tmp
++ vperm $ivec,$ivec,$inptail,$inpperm
++ vsldoi $one,$rndkey0,$one,1
++
++ neg r11,$inp
++ ?lvsl $keyperm,0,$key # prepare for unaligned key
++ lwz $rounds,240($key)
++
++ lvsr $inpperm,0,r11 # prepare for unaligned load
++ lvx $inptail,0,$inp
++ addi $inp,$inp,15 # 15 is not typo
++ le?vxor $inpperm,$inpperm,$tmp
++
++ srwi $rounds,$rounds,1
++ li $idx,16
++ subi $rounds,$rounds,1
++
++ ${UCMP}i $len,8
++ bge _aesp8_ctr32_encrypt8x
++
++ ?lvsr $outperm,0,$out # prepare for unaligned store
++ vspltisb $outmask,-1
++ lvx $outhead,0,$out
++ ?vperm $outmask,$rndkey0,$outmask,$outperm
++ le?vxor $outperm,$outperm,$tmp
++
++ lvx $rndkey0,0,$key
++ mtctr $rounds
++ lvx $rndkey1,$idx,$key
++ addi $idx,$idx,16
++ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
++ vxor $inout,$ivec,$rndkey0
++ lvx $rndkey0,$idx,$key
++ addi $idx,$idx,16
++ b Loop_ctr32_enc
++
++.align 5
++Loop_ctr32_enc:
++ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
++ vcipher $inout,$inout,$rndkey1
++ lvx $rndkey1,$idx,$key
++ addi $idx,$idx,16
++ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
++ vcipher $inout,$inout,$rndkey0
++ lvx $rndkey0,$idx,$key
++ addi $idx,$idx,16
++ bdnz Loop_ctr32_enc
++
++ vadduwm $ivec,$ivec,$one
++ vmr $dat,$inptail
++ lvx $inptail,0,$inp
++ addi $inp,$inp,16
++ subic. $len,$len,1 # blocks--
++
++ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
++ vcipher $inout,$inout,$rndkey1
++ lvx $rndkey1,$idx,$key
++ vperm $dat,$dat,$inptail,$inpperm
++ li $idx,16
++ ?vperm $rndkey1,$rndkey0,$rndkey1,$keyperm
++ lvx $rndkey0,0,$key
++ vxor $dat,$dat,$rndkey1 # last round key
++ vcipherlast $inout,$inout,$dat
++
++ lvx $rndkey1,$idx,$key
++ addi $idx,$idx,16
++ vperm $inout,$inout,$inout,$outperm
++ vsel $dat,$outhead,$inout,$outmask
++ mtctr $rounds
++ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
++ vmr $outhead,$inout
++ vxor $inout,$ivec,$rndkey0
++ lvx $rndkey0,$idx,$key
++ addi $idx,$idx,16
++ stvx $dat,0,$out
++ addi $out,$out,16
++ bne Loop_ctr32_enc
++
++ addi $out,$out,-1
++ lvx $inout,0,$out # redundant in aligned case
++ vsel $inout,$outhead,$inout,$outmask
++ stvx $inout,0,$out
++
++ mtspr 256,$vrsave
++ blr
++ .long 0
++ .byte 0,12,0x14,0,0,0,6,0
++ .long 0
++___
++#########################################################################
++{{ # Optimized CTR procedure #
++my $key_="r11";
++my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
++my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14));
++my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22));
++my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys
++ # v26-v31 last 6 round keys
++my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
++my ($two,$three,$four)=($outhead,$outperm,$outmask);
++
++$code.=<<___;
++.align 5
++_aesp8_ctr32_encrypt8x:
++ $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
++ li r10,`$FRAME+8*16+15`
++ li r11,`$FRAME+8*16+31`
++ stvx v20,r10,$sp # ABI says so
++ addi r10,r10,32
++ stvx v21,r11,$sp
++ addi r11,r11,32
++ stvx v22,r10,$sp
++ addi r10,r10,32
++ stvx v23,r11,$sp
++ addi r11,r11,32
++ stvx v24,r10,$sp
++ addi r10,r10,32
++ stvx v25,r11,$sp
++ addi r11,r11,32
++ stvx v26,r10,$sp
++ addi r10,r10,32
++ stvx v27,r11,$sp
++ addi r11,r11,32
++ stvx v28,r10,$sp
++ addi r10,r10,32
++ stvx v29,r11,$sp
++ addi r11,r11,32
++ stvx v30,r10,$sp
++ stvx v31,r11,$sp
++ li r0,-1
++ stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
++ li $x10,0x10
++ $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
++ li $x20,0x20
++ $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
++ li $x30,0x30
++ $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
++ li $x40,0x40
++ $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
++ li $x50,0x50
++ $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
++ li $x60,0x60
++ $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
++ li $x70,0x70
++ mtspr 256,r0
++
++ subi $rounds,$rounds,3 # -4 in total
++
++ lvx $rndkey0,$x00,$key # load key schedule
++ lvx v30,$x10,$key
++ addi $key,$key,0x20
++ lvx v31,$x00,$key
++ ?vperm $rndkey0,$rndkey0,v30,$keyperm
++ addi $key_,$sp,$FRAME+15
++ mtctr $rounds
++
++Load_ctr32_enc_key:
++ ?vperm v24,v30,v31,$keyperm
++ lvx v30,$x10,$key
++ addi $key,$key,0x20
++ stvx v24,$x00,$key_ # off-load round[1]
++ ?vperm v25,v31,v30,$keyperm
++ lvx v31,$x00,$key
++ stvx v25,$x10,$key_ # off-load round[2]
++ addi $key_,$key_,0x20
++ bdnz Load_ctr32_enc_key
++
++ lvx v26,$x10,$key
++ ?vperm v24,v30,v31,$keyperm
++ lvx v27,$x20,$key
++ stvx v24,$x00,$key_ # off-load round[3]
++ ?vperm v25,v31,v26,$keyperm
++ lvx v28,$x30,$key
++ stvx v25,$x10,$key_ # off-load round[4]
++ addi $key_,$sp,$FRAME+15 # rewind $key_
++ ?vperm v26,v26,v27,$keyperm
++ lvx v29,$x40,$key
++ ?vperm v27,v27,v28,$keyperm
++ lvx v30,$x50,$key
++ ?vperm v28,v28,v29,$keyperm
++ lvx v31,$x60,$key
++ ?vperm v29,v29,v30,$keyperm
++ lvx $out0,$x70,$key # borrow $out0
++ ?vperm v30,v30,v31,$keyperm
++ lvx v24,$x00,$key_ # pre-load round[1]
++ ?vperm v31,v31,$out0,$keyperm
++ lvx v25,$x10,$key_ # pre-load round[2]
++
++ vadduwm $two,$one,$one
++ subi $inp,$inp,15 # undo "caller"
++ $SHL $len,$len,4
++
++ vadduwm $out1,$ivec,$one # counter values ...
++ vadduwm $out2,$ivec,$two
++ vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0]
++ le?li $idx,8
++ vadduwm $out3,$out1,$two
++ vxor $out1,$out1,$rndkey0
++ le?lvsl $inpperm,0,$idx
++ vadduwm $out4,$out2,$two
++ vxor $out2,$out2,$rndkey0
++ le?vspltisb $tmp,0x0f
++ vadduwm $out5,$out3,$two
++ vxor $out3,$out3,$rndkey0
++ le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
++ vadduwm $out6,$out4,$two
++ vxor $out4,$out4,$rndkey0
++ vadduwm $out7,$out5,$two
++ vxor $out5,$out5,$rndkey0
++ vadduwm $ivec,$out6,$two # next counter value
++ vxor $out6,$out6,$rndkey0
++ vxor $out7,$out7,$rndkey0
++
++ mtctr $rounds
++ b Loop_ctr32_enc8x
++.align 5
++Loop_ctr32_enc8x:
++ vcipher $out0,$out0,v24
++ vcipher $out1,$out1,v24
++ vcipher $out2,$out2,v24
++ vcipher $out3,$out3,v24
++ vcipher $out4,$out4,v24
++ vcipher $out5,$out5,v24
++ vcipher $out6,$out6,v24
++ vcipher $out7,$out7,v24
++Loop_ctr32_enc8x_middle:
++ lvx v24,$x20,$key_ # round[3]
++ addi $key_,$key_,0x20
++
++ vcipher $out0,$out0,v25
++ vcipher $out1,$out1,v25
++ vcipher $out2,$out2,v25
++ vcipher $out3,$out3,v25
++ vcipher $out4,$out4,v25
++ vcipher $out5,$out5,v25
++ vcipher $out6,$out6,v25
++ vcipher $out7,$out7,v25
++ lvx v25,$x10,$key_ # round[4]
++ bdnz Loop_ctr32_enc8x
++
++ subic r11,$len,256 # $len-256, borrow $key_
++ vcipher $out0,$out0,v24
++ vcipher $out1,$out1,v24
++ vcipher $out2,$out2,v24
++ vcipher $out3,$out3,v24
++ vcipher $out4,$out4,v24
++ vcipher $out5,$out5,v24
++ vcipher $out6,$out6,v24
++ vcipher $out7,$out7,v24
++
++ subfe r0,r0,r0 # borrow?-1:0
++ vcipher $out0,$out0,v25
++ vcipher $out1,$out1,v25
++ vcipher $out2,$out2,v25
++ vcipher $out3,$out3,v25
++ vcipher $out4,$out4,v25
++ vcipher $out5,$out5,v25
++ vcipher $out6,$out6,v25
++ vcipher $out7,$out7,v25
++
++ and r0,r0,r11
++ addi $key_,$sp,$FRAME+15 # rewind $key_
++ vcipher $out0,$out0,v26
++ vcipher $out1,$out1,v26
++ vcipher $out2,$out2,v26
++ vcipher $out3,$out3,v26
++ vcipher $out4,$out4,v26
++ vcipher $out5,$out5,v26
++ vcipher $out6,$out6,v26
++ vcipher $out7,$out7,v26
++ lvx v24,$x00,$key_ # re-pre-load round[1]
++
++ subic $len,$len,129 # $len-=129
++ vcipher $out0,$out0,v27
++ addi $len,$len,1 # $len-=128 really
++ vcipher $out1,$out1,v27
++ vcipher $out2,$out2,v27
++ vcipher $out3,$out3,v27
++ vcipher $out4,$out4,v27
++ vcipher $out5,$out5,v27
++ vcipher $out6,$out6,v27
++ vcipher $out7,$out7,v27
++ lvx v25,$x10,$key_ # re-pre-load round[2]
++
++ vcipher $out0,$out0,v28
++ lvx_u $in0,$x00,$inp # load input
++ vcipher $out1,$out1,v28
++ lvx_u $in1,$x10,$inp
++ vcipher $out2,$out2,v28
++ lvx_u $in2,$x20,$inp
++ vcipher $out3,$out3,v28
++ lvx_u $in3,$x30,$inp
++ vcipher $out4,$out4,v28
++ lvx_u $in4,$x40,$inp
++ vcipher $out5,$out5,v28
++ lvx_u $in5,$x50,$inp
++ vcipher $out6,$out6,v28
++ lvx_u $in6,$x60,$inp
++ vcipher $out7,$out7,v28
++ lvx_u $in7,$x70,$inp
++ addi $inp,$inp,0x80
++
++ vcipher $out0,$out0,v29
++ le?vperm $in0,$in0,$in0,$inpperm
++ vcipher $out1,$out1,v29
++ le?vperm $in1,$in1,$in1,$inpperm
++ vcipher $out2,$out2,v29
++ le?vperm $in2,$in2,$in2,$inpperm
++ vcipher $out3,$out3,v29
++ le?vperm $in3,$in3,$in3,$inpperm
++ vcipher $out4,$out4,v29
++ le?vperm $in4,$in4,$in4,$inpperm
++ vcipher $out5,$out5,v29
++ le?vperm $in5,$in5,$in5,$inpperm
++ vcipher $out6,$out6,v29
++ le?vperm $in6,$in6,$in6,$inpperm
++ vcipher $out7,$out7,v29
++ le?vperm $in7,$in7,$in7,$inpperm
++
++ add $inp,$inp,r0 # $inp is adjusted in such
++ # way that at exit from the
++ # loop inX-in7 are loaded
++ # with last "words"
++ subfe. r0,r0,r0 # borrow?-1:0
++ vcipher $out0,$out0,v30
++ vxor $in0,$in0,v31 # xor with last round key
++ vcipher $out1,$out1,v30
++ vxor $in1,$in1,v31
++ vcipher $out2,$out2,v30
++ vxor $in2,$in2,v31
++ vcipher $out3,$out3,v30
++ vxor $in3,$in3,v31
++ vcipher $out4,$out4,v30
++ vxor $in4,$in4,v31
++ vcipher $out5,$out5,v30
++ vxor $in5,$in5,v31
++ vcipher $out6,$out6,v30
++ vxor $in6,$in6,v31
++ vcipher $out7,$out7,v30
++ vxor $in7,$in7,v31
++
++ bne Lctr32_enc8x_break # did $len-129 borrow?
++
++ vcipherlast $in0,$out0,$in0
++ vcipherlast $in1,$out1,$in1
++ vadduwm $out1,$ivec,$one # counter values ...
++ vcipherlast $in2,$out2,$in2
++ vadduwm $out2,$ivec,$two
++ vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0]
++ vcipherlast $in3,$out3,$in3
++ vadduwm $out3,$out1,$two
++ vxor $out1,$out1,$rndkey0
++ vcipherlast $in4,$out4,$in4
++ vadduwm $out4,$out2,$two
++ vxor $out2,$out2,$rndkey0
++ vcipherlast $in5,$out5,$in5
++ vadduwm $out5,$out3,$two
++ vxor $out3,$out3,$rndkey0
++ vcipherlast $in6,$out6,$in6
++ vadduwm $out6,$out4,$two
++ vxor $out4,$out4,$rndkey0
++ vcipherlast $in7,$out7,$in7
++ vadduwm $out7,$out5,$two
++ vxor $out5,$out5,$rndkey0
++ le?vperm $in0,$in0,$in0,$inpperm
++ vadduwm $ivec,$out6,$two # next counter value
++ vxor $out6,$out6,$rndkey0
++ le?vperm $in1,$in1,$in1,$inpperm
++ vxor $out7,$out7,$rndkey0
++ mtctr $rounds
++
++ vcipher $out0,$out0,v24
++ stvx_u $in0,$x00,$out
++ le?vperm $in2,$in2,$in2,$inpperm
++ vcipher $out1,$out1,v24
++ stvx_u $in1,$x10,$out
++ le?vperm $in3,$in3,$in3,$inpperm
++ vcipher $out2,$out2,v24
++ stvx_u $in2,$x20,$out
++ le?vperm $in4,$in4,$in4,$inpperm
++ vcipher $out3,$out3,v24
++ stvx_u $in3,$x30,$out
++ le?vperm $in5,$in5,$in5,$inpperm
++ vcipher $out4,$out4,v24
++ stvx_u $in4,$x40,$out
++ le?vperm $in6,$in6,$in6,$inpperm
++ vcipher $out5,$out5,v24
++ stvx_u $in5,$x50,$out
++ le?vperm $in7,$in7,$in7,$inpperm
++ vcipher $out6,$out6,v24
++ stvx_u $in6,$x60,$out
++ vcipher $out7,$out7,v24
++ stvx_u $in7,$x70,$out
++ addi $out,$out,0x80
++
++ b Loop_ctr32_enc8x_middle
++
++.align 5
++Lctr32_enc8x_break:
++ cmpwi $len,-0x60
++ blt Lctr32_enc8x_one
++ nop
++ beq Lctr32_enc8x_two
++ cmpwi $len,-0x40
++ blt Lctr32_enc8x_three
++ nop
++ beq Lctr32_enc8x_four
++ cmpwi $len,-0x20
++ blt Lctr32_enc8x_five
++ nop
++ beq Lctr32_enc8x_six
++ cmpwi $len,0x00
++ blt Lctr32_enc8x_seven
++
++Lctr32_enc8x_eight:
++ vcipherlast $out0,$out0,$in0
++ vcipherlast $out1,$out1,$in1
++ vcipherlast $out2,$out2,$in2
++ vcipherlast $out3,$out3,$in3
++ vcipherlast $out4,$out4,$in4
++ vcipherlast $out5,$out5,$in5
++ vcipherlast $out6,$out6,$in6
++ vcipherlast $out7,$out7,$in7
++
++ le?vperm $out0,$out0,$out0,$inpperm
++ le?vperm $out1,$out1,$out1,$inpperm
++ stvx_u $out0,$x00,$out
++ le?vperm $out2,$out2,$out2,$inpperm
++ stvx_u $out1,$x10,$out
++ le?vperm $out3,$out3,$out3,$inpperm
++ stvx_u $out2,$x20,$out
++ le?vperm $out4,$out4,$out4,$inpperm
++ stvx_u $out3,$x30,$out
++ le?vperm $out5,$out5,$out5,$inpperm
++ stvx_u $out4,$x40,$out
++ le?vperm $out6,$out6,$out6,$inpperm
++ stvx_u $out5,$x50,$out
++ le?vperm $out7,$out7,$out7,$inpperm
++ stvx_u $out6,$x60,$out
++ stvx_u $out7,$x70,$out
++ addi $out,$out,0x80
++ b Lctr32_enc8x_done
++
++.align 5
++Lctr32_enc8x_seven:
++ vcipherlast $out0,$out0,$in1
++ vcipherlast $out1,$out1,$in2
++ vcipherlast $out2,$out2,$in3
++ vcipherlast $out3,$out3,$in4
++ vcipherlast $out4,$out4,$in5
++ vcipherlast $out5,$out5,$in6
++ vcipherlast $out6,$out6,$in7
++
++ le?vperm $out0,$out0,$out0,$inpperm
++ le?vperm $out1,$out1,$out1,$inpperm
++ stvx_u $out0,$x00,$out
++ le?vperm $out2,$out2,$out2,$inpperm
++ stvx_u $out1,$x10,$out
++ le?vperm $out3,$out3,$out3,$inpperm
++ stvx_u $out2,$x20,$out
++ le?vperm $out4,$out4,$out4,$inpperm
++ stvx_u $out3,$x30,$out
++ le?vperm $out5,$out5,$out5,$inpperm
++ stvx_u $out4,$x40,$out
++ le?vperm $out6,$out6,$out6,$inpperm
++ stvx_u $out5,$x50,$out
++ stvx_u $out6,$x60,$out
++ addi $out,$out,0x70
++ b Lctr32_enc8x_done
++
++.align 5
++Lctr32_enc8x_six:
++ vcipherlast $out0,$out0,$in2
++ vcipherlast $out1,$out1,$in3
++ vcipherlast $out2,$out2,$in4
++ vcipherlast $out3,$out3,$in5
++ vcipherlast $out4,$out4,$in6
++ vcipherlast $out5,$out5,$in7
++
++ le?vperm $out0,$out0,$out0,$inpperm
++ le?vperm $out1,$out1,$out1,$inpperm
++ stvx_u $out0,$x00,$out
++ le?vperm $out2,$out2,$out2,$inpperm
++ stvx_u $out1,$x10,$out
++ le?vperm $out3,$out3,$out3,$inpperm
++ stvx_u $out2,$x20,$out
++ le?vperm $out4,$out4,$out4,$inpperm
++ stvx_u $out3,$x30,$out
++ le?vperm $out5,$out5,$out5,$inpperm
++ stvx_u $out4,$x40,$out
++ stvx_u $out5,$x50,$out
++ addi $out,$out,0x60
++ b Lctr32_enc8x_done
++
++.align 5
++Lctr32_enc8x_five:
++ vcipherlast $out0,$out0,$in3
++ vcipherlast $out1,$out1,$in4
++ vcipherlast $out2,$out2,$in5
++ vcipherlast $out3,$out3,$in6
++ vcipherlast $out4,$out4,$in7
++
++ le?vperm $out0,$out0,$out0,$inpperm
++ le?vperm $out1,$out1,$out1,$inpperm
++ stvx_u $out0,$x00,$out
++ le?vperm $out2,$out2,$out2,$inpperm
++ stvx_u $out1,$x10,$out
++ le?vperm $out3,$out3,$out3,$inpperm
++ stvx_u $out2,$x20,$out
++ le?vperm $out4,$out4,$out4,$inpperm
++ stvx_u $out3,$x30,$out
++ stvx_u $out4,$x40,$out
++ addi $out,$out,0x50
++ b Lctr32_enc8x_done
++
++.align 5
++Lctr32_enc8x_four:
++ vcipherlast $out0,$out0,$in4
++ vcipherlast $out1,$out1,$in5
++ vcipherlast $out2,$out2,$in6
++ vcipherlast $out3,$out3,$in7
++
++ le?vperm $out0,$out0,$out0,$inpperm
++ le?vperm $out1,$out1,$out1,$inpperm
++ stvx_u $out0,$x00,$out
++ le?vperm $out2,$out2,$out2,$inpperm
++ stvx_u $out1,$x10,$out
++ le?vperm $out3,$out3,$out3,$inpperm
++ stvx_u $out2,$x20,$out
++ stvx_u $out3,$x30,$out
++ addi $out,$out,0x40
++ b Lctr32_enc8x_done
++
++.align 5
++Lctr32_enc8x_three:
++ vcipherlast $out0,$out0,$in5
++ vcipherlast $out1,$out1,$in6
++ vcipherlast $out2,$out2,$in7
++
++ le?vperm $out0,$out0,$out0,$inpperm
++ le?vperm $out1,$out1,$out1,$inpperm
++ stvx_u $out0,$x00,$out
++ le?vperm $out2,$out2,$out2,$inpperm
++ stvx_u $out1,$x10,$out
++ stvx_u $out2,$x20,$out
++ addi $out,$out,0x30
++	b	Lctr32_enc8x_done
++
++.align 5
++Lctr32_enc8x_two:
++ vcipherlast $out0,$out0,$in6
++ vcipherlast $out1,$out1,$in7
++
++ le?vperm $out0,$out0,$out0,$inpperm
++ le?vperm $out1,$out1,$out1,$inpperm
++ stvx_u $out0,$x00,$out
++ stvx_u $out1,$x10,$out
++ addi $out,$out,0x20
++	b	Lctr32_enc8x_done
++
++.align 5
++Lctr32_enc8x_one:
++ vcipherlast $out0,$out0,$in7
++
++ le?vperm $out0,$out0,$out0,$inpperm
++ stvx_u $out0,0,$out
++ addi $out,$out,0x10
++
++Lctr32_enc8x_done:
++ li r10,`$FRAME+15`
++ li r11,`$FRAME+31`
++ stvx $inpperm,r10,$sp # wipe copies of round keys
++ addi r10,r10,32
++ stvx $inpperm,r11,$sp
++ addi r11,r11,32
++ stvx $inpperm,r10,$sp
++ addi r10,r10,32
++ stvx $inpperm,r11,$sp
++ addi r11,r11,32
++ stvx $inpperm,r10,$sp
++ addi r10,r10,32
++ stvx $inpperm,r11,$sp
++ addi r11,r11,32
++ stvx $inpperm,r10,$sp
++ addi r10,r10,32
++ stvx $inpperm,r11,$sp
++ addi r11,r11,32
++
++ mtspr 256,$vrsave
++ lvx v20,r10,$sp # ABI says so
++ addi r10,r10,32
++ lvx v21,r11,$sp
++ addi r11,r11,32
++ lvx v22,r10,$sp
++ addi r10,r10,32
++ lvx v23,r11,$sp
++ addi r11,r11,32
++ lvx v24,r10,$sp
++ addi r10,r10,32
++ lvx v25,r11,$sp
++ addi r11,r11,32
++ lvx v26,r10,$sp
++ addi r10,r10,32
++ lvx v27,r11,$sp
++ addi r11,r11,32
++ lvx v28,r10,$sp
++ addi r10,r10,32
++ lvx v29,r11,$sp
++ addi r11,r11,32
++ lvx v30,r10,$sp
++ lvx v31,r11,$sp
++ $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
++ $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
++ $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
++ $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
++ $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
++ $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
++ addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
++ blr
++ .long 0
++ .byte 0,12,0x14,0,0x80,6,6,0
++ .long 0
++.size .${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks
++___
++}} }}}
++
++my $consts=1;
++foreach(split("\n",$code)) {
++ s/\`([^\`]*)\`/eval($1)/geo;
++
++ # constants table endian-specific conversion
++ if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
++ my $conv=$3;
++ my @bytes=();
++
++ # convert to endian-agnostic format
++ if ($1 eq "long") {
++ foreach (split(/,\s*/,$2)) {
++ my $l = /^0/?oct:int;
++ push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
++ }
++ } else {
++ @bytes = map(/^0/?oct:int,split(/,\s*/,$2));
++ }
++
++ # little-endian conversion
++ if ($flavour =~ /le$/o) {
++ SWITCH: for($conv) {
++	    /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; };
++ /\?rev/ && do { @bytes=reverse(@bytes); last; };
++ }
++ }
++
++ #emit
++ print ".byte\t",join(',',map (sprintf("0x%02x",$_), at bytes)),"\n";
++ next;
++ }
++ $consts=0 if (m/Lconsts:/o); # end of table
++
++ # instructions prefixed with '?' are endian-specific and need
++ # to be adjusted accordingly...
++ if ($flavour =~ /le$/o) { # little-endian
++ s/le\?//o or
++ s/be\?/#be#/o or
++ s/\?lvsr/lvsl/o or
++ s/\?lvsl/lvsr/o or
++ s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
++ s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
++ s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
++ } else { # big-endian
++ s/le\?/#le#/o or
++ s/be\?//o or
++ s/\?([a-z]+)/$1/o;
++ }
++
++ print $_,"\n";
++}
++
++close STDOUT;
+diff --git a/crypto/aes/asm/vpaes-ppc.pl b/crypto/aes/asm/vpaes-ppc.pl
+new file mode 100644
+index 0000000..7fda60e
+--- /dev/null
++++ b/crypto/aes/asm/vpaes-ppc.pl
+@@ -0,0 +1,1512 @@
++#!/usr/bin/env perl
++
++######################################################################
++## Constant-time SSSE3 AES core implementation.
++## version 0.1
++##
++## By Mike Hamburg (Stanford University), 2009
++## Public domain.
++##
++## For details see http://shiftleft.org/papers/vector_aes/ and
++## http://crypto.stanford.edu/vpaes/.
++
++# CBC encrypt/decrypt performance in cycles per byte processed with
++# 128-bit key.
++#
++# aes-ppc.pl this
++# G4e 35.5/52.1/(23.8) 11.9(*)/15.4
++# POWER6 42.7/54.3/(28.2) 63.0/92.8(**)
++# POWER7 32.3/42.9/(18.4) 18.5/23.3
++#
++# (*) This is ~10% worse than reported in paper. The reason is
++# twofold. This module doesn't make any assumption about
++# key schedule (or data for that matter) alignment and handles
++# it in-line. Secondly it, being transliterated from
++# vpaes-x86_64.pl, relies on "nested inversion" better suited
++# for Intel CPUs.
++# (**) Inadequate POWER6 performance is due to astronomic AltiVec
++# latency, 9 cycles per simple logical operation.
++
++$flavour = shift;
++
++if ($flavour =~ /64/) {
++ $SIZE_T =8;
++ $LRSAVE =2*$SIZE_T;
++ $STU ="stdu";
++ $POP ="ld";
++ $PUSH ="std";
++ $UCMP ="cmpld";
++} elsif ($flavour =~ /32/) {
++ $SIZE_T =4;
++ $LRSAVE =$SIZE_T;
++ $STU ="stwu";
++ $POP ="lwz";
++ $PUSH ="stw";
++ $UCMP ="cmplw";
++} else { die "nonsense $flavour"; }
++
++$sp="r1";
++$FRAME=6*$SIZE_T+13*16; # 13*16 is for v20-v31 offload
++
++$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
++( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
++( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
++die "can't locate ppc-xlate.pl";
++
++open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!";
++
++$code.=<<___;
++.machine "any"
++
++.text
++
++.align 7 # totally strategic alignment
++_vpaes_consts:
++Lk_mc_forward: # mc_forward
++ .long 0x01020300, 0x05060704, 0x090a0b08, 0x0d0e0f0c ?inv
++ .long 0x05060704, 0x090a0b08, 0x0d0e0f0c, 0x01020300 ?inv
++ .long 0x090a0b08, 0x0d0e0f0c, 0x01020300, 0x05060704 ?inv
++ .long 0x0d0e0f0c, 0x01020300, 0x05060704, 0x090a0b08 ?inv
++Lk_mc_backward: # mc_backward
++ .long 0x03000102, 0x07040506, 0x0b08090a, 0x0f0c0d0e ?inv
++ .long 0x0f0c0d0e, 0x03000102, 0x07040506, 0x0b08090a ?inv
++ .long 0x0b08090a, 0x0f0c0d0e, 0x03000102, 0x07040506 ?inv
++ .long 0x07040506, 0x0b08090a, 0x0f0c0d0e, 0x03000102 ?inv
++Lk_sr: # sr
++ .long 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f ?inv
++ .long 0x00050a0f, 0x04090e03, 0x080d0207, 0x0c01060b ?inv
++ .long 0x0009020b, 0x040d060f, 0x08010a03, 0x0c050e07 ?inv
++ .long 0x000d0a07, 0x04010e0b, 0x0805020f, 0x0c090603 ?inv
++
++##
++## "Hot" constants
++##
++Lk_inv: # inv, inva
++ .long 0xf001080d, 0x0f06050e, 0x020c0b0a, 0x09030704 ?rev
++ .long 0xf0070b0f, 0x060a0401, 0x09080502, 0x0c0e0d03 ?rev
++Lk_ipt: # input transform (lo, hi)
++ .long 0x00702a5a, 0x98e8b2c2, 0x08782252, 0x90e0baca ?rev
++ .long 0x004d7c31, 0x7d30014c, 0x81ccfdb0, 0xfcb180cd ?rev
++Lk_sbo: # sbou, sbot
++ .long 0x00c7bd6f, 0x176dd2d0, 0x78a802c5, 0x7abfaa15 ?rev
++ .long 0x006abb5f, 0xa574e4cf, 0xfa352b41, 0xd1901e8e ?rev
++Lk_sb1: # sb1u, sb1t
++ .long 0x0023e2fa, 0x15d41836, 0xefd92e0d, 0xc1ccf73b ?rev
++ .long 0x003e50cb, 0x8fe19bb1, 0x44f52a14, 0x6e7adfa5 ?rev
++Lk_sb2: # sb2u, sb2t
++ .long 0x0029e10a, 0x4088eb69, 0x4a2382ab, 0xc863a1c2 ?rev
++ .long 0x0024710b, 0xc6937ae2, 0xcd2f98bc, 0x55e9b75e ?rev
++
++##
++## Decryption stuff
++##
++Lk_dipt: # decryption input transform
++ .long 0x005f540b, 0x045b500f, 0x1a454e11, 0x1e414a15 ?rev
++ .long 0x00650560, 0xe683e386, 0x94f191f4, 0x72177712 ?rev
++Lk_dsbo: # decryption sbox final output
++ .long 0x0040f97e, 0x53ea8713, 0x2d3e94d4, 0xb96daac7 ?rev
++ .long 0x001d4493, 0x0f56d712, 0x9c8ec5d8, 0x59814bca ?rev
++Lk_dsb9: # decryption sbox output *9*u, *9*t
++ .long 0x00d6869a, 0x53031c85, 0xc94c994f, 0x501fd5ca ?rev
++ .long 0x0049d7ec, 0x89173bc0, 0x65a5fbb2, 0x9e2c5e72 ?rev
++Lk_dsbd: # decryption sbox output *D*u, *D*t
++ .long 0x00a2b1e6, 0xdfcc577d, 0x39442a88, 0x139b6ef5 ?rev
++ .long 0x00cbc624, 0xf7fae23c, 0xd3efde15, 0x0d183129 ?rev
++Lk_dsbb: # decryption sbox output *B*u, *B*t
++ .long 0x0042b496, 0x926422d0, 0x04d4f2b0, 0xf6462660 ?rev
++ .long 0x006759cd, 0xa69894c1, 0x6baa5532, 0x3e0cfff3 ?rev
++Lk_dsbe: # decryption sbox output *E*u, *E*t
++ .long 0x00d0d426, 0x9692f246, 0xb0f6b464, 0x04604222 ?rev
++ .long 0x00c1aaff, 0xcda6550c, 0x323e5998, 0x6bf36794 ?rev
++
++##
++## Key schedule constants
++##
++Lk_dksd: # decryption key schedule: invskew x*D
++ .long 0x0047e4a3, 0x5d1ab9fe, 0xf9be1d5a, 0xa4e34007 ?rev
++ .long 0x008336b5, 0xf477c241, 0x1e9d28ab, 0xea69dc5f ?rev
++Lk_dksb: # decryption key schedule: invskew x*B
++ .long 0x00d55085, 0x1fca4f9a, 0x994cc91c, 0x8653d603 ?rev
++ .long 0x004afcb6, 0xa7ed5b11, 0xc882347e, 0x6f2593d9 ?rev
++Lk_dkse: # decryption key schedule: invskew x*E + 0x63
++ .long 0x00d6c91f, 0xca1c03d5, 0x86504f99, 0x4c9a8553 ?rev
++ .long 0xe87bdc4f, 0x059631a2, 0x8714b320, 0x6af95ecd ?rev
++Lk_dks9: # decryption key schedule: invskew x*9
++ .long 0x00a7d97e, 0xc86f11b6, 0xfc5b2582, 0x3493ed4a ?rev
++ .long 0x00331427, 0x62517645, 0xcefddae9, 0xac9fb88b ?rev
++
++Lk_rcon: # rcon
++ .long 0xb6ee9daf, 0xb991831f, 0x817d7c4d, 0x08982a70 ?asis
++Lk_s63:
++ .long 0x5b5b5b5b, 0x5b5b5b5b, 0x5b5b5b5b, 0x5b5b5b5b ?asis
++
++Lk_opt: # output transform
++ .long 0x0060b6d6, 0x29499fff, 0x0868bede, 0x214197f7 ?rev
++ .long 0x00ecbc50, 0x51bded01, 0xe00c5cb0, 0xb15d0de1 ?rev
++Lk_deskew: # deskew tables: inverts the sbox's "skew"
++ .long 0x00e3a447, 0x40a3e407, 0x1af9be5d, 0x5ab9fe1d ?rev
++ .long 0x0069ea83, 0xdcb5365f, 0x771e9df4, 0xabc24128 ?rev
++.align 5
++Lconsts:
++ mflr r0
++ bcl 20,31,\$+4
++ mflr r12 #vvvvv "distance between . and _vpaes_consts
++ addi r12,r12,-0x308
++ mtlr r0
++ blr
++ .long 0
++ .byte 0,12,0x14,0,0,0,0,0
++.asciz "Vector Permutation AES for AltiVec, Mike Hamburg (Stanford University)"
++.align 6
++___
++
++my ($inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm) = map("v$_",(26..31));
++{
++my ($inp,$out,$key) = map("r$_",(3..5));
++
++my ($invlo,$invhi,$iptlo,$ipthi,$sbou,$sbot) = map("v$_",(10..15));
++my ($sb1u,$sb1t,$sb2u,$sb2t) = map("v$_",(16..19));
++my ($sb9u,$sb9t,$sbdu,$sbdt,$sbbu,$sbbt,$sbeu,$sbet)=map("v$_",(16..23));
++
++$code.=<<___;
++##
++## _aes_preheat
++##
++## Fills register r12 -> _vpaes_consts (so you can -fPIC)
++## and %xmm9-%xmm15 as specified below.
++##
++.align 4
++_vpaes_encrypt_preheat:
++ mflr r8
++ bl Lconsts
++ mtlr r8
++ li r11, 0xc0 # Lk_inv
++ li r10, 0xd0
++ li r9, 0xe0 # Lk_ipt
++ li r8, 0xf0
++ vxor v7, v7, v7 # 0x00..00
++ vspltisb v8,4 # 0x04..04
++ vspltisb v9,0x0f # 0x0f..0f
++ lvx $invlo, r12, r11
++ li r11, 0x100
++ lvx $invhi, r12, r10
++ li r10, 0x110
++ lvx $iptlo, r12, r9
++ li r9, 0x120
++ lvx $ipthi, r12, r8
++ li r8, 0x130
++ lvx $sbou, r12, r11
++ li r11, 0x140
++ lvx $sbot, r12, r10
++ li r10, 0x150
++ lvx $sb1u, r12, r9
++ lvx $sb1t, r12, r8
++ lvx $sb2u, r12, r11
++ lvx $sb2t, r12, r10
++ blr
++ .long 0
++ .byte 0,12,0x14,0,0,0,0,0
++
++##
++## _aes_encrypt_core
++##
++## AES-encrypt %xmm0.
++##
++## Inputs:
++## %xmm0 = input
++## %xmm9-%xmm15 as in _vpaes_preheat
++## (%rdx) = scheduled keys
++##
++## Output in %xmm0
++## Clobbers %xmm1-%xmm6, %r9, %r10, %r11, %rax
++##
++##
++.align 5
++_vpaes_encrypt_core:
++ lwz r8, 240($key) # pull rounds
++ li r9, 16
++ lvx v5, 0, $key # vmovdqu (%r9), %xmm5 # round0 key
++ li r11, 0x10
++ lvx v6, r9, $key
++ addi r9, r9, 16
++ ?vperm v5, v5, v6, $keyperm # align round key
++ addi r10, r11, 0x40
++ vsrb v1, v0, v8 # vpsrlb \$4, %xmm0, %xmm0
++ vperm v0, $iptlo, $iptlo, v0 # vpshufb %xmm1, %xmm2, %xmm1
++ vperm v1, $ipthi, $ipthi, v1 # vpshufb %xmm0, %xmm3, %xmm2
++ vxor v0, v0, v5 # vpxor %xmm5, %xmm1, %xmm0
++ vxor v0, v0, v1 # vpxor %xmm2, %xmm0, %xmm0
++ mtctr r8
++ b Lenc_entry
++
++.align 4
++Lenc_loop:
++ # middle of middle round
++ vperm v4, $sb1t, v7, v2 # vpshufb %xmm2, %xmm13, %xmm4 # 4 = sb1u
++ lvx v1, r12, r11 # vmovdqa -0x40(%r11,%r10), %xmm1 # .Lk_mc_forward[]
++ addi r11, r11, 16
++ vperm v0, $sb1u, v7, v3 # vpshufb %xmm3, %xmm12, %xmm0 # 0 = sb1t
++ vxor v4, v4, v5 # vpxor %xmm5, %xmm4, %xmm4 # 4 = sb1u + k
++ andi. r11, r11, 0x30 # and \$0x30, %r11 # ... mod 4
++ vperm v5, $sb2t, v7, v2 # vpshufb %xmm2, %xmm15, %xmm5 # 4 = sb2u
++ vxor v0, v0, v4 # vpxor %xmm4, %xmm0, %xmm0 # 0 = A
++ vperm v2, $sb2u, v7, v3 # vpshufb %xmm3, %xmm14, %xmm2 # 2 = sb2t
++ lvx v4, r12, r10 # vmovdqa (%r11,%r10), %xmm4 # .Lk_mc_backward[]
++ addi r10, r11, 0x40
++ vperm v3, v0, v7, v1 # vpshufb %xmm1, %xmm0, %xmm3 # 0 = B
++ vxor v2, v2, v5 # vpxor %xmm5, %xmm2, %xmm2 # 2 = 2A
++ vperm v0, v0, v7, v4 # vpshufb %xmm4, %xmm0, %xmm0 # 3 = D
++ vxor v3, v3, v2 # vpxor %xmm2, %xmm3, %xmm3 # 0 = 2A+B
++ vperm v4, v3, v7, v1 # vpshufb %xmm1, %xmm3, %xmm4 # 0 = 2B+C
++ vxor v0, v0, v3 # vpxor %xmm3, %xmm0, %xmm0 # 3 = 2A+B+D
++ vxor v0, v0, v4 # vpxor %xmm4, %xmm0, %xmm0 # 0 = 2A+3B+C+D
++
++Lenc_entry:
++ # top of round
++ vsrb v1, v0, v8 # vpsrlb \$4, %xmm0, %xmm0 # 1 = i
++ vperm v5, $invhi, $invhi, v0 # vpshufb %xmm1, %xmm11, %xmm5 # 2 = a/k
++ vxor v0, v0, v1 # vpxor %xmm0, %xmm1, %xmm1 # 0 = j
++ vperm v3, $invlo, $invlo, v1 # vpshufb %xmm0, %xmm10, %xmm3 # 3 = 1/i
++ vperm v4, $invlo, $invlo, v0 # vpshufb %xmm1, %xmm10, %xmm4 # 4 = 1/j
++ vand v0, v0, v9
++ vxor v3, v3, v5 # vpxor %xmm5, %xmm3, %xmm3 # 3 = iak = 1/i + a/k
++ vxor v4, v4, v5 # vpxor %xmm5, %xmm4, %xmm4 # 4 = jak = 1/j + a/k
++ vperm v2, $invlo, v7, v3 # vpshufb %xmm3, %xmm10, %xmm2 # 2 = 1/iak
++ vmr v5, v6
++ lvx v6, r9, $key # vmovdqu (%r9), %xmm5
++ vperm v3, $invlo, v7, v4 # vpshufb %xmm4, %xmm10, %xmm3 # 3 = 1/jak
++ addi r9, r9, 16
++ vxor v2, v2, v0 # vpxor %xmm1, %xmm2, %xmm2 # 2 = io
++ ?vperm v5, v5, v6, $keyperm # align round key
++ vxor v3, v3, v1 # vpxor %xmm0, %xmm3, %xmm3 # 3 = jo
++ bdnz Lenc_loop
++
++ # middle of last round
++ addi r10, r11, 0x80
++ # vmovdqa -0x60(%r10), %xmm4 # 3 : sbou .Lk_sbo
++ # vmovdqa -0x50(%r10), %xmm0 # 0 : sbot .Lk_sbo+16
++ vperm v4, $sbou, v7, v2 # vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbou
++ lvx v1, r12, r10 # vmovdqa 0x40(%r11,%r10), %xmm1 # .Lk_sr[]
++ vperm v0, $sbot, v7, v3 # vpshufb %xmm3, %xmm0, %xmm0 # 0 = sb1t
++ vxor v4, v4, v5 # vpxor %xmm5, %xmm4, %xmm4 # 4 = sb1u + k
++ vxor v0, v0, v4 # vpxor %xmm4, %xmm0, %xmm0 # 0 = A
++ vperm v0, v0, v7, v1 # vpshufb %xmm1, %xmm0, %xmm0
++ blr
++ .long 0
++ .byte 0,12,0x14,0,0,0,0,0
++
++.globl .vpaes_encrypt
++.align 5
++.vpaes_encrypt:
++ $STU $sp,-$FRAME($sp)
++ li r10,`15+6*$SIZE_T`
++ li r11,`31+6*$SIZE_T`
++ mflr r6
++ mfspr r7, 256 # save vrsave
++ stvx v20,r10,$sp
++ addi r10,r10,32
++ stvx v21,r11,$sp
++ addi r11,r11,32
++ stvx v22,r10,$sp
++ addi r10,r10,32
++ stvx v23,r11,$sp
++ addi r11,r11,32
++ stvx v24,r10,$sp
++ addi r10,r10,32
++ stvx v25,r11,$sp
++ addi r11,r11,32
++ stvx v26,r10,$sp
++ addi r10,r10,32
++ stvx v27,r11,$sp
++ addi r11,r11,32
++ stvx v28,r10,$sp
++ addi r10,r10,32
++ stvx v29,r11,$sp
++ addi r11,r11,32
++ stvx v30,r10,$sp
++ stvx v31,r11,$sp
++ stw r7,`$FRAME-4`($sp) # save vrsave
++ li r0, -1
++ $PUSH r6,`$FRAME+$LRSAVE`($sp)
++ mtspr 256, r0 # preserve all AltiVec registers
++
++ bl _vpaes_encrypt_preheat
++
++ ?lvsl $inpperm, 0, $inp # prepare for unaligned access
++ lvx v0, 0, $inp
++ addi $inp, $inp, 15 # 15 is not a typo
++ ?lvsr $outperm, 0, $out
++ ?lvsl $keyperm, 0, $key # prepare for unaligned access
++ vnor $outmask, v7, v7 # 0xff..ff
++ lvx $inptail, 0, $inp # redundant in aligned case
++ ?vperm $outmask, v7, $outmask, $outperm
++ lvx $outhead, 0, $out
++ ?vperm v0, v0, $inptail, $inpperm
++
++ bl _vpaes_encrypt_core
++
++ vperm v0, v0, v0, $outperm # rotate right/left
++ vsel v1, $outhead, v0, $outmask
++ vmr $outhead, v0
++ stvx v1, 0, $out
++ addi $out, $out, 15 # 15 is not a typo
++ ########
++
++ lvx v1, 0, $out # redundant in aligned case
++ vsel v1, $outhead, v1, $outmask
++ stvx v1, 0, $out
++
++ li r10,`15+6*$SIZE_T`
++ li r11,`31+6*$SIZE_T`
++ mtlr r6
++ mtspr 256, r7 # restore vrsave
++ lvx v20,r10,$sp
++ addi r10,r10,32
++ lvx v21,r11,$sp
++ addi r11,r11,32
++ lvx v22,r10,$sp
++ addi r10,r10,32
++ lvx v23,r11,$sp
++ addi r11,r11,32
++ lvx v24,r10,$sp
++ addi r10,r10,32
++ lvx v25,r11,$sp
++ addi r11,r11,32
++ lvx v26,r10,$sp
++ addi r10,r10,32
++ lvx v27,r11,$sp
++ addi r11,r11,32
++ lvx v28,r10,$sp
++ addi r10,r10,32
++ lvx v29,r11,$sp
++ addi r11,r11,32
++ lvx v30,r10,$sp
++ lvx v31,r11,$sp
++ addi $sp,$sp,$FRAME
++ blr
++ .long 0
++ .byte 0,12,0x04,1,0x80,0,3,0
++ .long 0
++.size .vpaes_encrypt,.-.vpaes_encrypt
++
++.align 4
++_vpaes_decrypt_preheat:
++ mflr r8
++ bl Lconsts
++ mtlr r8
++ li r11, 0xc0 # Lk_inv
++ li r10, 0xd0
++ li r9, 0x160 # Ldipt
++ li r8, 0x170
++ vxor v7, v7, v7 # 0x00..00
++ vspltisb v8,4 # 0x04..04
++ vspltisb v9,0x0f # 0x0f..0f
++ lvx $invlo, r12, r11
++ li r11, 0x180
++ lvx $invhi, r12, r10
++ li r10, 0x190
++ lvx $iptlo, r12, r9
++ li r9, 0x1a0
++ lvx $ipthi, r12, r8
++ li r8, 0x1b0
++ lvx $sbou, r12, r11
++ li r11, 0x1c0
++ lvx $sbot, r12, r10
++ li r10, 0x1d0
++ lvx $sb9u, r12, r9
++ li r9, 0x1e0
++ lvx $sb9t, r12, r8
++ li r8, 0x1f0
++ lvx $sbdu, r12, r11
++ li r11, 0x200
++ lvx $sbdt, r12, r10
++ li r10, 0x210
++ lvx $sbbu, r12, r9
++ lvx $sbbt, r12, r8
++ lvx $sbeu, r12, r11
++ lvx $sbet, r12, r10
++ blr
++ .long 0
++ .byte 0,12,0x14,0,0,0,0,0
++
++##
++## Decryption core
++##
++## Same API as encryption core.
++##
++.align 4
++_vpaes_decrypt_core:
++ lwz r8, 240($key) # pull rounds
++ li r9, 16
++ lvx v5, 0, $key # vmovdqu (%r9), %xmm4 # round0 key
++ li r11, 0x30
++ lvx v6, r9, $key
++ addi r9, r9, 16
++ ?vperm v5, v5, v6, $keyperm # align round key
++ vsrb v1, v0, v8 # vpsrlb \$4, %xmm0, %xmm0
++ vperm v0, $iptlo, $iptlo, v0 # vpshufb %xmm1, %xmm2, %xmm2
++ vperm v1, $ipthi, $ipthi, v1 # vpshufb %xmm0, %xmm1, %xmm0
++ vxor v0, v0, v5 # vpxor %xmm4, %xmm2, %xmm2
++ vxor v0, v0, v1 # vpxor %xmm2, %xmm0, %xmm0
++ mtctr r8
++ b Ldec_entry
++
++.align 4
++Ldec_loop:
++#
++# Inverse mix columns
++#
++ lvx v0, r12, r11 # v5 and v0 are flipped
++ # vmovdqa -0x20(%r10),%xmm4 # 4 : sb9u
++ # vmovdqa -0x10(%r10),%xmm1 # 0 : sb9t
++ vperm v4, $sb9u, v7, v2 # vpshufb %xmm2, %xmm4, %xmm4 # 4 = sb9u
++ subi r11, r11, 16
++ vperm v1, $sb9t, v7, v3 # vpshufb %xmm3, %xmm1, %xmm1 # 0 = sb9t
++ andi. r11, r11, 0x30
++ vxor v5, v5, v4 # vpxor %xmm4, %xmm0, %xmm0
++ # vmovdqa 0x00(%r10),%xmm4 # 4 : sbdu
++ vxor v5, v5, v1 # vpxor %xmm1, %xmm0, %xmm0 # 0 = ch
++ # vmovdqa 0x10(%r10),%xmm1 # 0 : sbdt
++
++ vperm v4, $sbdu, v7, v2 # vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbdu
++ vperm v5, v5, v7, v0 # vpshufb %xmm5, %xmm0, %xmm0 # MC ch
++ vperm v1, $sbdt, v7, v3 # vpshufb %xmm3, %xmm1, %xmm1 # 0 = sbdt
++ vxor v5, v5, v4 # vpxor %xmm4, %xmm0, %xmm0 # 4 = ch
++ # vmovdqa 0x20(%r10), %xmm4 # 4 : sbbu
++ vxor v5, v5, v1 # vpxor %xmm1, %xmm0, %xmm0 # 0 = ch
++ # vmovdqa 0x30(%r10), %xmm1 # 0 : sbbt
++
++ vperm v4, $sbbu, v7, v2 # vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbbu
++ vperm v5, v5, v7, v0 # vpshufb %xmm5, %xmm0, %xmm0 # MC ch
++ vperm v1, $sbbt, v7, v3 # vpshufb %xmm3, %xmm1, %xmm1 # 0 = sbbt
++ vxor v5, v5, v4 # vpxor %xmm4, %xmm0, %xmm0 # 4 = ch
++ # vmovdqa 0x40(%r10), %xmm4 # 4 : sbeu
++ vxor v5, v5, v1 # vpxor %xmm1, %xmm0, %xmm0 # 0 = ch
++ # vmovdqa 0x50(%r10), %xmm1 # 0 : sbet
++
++ vperm v4, $sbeu, v7, v2 # vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbeu
++ vperm v5, v5, v7, v0 # vpshufb %xmm5, %xmm0, %xmm0 # MC ch
++ vperm v1, $sbet, v7, v3 # vpshufb %xmm3, %xmm1, %xmm1 # 0 = sbet
++ vxor v0, v5, v4 # vpxor %xmm4, %xmm0, %xmm0 # 4 = ch
++ vxor v0, v0, v1 # vpxor %xmm1, %xmm0, %xmm0 # 0 = ch
++
++Ldec_entry:
++ # top of round
++ vsrb v1, v0, v8 # vpsrlb \$4, %xmm0, %xmm0 # 1 = i
++ vperm v2, $invhi, $invhi, v0 # vpshufb %xmm1, %xmm11, %xmm2 # 2 = a/k
++ vxor v0, v0, v1 # vpxor %xmm0, %xmm1, %xmm1 # 0 = j
++ vperm v3, $invlo, $invlo, v1 # vpshufb %xmm0, %xmm10, %xmm3 # 3 = 1/i
++ vperm v4, $invlo, $invlo, v0 # vpshufb %xmm1, %xmm10, %xmm4 # 4 = 1/j
++ vand v0, v0, v9
++ vxor v3, v3, v2 # vpxor %xmm2, %xmm3, %xmm3 # 3 = iak = 1/i + a/k
++ vxor v4, v4, v2 # vpxor %xmm2, %xmm4, %xmm4 # 4 = jak = 1/j + a/k
++ vperm v2, $invlo, v7, v3 # vpshufb %xmm3, %xmm10, %xmm2 # 2 = 1/iak
++ vmr v5, v6
++ lvx v6, r9, $key # vmovdqu (%r9), %xmm0
++ vperm v3, $invlo, v7, v4 # vpshufb %xmm4, %xmm10, %xmm3 # 3 = 1/jak
++ addi r9, r9, 16
++ vxor v2, v2, v0 # vpxor %xmm1, %xmm2, %xmm2 # 2 = io
++ ?vperm v5, v5, v6, $keyperm # align round key
++ vxor v3, v3, v1 # vpxor %xmm0, %xmm3, %xmm3 # 3 = jo
++ bdnz Ldec_loop
++
++ # middle of last round
++ addi r10, r11, 0x80
++ # vmovdqa 0x60(%r10), %xmm4 # 3 : sbou
++ vperm v4, $sbou, v7, v2 # vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbou
++ # vmovdqa 0x70(%r10), %xmm1 # 0 : sbot
++ lvx v2, r12, r10 # vmovdqa -0x160(%r11), %xmm2 # .Lk_sr-.Lk_dsbd=-0x160
++ vperm v1, $sbot, v7, v3 # vpshufb %xmm3, %xmm1, %xmm1 # 0 = sb1t
++ vxor v4, v4, v5 # vpxor %xmm0, %xmm4, %xmm4 # 4 = sb1u + k
++ vxor v0, v1, v4 # vpxor %xmm4, %xmm1, %xmm0 # 0 = A
++ vperm v0, v0, v7, v2 # vpshufb %xmm2, %xmm0, %xmm0
++ blr
++ .long 0
++ .byte 0,12,0x14,0,0,0,0,0
++
++.globl .vpaes_decrypt
++.align 5
++.vpaes_decrypt:
++ $STU $sp,-$FRAME($sp)
++ li r10,`15+6*$SIZE_T`
++ li r11,`31+6*$SIZE_T`
++ mflr r6
++ mfspr r7, 256 # save vrsave
++ stvx v20,r10,$sp
++ addi r10,r10,32
++ stvx v21,r11,$sp
++ addi r11,r11,32
++ stvx v22,r10,$sp
++ addi r10,r10,32
++ stvx v23,r11,$sp
++ addi r11,r11,32
++ stvx v24,r10,$sp
++ addi r10,r10,32
++ stvx v25,r11,$sp
++ addi r11,r11,32
++ stvx v26,r10,$sp
++ addi r10,r10,32
++ stvx v27,r11,$sp
++ addi r11,r11,32
++ stvx v28,r10,$sp
++ addi r10,r10,32
++ stvx v29,r11,$sp
++ addi r11,r11,32
++ stvx v30,r10,$sp
++ stvx v31,r11,$sp
++ stw r7,`$FRAME-4`($sp) # save vrsave
++ li r0, -1
++ $PUSH r6,`$FRAME+$LRSAVE`($sp)
++ mtspr 256, r0 # preserve all AltiVec registers
++
++ bl _vpaes_decrypt_preheat
++
++ ?lvsl $inpperm, 0, $inp # prepare for unaligned access
++ lvx v0, 0, $inp
++ addi $inp, $inp, 15 # 15 is not a typo
++ ?lvsr $outperm, 0, $out
++ ?lvsl $keyperm, 0, $key
++ vnor $outmask, v7, v7 # 0xff..ff
++ lvx $inptail, 0, $inp # redundant in aligned case
++ ?vperm $outmask, v7, $outmask, $outperm
++ lvx $outhead, 0, $out
++ ?vperm v0, v0, $inptail, $inpperm
++
++ bl _vpaes_decrypt_core
++
++ vperm v0, v0, v0, $outperm # rotate right/left
++ vsel v1, $outhead, v0, $outmask
++ vmr $outhead, v0
++ stvx v1, 0, $out
++ addi $out, $out, 15 # 15 is not a typo
++ ########
++
++ lvx v1, 0, $out # redundant in aligned case
++ vsel v1, $outhead, v1, $outmask
++ stvx v1, 0, $out
++
++ li r10,`15+6*$SIZE_T`
++ li r11,`31+6*$SIZE_T`
++ mtlr r6
++ mtspr 256, r7 # restore vrsave
++ lvx v20,r10,$sp
++ addi r10,r10,32
++ lvx v21,r11,$sp
++ addi r11,r11,32
++ lvx v22,r10,$sp
++ addi r10,r10,32
++ lvx v23,r11,$sp
++ addi r11,r11,32
++ lvx v24,r10,$sp
++ addi r10,r10,32
++ lvx v25,r11,$sp
++ addi r11,r11,32
++ lvx v26,r10,$sp
++ addi r10,r10,32
++ lvx v27,r11,$sp
++ addi r11,r11,32
++ lvx v28,r10,$sp
++ addi r10,r10,32
++ lvx v29,r11,$sp
++ addi r11,r11,32
++ lvx v30,r10,$sp
++ lvx v31,r11,$sp
++ addi $sp,$sp,$FRAME
++ blr
++ .long 0
++ .byte 0,12,0x04,1,0x80,0,3,0
++ .long 0
++.size .vpaes_decrypt,.-.vpaes_decrypt
++
++.globl .vpaes_cbc_encrypt
++.align 5
++.vpaes_cbc_encrypt:
++ ${UCMP}i r5,16
++ bltlr-
++
++ $STU $sp,-`($FRAME+2*$SIZE_T)`($sp)
++ mflr r0
++ li r10,`15+6*$SIZE_T`
++ li r11,`31+6*$SIZE_T`
++ mfspr r12, 256
++ stvx v20,r10,$sp
++ addi r10,r10,32
++ stvx v21,r11,$sp
++ addi r11,r11,32
++ stvx v22,r10,$sp
++ addi r10,r10,32
++ stvx v23,r11,$sp
++ addi r11,r11,32
++ stvx v24,r10,$sp
++ addi r10,r10,32
++ stvx v25,r11,$sp
++ addi r11,r11,32
++ stvx v26,r10,$sp
++ addi r10,r10,32
++ stvx v27,r11,$sp
++ addi r11,r11,32
++ stvx v28,r10,$sp
++ addi r10,r10,32
++ stvx v29,r11,$sp
++ addi r11,r11,32
++ stvx v30,r10,$sp
++ stvx v31,r11,$sp
++ stw r12,`$FRAME-4`($sp) # save vrsave
++ $PUSH r30,`$FRAME+$SIZE_T*0`($sp)
++ $PUSH r31,`$FRAME+$SIZE_T*1`($sp)
++ li r9, -16
++ $PUSH r0, `$FRAME+$SIZE_T*2+$LRSAVE`($sp)
++
++ and r30, r5, r9 # copy length&-16
++ mr r5, r6 # copy pointer to key
++ mr r31, r7 # copy pointer to iv
++ blt Lcbc_abort
++ cmpwi r8, 0 # test direction
++ li r6, -1
++ mr r7, r12 # copy vrsave
++ mtspr 256, r6 # preserve all AltiVec registers
++
++ lvx v24, 0, r31 # load [potentially unaligned] iv
++ li r9, 15
++ ?lvsl $inpperm, 0, r31
++ lvx v25, r9, r31
++ ?vperm v24, v24, v25, $inpperm
++
++ neg r8, $inp # prepare for unaligned access
++ vxor v7, v7, v7
++ ?lvsl $keyperm, 0, $key
++ ?lvsr $outperm, 0, $out
++ ?lvsr $inpperm, 0, r8 # -$inp
++ vnor $outmask, v7, v7 # 0xff..ff
++ lvx $inptail, 0, $inp
++ ?vperm $outmask, v7, $outmask, $outperm
++ addi $inp, $inp, 15 # 15 is not a typo
++ lvx $outhead, 0, $out
++
++ beq Lcbc_decrypt
++
++ bl _vpaes_encrypt_preheat
++ li r0, 16
++
++Lcbc_enc_loop:
++ vmr v0, $inptail
++ lvx $inptail, 0, $inp
++ addi $inp, $inp, 16
++ ?vperm v0, v0, $inptail, $inpperm
++ vxor v0, v0, v24 # ^= iv
++
++ bl _vpaes_encrypt_core
++
++ vmr v24, v0 # put aside iv
++ sub. r30, r30, r0 # len -= 16
++ vperm v0, v0, v0, $outperm # rotate right/left
++ vsel v1, $outhead, v0, $outmask
++ vmr $outhead, v0
++ stvx v1, 0, $out
++ addi $out, $out, 16
++ bne Lcbc_enc_loop
++
++ b Lcbc_done
++
++.align 5
++Lcbc_decrypt:
++ bl _vpaes_decrypt_preheat
++ li r0, 16
++
++Lcbc_dec_loop:
++ vmr v0, $inptail
++ lvx $inptail, 0, $inp
++ addi $inp, $inp, 16
++ ?vperm v0, v0, $inptail, $inpperm
++ vmr v25, v0 # put aside input
++
++ bl _vpaes_decrypt_core
++
++ vxor v0, v0, v24 # ^= iv
++ vmr v24, v25
++ sub. r30, r30, r0 # len -= 16
++ vperm v0, v0, v0, $outperm # rotate right/left
++ vsel v1, $outhead, v0, $outmask
++ vmr $outhead, v0
++ stvx v1, 0, $out
++ addi $out, $out, 16
++ bne Lcbc_dec_loop
++
++Lcbc_done:
++ addi $out, $out, -1
++ lvx v1, 0, $out # redundant in aligned case
++ vsel v1, $outhead, v1, $outmask
++ stvx v1, 0, $out
++
++ neg r8, r31 # write [potentially unaligned] iv
++ ?lvsl $outperm, 0, r8
++ li r6, 15
++ vnor $outmask, v7, v7 # 0xff..ff
++ ?vperm $outmask, v7, $outmask, $outperm
++ lvx $outhead, 0, r31
++ vperm v24, v24, v24, $outperm # rotate right/left
++ vsel v0, $outhead, v24, $outmask
++ lvx v1, r6, r31
++ stvx v0, 0, r31
++ vsel v1, v24, v1, $outmask
++ stvx v1, r6, r31
++
++ mtspr 256, r7 # restore vrsave
++ li r10,`15+6*$SIZE_T`
++ li r11,`31+6*$SIZE_T`
++ lvx v20,r10,$sp
++ addi r10,r10,32
++ lvx v21,r11,$sp
++ addi r11,r11,32
++ lvx v22,r10,$sp
++ addi r10,r10,32
++ lvx v23,r11,$sp
++ addi r11,r11,32
++ lvx v24,r10,$sp
++ addi r10,r10,32
++ lvx v25,r11,$sp
++ addi r11,r11,32
++ lvx v26,r10,$sp
++ addi r10,r10,32
++ lvx v27,r11,$sp
++ addi r11,r11,32
++ lvx v28,r10,$sp
++ addi r10,r10,32
++ lvx v29,r11,$sp
++ addi r11,r11,32
++ lvx v30,r10,$sp
++ lvx v31,r11,$sp
++Lcbc_abort:
++ $POP r0, `$FRAME+$SIZE_T*2+$LRSAVE`($sp)
++ $POP r30,`$FRAME+$SIZE_T*0`($sp)
++ $POP r31,`$FRAME+$SIZE_T*1`($sp)
++ mtlr r0
++ addi $sp,$sp,`$FRAME+$SIZE_T*2`
++ blr
++ .long 0
++ .byte 0,12,0x04,1,0x80,2,6,0
++ .long 0
++.size .vpaes_cbc_encrypt,.-.vpaes_cbc_encrypt
++___
++}
++{
++my ($inp,$bits,$out)=map("r$_",(3..5));
++my $dir="cr1";
++my ($invlo,$invhi,$iptlo,$ipthi,$rcon) = map("v$_",(10..13,24));
++
++$code.=<<___;
++########################################################
++## ##
++## AES key schedule ##
++## ##
++########################################################
++.align 4
++_vpaes_key_preheat:
++ mflr r8
++ bl Lconsts
++ mtlr r8
++ li r11, 0xc0 # Lk_inv
++ li r10, 0xd0
++ li r9, 0xe0 # L_ipt
++ li r8, 0xf0
++
++ vspltisb v8,4 # 0x04..04
++ vxor v9,v9,v9 # 0x00..00
++ lvx $invlo, r12, r11 # Lk_inv
++ li r11, 0x120
++ lvx $invhi, r12, r10
++ li r10, 0x130
++ lvx $iptlo, r12, r9 # Lk_ipt
++ li r9, 0x220
++ lvx $ipthi, r12, r8
++ li r8, 0x230
++
++ lvx v14, r12, r11 # Lk_sb1
++ li r11, 0x240
++ lvx v15, r12, r10
++ li r10, 0x250
++
++ lvx v16, r12, r9 # Lk_dksd
++ li r9, 0x260
++ lvx v17, r12, r8
++ li r8, 0x270
++ lvx v18, r12, r11 # Lk_dksb
++ li r11, 0x280
++ lvx v19, r12, r10
++ li r10, 0x290
++ lvx v20, r12, r9 # Lk_dkse
++ li r9, 0x2a0
++ lvx v21, r12, r8
++ li r8, 0x2b0
++ lvx v22, r12, r11 # Lk_dks9
++ lvx v23, r12, r10
++
++ lvx v24, r12, r9 # Lk_rcon
++ lvx v25, 0, r12 # Lk_mc_forward[0]
++ lvx v26, r12, r8 # Lks63
++ blr
++ .long 0
++ .byte 0,12,0x14,0,0,0,0,0
++
++.align 4
++_vpaes_schedule_core:
++ mflr r7
++
++ bl _vpaes_key_preheat # load the tables
++
++ #lvx v0, 0, $inp # vmovdqu (%rdi), %xmm0 # load key (unaligned)
++ neg r8, $inp # prepare for unaligned access
++ lvx v0, 0, $inp
++ addi $inp, $inp, 15 # 15 is not typo
++ ?lvsr $inpperm, 0, r8 # -$inp
++ lvx v6, 0, $inp # v6 serves as inptail
++ addi $inp, $inp, 8
++ ?vperm v0, v0, v6, $inpperm
++
++ # input transform
++ vmr v3, v0 # vmovdqa %xmm0, %xmm3
++ bl _vpaes_schedule_transform
++ vmr v7, v0 # vmovdqa %xmm0, %xmm7
++
++ bne $dir, Lschedule_am_decrypting
++
++ # encrypting, output zeroth round key after transform
++ li r8, 0x30 # mov \$0x30,%r8d
++ addi r10, r12, 0x80 # lea .Lk_sr(%rip),%r10
++
++ ?lvsr $outperm, 0, $out # prepare for unaligned access
++ vnor $outmask, v9, v9 # 0xff..ff
++ lvx $outhead, 0, $out
++ ?vperm $outmask, v9, $outmask, $outperm
++
++ #stvx v0, 0, $out # vmovdqu %xmm0, (%rdx)
++ vperm v1, v0, v0, $outperm # rotate right/left
++ vsel v2, $outhead, v1, $outmask
++ vmr $outhead, v1
++ stvx v2, 0, $out
++ b Lschedule_go
++
++Lschedule_am_decrypting:
++ srwi r8, $bits, 1 # shr \$1,%r8d
++ andi. r8, r8, 32 # and \$32,%r8d
++ xori r8, r8, 32 # xor \$32,%r8d # nbits==192?0:32
++ addi r10, r12, 0x80 # lea .Lk_sr(%rip),%r10
++ # decrypting, output zeroth round key after shiftrows
++ lvx v1, r8, r10 # vmovdqa (%r8,%r10), %xmm1
++ vperm v4, v3, v3, v1 # vpshufb %xmm1, %xmm3, %xmm3
++
++ neg r0, $out # prepare for unaligned access
++ ?lvsl $outperm, 0, r0
++ addi $out, $out, 15 # 15 is not typo
++ vnor $outmask, v9, v9 # 0xff..ff
++ lvx $outhead, 0, $out
++ ?vperm $outmask, $outmask, v9, $outperm
++
++ #stvx v4, 0, $out # vmovdqu %xmm3, (%rdx)
++ vperm v4, v4, v4, $outperm # rotate right/left
++ vsel v2, $outhead, v4, $outmask
++ vmr $outhead, v4
++ stvx v2, 0, $out
++ xori r8, r8, 0x30 # xor \$0x30, %r8
++
++Lschedule_go:
++ cmplwi $bits, 192 # cmp \$192, %esi
++ bgt Lschedule_256
++ beq Lschedule_192
++ # 128: fall though
++
++##
++## .schedule_128
++##
++## 128-bit specific part of key schedule.
++##
++## This schedule is really simple, because all its parts
++## are accomplished by the subroutines.
++##
++Lschedule_128:
++ li r0, 10 # mov \$10, %esi
++ mtctr r0
++
++Loop_schedule_128:
++ bl _vpaes_schedule_round
++ bdz Lschedule_mangle_last # dec %esi
++ bl _vpaes_schedule_mangle # write output
++ b Loop_schedule_128
++
++##
++## .aes_schedule_192
++##
++## 192-bit specific part of key schedule.
++##
++## The main body of this schedule is the same as the 128-bit
++## schedule, but with more smearing. The long, high side is
++## stored in %xmm7 as before, and the short, low side is in
++## the high bits of %xmm6.
++##
++## This schedule is somewhat nastier, however, because each
++## round produces 192 bits of key material, or 1.5 round keys.
++## Therefore, on each cycle we do 2 rounds and produce 3 round
++## keys.
++##
++.align 4
++Lschedule_192:
++ li r0, 4 # mov \$4, %esi
++ lvx v0, 0, $inp
++ ?vperm v0, v6, v0, $inpperm
++ ?vsldoi v0, v3, v0, 8 # vmovdqu 8(%rdi),%xmm0 # load key part 2 (very unaligned)
++ bl _vpaes_schedule_transform # input transform
++ ?vsldoi v6, v0, v9, 8
++ ?vsldoi v6, v9, v6, 8 # clobber "low" side with zeros
++ mtctr r0
++
++Loop_schedule_192:
++ bl _vpaes_schedule_round
++ ?vsldoi v0, v6, v0, 8 # vpalignr \$8,%xmm6,%xmm0,%xmm0
++ bl _vpaes_schedule_mangle # save key n
++ bl _vpaes_schedule_192_smear
++ bl _vpaes_schedule_mangle # save key n+1
++ bl _vpaes_schedule_round
++ bdz Lschedule_mangle_last # dec %esi
++ bl _vpaes_schedule_mangle # save key n+2
++ bl _vpaes_schedule_192_smear
++ b Loop_schedule_192
++
++##
++## .aes_schedule_256
++##
++## 256-bit specific part of key schedule.
++##
++## The structure here is very similar to the 128-bit
++## schedule, but with an additional "low side" in
++## %xmm6. The low side's rounds are the same as the
++## high side's, except no rcon and no rotation.
++##
++.align 4
++Lschedule_256:
++ li r0, 7 # mov \$7, %esi
++ addi $inp, $inp, 8
++ lvx v0, 0, $inp # vmovdqu 16(%rdi),%xmm0 # load key part 2 (unaligned)
++ ?vperm v0, v6, v0, $inpperm
++ bl _vpaes_schedule_transform # input transform
++ mtctr r0
++
++Loop_schedule_256:
++ bl _vpaes_schedule_mangle # output low result
++ vmr v6, v0 # vmovdqa %xmm0, %xmm6 # save cur_lo in xmm6
++
++ # high round
++ bl _vpaes_schedule_round
++ bdz Lschedule_mangle_last # dec %esi
++ bl _vpaes_schedule_mangle
++
++ # low round. swap xmm7 and xmm6
++ ?vspltw v0, v0, 3 # vpshufd \$0xFF, %xmm0, %xmm0
++ vmr v5, v7 # vmovdqa %xmm7, %xmm5
++ vmr v7, v6 # vmovdqa %xmm6, %xmm7
++ bl _vpaes_schedule_low_round
++ vmr v7, v5 # vmovdqa %xmm5, %xmm7
++
++ b Loop_schedule_256
++##
++## .aes_schedule_mangle_last
++##
++## Mangler for last round of key schedule
++## Mangles %xmm0
++## when encrypting, outputs out(%xmm0) ^ 63
++## when decrypting, outputs unskew(%xmm0)
++##
++## Always called right before return... jumps to cleanup and exits
++##
++.align 4
++Lschedule_mangle_last:
++ # schedule last round key from xmm0
++ li r11, 0x2e0 # lea .Lk_deskew(%rip),%r11
++ li r9, 0x2f0
++ bne $dir, Lschedule_mangle_last_dec
++
++ # encrypting
++ lvx v1, r8, r10 # vmovdqa (%r8,%r10),%xmm1
++ li r11, 0x2c0 # lea .Lk_opt(%rip), %r11 # prepare to output transform
++ li r9, 0x2d0 # prepare to output transform
++ vperm v0, v0, v0, v1 # vpshufb %xmm1, %xmm0, %xmm0 # output permute
++
++ lvx $iptlo, r11, r12 # reload $ipt
++ lvx $ipthi, r9, r12
++ addi $out, $out, 16 # add \$16, %rdx
++ vxor v0, v0, v26 # vpxor .Lk_s63(%rip), %xmm0, %xmm0
++ bl _vpaes_schedule_transform # output transform
++
++ #stvx v0, r0, $out # vmovdqu %xmm0, (%rdx) # save last key
++ vperm v0, v0, v0, $outperm # rotate right/left
++ vsel v2, $outhead, v0, $outmask
++ vmr $outhead, v0
++ stvx v2, 0, $out
++
++ addi $out, $out, 15 # 15 is not typo
++ lvx v1, 0, $out # redundant in aligned case
++ vsel v1, $outhead, v1, $outmask
++ stvx v1, 0, $out
++ b Lschedule_mangle_done
++
++.align 4
++Lschedule_mangle_last_dec:
++ lvx $iptlo, r11, r12 # reload $ipt
++ lvx $ipthi, r9, r12
++ addi $out, $out, -16 # add \$-16, %rdx
++ vxor v0, v0, v26 # vpxor .Lk_s63(%rip), %xmm0, %xmm0
++ bl _vpaes_schedule_transform # output transform
++
++ #stvx v0, r0, $out # vmovdqu %xmm0, (%rdx) # save last key
++ vperm v0, v0, v0, $outperm # rotate right/left
++ vsel v2, $outhead, v0, $outmask
++ vmr $outhead, v0
++ stvx v2, 0, $out
++
++ addi $out, $out, -15 # -15 is not typo
++ lvx v1, 0, $out # redundant in aligned case
++ vsel v1, $outhead, v1, $outmask
++ stvx v1, 0, $out
++
++Lschedule_mangle_done:
++ mtlr r7
++ # cleanup
++ vxor v0, v0, v0 # vpxor %xmm0, %xmm0, %xmm0
++ vxor v1, v1, v1 # vpxor %xmm1, %xmm1, %xmm1
++ vxor v2, v2, v2 # vpxor %xmm2, %xmm2, %xmm2
++ vxor v3, v3, v3 # vpxor %xmm3, %xmm3, %xmm3
++ vxor v4, v4, v4 # vpxor %xmm4, %xmm4, %xmm4
++ vxor v5, v5, v5 # vpxor %xmm5, %xmm5, %xmm5
++ vxor v6, v6, v6 # vpxor %xmm6, %xmm6, %xmm6
++ vxor v7, v7, v7 # vpxor %xmm7, %xmm7, %xmm7
++
++ blr
++ .long 0
++ .byte 0,12,0x14,0,0,0,0,0
++
++##
++## .aes_schedule_192_smear
++##
++## Smear the short, low side in the 192-bit key schedule.
++##
++## Inputs:
++## %xmm7: high side, b a x y
++## %xmm6: low side, d c 0 0
++## %xmm13: 0
++##
++## Outputs:
++## %xmm6: b+c+d b+c 0 0
++## %xmm0: b+c+d b+c b a
++##
++.align 4
++_vpaes_schedule_192_smear:
++ ?vspltw v0, v7, 3
++ ?vsldoi v1, v9, v6, 12 # vpshufd \$0x80, %xmm6, %xmm1 # d c 0 0 -> c 0 0 0
++ ?vsldoi v0, v7, v0, 8 # vpshufd \$0xFE, %xmm7, %xmm0 # b a _ _ -> b b b a
++ vxor v6, v6, v1 # vpxor %xmm1, %xmm6, %xmm6 # -> c+d c 0 0
++ vxor v6, v6, v0 # vpxor %xmm0, %xmm6, %xmm6 # -> b+c+d b+c b a
++ vmr v0, v6
++ ?vsldoi v6, v6, v9, 8
++ ?vsldoi v6, v9, v6, 8 # clobber low side with zeros
++ blr
++ .long 0
++ .byte 0,12,0x14,0,0,0,0,0
++
++##
++## .aes_schedule_round
++##
++## Runs one main round of the key schedule on %xmm0, %xmm7
++##
++## Specifically, runs subbytes on the high dword of %xmm0
++## then rotates it by one byte and xors into the low dword of
++## %xmm7.
++##
++## Adds rcon from low byte of %xmm8, then rotates %xmm8 for
++## next rcon.
++##
++## Smears the dwords of %xmm7 by xoring the low into the
++## second low, result into third, result into highest.
++##
++## Returns results in %xmm7 = %xmm0.
++## Clobbers %xmm1-%xmm4, %r11.
++##
++.align 4
++_vpaes_schedule_round:
++ # extract rcon from xmm8
++ #vxor v4, v4, v4 # vpxor %xmm4, %xmm4, %xmm4
++ ?vsldoi v1, $rcon, v9, 15 # vpalignr \$15, %xmm8, %xmm4, %xmm1
++ ?vsldoi $rcon, $rcon, $rcon, 15 # vpalignr \$15, %xmm8, %xmm8, %xmm8
++ vxor v7, v7, v1 # vpxor %xmm1, %xmm7, %xmm7
++
++ # rotate
++ ?vspltw v0, v0, 3 # vpshufd \$0xFF, %xmm0, %xmm0
++ ?vsldoi v0, v0, v0, 1 # vpalignr \$1, %xmm0, %xmm0, %xmm0
++
++ # fall through...
++
++ # low round: same as high round, but no rotation and no rcon.
++_vpaes_schedule_low_round:
++ # smear xmm7
++ ?vsldoi v1, v9, v7, 12 # vpslldq \$4, %xmm7, %xmm1
++ vxor v7, v7, v1 # vpxor %xmm1, %xmm7, %xmm7
++ vspltisb v1, 0x0f # 0x0f..0f
++ ?vsldoi v4, v9, v7, 8 # vpslldq \$8, %xmm7, %xmm4
++
++ # subbytes
++ vand v1, v1, v0 # vpand %xmm9, %xmm0, %xmm1 # 0 = k
++ vsrb v0, v0, v8 # vpsrlb \$4, %xmm0, %xmm0 # 1 = i
++ vxor v7, v7, v4 # vpxor %xmm4, %xmm7, %xmm7
++ vperm v2, $invhi, v9, v1 # vpshufb %xmm1, %xmm11, %xmm2 # 2 = a/k
++ vxor v1, v1, v0 # vpxor %xmm0, %xmm1, %xmm1 # 0 = j
++ vperm v3, $invlo, v9, v0 # vpshufb %xmm0, %xmm10, %xmm3 # 3 = 1/i
++ vxor v3, v3, v2 # vpxor %xmm2, %xmm3, %xmm3 # 3 = iak = 1/i + a/k
++ vperm v4, $invlo, v9, v1 # vpshufb %xmm1, %xmm10, %xmm4 # 4 = 1/j
++ vxor v7, v7, v26 # vpxor .Lk_s63(%rip), %xmm7, %xmm7
++ vperm v3, $invlo, v9, v3 # vpshufb %xmm3, %xmm10, %xmm3 # 2 = 1/iak
++ vxor v4, v4, v2 # vpxor %xmm2, %xmm4, %xmm4 # 4 = jak = 1/j + a/k
++ vperm v2, $invlo, v9, v4 # vpshufb %xmm4, %xmm10, %xmm2 # 3 = 1/jak
++ vxor v3, v3, v1 # vpxor %xmm1, %xmm3, %xmm3 # 2 = io
++ vxor v2, v2, v0 # vpxor %xmm0, %xmm2, %xmm2 # 3 = jo
++ vperm v4, v15, v9, v3 # vpshufb %xmm3, %xmm13, %xmm4 # 4 = sbou
++ vperm v1, v14, v9, v2 # vpshufb %xmm2, %xmm12, %xmm1 # 0 = sb1t
++ vxor v1, v1, v4 # vpxor %xmm4, %xmm1, %xmm1 # 0 = sbox output
++
++ # add in smeared stuff
++ vxor v0, v1, v7 # vpxor %xmm7, %xmm1, %xmm0
++ vxor v7, v1, v7 # vmovdqa %xmm0, %xmm7
++ blr
++ .long 0
++ .byte 0,12,0x14,0,0,0,0,0
++
++##
++## .aes_schedule_transform
++##
++## Linear-transform %xmm0 according to tables at (%r11)
++##
++## Requires that %xmm9 = 0x0F0F... as in preheat
++## Output in %xmm0
++## Clobbers %xmm2
++##
++.align 4
++_vpaes_schedule_transform:
++ #vand v1, v0, v9 # vpand %xmm9, %xmm0, %xmm1
++ vsrb v2, v0, v8 # vpsrlb \$4, %xmm0, %xmm0
++ # vmovdqa (%r11), %xmm2 # lo
++ vperm v0, $iptlo, $iptlo, v0 # vpshufb %xmm1, %xmm2, %xmm2
++ # vmovdqa 16(%r11), %xmm1 # hi
++ vperm v2, $ipthi, $ipthi, v2 # vpshufb %xmm0, %xmm1, %xmm0
++ vxor v0, v0, v2 # vpxor %xmm2, %xmm0, %xmm0
++ blr
++ .long 0
++ .byte 0,12,0x14,0,0,0,0,0
++
++##
++## .aes_schedule_mangle
++##
++## Mangle xmm0 from (basis-transformed) standard version
++## to our version.
++##
++## On encrypt,
++## xor with 0x63
++## multiply by circulant 0,1,1,1
++## apply shiftrows transform
++##
++## On decrypt,
++## xor with 0x63
++## multiply by "inverse mixcolumns" circulant E,B,D,9
++## deskew
++## apply shiftrows transform
++##
++##
++## Writes out to (%rdx), and increments or decrements it
++## Keeps track of round number mod 4 in %r8
++## Preserves xmm0
++## Clobbers xmm1-xmm5
++##
++.align 4
++_vpaes_schedule_mangle:
++ #vmr v4, v0 # vmovdqa %xmm0, %xmm4 # save xmm0 for later
++ # vmovdqa .Lk_mc_forward(%rip),%xmm5
++ bne $dir, Lschedule_mangle_dec
++
++ # encrypting
++ vxor v4, v0, v26 # vpxor .Lk_s63(%rip), %xmm0, %xmm4
++ addi $out, $out, 16 # add \$16, %rdx
++ vperm v4, v4, v4, v25 # vpshufb %xmm5, %xmm4, %xmm4
++ vperm v1, v4, v4, v25 # vpshufb %xmm5, %xmm4, %xmm1
++ vperm v3, v1, v1, v25 # vpshufb %xmm5, %xmm1, %xmm3
++ vxor v4, v4, v1 # vpxor %xmm1, %xmm4, %xmm4
++ lvx v1, r8, r10 # vmovdqa (%r8,%r10), %xmm1
++ vxor v3, v3, v4 # vpxor %xmm4, %xmm3, %xmm3
++
++ vperm v3, v3, v3, v1 # vpshufb %xmm1, %xmm3, %xmm3
++ addi r8, r8, -16 # add \$-16, %r8
++ andi. r8, r8, 0x30 # and \$0x30, %r8
++
++ #stvx v3, 0, $out # vmovdqu %xmm3, (%rdx)
++ vperm v1, v3, v3, $outperm # rotate right/left
++ vsel v2, $outhead, v1, $outmask
++ vmr $outhead, v1
++ stvx v2, 0, $out
++ blr
++
++.align 4
++Lschedule_mangle_dec:
++ # inverse mix columns
++ # lea .Lk_dksd(%rip),%r11
++ vsrb v1, v0, v8 # vpsrlb \$4, %xmm4, %xmm1 # 1 = hi
++ #and v4, v0, v9 # vpand %xmm9, %xmm4, %xmm4 # 4 = lo
++
++ # vmovdqa 0x00(%r11), %xmm2
++ vperm v2, v16, v16, v0 # vpshufb %xmm4, %xmm2, %xmm2
++ # vmovdqa 0x10(%r11), %xmm3
++ vperm v3, v17, v17, v1 # vpshufb %xmm1, %xmm3, %xmm3
++ vxor v3, v3, v2 # vpxor %xmm2, %xmm3, %xmm3
++ vperm v3, v3, v9, v25 # vpshufb %xmm5, %xmm3, %xmm3
++
++ # vmovdqa 0x20(%r11), %xmm2
++ vperm v2, v18, v18, v0 # vpshufb %xmm4, %xmm2, %xmm2
++ vxor v2, v2, v3 # vpxor %xmm3, %xmm2, %xmm2
++ # vmovdqa 0x30(%r11), %xmm3
++ vperm v3, v19, v19, v1 # vpshufb %xmm1, %xmm3, %xmm3
++ vxor v3, v3, v2 # vpxor %xmm2, %xmm3, %xmm3
++ vperm v3, v3, v9, v25 # vpshufb %xmm5, %xmm3, %xmm3
++
++ # vmovdqa 0x40(%r11), %xmm2
++ vperm v2, v20, v20, v0 # vpshufb %xmm4, %xmm2, %xmm2
++ vxor v2, v2, v3 # vpxor %xmm3, %xmm2, %xmm2
++ # vmovdqa 0x50(%r11), %xmm3
++ vperm v3, v21, v21, v1 # vpshufb %xmm1, %xmm3, %xmm3
++ vxor v3, v3, v2 # vpxor %xmm2, %xmm3, %xmm3
++
++ # vmovdqa 0x60(%r11), %xmm2
++ vperm v2, v22, v22, v0 # vpshufb %xmm4, %xmm2, %xmm2
++ vperm v3, v3, v9, v25 # vpshufb %xmm5, %xmm3, %xmm3
++ # vmovdqa 0x70(%r11), %xmm4
++ vperm v4, v23, v23, v1 # vpshufb %xmm1, %xmm4, %xmm4
++ lvx v1, r8, r10 # vmovdqa (%r8,%r10), %xmm1
++ vxor v2, v2, v3 # vpxor %xmm3, %xmm2, %xmm2
++ vxor v3, v4, v2 # vpxor %xmm2, %xmm4, %xmm3
++
++ addi $out, $out, -16 # add \$-16, %rdx
++
++ vperm v3, v3, v3, v1 # vpshufb %xmm1, %xmm3, %xmm3
++ addi r8, r8, -16 # add \$-16, %r8
++ andi. r8, r8, 0x30 # and \$0x30, %r8
++
++ #stvx v3, 0, $out # vmovdqu %xmm3, (%rdx)
++ vperm v1, v3, v3, $outperm # rotate right/left
++ vsel v2, $outhead, v1, $outmask
++ vmr $outhead, v1
++ stvx v2, 0, $out
++ blr
++ .long 0
++ .byte 0,12,0x14,0,0,0,0,0
++
++.globl .vpaes_set_encrypt_key
++.align 5
++.vpaes_set_encrypt_key:
++ $STU $sp,-$FRAME($sp)
++ li r10,`15+6*$SIZE_T`
++ li r11,`31+6*$SIZE_T`
++ mflr r0
++ mfspr r6, 256 # save vrsave
++ stvx v20,r10,$sp
++ addi r10,r10,32
++ stvx v21,r11,$sp
++ addi r11,r11,32
++ stvx v22,r10,$sp
++ addi r10,r10,32
++ stvx v23,r11,$sp
++ addi r11,r11,32
++ stvx v24,r10,$sp
++ addi r10,r10,32
++ stvx v25,r11,$sp
++ addi r11,r11,32
++ stvx v26,r10,$sp
++ addi r10,r10,32
++ stvx v27,r11,$sp
++ addi r11,r11,32
++ stvx v28,r10,$sp
++ addi r10,r10,32
++ stvx v29,r11,$sp
++ addi r11,r11,32
++ stvx v30,r10,$sp
++ stvx v31,r11,$sp
++ stw r6,`$FRAME-4`($sp) # save vrsave
++ li r7, -1
++ $PUSH r0, `$FRAME+$LRSAVE`($sp)
++ mtspr 256, r7 # preserve all AltiVec registers
++
++ srwi r9, $bits, 5 # shr \$5,%eax
++ addi r9, r9, 6 # add \$5,%eax
++ stw r9, 240($out) # mov %eax,240(%rdx) # AES_KEY->rounds = nbits/32+5;
++
++ cmplw $dir, $bits, $bits # set encrypt direction
++ li r8, 0x30 # mov \$0x30,%r8d
++ bl _vpaes_schedule_core
++
++ $POP r0, `$FRAME+$LRSAVE`($sp)
++ li r10,`15+6*$SIZE_T`
++ li r11,`31+6*$SIZE_T`
++ mtspr 256, r6 # restore vrsave
++ mtlr r0
++ xor r3, r3, r3
++ lvx v20,r10,$sp
++ addi r10,r10,32
++ lvx v21,r11,$sp
++ addi r11,r11,32
++ lvx v22,r10,$sp
++ addi r10,r10,32
++ lvx v23,r11,$sp
++ addi r11,r11,32
++ lvx v24,r10,$sp
++ addi r10,r10,32
++ lvx v25,r11,$sp
++ addi r11,r11,32
++ lvx v26,r10,$sp
++ addi r10,r10,32
++ lvx v27,r11,$sp
++ addi r11,r11,32
++ lvx v28,r10,$sp
++ addi r10,r10,32
++ lvx v29,r11,$sp
++ addi r11,r11,32
++ lvx v30,r10,$sp
++ lvx v31,r11,$sp
++ addi $sp,$sp,$FRAME
++ blr
++ .long 0
++ .byte 0,12,0x04,1,0x80,0,3,0
++ .long 0
++.size .vpaes_set_encrypt_key,.-.vpaes_set_encrypt_key
++
++.globl .vpaes_set_decrypt_key
++.align 4
++.vpaes_set_decrypt_key:
++ $STU $sp,-$FRAME($sp)
++ li r10,`15+6*$SIZE_T`
++ li r11,`31+6*$SIZE_T`
++ mflr r0
++ mfspr r6, 256 # save vrsave
++ stvx v20,r10,$sp
++ addi r10,r10,32
++ stvx v21,r11,$sp
++ addi r11,r11,32
++ stvx v22,r10,$sp
++ addi r10,r10,32
++ stvx v23,r11,$sp
++ addi r11,r11,32
++ stvx v24,r10,$sp
++ addi r10,r10,32
++ stvx v25,r11,$sp
++ addi r11,r11,32
++ stvx v26,r10,$sp
++ addi r10,r10,32
++ stvx v27,r11,$sp
++ addi r11,r11,32
++ stvx v28,r10,$sp
++ addi r10,r10,32
++ stvx v29,r11,$sp
++ addi r11,r11,32
++ stvx v30,r10,$sp
++ stvx v31,r11,$sp
++ stw r6,`$FRAME-4`($sp) # save vrsave
++ li r7, -1
++ $PUSH r0, `$FRAME+$LRSAVE`($sp)
++ mtspr 256, r7 # preserve all AltiVec registers
++
++ srwi r9, $bits, 5 # shr \$5,%eax
++ addi r9, r9, 6 # add \$5,%eax
++ stw r9, 240($out) # mov %eax,240(%rdx) # AES_KEY->rounds = nbits/32+5;
++
++ slwi r9, r9, 4 # shl \$4,%eax
++ add $out, $out, r9 # lea (%rdx,%rax),%rdx
++
++ cmplwi $dir, $bits, 0 # set decrypt direction
++ srwi r8, $bits, 1 # shr \$1,%r8d
++ andi. r8, r8, 32 # and \$32,%r8d
++ xori r8, r8, 32 # xor \$32,%r8d # nbits==192?0:32
++ bl _vpaes_schedule_core
++
++ $POP r0, `$FRAME+$LRSAVE`($sp)
++ li r10,`15+6*$SIZE_T`
++ li r11,`31+6*$SIZE_T`
++ mtspr 256, r6 # restore vrsave
++ mtlr r0
++ xor r3, r3, r3
++ lvx v20,r10,$sp
++ addi r10,r10,32
++ lvx v21,r11,$sp
++ addi r11,r11,32
++ lvx v22,r10,$sp
++ addi r10,r10,32
++ lvx v23,r11,$sp
++ addi r11,r11,32
++ lvx v24,r10,$sp
++ addi r10,r10,32
++ lvx v25,r11,$sp
++ addi r11,r11,32
++ lvx v26,r10,$sp
++ addi r10,r10,32
++ lvx v27,r11,$sp
++ addi r11,r11,32
++ lvx v28,r10,$sp
++ addi r10,r10,32
++ lvx v29,r11,$sp
++ addi r11,r11,32
++ lvx v30,r10,$sp
++ lvx v31,r11,$sp
++ addi $sp,$sp,$FRAME
++ blr
++ .long 0
++ .byte 0,12,0x04,1,0x80,0,3,0
++ .long 0
++.size .vpaes_set_decrypt_key,.-.vpaes_set_decrypt_key
++___
++}
++
++my $consts=1;
++foreach (split("\n",$code)) {
++ s/\`([^\`]*)\`/eval $1/geo;
++
++ # constants table endian-specific conversion
++ if ($consts && m/\.long\s+(.+)\s+(\?[a-z]*)$/o) {
++ my $conv=$2;
++ my @bytes=();
++
++ # convert to endian-agnostic format
++ foreach (split(/,\s+/,$1)) {
++ my $l = /^0/?oct:int;
++ push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
++ }
++
++ # little-endian conversion
++ if ($flavour =~ /le$/o) {
++ SWITCH: for($conv) {
++ /\?inv/ && do { @bytes=map($_^0xf, at bytes); last; };
++ /\?rev/ && do { @bytes=reverse(@bytes); last; };
++ }
++ }
++
++ #emit
++ print ".byte\t",join(',',map (sprintf("0x%02x",$_), at bytes)),"\n";
++ next;
++ }
++ $consts=0 if (m/Lconsts:/o); # end of table
++
++ # instructions prefixed with '?' are endian-specific and need
++ # to be adjusted accordingly...
++ if ($flavour =~ /le$/o) { # little-endian
++ s/\?lvsr/lvsl/o or
++ s/\?lvsl/lvsr/o or
++ s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
++ s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
++ s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
++ } else { # big-endian
++ s/\?([a-z]+)/$1/o;
++ }
++
++ print $_,"\n";
++}
++
++close STDOUT;
+diff --git a/crypto/bn/asm/ppc-mont.pl b/crypto/bn/asm/ppc-mont.pl
+index f9b6992..da69c6a 100644
+--- a/crypto/bn/asm/ppc-mont.pl
++++ b/crypto/bn/asm/ppc-mont.pl
+@@ -325,6 +325,7 @@ Lcopy: ; copy or in-place refresh
+ .long 0
+ .byte 0,12,4,0,0x80,12,6,0
+ .long 0
++.size .bn_mul_mont_int,.-.bn_mul_mont_int
+
+ .asciz "Montgomery Multiplication for PPC, CRYPTOGAMS by <appro\@openssl.org>"
+ ___
+diff --git a/crypto/bn/asm/ppc.pl b/crypto/bn/asm/ppc.pl
+index 1249ce2..04df1fe 100644
+--- a/crypto/bn/asm/ppc.pl
++++ b/crypto/bn/asm/ppc.pl
+@@ -392,6 +392,7 @@ $data=<<EOF;
+ .long 0
+ .byte 0,12,0x14,0,0,0,2,0
+ .long 0
++.size .bn_sqr_comba4,.-.bn_sqr_comba4
+
+ #
+ # NOTE: The following label name should be changed to
+@@ -819,6 +820,7 @@ $data=<<EOF;
+ .long 0
+ .byte 0,12,0x14,0,0,0,2,0
+ .long 0
++.size .bn_sqr_comba8,.-.bn_sqr_comba8
+
+ #
+ # NOTE: The following label name should be changed to
+@@ -972,6 +974,7 @@ $data=<<EOF;
+ .long 0
+ .byte 0,12,0x14,0,0,0,3,0
+ .long 0
++.size .bn_mul_comba4,.-.bn_mul_comba4
+
+ #
+ # NOTE: The following label name should be changed to
+@@ -1510,6 +1513,7 @@ $data=<<EOF;
+ .long 0
+ .byte 0,12,0x14,0,0,0,3,0
+ .long 0
++.size .bn_mul_comba8,.-.bn_mul_comba8
+
+ #
+ # NOTE: The following label name should be changed to
+@@ -1560,6 +1564,7 @@ Lppcasm_sub_adios:
+ .long 0
+ .byte 0,12,0x14,0,0,0,4,0
+ .long 0
++.size .bn_sub_words,.-.bn_sub_words
+
+ #
+ # NOTE: The following label name should be changed to
+@@ -1605,6 +1610,7 @@ Lppcasm_add_adios:
+ .long 0
+ .byte 0,12,0x14,0,0,0,4,0
+ .long 0
++.size .bn_add_words,.-.bn_add_words
+
+ #
+ # NOTE: The following label name should be changed to
+@@ -1720,6 +1726,7 @@ Lppcasm_div9:
+ .long 0
+ .byte 0,12,0x14,0,0,0,3,0
+ .long 0
++.size .bn_div_words,.-.bn_div_words
+
+ #
+ # NOTE: The following label name should be changed to
+@@ -1761,6 +1768,7 @@ Lppcasm_sqr_adios:
+ .long 0
+ .byte 0,12,0x14,0,0,0,3,0
+ .long 0
++.size .bn_sqr_words,.-.bn_sqr_words
+
+ #
+ # NOTE: The following label name should be changed to
+@@ -1866,6 +1874,7 @@ Lppcasm_mw_OVER:
+ .long 0
+ .byte 0,12,0x14,0,0,0,4,0
+ .long 0
++.size bn_mul_words,.-bn_mul_words
+
+ #
+ # NOTE: The following label name should be changed to
+@@ -1991,6 +2000,7 @@ Lppcasm_maw_adios:
+ .long 0
+ .byte 0,12,0x14,0,0,0,4,0
+ .long 0
++.size .bn_mul_add_words,.-.bn_mul_add_words
+ .align 4
+ EOF
+ $data =~ s/\`([^\`]*)\`/eval $1/gem;
+diff --git a/crypto/bn/asm/ppc64-mont.pl b/crypto/bn/asm/ppc64-mont.pl
+index a14e769..68e3733 100644
+--- a/crypto/bn/asm/ppc64-mont.pl
++++ b/crypto/bn/asm/ppc64-mont.pl
+@@ -1,7 +1,7 @@
+ #!/usr/bin/env perl
+
+ # ====================================================================
+-# Written by Andy Polyakov <appro at fy.chalmers.se> for the OpenSSL
++# Written by Andy Polyakov <appro at openssl.org> for the OpenSSL
+ # project. The module is, however, dual licensed under OpenSSL and
+ # CRYPTOGAMS licenses depending on where you obtain it. For further
+ # details see http://www.openssl.org/~appro/cryptogams/.
+@@ -65,6 +65,14 @@
+ # others alternative would be to break dependence on upper halves of
+ # GPRs by sticking to 32-bit integer operations...
+
++# December 2012
++
++# Remove above mentioned dependence on GPRs' upper halves in 32-bit
++# build. No signal masking overhead, but integer instructions are
++# *more* numerous... It's still "universally" faster than 32-bit
++# ppc-mont.pl, but improvement coefficient is not as impressive
++# for longer keys...
++
+ $flavour = shift;
+
+ if ($flavour =~ /32/) {
+@@ -110,6 +118,9 @@ $tp="r10";
+ $j="r11";
+ $i="r12";
+ # non-volatile registers
++$c1="r19";
++$n1="r20";
++$a1="r21";
+ $nap_d="r22"; # interleaved ap and np in double format
+ $a0="r23"; # ap[0]
+ $t0="r24"; # temporary registers
+@@ -180,8 +191,8 @@ $T3a="f30"; $T3b="f31";
+ # . .
+ # +-------------------------------+
+ # . .
+-# -12*size_t +-------------------------------+
+-# | 10 saved gpr, r22-r31 |
++# -13*size_t +-------------------------------+
++# | 13 saved gpr, r19-r31 |
+ # . .
+ # . .
+ # -12*8 +-------------------------------+
+@@ -215,6 +226,9 @@ $code=<<___;
+ mr $i,$sp
+ $STUX $sp,$sp,$tp ; alloca
+
++ $PUSH r19,`-12*8-13*$SIZE_T`($i)
++ $PUSH r20,`-12*8-12*$SIZE_T`($i)
++ $PUSH r21,`-12*8-11*$SIZE_T`($i)
+ $PUSH r22,`-12*8-10*$SIZE_T`($i)
+ $PUSH r23,`-12*8-9*$SIZE_T`($i)
+ $PUSH r24,`-12*8-8*$SIZE_T`($i)
+@@ -237,40 +251,26 @@ $code=<<___;
+ stfd f29,`-3*8`($i)
+ stfd f30,`-2*8`($i)
+ stfd f31,`-1*8`($i)
+-___
+-$code.=<<___ if ($SIZE_T==8);
+- ld $a0,0($ap) ; pull ap[0] value
+- ld $n0,0($n0) ; pull n0[0] value
+- ld $t3,0($bp) ; bp[0]
+-___
+-$code.=<<___ if ($SIZE_T==4);
+- mr $t1,$n0
+- lwz $a0,0($ap) ; pull ap[0,1] value
+- lwz $t0,4($ap)
+- lwz $n0,0($t1) ; pull n0[0,1] value
+- lwz $t1,4($t1)
+- lwz $t3,0($bp) ; bp[0,1]
+- lwz $t2,4($bp)
+- insrdi $a0,$t0,32,0
+- insrdi $n0,$t1,32,0
+- insrdi $t3,$t2,32,0
+-___
+-$code.=<<___;
++
+ addi $tp,$sp,`$FRAME+$TRANSFER+8+64`
+ li $i,-64
+ add $nap_d,$tp,$num
+ and $nap_d,$nap_d,$i ; align to 64 bytes
+-
+- mulld $t7,$a0,$t3 ; ap[0]*bp[0]
+ ; nap_d is off by 1, because it's used with stfdu/lfdu
+ addi $nap_d,$nap_d,-8
+ srwi $j,$num,`3+1` ; counter register, num/2
+- mulld $t7,$t7,$n0 ; tp[0]*n0
+ addi $j,$j,-1
+ addi $tp,$sp,`$FRAME+$TRANSFER-8`
+ li $carry,0
+ mtctr $j
++___
++
++$code.=<<___ if ($SIZE_T==8);
++ ld $a0,0($ap) ; pull ap[0] value
++ ld $t3,0($bp) ; bp[0]
++ ld $n0,0($n0) ; pull n0[0] value
+
++ mulld $t7,$a0,$t3 ; ap[0]*bp[0]
+ ; transfer bp[0] to FPU as 4x16-bit values
+ extrdi $t0,$t3,16,48
+ extrdi $t1,$t3,16,32
+@@ -280,6 +280,8 @@ $code.=<<___;
+ std $t1,`$FRAME+8`($sp)
+ std $t2,`$FRAME+16`($sp)
+ std $t3,`$FRAME+24`($sp)
++
++ mulld $t7,$t7,$n0 ; tp[0]*n0
+ ; transfer (ap[0]*bp[0])*n0 to FPU as 4x16-bit values
+ extrdi $t4,$t7,16,48
+ extrdi $t5,$t7,16,32
+@@ -289,21 +291,61 @@ $code.=<<___;
+ std $t5,`$FRAME+40`($sp)
+ std $t6,`$FRAME+48`($sp)
+ std $t7,`$FRAME+56`($sp)
+-___
+-$code.=<<___ if ($SIZE_T==8);
+- lwz $t0,4($ap) ; load a[j] as 32-bit word pair
+- lwz $t1,0($ap)
+- lwz $t2,12($ap) ; load a[j+1] as 32-bit word pair
++
++ extrdi $t0,$a0,32,32 ; lwz $t0,4($ap)
++ extrdi $t1,$a0,32,0 ; lwz $t1,0($ap)
++ lwz $t2,12($ap) ; load a[1] as 32-bit word pair
+ lwz $t3,8($ap)
+- lwz $t4,4($np) ; load n[j] as 32-bit word pair
++ lwz $t4,4($np) ; load n[0] as 32-bit word pair
+ lwz $t5,0($np)
+- lwz $t6,12($np) ; load n[j+1] as 32-bit word pair
++ lwz $t6,12($np) ; load n[1] as 32-bit word pair
+ lwz $t7,8($np)
+ ___
+ $code.=<<___ if ($SIZE_T==4);
+- lwz $t0,0($ap) ; load a[j..j+3] as 32-bit word pairs
+- lwz $t1,4($ap)
+- lwz $t2,8($ap)
++ lwz $a0,0($ap) ; pull ap[0,1] value
++ mr $n1,$n0
++ lwz $a1,4($ap)
++ li $c1,0
++ lwz $t1,0($bp) ; bp[0,1]
++ lwz $t3,4($bp)
++ lwz $n0,0($n1) ; pull n0[0,1] value
++ lwz $n1,4($n1)
++
++ mullw $t4,$a0,$t1 ; mulld ap[0]*bp[0]
++ mulhwu $t5,$a0,$t1
++ mullw $t6,$a1,$t1
++ mullw $t7,$a0,$t3
++ add $t5,$t5,$t6
++ add $t5,$t5,$t7
++ ; transfer bp[0] to FPU as 4x16-bit values
++ extrwi $t0,$t1,16,16
++ extrwi $t1,$t1,16,0
++ extrwi $t2,$t3,16,16
++ extrwi $t3,$t3,16,0
++ std $t0,`$FRAME+0`($sp) ; yes, std in 32-bit build
++ std $t1,`$FRAME+8`($sp)
++ std $t2,`$FRAME+16`($sp)
++ std $t3,`$FRAME+24`($sp)
++
++ mullw $t0,$t4,$n0 ; mulld tp[0]*n0
++ mulhwu $t1,$t4,$n0
++ mullw $t2,$t5,$n0
++ mullw $t3,$t4,$n1
++ add $t1,$t1,$t2
++ add $t1,$t1,$t3
++ ; transfer (ap[0]*bp[0])*n0 to FPU as 4x16-bit values
++ extrwi $t4,$t0,16,16
++ extrwi $t5,$t0,16,0
++ extrwi $t6,$t1,16,16
++ extrwi $t7,$t1,16,0
++ std $t4,`$FRAME+32`($sp) ; yes, std in 32-bit build
++ std $t5,`$FRAME+40`($sp)
++ std $t6,`$FRAME+48`($sp)
++ std $t7,`$FRAME+56`($sp)
++
++ mr $t0,$a0 ; lwz $t0,0($ap)
++ mr $t1,$a1 ; lwz $t1,4($ap)
++ lwz $t2,8($ap) ; load a[j..j+3] as 32-bit word pairs
+ lwz $t3,12($ap)
+ lwz $t4,0($np) ; load n[j..j+3] as 32-bit word pairs
+ lwz $t5,4($np)
+@@ -319,7 +361,7 @@ $code.=<<___;
+ lfd $nb,`$FRAME+40`($sp)
+ lfd $nc,`$FRAME+48`($sp)
+ lfd $nd,`$FRAME+56`($sp)
+- std $t0,`$FRAME+64`($sp)
++ std $t0,`$FRAME+64`($sp) ; yes, std even in 32-bit build
+ std $t1,`$FRAME+72`($sp)
+ std $t2,`$FRAME+80`($sp)
+ std $t3,`$FRAME+88`($sp)
+@@ -441,7 +483,7 @@ $code.=<<___ if ($SIZE_T==4);
+ lwz $t7,12($np)
+ ___
+ $code.=<<___;
+- std $t0,`$FRAME+64`($sp)
++ std $t0,`$FRAME+64`($sp) ; yes, std even in 32-bit build
+ std $t1,`$FRAME+72`($sp)
+ std $t2,`$FRAME+80`($sp)
+ std $t3,`$FRAME+88`($sp)
+@@ -449,6 +491,9 @@ $code.=<<___;
+ std $t5,`$FRAME+104`($sp)
+ std $t6,`$FRAME+112`($sp)
+ std $t7,`$FRAME+120`($sp)
++___
++if ($SIZE_T==8 or $flavour =~ /osx/) {
++$code.=<<___;
+ ld $t0,`$FRAME+0`($sp)
+ ld $t1,`$FRAME+8`($sp)
+ ld $t2,`$FRAME+16`($sp)
+@@ -457,6 +502,20 @@ $code.=<<___;
+ ld $t5,`$FRAME+40`($sp)
+ ld $t6,`$FRAME+48`($sp)
+ ld $t7,`$FRAME+56`($sp)
++___
++} else {
++$code.=<<___;
++ lwz $t1,`$FRAME+0`($sp)
++ lwz $t0,`$FRAME+4`($sp)
++ lwz $t3,`$FRAME+8`($sp)
++ lwz $t2,`$FRAME+12`($sp)
++ lwz $t5,`$FRAME+16`($sp)
++ lwz $t4,`$FRAME+20`($sp)
++ lwz $t7,`$FRAME+24`($sp)
++ lwz $t6,`$FRAME+28`($sp)
++___
++}
++$code.=<<___;
+ lfd $A0,`$FRAME+64`($sp)
+ lfd $A1,`$FRAME+72`($sp)
+ lfd $A2,`$FRAME+80`($sp)
+@@ -488,7 +547,9 @@ $code.=<<___;
+ fmadd $T0b,$A0,$bb,$dotb
+ stfd $A2,24($nap_d) ; save a[j+1] in double format
+ stfd $A3,32($nap_d)
+-
++___
++if ($SIZE_T==8 or $flavour =~ /osx/) {
++$code.=<<___;
+ fmadd $T1a,$A0,$bc,$T1a
+ fmadd $T1b,$A0,$bd,$T1b
+ fmadd $T2a,$A1,$bc,$T2a
+@@ -561,11 +622,123 @@ $code.=<<___;
+ stfd $T3b,`$FRAME+56`($sp)
+ std $t0,8($tp) ; tp[j-1]
+ stdu $t4,16($tp) ; tp[j]
++___
++} else {
++$code.=<<___;
++ fmadd $T1a,$A0,$bc,$T1a
++ fmadd $T1b,$A0,$bd,$T1b
++ addc $t0,$t0,$carry
++ adde $t1,$t1,$c1
++ srwi $carry,$t0,16
++ fmadd $T2a,$A1,$bc,$T2a
++ fmadd $T2b,$A1,$bd,$T2b
++ stfd $N0,40($nap_d) ; save n[j] in double format
++ stfd $N1,48($nap_d)
++ srwi $c1,$t1,16
++ insrwi $carry,$t1,16,0
++ fmadd $T3a,$A2,$bc,$T3a
++ fmadd $T3b,$A2,$bd,$T3b
++ addc $t2,$t2,$carry
++ adde $t3,$t3,$c1
++ srwi $carry,$t2,16
++ fmul $dota,$A3,$bc
++ fmul $dotb,$A3,$bd
++ stfd $N2,56($nap_d) ; save n[j+1] in double format
++ stfdu $N3,64($nap_d)
++ insrwi $t0,$t2,16,0 ; 0..31 bits
++ srwi $c1,$t3,16
++ insrwi $carry,$t3,16,0
++
++ fmadd $T1a,$N1,$na,$T1a
++ fmadd $T1b,$N1,$nb,$T1b
++ lwz $t3,`$FRAME+32`($sp) ; permuted $t1
++ lwz $t2,`$FRAME+36`($sp) ; permuted $t0
++ addc $t4,$t4,$carry
++ adde $t5,$t5,$c1
++ srwi $carry,$t4,16
++ fmadd $T2a,$N2,$na,$T2a
++ fmadd $T2b,$N2,$nb,$T2b
++ srwi $c1,$t5,16
++ insrwi $carry,$t5,16,0
++ fmadd $T3a,$N3,$na,$T3a
++ fmadd $T3b,$N3,$nb,$T3b
++ addc $t6,$t6,$carry
++ adde $t7,$t7,$c1
++ srwi $carry,$t6,16
++ fmadd $T0a,$N0,$na,$T0a
++ fmadd $T0b,$N0,$nb,$T0b
++ insrwi $t4,$t6,16,0 ; 32..63 bits
++ srwi $c1,$t7,16
++ insrwi $carry,$t7,16,0
++
++ fmadd $T1a,$N0,$nc,$T1a
++ fmadd $T1b,$N0,$nd,$T1b
++ lwz $t7,`$FRAME+40`($sp) ; permuted $t3
++ lwz $t6,`$FRAME+44`($sp) ; permuted $t2
++ addc $t2,$t2,$carry
++ adde $t3,$t3,$c1
++ srwi $carry,$t2,16
++ fmadd $T2a,$N1,$nc,$T2a
++ fmadd $T2b,$N1,$nd,$T2b
++ stw $t0,12($tp) ; tp[j-1]
++ stw $t4,8($tp)
++ srwi $c1,$t3,16
++ insrwi $carry,$t3,16,0
++ fmadd $T3a,$N2,$nc,$T3a
++ fmadd $T3b,$N2,$nd,$T3b
++ lwz $t1,`$FRAME+48`($sp) ; permuted $t5
++ lwz $t0,`$FRAME+52`($sp) ; permuted $t4
++ addc $t6,$t6,$carry
++ adde $t7,$t7,$c1
++ srwi $carry,$t6,16
++ fmadd $dota,$N3,$nc,$dota
++ fmadd $dotb,$N3,$nd,$dotb
++ insrwi $t2,$t6,16,0 ; 64..95 bits
++ srwi $c1,$t7,16
++ insrwi $carry,$t7,16,0
++
++ fctid $T0a,$T0a
++ fctid $T0b,$T0b
++ lwz $t5,`$FRAME+56`($sp) ; permuted $t7
++ lwz $t4,`$FRAME+60`($sp) ; permuted $t6
++ addc $t0,$t0,$carry
++ adde $t1,$t1,$c1
++ srwi $carry,$t0,16
++ fctid $T1a,$T1a
++ fctid $T1b,$T1b
++ srwi $c1,$t1,16
++ insrwi $carry,$t1,16,0
++ fctid $T2a,$T2a
++ fctid $T2b,$T2b
++ addc $t4,$t4,$carry
++ adde $t5,$t5,$c1
++ srwi $carry,$t4,16
++ fctid $T3a,$T3a
++ fctid $T3b,$T3b
++ insrwi $t0,$t4,16,0 ; 96..127 bits
++ srwi $c1,$t5,16
++ insrwi $carry,$t5,16,0
++
++ stfd $T0a,`$FRAME+0`($sp)
++ stfd $T0b,`$FRAME+8`($sp)
++ stfd $T1a,`$FRAME+16`($sp)
++ stfd $T1b,`$FRAME+24`($sp)
++ stfd $T2a,`$FRAME+32`($sp)
++ stfd $T2b,`$FRAME+40`($sp)
++ stfd $T3a,`$FRAME+48`($sp)
++ stfd $T3b,`$FRAME+56`($sp)
++ stw $t2,20($tp) ; tp[j]
++ stwu $t0,16($tp)
++___
++}
++$code.=<<___;
+ bdnz- L1st
+
+ fctid $dota,$dota
+ fctid $dotb,$dotb
+-
++___
++if ($SIZE_T==8 or $flavour =~ /osx/) {
++$code.=<<___;
+ ld $t0,`$FRAME+0`($sp)
+ ld $t1,`$FRAME+8`($sp)
+ ld $t2,`$FRAME+16`($sp)
+@@ -611,33 +784,117 @@ $code.=<<___;
+ insrdi $t6,$t7,48,0
+ srdi $ovf,$t7,48
+ std $t6,8($tp) ; tp[num-1]
++___
++} else {
++$code.=<<___;
++ lwz $t1,`$FRAME+0`($sp)
++ lwz $t0,`$FRAME+4`($sp)
++ lwz $t3,`$FRAME+8`($sp)
++ lwz $t2,`$FRAME+12`($sp)
++ lwz $t5,`$FRAME+16`($sp)
++ lwz $t4,`$FRAME+20`($sp)
++ lwz $t7,`$FRAME+24`($sp)
++ lwz $t6,`$FRAME+28`($sp)
++ stfd $dota,`$FRAME+64`($sp)
++ stfd $dotb,`$FRAME+72`($sp)
+
++ addc $t0,$t0,$carry
++ adde $t1,$t1,$c1
++ srwi $carry,$t0,16
++ insrwi $carry,$t1,16,0
++ srwi $c1,$t1,16
++ addc $t2,$t2,$carry
++ adde $t3,$t3,$c1
++ srwi $carry,$t2,16
++ insrwi $t0,$t2,16,0 ; 0..31 bits
++ insrwi $carry,$t3,16,0
++ srwi $c1,$t3,16
++ addc $t4,$t4,$carry
++ adde $t5,$t5,$c1
++ srwi $carry,$t4,16
++ insrwi $carry,$t5,16,0
++ srwi $c1,$t5,16
++ addc $t6,$t6,$carry
++ adde $t7,$t7,$c1
++ srwi $carry,$t6,16
++ insrwi $t4,$t6,16,0 ; 32..63 bits
++ insrwi $carry,$t7,16,0
++ srwi $c1,$t7,16
++ stw $t0,12($tp) ; tp[j-1]
++ stw $t4,8($tp)
++
++ lwz $t3,`$FRAME+32`($sp) ; permuted $t1
++ lwz $t2,`$FRAME+36`($sp) ; permuted $t0
++ lwz $t7,`$FRAME+40`($sp) ; permuted $t3
++ lwz $t6,`$FRAME+44`($sp) ; permuted $t2
++ lwz $t1,`$FRAME+48`($sp) ; permuted $t5
++ lwz $t0,`$FRAME+52`($sp) ; permuted $t4
++ lwz $t5,`$FRAME+56`($sp) ; permuted $t7
++ lwz $t4,`$FRAME+60`($sp) ; permuted $t6
++
++ addc $t2,$t2,$carry
++ adde $t3,$t3,$c1
++ srwi $carry,$t2,16
++ insrwi $carry,$t3,16,0
++ srwi $c1,$t3,16
++ addc $t6,$t6,$carry
++ adde $t7,$t7,$c1
++ srwi $carry,$t6,16
++ insrwi $t2,$t6,16,0 ; 64..95 bits
++ insrwi $carry,$t7,16,0
++ srwi $c1,$t7,16
++ addc $t0,$t0,$carry
++ adde $t1,$t1,$c1
++ srwi $carry,$t0,16
++ insrwi $carry,$t1,16,0
++ srwi $c1,$t1,16
++ addc $t4,$t4,$carry
++ adde $t5,$t5,$c1
++ srwi $carry,$t4,16
++ insrwi $t0,$t4,16,0 ; 96..127 bits
++ insrwi $carry,$t5,16,0
++ srwi $c1,$t5,16
++ stw $t2,20($tp) ; tp[j]
++ stwu $t0,16($tp)
++
++ lwz $t7,`$FRAME+64`($sp)
++ lwz $t6,`$FRAME+68`($sp)
++ lwz $t5,`$FRAME+72`($sp)
++ lwz $t4,`$FRAME+76`($sp)
++
++ addc $t6,$t6,$carry
++ adde $t7,$t7,$c1
++ srwi $carry,$t6,16
++ insrwi $carry,$t7,16,0
++ srwi $c1,$t7,16
++ addc $t4,$t4,$carry
++ adde $t5,$t5,$c1
++
++ insrwi $t6,$t4,16,0
++ srwi $t4,$t4,16
++ insrwi $t4,$t5,16,0
++ srwi $ovf,$t5,16
++ stw $t6,12($tp) ; tp[num-1]
++ stw $t4,8($tp)
++___
++}
++$code.=<<___;
+ slwi $t7,$num,2
+ subf $nap_d,$t7,$nap_d ; rewind pointer
+
+ li $i,8 ; i=1
+ .align 5
+ Louter:
+-___
+-$code.=<<___ if ($SIZE_T==8);
+- ldx $t3,$bp,$i ; bp[i]
+-___
+-$code.=<<___ if ($SIZE_T==4);
+- add $t0,$bp,$i
+- lwz $t3,0($t0) ; bp[i,i+1]
+- lwz $t0,4($t0)
+- insrdi $t3,$t0,32,0
+-___
+-$code.=<<___;
+- ld $t6,`$FRAME+$TRANSFER+8`($sp) ; tp[0]
+- mulld $t7,$a0,$t3 ; ap[0]*bp[i]
+-
+ addi $tp,$sp,`$FRAME+$TRANSFER`
+- add $t7,$t7,$t6 ; ap[0]*bp[i]+tp[0]
+ li $carry,0
+- mulld $t7,$t7,$n0 ; tp[0]*n0
+ mtctr $j
++___
++$code.=<<___ if ($SIZE_T==8);
++ ldx $t3,$bp,$i ; bp[i]
+
++ ld $t6,`$FRAME+$TRANSFER+8`($sp) ; tp[0]
++ mulld $t7,$a0,$t3 ; ap[0]*bp[i]
++ add $t7,$t7,$t6 ; ap[0]*bp[i]+tp[0]
+ ; transfer bp[i] to FPU as 4x16-bit values
+ extrdi $t0,$t3,16,48
+ extrdi $t1,$t3,16,32
+@@ -647,6 +904,8 @@ $code.=<<___;
+ std $t1,`$FRAME+8`($sp)
+ std $t2,`$FRAME+16`($sp)
+ std $t3,`$FRAME+24`($sp)
++
++ mulld $t7,$t7,$n0 ; tp[0]*n0
+ ; transfer (ap[0]*bp[i]+tp[0])*n0 to FPU as 4x16-bit values
+ extrdi $t4,$t7,16,48
+ extrdi $t5,$t7,16,32
+@@ -656,7 +915,50 @@ $code.=<<___;
+ std $t5,`$FRAME+40`($sp)
+ std $t6,`$FRAME+48`($sp)
+ std $t7,`$FRAME+56`($sp)
++___
++$code.=<<___ if ($SIZE_T==4);
++ add $t0,$bp,$i
++ li $c1,0
++ lwz $t1,0($t0) ; bp[i,i+1]
++ lwz $t3,4($t0)
++
++ mullw $t4,$a0,$t1 ; ap[0]*bp[i]
++ lwz $t0,`$FRAME+$TRANSFER+8+4`($sp) ; tp[0]
++ mulhwu $t5,$a0,$t1
++ lwz $t2,`$FRAME+$TRANSFER+8`($sp) ; tp[0]
++ mullw $t6,$a1,$t1
++ mullw $t7,$a0,$t3
++ add $t5,$t5,$t6
++ add $t5,$t5,$t7
++ addc $t4,$t4,$t0 ; ap[0]*bp[i]+tp[0]
++ adde $t5,$t5,$t2
++ ; transfer bp[i] to FPU as 4x16-bit values
++ extrwi $t0,$t1,16,16
++ extrwi $t1,$t1,16,0
++ extrwi $t2,$t3,16,16
++ extrwi $t3,$t3,16,0
++ std $t0,`$FRAME+0`($sp) ; yes, std in 32-bit build
++ std $t1,`$FRAME+8`($sp)
++ std $t2,`$FRAME+16`($sp)
++ std $t3,`$FRAME+24`($sp)
+
++ mullw $t0,$t4,$n0 ; mulld tp[0]*n0
++ mulhwu $t1,$t4,$n0
++ mullw $t2,$t5,$n0
++ mullw $t3,$t4,$n1
++ add $t1,$t1,$t2
++ add $t1,$t1,$t3
++ ; transfer (ap[0]*bp[i]+tp[0])*n0 to FPU as 4x16-bit values
++ extrwi $t4,$t0,16,16
++ extrwi $t5,$t0,16,0
++ extrwi $t6,$t1,16,16
++ extrwi $t7,$t1,16,0
++ std $t4,`$FRAME+32`($sp) ; yes, std in 32-bit build
++ std $t5,`$FRAME+40`($sp)
++ std $t6,`$FRAME+48`($sp)
++ std $t7,`$FRAME+56`($sp)
++___
++$code.=<<___;
+ lfd $A0,8($nap_d) ; load a[j] in double format
+ lfd $A1,16($nap_d)
+ lfd $A2,24($nap_d) ; load a[j+1] in double format
+@@ -769,7 +1071,9 @@ Linner:
+ fmul $dotb,$A3,$bd
+ lfd $A2,24($nap_d) ; load a[j+1] in double format
+ lfd $A3,32($nap_d)
+-
++___
++if ($SIZE_T==8 or $flavour =~ /osx/) {
++$code.=<<___;
+ fmadd $T1a,$N1,$na,$T1a
+ fmadd $T1b,$N1,$nb,$T1b
+ ld $t0,`$FRAME+0`($sp)
+@@ -856,10 +1160,131 @@ $code.=<<___;
+ addze $carry,$carry
+ std $t3,-16($tp) ; tp[j-1]
+ std $t5,-8($tp) ; tp[j]
++___
++} else {
++$code.=<<___;
++ fmadd $T1a,$N1,$na,$T1a
++ fmadd $T1b,$N1,$nb,$T1b
++ lwz $t1,`$FRAME+0`($sp)
++ lwz $t0,`$FRAME+4`($sp)
++ fmadd $T2a,$N2,$na,$T2a
++ fmadd $T2b,$N2,$nb,$T2b
++ lwz $t3,`$FRAME+8`($sp)
++ lwz $t2,`$FRAME+12`($sp)
++ fmadd $T3a,$N3,$na,$T3a
++ fmadd $T3b,$N3,$nb,$T3b
++ lwz $t5,`$FRAME+16`($sp)
++ lwz $t4,`$FRAME+20`($sp)
++ addc $t0,$t0,$carry
++ adde $t1,$t1,$c1
++ srwi $carry,$t0,16
++ fmadd $T0a,$N0,$na,$T0a
++ fmadd $T0b,$N0,$nb,$T0b
++ lwz $t7,`$FRAME+24`($sp)
++ lwz $t6,`$FRAME+28`($sp)
++ srwi $c1,$t1,16
++ insrwi $carry,$t1,16,0
++
++ fmadd $T1a,$N0,$nc,$T1a
++ fmadd $T1b,$N0,$nd,$T1b
++ addc $t2,$t2,$carry
++ adde $t3,$t3,$c1
++ srwi $carry,$t2,16
++ fmadd $T2a,$N1,$nc,$T2a
++ fmadd $T2b,$N1,$nd,$T2b
++ insrwi $t0,$t2,16,0 ; 0..31 bits
++ srwi $c1,$t3,16
++ insrwi $carry,$t3,16,0
++ fmadd $T3a,$N2,$nc,$T3a
++ fmadd $T3b,$N2,$nd,$T3b
++ lwz $t2,12($tp) ; tp[j]
++ lwz $t3,8($tp)
++ addc $t4,$t4,$carry
++ adde $t5,$t5,$c1
++ srwi $carry,$t4,16
++ fmadd $dota,$N3,$nc,$dota
++ fmadd $dotb,$N3,$nd,$dotb
++ srwi $c1,$t5,16
++ insrwi $carry,$t5,16,0
++
++ fctid $T0a,$T0a
++ addc $t6,$t6,$carry
++ adde $t7,$t7,$c1
++ srwi $carry,$t6,16
++ fctid $T0b,$T0b
++ insrwi $t4,$t6,16,0 ; 32..63 bits
++ srwi $c1,$t7,16
++ insrwi $carry,$t7,16,0
++ fctid $T1a,$T1a
++ addc $t0,$t0,$t2
++ adde $t4,$t4,$t3
++ lwz $t3,`$FRAME+32`($sp) ; permuted $t1
++ lwz $t2,`$FRAME+36`($sp) ; permuted $t0
++ fctid $T1b,$T1b
++ addze $carry,$carry
++ addze $c1,$c1
++ stw $t0,4($tp) ; tp[j-1]
++ stw $t4,0($tp)
++ fctid $T2a,$T2a
++ addc $t2,$t2,$carry
++ adde $t3,$t3,$c1
++ srwi $carry,$t2,16
++ lwz $t7,`$FRAME+40`($sp) ; permuted $t3
++ lwz $t6,`$FRAME+44`($sp) ; permuted $t2
++ fctid $T2b,$T2b
++ srwi $c1,$t3,16
++ insrwi $carry,$t3,16,0
++ lwz $t1,`$FRAME+48`($sp) ; permuted $t5
++ lwz $t0,`$FRAME+52`($sp) ; permuted $t4
++ fctid $T3a,$T3a
++ addc $t6,$t6,$carry
++ adde $t7,$t7,$c1
++ srwi $carry,$t6,16
++ lwz $t5,`$FRAME+56`($sp) ; permuted $t7
++ lwz $t4,`$FRAME+60`($sp) ; permuted $t6
++ fctid $T3b,$T3b
++
++ insrwi $t2,$t6,16,0 ; 64..95 bits
++ insrwi $carry,$t7,16,0
++ srwi $c1,$t7,16
++ lwz $t6,20($tp)
++ lwzu $t7,16($tp)
++ addc $t0,$t0,$carry
++ stfd $T0a,`$FRAME+0`($sp)
++ adde $t1,$t1,$c1
++ srwi $carry,$t0,16
++ stfd $T0b,`$FRAME+8`($sp)
++ insrwi $carry,$t1,16,0
++ srwi $c1,$t1,16
++ addc $t4,$t4,$carry
++ stfd $T1a,`$FRAME+16`($sp)
++ adde $t5,$t5,$c1
++ srwi $carry,$t4,16
++ insrwi $t0,$t4,16,0 ; 96..127 bits
++ stfd $T1b,`$FRAME+24`($sp)
++ insrwi $carry,$t5,16,0
++ srwi $c1,$t5,16
++
++ addc $t2,$t2,$t6
++ stfd $T2a,`$FRAME+32`($sp)
++ adde $t0,$t0,$t7
++ stfd $T2b,`$FRAME+40`($sp)
++ addze $carry,$carry
++ stfd $T3a,`$FRAME+48`($sp)
++ addze $c1,$c1
++ stfd $T3b,`$FRAME+56`($sp)
++ stw $t2,-4($tp) ; tp[j]
++ stw $t0,-8($tp)
++___
++}
++$code.=<<___;
+ bdnz- Linner
+
+ fctid $dota,$dota
+ fctid $dotb,$dotb
++___
++if ($SIZE_T==8 or $flavour =~ /osx/) {
++$code.=<<___;
+ ld $t0,`$FRAME+0`($sp)
+ ld $t1,`$FRAME+8`($sp)
+ ld $t2,`$FRAME+16`($sp)
+@@ -926,7 +1351,116 @@ $code.=<<___;
+ insrdi $t6,$t7,48,0
+ srdi $ovf,$t7,48
+ std $t6,0($tp) ; tp[num-1]
++___
++} else {
++$code.=<<___;
++ lwz $t1,`$FRAME+0`($sp)
++ lwz $t0,`$FRAME+4`($sp)
++ lwz $t3,`$FRAME+8`($sp)
++ lwz $t2,`$FRAME+12`($sp)
++ lwz $t5,`$FRAME+16`($sp)
++ lwz $t4,`$FRAME+20`($sp)
++ lwz $t7,`$FRAME+24`($sp)
++ lwz $t6,`$FRAME+28`($sp)
++ stfd $dota,`$FRAME+64`($sp)
++ stfd $dotb,`$FRAME+72`($sp)
+
++ addc $t0,$t0,$carry
++ adde $t1,$t1,$c1
++ srwi $carry,$t0,16
++ insrwi $carry,$t1,16,0
++ srwi $c1,$t1,16
++ addc $t2,$t2,$carry
++ adde $t3,$t3,$c1
++ srwi $carry,$t2,16
++ insrwi $t0,$t2,16,0 ; 0..31 bits
++ lwz $t2,12($tp) ; tp[j]
++ insrwi $carry,$t3,16,0
++ srwi $c1,$t3,16
++ lwz $t3,8($tp)
++ addc $t4,$t4,$carry
++ adde $t5,$t5,$c1
++ srwi $carry,$t4,16
++ insrwi $carry,$t5,16,0
++ srwi $c1,$t5,16
++ addc $t6,$t6,$carry
++ adde $t7,$t7,$c1
++ srwi $carry,$t6,16
++ insrwi $t4,$t6,16,0 ; 32..63 bits
++ insrwi $carry,$t7,16,0
++ srwi $c1,$t7,16
++
++ addc $t0,$t0,$t2
++ adde $t4,$t4,$t3
++ addze $carry,$carry
++ addze $c1,$c1
++ stw $t0,4($tp) ; tp[j-1]
++ stw $t4,0($tp)
++
++ lwz $t3,`$FRAME+32`($sp) ; permuted $t1
++ lwz $t2,`$FRAME+36`($sp) ; permuted $t0
++ lwz $t7,`$FRAME+40`($sp) ; permuted $t3
++ lwz $t6,`$FRAME+44`($sp) ; permuted $t2
++ lwz $t1,`$FRAME+48`($sp) ; permuted $t5
++ lwz $t0,`$FRAME+52`($sp) ; permuted $t4
++ lwz $t5,`$FRAME+56`($sp) ; permuted $t7
++ lwz $t4,`$FRAME+60`($sp) ; permuted $t6
++
++ addc $t2,$t2,$carry
++ adde $t3,$t3,$c1
++ srwi $carry,$t2,16
++ insrwi $carry,$t3,16,0
++ srwi $c1,$t3,16
++ addc $t6,$t6,$carry
++ adde $t7,$t7,$c1
++ srwi $carry,$t6,16
++ insrwi $t2,$t6,16,0 ; 64..95 bits
++ lwz $t6,20($tp)
++ insrwi $carry,$t7,16,0
++ srwi $c1,$t7,16
++ lwzu $t7,16($tp)
++ addc $t0,$t0,$carry
++ adde $t1,$t1,$c1
++ srwi $carry,$t0,16
++ insrwi $carry,$t1,16,0
++ srwi $c1,$t1,16
++ addc $t4,$t4,$carry
++ adde $t5,$t5,$c1
++ srwi $carry,$t4,16
++ insrwi $t0,$t4,16,0 ; 96..127 bits
++ insrwi $carry,$t5,16,0
++ srwi $c1,$t5,16
++
++ addc $t2,$t2,$t6
++ adde $t0,$t0,$t7
++ lwz $t7,`$FRAME+64`($sp)
++ lwz $t6,`$FRAME+68`($sp)
++ addze $carry,$carry
++ addze $c1,$c1
++ lwz $t5,`$FRAME+72`($sp)
++ lwz $t4,`$FRAME+76`($sp)
++
++ addc $t6,$t6,$carry
++ adde $t7,$t7,$c1
++ stw $t2,-4($tp) ; tp[j]
++ stw $t0,-8($tp)
++ addc $t6,$t6,$ovf
++ addze $t7,$t7
++ srwi $carry,$t6,16
++ insrwi $carry,$t7,16,0
++ srwi $c1,$t7,16
++ addc $t4,$t4,$carry
++ adde $t5,$t5,$c1
++
++ insrwi $t6,$t4,16,0
++ srwi $t4,$t4,16
++ insrwi $t4,$t5,16,0
++ srwi $ovf,$t5,16
++ stw $t6,4($tp) ; tp[num-1]
++ stw $t4,0($tp)
++___
++}
++$code.=<<___;
+ slwi $t7,$num,2
+ addi $i,$i,8
+ subf $nap_d,$t7,$nap_d ; rewind pointer
+@@ -994,14 +1528,14 @@ $code.=<<___ if ($SIZE_T==4);
+ mtctr $j
+
+ .align 4
+-Lsub: ld $t0,8($tp) ; load tp[j..j+3] in 64-bit word order
+- ldu $t2,16($tp)
++Lsub: lwz $t0,12($tp) ; load tp[j..j+3] in 64-bit word order
++ lwz $t1,8($tp)
++ lwz $t2,20($tp)
++ lwzu $t3,16($tp)
+ lwz $t4,4($np) ; load np[j..j+3] in 32-bit word order
+ lwz $t5,8($np)
+ lwz $t6,12($np)
+ lwzu $t7,16($np)
+- extrdi $t1,$t0,32,0
+- extrdi $t3,$t2,32,0
+ subfe $t4,$t4,$t0 ; tp[j]-np[j]
+ stw $t0,4($ap) ; save tp[j..j+3] in 32-bit word order
+ subfe $t5,$t5,$t1 ; tp[j+1]-np[j+1]
+@@ -1052,6 +1586,9 @@ ___
+ $code.=<<___;
+ $POP $i,0($sp)
+ li r3,1 ; signal "handled"
++ $POP r19,`-12*8-13*$SIZE_T`($i)
++ $POP r20,`-12*8-12*$SIZE_T`($i)
++ $POP r21,`-12*8-11*$SIZE_T`($i)
+ $POP r22,`-12*8-10*$SIZE_T`($i)
+ $POP r23,`-12*8-9*$SIZE_T`($i)
+ $POP r24,`-12*8-8*$SIZE_T`($i)
+@@ -1077,8 +1614,9 @@ $code.=<<___;
+ mr $sp,$i
+ blr
+ .long 0
+- .byte 0,12,4,0,0x8c,10,6,0
++ .byte 0,12,4,0,0x8c,13,6,0
+ .long 0
++.size .$fname,.-.$fname
+
+ .asciz "Montgomery Multiplication for PPC64, CRYPTOGAMS by <appro\@openssl.org>"
+ ___
+diff --git a/crypto/evp/e_aes.c b/crypto/evp/e_aes.c
+index 1bfb5d9..51137fd 100644
+--- a/crypto/evp/e_aes.c
++++ b/crypto/evp/e_aes.c
+@@ -153,6 +153,20 @@ void AES_xts_decrypt(const char *inp,char *out,size_t len,
+ const unsigned char iv[16]);
+ #endif
+
++#if defined(OPENSSL_CPUID_OBJ) && (defined(__powerpc__) || defined(__ppc__) || defined(_ARCH_PPC))
++# include "ppc_arch.h"
++# ifdef VPAES_ASM
++# define VPAES_CAPABLE (OPENSSL_ppccap_P & PPC_ALTIVEC)
++# endif
++# define HWAES_CAPABLE (OPENSSL_ppccap_P & PPC_CRYPTO207)
++# define HWAES_set_encrypt_key aes_p8_set_encrypt_key
++# define HWAES_set_decrypt_key aes_p8_set_decrypt_key
++# define HWAES_encrypt aes_p8_encrypt
++# define HWAES_decrypt aes_p8_decrypt
++# define HWAES_cbc_encrypt aes_p8_cbc_encrypt
++# define HWAES_ctr32_encrypt_blocks aes_p8_ctr32_encrypt_blocks
++#endif
++
+ #if defined(AES_ASM) && !defined(I386_ONLY) && ( \
+ ((defined(__i386) || defined(__i386__) || \
+ defined(_M_IX86)) && defined(OPENSSL_IA32_SSE2))|| \
+diff --git a/crypto/modes/Makefile b/crypto/modes/Makefile
+index c825b12..e684e02 100644
+--- a/crypto/modes/Makefile
++++ b/crypto/modes/Makefile
+@@ -56,6 +56,10 @@ ghash-alpha.s: asm/ghash-alpha.pl
+ $(PERL) $< | $(CC) -E - | tee $@ > /dev/null
+ ghash-parisc.s: asm/ghash-parisc.pl
+ $(PERL) asm/ghash-parisc.pl $(PERLASM_SCHEME) $@
++ghashv8-armx.S: asm/ghashv8-armx.pl
++ $(PERL) asm/ghashv8-armx.pl $(PERLASM_SCHEME) $@
++ghashp8-ppc.s: asm/ghashp8-ppc.pl
++ $(PERL) asm/ghashp8-ppc.pl $(PERLASM_SCHEME) $@
+
+ # GNU make "catch all"
+ ghash-%.S: asm/ghash-%.pl; $(PERL) $< $(PERLASM_SCHEME) $@
+diff --git a/crypto/modes/asm/ghashp8-ppc.pl b/crypto/modes/asm/ghashp8-ppc.pl
+new file mode 100755
+index 0000000..e76a58c
+--- /dev/null
++++ b/crypto/modes/asm/ghashp8-ppc.pl
+@@ -0,0 +1,234 @@
++#!/usr/bin/env perl
++#
++# ====================================================================
++# Written by Andy Polyakov <appro at openssl.org> for the OpenSSL
++# project. The module is, however, dual licensed under OpenSSL and
++# CRYPTOGAMS licenses depending on where you obtain it. For further
++# details see http://www.openssl.org/~appro/cryptogams/.
++# ====================================================================
++#
++# GHASH for PowerISA v2.07.
++#
++# July 2014
++#
++# Accurate performance measurements are problematic, because it's
++# always virtualized setup with possibly throttled processor.
++# Relative comparison is therefore more informative. This initial
++# version is ~2.1x slower than hardware-assisted AES-128-CTR, ~12x
++# faster than "4-bit" integer-only compiler-generated 64-bit code.
++# "Initial version" means that there is room for further improvement.
++
++$flavour=shift;
++$output =shift;
++
++if ($flavour =~ /64/) {
++ $SIZE_T=8;
++ $LRSAVE=2*$SIZE_T;
++ $STU="stdu";
++ $POP="ld";
++ $PUSH="std";
++} elsif ($flavour =~ /32/) {
++ $SIZE_T=4;
++ $LRSAVE=$SIZE_T;
++ $STU="stwu";
++ $POP="lwz";
++ $PUSH="stw";
++} else { die "nonsense $flavour"; }
++
++$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
++( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
++( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
++die "can't locate ppc-xlate.pl";
++
++open STDOUT,"| $^X $xlate $flavour $output" || die "can't call $xlate: $!";
++
++my ($Xip,$Htbl,$inp,$len)=map("r$_",(3..6)); # argument block
++
++my ($Xl,$Xm,$Xh,$IN)=map("v$_",(0..3));
++my ($zero,$t0,$t1,$t2,$xC2,$H,$Hh,$Hl,$lemask)=map("v$_",(4..12));
++my $vrsave="r12";
++
++$code=<<___;
++.machine "any"
++
++.text
++
++.globl .gcm_init_p8
++.align 5
++.gcm_init_p8:
++ lis r0,0xfff0
++ li r8,0x10
++ mfspr $vrsave,256
++ li r9,0x20
++ mtspr 256,r0
++ li r10,0x30
++ lvx_u $H,0,r4 # load H
++
++ vspltisb $xC2,-16 # 0xf0
++ vspltisb $t0,1 # one
++ vaddubm $xC2,$xC2,$xC2 # 0xe0
++ vxor $zero,$zero,$zero
++ vor $xC2,$xC2,$t0 # 0xe1
++ vsldoi $xC2,$xC2,$zero,15 # 0xe1...
++ vsldoi $t1,$zero,$t0,1 # ...1
++ vaddubm $xC2,$xC2,$xC2 # 0xc2...
++ vspltisb $t2,7
++ vor $xC2,$xC2,$t1 # 0xc2....01
++ vspltb $t1,$H,0 # most significant byte
++ vsl $H,$H,$t0 # H<<=1
++ vsrab $t1,$t1,$t2 # broadcast carry bit
++ vand $t1,$t1,$xC2
++ vxor $H,$H,$t1 # twisted H
++
++ vsldoi $H,$H,$H,8 # twist even more ...
++ vsldoi $xC2,$zero,$xC2,8 # 0xc2.0
++ vsldoi $Hl,$zero,$H,8 # ... and split
++ vsldoi $Hh,$H,$zero,8
++
++ stvx_u $xC2,0,r3 # save pre-computed table
++ stvx_u $Hl,r8,r3
++ stvx_u $H, r9,r3
++ stvx_u $Hh,r10,r3
++
++ mtspr 256,$vrsave
++ blr
++ .long 0
++ .byte 0,12,0x14,0,0,0,2,0
++ .long 0
++.size .gcm_init_p8,.-.gcm_init_p8
++
++.globl .gcm_gmult_p8
++.align 5
++.gcm_gmult_p8:
++ lis r0,0xfff8
++ li r8,0x10
++ mfspr $vrsave,256
++ li r9,0x20
++ mtspr 256,r0
++ li r10,0x30
++ lvx_u $IN,0,$Xip # load Xi
++
++ lvx_u $Hl,r8,$Htbl # load pre-computed table
++ le?lvsl $lemask,r0,r0
++ lvx_u $H, r9,$Htbl
++ le?vspltisb $t0,0x07
++ lvx_u $Hh,r10,$Htbl
++ le?vxor $lemask,$lemask,$t0
++ lvx_u $xC2,0,$Htbl
++ le?vperm $IN,$IN,$IN,$lemask
++ vxor $zero,$zero,$zero
++
++ vpmsumd $Xl,$IN,$Hl # H.lo·Xi.lo
++ vpmsumd $Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi
++ vpmsumd $Xh,$IN,$Hh # H.hi·Xi.hi
++
++ vpmsumd $t2,$Xl,$xC2 # 1st phase
++
++ vsldoi $t0,$Xm,$zero,8
++ vsldoi $t1,$zero,$Xm,8
++ vxor $Xl,$Xl,$t0
++ vxor $Xh,$Xh,$t1
++
++ vsldoi $Xl,$Xl,$Xl,8
++ vxor $Xl,$Xl,$t2
++
++ vsldoi $t1,$Xl,$Xl,8 # 2nd phase
++ vpmsumd $Xl,$Xl,$xC2
++ vxor $t1,$t1,$Xh
++ vxor $Xl,$Xl,$t1
++
++ le?vperm $Xl,$Xl,$Xl,$lemask
++ stvx_u $Xl,0,$Xip # write out Xi
++
++ mtspr 256,$vrsave
++ blr
++ .long 0
++ .byte 0,12,0x14,0,0,0,2,0
++ .long 0
++.size .gcm_gmult_p8,.-.gcm_gmult_p8
++
++.globl .gcm_ghash_p8
++.align 5
++.gcm_ghash_p8:
++ lis r0,0xfff8
++ li r8,0x10
++ mfspr $vrsave,256
++ li r9,0x20
++ mtspr 256,r0
++ li r10,0x30
++ lvx_u $Xl,0,$Xip # load Xi
++
++ lvx_u $Hl,r8,$Htbl # load pre-computed table
++ le?lvsl $lemask,r0,r0
++ lvx_u $H, r9,$Htbl
++ le?vspltisb $t0,0x07
++ lvx_u $Hh,r10,$Htbl
++ le?vxor $lemask,$lemask,$t0
++ lvx_u $xC2,0,$Htbl
++ le?vperm $Xl,$Xl,$Xl,$lemask
++ vxor $zero,$zero,$zero
++
++ lvx_u $IN,0,$inp
++ addi $inp,$inp,16
++ subi $len,$len,16
++ le?vperm $IN,$IN,$IN,$lemask
++ vxor $IN,$IN,$Xl
++ b Loop
++
++.align 5
++Loop:
++ subic $len,$len,16
++ vpmsumd $Xl,$IN,$Hl # H.lo·Xi.lo
++ subfe. r0,r0,r0 # borrow?-1:0
++ vpmsumd $Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi
++ and r0,r0,$len
++ vpmsumd $Xh,$IN,$Hh # H.hi·Xi.hi
++ add $inp,$inp,r0
++
++ vpmsumd $t2,$Xl,$xC2 # 1st phase
++
++ vsldoi $t0,$Xm,$zero,8
++ vsldoi $t1,$zero,$Xm,8
++ vxor $Xl,$Xl,$t0
++ vxor $Xh,$Xh,$t1
++
++ vsldoi $Xl,$Xl,$Xl,8
++ vxor $Xl,$Xl,$t2
++ lvx_u $IN,0,$inp
++ addi $inp,$inp,16
++
++ vsldoi $t1,$Xl,$Xl,8 # 2nd phase
++ vpmsumd $Xl,$Xl,$xC2
++ le?vperm $IN,$IN,$IN,$lemask
++ vxor $t1,$t1,$Xh
++ vxor $IN,$IN,$t1
++ vxor $IN,$IN,$Xl
++ beq Loop # did $len-=16 borrow?
++
++ vxor $Xl,$Xl,$t1
++ le?vperm $Xl,$Xl,$Xl,$lemask
++ stvx_u $Xl,0,$Xip # write out Xi
++
++ mtspr 256,$vrsave
++ blr
++ .long 0
++ .byte 0,12,0x14,0,0,0,4,0
++ .long 0
++.size .gcm_ghash_p8,.-.gcm_ghash_p8
++
++.asciz "GHASH for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
++.align 2
++___
++
++foreach (split("\n",$code)) {
++ if ($flavour =~ /le$/o) { # little-endian
++ s/le\?//o or
++ s/be\?/#be#/o;
++ } else {
++ s/le\?/#le#/o or
++ s/be\?//o;
++ }
++ print $_,"\n";
++}
++
++close STDOUT; # enforce flush
+diff --git a/crypto/modes/gcm128.c b/crypto/modes/gcm128.c
+index 0e6ff8b..6f8e7ee 100644
+--- a/crypto/modes/gcm128.c
++++ b/crypto/modes/gcm128.c
+@@ -671,6 +671,21 @@ void gcm_ghash_4bit_x86(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len
+ void gcm_gmult_neon(u64 Xi[2],const u128 Htable[16]);
+ void gcm_ghash_neon(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
+ # endif
++# elif defined(__sparc__) || defined(__sparc)
++# include "sparc_arch.h"
++# define GHASH_ASM_SPARC
++# define GCM_FUNCREF_4BIT
++extern unsigned int OPENSSL_sparcv9cap_P[];
++void gcm_init_vis3(u128 Htable[16],const u64 Xi[2]);
++void gcm_gmult_vis3(u64 Xi[2],const u128 Htable[16]);
++void gcm_ghash_vis3(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
++#elif defined(OPENSSL_CPUID_OBJ) && (defined(__powerpc__) || defined(__ppc__) || defined(_ARCH_PPC))
++# include "ppc_arch.h"
++# define GHASH_ASM_PPC
++# define GCM_FUNCREF_4BIT
++void gcm_init_p8(u128 Htable[16],const u64 Xi[2]);
++void gcm_gmult_p8(u64 Xi[2],const u128 Htable[16]);
++void gcm_ghash_p8(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
+ # endif
+ #endif
+
+@@ -747,6 +762,16 @@ void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block)
+ ctx->gmult = gcm_gmult_4bit;
+ ctx->ghash = gcm_ghash_4bit;
+ }
++# elif defined(GHASH_ASM_PPC)
++ if (OPENSSL_ppccap_P & PPC_CRYPTO207) {
++ gcm_init_p8(ctx->Htable,ctx->H.u);
++ ctx->gmult = gcm_gmult_p8;
++ ctx->ghash = gcm_ghash_p8;
++ } else {
++ gcm_init_4bit(ctx->Htable,ctx->H.u);
++ ctx->gmult = gcm_gmult_4bit;
++ ctx->ghash = gcm_ghash_4bit;
++ }
+ # else
+ gcm_init_4bit(ctx->Htable,ctx->H.u);
+ # endif
+diff --git a/crypto/perlasm/ppc-xlate.pl b/crypto/perlasm/ppc-xlate.pl
+index a3edd98..f89e814 100755
+--- a/crypto/perlasm/ppc-xlate.pl
++++ b/crypto/perlasm/ppc-xlate.pl
+@@ -27,7 +27,8 @@ my $globl = sub {
+ /osx/ && do { $name = "_$name";
+ last;
+ };
+- /linux.*32/ && do { $ret .= ".globl $name\n";
++ /linux.*(32|64le)/
++ && do { $ret .= ".globl $name\n";
+ $ret .= ".type $name,\@function";
+ last;
+ };
+@@ -37,7 +38,6 @@ my $globl = sub {
+ $ret .= ".align 3\n";
+ $ret .= "$name:\n";
+ $ret .= ".quad .$name,.TOC.\@tocbase,0\n";
+- $ret .= ".size $name,24\n";
+ $ret .= ".previous\n";
+
+ $name = ".$name";
+@@ -50,7 +50,9 @@ my $globl = sub {
+ $ret;
+ };
+ my $text = sub {
+- ($flavour =~ /aix/) ? ".csect" : ".text";
++ my $ret = ($flavour =~ /aix/) ? ".csect\t.text[PR],7" : ".text";
++ $ret = ".abiversion 2\n".$ret if ($flavour =~ /linux.*64le/);
++ $ret;
+ };
+ my $machine = sub {
+ my $junk = shift;
+@@ -62,9 +64,12 @@ my $machine = sub {
+ ".machine $arch";
+ };
+ my $size = sub {
+- if ($flavour =~ /linux.*32/)
++ if ($flavour =~ /linux/)
+ { shift;
+- ".size " . join(",", at _);
++ my $name = shift; $name =~ s|^[\.\_]||;
++ my $ret = ".size $name,.-".($flavour=~/64$/?".":"").$name;
++ $ret .= "\n.size .$name,.-.$name" if ($flavour=~/64$/);
++ $ret;
+ }
+ else
+ { ""; }
+@@ -77,6 +82,25 @@ my $asciz = sub {
+ else
+ { ""; }
+ };
++my $quad = sub {
++ shift;
++ my @ret;
++ my ($hi,$lo);
++ for (@_) {
++ if (/^0x([0-9a-f]*?)([0-9a-f]{1,8})$/io)
++ { $hi=$1?"0x$1":"0"; $lo="0x$2"; }
++ elsif (/^([0-9]+)$/o)
++ { $hi=$1>>32; $lo=$1&0xffffffff; } # error-prone with 32-bit perl
++ else
++ { $hi=undef; $lo=$_; }
++
++ if (defined($hi))
++ { push(@ret,$flavour=~/le$/o?".long\t$lo,$hi":".long\t$hi,$lo"); }
++ else
++ { push(@ret,".quad $lo"); }
++ }
++ join("\n", at ret);
++};
+
+ ################################################################
+ # simplified mnemonics not handled by at least one assembler
+@@ -122,6 +146,46 @@ my $extrdi = sub {
+ $b = ($b+$n)&63; $n = 64-$n;
+ " rldicl $ra,$rs,$b,$n";
+ };
++my $vmr = sub {
++ my ($f,$vx,$vy) = @_;
++ " vor $vx,$vy,$vy";
++};
++
++# PowerISA 2.06 stuff
++sub vsxmem_op {
++ my ($f, $vrt, $ra, $rb, $op) = @_;
++ " .long ".sprintf "0x%X",(31<<26)|($vrt<<21)|($ra<<16)|($rb<<11)|($op*2+1);
++}
++# made-up unaligned memory reference AltiVec/VMX instructions
++my $lvx_u = sub { vsxmem_op(@_, 844); }; # lxvd2x
++my $stvx_u = sub { vsxmem_op(@_, 972); }; # stxvd2x
++my $lvdx_u = sub { vsxmem_op(@_, 588); }; # lxsdx
++my $stvdx_u = sub { vsxmem_op(@_, 716); }; # stxsdx
++my $lvx_4w = sub { vsxmem_op(@_, 780); }; # lxvw4x
++my $stvx_4w = sub { vsxmem_op(@_, 908); }; # stxvw4x
++
++# PowerISA 2.07 stuff
++sub vcrypto_op {
++ my ($f, $vrt, $vra, $vrb, $op) = @_;
++ " .long ".sprintf "0x%X",(4<<26)|($vrt<<21)|($vra<<16)|($vrb<<11)|$op;
++}
++my $vcipher = sub { vcrypto_op(@_, 1288); };
++my $vcipherlast = sub { vcrypto_op(@_, 1289); };
++my $vncipher = sub { vcrypto_op(@_, 1352); };
++my $vncipherlast= sub { vcrypto_op(@_, 1353); };
++my $vsbox = sub { vcrypto_op(@_, 0, 1480); };
++my $vshasigmad = sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1730); };
++my $vshasigmaw = sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1666); };
++my $vpmsumb = sub { vcrypto_op(@_, 1032); };
++my $vpmsumd = sub { vcrypto_op(@_, 1224); };
++my $vpmsubh = sub { vcrypto_op(@_, 1096); };
++my $vpmsumw = sub { vcrypto_op(@_, 1160); };
++my $vaddudm = sub { vcrypto_op(@_, 192); };
++
++my $mtsle = sub {
++ my ($f, $arg) = @_;
++ " .long ".sprintf "0x%X",(31<<26)|($arg<<21)|(147*2);
++};
+
+ while($line=<>) {
+
+@@ -138,7 +202,10 @@ while($line=<>) {
+ {
+ $line =~ s|(^[\.\w]+)\:\s*||;
+ my $label = $1;
+- printf "%s:",($GLOBALS{$label} or $label) if ($label);
++ if ($label) {
++ printf "%s:",($GLOBALS{$label} or $label);
++ printf "\n.localentry\t$GLOBALS{$label},0" if ($GLOBALS{$label} && $flavour =~ /linux.*64le/);
++ }
+ }
+
+ {
+@@ -147,7 +214,7 @@ while($line=<>) {
+ my $mnemonic = $2;
+ my $f = $3;
+ my $opcode = eval("\$$mnemonic");
+- $line =~ s|\bc?[rf]([0-9]+)\b|$1|g if ($c ne "." and $flavour !~ /osx/);
++ $line =~ s/\b(c?[rf]|v|vs)([0-9]+)\b/$2/g if ($c ne "." and $flavour !~ /osx/);
+ if (ref($opcode) eq 'CODE') { $line = &$opcode($f,split(',',$line)); }
+ elsif ($mnemonic) { $line = $c.$mnemonic.$f."\t".$line; }
+ }
+diff --git a/crypto/ppc_arch.h b/crypto/ppc_arch.h
+new file mode 100644
+index 0000000..1192edf
+--- /dev/null
++++ b/crypto/ppc_arch.h
+@@ -0,0 +1,10 @@
++#ifndef __PPC_ARCH_H__
++#define __PPC_ARCH_H__
++
++extern unsigned int OPENSSL_ppccap_P;
++
++#define PPC_FPU64 (1<<0)
++#define PPC_ALTIVEC (1<<1)
++#define PPC_CRYPTO207 (1<<2)
++
++#endif
+diff --git a/crypto/ppccap.c b/crypto/ppccap.c
+index f71ba66..13c2ca5 100644
+--- a/crypto/ppccap.c
++++ b/crypto/ppccap.c
+@@ -4,13 +4,15 @@
+ #include <setjmp.h>
+ #include <signal.h>
+ #include <unistd.h>
++#if defined(__linux) || defined(_AIX)
++#include <sys/utsname.h>
++#endif
+ #include <crypto.h>
+ #include <openssl/bn.h>
+
+-#define PPC_FPU64 (1<<0)
+-#define PPC_ALTIVEC (1<<1)
++#include "ppc_arch.h"
+
+-static int OPENSSL_ppccap_P = 0;
++unsigned int OPENSSL_ppccap_P = 0;
+
+ static sigset_t all_masked;
+
+@@ -22,7 +24,7 @@ int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_U
+
+ if (sizeof(size_t)==4)
+ {
+-#if (defined(__APPLE__) && defined(__MACH__))
++#if 1 || (defined(__APPLE__) && defined(__MACH__))
+ if (num>=8 && (num&3)==0 && (OPENSSL_ppccap_P&PPC_FPU64))
+ return bn_mul_mont_fpu64(rp,ap,bp,np,n0,num);
+ #else
+@@ -50,11 +52,28 @@ int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_U
+ }
+ #endif
+
++void sha256_block_p8(void *ctx,const void *inp,size_t len);
++void sha256_block_ppc(void *ctx,const void *inp,size_t len);
++void sha256_block_data_order(void *ctx,const void *inp,size_t len)
++ {
++ OPENSSL_ppccap_P&PPC_CRYPTO207? sha256_block_p8(ctx,inp,len):
++ sha256_block_ppc(ctx,inp,len);
++ }
++
++void sha512_block_p8(void *ctx,const void *inp,size_t len);
++void sha512_block_ppc(void *ctx,const void *inp,size_t len);
++void sha512_block_data_order(void *ctx,const void *inp,size_t len)
++ {
++ OPENSSL_ppccap_P&PPC_CRYPTO207? sha512_block_p8(ctx,inp,len):
++ sha512_block_ppc(ctx,inp,len);
++ }
++
+ static sigjmp_buf ill_jmp;
+ static void ill_handler (int sig) { siglongjmp(ill_jmp,sig); }
+
+ void OPENSSL_ppc64_probe(void);
+ void OPENSSL_altivec_probe(void);
++void OPENSSL_crypto207_probe(void);
+
+ void OPENSSL_cpuid_setup(void)
+ {
+@@ -85,12 +104,14 @@ void OPENSSL_cpuid_setup(void)
+ OPENSSL_ppccap_P = 0;
+
+ #if defined(_AIX)
+- if (sizeof(size_t)==4
++ if (sizeof(size_t)==4)
++ {
++ struct utsname uts;
+ # if defined(_SC_AIX_KERNEL_BITMODE)
+- && sysconf(_SC_AIX_KERNEL_BITMODE)!=64
++ if (sysconf(_SC_AIX_KERNEL_BITMODE)!=64) return;
+ # endif
+- )
+- return;
++ if (uname(&uts)!=0 || atoi(uts.version)<6) return;
++ }
+ #endif
+
+ memset(&ill_act,0,sizeof(ill_act));
+@@ -102,6 +123,10 @@ void OPENSSL_cpuid_setup(void)
+
+ if (sizeof(size_t)==4)
+ {
++#ifdef __linux
++ struct utsname uts;
++ if (uname(&uts)==0 && strcmp(uts.machine,"ppc64")==0)
++#endif
+ if (sigsetjmp(ill_jmp,1) == 0)
+ {
+ OPENSSL_ppc64_probe();
+@@ -119,6 +144,11 @@ void OPENSSL_cpuid_setup(void)
+ {
+ OPENSSL_altivec_probe();
+ OPENSSL_ppccap_P |= PPC_ALTIVEC;
++ if (sigsetjmp(ill_jmp,1) == 0)
++ {
++ OPENSSL_crypto207_probe();
++ OPENSSL_ppccap_P |= PPC_CRYPTO207;
++ }
+ }
+
+ sigaction (SIGILL,&ill_oact,NULL);
+diff --git a/crypto/ppccpuid.pl b/crypto/ppccpuid.pl
+index 4ba736a..56cc851 100755
+--- a/crypto/ppccpuid.pl
++++ b/crypto/ppccpuid.pl
+@@ -31,6 +31,7 @@ $code=<<___;
+ blr
+ .long 0
+ .byte 0,12,0x14,0,0,0,0,0
++.size .OPENSSL_ppc64_probe,.-.OPENSSL_ppc64_probe
+
+ .globl .OPENSSL_altivec_probe
+ .align 4
+@@ -39,6 +40,17 @@ $code=<<___;
+ blr
+ .long 0
+ .byte 0,12,0x14,0,0,0,0,0
++.size .OPENSSL_altivec_probe,.-.OPENSSL_altivec_probe
++
++.globl .OPENSSL_crypto207_probe
++.align 4
++.OPENSSL_crypto207_probe:
++ lvx_u v0,0,r1
++ vcipher v0,v0,v0
++ blr
++ .long 0
++ .byte 0,12,0x14,0,0,0,0,0
++.size .OPENSSL_crypto207_probe,.-.OPENSSL_crypto207_probe
+
+ .globl .OPENSSL_wipe_cpu
+ .align 4
+@@ -71,6 +83,7 @@ $code=<<___;
+ blr
+ .long 0
+ .byte 0,12,0x14,0,0,0,0,0
++.size .OPENSSL_wipe_cpu,.-.OPENSSL_wipe_cpu
+
+ .globl .OPENSSL_atomic_add
+ .align 4
+@@ -84,6 +97,7 @@ Ladd: lwarx r5,0,r3
+ .long 0
+ .byte 0,12,0x14,0,0,0,2,0
+ .long 0
++.size .OPENSSL_atomic_add,.-.OPENSSL_atomic_add
+
+ .globl .OPENSSL_rdtsc
+ .align 4
+@@ -93,6 +107,7 @@ Ladd: lwarx r5,0,r3
+ blr
+ .long 0
+ .byte 0,12,0x14,0,0,0,0,0
++.size .OPENSSL_rdtsc,.-.OPENSSL_rdtsc
+
+ .globl .OPENSSL_cleanse
+ .align 4
+@@ -125,7 +140,99 @@ Laligned:
+ .long 0
+ .byte 0,12,0x14,0,0,0,2,0
+ .long 0
++.size .OPENSSL_cleanse,.-.OPENSSL_cleanse
++___
++{
++my ($out,$cnt,$max)=("r3","r4","r5");
++my ($tick,$lasttick)=("r6","r7");
++my ($diff,$lastdiff)=("r8","r9");
++
++$code.=<<___;
++.globl .OPENSSL_instrument_bus
++.align 4
++.OPENSSL_instrument_bus:
++ mtctr $cnt
++
++ mftb $lasttick # collect 1st tick
++ li $diff,0
++
++ dcbf 0,$out # flush cache line
++ lwarx $tick,0,$out # load and lock
++ add $tick,$tick,$diff
++ stwcx. $tick,0,$out
++ stwx $tick,0,$out
++
++Loop: mftb $tick
++ sub $diff,$tick,$lasttick
++ mr $lasttick,$tick
++ dcbf 0,$out # flush cache line
++ lwarx $tick,0,$out # load and lock
++ add $tick,$tick,$diff
++ stwcx. $tick,0,$out
++ stwx $tick,0,$out
++ addi $out,$out,4 # ++$out
++ bdnz Loop
++
++ mr r3,$cnt
++ blr
++ .long 0
++ .byte 0,12,0x14,0,0,0,2,0
++ .long 0
++.size .OPENSSL_instrument_bus,.-.OPENSSL_instrument_bus
++
++.globl .OPENSSL_instrument_bus2
++.align 4
++.OPENSSL_instrument_bus2:
++ mr r0,$cnt
++ slwi $cnt,$cnt,2
++
++ mftb $lasttick # collect 1st tick
++ li $diff,0
++
++ dcbf 0,$out # flush cache line
++ lwarx $tick,0,$out # load and lock
++ add $tick,$tick,$diff
++ stwcx. $tick,0,$out
++ stwx $tick,0,$out
++
++ mftb $tick # collect 1st diff
++ sub $diff,$tick,$lasttick
++ mr $lasttick,$tick
++ mr $lastdiff,$diff
++Loop2:
++ dcbf 0,$out # flush cache line
++ lwarx $tick,0,$out # load and lock
++ add $tick,$tick,$diff
++ stwcx. $tick,0,$out
++ stwx $tick,0,$out
++
++ addic. $max,$max,-1
++ beq Ldone2
++
++ mftb $tick
++ sub $diff,$tick,$lasttick
++ mr $lasttick,$tick
++ cmplw 7,$diff,$lastdiff
++ mr $lastdiff,$diff
++
++ mfcr $tick # pull cr
++ not $tick,$tick # flip bits
++ rlwinm $tick,$tick,1,29,29 # isolate flipped eq bit and scale
++
++ sub. $cnt,$cnt,$tick # conditional --$cnt
++ add $out,$out,$tick # conditional ++$out
++ bne Loop2
++
++Ldone2:
++ srwi $cnt,$cnt,2
++ sub r3,r0,$cnt
++ blr
++ .long 0
++ .byte 0,12,0x14,0,0,0,3,0
++ .long 0
++.size .OPENSSL_instrument_bus2,.-.OPENSSL_instrument_bus2
+ ___
++}
+
+ $code =~ s/\`([^\`]*)\`/eval $1/gem;
+ print $code;
+diff --git a/crypto/sha/Makefile b/crypto/sha/Makefile
+index 6d191d3..58c6705 100644
+--- a/crypto/sha/Makefile
++++ b/crypto/sha/Makefile
+@@ -73,6 +73,8 @@ sha512-sparcv9.s:asm/sha512-sparcv9.pl; $(PERL) asm/sha512-sparcv9.pl $@ $(CFLAG
+ sha1-ppc.s: asm/sha1-ppc.pl; $(PERL) asm/sha1-ppc.pl $(PERLASM_SCHEME) $@
+ sha256-ppc.s: asm/sha512-ppc.pl; $(PERL) asm/sha512-ppc.pl $(PERLASM_SCHEME) $@
+ sha512-ppc.s: asm/sha512-ppc.pl; $(PERL) asm/sha512-ppc.pl $(PERLASM_SCHEME) $@
++sha256p8-ppc.s: asm/sha512p8-ppc.pl; $(PERL) asm/sha512p8-ppc.pl $(PERLASM_SCHEME) $@
++sha512p8-ppc.s: asm/sha512p8-ppc.pl; $(PERL) asm/sha512p8-ppc.pl $(PERLASM_SCHEME) $@
+
+ sha1-parisc.s: asm/sha1-parisc.pl; $(PERL) asm/sha1-parisc.pl $(PERLASM_SCHEME) $@
+ sha256-parisc.s:asm/sha512-parisc.pl; $(PERL) asm/sha512-parisc.pl $(PERLASM_SCHEME) $@
+diff --git a/crypto/sha/asm/sha1-ppc.pl b/crypto/sha/asm/sha1-ppc.pl
+index 2140dd2..df59896 100755
+--- a/crypto/sha/asm/sha1-ppc.pl
++++ b/crypto/sha/asm/sha1-ppc.pl
+@@ -9,8 +9,7 @@
+
+ # I let hardware handle unaligned input(*), except on page boundaries
+ # (see below for details). Otherwise straightforward implementation
+-# with X vector in register bank. The module is big-endian [which is
+-# not big deal as there're no little-endian targets left around].
++# with X vector in register bank.
+ #
+ # (*) this means that this module is inappropriate for PPC403? Does
+ # anybody know if pre-POWER3 can sustain unaligned load?
+@@ -38,6 +37,10 @@ if ($flavour =~ /64/) {
+ $PUSH ="stw";
+ } else { die "nonsense $flavour"; }
+
++# Define endianness based on flavour
++# i.e.: linux64le
++$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;
++
+ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+ ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
+ ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
+@@ -68,14 +71,28 @@ $T ="r12";
+ @X=("r16","r17","r18","r19","r20","r21","r22","r23",
+ "r24","r25","r26","r27","r28","r29","r30","r31");
+
++sub loadbe {
++my ($dst, $src, $temp_reg) = @_;
++$code.=<<___ if (!$LITTLE_ENDIAN);
++ lwz $dst,$src
++___
++$code.=<<___ if ($LITTLE_ENDIAN);
++ lwz $temp_reg,$src
++ rotlwi $dst,$temp_reg,8
++ rlwimi $dst,$temp_reg,24,0,7
++ rlwimi $dst,$temp_reg,24,16,23
++___
++}
++
+ sub BODY_00_19 {
+ my ($i,$a,$b,$c,$d,$e,$f)=@_;
+ my $j=$i+1;
+-$code.=<<___ if ($i==0);
+- lwz @X[$i],`$i*4`($inp)
+-___
++
++ # Since the last value of $f is discarded, we can use
++ # it as a temp reg to swap byte-order when needed.
++ loadbe("@X[$i]","`$i*4`($inp)",$f) if ($i==0);
++ loadbe("@X[$j]","`$j*4`($inp)",$f) if ($i<15);
+ $code.=<<___ if ($i<15);
+- lwz @X[$j],`$j*4`($inp)
+ add $f,$K,$e
+ rotlwi $e,$a,5
+ add $f,$f, at X[$i]
+@@ -108,31 +125,31 @@ my ($i,$a,$b,$c,$d,$e,$f)=@_;
+ my $j=$i+1;
+ $code.=<<___ if ($i<79);
+ add $f,$K,$e
++ xor $t0,$b,$d
+ rotlwi $e,$a,5
+ xor @X[$j%16], at X[$j%16], at X[($j+2)%16]
+ add $f,$f, at X[$i%16]
+- xor $t0,$b,$c
++ xor $t0,$t0,$c
+ xor @X[$j%16], at X[$j%16], at X[($j+8)%16]
+- add $f,$f,$e
++ add $f,$f,$t0
+ rotlwi $b,$b,30
+- xor $t0,$t0,$d
+ xor @X[$j%16], at X[$j%16], at X[($j+13)%16]
+- add $f,$f,$t0
++ add $f,$f,$e
+ rotlwi @X[$j%16], at X[$j%16],1
+ ___
+ $code.=<<___ if ($i==79);
+ add $f,$K,$e
++ xor $t0,$b,$d
+ rotlwi $e,$a,5
+ lwz r16,0($ctx)
+ add $f,$f, at X[$i%16]
+- xor $t0,$b,$c
++ xor $t0,$t0,$c
+ lwz r17,4($ctx)
+- add $f,$f,$e
++ add $f,$f,$t0
+ rotlwi $b,$b,30
+ lwz r18,8($ctx)
+- xor $t0,$t0,$d
+ lwz r19,12($ctx)
+- add $f,$f,$t0
++ add $f,$f,$e
+ lwz r20,16($ctx)
+ ___
+ }
+@@ -316,6 +333,7 @@ $code.=<<___;
+ blr
+ .long 0
+ .byte 0,12,0x14,0,0,0,0,0
++.size .sha1_block_data_order,.-.sha1_block_data_order
+ ___
+ $code.=<<___;
+ .asciz "SHA1 block transform for PPC, CRYPTOGAMS by <appro\@fy.chalmers.se>"
+diff --git a/crypto/sha/asm/sha512-ppc.pl b/crypto/sha/asm/sha512-ppc.pl
+index 6b44a68..734f3c1 100755
+--- a/crypto/sha/asm/sha512-ppc.pl
++++ b/crypto/sha/asm/sha512-ppc.pl
+@@ -1,7 +1,7 @@
+ #!/usr/bin/env perl
+
+ # ====================================================================
+-# Written by Andy Polyakov <appro at fy.chalmers.se> for the OpenSSL
++# Written by Andy Polyakov <appro at openssl.org> for the OpenSSL
+ # project. The module is, however, dual licensed under OpenSSL and
+ # CRYPTOGAMS licenses depending on where you obtain it. For further
+ # details see http://www.openssl.org/~appro/cryptogams/.
+@@ -9,8 +9,7 @@
+
+ # I let hardware handle unaligned input, except on page boundaries
+ # (see below for details). Otherwise straightforward implementation
+-# with X vector in register bank. The module is big-endian [which is
+-# not big deal as there're no little-endian targets left around].
++# with X vector in register bank.
+
+ # sha256 | sha512
+ # -m64 -m32 | -m64 -m32
+@@ -56,6 +55,8 @@ if ($flavour =~ /64/) {
+ $PUSH="stw";
+ } else { die "nonsense $flavour"; }
+
++$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;
++
+ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+ ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
+ ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
+@@ -64,7 +65,7 @@ die "can't locate ppc-xlate.pl";
+ open STDOUT,"| $^X $xlate $flavour $output" || die "can't call $xlate: $!";
+
+ if ($output =~ /512/) {
+- $func="sha512_block_data_order";
++ $func="sha512_block_ppc";
+ $SZ=8;
+ @Sigma0=(28,34,39);
+ @Sigma1=(14,18,41);
+@@ -76,7 +77,7 @@ if ($output =~ /512/) {
+ $ROR="rotrdi";
+ $SHR="srdi";
+ } else {
+- $func="sha256_block_data_order";
++ $func="sha256_block_ppc";
+ $SZ=4;
+ @Sigma0=( 2,13,22);
+ @Sigma1=( 6,11,25);
+@@ -110,7 +111,7 @@ $B ="r9";
+ $C ="r10";
+ $D ="r11";
+ $E ="r12";
+-$F ="r13"; $F="r2" if ($SIZE_T==8);# reassigned to exempt TLS pointer
++$F =$t1; $t1 = "r0"; # stay away from "r13";
+ $G ="r14";
+ $H ="r15";
+
+@@ -118,24 +119,23 @@ $H ="r15";
+ @X=("r16","r17","r18","r19","r20","r21","r22","r23",
+ "r24","r25","r26","r27","r28","r29","r30","r31");
+
+-$inp="r31"; # reassigned $inp! aliases with @X[15]
++$inp="r31" if($SZ==4 || $SIZE_T==8); # reassigned $inp! aliases with @X[15]
+
+ sub ROUND_00_15 {
+ my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_;
+ $code.=<<___;
+- $LD $T,`$i*$SZ`($Tbl)
+ $ROR $a0,$e,$Sigma1[0]
+ $ROR $a1,$e,$Sigma1[1]
+ and $t0,$f,$e
+- andc $t1,$g,$e
+- add $T,$T,$h
+ xor $a0,$a0,$a1
++ add $h,$h,$t1
++ andc $t1,$g,$e
+ $ROR $a1,$a1,`$Sigma1[2]-$Sigma1[1]`
+ or $t0,$t0,$t1 ; Ch(e,f,g)
+- add $T,$T, at X[$i]
++ add $h,$h, at X[$i%16]
+ xor $a0,$a0,$a1 ; Sigma1(e)
+- add $T,$T,$t0
+- add $T,$T,$a0
++ add $h,$h,$t0
++ add $h,$h,$a0
+
+ $ROR $a0,$a,$Sigma0[0]
+ $ROR $a1,$a,$Sigma0[1]
+@@ -146,9 +146,14 @@ $code.=<<___;
+ xor $t0,$t0,$t1
+ and $t1,$b,$c
+ xor $a0,$a0,$a1 ; Sigma0(a)
+- add $d,$d,$T
++ add $d,$d,$h
+ xor $t0,$t0,$t1 ; Maj(a,b,c)
+- add $h,$T,$a0
++___
++$code.=<<___ if ($i<15);
++ $LD $t1,`($i+1)*$SZ`($Tbl)
++___
++$code.=<<___;
++ add $h,$h,$a0
+ add $h,$h,$t0
+
+ ___
+@@ -169,10 +174,11 @@ $code.=<<___;
+ add @X[$i], at X[$i], at X[($i+9)%16]
+ xor $a0,$a0,$a1 ; sigma0(X[(i+1)&0x0f])
+ xor $t0,$t0,$t1 ; sigma1(X[(i+14)&0x0f])
++ $LD $t1,`$i*$SZ`($Tbl)
+ add @X[$i], at X[$i],$a0
+ add @X[$i], at X[$i],$t0
+ ___
+-&ROUND_00_15($i,$a,$b,$c,$d,$e,$f,$g,$h);
++&ROUND_00_15($i+16,$a,$b,$c,$d,$e,$f,$g,$h);
+ }
+
+ $code=<<___;
+@@ -188,8 +194,6 @@ $func:
+
+ $PUSH $ctx,`$FRAME-$SIZE_T*22`($sp)
+
+- $PUSH $toc,`$FRAME-$SIZE_T*20`($sp)
+- $PUSH r13,`$FRAME-$SIZE_T*19`($sp)
+ $PUSH r14,`$FRAME-$SIZE_T*18`($sp)
+ $PUSH r15,`$FRAME-$SIZE_T*17`($sp)
+ $PUSH r16,`$FRAME-$SIZE_T*16`($sp)
+@@ -209,7 +213,10 @@ $func:
+ $PUSH r30,`$FRAME-$SIZE_T*2`($sp)
+ $PUSH r31,`$FRAME-$SIZE_T*1`($sp)
+ $PUSH r0,`$FRAME+$LRSAVE`($sp)
++___
+
++if ($SZ==4 || $SIZE_T==8) {
++$code.=<<___;
+ $LD $A,`0*$SZ`($ctx)
+ mr $inp,r4 ; incarnate $inp
+ $LD $B,`1*$SZ`($ctx)
+@@ -219,7 +226,16 @@ $func:
+ $LD $F,`5*$SZ`($ctx)
+ $LD $G,`6*$SZ`($ctx)
+ $LD $H,`7*$SZ`($ctx)
++___
++} else {
++ for ($i=16;$i<32;$i++) {
++ $code.=<<___;
++ lwz r$i,`$LITTLE_ENDIAN^(4*($i-16))`($ctx)
++___
++ }
++}
+
++$code.=<<___;
+ bl LPICmeup
+ LPICedup:
+ andi. r0,$inp,3
+@@ -255,6 +271,9 @@ Lunaligned:
+ Lcross_page:
+ li $t1,`16*$SZ/4`
+ mtctr $t1
++___
++if ($SZ==4 || $SIZE_T==8) {
++$code.=<<___;
+ addi r20,$sp,$LOCALS ; aligned spot below the frame
+ Lmemcpy:
+ lbz r16,0($inp)
+@@ -268,7 +287,26 @@ Lmemcpy:
+ stb r19,3(r20)
+ addi r20,r20,4
+ bdnz Lmemcpy
++___
++} else {
++$code.=<<___;
++ addi r12,$sp,$LOCALS ; aligned spot below the frame
++Lmemcpy:
++ lbz r8,0($inp)
++ lbz r9,1($inp)
++ lbz r10,2($inp)
++ lbz r11,3($inp)
++ addi $inp,$inp,4
++ stb r8,0(r12)
++ stb r9,1(r12)
++ stb r10,2(r12)
++ stb r11,3(r12)
++ addi r12,r12,4
++ bdnz Lmemcpy
++___
++}
+
++$code.=<<___;
+ $PUSH $inp,`$FRAME-$SIZE_T*26`($sp) ; save real inp
+ addi $t1,$sp,`$LOCALS+16*$SZ` ; fictitious end pointer
+ addi $inp,$sp,$LOCALS ; fictitious inp pointer
+@@ -283,8 +321,6 @@ Lmemcpy:
+
+ Ldone:
+ $POP r0,`$FRAME+$LRSAVE`($sp)
+- $POP $toc,`$FRAME-$SIZE_T*20`($sp)
+- $POP r13,`$FRAME-$SIZE_T*19`($sp)
+ $POP r14,`$FRAME-$SIZE_T*18`($sp)
+ $POP r15,`$FRAME-$SIZE_T*17`($sp)
+ $POP r16,`$FRAME-$SIZE_T*16`($sp)
+@@ -309,27 +345,48 @@ Ldone:
+ .long 0
+ .byte 0,12,4,1,0x80,18,3,0
+ .long 0
++___
+
++if ($SZ==4 || $SIZE_T==8) {
++$code.=<<___;
+ .align 4
+ Lsha2_block_private:
++ $LD $t1,0($Tbl)
+ ___
+ for($i=0;$i<16;$i++) {
+-$code.=<<___ if ($SZ==4);
++$code.=<<___ if ($SZ==4 && !$LITTLE_ENDIAN);
+ lwz @X[$i],`$i*$SZ`($inp)
+ ___
++$code.=<<___ if ($SZ==4 && $LITTLE_ENDIAN);
++ lwz $a0,`$i*$SZ`($inp)
++ rotlwi @X[$i],$a0,8
++ rlwimi @X[$i],$a0,24,0,7
++ rlwimi @X[$i],$a0,24,16,23
++___
+ # 64-bit loads are split to 2x32-bit ones, as CPU can't handle
+ # unaligned 64-bit loads, only 32-bit ones...
+-$code.=<<___ if ($SZ==8);
++$code.=<<___ if ($SZ==8 && !$LITTLE_ENDIAN);
+ lwz $t0,`$i*$SZ`($inp)
+ lwz @X[$i],`$i*$SZ+4`($inp)
+ insrdi @X[$i],$t0,32,0
+ ___
++$code.=<<___ if ($SZ==8 && $LITTLE_ENDIAN);
++ lwz $a0,`$i*$SZ`($inp)
++ lwz $a1,`$i*$SZ+4`($inp)
++ rotlwi $t0,$a0,8
++ rotlwi @X[$i],$a1,8
++ rlwimi $t0,$a0,24,0,7
++ rlwimi @X[$i],$a1,24,0,7
++ rlwimi $t0,$a0,24,16,23
++ rlwimi @X[$i],$a1,24,16,23
++ insrdi @X[$i],$t0,32,0
++___
+ &ROUND_00_15($i, at V);
+ unshift(@V,pop(@V));
+ }
+ $code.=<<___;
+- li $T,`$rounds/16-1`
+- mtctr $T
++ li $t0,`$rounds/16-1`
++ mtctr $t0
+ .align 4
+ Lrounds:
+ addi $Tbl,$Tbl,`16*$SZ`
+@@ -377,7 +434,282 @@ $code.=<<___;
+ blr
+ .long 0
+ .byte 0,12,0x14,0,0,0,0,0
++.size $func,.-$func
++___
++} else {
++########################################################################
++# SHA512 for PPC32, X vector is off-loaded to stack...
++#
++# | sha512
++# | -m32
++# ----------------------+-----------------------
++# PPC74x0,gcc-4.0.1 | +48%
++# POWER6,gcc-4.4.6 | +124%(*)
++# POWER7,gcc-4.4.6 | +79%(*)
++# e300,gcc-4.1.0 | +167%
++#
++# (*) ~1/3 of -m64 result [and ~20% better than -m32 code generated
++# by xlc-12.1]
++
++my $XOFF=$LOCALS;
++
++my @V=map("r$_",(16..31)); # A..H
++
++my ($s0,$s1,$t0,$t1,$t2,$t3,$a0,$a1,$a2,$a3)=map("r$_",(0,5,6,8..12,14,15));
++my ($x0,$x1)=("r3","r4"); # zaps $ctx and $inp
++
++sub ROUND_00_15_ppc32 {
++my ($i, $ahi,$alo,$bhi,$blo,$chi,$clo,$dhi,$dlo,
++ $ehi,$elo,$fhi,$flo,$ghi,$glo,$hhi,$hlo)=@_;
++
++$code.=<<___;
++ lwz $t2,`$SZ*($i%16)+($LITTLE_ENDIAN^4)`($Tbl)
++ xor $a0,$flo,$glo
++ lwz $t3,`$SZ*($i%16)+($LITTLE_ENDIAN^0)`($Tbl)
++ xor $a1,$fhi,$ghi
++ addc $hlo,$hlo,$t0 ; h+=x[i]
++ stw $t0,`$XOFF+0+$SZ*($i%16)`($sp) ; save x[i]
++
++ srwi $s0,$elo,$Sigma1[0]
++ srwi $s1,$ehi,$Sigma1[0]
++ and $a0,$a0,$elo
++ adde $hhi,$hhi,$t1
++ and $a1,$a1,$ehi
++ stw $t1,`$XOFF+4+$SZ*($i%16)`($sp)
++ srwi $t0,$elo,$Sigma1[1]
++ srwi $t1,$ehi,$Sigma1[1]
++ addc $hlo,$hlo,$t2 ; h+=K512[i]
++ insrwi $s0,$ehi,$Sigma1[0],0
++ insrwi $s1,$elo,$Sigma1[0],0
++ xor $a0,$a0,$glo ; Ch(e,f,g)
++ adde $hhi,$hhi,$t3
++ xor $a1,$a1,$ghi
++ insrwi $t0,$ehi,$Sigma1[1],0
++ insrwi $t1,$elo,$Sigma1[1],0
++ addc $hlo,$hlo,$a0 ; h+=Ch(e,f,g)
++ srwi $t2,$ehi,$Sigma1[2]-32
++ srwi $t3,$elo,$Sigma1[2]-32
++ xor $s0,$s0,$t0
++ xor $s1,$s1,$t1
++ insrwi $t2,$elo,$Sigma1[2]-32,0
++ insrwi $t3,$ehi,$Sigma1[2]-32,0
++ xor $a0,$alo,$blo ; a^b, b^c in next round
++ adde $hhi,$hhi,$a1
++ xor $a1,$ahi,$bhi
++ xor $s0,$s0,$t2 ; Sigma1(e)
++ xor $s1,$s1,$t3
++
++ srwi $t0,$alo,$Sigma0[0]
++ and $a2,$a2,$a0
++ addc $hlo,$hlo,$s0 ; h+=Sigma1(e)
++ and $a3,$a3,$a1
++ srwi $t1,$ahi,$Sigma0[0]
++ srwi $s0,$ahi,$Sigma0[1]-32
++ adde $hhi,$hhi,$s1
++ srwi $s1,$alo,$Sigma0[1]-32
++ insrwi $t0,$ahi,$Sigma0[0],0
++ insrwi $t1,$alo,$Sigma0[0],0
++ xor $a2,$a2,$blo ; Maj(a,b,c)
++ addc $dlo,$dlo,$hlo ; d+=h
++ xor $a3,$a3,$bhi
++ insrwi $s0,$alo,$Sigma0[1]-32,0
++ insrwi $s1,$ahi,$Sigma0[1]-32,0
++ adde $dhi,$dhi,$hhi
++ srwi $t2,$ahi,$Sigma0[2]-32
++ srwi $t3,$alo,$Sigma0[2]-32
++ xor $s0,$s0,$t0
++ addc $hlo,$hlo,$a2 ; h+=Maj(a,b,c)
++ xor $s1,$s1,$t1
++ insrwi $t2,$alo,$Sigma0[2]-32,0
++ insrwi $t3,$ahi,$Sigma0[2]-32,0
++ adde $hhi,$hhi,$a3
++___
++$code.=<<___ if ($i>=15);
++ lwz $t0,`$XOFF+0+$SZ*(($i+2)%16)`($sp)
++ lwz $t1,`$XOFF+4+$SZ*(($i+2)%16)`($sp)
++___
++$code.=<<___ if ($i<15 && !$LITTLE_ENDIAN);
++ lwz $t1,`$SZ*($i+1)+0`($inp)
++ lwz $t0,`$SZ*($i+1)+4`($inp)
+ ___
++$code.=<<___ if ($i<15 && $LITTLE_ENDIAN);
++ lwz $a2,`$SZ*($i+1)+0`($inp)
++ lwz $a3,`$SZ*($i+1)+4`($inp)
++ rotlwi $t1,$a2,8
++ rotlwi $t0,$a3,8
++ rlwimi $t1,$a2,24,0,7
++ rlwimi $t0,$a3,24,0,7
++ rlwimi $t1,$a2,24,16,23
++ rlwimi $t0,$a3,24,16,23
++___
++$code.=<<___;
++ xor $s0,$s0,$t2 ; Sigma0(a)
++ xor $s1,$s1,$t3
++ addc $hlo,$hlo,$s0 ; h+=Sigma0(a)
++ adde $hhi,$hhi,$s1
++___
++$code.=<<___ if ($i==15);
++ lwz $x0,`$XOFF+0+$SZ*(($i+1)%16)`($sp)
++ lwz $x1,`$XOFF+4+$SZ*(($i+1)%16)`($sp)
++___
++}
++sub ROUND_16_xx_ppc32 {
++my ($i, $ahi,$alo,$bhi,$blo,$chi,$clo,$dhi,$dlo,
++ $ehi,$elo,$fhi,$flo,$ghi,$glo,$hhi,$hlo)=@_;
++
++$code.=<<___;
++ srwi $s0,$t0,$sigma0[0]
++ srwi $s1,$t1,$sigma0[0]
++ srwi $t2,$t0,$sigma0[1]
++ srwi $t3,$t1,$sigma0[1]
++ insrwi $s0,$t1,$sigma0[0],0
++ insrwi $s1,$t0,$sigma0[0],0
++ srwi $a0,$t0,$sigma0[2]
++ insrwi $t2,$t1,$sigma0[1],0
++ insrwi $t3,$t0,$sigma0[1],0
++ insrwi $a0,$t1,$sigma0[2],0
++ xor $s0,$s0,$t2
++ lwz $t2,`$XOFF+0+$SZ*(($i+14)%16)`($sp)
++ srwi $a1,$t1,$sigma0[2]
++ xor $s1,$s1,$t3
++ lwz $t3,`$XOFF+4+$SZ*(($i+14)%16)`($sp)
++ xor $a0,$a0,$s0
++ srwi $s0,$t2,$sigma1[0]
++ xor $a1,$a1,$s1
++ srwi $s1,$t3,$sigma1[0]
++ addc $x0,$x0,$a0 ; x[i]+=sigma0(x[i+1])
++ srwi $a0,$t3,$sigma1[1]-32
++ insrwi $s0,$t3,$sigma1[0],0
++ insrwi $s1,$t2,$sigma1[0],0
++ adde $x1,$x1,$a1
++ srwi $a1,$t2,$sigma1[1]-32
++
++ insrwi $a0,$t2,$sigma1[1]-32,0
++ srwi $t2,$t2,$sigma1[2]
++ insrwi $a1,$t3,$sigma1[1]-32,0
++ insrwi $t2,$t3,$sigma1[2],0
++ xor $s0,$s0,$a0
++ lwz $a0,`$XOFF+0+$SZ*(($i+9)%16)`($sp)
++ srwi $t3,$t3,$sigma1[2]
++ xor $s1,$s1,$a1
++ lwz $a1,`$XOFF+4+$SZ*(($i+9)%16)`($sp)
++ xor $s0,$s0,$t2
++ addc $x0,$x0,$a0 ; x[i]+=x[i+9]
++ xor $s1,$s1,$t3
++ adde $x1,$x1,$a1
++ addc $x0,$x0,$s0 ; x[i]+=sigma1(x[i+14])
++ adde $x1,$x1,$s1
++___
++ ($t0,$t1,$x0,$x1) = ($x0,$x1,$t0,$t1);
++ &ROUND_00_15_ppc32(@_);
++}
++
++$code.=<<___;
++.align 4
++Lsha2_block_private:
++___
++$code.=<<___ if (!$LITTLE_ENDIAN);
++ lwz $t1,0($inp)
++ xor $a2, at V[3], at V[5] ; B^C, magic seed
++ lwz $t0,4($inp)
++ xor $a3, at V[2], at V[4]
++___
++$code.=<<___ if ($LITTLE_ENDIAN);
++ lwz $a1,0($inp)
++ xor $a2, at V[3], at V[5] ; B^C, magic seed
++ lwz $a0,4($inp)
++ xor $a3, at V[2], at V[4]
++ rotlwi $t1,$a1,8
++ rotlwi $t0,$a0,8
++ rlwimi $t1,$a1,24,0,7
++ rlwimi $t0,$a0,24,0,7
++ rlwimi $t1,$a1,24,16,23
++ rlwimi $t0,$a0,24,16,23
++___
++for($i=0;$i<16;$i++) {
++ &ROUND_00_15_ppc32($i, at V);
++ unshift(@V,pop(@V)); unshift(@V,pop(@V));
++ ($a0,$a1,$a2,$a3) = ($a2,$a3,$a0,$a1);
++}
++$code.=<<___;
++ li $a0,`$rounds/16-1`
++ mtctr $a0
++.align 4
++Lrounds:
++ addi $Tbl,$Tbl,`16*$SZ`
++___
++for(;$i<32;$i++) {
++ &ROUND_16_xx_ppc32($i, at V);
++ unshift(@V,pop(@V)); unshift(@V,pop(@V));
++ ($a0,$a1,$a2,$a3) = ($a2,$a3,$a0,$a1);
++}
++$code.=<<___;
++ bdnz- Lrounds
++
++ $POP $ctx,`$FRAME-$SIZE_T*22`($sp)
++ $POP $inp,`$FRAME-$SIZE_T*23`($sp) ; inp pointer
++ $POP $num,`$FRAME-$SIZE_T*24`($sp) ; end pointer
++ subi $Tbl,$Tbl,`($rounds-16)*$SZ` ; rewind Tbl
++
++ lwz $t0,`$LITTLE_ENDIAN^0`($ctx)
++ lwz $t1,`$LITTLE_ENDIAN^4`($ctx)
++ lwz $t2,`$LITTLE_ENDIAN^8`($ctx)
++ lwz $t3,`$LITTLE_ENDIAN^12`($ctx)
++ lwz $a0,`$LITTLE_ENDIAN^16`($ctx)
++ lwz $a1,`$LITTLE_ENDIAN^20`($ctx)
++ lwz $a2,`$LITTLE_ENDIAN^24`($ctx)
++ addc @V[1], at V[1],$t1
++ lwz $a3,`$LITTLE_ENDIAN^28`($ctx)
++ adde @V[0], at V[0],$t0
++ lwz $t0,`$LITTLE_ENDIAN^32`($ctx)
++ addc @V[3], at V[3],$t3
++ lwz $t1,`$LITTLE_ENDIAN^36`($ctx)
++ adde @V[2], at V[2],$t2
++ lwz $t2,`$LITTLE_ENDIAN^40`($ctx)
++ addc @V[5], at V[5],$a1
++ lwz $t3,`$LITTLE_ENDIAN^44`($ctx)
++ adde @V[4], at V[4],$a0
++ lwz $a0,`$LITTLE_ENDIAN^48`($ctx)
++ addc @V[7], at V[7],$a3
++ lwz $a1,`$LITTLE_ENDIAN^52`($ctx)
++ adde @V[6], at V[6],$a2
++ lwz $a2,`$LITTLE_ENDIAN^56`($ctx)
++ addc @V[9], at V[9],$t1
++ lwz $a3,`$LITTLE_ENDIAN^60`($ctx)
++ adde @V[8], at V[8],$t0
++ stw @V[0],`$LITTLE_ENDIAN^0`($ctx)
++ stw @V[1],`$LITTLE_ENDIAN^4`($ctx)
++ addc @V[11], at V[11],$t3
++ stw @V[2],`$LITTLE_ENDIAN^8`($ctx)
++ stw @V[3],`$LITTLE_ENDIAN^12`($ctx)
++ adde @V[10], at V[10],$t2
++ stw @V[4],`$LITTLE_ENDIAN^16`($ctx)
++ stw @V[5],`$LITTLE_ENDIAN^20`($ctx)
++ addc @V[13], at V[13],$a1
++ stw @V[6],`$LITTLE_ENDIAN^24`($ctx)
++ stw @V[7],`$LITTLE_ENDIAN^28`($ctx)
++ adde @V[12], at V[12],$a0
++ stw @V[8],`$LITTLE_ENDIAN^32`($ctx)
++ stw @V[9],`$LITTLE_ENDIAN^36`($ctx)
++ addc @V[15], at V[15],$a3
++ stw @V[10],`$LITTLE_ENDIAN^40`($ctx)
++ stw @V[11],`$LITTLE_ENDIAN^44`($ctx)
++ adde @V[14], at V[14],$a2
++ stw @V[12],`$LITTLE_ENDIAN^48`($ctx)
++ stw @V[13],`$LITTLE_ENDIAN^52`($ctx)
++ stw @V[14],`$LITTLE_ENDIAN^56`($ctx)
++ stw @V[15],`$LITTLE_ENDIAN^60`($ctx)
++
++ addi $inp,$inp,`16*$SZ` ; advance inp
++ $PUSH $inp,`$FRAME-$SIZE_T*23`($sp)
++ $UCMP $inp,$num
++ bne Lsha2_block_private
++ blr
++ .long 0
++ .byte 0,12,0x14,0,0,0,0,0
++.size $func,.-$func
++___
++}
+
+ # Ugly hack here, because PPC assembler syntax seem to vary too
+ # much from platforms to platform...
+@@ -395,46 +727,46 @@ LPICmeup:
+ .space `64-9*4`
+ ___
+ $code.=<<___ if ($SZ==8);
+- .long 0x428a2f98,0xd728ae22,0x71374491,0x23ef65cd
+- .long 0xb5c0fbcf,0xec4d3b2f,0xe9b5dba5,0x8189dbbc
+- .long 0x3956c25b,0xf348b538,0x59f111f1,0xb605d019
+- .long 0x923f82a4,0xaf194f9b,0xab1c5ed5,0xda6d8118
+- .long 0xd807aa98,0xa3030242,0x12835b01,0x45706fbe
+- .long 0x243185be,0x4ee4b28c,0x550c7dc3,0xd5ffb4e2
+- .long 0x72be5d74,0xf27b896f,0x80deb1fe,0x3b1696b1
+- .long 0x9bdc06a7,0x25c71235,0xc19bf174,0xcf692694
+- .long 0xe49b69c1,0x9ef14ad2,0xefbe4786,0x384f25e3
+- .long 0x0fc19dc6,0x8b8cd5b5,0x240ca1cc,0x77ac9c65
+- .long 0x2de92c6f,0x592b0275,0x4a7484aa,0x6ea6e483
+- .long 0x5cb0a9dc,0xbd41fbd4,0x76f988da,0x831153b5
+- .long 0x983e5152,0xee66dfab,0xa831c66d,0x2db43210
+- .long 0xb00327c8,0x98fb213f,0xbf597fc7,0xbeef0ee4
+- .long 0xc6e00bf3,0x3da88fc2,0xd5a79147,0x930aa725
+- .long 0x06ca6351,0xe003826f,0x14292967,0x0a0e6e70
+- .long 0x27b70a85,0x46d22ffc,0x2e1b2138,0x5c26c926
+- .long 0x4d2c6dfc,0x5ac42aed,0x53380d13,0x9d95b3df
+- .long 0x650a7354,0x8baf63de,0x766a0abb,0x3c77b2a8
+- .long 0x81c2c92e,0x47edaee6,0x92722c85,0x1482353b
+- .long 0xa2bfe8a1,0x4cf10364,0xa81a664b,0xbc423001
+- .long 0xc24b8b70,0xd0f89791,0xc76c51a3,0x0654be30
+- .long 0xd192e819,0xd6ef5218,0xd6990624,0x5565a910
+- .long 0xf40e3585,0x5771202a,0x106aa070,0x32bbd1b8
+- .long 0x19a4c116,0xb8d2d0c8,0x1e376c08,0x5141ab53
+- .long 0x2748774c,0xdf8eeb99,0x34b0bcb5,0xe19b48a8
+- .long 0x391c0cb3,0xc5c95a63,0x4ed8aa4a,0xe3418acb
+- .long 0x5b9cca4f,0x7763e373,0x682e6ff3,0xd6b2b8a3
+- .long 0x748f82ee,0x5defb2fc,0x78a5636f,0x43172f60
+- .long 0x84c87814,0xa1f0ab72,0x8cc70208,0x1a6439ec
+- .long 0x90befffa,0x23631e28,0xa4506ceb,0xde82bde9
+- .long 0xbef9a3f7,0xb2c67915,0xc67178f2,0xe372532b
+- .long 0xca273ece,0xea26619c,0xd186b8c7,0x21c0c207
+- .long 0xeada7dd6,0xcde0eb1e,0xf57d4f7f,0xee6ed178
+- .long 0x06f067aa,0x72176fba,0x0a637dc5,0xa2c898a6
+- .long 0x113f9804,0xbef90dae,0x1b710b35,0x131c471b
+- .long 0x28db77f5,0x23047d84,0x32caab7b,0x40c72493
+- .long 0x3c9ebe0a,0x15c9bebc,0x431d67c4,0x9c100d4c
+- .long 0x4cc5d4be,0xcb3e42b6,0x597f299c,0xfc657e2a
+- .long 0x5fcb6fab,0x3ad6faec,0x6c44198c,0x4a475817
++ .quad 0x428a2f98d728ae22,0x7137449123ef65cd
++ .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
++ .quad 0x3956c25bf348b538,0x59f111f1b605d019
++ .quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118
++ .quad 0xd807aa98a3030242,0x12835b0145706fbe
++ .quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
++ .quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1
++ .quad 0x9bdc06a725c71235,0xc19bf174cf692694
++ .quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3
++ .quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
++ .quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483
++ .quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5
++ .quad 0x983e5152ee66dfab,0xa831c66d2db43210
++ .quad 0xb00327c898fb213f,0xbf597fc7beef0ee4
++ .quad 0xc6e00bf33da88fc2,0xd5a79147930aa725
++ .quad 0x06ca6351e003826f,0x142929670a0e6e70
++ .quad 0x27b70a8546d22ffc,0x2e1b21385c26c926
++ .quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df
++ .quad 0x650a73548baf63de,0x766a0abb3c77b2a8
++ .quad 0x81c2c92e47edaee6,0x92722c851482353b
++ .quad 0xa2bfe8a14cf10364,0xa81a664bbc423001
++ .quad 0xc24b8b70d0f89791,0xc76c51a30654be30
++ .quad 0xd192e819d6ef5218,0xd69906245565a910
++ .quad 0xf40e35855771202a,0x106aa07032bbd1b8
++ .quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53
++ .quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
++ .quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
++ .quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
++ .quad 0x748f82ee5defb2fc,0x78a5636f43172f60
++ .quad 0x84c87814a1f0ab72,0x8cc702081a6439ec
++ .quad 0x90befffa23631e28,0xa4506cebde82bde9
++ .quad 0xbef9a3f7b2c67915,0xc67178f2e372532b
++ .quad 0xca273eceea26619c,0xd186b8c721c0c207
++ .quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
++ .quad 0x06f067aa72176fba,0x0a637dc5a2c898a6
++ .quad 0x113f9804bef90dae,0x1b710b35131c471b
++ .quad 0x28db77f523047d84,0x32caab7b40c72493
++ .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
++ .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a
++ .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817
+ ___
+ $code.=<<___ if ($SZ==4);
+ .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
+diff --git a/crypto/sha/asm/sha512p8-ppc.pl b/crypto/sha/asm/sha512p8-ppc.pl
+new file mode 100755
+index 0000000..a316b31
+--- /dev/null
++++ b/crypto/sha/asm/sha512p8-ppc.pl
+@@ -0,0 +1,423 @@
++#!/usr/bin/env perl
++
++# ====================================================================
++# Written by Andy Polyakov <appro at openssl.org> for the OpenSSL
++# project. The module is, however, dual licensed under OpenSSL and
++# CRYPTOGAMS licenses depending on where you obtain it. For further
++# details see http://www.openssl.org/~appro/cryptogams/.
++# ====================================================================
++
++# SHA256/512 for PowerISA v2.07.
++#
++# Accurate performance measurements are problematic, because it's
++# always virtualized setup with possibly throttled processor.
++# Relative comparison is therefore more informative. This module is
++# ~60% faster than integer-only sha512-ppc.pl. To anchor to something
++# else, SHA256 is 24% slower than sha1-ppc.pl and 2.5x slower than
++# hardware-assisted aes-128-cbc encrypt. SHA512 is 20% faster than
++# sha1-ppc.pl and 1.6x slower than aes-128-cbc. Another interesting
++# result is degree of computational resources' utilization. POWER8 is
++# "massively multi-threaded chip" and difference between single- and
++# maximum multi-process benchmark results tells that utlization is
++# whooping 94%. For sha512-ppc.pl we get [not unimpressive] 84% and
++# for sha1-ppc.pl - 73%. 100% means that multi-process result equals
++# to single-process one, given that all threads end up on the same
++# physical core.
++
++$flavour=shift;
++$output =shift;
++
++if ($flavour =~ /64/) {
++ $SIZE_T=8;
++ $LRSAVE=2*$SIZE_T;
++ $STU="stdu";
++ $POP="ld";
++ $PUSH="std";
++} elsif ($flavour =~ /32/) {
++ $SIZE_T=4;
++ $LRSAVE=$SIZE_T;
++ $STU="stwu";
++ $POP="lwz";
++ $PUSH="stw";
++} else { die "nonsense $flavour"; }
++
++$LENDIAN=($flavour=~/le/);
++
++$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
++( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
++( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
++die "can't locate ppc-xlate.pl";
++
++open STDOUT,"| $^X $xlate $flavour $output" || die "can't call $xlate: $!";
++
++if ($output =~ /512/) {
++ $bits=512;
++ $SZ=8;
++ $sz="d";
++ $rounds=80;
++} else {
++ $bits=256;
++ $SZ=4;
++ $sz="w";
++ $rounds=64;
++}
++
++$func="sha${bits}_block_p8";
++$FRAME=8*$SIZE_T;
++
++$sp ="r1";
++$toc="r2";
++$ctx="r3";
++$inp="r4";
++$num="r5";
++$Tbl="r6";
++$idx="r7";
++$lrsave="r8";
++$offload="r11";
++$vrsave="r12";
++($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,10,26..31));
++
++ at V=($A,$B,$C,$D,$E,$F,$G,$H)=map("v$_",(0..7));
++ at X=map("v$_",(8..23));
++($Ki,$Func,$S0,$S1,$s0,$s1,$lemask)=map("v$_",(24..31));
++
++sub ROUND {
++my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_;
++my $j=($i+1)%16;
++
++$code.=<<___ if ($i<15 && ($i%(16/$SZ))==(16/$SZ-1));
++ lvx_u @X[$i+1],0,$inp ; load X[i] in advance
++ addi $inp,$inp,16
++___
++$code.=<<___ if ($i<16 && ($i%(16/$SZ)));
++ vsldoi @X[$i], at X[$i-1], at X[$i-1],$SZ
++___
++$code.=<<___ if ($LENDIAN && $i<16 && ($i%(16/$SZ))==0);
++ vperm @X[$i], at X[$i], at X[$i],$lemask
++___
++$code.=<<___;
++ `"vshasigma${sz} $s0, at X[($j+1)%16],0,0" if ($i>=15)`
++ vsel $Func,$g,$f,$e ; Ch(e,f,g)
++ vshasigma${sz} $S1,$e,1,15 ; Sigma1(e)
++ vaddu${sz}m $h,$h, at X[$i%16] ; h+=X[i]
++ vshasigma${sz} $S0,$a,1,0 ; Sigma0(a)
++ `"vshasigma${sz} $s1, at X[($j+14)%16],0,15" if ($i>=15)`
++ vaddu${sz}m $h,$h,$Func ; h+=Ch(e,f,g)
++ vxor $Func,$a,$b
++ `"vaddu${sz}m @X[$j], at X[$j], at X[($j+9)%16]" if ($i>=15)`
++ vaddu${sz}m $h,$h,$S1 ; h+=Sigma1(e)
++ vsel $Func,$b,$c,$Func ; Maj(a,b,c)
++ vaddu${sz}m $g,$g,$Ki ; future h+=K[i]
++ vaddu${sz}m $d,$d,$h ; d+=h
++ vaddu${sz}m $S0,$S0,$Func ; Sigma0(a)+Maj(a,b,c)
++ `"vaddu${sz}m @X[$j], at X[$j],$s0" if ($i>=15)`
++ lvx $Ki,$idx,$Tbl ; load next K[i]
++ addi $idx,$idx,16
++ vaddu${sz}m $h,$h,$S0 ; h+=Sigma0(a)+Maj(a,b,c)
++ `"vaddu${sz}m @X[$j], at X[$j],$s1" if ($i>=15)`
++___
++}
++
++$code=<<___;
++.machine "any"
++.text
++
++.globl $func
++.align 6
++$func:
++ $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
++ mflr $lrsave
++ li r10,`$FRAME+8*16+15`
++ li r11,`$FRAME+8*16+31`
++ stvx v20,r10,$sp # ABI says so
++ addi r10,r10,32
++ mfspr $vrsave,256
++ stvx v21,r11,$sp
++ addi r11,r11,32
++ stvx v22,r10,$sp
++ addi r10,r10,32
++ stvx v23,r11,$sp
++ addi r11,r11,32
++ stvx v24,r10,$sp
++ addi r10,r10,32
++ stvx v25,r11,$sp
++ addi r11,r11,32
++ stvx v26,r10,$sp
++ addi r10,r10,32
++ stvx v27,r11,$sp
++ addi r11,r11,32
++ stvx v28,r10,$sp
++ addi r10,r10,32
++ stvx v29,r11,$sp
++ addi r11,r11,32
++ stvx v30,r10,$sp
++ stvx v31,r11,$sp
++ li r11,-1
++ stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
++ li $x10,0x10
++ $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
++ li $x20,0x20
++ $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
++ li $x30,0x30
++ $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
++ li $x40,0x40
++ $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
++ li $x50,0x50
++ $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
++ li $x60,0x60
++ $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
++ li $x70,0x70
++ $PUSH $lrsave,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
++ mtspr 256,r11
++
++ bl LPICmeup
++ addi $offload,$sp,$FRAME+15
++___
++$code.=<<___ if ($LENDIAN);
++ li $idx,8
++ lvsl $lemask,0,$idx
++ vspltisb $Ki,0x0f
++ vxor $lemask,$lemask,$Ki
++___
++$code.=<<___ if ($SZ==4);
++ lvx_4w $A,$x00,$ctx
++ lvx_4w $E,$x10,$ctx
++ vsldoi $B,$A,$A,4 # unpack
++ vsldoi $C,$A,$A,8
++ vsldoi $D,$A,$A,12
++ vsldoi $F,$E,$E,4
++ vsldoi $G,$E,$E,8
++ vsldoi $H,$E,$E,12
++___
++$code.=<<___ if ($SZ==8);
++ lvx_u $A,$x00,$ctx
++ lvx_u $C,$x10,$ctx
++ lvx_u $E,$x20,$ctx
++ vsldoi $B,$A,$A,8 # unpack
++ lvx_u $G,$x30,$ctx
++ vsldoi $D,$C,$C,8
++ vsldoi $F,$E,$E,8
++ vsldoi $H,$G,$G,8
++___
++$code.=<<___;
++ li r0,`($rounds-16)/16` # inner loop counter
++ b Loop
++.align 5
++Loop:
++ lvx $Ki,$x00,$Tbl
++ li $idx,16
++ lvx_u @X[0],0,$inp
++ addi $inp,$inp,16
++ stvx $A,$x00,$offload # offload $A-$H
++ stvx $B,$x10,$offload
++ stvx $C,$x20,$offload
++ stvx $D,$x30,$offload
++ stvx $E,$x40,$offload
++ stvx $F,$x50,$offload
++ stvx $G,$x60,$offload
++ stvx $H,$x70,$offload
++ vaddu${sz}m $H,$H,$Ki # h+K[i]
++ lvx $Ki,$idx,$Tbl
++ addi $idx,$idx,16
++___
++for ($i=0;$i<16;$i++) { &ROUND($i, at V); unshift(@V,pop(@V)); }
++$code.=<<___;
++ mtctr r0
++ b L16_xx
++.align 5
++L16_xx:
++___
++for (;$i<32;$i++) { &ROUND($i, at V); unshift(@V,pop(@V)); }
++$code.=<<___;
++ bdnz L16_xx
++
++ lvx @X[2],$x00,$offload
++ subic. $num,$num,1
++ lvx @X[3],$x10,$offload
++ vaddu${sz}m $A,$A, at X[2]
++ lvx @X[4],$x20,$offload
++ vaddu${sz}m $B,$B, at X[3]
++ lvx @X[5],$x30,$offload
++ vaddu${sz}m $C,$C, at X[4]
++ lvx @X[6],$x40,$offload
++ vaddu${sz}m $D,$D, at X[5]
++ lvx @X[7],$x50,$offload
++ vaddu${sz}m $E,$E, at X[6]
++ lvx @X[8],$x60,$offload
++ vaddu${sz}m $F,$F, at X[7]
++ lvx @X[9],$x70,$offload
++ vaddu${sz}m $G,$G, at X[8]
++ vaddu${sz}m $H,$H, at X[9]
++ bne Loop
++___
++$code.=<<___ if ($SZ==4);
++ lvx @X[0],$idx,$Tbl
++ addi $idx,$idx,16
++ vperm $A,$A,$B,$Ki # pack the answer
++ lvx @X[1],$idx,$Tbl
++ vperm $E,$E,$F,$Ki
++ vperm $A,$A,$C, at X[0]
++ vperm $E,$E,$G, at X[0]
++ vperm $A,$A,$D, at X[1]
++ vperm $E,$E,$H, at X[1]
++ stvx_4w $A,$x00,$ctx
++ stvx_4w $E,$x10,$ctx
++___
++$code.=<<___ if ($SZ==8);
++ vperm $A,$A,$B,$Ki # pack the answer
++ vperm $C,$C,$D,$Ki
++ vperm $E,$E,$F,$Ki
++ vperm $G,$G,$H,$Ki
++ stvx_u $A,$x00,$ctx
++ stvx_u $C,$x10,$ctx
++ stvx_u $E,$x20,$ctx
++ stvx_u $G,$x30,$ctx
++___
++$code.=<<___;
++ li r10,`$FRAME+8*16+15`
++ mtlr $lrsave
++ li r11,`$FRAME+8*16+31`
++ mtspr 256,$vrsave
++ lvx v20,r10,$sp # ABI says so
++ addi r10,r10,32
++ lvx v21,r11,$sp
++ addi r11,r11,32
++ lvx v22,r10,$sp
++ addi r10,r10,32
++ lvx v23,r11,$sp
++ addi r11,r11,32
++ lvx v24,r10,$sp
++ addi r10,r10,32
++ lvx v25,r11,$sp
++ addi r11,r11,32
++ lvx v26,r10,$sp
++ addi r10,r10,32
++ lvx v27,r11,$sp
++ addi r11,r11,32
++ lvx v28,r10,$sp
++ addi r10,r10,32
++ lvx v29,r11,$sp
++ addi r11,r11,32
++ lvx v30,r10,$sp
++ lvx v31,r11,$sp
++ $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
++ $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
++ $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
++ $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
++ $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
++ $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
++ addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
++ blr
++ .long 0
++ .byte 0,12,4,1,0x80,6,3,0
++ .long 0
++.size $func,.-$func
++___
++
++# Ugly hack here, because PPC assembler syntax seems to vary too
++# much from platform to platform...
++$code.=<<___;
++.align 6
++LPICmeup:
++ mflr r0
++ bcl 20,31,\$+4
++ mflr $Tbl ; vvvvvv "distance" between . and 1st data entry
++ addi $Tbl,$Tbl,`64-8`
++ mtlr r0
++ blr
++ .long 0
++ .byte 0,12,0x14,0,0,0,0,0
++ .space `64-9*4`
++___
++
++if ($SZ==8) {
++ local *table = sub {
++ foreach(@_) { $code.=".quad $_,$_\n"; }
++ };
++ table(
++ "0x428a2f98d728ae22","0x7137449123ef65cd",
++ "0xb5c0fbcfec4d3b2f","0xe9b5dba58189dbbc",
++ "0x3956c25bf348b538","0x59f111f1b605d019",
++ "0x923f82a4af194f9b","0xab1c5ed5da6d8118",
++ "0xd807aa98a3030242","0x12835b0145706fbe",
++ "0x243185be4ee4b28c","0x550c7dc3d5ffb4e2",
++ "0x72be5d74f27b896f","0x80deb1fe3b1696b1",
++ "0x9bdc06a725c71235","0xc19bf174cf692694",
++ "0xe49b69c19ef14ad2","0xefbe4786384f25e3",
++ "0x0fc19dc68b8cd5b5","0x240ca1cc77ac9c65",
++ "0x2de92c6f592b0275","0x4a7484aa6ea6e483",
++ "0x5cb0a9dcbd41fbd4","0x76f988da831153b5",
++ "0x983e5152ee66dfab","0xa831c66d2db43210",
++ "0xb00327c898fb213f","0xbf597fc7beef0ee4",
++ "0xc6e00bf33da88fc2","0xd5a79147930aa725",
++ "0x06ca6351e003826f","0x142929670a0e6e70",
++ "0x27b70a8546d22ffc","0x2e1b21385c26c926",
++ "0x4d2c6dfc5ac42aed","0x53380d139d95b3df",
++ "0x650a73548baf63de","0x766a0abb3c77b2a8",
++ "0x81c2c92e47edaee6","0x92722c851482353b",
++ "0xa2bfe8a14cf10364","0xa81a664bbc423001",
++ "0xc24b8b70d0f89791","0xc76c51a30654be30",
++ "0xd192e819d6ef5218","0xd69906245565a910",
++ "0xf40e35855771202a","0x106aa07032bbd1b8",
++ "0x19a4c116b8d2d0c8","0x1e376c085141ab53",
++ "0x2748774cdf8eeb99","0x34b0bcb5e19b48a8",
++ "0x391c0cb3c5c95a63","0x4ed8aa4ae3418acb",
++ "0x5b9cca4f7763e373","0x682e6ff3d6b2b8a3",
++ "0x748f82ee5defb2fc","0x78a5636f43172f60",
++ "0x84c87814a1f0ab72","0x8cc702081a6439ec",
++ "0x90befffa23631e28","0xa4506cebde82bde9",
++ "0xbef9a3f7b2c67915","0xc67178f2e372532b",
++ "0xca273eceea26619c","0xd186b8c721c0c207",
++ "0xeada7dd6cde0eb1e","0xf57d4f7fee6ed178",
++ "0x06f067aa72176fba","0x0a637dc5a2c898a6",
++ "0x113f9804bef90dae","0x1b710b35131c471b",
++ "0x28db77f523047d84","0x32caab7b40c72493",
++ "0x3c9ebe0a15c9bebc","0x431d67c49c100d4c",
++ "0x4cc5d4becb3e42b6","0x597f299cfc657e2a",
++ "0x5fcb6fab3ad6faec","0x6c44198c4a475817","0");
++$code.=<<___ if (!$LENDIAN);
++.quad 0x0001020304050607,0x1011121314151617
++___
++$code.=<<___ if ($LENDIAN); # quad-swapped
++.quad 0x1011121314151617,0x0001020304050607
++___
++} else {
++ local *table = sub {
++ foreach(@_) { $code.=".long $_,$_,$_,$_\n"; }
++ };
++ table(
++ "0x428a2f98","0x71374491","0xb5c0fbcf","0xe9b5dba5",
++ "0x3956c25b","0x59f111f1","0x923f82a4","0xab1c5ed5",
++ "0xd807aa98","0x12835b01","0x243185be","0x550c7dc3",
++ "0x72be5d74","0x80deb1fe","0x9bdc06a7","0xc19bf174",
++ "0xe49b69c1","0xefbe4786","0x0fc19dc6","0x240ca1cc",
++ "0x2de92c6f","0x4a7484aa","0x5cb0a9dc","0x76f988da",
++ "0x983e5152","0xa831c66d","0xb00327c8","0xbf597fc7",
++ "0xc6e00bf3","0xd5a79147","0x06ca6351","0x14292967",
++ "0x27b70a85","0x2e1b2138","0x4d2c6dfc","0x53380d13",
++ "0x650a7354","0x766a0abb","0x81c2c92e","0x92722c85",
++ "0xa2bfe8a1","0xa81a664b","0xc24b8b70","0xc76c51a3",
++ "0xd192e819","0xd6990624","0xf40e3585","0x106aa070",
++ "0x19a4c116","0x1e376c08","0x2748774c","0x34b0bcb5",
++ "0x391c0cb3","0x4ed8aa4a","0x5b9cca4f","0x682e6ff3",
++ "0x748f82ee","0x78a5636f","0x84c87814","0x8cc70208",
++ "0x90befffa","0xa4506ceb","0xbef9a3f7","0xc67178f2","0");
++$code.=<<___ if (!$LENDIAN);
++.long 0x00010203,0x10111213,0x10111213,0x10111213
++.long 0x00010203,0x04050607,0x10111213,0x10111213
++.long 0x00010203,0x04050607,0x08090a0b,0x10111213
++___
++$code.=<<___ if ($LENDIAN); # word-swapped
++.long 0x10111213,0x10111213,0x10111213,0x00010203
++.long 0x10111213,0x10111213,0x04050607,0x00010203
++.long 0x10111213,0x08090a0b,0x04050607,0x00010203
++___
++}
++$code.=<<___;
++.asciz "SHA${bits} for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
++.align 2
++___
++
++$code =~ s/\`([^\`]*)\`/eval $1/gem;
++print $code;
++close STDOUT;
diff --git a/openssl-1.0.1-beta2-rpmbuild.patch b/openssl-1.0.1e-rpmbuild.patch
similarity index 91%
rename from openssl-1.0.1-beta2-rpmbuild.patch
rename to openssl-1.0.1e-rpmbuild.patch
index a4bb691..14b2ba9 100644
--- a/openssl-1.0.1-beta2-rpmbuild.patch
+++ b/openssl-1.0.1e-rpmbuild.patch
@@ -1,7 +1,7 @@
-diff -up openssl-1.0.1-beta2/Configure.rpmbuild openssl-1.0.1-beta2/Configure
---- openssl-1.0.1-beta2/Configure.rpmbuild 2012-01-05 01:07:34.000000000 +0100
-+++ openssl-1.0.1-beta2/Configure 2012-02-02 12:43:56.547409325 +0100
-@@ -343,23 +343,23 @@ my %table=(
+diff -up openssl-1.0.1e/Configure.rpmbuild openssl-1.0.1e/Configure
+--- openssl-1.0.1e/Configure.rpmbuild 2014-08-13 19:19:53.211005598 +0200
++++ openssl-1.0.1e/Configure 2014-08-13 19:29:21.704099285 +0200
+@@ -345,24 +345,24 @@ my %table=(
####
# *-generic* is endian-neutral target, but ./config is free to
# throw in -D[BL]_ENDIAN, whichever appropriate...
@@ -21,9 +21,11 @@ diff -up openssl-1.0.1-beta2/Configure.rpmbuild openssl-1.0.1-beta2/Configure
####
-"linux-generic64","gcc:-DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
-"linux-ppc64", "gcc:-m64 -DB_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL:${ppc64_asm}:linux64:dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::64",
+-"linux-ppc64le","gcc:-m64 -DL_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL:$ppc64_asm:linux64le:dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::",
-"linux-ia64", "gcc:-DL_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_UNROLL DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
+"linux-generic64","gcc:-DTERMIO -Wall \$(RPM_OPT_FLAGS)::-D_REENTRANT::-Wl,-z,relro -ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC:\$(RPM_OPT_FLAGS):.so.\$(SHLIB_SONAMEVER)",
+"linux-ppc64", "gcc:-m64 -DB_ENDIAN -DTERMIO -Wall \$(RPM_OPT_FLAGS)::-D_REENTRANT::-Wl,-z,relro -ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL:${ppc64_asm}:linux64:dlfcn:linux-shared:-fPIC:-m64 \$(RPM_OPT_FLAGS):.so.\$(SHLIB_SONAMEVER):::64",
++"linux-ppc64le","gcc:-m64 -DL_ENDIAN -DTERMIO -Wall \$(RPM_OPT_FLAGS)::-D_REENTRANT::-Wl,-z,relro -ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL:${ppc64_asm}:linux64le:dlfcn:linux-shared:-fPIC:-m64 \$(RPM_OPT_FLAGS):.so.\$(SHLIB_SONAMEVER):::64",
+"linux-ia64", "gcc:-DL_ENDIAN -DTERMIO -Wall \$(RPM_OPT_FLAGS)::-D_REENTRANT::-Wl,-z,relro -ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_UNROLL DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC:\$(RPM_OPT_FLAGS):.so.\$(SHLIB_SONAMEVER)",
"linux-ia64-ecc","ecc:-DL_ENDIAN -DTERMIO -O2 -Wall -no_cpprt::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
"linux-ia64-icc","icc:-DL_ENDIAN -DTERMIO -O2 -Wall -no_cpprt::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_RISC1 DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
@@ -34,7 +36,7 @@ diff -up openssl-1.0.1-beta2/Configure.rpmbuild openssl-1.0.1-beta2/Configure
#### So called "highgprs" target for z/Architecture CPUs
# "Highgprs" is kernel feature first implemented in Linux 2.6.32, see
# /proc/cpuinfo. The idea is to preserve most significant bits of
-@@ -373,16 +373,17 @@ my %table=(
+@@ -376,16 +376,17 @@ my %table=(
# ldconfig and run-time linker to autodiscover. Unfortunately it
# doesn't work just yet, because of couple of bugs in glibc
# sysdeps/s390/dl-procinfo.c affecting ldconfig and ld.so.1...
@@ -56,7 +58,7 @@ diff -up openssl-1.0.1-beta2/Configure.rpmbuild openssl-1.0.1-beta2/Configure
#### Alpha Linux with GNU C and Compaq C setups
# Special notes:
# - linux-alpha+bwx-gcc is ment to be used from ./config only. If you
-@@ -396,8 +397,8 @@ my %table=(
+@@ -399,8 +400,8 @@ my %table=(
#
# <appro at fy.chalmers.se>
#
@@ -67,7 +69,7 @@ diff -up openssl-1.0.1-beta2/Configure.rpmbuild openssl-1.0.1-beta2/Configure
"linux-alpha-ccc","ccc:-fast -readonly_strings -DL_ENDIAN -DTERMIO::-D_REENTRANT:::SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT DES_PTR DES_RISC1 DES_UNROLL:${alpha_asm}",
"linux-alpha+bwx-ccc","ccc:-fast -readonly_strings -DL_ENDIAN -DTERMIO::-D_REENTRANT:::SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_PTR DES_RISC1 DES_UNROLL:${alpha_asm}",
-@@ -1678,7 +1679,7 @@ while (<IN>)
+@@ -1675,7 +1676,7 @@ while (<IN>)
elsif ($shared_extension ne "" && $shared_extension =~ /^\.s([ol])\.[^\.]*\.[^\.]*$/)
{
my $sotmp = $1;
@@ -76,9 +78,9 @@ diff -up openssl-1.0.1-beta2/Configure.rpmbuild openssl-1.0.1-beta2/Configure
}
elsif ($shared_extension ne "" && $shared_extension =~ /^\.[^\.]*\.[^\.]*\.dylib$/)
{
-diff -up openssl-1.0.1-beta2/Makefile.org.rpmbuild openssl-1.0.1-beta2/Makefile.org
---- openssl-1.0.1-beta2/Makefile.org.rpmbuild 2011-12-27 16:17:50.000000000 +0100
-+++ openssl-1.0.1-beta2/Makefile.org 2012-02-02 12:30:23.652495435 +0100
+diff -up openssl-1.0.1e/Makefile.org.rpmbuild openssl-1.0.1e/Makefile.org
+--- openssl-1.0.1e/Makefile.org.rpmbuild 2013-02-11 16:26:04.000000000 +0100
++++ openssl-1.0.1e/Makefile.org 2014-08-13 19:19:53.218005759 +0200
@@ -10,6 +10,7 @@ SHLIB_VERSION_HISTORY=
SHLIB_MAJOR=
SHLIB_MINOR=
diff --git a/openssl-1.0.1e-fips.patch b/openssl-1.0.1g-fips.patch
similarity index 95%
rename from openssl-1.0.1e-fips.patch
rename to openssl-1.0.1g-fips.patch
index f5496a0..449ca1c 100644
--- a/openssl-1.0.1e-fips.patch
+++ b/openssl-1.0.1g-fips.patch
@@ -1,31 +1,6 @@
-diff -up openssl-1.0.1e/apps/pkcs12.c.fips openssl-1.0.1e/apps/pkcs12.c
---- openssl-1.0.1e/apps/pkcs12.c.fips 2013-02-11 16:26:04.000000000 +0100
-+++ openssl-1.0.1e/apps/pkcs12.c 2013-10-04 11:48:04.172693955 +0200
-@@ -67,6 +67,9 @@
- #include <openssl/err.h>
- #include <openssl/pem.h>
- #include <openssl/pkcs12.h>
-+#ifdef OPENSSL_FIPS
-+#include <openssl/fips.h>
-+#endif
-
- #define PROG pkcs12_main
-
-@@ -130,6 +133,11 @@ int MAIN(int argc, char **argv)
-
- apps_startup();
-
-+#ifdef OPENSSL_FIPS
-+ if (FIPS_mode())
-+ cert_pbe = key_pbe; /* cannot use RC2 in the FIPS mode */
-+#endif
-+
- enc = EVP_des_ede3_cbc();
- if (bio_err == NULL ) bio_err = BIO_new_fp (stderr, BIO_NOCLOSE);
-
-diff -up openssl-1.0.1e/apps/speed.c.fips openssl-1.0.1e/apps/speed.c
---- openssl-1.0.1e/apps/speed.c.fips 2013-02-11 16:26:04.000000000 +0100
-+++ openssl-1.0.1e/apps/speed.c 2013-10-04 11:49:56.384227859 +0200
+diff -up openssl-1.0.1g/apps/speed.c.fips openssl-1.0.1g/apps/speed.c
+--- openssl-1.0.1g/apps/speed.c.fips 2014-03-17 17:14:20.000000000 +0100
++++ openssl-1.0.1g/apps/speed.c 2014-05-06 16:29:50.536922993 +0200
@@ -195,7 +195,6 @@
#ifdef OPENSSL_DOING_MAKEDEPEND
#undef AES_set_encrypt_key
@@ -151,10 +126,10 @@ diff -up openssl-1.0.1e/apps/speed.c.fips openssl-1.0.1e/apps/speed.c
HMAC_Init_ex(&hctx,(unsigned char *)"This is a key...",
16,EVP_md5(), NULL);
-diff -up openssl-1.0.1e/Configure.fips openssl-1.0.1e/Configure
---- openssl-1.0.1e/Configure.fips 2013-10-04 11:48:04.153693526 +0200
-+++ openssl-1.0.1e/Configure 2013-10-04 11:48:04.173693978 +0200
-@@ -995,11 +995,6 @@ if (defined($disabled{"md5"}) || defined
+diff -up openssl-1.0.1g/Configure.fips openssl-1.0.1g/Configure
+--- openssl-1.0.1g/Configure.fips 2014-05-06 16:29:50.523922693 +0200
++++ openssl-1.0.1g/Configure 2014-05-06 16:29:50.536922993 +0200
+@@ -997,11 +997,6 @@ if (defined($disabled{"md5"}) || defined
$disabled{"ssl2"} = "forced";
}
@@ -166,7 +141,7 @@ diff -up openssl-1.0.1e/Configure.fips openssl-1.0.1e/Configure
# RSAX ENGINE sets default non-FIPS RSA method.
if ($fips)
{
-@@ -1474,7 +1469,6 @@ $cflags.=" -DOPENSSL_BN_ASM_GF2m" if ($b
+@@ -1476,7 +1471,6 @@ $cflags.=" -DOPENSSL_BN_ASM_GF2m" if ($b
if ($fips)
{
$openssl_other_defines.="#define OPENSSL_FIPS\n";
@@ -174,7 +149,7 @@ diff -up openssl-1.0.1e/Configure.fips openssl-1.0.1e/Configure
}
$cpuid_obj="mem_clr.o" unless ($cpuid_obj =~ /\.o$/);
-@@ -1661,9 +1655,12 @@ while (<IN>)
+@@ -1663,9 +1657,12 @@ while (<IN>)
s/^FIPSDIR=.*/FIPSDIR=$fipsdir/;
s/^FIPSLIBDIR=.*/FIPSLIBDIR=$fipslibdir/;
@@ -188,9 +163,9 @@ diff -up openssl-1.0.1e/Configure.fips openssl-1.0.1e/Configure
s/^SHLIB_TARGET=.*/SHLIB_TARGET=$shared_target/;
s/^SHLIB_MARK=.*/SHLIB_MARK=$shared_mark/;
s/^SHARED_LIBS=.*/SHARED_LIBS=\$(SHARED_CRYPTO) \$(SHARED_SSL)/ if (!$no_shared);
-diff -up openssl-1.0.1e/crypto/aes/aes_misc.c.fips openssl-1.0.1e/crypto/aes/aes_misc.c
---- openssl-1.0.1e/crypto/aes/aes_misc.c.fips 2013-02-11 16:26:04.000000000 +0100
-+++ openssl-1.0.1e/crypto/aes/aes_misc.c 2013-10-04 11:48:04.173693978 +0200
+diff -up openssl-1.0.1g/crypto/aes/aes_misc.c.fips openssl-1.0.1g/crypto/aes/aes_misc.c
+--- openssl-1.0.1g/crypto/aes/aes_misc.c.fips 2014-03-17 17:14:20.000000000 +0100
++++ openssl-1.0.1g/crypto/aes/aes_misc.c 2014-05-06 16:29:50.536922993 +0200
@@ -69,17 +69,11 @@ const char *AES_options(void) {
int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
AES_KEY *key)
@@ -209,9 +184,9 @@ diff -up openssl-1.0.1e/crypto/aes/aes_misc.c.fips openssl-1.0.1e/crypto/aes/aes
-#endif
return private_AES_set_decrypt_key(userKey, bits, key);
}
-diff -up openssl-1.0.1e/crypto/cmac/cmac.c.fips openssl-1.0.1e/crypto/cmac/cmac.c
---- openssl-1.0.1e/crypto/cmac/cmac.c.fips 2013-02-11 16:26:04.000000000 +0100
-+++ openssl-1.0.1e/crypto/cmac/cmac.c 2013-10-04 11:48:04.173693978 +0200
+diff -up openssl-1.0.1g/crypto/cmac/cmac.c.fips openssl-1.0.1g/crypto/cmac/cmac.c
+--- openssl-1.0.1g/crypto/cmac/cmac.c.fips 2014-03-17 17:14:20.000000000 +0100
++++ openssl-1.0.1g/crypto/cmac/cmac.c 2014-05-06 16:29:50.537923016 +0200
@@ -107,13 +107,6 @@ CMAC_CTX *CMAC_CTX_new(void)
void CMAC_CTX_cleanup(CMAC_CTX *ctx)
@@ -260,9 +235,9 @@ diff -up openssl-1.0.1e/crypto/cmac/cmac.c.fips openssl-1.0.1e/crypto/cmac/cmac.
if (ctx->nlast_block == -1)
return 0;
bl = EVP_CIPHER_CTX_block_size(&ctx->cctx);
-diff -up openssl-1.0.1e/crypto/crypto.h.fips openssl-1.0.1e/crypto/crypto.h
---- openssl-1.0.1e/crypto/crypto.h.fips 2013-10-04 11:48:04.058691381 +0200
-+++ openssl-1.0.1e/crypto/crypto.h 2013-10-04 11:48:04.173693978 +0200
+diff -up openssl-1.0.1g/crypto/crypto.h.fips openssl-1.0.1g/crypto/crypto.h
+--- openssl-1.0.1g/crypto/crypto.h.fips 2014-05-06 16:29:50.419920288 +0200
++++ openssl-1.0.1g/crypto/crypto.h 2014-05-06 16:29:50.537923016 +0200
@@ -553,24 +553,29 @@ int FIPS_mode_set(int r);
void OPENSSL_init(void);
@@ -308,9 +283,9 @@ diff -up openssl-1.0.1e/crypto/crypto.h.fips openssl-1.0.1e/crypto/crypto.h
/* Error codes for the CRYPTO functions. */
/* Function codes. */
-diff -up openssl-1.0.1e/crypto/des/des.h.fips openssl-1.0.1e/crypto/des/des.h
---- openssl-1.0.1e/crypto/des/des.h.fips 2013-10-04 11:48:04.088692058 +0200
-+++ openssl-1.0.1e/crypto/des/des.h 2013-10-04 11:48:04.173693978 +0200
+diff -up openssl-1.0.1g/crypto/des/des.h.fips openssl-1.0.1g/crypto/des/des.h
+--- openssl-1.0.1g/crypto/des/des.h.fips 2014-05-06 16:29:50.449920982 +0200
++++ openssl-1.0.1g/crypto/des/des.h 2014-05-06 16:29:50.537923016 +0200
@@ -224,9 +224,6 @@ int DES_set_key(const_DES_cblock *key,DE
int DES_key_sched(const_DES_cblock *key,DES_key_schedule *schedule);
int DES_set_key_checked(const_DES_cblock *key,DES_key_schedule *schedule);
@@ -321,9 +296,9 @@ diff -up openssl-1.0.1e/crypto/des/des.h.fips openssl-1.0.1e/crypto/des/des.h
void DES_string_to_key(const char *str,DES_cblock *key);
void DES_string_to_2keys(const char *str,DES_cblock *key1,DES_cblock *key2);
void DES_cfb64_encrypt(const unsigned char *in,unsigned char *out,long length,
-diff -up openssl-1.0.1e/crypto/des/set_key.c.fips openssl-1.0.1e/crypto/des/set_key.c
---- openssl-1.0.1e/crypto/des/set_key.c.fips 2013-02-11 16:26:04.000000000 +0100
-+++ openssl-1.0.1e/crypto/des/set_key.c 2013-10-04 11:48:04.174694001 +0200
+diff -up openssl-1.0.1g/crypto/des/set_key.c.fips openssl-1.0.1g/crypto/des/set_key.c
+--- openssl-1.0.1g/crypto/des/set_key.c.fips 2014-03-17 17:14:20.000000000 +0100
++++ openssl-1.0.1g/crypto/des/set_key.c 2014-05-06 16:29:50.537923016 +0200
@@ -336,13 +336,6 @@ int DES_set_key_checked(const_DES_cblock
}
@@ -338,9 +313,9 @@ diff -up openssl-1.0.1e/crypto/des/set_key.c.fips openssl-1.0.1e/crypto/des/set_
{
static const int shifts2[16]={0,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0};
register DES_LONG c,d,t,s,t2;
-diff -up openssl-1.0.1e/crypto/dh/dh_gen.c.fips openssl-1.0.1e/crypto/dh/dh_gen.c
---- openssl-1.0.1e/crypto/dh/dh_gen.c.fips 2013-02-11 16:26:04.000000000 +0100
-+++ openssl-1.0.1e/crypto/dh/dh_gen.c 2013-10-04 11:48:04.174694001 +0200
+diff -up openssl-1.0.1g/crypto/dh/dh_gen.c.fips openssl-1.0.1g/crypto/dh/dh_gen.c
+--- openssl-1.0.1g/crypto/dh/dh_gen.c.fips 2014-03-17 17:14:20.000000000 +0100
++++ openssl-1.0.1g/crypto/dh/dh_gen.c 2014-05-06 16:29:50.537923016 +0200
@@ -84,11 +84,6 @@ int DH_generate_parameters_ex(DH *ret, i
#endif
if(ret->meth->generate_params)
@@ -374,9 +349,9 @@ diff -up openssl-1.0.1e/crypto/dh/dh_gen.c.fips openssl-1.0.1e/crypto/dh/dh_gen.
ctx=BN_CTX_new();
if (ctx == NULL) goto err;
BN_CTX_start(ctx);
-diff -up openssl-1.0.1e/crypto/dh/dh.h.fips openssl-1.0.1e/crypto/dh/dh.h
---- openssl-1.0.1e/crypto/dh/dh.h.fips 2014-02-06 18:04:19.000000000 +0100
-+++ openssl-1.0.1e/crypto/dh/dh.h 2014-02-11 16:01:17.039345356 +0100
+diff -up openssl-1.0.1g/crypto/dh/dh.h.fips openssl-1.0.1g/crypto/dh/dh.h
+--- openssl-1.0.1g/crypto/dh/dh.h.fips 2014-05-06 16:29:50.394919710 +0200
++++ openssl-1.0.1g/crypto/dh/dh.h 2014-05-06 16:29:50.537923016 +0200
@@ -77,6 +77,8 @@
# define OPENSSL_DH_MAX_MODULUS_BITS 10000
#endif
@@ -394,9 +369,9 @@ diff -up openssl-1.0.1e/crypto/dh/dh.h.fips openssl-1.0.1e/crypto/dh/dh.h
DH * d2i_DHparams(DH **a,const unsigned char **pp, long length);
int i2d_DHparams(const DH *a,unsigned char **pp);
#ifndef OPENSSL_NO_FP_API
-diff -up openssl-1.0.1e/crypto/dh/dh_key.c.fips openssl-1.0.1e/crypto/dh/dh_key.c
---- openssl-1.0.1e/crypto/dh/dh_key.c.fips 2013-02-11 16:26:04.000000000 +0100
-+++ openssl-1.0.1e/crypto/dh/dh_key.c 2014-02-11 15:57:55.266840301 +0100
+diff -up openssl-1.0.1g/crypto/dh/dh_key.c.fips openssl-1.0.1g/crypto/dh/dh_key.c
+--- openssl-1.0.1g/crypto/dh/dh_key.c.fips 2014-03-17 17:14:20.000000000 +0100
++++ openssl-1.0.1g/crypto/dh/dh_key.c 2014-05-06 16:29:50.538923040 +0200
@@ -61,6 +61,9 @@
#include <openssl/bn.h>
#include <openssl/rand.h>
@@ -477,9 +452,9 @@ diff -up openssl-1.0.1e/crypto/dh/dh_key.c.fips openssl-1.0.1e/crypto/dh/dh_key.
dh->flags |= DH_FLAG_CACHE_MONT_P;
return(1);
}
-diff -up openssl-1.0.1e/crypto/dh/dh_lib.c.fips openssl-1.0.1e/crypto/dh/dh_lib.c
---- openssl-1.0.1e/crypto/dh/dh_lib.c.fips 2013-02-11 16:26:04.000000000 +0100
-+++ openssl-1.0.1e/crypto/dh/dh_lib.c 2013-10-04 11:48:04.174694001 +0200
+diff -up openssl-1.0.1g/crypto/dh/dh_lib.c.fips openssl-1.0.1g/crypto/dh/dh_lib.c
+--- openssl-1.0.1g/crypto/dh/dh_lib.c.fips 2014-03-17 17:14:20.000000000 +0100
++++ openssl-1.0.1g/crypto/dh/dh_lib.c 2014-05-06 16:29:50.538923040 +0200
@@ -81,14 +81,7 @@ const DH_METHOD *DH_get_default_method(v
{
if(!default_DH_method)
@@ -495,9 +470,9 @@ diff -up openssl-1.0.1e/crypto/dh/dh_lib.c.fips openssl-1.0.1e/crypto/dh/dh_lib.
}
return default_DH_method;
}
-diff -up openssl-1.0.1e/crypto/dsa/dsa_err.c.fips openssl-1.0.1e/crypto/dsa/dsa_err.c
---- openssl-1.0.1e/crypto/dsa/dsa_err.c.fips 2013-02-11 16:26:04.000000000 +0100
-+++ openssl-1.0.1e/crypto/dsa/dsa_err.c 2013-10-04 11:48:04.174694001 +0200
+diff -up openssl-1.0.1g/crypto/dsa/dsa_err.c.fips openssl-1.0.1g/crypto/dsa/dsa_err.c
+--- openssl-1.0.1g/crypto/dsa/dsa_err.c.fips 2014-03-17 17:14:20.000000000 +0100
++++ openssl-1.0.1g/crypto/dsa/dsa_err.c 2014-05-06 16:29:50.538923040 +0200
@@ -74,6 +74,8 @@ static ERR_STRING_DATA DSA_str_functs[]=
{ERR_FUNC(DSA_F_DO_DSA_PRINT), "DO_DSA_PRINT"},
{ERR_FUNC(DSA_F_DSAPARAMS_PRINT), "DSAparams_print"},
@@ -516,9 +491,9 @@ diff -up openssl-1.0.1e/crypto/dsa/dsa_err.c.fips openssl-1.0.1e/crypto/dsa/dsa_
{ERR_REASON(DSA_R_MISSING_PARAMETERS) ,"missing parameters"},
{ERR_REASON(DSA_R_MODULUS_TOO_LARGE) ,"modulus too large"},
{ERR_REASON(DSA_R_NEED_NEW_SETUP_VALUES) ,"need new setup values"},
-diff -up openssl-1.0.1e/crypto/dsa/dsa_gen.c.fips openssl-1.0.1e/crypto/dsa/dsa_gen.c
---- openssl-1.0.1e/crypto/dsa/dsa_gen.c.fips 2013-02-11 16:26:04.000000000 +0100
-+++ openssl-1.0.1e/crypto/dsa/dsa_gen.c 2013-10-04 11:48:04.175694023 +0200
+diff -up openssl-1.0.1g/crypto/dsa/dsa_gen.c.fips openssl-1.0.1g/crypto/dsa/dsa_gen.c
+--- openssl-1.0.1g/crypto/dsa/dsa_gen.c.fips 2014-03-17 17:14:20.000000000 +0100
++++ openssl-1.0.1g/crypto/dsa/dsa_gen.c 2014-05-06 16:29:50.538923040 +0200
@@ -85,6 +85,14 @@
#include <openssl/fips.h>
#endif
@@ -925,9 +900,9 @@ diff -up openssl-1.0.1e/crypto/dsa/dsa_gen.c.fips openssl-1.0.1e/crypto/dsa/dsa_
}
if (mont != NULL) BN_MONT_CTX_free(mont);
return ok;
-diff -up openssl-1.0.1e/crypto/dsa/dsa.h.fips openssl-1.0.1e/crypto/dsa/dsa.h
---- openssl-1.0.1e/crypto/dsa/dsa.h.fips 2013-10-04 11:48:03.956689078 +0200
-+++ openssl-1.0.1e/crypto/dsa/dsa.h 2013-10-04 11:48:04.175694023 +0200
+diff -up openssl-1.0.1g/crypto/dsa/dsa.h.fips openssl-1.0.1g/crypto/dsa/dsa.h
+--- openssl-1.0.1g/crypto/dsa/dsa.h.fips 2014-05-06 16:29:50.316917907 +0200
++++ openssl-1.0.1g/crypto/dsa/dsa.h 2014-05-06 16:29:50.538923040 +0200
@@ -88,6 +88,8 @@
# define OPENSSL_DSA_MAX_MODULUS_BITS 10000
#endif
@@ -988,9 +963,9 @@ diff -up openssl-1.0.1e/crypto/dsa/dsa.h.fips openssl-1.0.1e/crypto/dsa/dsa.h
#define DSA_R_PARAMETER_ENCODING_ERROR 105
#ifdef __cplusplus
-diff -up openssl-1.0.1e/crypto/dsa/dsa_key.c.fips openssl-1.0.1e/crypto/dsa/dsa_key.c
---- openssl-1.0.1e/crypto/dsa/dsa_key.c.fips 2013-02-11 16:26:04.000000000 +0100
-+++ openssl-1.0.1e/crypto/dsa/dsa_key.c 2013-10-04 11:48:04.175694023 +0200
+diff -up openssl-1.0.1g/crypto/dsa/dsa_key.c.fips openssl-1.0.1g/crypto/dsa/dsa_key.c
+--- openssl-1.0.1g/crypto/dsa/dsa_key.c.fips 2014-03-17 17:14:20.000000000 +0100
++++ openssl-1.0.1g/crypto/dsa/dsa_key.c 2014-05-06 16:29:50.539923063 +0200
@@ -66,6 +66,35 @@
#ifdef OPENSSL_FIPS
@@ -1008,7 +983,7 @@ diff -up openssl-1.0.1e/crypto/dsa/dsa_key.c.fips openssl-1.0.1e/crypto/dsa/dsa_
+
+ EVP_PKEY_set1_DSA(pk, dsa);
+
-+ if (fips_pkey_signature_test(pk, tbs, 0, NULL, 0, NULL, 0, NULL))
++ if (fips_pkey_signature_test(pk, tbs, -1, NULL, 0, NULL, 0, NULL))
+ ret = 1;
+
+ err:
@@ -1069,9 +1044,9 @@ diff -up openssl-1.0.1e/crypto/dsa/dsa_key.c.fips openssl-1.0.1e/crypto/dsa/dsa_
ok=1;
err:
-diff -up openssl-1.0.1e/crypto/dsa/dsa_lib.c.fips openssl-1.0.1e/crypto/dsa/dsa_lib.c
---- openssl-1.0.1e/crypto/dsa/dsa_lib.c.fips 2013-02-11 16:26:04.000000000 +0100
-+++ openssl-1.0.1e/crypto/dsa/dsa_lib.c 2013-10-04 11:48:04.175694023 +0200
+diff -up openssl-1.0.1g/crypto/dsa/dsa_lib.c.fips openssl-1.0.1g/crypto/dsa/dsa_lib.c
+--- openssl-1.0.1g/crypto/dsa/dsa_lib.c.fips 2014-03-17 17:14:20.000000000 +0100
++++ openssl-1.0.1g/crypto/dsa/dsa_lib.c 2014-05-06 16:29:50.539923063 +0200
@@ -87,14 +87,7 @@ const DSA_METHOD *DSA_get_default_method
{
if(!default_DSA_method)
@@ -1087,18 +1062,18 @@ diff -up openssl-1.0.1e/crypto/dsa/dsa_lib.c.fips openssl-1.0.1e/crypto/dsa/dsa_
}
return default_DSA_method;
}
-diff -up openssl-1.0.1e/crypto/dsa/dsa_locl.h.fips openssl-1.0.1e/crypto/dsa/dsa_locl.h
---- openssl-1.0.1e/crypto/dsa/dsa_locl.h.fips 2013-10-04 11:48:03.958689123 +0200
-+++ openssl-1.0.1e/crypto/dsa/dsa_locl.h 2013-10-04 11:48:04.175694023 +0200
+diff -up openssl-1.0.1g/crypto/dsa/dsa_locl.h.fips openssl-1.0.1g/crypto/dsa/dsa_locl.h
+--- openssl-1.0.1g/crypto/dsa/dsa_locl.h.fips 2014-05-06 16:29:50.317917930 +0200
++++ openssl-1.0.1g/crypto/dsa/dsa_locl.h 2014-05-06 16:29:50.539923063 +0200
@@ -56,5 +56,4 @@
int dsa_builtin_paramgen(DSA *ret, size_t bits, size_t qbits,
const EVP_MD *evpmd, const unsigned char *seed_in, size_t seed_len,
- unsigned char *seed_out,
int *counter_ret, unsigned long *h_ret, BN_GENCB *cb);
-diff -up openssl-1.0.1e/crypto/dsa/dsa_ossl.c.fips openssl-1.0.1e/crypto/dsa/dsa_ossl.c
---- openssl-1.0.1e/crypto/dsa/dsa_ossl.c.fips 2013-02-11 16:26:04.000000000 +0100
-+++ openssl-1.0.1e/crypto/dsa/dsa_ossl.c 2013-10-04 11:48:04.175694023 +0200
+diff -up openssl-1.0.1g/crypto/dsa/dsa_ossl.c.fips openssl-1.0.1g/crypto/dsa/dsa_ossl.c
+--- openssl-1.0.1g/crypto/dsa/dsa_ossl.c.fips 2014-03-17 17:14:20.000000000 +0100
++++ openssl-1.0.1g/crypto/dsa/dsa_ossl.c 2014-05-06 16:29:50.539923063 +0200
@@ -65,6 +65,9 @@
#include <openssl/dsa.h>
#include <openssl/rand.h>
@@ -1172,9 +1147,9 @@ diff -up openssl-1.0.1e/crypto/dsa/dsa_ossl.c.fips openssl-1.0.1e/crypto/dsa/dsa
dsa->flags|=DSA_FLAG_CACHE_MONT_P;
return(1);
}
-diff -up openssl-1.0.1e/crypto/dsa/dsa_pmeth.c.fips openssl-1.0.1e/crypto/dsa/dsa_pmeth.c
---- openssl-1.0.1e/crypto/dsa/dsa_pmeth.c.fips 2013-02-11 16:26:04.000000000 +0100
-+++ openssl-1.0.1e/crypto/dsa/dsa_pmeth.c 2013-10-04 11:48:04.175694023 +0200
+diff -up openssl-1.0.1g/crypto/dsa/dsa_pmeth.c.fips openssl-1.0.1g/crypto/dsa/dsa_pmeth.c
+--- openssl-1.0.1g/crypto/dsa/dsa_pmeth.c.fips 2014-03-17 17:14:20.000000000 +0100
++++ openssl-1.0.1g/crypto/dsa/dsa_pmeth.c 2014-05-06 16:29:50.539923063 +0200
@@ -255,7 +255,7 @@ static int pkey_dsa_paramgen(EVP_PKEY_CT
if (!dsa)
return 0;
@@ -1184,9 +1159,9 @@ diff -up openssl-1.0.1e/crypto/dsa/dsa_pmeth.c.fips openssl-1.0.1e/crypto/dsa/ds
if (ret)
EVP_PKEY_assign_DSA(pkey, dsa);
else
-diff -up openssl-1.0.1e/crypto/dsa/dsatest.c.fips openssl-1.0.1e/crypto/dsa/dsatest.c
---- openssl-1.0.1e/crypto/dsa/dsatest.c.fips 2013-02-11 16:26:04.000000000 +0100
-+++ openssl-1.0.1e/crypto/dsa/dsatest.c 2013-10-04 11:48:04.176694045 +0200
+diff -up openssl-1.0.1g/crypto/dsa/dsatest.c.fips openssl-1.0.1g/crypto/dsa/dsatest.c
+--- openssl-1.0.1g/crypto/dsa/dsatest.c.fips 2014-03-17 17:14:20.000000000 +0100
++++ openssl-1.0.1g/crypto/dsa/dsatest.c 2014-05-06 16:29:50.539923063 +0200
@@ -96,36 +96,41 @@ static int MS_CALLBACK dsa_cb(int p, int
/* seed, out_p, out_q, out_g are taken from the updated Appendix 5 to
* FIPS PUB 186 and also appear in Appendix 5 to FIPS PIB 186-1 */
@@ -1271,9 +1246,9 @@ diff -up openssl-1.0.1e/crypto/dsa/dsatest.c.fips openssl-1.0.1e/crypto/dsa/dsat
goto end;
}
if (h != 2)
-diff -up openssl-1.0.1e/crypto/engine/eng_all.c.fips openssl-1.0.1e/crypto/engine/eng_all.c
---- openssl-1.0.1e/crypto/engine/eng_all.c.fips 2013-02-11 16:26:04.000000000 +0100
-+++ openssl-1.0.1e/crypto/engine/eng_all.c 2013-10-04 11:48:04.176694045 +0200
+diff -up openssl-1.0.1g/crypto/engine/eng_all.c.fips openssl-1.0.1g/crypto/engine/eng_all.c
+--- openssl-1.0.1g/crypto/engine/eng_all.c.fips 2014-03-17 17:14:20.000000000 +0100
++++ openssl-1.0.1g/crypto/engine/eng_all.c 2014-05-06 16:29:50.539923063 +0200
@@ -58,11 +58,25 @@
#include "cryptlib.h"
@@ -1300,9 +1275,9 @@ diff -up openssl-1.0.1e/crypto/engine/eng_all.c.fips openssl-1.0.1e/crypto/engin
#if 0
/* There's no longer any need for an "openssl" ENGINE unless, one day,
* it is the *only* way for standard builtin implementations to be be
-diff -up openssl-1.0.1e/crypto/evp/c_allc.c.fips openssl-1.0.1e/crypto/evp/c_allc.c
---- openssl-1.0.1e/crypto/evp/c_allc.c.fips 2013-02-11 16:26:04.000000000 +0100
-+++ openssl-1.0.1e/crypto/evp/c_allc.c 2013-10-04 11:48:04.176694045 +0200
+diff -up openssl-1.0.1g/crypto/evp/c_allc.c.fips openssl-1.0.1g/crypto/evp/c_allc.c
+--- openssl-1.0.1g/crypto/evp/c_allc.c.fips 2014-03-17 17:14:20.000000000 +0100
++++ openssl-1.0.1g/crypto/evp/c_allc.c 2014-05-06 16:29:50.540923086 +0200
@@ -65,6 +65,11 @@
void OpenSSL_add_all_ciphers(void)
{
@@ -1376,9 +1351,9 @@ diff -up openssl-1.0.1e/crypto/evp/c_allc.c.fips openssl-1.0.1e/crypto/evp/c_all
+ }
+#endif
}
-diff -up openssl-1.0.1e/crypto/evp/c_alld.c.fips openssl-1.0.1e/crypto/evp/c_alld.c
---- openssl-1.0.1e/crypto/evp/c_alld.c.fips 2013-02-11 16:26:04.000000000 +0100
-+++ openssl-1.0.1e/crypto/evp/c_alld.c 2013-10-04 11:48:04.176694045 +0200
+diff -up openssl-1.0.1g/crypto/evp/c_alld.c.fips openssl-1.0.1g/crypto/evp/c_alld.c
+--- openssl-1.0.1g/crypto/evp/c_alld.c.fips 2014-03-17 17:14:20.000000000 +0100
++++ openssl-1.0.1g/crypto/evp/c_alld.c 2014-05-06 16:29:50.540923086 +0200
@@ -64,6 +64,11 @@
void OpenSSL_add_all_digests(void)
@@ -1424,9 +1399,9 @@ diff -up openssl-1.0.1e/crypto/evp/c_alld.c.fips openssl-1.0.1e/crypto/evp/c_all
+ }
+#endif
}
-diff -up openssl-1.0.1e/crypto/evp/digest.c.fips openssl-1.0.1e/crypto/evp/digest.c
---- openssl-1.0.1e/crypto/evp/digest.c.fips 2013-02-11 16:26:04.000000000 +0100
-+++ openssl-1.0.1e/crypto/evp/digest.c 2013-10-04 11:48:04.176694045 +0200
+diff -up openssl-1.0.1g/crypto/evp/digest.c.fips openssl-1.0.1g/crypto/evp/digest.c
+--- openssl-1.0.1g/crypto/evp/digest.c.fips 2014-03-17 17:14:20.000000000 +0100
++++ openssl-1.0.1g/crypto/evp/digest.c 2014-05-06 16:29:50.540923086 +0200
@@ -142,9 +142,50 @@ int EVP_DigestInit(EVP_MD_CTX *ctx, cons
return EVP_DigestInit_ex(ctx, type, NULL);
}
@@ -1549,7 +1524,7 @@ diff -up openssl-1.0.1e/crypto/evp/digest.c.fips openssl-1.0.1e/crypto/evp/diges
}
int EVP_MD_CTX_copy(EVP_MD_CTX *out, const EVP_MD_CTX *in)
-@@ -373,7 +414,6 @@ void EVP_MD_CTX_destroy(EVP_MD_CTX *ctx)
+@@ -376,7 +417,6 @@ void EVP_MD_CTX_destroy(EVP_MD_CTX *ctx)
/* This call frees resources associated with the context */
int EVP_MD_CTX_cleanup(EVP_MD_CTX *ctx)
{
@@ -1557,7 +1532,7 @@ diff -up openssl-1.0.1e/crypto/evp/digest.c.fips openssl-1.0.1e/crypto/evp/diges
/* Don't assume ctx->md_data was cleaned in EVP_Digest_Final,
* because sometimes only copies of the context are ever finalised.
*/
-@@ -386,7 +426,6 @@ int EVP_MD_CTX_cleanup(EVP_MD_CTX *ctx)
+@@ -389,7 +429,6 @@ int EVP_MD_CTX_cleanup(EVP_MD_CTX *ctx)
OPENSSL_cleanse(ctx->md_data,ctx->digest->ctx_size);
OPENSSL_free(ctx->md_data);
}
@@ -1565,7 +1540,7 @@ diff -up openssl-1.0.1e/crypto/evp/digest.c.fips openssl-1.0.1e/crypto/evp/diges
if (ctx->pctx)
EVP_PKEY_CTX_free(ctx->pctx);
#ifndef OPENSSL_NO_ENGINE
-@@ -395,9 +434,6 @@ int EVP_MD_CTX_cleanup(EVP_MD_CTX *ctx)
+@@ -398,9 +437,6 @@ int EVP_MD_CTX_cleanup(EVP_MD_CTX *ctx)
* functional reference we held for this reason. */
ENGINE_finish(ctx->engine);
#endif
@@ -1575,9 +1550,9 @@ diff -up openssl-1.0.1e/crypto/evp/digest.c.fips openssl-1.0.1e/crypto/evp/diges
memset(ctx,'\0',sizeof *ctx);
return 1;
-diff -up openssl-1.0.1e/crypto/evp/e_aes.c.fips openssl-1.0.1e/crypto/evp/e_aes.c
---- openssl-1.0.1e/crypto/evp/e_aes.c.fips 2013-02-11 16:26:04.000000000 +0100
-+++ openssl-1.0.1e/crypto/evp/e_aes.c 2013-10-04 11:48:04.177694068 +0200
+diff -up openssl-1.0.1g/crypto/evp/e_aes.c.fips openssl-1.0.1g/crypto/evp/e_aes.c
+--- openssl-1.0.1g/crypto/evp/e_aes.c.fips 2014-03-17 17:14:20.000000000 +0100
++++ openssl-1.0.1g/crypto/evp/e_aes.c 2014-05-06 16:29:50.540923086 +0200
@@ -56,7 +56,6 @@
#include <assert.h>
#include <openssl/aes.h>
@@ -1595,7 +1570,7 @@ diff -up openssl-1.0.1e/crypto/evp/e_aes.c.fips openssl-1.0.1e/crypto/evp/e_aes.
&& arg < 12)
return 0;
#endif
-@@ -1128,7 +1127,7 @@ static int aes_xts_cipher(EVP_CIPHER_CTX
+@@ -1134,7 +1133,7 @@ static int aes_xts_cipher(EVP_CIPHER_CTX
return 0;
#ifdef OPENSSL_FIPS
/* Requirement of SP800-38E */
@@ -1604,14 +1579,14 @@ diff -up openssl-1.0.1e/crypto/evp/e_aes.c.fips openssl-1.0.1e/crypto/evp/e_aes.
(len > (1UL<<20)*16))
{
EVPerr(EVP_F_AES_XTS_CIPHER, EVP_R_TOO_LARGE);
-@@ -1311,4 +1310,3 @@ BLOCK_CIPHER_custom(NID_aes,192,1,12,ccm
+@@ -1317,4 +1316,3 @@ BLOCK_CIPHER_custom(NID_aes,192,1,12,ccm
BLOCK_CIPHER_custom(NID_aes,256,1,12,ccm,CCM,EVP_CIPH_FLAG_FIPS|CUSTOM_FLAGS)
#endif
-#endif
-diff -up openssl-1.0.1e/crypto/evp/e_des3.c.fips openssl-1.0.1e/crypto/evp/e_des3.c
---- openssl-1.0.1e/crypto/evp/e_des3.c.fips 2013-02-11 16:26:04.000000000 +0100
-+++ openssl-1.0.1e/crypto/evp/e_des3.c 2013-10-04 11:48:04.177694068 +0200
+diff -up openssl-1.0.1g/crypto/evp/e_des3.c.fips openssl-1.0.1g/crypto/evp/e_des3.c
+--- openssl-1.0.1g/crypto/evp/e_des3.c.fips 2014-03-17 17:14:20.000000000 +0100
++++ openssl-1.0.1g/crypto/evp/e_des3.c 2014-05-06 16:29:50.540923086 +0200
@@ -65,8 +65,6 @@
#include <openssl/des.h>
#include <openssl/rand.h>
@@ -1670,9 +1645,9 @@ diff -up openssl-1.0.1e/crypto/evp/e_des3.c.fips openssl-1.0.1e/crypto/evp/e_des
}
#endif
-#endif
-diff -up openssl-1.0.1e/crypto/evp/e_null.c.fips openssl-1.0.1e/crypto/evp/e_null.c
---- openssl-1.0.1e/crypto/evp/e_null.c.fips 2013-02-11 16:26:04.000000000 +0100
-+++ openssl-1.0.1e/crypto/evp/e_null.c 2013-10-04 11:48:04.177694068 +0200
+diff -up openssl-1.0.1g/crypto/evp/e_null.c.fips openssl-1.0.1g/crypto/evp/e_null.c
+--- openssl-1.0.1g/crypto/evp/e_null.c.fips 2014-03-17 17:14:20.000000000 +0100
++++ openssl-1.0.1g/crypto/evp/e_null.c 2014-05-06 16:29:50.540923086 +0200
@@ -61,8 +61,6 @@
#include <openssl/evp.h>
#include <openssl/objects.h>
@@ -1696,9 +1671,9 @@ diff -up openssl-1.0.1e/crypto/evp/e_null.c.fips openssl-1.0.1e/crypto/evp/e_nul
return 1;
}
-#endif
-diff -up openssl-1.0.1e/crypto/evp/evp_enc.c.fips openssl-1.0.1e/crypto/evp/evp_enc.c
---- openssl-1.0.1e/crypto/evp/evp_enc.c.fips 2013-02-11 16:26:04.000000000 +0100
-+++ openssl-1.0.1e/crypto/evp/evp_enc.c 2013-10-04 11:48:04.177694068 +0200
+diff -up openssl-1.0.1g/crypto/evp/evp_enc.c.fips openssl-1.0.1g/crypto/evp/evp_enc.c
+--- openssl-1.0.1g/crypto/evp/evp_enc.c.fips 2014-03-17 17:14:20.000000000 +0100
++++ openssl-1.0.1g/crypto/evp/evp_enc.c 2014-05-06 16:29:50.541923109 +0200
@@ -69,17 +69,58 @@
#endif
#include "evp_locl.h"
@@ -1847,9 +1822,9 @@ diff -up openssl-1.0.1e/crypto/evp/evp_enc.c.fips openssl-1.0.1e/crypto/evp/evp_
memset(c,0,sizeof(EVP_CIPHER_CTX));
return 1;
}
-diff -up openssl-1.0.1e/crypto/evp/evp.h.fips openssl-1.0.1e/crypto/evp/evp.h
---- openssl-1.0.1e/crypto/evp/evp.h.fips 2013-10-04 11:48:04.071691675 +0200
-+++ openssl-1.0.1e/crypto/evp/evp.h 2013-10-04 11:48:04.177694068 +0200
+diff -up openssl-1.0.1g/crypto/evp/evp.h.fips openssl-1.0.1g/crypto/evp/evp.h
+--- openssl-1.0.1g/crypto/evp/evp.h.fips 2014-05-06 16:29:50.432920589 +0200
++++ openssl-1.0.1g/crypto/evp/evp.h 2014-05-06 16:29:50.541923109 +0200
@@ -75,6 +75,10 @@
#include <openssl/bio.h>
#endif
@@ -1902,9 +1877,9 @@ diff -up openssl-1.0.1e/crypto/evp/evp.h.fips openssl-1.0.1e/crypto/evp/evp.h
/* Cipher handles any and all padding logic as well
* as finalisation.
*/
-diff -up openssl-1.0.1e/crypto/evp/evp_lib.c.fips openssl-1.0.1e/crypto/evp/evp_lib.c
---- openssl-1.0.1e/crypto/evp/evp_lib.c.fips 2013-02-11 16:26:04.000000000 +0100
-+++ openssl-1.0.1e/crypto/evp/evp_lib.c 2013-10-04 11:48:04.177694068 +0200
+diff -up openssl-1.0.1g/crypto/evp/evp_lib.c.fips openssl-1.0.1g/crypto/evp/evp_lib.c
+--- openssl-1.0.1g/crypto/evp/evp_lib.c.fips 2014-03-17 17:14:20.000000000 +0100
++++ openssl-1.0.1g/crypto/evp/evp_lib.c 2014-05-06 16:29:50.541923109 +0200
@@ -190,6 +190,9 @@ int EVP_CIPHER_CTX_block_size(const EVP_
int EVP_Cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, const unsigned char *in, unsigned int inl)
@@ -1915,9 +1890,9 @@ diff -up openssl-1.0.1e/crypto/evp/evp_lib.c.fips openssl-1.0.1e/crypto/evp/evp_
return ctx->cipher->do_cipher(ctx,out,in,inl);
}
-diff -up openssl-1.0.1e/crypto/evp/evp_locl.h.fips openssl-1.0.1e/crypto/evp/evp_locl.h
---- openssl-1.0.1e/crypto/evp/evp_locl.h.fips 2013-10-04 11:48:04.067691584 +0200
-+++ openssl-1.0.1e/crypto/evp/evp_locl.h 2013-10-04 11:48:04.178694091 +0200
+diff -up openssl-1.0.1g/crypto/evp/evp_locl.h.fips openssl-1.0.1g/crypto/evp/evp_locl.h
+--- openssl-1.0.1g/crypto/evp/evp_locl.h.fips 2014-05-06 16:29:50.428920496 +0200
++++ openssl-1.0.1g/crypto/evp/evp_locl.h 2014-05-06 16:29:50.541923109 +0200
@@ -258,10 +258,9 @@ const EVP_CIPHER *EVP_##cname##_ecb(void
BLOCK_CIPHER_func_cfb(cipher##_##keysize,cprefix,cbits,kstruct,ksched) \
BLOCK_CIPHER_def_cfb(cipher##_##keysize,kstruct, \
@@ -1952,9 +1927,9 @@ diff -up openssl-1.0.1e/crypto/evp/evp_locl.h.fips openssl-1.0.1e/crypto/evp/evp
#define Camellia_set_key private_Camellia_set_key
#endif
-diff -up openssl-1.0.1e/crypto/evp/Makefile.fips openssl-1.0.1e/crypto/evp/Makefile
---- openssl-1.0.1e/crypto/evp/Makefile.fips 2013-02-11 16:26:04.000000000 +0100
-+++ openssl-1.0.1e/crypto/evp/Makefile 2013-10-04 11:48:04.178694091 +0200
+diff -up openssl-1.0.1g/crypto/evp/Makefile.fips openssl-1.0.1g/crypto/evp/Makefile
+--- openssl-1.0.1g/crypto/evp/Makefile.fips 2014-04-07 18:55:33.000000000 +0200
++++ openssl-1.0.1g/crypto/evp/Makefile 2014-05-06 16:29:50.541923109 +0200
@@ -28,7 +28,7 @@ LIBSRC= encode.c digest.c evp_enc.c evp_
bio_md.c bio_b64.c bio_enc.c evp_err.c e_null.c \
c_all.c c_allc.c c_alld.c evp_lib.c bio_ok.c \
@@ -1973,9 +1948,9 @@ diff -up openssl-1.0.1e/crypto/evp/Makefile.fips openssl-1.0.1e/crypto/evp/Makef
e_aes_cbc_hmac_sha1.o e_rc4_hmac_md5.o
SRC= $(LIBSRC)
-diff -up openssl-1.0.1e/crypto/evp/m_dss.c.fips openssl-1.0.1e/crypto/evp/m_dss.c
---- openssl-1.0.1e/crypto/evp/m_dss.c.fips 2013-02-11 16:26:04.000000000 +0100
-+++ openssl-1.0.1e/crypto/evp/m_dss.c 2013-10-04 11:48:04.178694091 +0200
+diff -up openssl-1.0.1g/crypto/evp/m_dss.c.fips openssl-1.0.1g/crypto/evp/m_dss.c
+--- openssl-1.0.1g/crypto/evp/m_dss.c.fips 2014-03-17 17:14:20.000000000 +0100
++++ openssl-1.0.1g/crypto/evp/m_dss.c 2014-05-06 16:29:50.542923132 +0200
@@ -66,7 +66,6 @@
#endif
@@ -1998,9 +1973,9 @@ diff -up openssl-1.0.1e/crypto/evp/m_dss.c.fips openssl-1.0.1e/crypto/evp/m_dss.
}
#endif
-#endif
-diff -up openssl-1.0.1e/crypto/evp/m_dss1.c.fips openssl-1.0.1e/crypto/evp/m_dss1.c
---- openssl-1.0.1e/crypto/evp/m_dss1.c.fips 2013-02-11 16:26:04.000000000 +0100
-+++ openssl-1.0.1e/crypto/evp/m_dss1.c 2013-10-04 11:48:04.178694091 +0200
+diff -up openssl-1.0.1g/crypto/evp/m_dss1.c.fips openssl-1.0.1g/crypto/evp/m_dss1.c
+--- openssl-1.0.1g/crypto/evp/m_dss1.c.fips 2014-03-17 17:14:20.000000000 +0100
++++ openssl-1.0.1g/crypto/evp/m_dss1.c 2014-05-06 16:29:50.542923132 +0200
@@ -68,8 +68,6 @@
#include <openssl/dsa.h>
#endif
@@ -2024,9 +1999,9 @@ diff -up openssl-1.0.1e/crypto/evp/m_dss1.c.fips openssl-1.0.1e/crypto/evp/m_dss
}
#endif
-#endif
-diff -up openssl-1.0.1e/crypto/evp/m_md2.c.fips openssl-1.0.1e/crypto/evp/m_md2.c
---- openssl-1.0.1e/crypto/evp/m_md2.c.fips 2013-02-11 16:26:04.000000000 +0100
-+++ openssl-1.0.1e/crypto/evp/m_md2.c 2013-10-04 11:48:04.178694091 +0200
+diff -up openssl-1.0.1g/crypto/evp/m_md2.c.fips openssl-1.0.1g/crypto/evp/m_md2.c
+--- openssl-1.0.1g/crypto/evp/m_md2.c.fips 2014-03-17 17:14:20.000000000 +0100
++++ openssl-1.0.1g/crypto/evp/m_md2.c 2014-05-06 16:29:50.542923132 +0200
@@ -68,6 +68,7 @@
#ifndef OPENSSL_NO_RSA
#include <openssl/rsa.h>
@@ -2035,9 +2010,9 @@ diff -up openssl-1.0.1e/crypto/evp/m_md2.c.fips openssl-1.0.1e/crypto/evp/m_md2.
static int init(EVP_MD_CTX *ctx)
{ return MD2_Init(ctx->md_data); }
-diff -up openssl-1.0.1e/crypto/evp/m_sha1.c.fips openssl-1.0.1e/crypto/evp/m_sha1.c
---- openssl-1.0.1e/crypto/evp/m_sha1.c.fips 2013-02-11 16:26:04.000000000 +0100
-+++ openssl-1.0.1e/crypto/evp/m_sha1.c 2013-10-04 11:48:04.178694091 +0200
+diff -up openssl-1.0.1g/crypto/evp/m_sha1.c.fips openssl-1.0.1g/crypto/evp/m_sha1.c
+--- openssl-1.0.1g/crypto/evp/m_sha1.c.fips 2014-03-17 17:14:20.000000000 +0100
++++ openssl-1.0.1g/crypto/evp/m_sha1.c 2014-05-06 16:29:50.542923132 +0200
@@ -59,8 +59,6 @@
#include <stdio.h>
#include "cryptlib.h"
@@ -2102,9 +2077,9 @@ diff -up openssl-1.0.1e/crypto/evp/m_sha1.c.fips openssl-1.0.1e/crypto/evp/m_sha
#endif /* ifndef OPENSSL_NO_SHA512 */
-#endif
-diff -up openssl-1.0.1e/crypto/evp/p_sign.c.fips openssl-1.0.1e/crypto/evp/p_sign.c
---- openssl-1.0.1e/crypto/evp/p_sign.c.fips 2013-02-11 16:26:04.000000000 +0100
-+++ openssl-1.0.1e/crypto/evp/p_sign.c 2013-10-04 11:48:04.179694114 +0200
+diff -up openssl-1.0.1g/crypto/evp/p_sign.c.fips openssl-1.0.1g/crypto/evp/p_sign.c
+--- openssl-1.0.1g/crypto/evp/p_sign.c.fips 2014-03-17 17:14:20.000000000 +0100
++++ openssl-1.0.1g/crypto/evp/p_sign.c 2014-05-06 16:29:50.542923132 +0200
@@ -61,6 +61,7 @@
#include <openssl/evp.h>
#include <openssl/objects.h>
@@ -2136,9 +2111,9 @@ diff -up openssl-1.0.1e/crypto/evp/p_sign.c.fips openssl-1.0.1e/crypto/evp/p_sig
if (EVP_PKEY_sign(pkctx, sigret, &sltmp, m, m_len) <= 0)
goto err;
*siglen = sltmp;
-diff -up openssl-1.0.1e/crypto/evp/p_verify.c.fips openssl-1.0.1e/crypto/evp/p_verify.c
---- openssl-1.0.1e/crypto/evp/p_verify.c.fips 2013-02-11 16:26:04.000000000 +0100
-+++ openssl-1.0.1e/crypto/evp/p_verify.c 2013-10-04 11:48:04.179694114 +0200
+diff -up openssl-1.0.1g/crypto/evp/p_verify.c.fips openssl-1.0.1g/crypto/evp/p_verify.c
+--- openssl-1.0.1g/crypto/evp/p_verify.c.fips 2014-03-17 17:14:20.000000000 +0100
++++ openssl-1.0.1g/crypto/evp/p_verify.c 2014-05-06 16:29:50.542923132 +0200
@@ -61,6 +61,7 @@
#include <openssl/evp.h>
#include <openssl/objects.h>
@@ -2170,9 +2145,9 @@ diff -up openssl-1.0.1e/crypto/evp/p_verify.c.fips openssl-1.0.1e/crypto/evp/p_v
i = EVP_PKEY_verify(pkctx, sigbuf, siglen, m, m_len);
err:
EVP_PKEY_CTX_free(pkctx);
-diff -up openssl-1.0.1e/crypto/fips/cavs/fips_aesavs.c.fips openssl-1.0.1e/crypto/fips/cavs/fips_aesavs.c
---- openssl-1.0.1e/crypto/fips/cavs/fips_aesavs.c.fips 2013-10-04 11:48:04.179694114 +0200
-+++ openssl-1.0.1e/crypto/fips/cavs/fips_aesavs.c 2013-10-04 11:48:04.179694114 +0200
+diff -up openssl-1.0.1g/crypto/fips/cavs/fips_aesavs.c.fips openssl-1.0.1g/crypto/fips/cavs/fips_aesavs.c
+--- openssl-1.0.1g/crypto/fips/cavs/fips_aesavs.c.fips 2014-05-06 16:29:50.543923155 +0200
++++ openssl-1.0.1g/crypto/fips/cavs/fips_aesavs.c 2014-05-06 16:29:50.543923155 +0200
@@ -0,0 +1,939 @@
+/* ====================================================================
+ * Copyright (c) 2004 The OpenSSL Project. All rights reserved.
@@ -3113,9 +3088,9 @@ diff -up openssl-1.0.1e/crypto/fips/cavs/fips_aesavs.c.fips openssl-1.0.1e/crypt
+ }
+
+#endif
-diff -up openssl-1.0.1e/crypto/fips/cavs/fips_cmactest.c.fips openssl-1.0.1e/crypto/fips/cavs/fips_cmactest.c
---- openssl-1.0.1e/crypto/fips/cavs/fips_cmactest.c.fips 2013-10-04 11:48:04.179694114 +0200
-+++ openssl-1.0.1e/crypto/fips/cavs/fips_cmactest.c 2013-10-04 11:48:04.179694114 +0200
+diff -up openssl-1.0.1g/crypto/fips/cavs/fips_cmactest.c.fips openssl-1.0.1g/crypto/fips/cavs/fips_cmactest.c
+--- openssl-1.0.1g/crypto/fips/cavs/fips_cmactest.c.fips 2014-05-06 16:29:50.543923155 +0200
++++ openssl-1.0.1g/crypto/fips/cavs/fips_cmactest.c 2014-05-06 16:29:50.543923155 +0200
@@ -0,0 +1,517 @@
+/* fips_cmactest.c */
+/* Written by Dr Stephen N Henson (steve at openssl.org) for the OpenSSL
@@ -3634,9 +3609,9 @@ diff -up openssl-1.0.1e/crypto/fips/cavs/fips_cmactest.c.fips openssl-1.0.1e/cry
+ }
+
+#endif
-diff -up openssl-1.0.1e/crypto/fips/cavs/fips_desmovs.c.fips openssl-1.0.1e/crypto/fips/cavs/fips_desmovs.c
---- openssl-1.0.1e/crypto/fips/cavs/fips_desmovs.c.fips 2013-10-04 11:48:04.180694136 +0200
-+++ openssl-1.0.1e/crypto/fips/cavs/fips_desmovs.c 2013-10-04 11:48:04.180694136 +0200
+diff -up openssl-1.0.1g/crypto/fips/cavs/fips_desmovs.c.fips openssl-1.0.1g/crypto/fips/cavs/fips_desmovs.c
+--- openssl-1.0.1g/crypto/fips/cavs/fips_desmovs.c.fips 2014-05-06 16:29:50.543923155 +0200
++++ openssl-1.0.1g/crypto/fips/cavs/fips_desmovs.c 2014-05-06 16:29:50.543923155 +0200
@@ -0,0 +1,702 @@
+/* ====================================================================
+ * Copyright (c) 2004 The OpenSSL Project. All rights reserved.
@@ -4340,9 +4315,9 @@ diff -up openssl-1.0.1e/crypto/fips/cavs/fips_desmovs.c.fips openssl-1.0.1e/cryp
+ }
+
+#endif
-diff -up openssl-1.0.1e/crypto/fips/cavs/fips_dhvs.c.fips openssl-1.0.1e/crypto/fips/cavs/fips_dhvs.c
---- openssl-1.0.1e/crypto/fips/cavs/fips_dhvs.c.fips 2013-10-04 11:48:04.180694136 +0200
-+++ openssl-1.0.1e/crypto/fips/cavs/fips_dhvs.c 2013-10-04 11:48:04.180694136 +0200
+diff -up openssl-1.0.1g/crypto/fips/cavs/fips_dhvs.c.fips openssl-1.0.1g/crypto/fips/cavs/fips_dhvs.c
+--- openssl-1.0.1g/crypto/fips/cavs/fips_dhvs.c.fips 2014-05-06 16:29:50.543923155 +0200
++++ openssl-1.0.1g/crypto/fips/cavs/fips_dhvs.c 2014-05-06 16:29:50.543923155 +0200
@@ -0,0 +1,292 @@
+/* fips/dh/fips_dhvs.c */
+/* Written by Dr Stephen N Henson (steve at openssl.org) for the OpenSSL
@@ -4636,9 +4611,9 @@ diff -up openssl-1.0.1e/crypto/fips/cavs/fips_dhvs.c.fips openssl-1.0.1e/crypto/
+ }
+
+#endif
-diff -up openssl-1.0.1e/crypto/fips/cavs/fips_drbgvs.c.fips openssl-1.0.1e/crypto/fips/cavs/fips_drbgvs.c
---- openssl-1.0.1e/crypto/fips/cavs/fips_drbgvs.c.fips 2013-10-04 11:48:04.180694136 +0200
-+++ openssl-1.0.1e/crypto/fips/cavs/fips_drbgvs.c 2013-10-04 11:48:04.180694136 +0200
+diff -up openssl-1.0.1g/crypto/fips/cavs/fips_drbgvs.c.fips openssl-1.0.1g/crypto/fips/cavs/fips_drbgvs.c
+--- openssl-1.0.1g/crypto/fips/cavs/fips_drbgvs.c.fips 2014-05-06 16:29:50.544923178 +0200
++++ openssl-1.0.1g/crypto/fips/cavs/fips_drbgvs.c 2014-05-06 16:29:50.544923178 +0200
@@ -0,0 +1,416 @@
+/* fips/rand/fips_drbgvs.c */
+/* Written by Dr Stephen N Henson (steve at openssl.org) for the OpenSSL
@@ -5056,9 +5031,9 @@ diff -up openssl-1.0.1e/crypto/fips/cavs/fips_drbgvs.c.fips openssl-1.0.1e/crypt
+ }
+
+#endif
-diff -up openssl-1.0.1e/crypto/fips/cavs/fips_dssvs.c.fips openssl-1.0.1e/crypto/fips/cavs/fips_dssvs.c
---- openssl-1.0.1e/crypto/fips/cavs/fips_dssvs.c.fips 2013-10-04 11:48:04.180694136 +0200
-+++ openssl-1.0.1e/crypto/fips/cavs/fips_dssvs.c 2013-10-04 11:48:04.180694136 +0200
+diff -up openssl-1.0.1g/crypto/fips/cavs/fips_dssvs.c.fips openssl-1.0.1g/crypto/fips/cavs/fips_dssvs.c
+--- openssl-1.0.1g/crypto/fips/cavs/fips_dssvs.c.fips 2014-05-06 16:29:50.544923178 +0200
++++ openssl-1.0.1g/crypto/fips/cavs/fips_dssvs.c 2014-05-06 16:29:50.544923178 +0200
@@ -0,0 +1,537 @@
+#include <openssl/opensslconf.h>
+
@@ -5597,9 +5572,9 @@ diff -up openssl-1.0.1e/crypto/fips/cavs/fips_dssvs.c.fips openssl-1.0.1e/crypto
+ }
+
+#endif
-diff -up openssl-1.0.1e/crypto/fips/cavs/fips_gcmtest.c.fips openssl-1.0.1e/crypto/fips/cavs/fips_gcmtest.c
---- openssl-1.0.1e/crypto/fips/cavs/fips_gcmtest.c.fips 2013-10-04 11:48:04.181694158 +0200
-+++ openssl-1.0.1e/crypto/fips/cavs/fips_gcmtest.c 2013-10-04 11:48:04.181694158 +0200
+diff -up openssl-1.0.1g/crypto/fips/cavs/fips_gcmtest.c.fips openssl-1.0.1g/crypto/fips/cavs/fips_gcmtest.c
+--- openssl-1.0.1g/crypto/fips/cavs/fips_gcmtest.c.fips 2014-05-06 16:29:50.544923178 +0200
++++ openssl-1.0.1g/crypto/fips/cavs/fips_gcmtest.c 2014-05-06 16:29:50.544923178 +0200
@@ -0,0 +1,571 @@
+/* fips/aes/fips_gcmtest.c */
+/* Written by Dr Stephen N Henson (steve at openssl.org) for the OpenSSL
@@ -6172,9 +6147,9 @@ diff -up openssl-1.0.1e/crypto/fips/cavs/fips_gcmtest.c.fips openssl-1.0.1e/cryp
+}
+
+#endif
-diff -up openssl-1.0.1e/crypto/fips/cavs/fips_rngvs.c.fips openssl-1.0.1e/crypto/fips/cavs/fips_rngvs.c
---- openssl-1.0.1e/crypto/fips/cavs/fips_rngvs.c.fips 2013-10-04 11:48:04.181694158 +0200
-+++ openssl-1.0.1e/crypto/fips/cavs/fips_rngvs.c 2013-10-04 11:48:04.181694158 +0200
+diff -up openssl-1.0.1g/crypto/fips/cavs/fips_rngvs.c.fips openssl-1.0.1g/crypto/fips/cavs/fips_rngvs.c
+--- openssl-1.0.1g/crypto/fips/cavs/fips_rngvs.c.fips 2014-05-06 16:29:50.544923178 +0200
++++ openssl-1.0.1g/crypto/fips/cavs/fips_rngvs.c 2014-05-06 16:29:50.544923178 +0200
@@ -0,0 +1,230 @@
+/*
+ * Crude test driver for processing the VST and MCT testvector files
@@ -6406,9 +6381,9 @@ diff -up openssl-1.0.1e/crypto/fips/cavs/fips_rngvs.c.fips openssl-1.0.1e/crypto
+ return 0;
+ }
+#endif
-diff -up openssl-1.0.1e/crypto/fips/cavs/fips_rsagtest.c.fips openssl-1.0.1e/crypto/fips/cavs/fips_rsagtest.c
---- openssl-1.0.1e/crypto/fips/cavs/fips_rsagtest.c.fips 2013-10-04 11:48:04.181694158 +0200
-+++ openssl-1.0.1e/crypto/fips/cavs/fips_rsagtest.c 2013-10-04 11:48:04.181694158 +0200
+diff -up openssl-1.0.1g/crypto/fips/cavs/fips_rsagtest.c.fips openssl-1.0.1g/crypto/fips/cavs/fips_rsagtest.c
+--- openssl-1.0.1g/crypto/fips/cavs/fips_rsagtest.c.fips 2014-05-06 16:29:50.545923201 +0200
++++ openssl-1.0.1g/crypto/fips/cavs/fips_rsagtest.c 2014-05-06 16:29:50.545923201 +0200
@@ -0,0 +1,390 @@
+/* fips_rsagtest.c */
+/* Written by Dr Stephen N Henson (steve at openssl.org) for the OpenSSL
@@ -6800,9 +6775,9 @@ diff -up openssl-1.0.1e/crypto/fips/cavs/fips_rsagtest.c.fips openssl-1.0.1e/cry
+ }
+
+#endif
-diff -up openssl-1.0.1e/crypto/fips/cavs/fips_rsastest.c.fips openssl-1.0.1e/crypto/fips/cavs/fips_rsastest.c
---- openssl-1.0.1e/crypto/fips/cavs/fips_rsastest.c.fips 2013-10-04 11:48:04.181694158 +0200
-+++ openssl-1.0.1e/crypto/fips/cavs/fips_rsastest.c 2013-10-04 11:48:04.181694158 +0200
+diff -up openssl-1.0.1g/crypto/fips/cavs/fips_rsastest.c.fips openssl-1.0.1g/crypto/fips/cavs/fips_rsastest.c
+--- openssl-1.0.1g/crypto/fips/cavs/fips_rsastest.c.fips 2014-05-06 16:29:50.545923201 +0200
++++ openssl-1.0.1g/crypto/fips/cavs/fips_rsastest.c 2014-05-06 16:29:50.545923201 +0200
@@ -0,0 +1,370 @@
+/* fips_rsastest.c */
+/* Written by Dr Stephen N Henson (steve at openssl.org) for the OpenSSL
@@ -7174,9 +7149,9 @@ diff -up openssl-1.0.1e/crypto/fips/cavs/fips_rsastest.c.fips openssl-1.0.1e/cry
+ return ret;
+ }
+#endif
-diff -up openssl-1.0.1e/crypto/fips/cavs/fips_rsavtest.c.fips openssl-1.0.1e/crypto/fips/cavs/fips_rsavtest.c
---- openssl-1.0.1e/crypto/fips/cavs/fips_rsavtest.c.fips 2013-10-04 11:48:04.181694158 +0200
-+++ openssl-1.0.1e/crypto/fips/cavs/fips_rsavtest.c 2013-10-04 11:48:04.181694158 +0200
+diff -up openssl-1.0.1g/crypto/fips/cavs/fips_rsavtest.c.fips openssl-1.0.1g/crypto/fips/cavs/fips_rsavtest.c
+--- openssl-1.0.1g/crypto/fips/cavs/fips_rsavtest.c.fips 2014-05-06 16:29:50.545923201 +0200
++++ openssl-1.0.1g/crypto/fips/cavs/fips_rsavtest.c 2014-05-06 16:29:50.545923201 +0200
@@ -0,0 +1,377 @@
+/* fips_rsavtest.c */
+/* Written by Dr Stephen N Henson (steve at openssl.org) for the OpenSSL
@@ -7555,9 +7530,9 @@ diff -up openssl-1.0.1e/crypto/fips/cavs/fips_rsavtest.c.fips openssl-1.0.1e/cry
+ return ret;
+ }
+#endif
-diff -up openssl-1.0.1e/crypto/fips/cavs/fips_shatest.c.fips openssl-1.0.1e/crypto/fips/cavs/fips_shatest.c
---- openssl-1.0.1e/crypto/fips/cavs/fips_shatest.c.fips 2013-10-04 11:48:04.182694181 +0200
-+++ openssl-1.0.1e/crypto/fips/cavs/fips_shatest.c 2013-10-04 11:48:04.182694181 +0200
+diff -up openssl-1.0.1g/crypto/fips/cavs/fips_shatest.c.fips openssl-1.0.1g/crypto/fips/cavs/fips_shatest.c
+--- openssl-1.0.1g/crypto/fips/cavs/fips_shatest.c.fips 2014-05-06 16:29:50.545923201 +0200
++++ openssl-1.0.1g/crypto/fips/cavs/fips_shatest.c 2014-05-06 16:29:50.545923201 +0200
@@ -0,0 +1,388 @@
+/* fips_shatest.c */
+/* Written by Dr Stephen N Henson (steve at openssl.org) for the OpenSSL
@@ -7947,9 +7922,9 @@ diff -up openssl-1.0.1e/crypto/fips/cavs/fips_shatest.c.fips openssl-1.0.1e/cryp
+ }
+
+#endif
-diff -up openssl-1.0.1e/crypto/fips/cavs/fips_utl.h.fips openssl-1.0.1e/crypto/fips/cavs/fips_utl.h
---- openssl-1.0.1e/crypto/fips/cavs/fips_utl.h.fips 2013-10-04 11:48:04.182694181 +0200
-+++ openssl-1.0.1e/crypto/fips/cavs/fips_utl.h 2013-10-04 11:48:04.182694181 +0200
+diff -up openssl-1.0.1g/crypto/fips/cavs/fips_utl.h.fips openssl-1.0.1g/crypto/fips/cavs/fips_utl.h
+--- openssl-1.0.1g/crypto/fips/cavs/fips_utl.h.fips 2014-05-06 16:29:50.545923201 +0200
++++ openssl-1.0.1g/crypto/fips/cavs/fips_utl.h 2014-05-06 16:29:50.545923201 +0200
@@ -0,0 +1,343 @@
+/* ====================================================================
+ * Copyright (c) 2007 The OpenSSL Project. All rights reserved.
@@ -8294,9 +8269,9 @@ diff -up openssl-1.0.1e/crypto/fips/cavs/fips_utl.h.fips openssl-1.0.1e/crypto/f
+#endif
+ }
+
-diff -up openssl-1.0.1e/crypto/fips/fips_aes_selftest.c.fips openssl-1.0.1e/crypto/fips/fips_aes_selftest.c
---- openssl-1.0.1e/crypto/fips/fips_aes_selftest.c.fips 2013-10-04 11:48:04.182694181 +0200
-+++ openssl-1.0.1e/crypto/fips/fips_aes_selftest.c 2013-10-04 11:48:04.182694181 +0200
+diff -up openssl-1.0.1g/crypto/fips/fips_aes_selftest.c.fips openssl-1.0.1g/crypto/fips/fips_aes_selftest.c
+--- openssl-1.0.1g/crypto/fips/fips_aes_selftest.c.fips 2014-05-06 16:29:50.546923224 +0200
++++ openssl-1.0.1g/crypto/fips/fips_aes_selftest.c 2014-05-06 16:29:50.546923224 +0200
@@ -0,0 +1,359 @@
+/* ====================================================================
+ * Copyright (c) 2003 The OpenSSL Project. All rights reserved.
@@ -8657,10 +8632,10 @@ diff -up openssl-1.0.1e/crypto/fips/fips_aes_selftest.c.fips openssl-1.0.1e/cryp
+ }
+
+#endif
-diff -up openssl-1.0.1e/crypto/fips/fips.c.fips openssl-1.0.1e/crypto/fips/fips.c
---- openssl-1.0.1e/crypto/fips/fips.c.fips 2013-10-04 11:48:04.182694181 +0200
-+++ openssl-1.0.1e/crypto/fips/fips.c 2013-10-04 11:48:04.182694181 +0200
-@@ -0,0 +1,489 @@
+diff -up openssl-1.0.1g/crypto/fips/fips.c.fips openssl-1.0.1g/crypto/fips/fips.c
+--- openssl-1.0.1g/crypto/fips/fips.c.fips 2014-05-06 16:29:50.546923224 +0200
++++ openssl-1.0.1g/crypto/fips/fips.c 2014-05-06 16:33:24.309865160 +0200
+@@ -0,0 +1,491 @@
+/* ====================================================================
+ * Copyright (c) 2003 The OpenSSL Project. All rights reserved.
+ *
@@ -8990,6 +8965,8 @@ diff -up openssl-1.0.1e/crypto/fips/fips.c.fips openssl-1.0.1e/crypto/fips/fips.
+ }
+ free(buf);
+ free(hex);
++ } else {
++ rv = -1;
+ }
+
+end:
@@ -9150,9 +9127,9 @@ diff -up openssl-1.0.1e/crypto/fips/fips.c.fips openssl-1.0.1e/crypto/fips/fips.
+
+
+#endif
-diff -up openssl-1.0.1e/crypto/fips/fips_cmac_selftest.c.fips openssl-1.0.1e/crypto/fips/fips_cmac_selftest.c
---- openssl-1.0.1e/crypto/fips/fips_cmac_selftest.c.fips 2013-10-04 11:48:04.183694204 +0200
-+++ openssl-1.0.1e/crypto/fips/fips_cmac_selftest.c 2013-10-04 11:48:04.183694204 +0200
+diff -up openssl-1.0.1g/crypto/fips/fips_cmac_selftest.c.fips openssl-1.0.1g/crypto/fips/fips_cmac_selftest.c
+--- openssl-1.0.1g/crypto/fips/fips_cmac_selftest.c.fips 2014-05-06 16:29:50.546923224 +0200
++++ openssl-1.0.1g/crypto/fips/fips_cmac_selftest.c 2014-05-06 16:29:50.546923224 +0200
@@ -0,0 +1,161 @@
+/* ====================================================================
+ * Copyright (c) 2011 The OpenSSL Project. All rights reserved.
@@ -9315,9 +9292,9 @@ diff -up openssl-1.0.1e/crypto/fips/fips_cmac_selftest.c.fips openssl-1.0.1e/cry
+ return rv;
+ }
+#endif
-diff -up openssl-1.0.1e/crypto/fips/fips_des_selftest.c.fips openssl-1.0.1e/crypto/fips/fips_des_selftest.c
---- openssl-1.0.1e/crypto/fips/fips_des_selftest.c.fips 2013-10-04 11:48:04.183694204 +0200
-+++ openssl-1.0.1e/crypto/fips/fips_des_selftest.c 2013-10-04 11:48:04.183694204 +0200
+diff -up openssl-1.0.1g/crypto/fips/fips_des_selftest.c.fips openssl-1.0.1g/crypto/fips/fips_des_selftest.c
+--- openssl-1.0.1g/crypto/fips/fips_des_selftest.c.fips 2014-05-06 16:29:50.546923224 +0200
++++ openssl-1.0.1g/crypto/fips/fips_des_selftest.c 2014-05-06 16:29:50.546923224 +0200
@@ -0,0 +1,147 @@
+/* ====================================================================
+ * Copyright (c) 2003 The OpenSSL Project. All rights reserved.
@@ -9466,9 +9443,9 @@ diff -up openssl-1.0.1e/crypto/fips/fips_des_selftest.c.fips openssl-1.0.1e/cryp
+ return ret;
+ }
+#endif
-diff -up openssl-1.0.1e/crypto/fips/fips_drbg_ctr.c.fips openssl-1.0.1e/crypto/fips/fips_drbg_ctr.c
---- openssl-1.0.1e/crypto/fips/fips_drbg_ctr.c.fips 2013-10-04 11:48:04.183694204 +0200
-+++ openssl-1.0.1e/crypto/fips/fips_drbg_ctr.c 2013-10-04 11:48:04.183694204 +0200
+diff -up openssl-1.0.1g/crypto/fips/fips_drbg_ctr.c.fips openssl-1.0.1g/crypto/fips/fips_drbg_ctr.c
+--- openssl-1.0.1g/crypto/fips/fips_drbg_ctr.c.fips 2014-05-06 16:29:50.546923224 +0200
++++ openssl-1.0.1g/crypto/fips/fips_drbg_ctr.c 2014-05-06 16:29:50.546923224 +0200
@@ -0,0 +1,436 @@
+/* fips/rand/fips_drbg_ctr.c */
+/* Written by Dr Stephen N Henson (steve at openssl.org) for the OpenSSL
@@ -9906,9 +9883,9 @@ diff -up openssl-1.0.1e/crypto/fips/fips_drbg_ctr.c.fips openssl-1.0.1e/crypto/f
+
+ return 1;
+ }
-diff -up openssl-1.0.1e/crypto/fips/fips_drbg_hash.c.fips openssl-1.0.1e/crypto/fips/fips_drbg_hash.c
---- openssl-1.0.1e/crypto/fips/fips_drbg_hash.c.fips 2013-10-04 11:48:04.183694204 +0200
-+++ openssl-1.0.1e/crypto/fips/fips_drbg_hash.c 2013-10-04 11:48:04.183694204 +0200
+diff -up openssl-1.0.1g/crypto/fips/fips_drbg_hash.c.fips openssl-1.0.1g/crypto/fips/fips_drbg_hash.c
+--- openssl-1.0.1g/crypto/fips/fips_drbg_hash.c.fips 2014-05-06 16:29:50.547923248 +0200
++++ openssl-1.0.1g/crypto/fips/fips_drbg_hash.c 2014-05-06 16:29:50.547923248 +0200
@@ -0,0 +1,378 @@
+/* fips/rand/fips_drbg_hash.c */
+/* Written by Dr Stephen N Henson (steve at openssl.org) for the OpenSSL
@@ -10288,9 +10265,9 @@ diff -up openssl-1.0.1e/crypto/fips/fips_drbg_hash.c.fips openssl-1.0.1e/crypto/
+
+ return 1;
+ }
-diff -up openssl-1.0.1e/crypto/fips/fips_drbg_hmac.c.fips openssl-1.0.1e/crypto/fips/fips_drbg_hmac.c
---- openssl-1.0.1e/crypto/fips/fips_drbg_hmac.c.fips 2013-10-04 11:48:04.183694204 +0200
-+++ openssl-1.0.1e/crypto/fips/fips_drbg_hmac.c 2013-10-04 11:48:04.183694204 +0200
+diff -up openssl-1.0.1g/crypto/fips/fips_drbg_hmac.c.fips openssl-1.0.1g/crypto/fips/fips_drbg_hmac.c
+--- openssl-1.0.1g/crypto/fips/fips_drbg_hmac.c.fips 2014-05-06 16:29:50.547923248 +0200
++++ openssl-1.0.1g/crypto/fips/fips_drbg_hmac.c 2014-05-06 16:29:50.547923248 +0200
@@ -0,0 +1,281 @@
+/* fips/rand/fips_drbg_hmac.c */
+/* Written by Dr Stephen N Henson (steve at openssl.org) for the OpenSSL
@@ -10573,9 +10550,9 @@ diff -up openssl-1.0.1e/crypto/fips/fips_drbg_hmac.c.fips openssl-1.0.1e/crypto/
+
+ return 1;
+ }
-diff -up openssl-1.0.1e/crypto/fips/fips_drbg_lib.c.fips openssl-1.0.1e/crypto/fips/fips_drbg_lib.c
---- openssl-1.0.1e/crypto/fips/fips_drbg_lib.c.fips 2013-10-04 11:48:04.184694226 +0200
-+++ openssl-1.0.1e/crypto/fips/fips_drbg_lib.c 2013-10-04 11:48:04.184694226 +0200
+diff -up openssl-1.0.1g/crypto/fips/fips_drbg_lib.c.fips openssl-1.0.1g/crypto/fips/fips_drbg_lib.c
+--- openssl-1.0.1g/crypto/fips/fips_drbg_lib.c.fips 2014-05-06 16:29:50.547923248 +0200
++++ openssl-1.0.1g/crypto/fips/fips_drbg_lib.c 2014-05-06 16:29:50.547923248 +0200
@@ -0,0 +1,578 @@
+/* Written by Dr Stephen N Henson (steve at openssl.org) for the OpenSSL
+ * project.
@@ -11155,9 +11132,9 @@ diff -up openssl-1.0.1e/crypto/fips/fips_drbg_lib.c.fips openssl-1.0.1e/crypto/f
+ memcpy(dctx->lb, out, dctx->blocklength);
+ return 1;
+ }
-diff -up openssl-1.0.1e/crypto/fips/fips_drbg_rand.c.fips openssl-1.0.1e/crypto/fips/fips_drbg_rand.c
---- openssl-1.0.1e/crypto/fips/fips_drbg_rand.c.fips 2013-10-04 11:48:04.184694226 +0200
-+++ openssl-1.0.1e/crypto/fips/fips_drbg_rand.c 2013-10-04 11:48:04.184694226 +0200
+diff -up openssl-1.0.1g/crypto/fips/fips_drbg_rand.c.fips openssl-1.0.1g/crypto/fips/fips_drbg_rand.c
+--- openssl-1.0.1g/crypto/fips/fips_drbg_rand.c.fips 2014-05-06 16:29:50.547923248 +0200
++++ openssl-1.0.1g/crypto/fips/fips_drbg_rand.c 2014-05-06 16:29:50.547923248 +0200
@@ -0,0 +1,172 @@
+/* fips/rand/fips_drbg_rand.c */
+/* Written by Dr Stephen N Henson (steve at openssl.org) for the OpenSSL
@@ -11331,9 +11308,9 @@ diff -up openssl-1.0.1e/crypto/fips/fips_drbg_rand.c.fips openssl-1.0.1e/crypto/
+ return &rand_drbg_meth;
+ }
+
-diff -up openssl-1.0.1e/crypto/fips/fips_drbg_selftest.c.fips openssl-1.0.1e/crypto/fips/fips_drbg_selftest.c
---- openssl-1.0.1e/crypto/fips/fips_drbg_selftest.c.fips 2013-10-04 11:48:04.184694226 +0200
-+++ openssl-1.0.1e/crypto/fips/fips_drbg_selftest.c 2013-10-04 11:48:04.184694226 +0200
+diff -up openssl-1.0.1g/crypto/fips/fips_drbg_selftest.c.fips openssl-1.0.1g/crypto/fips/fips_drbg_selftest.c
+--- openssl-1.0.1g/crypto/fips/fips_drbg_selftest.c.fips 2014-05-06 16:29:50.548923271 +0200
++++ openssl-1.0.1g/crypto/fips/fips_drbg_selftest.c 2014-05-06 16:29:50.548923271 +0200
@@ -0,0 +1,862 @@
+/* fips/rand/fips_drbg_selftest.c */
+/* Written by Dr Stephen N Henson (steve at openssl.org) for the OpenSSL
@@ -12197,9 +12174,9 @@ diff -up openssl-1.0.1e/crypto/fips/fips_drbg_selftest.c.fips openssl-1.0.1e/cry
+ return rv;
+ }
+
-diff -up openssl-1.0.1e/crypto/fips/fips_drbg_selftest.h.fips openssl-1.0.1e/crypto/fips/fips_drbg_selftest.h
---- openssl-1.0.1e/crypto/fips/fips_drbg_selftest.h.fips 2013-10-04 11:48:04.185694249 +0200
-+++ openssl-1.0.1e/crypto/fips/fips_drbg_selftest.h 2013-10-04 11:48:04.185694249 +0200
+diff -up openssl-1.0.1g/crypto/fips/fips_drbg_selftest.h.fips openssl-1.0.1g/crypto/fips/fips_drbg_selftest.h
+--- openssl-1.0.1g/crypto/fips/fips_drbg_selftest.h.fips 2014-05-06 16:29:50.548923271 +0200
++++ openssl-1.0.1g/crypto/fips/fips_drbg_selftest.h 2014-05-06 16:29:50.548923271 +0200
@@ -0,0 +1,2335 @@
+/* ====================================================================
+ * Copyright (c) 2011 The OpenSSL Project. All rights reserved.
@@ -14536,9 +14513,9 @@ diff -up openssl-1.0.1e/crypto/fips/fips_drbg_selftest.h.fips openssl-1.0.1e/cry
+ 0xc2,0xd6,0xfd,0xa5
+ };
+
-diff -up openssl-1.0.1e/crypto/fips/fips_dsa_selftest.c.fips openssl-1.0.1e/crypto/fips/fips_dsa_selftest.c
---- openssl-1.0.1e/crypto/fips/fips_dsa_selftest.c.fips 2013-10-04 11:48:04.185694249 +0200
-+++ openssl-1.0.1e/crypto/fips/fips_dsa_selftest.c 2013-10-04 11:48:04.185694249 +0200
+diff -up openssl-1.0.1g/crypto/fips/fips_dsa_selftest.c.fips openssl-1.0.1g/crypto/fips/fips_dsa_selftest.c
+--- openssl-1.0.1g/crypto/fips/fips_dsa_selftest.c.fips 2014-05-06 16:29:50.549923294 +0200
++++ openssl-1.0.1g/crypto/fips/fips_dsa_selftest.c 2014-05-06 16:29:50.548923271 +0200
@@ -0,0 +1,193 @@
+/* ====================================================================
+ * Copyright (c) 2011 The OpenSSL Project. All rights reserved.
@@ -14733,9 +14710,9 @@ diff -up openssl-1.0.1e/crypto/fips/fips_dsa_selftest.c.fips openssl-1.0.1e/cryp
+ return ret;
+ }
+#endif
-diff -up openssl-1.0.1e/crypto/fips/fips_enc.c.fips openssl-1.0.1e/crypto/fips/fips_enc.c
---- openssl-1.0.1e/crypto/fips/fips_enc.c.fips 2013-10-04 11:48:04.185694249 +0200
-+++ openssl-1.0.1e/crypto/fips/fips_enc.c 2013-10-04 11:48:04.185694249 +0200
+diff -up openssl-1.0.1g/crypto/fips/fips_enc.c.fips openssl-1.0.1g/crypto/fips/fips_enc.c
+--- openssl-1.0.1g/crypto/fips/fips_enc.c.fips 2014-05-06 16:29:50.549923294 +0200
++++ openssl-1.0.1g/crypto/fips/fips_enc.c 2014-05-06 16:29:50.549923294 +0200
@@ -0,0 +1,191 @@
+/* fipe/evp/fips_enc.c */
+/* Copyright (C) 1995-1998 Eric Young (eay at cryptsoft.com)
@@ -14928,9 +14905,9 @@ diff -up openssl-1.0.1e/crypto/fips/fips_enc.c.fips openssl-1.0.1e/crypto/fips/f
+ }
+ }
+
-diff -up openssl-1.0.1e/crypto/fips/fips.h.fips openssl-1.0.1e/crypto/fips/fips.h
---- openssl-1.0.1e/crypto/fips/fips.h.fips 2013-10-04 11:48:04.186694271 +0200
-+++ openssl-1.0.1e/crypto/fips/fips.h 2013-10-04 11:48:04.186694271 +0200
+diff -up openssl-1.0.1g/crypto/fips/fips.h.fips openssl-1.0.1g/crypto/fips/fips.h
+--- openssl-1.0.1g/crypto/fips/fips.h.fips 2014-05-06 16:29:50.549923294 +0200
++++ openssl-1.0.1g/crypto/fips/fips.h 2014-05-06 16:29:50.549923294 +0200
@@ -0,0 +1,279 @@
+/* ====================================================================
+ * Copyright (c) 2003 The OpenSSL Project. All rights reserved.
@@ -15211,9 +15188,9 @@ diff -up openssl-1.0.1e/crypto/fips/fips.h.fips openssl-1.0.1e/crypto/fips/fips.
+}
+#endif
+#endif
-diff -up openssl-1.0.1e/crypto/fips/fips_hmac_selftest.c.fips openssl-1.0.1e/crypto/fips/fips_hmac_selftest.c
---- openssl-1.0.1e/crypto/fips/fips_hmac_selftest.c.fips 2013-10-04 11:48:04.186694271 +0200
-+++ openssl-1.0.1e/crypto/fips/fips_hmac_selftest.c 2013-10-04 11:48:04.186694271 +0200
+diff -up openssl-1.0.1g/crypto/fips/fips_hmac_selftest.c.fips openssl-1.0.1g/crypto/fips/fips_hmac_selftest.c
+--- openssl-1.0.1g/crypto/fips/fips_hmac_selftest.c.fips 2014-05-06 16:29:50.549923294 +0200
++++ openssl-1.0.1g/crypto/fips/fips_hmac_selftest.c 2014-05-06 16:29:50.549923294 +0200
@@ -0,0 +1,137 @@
+/* ====================================================================
+ * Copyright (c) 2005 The OpenSSL Project. All rights reserved.
@@ -15352,9 +15329,9 @@ diff -up openssl-1.0.1e/crypto/fips/fips_hmac_selftest.c.fips openssl-1.0.1e/cry
+ return 1;
+ }
+#endif
-diff -up openssl-1.0.1e/crypto/fips/fips_locl.h.fips openssl-1.0.1e/crypto/fips/fips_locl.h
---- openssl-1.0.1e/crypto/fips/fips_locl.h.fips 2013-10-04 11:48:04.186694271 +0200
-+++ openssl-1.0.1e/crypto/fips/fips_locl.h 2013-10-04 11:48:04.186694271 +0200
+diff -up openssl-1.0.1g/crypto/fips/fips_locl.h.fips openssl-1.0.1g/crypto/fips/fips_locl.h
+--- openssl-1.0.1g/crypto/fips/fips_locl.h.fips 2014-05-06 16:29:50.549923294 +0200
++++ openssl-1.0.1g/crypto/fips/fips_locl.h 2014-05-06 16:29:50.549923294 +0200
@@ -0,0 +1,71 @@
+/* ====================================================================
+ * Copyright (c) 2011 The OpenSSL Project. All rights reserved.
@@ -15427,9 +15404,9 @@ diff -up openssl-1.0.1e/crypto/fips/fips_locl.h.fips openssl-1.0.1e/crypto/fips/
+}
+#endif
+#endif
-diff -up openssl-1.0.1e/crypto/fips/fips_md.c.fips openssl-1.0.1e/crypto/fips/fips_md.c
---- openssl-1.0.1e/crypto/fips/fips_md.c.fips 2013-10-04 11:48:04.186694271 +0200
-+++ openssl-1.0.1e/crypto/fips/fips_md.c 2013-10-04 11:48:04.186694271 +0200
+diff -up openssl-1.0.1g/crypto/fips/fips_md.c.fips openssl-1.0.1g/crypto/fips/fips_md.c
+--- openssl-1.0.1g/crypto/fips/fips_md.c.fips 2014-05-06 16:29:50.549923294 +0200
++++ openssl-1.0.1g/crypto/fips/fips_md.c 2014-05-06 16:29:50.549923294 +0200
@@ -0,0 +1,145 @@
+/* fips/evp/fips_md.c */
+/* Copyright (C) 1995-1998 Eric Young (eay at cryptsoft.com)
@@ -15576,9 +15553,9 @@ diff -up openssl-1.0.1e/crypto/fips/fips_md.c.fips openssl-1.0.1e/crypto/fips/fi
+ return NULL;
+ }
+ }
-diff -up openssl-1.0.1e/crypto/fips/fips_post.c.fips openssl-1.0.1e/crypto/fips/fips_post.c
---- openssl-1.0.1e/crypto/fips/fips_post.c.fips 2013-10-04 11:48:04.186694271 +0200
-+++ openssl-1.0.1e/crypto/fips/fips_post.c 2013-10-04 11:48:04.186694271 +0200
+diff -up openssl-1.0.1g/crypto/fips/fips_post.c.fips openssl-1.0.1g/crypto/fips/fips_post.c
+--- openssl-1.0.1g/crypto/fips/fips_post.c.fips 2014-05-06 16:29:50.549923294 +0200
++++ openssl-1.0.1g/crypto/fips/fips_post.c 2014-05-06 16:29:50.549923294 +0200
@@ -0,0 +1,205 @@
+/* ====================================================================
+ * Copyright (c) 2011 The OpenSSL Project. All rights reserved.
@@ -15785,9 +15762,9 @@ diff -up openssl-1.0.1e/crypto/fips/fips_post.c.fips openssl-1.0.1e/crypto/fips/
+ return 1;
+ }
+#endif
-diff -up openssl-1.0.1e/crypto/fips/fips_rand.c.fips openssl-1.0.1e/crypto/fips/fips_rand.c
---- openssl-1.0.1e/crypto/fips/fips_rand.c.fips 2013-10-04 11:48:04.187694294 +0200
-+++ openssl-1.0.1e/crypto/fips/fips_rand.c 2013-10-04 11:48:04.187694294 +0200
+diff -up openssl-1.0.1g/crypto/fips/fips_rand.c.fips openssl-1.0.1g/crypto/fips/fips_rand.c
+--- openssl-1.0.1g/crypto/fips/fips_rand.c.fips 2014-05-06 16:29:50.550923317 +0200
++++ openssl-1.0.1g/crypto/fips/fips_rand.c 2014-05-06 16:29:50.550923317 +0200
@@ -0,0 +1,457 @@
+/* ====================================================================
+ * Copyright (c) 2007 The OpenSSL Project. All rights reserved.
@@ -16246,9 +16223,9 @@ diff -up openssl-1.0.1e/crypto/fips/fips_rand.c.fips openssl-1.0.1e/crypto/fips/
+}
+
+#endif
-diff -up openssl-1.0.1e/crypto/fips/fips_rand.h.fips openssl-1.0.1e/crypto/fips/fips_rand.h
---- openssl-1.0.1e/crypto/fips/fips_rand.h.fips 2013-10-04 11:48:04.187694294 +0200
-+++ openssl-1.0.1e/crypto/fips/fips_rand.h 2013-10-04 11:48:04.187694294 +0200
+diff -up openssl-1.0.1g/crypto/fips/fips_rand.h.fips openssl-1.0.1g/crypto/fips/fips_rand.h
+--- openssl-1.0.1g/crypto/fips/fips_rand.h.fips 2014-05-06 16:29:50.550923317 +0200
++++ openssl-1.0.1g/crypto/fips/fips_rand.h 2014-05-06 16:29:50.550923317 +0200
@@ -0,0 +1,145 @@
+/* ====================================================================
+ * Copyright (c) 2003 The OpenSSL Project. All rights reserved.
@@ -16395,9 +16372,9 @@ diff -up openssl-1.0.1e/crypto/fips/fips_rand.h.fips openssl-1.0.1e/crypto/fips/
+#endif
+#endif
+#endif
-diff -up openssl-1.0.1e/crypto/fips/fips_rand_lcl.h.fips openssl-1.0.1e/crypto/fips/fips_rand_lcl.h
---- openssl-1.0.1e/crypto/fips/fips_rand_lcl.h.fips 2013-10-04 11:48:04.187694294 +0200
-+++ openssl-1.0.1e/crypto/fips/fips_rand_lcl.h 2013-10-04 11:48:04.187694294 +0200
+diff -up openssl-1.0.1g/crypto/fips/fips_rand_lcl.h.fips openssl-1.0.1g/crypto/fips/fips_rand_lcl.h
+--- openssl-1.0.1g/crypto/fips/fips_rand_lcl.h.fips 2014-05-06 16:29:50.550923317 +0200
++++ openssl-1.0.1g/crypto/fips/fips_rand_lcl.h 2014-05-06 16:29:50.550923317 +0200
@@ -0,0 +1,219 @@
+/* fips/rand/fips_rand_lcl.h */
+/* Written by Dr Stephen N Henson (steve at openssl.org) for the OpenSSL
@@ -16618,9 +16595,9 @@ diff -up openssl-1.0.1e/crypto/fips/fips_rand_lcl.h.fips openssl-1.0.1e/crypto/f
+#define FIPS_digestupdate EVP_DigestUpdate
+#define FIPS_digestfinal EVP_DigestFinal
+#define M_EVP_MD_size EVP_MD_size
-diff -up openssl-1.0.1e/crypto/fips/fips_rand_lib.c.fips openssl-1.0.1e/crypto/fips/fips_rand_lib.c
---- openssl-1.0.1e/crypto/fips/fips_rand_lib.c.fips 2013-10-04 11:48:04.187694294 +0200
-+++ openssl-1.0.1e/crypto/fips/fips_rand_lib.c 2013-10-04 11:48:04.187694294 +0200
+diff -up openssl-1.0.1g/crypto/fips/fips_rand_lib.c.fips openssl-1.0.1g/crypto/fips/fips_rand_lib.c
+--- openssl-1.0.1g/crypto/fips/fips_rand_lib.c.fips 2014-05-06 16:29:50.550923317 +0200
++++ openssl-1.0.1g/crypto/fips/fips_rand_lib.c 2014-05-06 16:29:50.550923317 +0200
@@ -0,0 +1,191 @@
+/* ====================================================================
+ * Copyright (c) 2011 The OpenSSL Project. All rights reserved.
@@ -16813,9 +16790,9 @@ diff -up openssl-1.0.1e/crypto/fips/fips_rand_lib.c.fips openssl-1.0.1e/crypto/f
+ }
+ return 0;
+ }
-diff -up openssl-1.0.1e/crypto/fips/fips_rand_selftest.c.fips openssl-1.0.1e/crypto/fips/fips_rand_selftest.c
---- openssl-1.0.1e/crypto/fips/fips_rand_selftest.c.fips 2013-10-04 11:48:04.187694294 +0200
-+++ openssl-1.0.1e/crypto/fips/fips_rand_selftest.c 2013-10-04 11:48:04.187694294 +0200
+diff -up openssl-1.0.1g/crypto/fips/fips_rand_selftest.c.fips openssl-1.0.1g/crypto/fips/fips_rand_selftest.c
+--- openssl-1.0.1g/crypto/fips/fips_rand_selftest.c.fips 2014-05-06 16:29:50.550923317 +0200
++++ openssl-1.0.1g/crypto/fips/fips_rand_selftest.c 2014-05-06 16:29:50.550923317 +0200
@@ -0,0 +1,183 @@
+/* ====================================================================
+ * Copyright (c) 2003 The OpenSSL Project. All rights reserved.
@@ -17000,9 +16977,9 @@ diff -up openssl-1.0.1e/crypto/fips/fips_rand_selftest.c.fips openssl-1.0.1e/cry
+ }
+
+#endif
-diff -up openssl-1.0.1e/crypto/fips/fips_randtest.c.fips openssl-1.0.1e/crypto/fips/fips_randtest.c
---- openssl-1.0.1e/crypto/fips/fips_randtest.c.fips 2013-10-04 11:48:04.188694316 +0200
-+++ openssl-1.0.1e/crypto/fips/fips_randtest.c 2013-10-04 11:48:04.187694294 +0200
+diff -up openssl-1.0.1g/crypto/fips/fips_randtest.c.fips openssl-1.0.1g/crypto/fips/fips_randtest.c
+--- openssl-1.0.1g/crypto/fips/fips_randtest.c.fips 2014-05-06 16:29:50.551923340 +0200
++++ openssl-1.0.1g/crypto/fips/fips_randtest.c 2014-05-06 16:29:50.551923340 +0200
@@ -0,0 +1,250 @@
+/* Copyright (C) 1995-1998 Eric Young (eay at cryptsoft.com)
+ * All rights reserved.
@@ -17254,9 +17231,9 @@ diff -up openssl-1.0.1e/crypto/fips/fips_randtest.c.fips openssl-1.0.1e/crypto/f
+ }
+
+#endif
-diff -up openssl-1.0.1e/crypto/fips/fips_rsa_selftest.c.fips openssl-1.0.1e/crypto/fips/fips_rsa_selftest.c
---- openssl-1.0.1e/crypto/fips/fips_rsa_selftest.c.fips 2013-10-04 11:48:04.188694316 +0200
-+++ openssl-1.0.1e/crypto/fips/fips_rsa_selftest.c 2013-10-04 11:48:04.188694316 +0200
+diff -up openssl-1.0.1g/crypto/fips/fips_rsa_selftest.c.fips openssl-1.0.1g/crypto/fips/fips_rsa_selftest.c
+--- openssl-1.0.1g/crypto/fips/fips_rsa_selftest.c.fips 2014-05-06 16:29:50.551923340 +0200
++++ openssl-1.0.1g/crypto/fips/fips_rsa_selftest.c 2014-05-06 16:29:50.551923340 +0200
@@ -0,0 +1,444 @@
+/* ====================================================================
+ * Copyright (c) 2003-2007 The OpenSSL Project. All rights reserved.
@@ -17702,9 +17679,9 @@ diff -up openssl-1.0.1e/crypto/fips/fips_rsa_selftest.c.fips openssl-1.0.1e/cryp
+ }
+
+#endif /* def OPENSSL_FIPS */
-diff -up openssl-1.0.1e/crypto/fips/fips_rsa_x931g.c.fips openssl-1.0.1e/crypto/fips/fips_rsa_x931g.c
---- openssl-1.0.1e/crypto/fips/fips_rsa_x931g.c.fips 2013-10-04 11:48:04.188694316 +0200
-+++ openssl-1.0.1e/crypto/fips/fips_rsa_x931g.c 2013-10-04 11:48:04.188694316 +0200
+diff -up openssl-1.0.1g/crypto/fips/fips_rsa_x931g.c.fips openssl-1.0.1g/crypto/fips/fips_rsa_x931g.c
+--- openssl-1.0.1g/crypto/fips/fips_rsa_x931g.c.fips 2014-05-06 16:29:50.551923340 +0200
++++ openssl-1.0.1g/crypto/fips/fips_rsa_x931g.c 2014-05-06 16:29:50.551923340 +0200
@@ -0,0 +1,282 @@
+/* crypto/rsa/rsa_gen.c */
+/* Copyright (C) 1995-1998 Eric Young (eay at cryptsoft.com)
@@ -17988,9 +17965,9 @@ diff -up openssl-1.0.1e/crypto/fips/fips_rsa_x931g.c.fips openssl-1.0.1e/crypto/
+ return 0;
+
+ }
-diff -up openssl-1.0.1e/crypto/fips/fips_sha_selftest.c.fips openssl-1.0.1e/crypto/fips/fips_sha_selftest.c
---- openssl-1.0.1e/crypto/fips/fips_sha_selftest.c.fips 2013-10-04 11:48:04.188694316 +0200
-+++ openssl-1.0.1e/crypto/fips/fips_sha_selftest.c 2013-10-04 11:48:04.188694316 +0200
+diff -up openssl-1.0.1g/crypto/fips/fips_sha_selftest.c.fips openssl-1.0.1g/crypto/fips/fips_sha_selftest.c
+--- openssl-1.0.1g/crypto/fips/fips_sha_selftest.c.fips 2014-05-06 16:29:50.551923340 +0200
++++ openssl-1.0.1g/crypto/fips/fips_sha_selftest.c 2014-05-06 16:29:50.551923340 +0200
@@ -0,0 +1,140 @@
+/* ====================================================================
+ * Copyright (c) 2003 The OpenSSL Project. All rights reserved.
@@ -18132,10 +18109,10 @@ diff -up openssl-1.0.1e/crypto/fips/fips_sha_selftest.c.fips openssl-1.0.1e/cryp
+ }
+
+#endif
-diff -up openssl-1.0.1e/crypto/fips/fips_standalone_hmac.c.fips openssl-1.0.1e/crypto/fips/fips_standalone_hmac.c
---- openssl-1.0.1e/crypto/fips/fips_standalone_hmac.c.fips 2013-10-04 11:48:04.188694316 +0200
-+++ openssl-1.0.1e/crypto/fips/fips_standalone_hmac.c 2013-10-04 11:48:04.188694316 +0200
-@@ -0,0 +1,180 @@
+diff -up openssl-1.0.1g/crypto/fips/fips_standalone_hmac.c.fips openssl-1.0.1g/crypto/fips/fips_standalone_hmac.c
+--- openssl-1.0.1g/crypto/fips/fips_standalone_hmac.c.fips 2014-05-06 16:29:50.551923340 +0200
++++ openssl-1.0.1g/crypto/fips/fips_standalone_hmac.c 2014-05-06 16:29:50.551923340 +0200
+@@ -0,0 +1,236 @@
+/* ====================================================================
+ * Copyright (c) 2003 The OpenSSL Project. All rights reserved.
+ *
@@ -18195,17 +18172,73 @@ diff -up openssl-1.0.1e/crypto/fips/fips_standalone_hmac.c.fips openssl-1.0.1e/c
+#ifndef FIPSCANISTER_O
+int FIPS_selftest_failed() { return 0; }
+void FIPS_selftest_check() {}
-+void OPENSSL_cleanse(void *p,size_t len) {}
+#endif
+
++#ifdef OPENSSL_FIPS
++int bn_mul_mont_fpu64(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np, const BN_ULONG *n0, int num) { return 0; };
++int bn_mul_mont_int(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np, const BN_ULONG *n0, int num) { return 0; };
++
+#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
+ defined(__INTEL__) || \
+ defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || defined(_M_X64)
+
+unsigned int OPENSSL_ia32cap_P[2];
++unsigned long *OPENSSL_ia32cap_loc(void)
++{ if (sizeof(long)==4)
++ /*
++ * If 32-bit application pulls address of OPENSSL_ia32cap_P[0]
++ * clear second element to maintain the illusion that vector
++ * is 32-bit.
++ */
++ OPENSSL_ia32cap_P[1]=0;
++ return (unsigned long *)OPENSSL_ia32cap_P;
++}
++
++#if defined(OPENSSL_CPUID_OBJ) && !defined(OPENSSL_NO_ASM) && !defined(I386_ONLY)
++#define OPENSSL_CPUID_SETUP
++#if defined(_WIN32)
++typedef unsigned __int64 IA32CAP;
++#else
++typedef unsigned long long IA32CAP;
++#endif
++void OPENSSL_cpuid_setup(void)
++{ static int trigger=0;
++ IA32CAP OPENSSL_ia32_cpuid(void);
++ IA32CAP vec;
++ char *env;
++
++ if (trigger) return;
++
++ trigger=1;
++ if ((env=getenv("OPENSSL_ia32cap"))) {
++ int off = (env[0]=='~')?1:0;
++#if defined(_WIN32)
++ if (!sscanf(env+off,"%I64i",&vec)) vec = strtoul(env+off,NULL,0);
++#else
++ if (!sscanf(env+off,"%lli",(long long *)&vec)) vec = strtoul(env+off,NULL,0);
++#endif
++ if (off) vec = OPENSSL_ia32_cpuid()&~vec;
++ }
++ else
++ vec = OPENSSL_ia32_cpuid();
++
++ /*
++ * |(1<<10) sets a reserved bit to signal that variable
++ * was initialized already... This is to avoid interference
++ * with cpuid snippets in ELF .init segment.
++ */
++ OPENSSL_ia32cap_P[0] = (unsigned int)vec|(1<<10);
++ OPENSSL_ia32cap_P[1] = (unsigned int)(vec>>32);
++}
+#endif
+
-+#ifdef OPENSSL_FIPS
++#else
++unsigned long *OPENSSL_ia32cap_loc(void) { return NULL; }
++#endif
++int OPENSSL_NONPIC_relocated = 0;
++#if !defined(OPENSSL_CPUID_SETUP) && !defined(OPENSSL_CPUID_OBJ)
++void OPENSSL_cpuid_setup(void) {}
++#endif
+
+static void hmac_init(SHA256_CTX *md_ctx,SHA256_CTX *o_ctx,
+ const char *key)
@@ -18316,9 +18349,9 @@ diff -up openssl-1.0.1e/crypto/fips/fips_standalone_hmac.c.fips openssl-1.0.1e/c
+ }
+
+
-diff -up openssl-1.0.1e/crypto/fips/fips_test_suite.c.fips openssl-1.0.1e/crypto/fips/fips_test_suite.c
---- openssl-1.0.1e/crypto/fips/fips_test_suite.c.fips 2013-10-04 11:48:04.189694339 +0200
-+++ openssl-1.0.1e/crypto/fips/fips_test_suite.c 2013-10-04 11:48:04.189694339 +0200
+diff -up openssl-1.0.1g/crypto/fips/fips_test_suite.c.fips openssl-1.0.1g/crypto/fips/fips_test_suite.c
+--- openssl-1.0.1g/crypto/fips/fips_test_suite.c.fips 2014-05-06 16:29:50.552923363 +0200
++++ openssl-1.0.1g/crypto/fips/fips_test_suite.c 2014-05-06 16:29:50.552923363 +0200
@@ -0,0 +1,588 @@
+/* ====================================================================
+ * Copyright (c) 2003 The OpenSSL Project. All rights reserved.
@@ -18908,10 +18941,10 @@ diff -up openssl-1.0.1e/crypto/fips/fips_test_suite.c.fips openssl-1.0.1e/crypto
+ }
+
+#endif
-diff -up openssl-1.0.1e/crypto/fips/Makefile.fips openssl-1.0.1e/crypto/fips/Makefile
---- openssl-1.0.1e/crypto/fips/Makefile.fips 2013-10-04 11:48:04.189694339 +0200
-+++ openssl-1.0.1e/crypto/fips/Makefile 2013-10-04 11:48:04.189694339 +0200
-@@ -0,0 +1,340 @@
+diff -up openssl-1.0.1g/crypto/fips/Makefile.fips openssl-1.0.1g/crypto/fips/Makefile
+--- openssl-1.0.1g/crypto/fips/Makefile.fips 2014-05-06 16:29:50.552923363 +0200
++++ openssl-1.0.1g/crypto/fips/Makefile 2014-05-06 16:29:50.552923363 +0200
+@@ -0,0 +1,341 @@
+#
+# OpenSSL/crypto/fips/Makefile
+#
@@ -19004,6 +19037,7 @@ diff -up openssl-1.0.1e/crypto/fips/Makefile.fips openssl-1.0.1e/crypto/fips/Mak
+
+$(EXE): $(PROGRAM).o
+ FIPS_SHA_ASM=""; for i in $(SHA1_ASM_OBJ) sha256.o; do FIPS_SHA_ASM="$$FIPS_SHA_ASM ../sha/$$i" ; done; \
++ for i in $(CPUID_OBJ); do FIPS_SHA_ASM="$$FIPS_SHA_ASM ../$$i" ; done; \
+ $(CC) -o $@ $(CFLAGS) $(PROGRAM).o $$FIPS_SHA_ASM
+
+# DO NOT DELETE THIS LINE -- make depend depends on it.
@@ -19252,9 +19286,9 @@ diff -up openssl-1.0.1e/crypto/fips/Makefile.fips openssl-1.0.1e/crypto/fips/Mak
+fips_sha_selftest.o: ../../include/openssl/safestack.h
+fips_sha_selftest.o: ../../include/openssl/sha.h ../../include/openssl/stack.h
+fips_sha_selftest.o: ../../include/openssl/symhacks.h fips_sha_selftest.c
-diff -up openssl-1.0.1e/crypto/hmac/hmac.c.fips openssl-1.0.1e/crypto/hmac/hmac.c
---- openssl-1.0.1e/crypto/hmac/hmac.c.fips 2013-02-11 16:26:04.000000000 +0100
-+++ openssl-1.0.1e/crypto/hmac/hmac.c 2013-10-04 11:48:04.189694339 +0200
+diff -up openssl-1.0.1g/crypto/hmac/hmac.c.fips openssl-1.0.1g/crypto/hmac/hmac.c
+--- openssl-1.0.1g/crypto/hmac/hmac.c.fips 2014-03-17 17:14:20.000000000 +0100
++++ openssl-1.0.1g/crypto/hmac/hmac.c 2014-05-06 16:29:50.552923363 +0200
@@ -81,11 +81,6 @@ int HMAC_Init_ex(HMAC_CTX *ctx, const vo
EVPerr(EVP_F_HMAC_INIT_EX, EVP_R_DISABLED_FOR_FIPS);
return 0;
@@ -19317,9 +19351,9 @@ diff -up openssl-1.0.1e/crypto/hmac/hmac.c.fips openssl-1.0.1e/crypto/hmac/hmac.
EVP_MD_CTX_cleanup(&ctx->i_ctx);
EVP_MD_CTX_cleanup(&ctx->o_ctx);
EVP_MD_CTX_cleanup(&ctx->md_ctx);
-diff -up openssl-1.0.1e/crypto/mdc2/mdc2dgst.c.fips openssl-1.0.1e/crypto/mdc2/mdc2dgst.c
---- openssl-1.0.1e/crypto/mdc2/mdc2dgst.c.fips 2013-02-11 16:26:04.000000000 +0100
-+++ openssl-1.0.1e/crypto/mdc2/mdc2dgst.c 2013-10-04 11:48:04.189694339 +0200
+diff -up openssl-1.0.1g/crypto/mdc2/mdc2dgst.c.fips openssl-1.0.1g/crypto/mdc2/mdc2dgst.c
+--- openssl-1.0.1g/crypto/mdc2/mdc2dgst.c.fips 2014-03-17 17:14:20.000000000 +0100
++++ openssl-1.0.1g/crypto/mdc2/mdc2dgst.c 2014-05-06 16:29:50.552923363 +0200
@@ -76,7 +76,7 @@
*((c)++)=(unsigned char)(((l)>>24L)&0xff))
@@ -19329,9 +19363,9 @@ diff -up openssl-1.0.1e/crypto/mdc2/mdc2dgst.c.fips openssl-1.0.1e/crypto/mdc2/m
{
c->num=0;
c->pad_type=1;
-diff -up openssl-1.0.1e/crypto/md2/md2_dgst.c.fips openssl-1.0.1e/crypto/md2/md2_dgst.c
---- openssl-1.0.1e/crypto/md2/md2_dgst.c.fips 2013-02-11 16:26:04.000000000 +0100
-+++ openssl-1.0.1e/crypto/md2/md2_dgst.c 2013-10-04 11:48:04.189694339 +0200
+diff -up openssl-1.0.1g/crypto/md2/md2_dgst.c.fips openssl-1.0.1g/crypto/md2/md2_dgst.c
+--- openssl-1.0.1g/crypto/md2/md2_dgst.c.fips 2014-03-17 17:14:20.000000000 +0100
++++ openssl-1.0.1g/crypto/md2/md2_dgst.c 2014-05-06 16:29:50.552923363 +0200
@@ -62,6 +62,11 @@
#include <openssl/md2.h>
#include <openssl/opensslv.h>
@@ -19353,9 +19387,9 @@ diff -up openssl-1.0.1e/crypto/md2/md2_dgst.c.fips openssl-1.0.1e/crypto/md2/md2
{
c->num=0;
memset(c->state,0,sizeof c->state);
-diff -up openssl-1.0.1e/crypto/md4/md4_dgst.c.fips openssl-1.0.1e/crypto/md4/md4_dgst.c
---- openssl-1.0.1e/crypto/md4/md4_dgst.c.fips 2013-02-11 16:26:04.000000000 +0100
-+++ openssl-1.0.1e/crypto/md4/md4_dgst.c 2013-10-04 11:48:04.190694362 +0200
+diff -up openssl-1.0.1g/crypto/md4/md4_dgst.c.fips openssl-1.0.1g/crypto/md4/md4_dgst.c
+--- openssl-1.0.1g/crypto/md4/md4_dgst.c.fips 2014-03-17 17:14:20.000000000 +0100
++++ openssl-1.0.1g/crypto/md4/md4_dgst.c 2014-05-06 16:29:50.552923363 +0200
@@ -71,7 +71,7 @@ const char MD4_version[]="MD4" OPENSSL_V
#define INIT_DATA_C (unsigned long)0x98badcfeL
#define INIT_DATA_D (unsigned long)0x10325476L
@@ -19365,9 +19399,9 @@ diff -up openssl-1.0.1e/crypto/md4/md4_dgst.c.fips openssl-1.0.1e/crypto/md4/md4
{
memset (c,0,sizeof(*c));
c->A=INIT_DATA_A;
-diff -up openssl-1.0.1e/crypto/md5/md5_dgst.c.fips openssl-1.0.1e/crypto/md5/md5_dgst.c
---- openssl-1.0.1e/crypto/md5/md5_dgst.c.fips 2013-02-11 16:26:04.000000000 +0100
-+++ openssl-1.0.1e/crypto/md5/md5_dgst.c 2013-10-04 11:48:04.190694362 +0200
+diff -up openssl-1.0.1g/crypto/md5/md5_dgst.c.fips openssl-1.0.1g/crypto/md5/md5_dgst.c
+--- openssl-1.0.1g/crypto/md5/md5_dgst.c.fips 2014-03-17 17:14:20.000000000 +0100
++++ openssl-1.0.1g/crypto/md5/md5_dgst.c 2014-05-06 16:29:50.553923386 +0200
@@ -71,7 +71,7 @@ const char MD5_version[]="MD5" OPENSSL_V
#define INIT_DATA_C (unsigned long)0x98badcfeL
#define INIT_DATA_D (unsigned long)0x10325476L
@@ -19377,9 +19411,9 @@ diff -up openssl-1.0.1e/crypto/md5/md5_dgst.c.fips openssl-1.0.1e/crypto/md5/md5
{
memset (c,0,sizeof(*c));
c->A=INIT_DATA_A;
-diff -up openssl-1.0.1e/crypto/o_fips.c.fips openssl-1.0.1e/crypto/o_fips.c
---- openssl-1.0.1e/crypto/o_fips.c.fips 2013-02-11 16:26:04.000000000 +0100
-+++ openssl-1.0.1e/crypto/o_fips.c 2013-10-04 11:48:04.190694362 +0200
+diff -up openssl-1.0.1g/crypto/o_fips.c.fips openssl-1.0.1g/crypto/o_fips.c
+--- openssl-1.0.1g/crypto/o_fips.c.fips 2014-03-17 17:14:20.000000000 +0100
++++ openssl-1.0.1g/crypto/o_fips.c 2014-05-06 16:29:50.553923386 +0200
@@ -79,6 +79,8 @@ int FIPS_mode_set(int r)
#ifndef FIPS_AUTH_USER_PASS
#define FIPS_AUTH_USER_PASS "Default FIPS Crypto User Password"
@@ -19389,9 +19423,9 @@ diff -up openssl-1.0.1e/crypto/o_fips.c.fips openssl-1.0.1e/crypto/o_fips.c
if (!FIPS_module_mode_set(r, FIPS_AUTH_USER_PASS))
return 0;
if (r)
-diff -up openssl-1.0.1e/crypto/o_init.c.fips openssl-1.0.1e/crypto/o_init.c
---- openssl-1.0.1e/crypto/o_init.c.fips 2013-02-11 16:26:04.000000000 +0100
-+++ openssl-1.0.1e/crypto/o_init.c 2013-10-04 11:48:04.190694362 +0200
+diff -up openssl-1.0.1g/crypto/o_init.c.fips openssl-1.0.1g/crypto/o_init.c
+--- openssl-1.0.1g/crypto/o_init.c.fips 2014-03-17 17:14:20.000000000 +0100
++++ openssl-1.0.1g/crypto/o_init.c 2014-05-06 16:29:50.553923386 +0200
@@ -55,28 +55,68 @@
#include <e_os.h>
#include <openssl/err.h>
@@ -19465,9 +19499,9 @@ diff -up openssl-1.0.1e/crypto/o_init.c.fips openssl-1.0.1e/crypto/o_init.c
+ {
+ OPENSSL_init_library();
+ }
-diff -up openssl-1.0.1e/crypto/opensslconf.h.in.fips openssl-1.0.1e/crypto/opensslconf.h.in
---- openssl-1.0.1e/crypto/opensslconf.h.in.fips 2013-02-11 16:26:04.000000000 +0100
-+++ openssl-1.0.1e/crypto/opensslconf.h.in 2013-10-04 11:48:04.190694362 +0200
+diff -up openssl-1.0.1g/crypto/opensslconf.h.in.fips openssl-1.0.1g/crypto/opensslconf.h.in
+--- openssl-1.0.1g/crypto/opensslconf.h.in.fips 2014-03-17 17:14:20.000000000 +0100
++++ openssl-1.0.1g/crypto/opensslconf.h.in 2014-05-06 16:29:50.553923386 +0200
@@ -1,5 +1,20 @@
/* crypto/opensslconf.h.in */
@@ -19489,39 +19523,10 @@ diff -up openssl-1.0.1e/crypto/opensslconf.h.in.fips openssl-1.0.1e/crypto/opens
/* Generate 80386 code? */
#undef I386_ONLY
-diff -up openssl-1.0.1e/crypto/pkcs12/p12_crt.c.fips openssl-1.0.1e/crypto/pkcs12/p12_crt.c
---- openssl-1.0.1e/crypto/pkcs12/p12_crt.c.fips 2013-02-11 16:26:04.000000000 +0100
-+++ openssl-1.0.1e/crypto/pkcs12/p12_crt.c 2013-10-04 11:48:04.190694362 +0200
-@@ -59,6 +59,10 @@
- #include <stdio.h>
- #include "cryptlib.h"
- #include <openssl/pkcs12.h>
-+#ifdef OPENSSL_FIPS
-+#include <openssl/fips.h>
-+#endif
-+
-
-
- static int pkcs12_add_bag(STACK_OF(PKCS12_SAFEBAG) **pbags, PKCS12_SAFEBAG *bag);
-@@ -90,7 +94,14 @@ PKCS12 *PKCS12_create(char *pass, char *
-
- /* Set defaults */
- if (!nid_cert)
-+ {
-+#ifdef OPENSSL_FIPS
-+ if (FIPS_mode())
-+ nid_cert = NID_pbe_WithSHA1And3_Key_TripleDES_CBC;
-+ else
-+#endif
- nid_cert = NID_pbe_WithSHA1And40BitRC2_CBC;
-+ }
- if (!nid_key)
- nid_key = NID_pbe_WithSHA1And3_Key_TripleDES_CBC;
- if (!iter)
-diff -up openssl-1.0.1e/crypto/rand/md_rand.c.fips openssl-1.0.1e/crypto/rand/md_rand.c
---- openssl-1.0.1e/crypto/rand/md_rand.c.fips 2013-02-11 16:26:04.000000000 +0100
-+++ openssl-1.0.1e/crypto/rand/md_rand.c 2013-10-04 11:48:04.190694362 +0200
-@@ -389,7 +389,10 @@ static int ssleay_rand_bytes(unsigned ch
+diff -up openssl-1.0.1g/crypto/rand/md_rand.c.fips openssl-1.0.1g/crypto/rand/md_rand.c
+--- openssl-1.0.1g/crypto/rand/md_rand.c.fips 2014-04-07 18:54:21.000000000 +0200
++++ openssl-1.0.1g/crypto/rand/md_rand.c 2014-05-06 16:29:50.553923386 +0200
+@@ -395,7 +395,10 @@ static int ssleay_rand_bytes(unsigned ch
CRYPTO_w_unlock(CRYPTO_LOCK_RAND2);
crypto_lock_rand = 1;
@@ -19533,10 +19538,10 @@ diff -up openssl-1.0.1e/crypto/rand/md_rand.c.fips openssl-1.0.1e/crypto/rand/md
{
RAND_poll();
initialized = 1;
-diff -up openssl-1.0.1e/crypto/rand/rand.h.fips openssl-1.0.1e/crypto/rand/rand.h
---- openssl-1.0.1e/crypto/rand/rand.h.fips 2013-10-04 11:48:03.945688829 +0200
-+++ openssl-1.0.1e/crypto/rand/rand.h 2013-10-04 11:48:04.190694362 +0200
-@@ -133,15 +133,33 @@ void ERR_load_RAND_strings(void);
+diff -up openssl-1.0.1g/crypto/rand/rand.h.fips openssl-1.0.1g/crypto/rand/rand.h
+--- openssl-1.0.1g/crypto/rand/rand.h.fips 2014-05-06 16:29:50.303917606 +0200
++++ openssl-1.0.1g/crypto/rand/rand.h 2014-05-06 16:29:50.553923386 +0200
+@@ -133,16 +133,34 @@ void ERR_load_RAND_strings(void);
/* Error codes for the RAND functions. */
/* Function codes. */
@@ -19554,9 +19559,11 @@ diff -up openssl-1.0.1e/crypto/rand/rand.h.fips openssl-1.0.1e/crypto/rand/rand.
#define RAND_F_SSLEAY_RAND_BYTES 100
/* Reason codes. */
+-#define RAND_R_DUAL_EC_DRBG_DISABLED 104
-#define RAND_R_ERROR_INITIALISING_DRBG 102
-#define RAND_R_ERROR_INSTANTIATING_DRBG 103
-#define RAND_R_NO_FIPS_RANDOM_METHOD_SET 101
++#define RAND_R_DUAL_EC_DRBG_DISABLED 114
+#define RAND_R_ERROR_INITIALISING_DRBG 112
+#define RAND_R_ERROR_INSTANTIATING_DRBG 113
+#define RAND_R_NON_FIPS_METHOD 105
@@ -19574,9 +19581,9 @@ diff -up openssl-1.0.1e/crypto/rand/rand.h.fips openssl-1.0.1e/crypto/rand/rand.
#ifdef __cplusplus
}
-diff -up openssl-1.0.1e/crypto/ripemd/rmd_dgst.c.fips openssl-1.0.1e/crypto/ripemd/rmd_dgst.c
---- openssl-1.0.1e/crypto/ripemd/rmd_dgst.c.fips 2013-02-11 16:26:04.000000000 +0100
-+++ openssl-1.0.1e/crypto/ripemd/rmd_dgst.c 2013-10-04 11:48:04.191694385 +0200
+diff -up openssl-1.0.1g/crypto/ripemd/rmd_dgst.c.fips openssl-1.0.1g/crypto/ripemd/rmd_dgst.c
+--- openssl-1.0.1g/crypto/ripemd/rmd_dgst.c.fips 2014-03-17 17:14:20.000000000 +0100
++++ openssl-1.0.1g/crypto/ripemd/rmd_dgst.c 2014-05-06 16:29:50.553923386 +0200
@@ -70,7 +70,7 @@ const char RMD160_version[]="RIPE-MD160"
void ripemd160_block(RIPEMD160_CTX *c, unsigned long *p,size_t num);
# endif
@@ -19586,9 +19593,9 @@ diff -up openssl-1.0.1e/crypto/ripemd/rmd_dgst.c.fips openssl-1.0.1e/crypto/ripe
{
memset (c,0,sizeof(*c));
c->A=RIPEMD160_A;
-diff -up openssl-1.0.1e/crypto/rsa/rsa_crpt.c.fips openssl-1.0.1e/crypto/rsa/rsa_crpt.c
---- openssl-1.0.1e/crypto/rsa/rsa_crpt.c.fips 2013-02-11 16:26:04.000000000 +0100
-+++ openssl-1.0.1e/crypto/rsa/rsa_crpt.c 2013-10-04 11:48:04.191694385 +0200
+diff -up openssl-1.0.1g/crypto/rsa/rsa_crpt.c.fips openssl-1.0.1g/crypto/rsa/rsa_crpt.c
+--- openssl-1.0.1g/crypto/rsa/rsa_crpt.c.fips 2014-03-17 17:14:20.000000000 +0100
++++ openssl-1.0.1g/crypto/rsa/rsa_crpt.c 2014-05-06 16:29:50.554923409 +0200
@@ -90,10 +90,9 @@ int RSA_private_encrypt(int flen, const
RSA *rsa, int padding)
{
@@ -19615,9 +19622,9 @@ diff -up openssl-1.0.1e/crypto/rsa/rsa_crpt.c.fips openssl-1.0.1e/crypto/rsa/rsa
return -1;
}
#endif
-diff -up openssl-1.0.1e/crypto/rsa/rsa_eay.c.fips openssl-1.0.1e/crypto/rsa/rsa_eay.c
---- openssl-1.0.1e/crypto/rsa/rsa_eay.c.fips 2013-02-11 16:26:04.000000000 +0100
-+++ openssl-1.0.1e/crypto/rsa/rsa_eay.c 2013-10-04 11:48:04.191694385 +0200
+diff -up openssl-1.0.1g/crypto/rsa/rsa_eay.c.fips openssl-1.0.1g/crypto/rsa/rsa_eay.c
+--- openssl-1.0.1g/crypto/rsa/rsa_eay.c.fips 2014-03-17 17:14:20.000000000 +0100
++++ openssl-1.0.1g/crypto/rsa/rsa_eay.c 2014-05-06 16:29:50.554923409 +0200
@@ -114,6 +114,10 @@
#include <openssl/bn.h>
#include <openssl/rsa.h>
@@ -19748,9 +19755,9 @@ diff -up openssl-1.0.1e/crypto/rsa/rsa_eay.c.fips openssl-1.0.1e/crypto/rsa/rsa_
rsa->flags|=RSA_FLAG_CACHE_PUBLIC|RSA_FLAG_CACHE_PRIVATE;
return(1);
}
-diff -up openssl-1.0.1e/crypto/rsa/rsa_err.c.fips openssl-1.0.1e/crypto/rsa/rsa_err.c
---- openssl-1.0.1e/crypto/rsa/rsa_err.c.fips 2013-02-11 16:26:04.000000000 +0100
-+++ openssl-1.0.1e/crypto/rsa/rsa_err.c 2013-10-04 11:48:04.191694385 +0200
+diff -up openssl-1.0.1g/crypto/rsa/rsa_err.c.fips openssl-1.0.1g/crypto/rsa/rsa_err.c
+--- openssl-1.0.1g/crypto/rsa/rsa_err.c.fips 2014-03-17 17:14:20.000000000 +0100
++++ openssl-1.0.1g/crypto/rsa/rsa_err.c 2014-05-06 16:29:50.554923409 +0200
@@ -121,6 +121,8 @@ static ERR_STRING_DATA RSA_str_functs[]=
{ERR_FUNC(RSA_F_RSA_PUBLIC_ENCRYPT), "RSA_public_encrypt"},
{ERR_FUNC(RSA_F_RSA_PUB_DECODE), "RSA_PUB_DECODE"},
@@ -19760,9 +19767,9 @@ diff -up openssl-1.0.1e/crypto/rsa/rsa_err.c.fips openssl-1.0.1e/crypto/rsa/rsa_
{ERR_FUNC(RSA_F_RSA_SIGN), "RSA_sign"},
{ERR_FUNC(RSA_F_RSA_SIGN_ASN1_OCTET_STRING), "RSA_sign_ASN1_OCTET_STRING"},
{ERR_FUNC(RSA_F_RSA_VERIFY), "RSA_verify"},
-diff -up openssl-1.0.1e/crypto/rsa/rsa_gen.c.fips openssl-1.0.1e/crypto/rsa/rsa_gen.c
---- openssl-1.0.1e/crypto/rsa/rsa_gen.c.fips 2013-02-11 16:26:04.000000000 +0100
-+++ openssl-1.0.1e/crypto/rsa/rsa_gen.c 2013-10-04 11:48:04.191694385 +0200
+diff -up openssl-1.0.1g/crypto/rsa/rsa_gen.c.fips openssl-1.0.1g/crypto/rsa/rsa_gen.c
+--- openssl-1.0.1g/crypto/rsa/rsa_gen.c.fips 2014-03-17 17:14:20.000000000 +0100
++++ openssl-1.0.1g/crypto/rsa/rsa_gen.c 2014-05-06 16:29:50.554923409 +0200
@@ -69,6 +69,78 @@
#include <openssl/rsa.h>
#ifdef OPENSSL_FIPS
@@ -19904,9 +19911,9 @@ diff -up openssl-1.0.1e/crypto/rsa/rsa_gen.c.fips openssl-1.0.1e/crypto/rsa/rsa_
ok=1;
err:
if (ok == -1)
-diff -up openssl-1.0.1e/crypto/rsa/rsa.h.fips openssl-1.0.1e/crypto/rsa/rsa.h
---- openssl-1.0.1e/crypto/rsa/rsa.h.fips 2013-10-04 11:48:04.075691765 +0200
-+++ openssl-1.0.1e/crypto/rsa/rsa.h 2013-10-04 11:48:04.192694407 +0200
+diff -up openssl-1.0.1g/crypto/rsa/rsa.h.fips openssl-1.0.1g/crypto/rsa/rsa.h
+--- openssl-1.0.1g/crypto/rsa/rsa.h.fips 2014-05-06 16:29:50.436920681 +0200
++++ openssl-1.0.1g/crypto/rsa/rsa.h 2014-05-06 16:29:50.554923409 +0200
@@ -164,6 +164,8 @@ struct rsa_st
# define OPENSSL_RSA_MAX_MODULUS_BITS 16384
#endif
@@ -19991,9 +19998,9 @@ diff -up openssl-1.0.1e/crypto/rsa/rsa.h.fips openssl-1.0.1e/crypto/rsa/rsa.h
#define RSA_R_OPERATION_NOT_SUPPORTED_FOR_THIS_KEYTYPE 148
#define RSA_R_PADDING_CHECK_FAILED 114
#define RSA_R_P_NOT_PRIME 128
-diff -up openssl-1.0.1e/crypto/rsa/rsa_lib.c.fips openssl-1.0.1e/crypto/rsa/rsa_lib.c
---- openssl-1.0.1e/crypto/rsa/rsa_lib.c.fips 2013-02-11 16:26:04.000000000 +0100
-+++ openssl-1.0.1e/crypto/rsa/rsa_lib.c 2013-10-04 11:48:04.192694407 +0200
+diff -up openssl-1.0.1g/crypto/rsa/rsa_lib.c.fips openssl-1.0.1g/crypto/rsa/rsa_lib.c
+--- openssl-1.0.1g/crypto/rsa/rsa_lib.c.fips 2014-03-17 17:14:20.000000000 +0100
++++ openssl-1.0.1g/crypto/rsa/rsa_lib.c 2014-05-06 16:29:50.554923409 +0200
@@ -84,6 +84,13 @@ RSA *RSA_new(void)
void RSA_set_default_method(const RSA_METHOD *meth)
@@ -20069,9 +20076,9 @@ diff -up openssl-1.0.1e/crypto/rsa/rsa_lib.c.fips openssl-1.0.1e/crypto/rsa/rsa_
if (!CRYPTO_new_ex_data(CRYPTO_EX_INDEX_RSA, ret, &ret->ex_data))
{
#ifndef OPENSSL_NO_ENGINE
-diff -up openssl-1.0.1e/crypto/rsa/rsa_pmeth.c.fips openssl-1.0.1e/crypto/rsa/rsa_pmeth.c
---- openssl-1.0.1e/crypto/rsa/rsa_pmeth.c.fips 2013-02-11 16:26:04.000000000 +0100
-+++ openssl-1.0.1e/crypto/rsa/rsa_pmeth.c 2013-10-04 11:48:04.192694407 +0200
+diff -up openssl-1.0.1g/crypto/rsa/rsa_pmeth.c.fips openssl-1.0.1g/crypto/rsa/rsa_pmeth.c
+--- openssl-1.0.1g/crypto/rsa/rsa_pmeth.c.fips 2014-03-17 17:14:20.000000000 +0100
++++ openssl-1.0.1g/crypto/rsa/rsa_pmeth.c 2014-05-06 16:29:50.555923432 +0200
@@ -206,22 +206,6 @@ static int pkey_rsa_sign(EVP_PKEY_CTX *c
RSA_R_INVALID_DIGEST_LENGTH);
return -1;
@@ -20115,9 +20122,9 @@ diff -up openssl-1.0.1e/crypto/rsa/rsa_pmeth.c.fips openssl-1.0.1e/crypto/rsa/rs
if (rctx->pad_mode == RSA_PKCS1_PADDING)
return RSA_verify(EVP_MD_type(rctx->md), tbs, tbslen,
sig, siglen, rsa);
-diff -up openssl-1.0.1e/crypto/rsa/rsa_sign.c.fips openssl-1.0.1e/crypto/rsa/rsa_sign.c
---- openssl-1.0.1e/crypto/rsa/rsa_sign.c.fips 2013-02-11 16:26:04.000000000 +0100
-+++ openssl-1.0.1e/crypto/rsa/rsa_sign.c 2013-10-04 11:48:04.192694407 +0200
+diff -up openssl-1.0.1g/crypto/rsa/rsa_sign.c.fips openssl-1.0.1g/crypto/rsa/rsa_sign.c
+--- openssl-1.0.1g/crypto/rsa/rsa_sign.c.fips 2014-03-17 17:14:20.000000000 +0100
++++ openssl-1.0.1g/crypto/rsa/rsa_sign.c 2014-05-06 16:29:50.555923432 +0200
@@ -138,7 +138,8 @@ int RSA_sign(int type, const unsigned ch
i2d_X509_SIG(&sig,&p);
s=tmps;
@@ -20149,9 +20156,9 @@ diff -up openssl-1.0.1e/crypto/rsa/rsa_sign.c.fips openssl-1.0.1e/crypto/rsa/rsa
if (i <= 0) goto err;
/* Oddball MDC2 case: signature can be OCTET STRING.
-diff -up openssl-1.0.1e/crypto/sha/sha.h.fips openssl-1.0.1e/crypto/sha/sha.h
---- openssl-1.0.1e/crypto/sha/sha.h.fips 2013-10-04 11:48:03.867687068 +0200
-+++ openssl-1.0.1e/crypto/sha/sha.h 2013-10-04 11:48:04.192694407 +0200
+diff -up openssl-1.0.1g/crypto/sha/sha.h.fips openssl-1.0.1g/crypto/sha/sha.h
+--- openssl-1.0.1g/crypto/sha/sha.h.fips 2014-05-06 16:29:50.224915780 +0200
++++ openssl-1.0.1g/crypto/sha/sha.h 2014-05-06 16:29:50.555923432 +0200
@@ -116,9 +116,6 @@ unsigned char *SHA(const unsigned char *
void SHA_Transform(SHA_CTX *c, const unsigned char *data);
#endif
@@ -20184,9 +20191,9 @@ diff -up openssl-1.0.1e/crypto/sha/sha.h.fips openssl-1.0.1e/crypto/sha/sha.h
int SHA384_Init(SHA512_CTX *c);
int SHA384_Update(SHA512_CTX *c, const void *data, size_t len);
int SHA384_Final(unsigned char *md, SHA512_CTX *c);
-diff -up openssl-1.0.1e/crypto/sha/sha_locl.h.fips openssl-1.0.1e/crypto/sha/sha_locl.h
---- openssl-1.0.1e/crypto/sha/sha_locl.h.fips 2013-10-04 11:48:03.870687135 +0200
-+++ openssl-1.0.1e/crypto/sha/sha_locl.h 2013-10-04 11:48:04.192694407 +0200
+diff -up openssl-1.0.1g/crypto/sha/sha_locl.h.fips openssl-1.0.1g/crypto/sha/sha_locl.h
+--- openssl-1.0.1g/crypto/sha/sha_locl.h.fips 2014-05-06 16:29:50.226915826 +0200
++++ openssl-1.0.1g/crypto/sha/sha_locl.h 2014-05-06 16:29:50.555923432 +0200
@@ -123,11 +123,14 @@ void sha1_block_data_order (SHA_CTX *c,
#define INIT_DATA_h4 0xc3d2e1f0UL
@@ -20203,9 +20210,9 @@ diff -up openssl-1.0.1e/crypto/sha/sha_locl.h.fips openssl-1.0.1e/crypto/sha/sha
memset (c,0,sizeof(*c));
c->h0=INIT_DATA_h0;
c->h1=INIT_DATA_h1;
-diff -up openssl-1.0.1e/crypto/sha/sha256.c.fips openssl-1.0.1e/crypto/sha/sha256.c
---- openssl-1.0.1e/crypto/sha/sha256.c.fips 2013-02-11 16:26:04.000000000 +0100
-+++ openssl-1.0.1e/crypto/sha/sha256.c 2013-10-04 11:48:04.193694429 +0200
+diff -up openssl-1.0.1g/crypto/sha/sha256.c.fips openssl-1.0.1g/crypto/sha/sha256.c
+--- openssl-1.0.1g/crypto/sha/sha256.c.fips 2014-03-17 17:14:20.000000000 +0100
++++ openssl-1.0.1g/crypto/sha/sha256.c 2014-05-06 16:29:50.555923432 +0200
@@ -12,12 +12,19 @@
#include <openssl/crypto.h>
@@ -20236,9 +20243,9 @@ diff -up openssl-1.0.1e/crypto/sha/sha256.c.fips openssl-1.0.1e/crypto/sha/sha25
memset (c,0,sizeof(*c));
c->h[0]=0x6a09e667UL; c->h[1]=0xbb67ae85UL;
c->h[2]=0x3c6ef372UL; c->h[3]=0xa54ff53aUL;
-diff -up openssl-1.0.1e/crypto/sha/sha512.c.fips openssl-1.0.1e/crypto/sha/sha512.c
---- openssl-1.0.1e/crypto/sha/sha512.c.fips 2013-02-11 16:26:04.000000000 +0100
-+++ openssl-1.0.1e/crypto/sha/sha512.c 2013-10-04 11:48:04.193694429 +0200
+diff -up openssl-1.0.1g/crypto/sha/sha512.c.fips openssl-1.0.1g/crypto/sha/sha512.c
+--- openssl-1.0.1g/crypto/sha/sha512.c.fips 2014-03-17 17:14:20.000000000 +0100
++++ openssl-1.0.1g/crypto/sha/sha512.c 2014-05-06 16:29:50.555923432 +0200
@@ -5,6 +5,10 @@
* ====================================================================
*/
@@ -20270,9 +20277,9 @@ diff -up openssl-1.0.1e/crypto/sha/sha512.c.fips openssl-1.0.1e/crypto/sha/sha51
c->h[0]=U64(0x6a09e667f3bcc908);
c->h[1]=U64(0xbb67ae8584caa73b);
c->h[2]=U64(0x3c6ef372fe94f82b);
-diff -up openssl-1.0.1e/crypto/whrlpool/wp_dgst.c.fips openssl-1.0.1e/crypto/whrlpool/wp_dgst.c
---- openssl-1.0.1e/crypto/whrlpool/wp_dgst.c.fips 2013-02-11 16:26:04.000000000 +0100
-+++ openssl-1.0.1e/crypto/whrlpool/wp_dgst.c 2013-10-04 11:48:04.193694429 +0200
+diff -up openssl-1.0.1g/crypto/whrlpool/wp_dgst.c.fips openssl-1.0.1g/crypto/whrlpool/wp_dgst.c
+--- openssl-1.0.1g/crypto/whrlpool/wp_dgst.c.fips 2014-03-17 17:14:20.000000000 +0100
++++ openssl-1.0.1g/crypto/whrlpool/wp_dgst.c 2014-05-06 16:29:50.555923432 +0200
@@ -55,7 +55,7 @@
#include <openssl/crypto.h>
#include <string.h>
@@ -20282,9 +20289,9 @@ diff -up openssl-1.0.1e/crypto/whrlpool/wp_dgst.c.fips openssl-1.0.1e/crypto/whr
{
memset (c,0,sizeof(*c));
return(1);
-diff -up openssl-1.0.1e/Makefile.org.fips openssl-1.0.1e/Makefile.org
---- openssl-1.0.1e/Makefile.org.fips 2013-10-04 11:48:04.157693616 +0200
-+++ openssl-1.0.1e/Makefile.org 2013-10-04 11:48:04.193694429 +0200
+diff -up openssl-1.0.1g/Makefile.org.fips openssl-1.0.1g/Makefile.org
+--- openssl-1.0.1g/Makefile.org.fips 2014-05-06 16:29:50.519922600 +0200
++++ openssl-1.0.1g/Makefile.org 2014-05-06 16:29:50.556923456 +0200
@@ -136,6 +136,9 @@ FIPSCANLIB=
BASEADDR=
@@ -20312,10 +20319,10 @@ diff -up openssl-1.0.1e/Makefile.org.fips openssl-1.0.1e/Makefile.org
THIS=$${THIS:-$@} MAKEFILE=Makefile MAKEOVERRIDES=
# MAKEOVERRIDES= effectively "equalizes" GNU-ish and SysV-ish make flavors,
# which in turn eliminates ambiguities in variable treatment with -e.
-diff -up openssl-1.0.1e/ssl/d1_srvr.c.fips openssl-1.0.1e/ssl/d1_srvr.c
---- openssl-1.0.1e/ssl/d1_srvr.c.fips 2013-02-11 16:26:04.000000000 +0100
-+++ openssl-1.0.1e/ssl/d1_srvr.c 2013-10-04 11:48:04.193694429 +0200
-@@ -1372,6 +1372,8 @@ int dtls1_send_server_key_exchange(SSL *
+diff -up openssl-1.0.1g/ssl/d1_srvr.c.fips openssl-1.0.1g/ssl/d1_srvr.c
+--- openssl-1.0.1g/ssl/d1_srvr.c.fips 2014-03-17 17:14:20.000000000 +0100
++++ openssl-1.0.1g/ssl/d1_srvr.c 2014-05-06 16:29:50.556923456 +0200
+@@ -1383,6 +1383,8 @@ int dtls1_send_server_key_exchange(SSL *
j=0;
for (num=2; num > 0; num--)
{
@@ -20324,9 +20331,9 @@ diff -up openssl-1.0.1e/ssl/d1_srvr.c.fips openssl-1.0.1e/ssl/d1_srvr.c
EVP_DigestInit_ex(&md_ctx,(num == 2)
?s->ctx->md5:s->ctx->sha1, NULL);
EVP_DigestUpdate(&md_ctx,&(s->s3->client_random[0]),SSL3_RANDOM_SIZE);
-diff -up openssl-1.0.1e/ssl/ssl_algs.c.fips openssl-1.0.1e/ssl/ssl_algs.c
---- openssl-1.0.1e/ssl/ssl_algs.c.fips 2013-02-11 16:26:04.000000000 +0100
-+++ openssl-1.0.1e/ssl/ssl_algs.c 2013-10-04 11:48:04.193694429 +0200
+diff -up openssl-1.0.1g/ssl/ssl_algs.c.fips openssl-1.0.1g/ssl/ssl_algs.c
+--- openssl-1.0.1g/ssl/ssl_algs.c.fips 2014-03-17 17:14:20.000000000 +0100
++++ openssl-1.0.1g/ssl/ssl_algs.c 2014-05-06 16:29:50.556923456 +0200
@@ -64,6 +64,12 @@
int SSL_library_init(void)
{
diff --git a/openssl-1.0.1h-disable-sslv2v3.patch b/openssl-1.0.1h-disable-sslv2v3.patch
new file mode 100644
index 0000000..7a028aa
--- /dev/null
+++ b/openssl-1.0.1h-disable-sslv2v3.patch
@@ -0,0 +1,13 @@
+diff -up openssl-1.0.1h/ssl/ssl_lib.c.v2v3 openssl-1.0.1h/ssl/ssl_lib.c
+--- openssl-1.0.1h/ssl/ssl_lib.c.v2v3 2014-06-11 16:02:52.000000000 +0200
++++ openssl-1.0.1h/ssl/ssl_lib.c 2014-06-30 14:18:04.290248080 +0200
+@@ -1875,6 +1875,9 @@ SSL_CTX *SSL_CTX_new(const SSL_METHOD *m
+ */
+ ret->options |= SSL_OP_LEGACY_SERVER_CONNECT;
+
++ /* Disable SSLv2 by default (affects the SSLv23_method() only) */
++ ret->options |= SSL_OP_NO_SSLv2;
++
+ return(ret);
+ err:
+ SSLerr(SSL_F_SSL_CTX_NEW,ERR_R_MALLOC_FAILURE);
diff --git a/openssl-1.0.1c-ipv6-apps.patch b/openssl-1.0.1h-ipv6-apps.patch
similarity index 86%
rename from openssl-1.0.1c-ipv6-apps.patch
rename to openssl-1.0.1h-ipv6-apps.patch
index 41e0b36..db6b543 100644
--- a/openssl-1.0.1c-ipv6-apps.patch
+++ b/openssl-1.0.1h-ipv6-apps.patch
@@ -1,6 +1,6 @@
-diff -up openssl-1.0.1c/apps/s_apps.h.ipv6-apps openssl-1.0.1c/apps/s_apps.h
---- openssl-1.0.1c/apps/s_apps.h.ipv6-apps 2012-07-11 22:46:02.409221206 +0200
-+++ openssl-1.0.1c/apps/s_apps.h 2012-07-11 22:46:02.451222165 +0200
+diff -up openssl-1.0.1h/apps/s_apps.h.ipv6-apps openssl-1.0.1h/apps/s_apps.h
+--- openssl-1.0.1h/apps/s_apps.h.ipv6-apps 2014-06-05 14:33:38.515668750 +0200
++++ openssl-1.0.1h/apps/s_apps.h 2014-06-05 14:33:38.540669335 +0200
@@ -148,7 +148,7 @@ typedef fd_mask fd_set;
#define PORT_STR "4433"
#define PROTOCOL "tcp"
@@ -23,10 +23,10 @@ diff -up openssl-1.0.1c/apps/s_apps.h.ipv6-apps openssl-1.0.1c/apps/s_apps.h
long MS_CALLBACK bio_dump_callback(BIO *bio, int cmd, const char *argp,
int argi, long argl, long ret);
-diff -up openssl-1.0.1c/apps/s_client.c.ipv6-apps openssl-1.0.1c/apps/s_client.c
---- openssl-1.0.1c/apps/s_client.c.ipv6-apps 2012-07-11 22:46:02.433221754 +0200
-+++ openssl-1.0.1c/apps/s_client.c 2012-07-11 22:46:02.452222187 +0200
-@@ -563,7 +563,7 @@ int MAIN(int argc, char **argv)
+diff -up openssl-1.0.1h/apps/s_client.c.ipv6-apps openssl-1.0.1h/apps/s_client.c
+--- openssl-1.0.1h/apps/s_client.c.ipv6-apps 2014-06-05 14:33:38.533669171 +0200
++++ openssl-1.0.1h/apps/s_client.c 2014-06-05 14:33:38.540669335 +0200
+@@ -567,7 +567,7 @@ int MAIN(int argc, char **argv)
int cbuf_len,cbuf_off;
int sbuf_len,sbuf_off;
fd_set readfds,writefds;
@@ -35,7 +35,7 @@ diff -up openssl-1.0.1c/apps/s_client.c.ipv6-apps openssl-1.0.1c/apps/s_client.c
int full_log=1;
char *host=SSL_HOST_NAME;
char *cert_file=NULL,*key_file=NULL;
-@@ -664,13 +664,12 @@ int MAIN(int argc, char **argv)
+@@ -668,13 +668,12 @@ int MAIN(int argc, char **argv)
else if (strcmp(*argv,"-port") == 0)
{
if (--argc < 1) goto bad;
@@ -51,7 +51,7 @@ diff -up openssl-1.0.1c/apps/s_client.c.ipv6-apps openssl-1.0.1c/apps/s_client.c
goto bad;
}
else if (strcmp(*argv,"-verify") == 0)
-@@ -1253,7 +1252,7 @@ bad:
+@@ -1267,7 +1266,7 @@ bad:
re_start:
@@ -60,10 +60,10 @@ diff -up openssl-1.0.1c/apps/s_client.c.ipv6-apps openssl-1.0.1c/apps/s_client.c
{
BIO_printf(bio_err,"connect:errno=%d\n",get_last_socket_error());
SHUTDOWN(s);
-diff -up openssl-1.0.1c/apps/s_server.c.ipv6-apps openssl-1.0.1c/apps/s_server.c
---- openssl-1.0.1c/apps/s_server.c.ipv6-apps 2012-07-11 22:46:02.434221777 +0200
-+++ openssl-1.0.1c/apps/s_server.c 2012-07-11 22:46:02.453222210 +0200
-@@ -929,7 +929,7 @@ int MAIN(int argc, char *argv[])
+diff -up openssl-1.0.1h/apps/s_server.c.ipv6-apps openssl-1.0.1h/apps/s_server.c
+--- openssl-1.0.1h/apps/s_server.c.ipv6-apps 2014-06-05 14:33:38.533669171 +0200
++++ openssl-1.0.1h/apps/s_server.c 2014-06-05 14:33:38.541669358 +0200
+@@ -933,7 +933,7 @@ int MAIN(int argc, char *argv[])
{
X509_VERIFY_PARAM *vpm = NULL;
int badarg = 0;
@@ -72,7 +72,7 @@ diff -up openssl-1.0.1c/apps/s_server.c.ipv6-apps openssl-1.0.1c/apps/s_server.c
char *CApath=NULL,*CAfile=NULL;
unsigned char *context = NULL;
char *dhfile = NULL;
-@@ -1000,8 +1000,7 @@ int MAIN(int argc, char *argv[])
+@@ -1004,8 +1004,7 @@ int MAIN(int argc, char *argv[])
(strcmp(*argv,"-accept") == 0))
{
if (--argc < 1) goto bad;
@@ -82,7 +82,7 @@ diff -up openssl-1.0.1c/apps/s_server.c.ipv6-apps openssl-1.0.1c/apps/s_server.c
}
else if (strcmp(*argv,"-verify") == 0)
{
-@@ -1878,9 +1877,9 @@ bad:
+@@ -1892,9 +1891,9 @@ bad:
BIO_printf(bio_s_out,"ACCEPT\n");
(void)BIO_flush(bio_s_out);
if (www)
@@ -94,9 +94,9 @@ diff -up openssl-1.0.1c/apps/s_server.c.ipv6-apps openssl-1.0.1c/apps/s_server.c
print_stats(bio_s_out,ctx);
ret=0;
end:
-diff -up openssl-1.0.1c/apps/s_socket.c.ipv6-apps openssl-1.0.1c/apps/s_socket.c
---- openssl-1.0.1c/apps/s_socket.c.ipv6-apps 2011-12-02 15:39:40.000000000 +0100
-+++ openssl-1.0.1c/apps/s_socket.c 2012-07-11 22:49:05.411400450 +0200
+diff -up openssl-1.0.1h/apps/s_socket.c.ipv6-apps openssl-1.0.1h/apps/s_socket.c
+--- openssl-1.0.1h/apps/s_socket.c.ipv6-apps 2014-06-05 11:44:33.000000000 +0200
++++ openssl-1.0.1h/apps/s_socket.c 2014-06-05 14:39:53.226442195 +0200
@@ -102,9 +102,7 @@ static struct hostent *GetHostByName(cha
static void ssl_sock_cleanup(void);
#endif
@@ -108,7 +108,7 @@ diff -up openssl-1.0.1c/apps/s_socket.c.ipv6-apps openssl-1.0.1c/apps/s_socket.c
static int do_accept(int acc_sock, int *sock, char **host);
static int host_ip(char *str, unsigned char ip[4]);
-@@ -234,57 +232,70 @@ static int ssl_sock_init(void)
+@@ -234,57 +232,71 @@ static int ssl_sock_init(void)
return(1);
}
@@ -178,7 +178,7 @@ diff -up openssl-1.0.1c/apps/s_socket.c.ipv6-apps openssl-1.0.1c/apps/s_socket.c
{
- i=0;
- i=setsockopt(s,SOL_SOCKET,SO_KEEPALIVE,(char *)&i,sizeof(i));
-- if (i < 0) { perror("keepalive"); return(0); }
+- if (i < 0) { closesocket(s); perror("keepalive"); return(0); }
+ int i=0;
+ i=setsockopt(s,SOL_SOCKET,SO_KEEPALIVE,
+ (char *)&i,sizeof(i));
@@ -207,6 +207,7 @@ diff -up openssl-1.0.1c/apps/s_socket.c.ipv6-apps openssl-1.0.1c/apps/s_socket.c
+ res = res->ai_next;
+ }
+ freeaddrinfo(res0);
++ closesocket(s);
+
+ perror(failed_call);
+ return(0);
@@ -216,7 +217,7 @@ diff -up openssl-1.0.1c/apps/s_socket.c.ipv6-apps openssl-1.0.1c/apps/s_socket.c
{
int sock;
char *name = NULL;
-@@ -322,33 +333,50 @@ int do_server(int port, int type, int *r
+@@ -322,33 +334,50 @@ int do_server(int port, int type, int *r
}
}
@@ -288,7 +289,7 @@ diff -up openssl-1.0.1c/apps/s_socket.c.ipv6-apps openssl-1.0.1c/apps/s_socket.c
#if defined SOL_SOCKET && defined SO_REUSEADDR
{
int j = 1;
-@@ -356,35 +384,49 @@ static int init_server_long(int *sock, i
+@@ -356,35 +385,49 @@ static int init_server_long(int *sock, i
(void *) &j, sizeof j);
}
#endif
@@ -355,7 +356,16 @@ diff -up openssl-1.0.1c/apps/s_socket.c.ipv6-apps openssl-1.0.1c/apps/s_socket.c
int len;
/* struct linger ling; */
-@@ -431,135 +473,58 @@ redoit:
+@@ -424,145 +467,66 @@ redoit:
+ ling.l_onoff=1;
+ ling.l_linger=0;
+ i=setsockopt(ret,SOL_SOCKET,SO_LINGER,(char *)&ling,sizeof(ling));
+- if (i < 0) { perror("linger"); return(0); }
++ if (i < 0) { closesocket(ret); perror("linger"); return(0); }
+ i=0;
+ i=setsockopt(ret,SOL_SOCKET,SO_KEEPALIVE,(char *)&i,sizeof(i));
+- if (i < 0) { perror("keepalive"); return(0); }
++ if (i < 0) { closesocket(ret); perror("keepalive"); return(0); }
*/
if (host == NULL) goto end;
@@ -384,6 +394,7 @@ diff -up openssl-1.0.1c/apps/s_socket.c.ipv6-apps openssl-1.0.1c/apps/s_socket.c
+ if ((*host=(char *)OPENSSL_malloc(strlen(buffer)+1)) == NULL)
{
perror("OPENSSL_malloc");
+ closesocket(ret);
return(0);
}
- BUF_strlcpy(*host,h1->h_name,strlen(h1->h_name)+1);
@@ -392,11 +403,13 @@ diff -up openssl-1.0.1c/apps/s_socket.c.ipv6-apps openssl-1.0.1c/apps/s_socket.c
- if (h2 == NULL)
- {
- BIO_printf(bio_err,"gethostbyname failure\n");
+- closesocket(ret);
- return(0);
- }
- if (h2->h_addrtype != AF_INET)
- {
- BIO_printf(bio_err,"gethostbyname addr is not AF_INET\n");
+- closesocket(ret);
- return(0);
- }
+ strcpy(*host, buffer);
diff --git a/openssl-1.0.1h-system-cipherlist.patch b/openssl-1.0.1h-system-cipherlist.patch
new file mode 100644
index 0000000..c7c5106
--- /dev/null
+++ b/openssl-1.0.1h-system-cipherlist.patch
@@ -0,0 +1,289 @@
+diff -up openssl-1.0.1h/Configure.system openssl-1.0.1h/Configure
+--- openssl-1.0.1h/Configure.system 2014-06-05 14:47:37.509312875 +0200
++++ openssl-1.0.1h/Configure 2014-06-11 14:05:28.560359069 +0200
+@@ -10,7 +10,7 @@ use strict;
+
+ # see INSTALL for instructions.
+
+-my $usage="Usage: Configure [no-<cipher> ...] [enable-<cipher> ...] [experimental-<cipher> ...] [-Dxxx] [-lxxx] [-Lxxx] [-fxxx] [-Kxxx] [no-hw-xxx|no-hw] [[no-]threads] [[no-]shared] [[no-]zlib|zlib-dynamic] [no-asm] [no-dso] [no-krb5] [sctp] [386] [--prefix=DIR] [--openssldir=OPENSSLDIR] [--with-xxx[=vvv]] [--test-sanity] os/compiler[:flags]\n";
++my $usage="Usage: Configure [no-<cipher> ...] [enable-<cipher> ...] [experimental-<cipher> ...] [-Dxxx] [-lxxx] [-Lxxx] [-fxxx] [-Kxxx] [no-hw-xxx|no-hw] [[no-]threads] [[no-]shared] [[no-]zlib|zlib-dynamic] [no-asm] [no-dso] [no-krb5] [sctp] [386] [--prefix=DIR] [--openssldir=OPENSSLDIR] [--system-ciphers-file=SYSTEMCIPHERFILE] [--with-xxx[=vvv]] [--test-sanity] os/compiler[:flags]\n";
+
+ # Options:
+ #
+@@ -35,6 +35,9 @@ my $usage="Usage: Configure [no-<cipher>
+ # --with-krb5-flavor Declare what flavor of Kerberos 5 is used. Currently
+ # supported values are "MIT" and "Heimdal". A value is required.
+ #
++# --system-ciphers-file A file to read cipher string from when the PROFILE=SYSTEM
++# cipher is specified (default).
++#
+ # --test-sanity Make a number of sanity checks on the data in this file.
+ # This is a debugging tool for OpenSSL developers.
+ #
+@@ -663,6 +666,7 @@ my $prefix="";
+ my $libdir="";
+ my $openssldir="";
+ my $enginesdir="";
++my $system_ciphers_file="";
+ my $exe_ext="";
+ my $install_prefix= "$ENV{'INSTALL_PREFIX'}";
+ my $cross_compile_prefix="";
+@@ -895,6 +899,10 @@ PROCESS_ARGS:
+ {
+ $enginesdir=$1;
+ }
++ elsif (/^--system-ciphers-file=(.*)$/)
++ {
++ $system_ciphers_file=$1;
++ }
+ elsif (/^--install.prefix=(.*)$/)
+ {
+ $install_prefix=$1;
+@@ -1053,6 +1061,7 @@ print "Configuring for $target\n";
+
+ &usage if (!defined($table{$target}));
+
++chop $system_ciphers_file if $system_ciphers_file =~ /\/$/;
+
+ foreach (sort (keys %disabled))
+ {
+@@ -1607,6 +1616,7 @@ while (<IN>)
+ s/^INSTALLTOP=.*$/INSTALLTOP=$prefix/;
+ s/^MULTILIB=.*$/MULTILIB=$multilib/;
+ s/^OPENSSLDIR=.*$/OPENSSLDIR=$openssldir/;
++ s/^SYSTEM_CIPHERS_FILE=.*$/SYSTEM_CIPHERS_FILE=$system_ciphers_file/;
+ s/^LIBDIR=.*$/LIBDIR=$libdir/;
+ s/^INSTALL_PREFIX=.*$/INSTALL_PREFIX=$install_prefix/;
+ s/^PLATFORM=.*$/PLATFORM=$target/;
+@@ -1812,6 +1822,14 @@ while (<IN>)
+ $foo =~ s/\\/\\\\/g;
+ print OUT "#define ENGINESDIR \"$foo\"\n";
+ }
++ elsif (/^#((define)|(undef))\s+SYSTEM_CIPHERS_FILE/)
++ {
++ my $foo = "$system_ciphers_file";
++ if ($foo ne '') {
++ $foo =~ s/\\/\\\\/g;
++ print OUT "#define SYSTEM_CIPHERS_FILE \"$foo\"\n";
++ }
++ }
+ elsif (/^#((define)|(undef))\s+OPENSSL_EXPORT_VAR_AS_FUNCTION/)
+ { printf OUT "#undef OPENSSL_EXPORT_VAR_AS_FUNCTION\n"
+ if $export_var_as_fn;
+diff -up openssl-1.0.1h/crypto/opensslconf.h.in.system openssl-1.0.1h/crypto/opensslconf.h.in
+--- openssl-1.0.1h/crypto/opensslconf.h.in.system 2014-06-05 14:47:37.437311188 +0200
++++ openssl-1.0.1h/crypto/opensslconf.h.in 2014-06-11 13:31:44.497722345 +0200
+@@ -25,6 +25,8 @@
+ #endif
+ #endif
+
++#undef SYSTEM_CIPHERS_FILE
++
+ #undef OPENSSL_UNISTD
+ #define OPENSSL_UNISTD <unistd.h>
+
+diff -up openssl-1.0.1h/ssl/ssl_ciph.c.system openssl-1.0.1h/ssl/ssl_ciph.c
+--- openssl-1.0.1h/ssl/ssl_ciph.c.system 2014-06-05 14:47:37.441311282 +0200
++++ openssl-1.0.1h/ssl/ssl_ciph.c 2014-06-11 13:55:28.194381937 +0200
+@@ -1352,6 +1352,54 @@ static int ssl_cipher_process_rulestr(co
+ return(retval);
+ }
+
++#ifdef SYSTEM_CIPHERS_FILE
++static char* load_system_str(const char* suffix)
++ {
++ FILE* fp;
++ char buf[1024];
++ char *new_rules;
++ unsigned len, slen;
++
++ fp = fopen(SYSTEM_CIPHERS_FILE, "r");
++ if (fp == NULL || fgets(buf, sizeof(buf), fp) == NULL)
++ {
++ /* cannot open or file is empty */
++ snprintf(buf, sizeof(buf), "%s", SSL_DEFAULT_CIPHER_LIST);
++ }
++
++ if (fp)
++ fclose(fp);
++
++ slen = strlen(suffix);
++ len = strlen(buf);
++
++ if (buf[len-1] == '\n')
++ {
++ len--;
++ buf[len] = 0;
++ }
++ if (buf[len-1] == '\r')
++ {
++ len--;
++ buf[len] = 0;
++ }
++
++ new_rules = OPENSSL_malloc(len + slen + 1);
++ if (new_rules == 0)
++ return NULL;
++
++ memcpy(new_rules, buf, len);
++ if (slen > 0)
++ {
++ memcpy(&new_rules[len], suffix, slen);
++ len += slen;
++ }
++ new_rules[len] = 0;
++
++ return new_rules;
++ }
++#endif
++
+ STACK_OF(SSL_CIPHER) *ssl_create_cipher_list(const SSL_METHOD *ssl_method,
+ STACK_OF(SSL_CIPHER) **cipher_list,
+ STACK_OF(SSL_CIPHER) **cipher_list_by_id,
+@@ -1359,16 +1407,29 @@ STACK_OF(SSL_CIPHER) *ssl_create_cipher_
+ {
+ int ok, num_of_ciphers, num_of_alias_max, num_of_group_aliases;
+ unsigned long disabled_mkey, disabled_auth, disabled_enc, disabled_mac, disabled_ssl;
+- STACK_OF(SSL_CIPHER) *cipherstack, *tmp_cipher_list;
++ STACK_OF(SSL_CIPHER) *cipherstack = NULL, *tmp_cipher_list;
+ const char *rule_p;
+ CIPHER_ORDER *co_list = NULL, *head = NULL, *tail = NULL, *curr;
+ const SSL_CIPHER **ca_list = NULL;
++#ifdef SYSTEM_CIPHERS_FILE
++ char *new_rules = NULL;
++
++ if (rule_str != NULL && strncmp(rule_str, "PROFILE=SYSTEM", 14) == 0)
++ {
++ char *p = rule_str + 14;
++
++ new_rules = load_system_str(p);
++ rule_str = new_rules;
++ }
++#endif
+
+ /*
+ * Return with error if nothing to do.
+ */
+ if (rule_str == NULL || cipher_list == NULL || cipher_list_by_id == NULL)
+- return NULL;
++ {
++ goto end;
++ }
+
+ /*
+ * To reduce the work to do we only want to process the compiled
+@@ -1389,7 +1450,7 @@ STACK_OF(SSL_CIPHER) *ssl_create_cipher_
+ if (co_list == NULL)
+ {
+ SSLerr(SSL_F_SSL_CREATE_CIPHER_LIST,ERR_R_MALLOC_FAILURE);
+- return(NULL); /* Failure */
++ goto end;
+ }
+
+ ssl_cipher_collect_ciphers(ssl_method, num_of_ciphers,
+@@ -1431,8 +1492,7 @@ STACK_OF(SSL_CIPHER) *ssl_create_cipher_
+ * in force within each class */
+ if (!ssl_cipher_strength_sort(&head, &tail))
+ {
+- OPENSSL_free(co_list);
+- return NULL;
++ goto end;
+ }
+
+ /* Now disable everything (maintaining the ordering!) */
+@@ -1452,9 +1512,8 @@ STACK_OF(SSL_CIPHER) *ssl_create_cipher_
+ ca_list = OPENSSL_malloc(sizeof(SSL_CIPHER *) * num_of_alias_max);
+ if (ca_list == NULL)
+ {
+- OPENSSL_free(co_list);
+ SSLerr(SSL_F_SSL_CREATE_CIPHER_LIST,ERR_R_MALLOC_FAILURE);
+- return(NULL); /* Failure */
++ goto end;
+ }
+ ssl_cipher_collect_aliases(ca_list, num_of_group_aliases,
+ disabled_mkey, disabled_auth, disabled_enc,
+@@ -1482,8 +1541,7 @@ STACK_OF(SSL_CIPHER) *ssl_create_cipher_
+
+ if (!ok)
+ { /* Rule processing failure */
+- OPENSSL_free(co_list);
+- return(NULL);
++ goto end;
+ }
+
+ /*
+@@ -1492,8 +1550,7 @@ STACK_OF(SSL_CIPHER) *ssl_create_cipher_
+ */
+ if ((cipherstack = sk_SSL_CIPHER_new_null()) == NULL)
+ {
+- OPENSSL_free(co_list);
+- return(NULL);
++ goto end;
+ }
+
+ /*
+@@ -1514,13 +1571,13 @@ STACK_OF(SSL_CIPHER) *ssl_create_cipher_
+ #endif
+ }
+ }
+- OPENSSL_free(co_list); /* Not needed any longer */
+
+ tmp_cipher_list = sk_SSL_CIPHER_dup(cipherstack);
+ if (tmp_cipher_list == NULL)
+ {
+ sk_SSL_CIPHER_free(cipherstack);
+- return NULL;
++ cipherstack = NULL;
++ goto end;
+ }
+ if (*cipher_list != NULL)
+ sk_SSL_CIPHER_free(*cipher_list);
+@@ -1531,6 +1588,12 @@ STACK_OF(SSL_CIPHER) *ssl_create_cipher_
+ (void)sk_SSL_CIPHER_set_cmp_func(*cipher_list_by_id,ssl_cipher_ptr_id_cmp);
+
+ sk_SSL_CIPHER_sort(*cipher_list_by_id);
++
++end:
++ OPENSSL_free(co_list);
++#ifdef SYSTEM_CIPHERS_FILE
++ OPENSSL_free(new_rules);
++#endif
+ return(cipherstack);
+ }
+
+diff -up openssl-1.0.1h/ssl/ssl.h.system openssl-1.0.1h/ssl/ssl.h
+--- openssl-1.0.1h/ssl/ssl.h.system 2014-06-05 14:47:37.000000000 +0200
++++ openssl-1.0.1h/ssl/ssl.h 2014-06-11 14:08:35.243461447 +0200
+@@ -338,6 +338,11 @@ extern "C" {
+ * (The latter are not actually enabled by ALL, but "ALL:RSA" would enable
+ * some of them.)
+ */
++#ifdef SYSTEM_CIPHERS_FILE
++#define SSL_SYSTEM_DEFAULT_CIPHER_LIST "PROFILE=SYSTEM"
++#else
++#define SSL_SYSTEM_DEFAULT_CIPHER_LIST SSL_DEFAULT_CIPHER_LIST
++#endif
+
+ /* Used in SSL_set_shutdown()/SSL_get_shutdown(); */
+ #define SSL_SENT_SHUTDOWN 1
+diff -up openssl-1.0.1h/ssl/ssl_lib.c.system openssl-1.0.1h/ssl/ssl_lib.c
+--- openssl-1.0.1h/ssl/ssl_lib.c.system 2014-06-05 11:44:33.000000000 +0200
++++ openssl-1.0.1h/ssl/ssl_lib.c 2014-06-11 13:59:40.696578139 +0200
+@@ -263,7 +263,7 @@ int SSL_CTX_set_ssl_version(SSL_CTX *ctx
+
+ sk=ssl_create_cipher_list(ctx->method,&(ctx->cipher_list),
+ &(ctx->cipher_list_by_id),
+- meth->version == SSL2_VERSION ? "SSLv2" : SSL_DEFAULT_CIPHER_LIST);
++ meth->version == SSL2_VERSION ? "SSLv2" : SSL_SYSTEM_DEFAULT_CIPHER_LIST);
+ if ((sk == NULL) || (sk_SSL_CIPHER_num(sk) <= 0))
+ {
+ SSLerr(SSL_F_SSL_CTX_SET_SSL_VERSION,SSL_R_SSL_LIBRARY_HAS_NO_CIPHERS);
+@@ -1767,7 +1767,7 @@ SSL_CTX *SSL_CTX_new(const SSL_METHOD *m
+
+ ssl_create_cipher_list(ret->method,
+ &ret->cipher_list,&ret->cipher_list_by_id,
+- meth->version == SSL2_VERSION ? "SSLv2" : SSL_DEFAULT_CIPHER_LIST);
++ meth->version == SSL2_VERSION ? "SSLv2" : SSL_SYSTEM_DEFAULT_CIPHER_LIST);
+ if (ret->cipher_list == NULL
+ || sk_SSL_CIPHER_num(ret->cipher_list) <= 0)
+ {
diff --git a/openssl-1.0.1a-algo-doc.patch b/openssl-1.0.1i-algo-doc.patch
similarity index 80%
rename from openssl-1.0.1a-algo-doc.patch
rename to openssl-1.0.1i-algo-doc.patch
index c4aaa89..a19877d 100644
--- a/openssl-1.0.1a-algo-doc.patch
+++ b/openssl-1.0.1i-algo-doc.patch
@@ -1,6 +1,6 @@
-diff -up openssl-1.0.1a/doc/crypto/EVP_DigestInit.pod.algo-doc openssl-1.0.1a/doc/crypto/EVP_DigestInit.pod
---- openssl-1.0.1a/doc/crypto/EVP_DigestInit.pod.algo-doc 2012-04-11 00:28:22.000000000 +0200
-+++ openssl-1.0.1a/doc/crypto/EVP_DigestInit.pod 2012-04-20 09:14:01.865167011 +0200
+diff -up openssl-1.0.1i/doc/crypto/EVP_DigestInit.pod.algo-doc openssl-1.0.1i/doc/crypto/EVP_DigestInit.pod
+--- openssl-1.0.1i/doc/crypto/EVP_DigestInit.pod.algo-doc 2014-08-06 23:10:56.000000000 +0200
++++ openssl-1.0.1i/doc/crypto/EVP_DigestInit.pod 2014-08-07 11:18:01.290773970 +0200
@@ -75,7 +75,7 @@ EVP_MD_CTX_create() allocates, initializ
EVP_DigestInit_ex() sets up digest context B<ctx> to use a digest
@@ -10,9 +10,9 @@ diff -up openssl-1.0.1a/doc/crypto/EVP_DigestInit.pod.algo-doc openssl-1.0.1a/do
If B<impl> is NULL then the default implementation of digest B<type> is used.
EVP_DigestUpdate() hashes B<cnt> bytes of data at B<d> into the
-@@ -165,7 +165,8 @@ EVP_MD_size(), EVP_MD_block_size(), EVP_
- EVP_MD_CTX_block_size() and EVP_MD_block_size() return the digest or block
- size in bytes.
+@@ -164,7 +164,8 @@ corresponding OBJECT IDENTIFIER or NID_u
+ EVP_MD_size(), EVP_MD_block_size(), EVP_MD_CTX_size() and
+ EVP_MD_CTX_block_size() return the digest or block size in bytes.
-EVP_md_null(), EVP_md2(), EVP_md5(), EVP_sha(), EVP_sha1(), EVP_dss(),
+EVP_md_null(), EVP_md2(), EVP_md5(), EVP_sha(), EVP_sha1(),
@@ -20,9 +20,9 @@ diff -up openssl-1.0.1a/doc/crypto/EVP_DigestInit.pod.algo-doc openssl-1.0.1a/do
EVP_dss1(), EVP_mdc2() and EVP_ripemd160() return pointers to the
corresponding EVP_MD structures.
-diff -up openssl-1.0.1a/doc/crypto/EVP_EncryptInit.pod.algo-doc openssl-1.0.1a/doc/crypto/EVP_EncryptInit.pod
---- openssl-1.0.1a/doc/crypto/EVP_EncryptInit.pod.algo-doc 2005-04-15 18:01:35.000000000 +0200
-+++ openssl-1.0.1a/doc/crypto/EVP_EncryptInit.pod 2012-04-20 09:10:59.114736465 +0200
+diff -up openssl-1.0.1i/doc/crypto/EVP_EncryptInit.pod.algo-doc openssl-1.0.1i/doc/crypto/EVP_EncryptInit.pod
+--- openssl-1.0.1i/doc/crypto/EVP_EncryptInit.pod.algo-doc 2014-08-06 23:10:56.000000000 +0200
++++ openssl-1.0.1i/doc/crypto/EVP_EncryptInit.pod 2014-08-07 10:55:25.100638252 +0200
@@ -91,6 +91,32 @@ EVP_CIPHER_CTX_set_padding - EVP cipher
int EVP_CIPHER_param_to_asn1(EVP_CIPHER_CTX *c, ASN1_TYPE *type);
int EVP_CIPHER_asn1_to_param(EVP_CIPHER_CTX *c, ASN1_TYPE *type);
diff --git a/openssl-1.0.1i-manfix.patch b/openssl-1.0.1i-manfix.patch
new file mode 100644
index 0000000..f2f8be7
--- /dev/null
+++ b/openssl-1.0.1i-manfix.patch
@@ -0,0 +1,86 @@
+diff -up openssl-1.0.1i/doc/apps/ec.pod.manfix openssl-1.0.1i/doc/apps/ec.pod
+--- openssl-1.0.1i/doc/apps/ec.pod.manfix 2014-07-22 21:41:23.000000000 +0200
++++ openssl-1.0.1i/doc/apps/ec.pod 2014-08-07 11:21:57.258887741 +0200
+@@ -93,10 +93,6 @@ prints out the public, private key compo
+
+ this option prevents output of the encoded version of the key.
+
+-=item B<-modulus>
+-
+-this option prints out the value of the public key component of the key.
+-
+ =item B<-pubin>
+
+ by default a private key is read from the input file: with this option a
+diff -up openssl-1.0.1i/doc/apps/openssl.pod.manfix openssl-1.0.1i/doc/apps/openssl.pod
+--- openssl-1.0.1i/doc/apps/openssl.pod.manfix 2014-07-22 21:43:11.000000000 +0200
++++ openssl-1.0.1i/doc/apps/openssl.pod 2014-08-07 11:21:57.259887746 +0200
+@@ -163,7 +163,7 @@ Create or examine a netscape certificate
+
+ Online Certificate Status Protocol utility.
+
+-=item L<B<passwd>|passwd(1)>
++=item L<B<passwd>|sslpasswd(1)>
+
+ Generation of hashed passwords.
+
+@@ -187,7 +187,7 @@ Public key algorithm parameter managemen
+
+ Public key algorithm cryptographic operation utility.
+
+-=item L<B<rand>|rand(1)>
++=item L<B<rand>|sslrand(1)>
+
+ Generate pseudo-random bytes.
+
+@@ -401,9 +401,9 @@ L<crl(1)|crl(1)>, L<crl2pkcs7(1)|crl2pkc
+ L<dhparam(1)|dhparam(1)>, L<dsa(1)|dsa(1)>, L<dsaparam(1)|dsaparam(1)>,
+ L<enc(1)|enc(1)>, L<gendsa(1)|gendsa(1)>, L<genpkey(1)|genpkey(1)>,
+ L<genrsa(1)|genrsa(1)>, L<nseq(1)|nseq(1)>, L<openssl(1)|openssl(1)>,
+-L<passwd(1)|passwd(1)>,
++L<sslpasswd(1)|sslpasswd(1)>,
+ L<pkcs12(1)|pkcs12(1)>, L<pkcs7(1)|pkcs7(1)>, L<pkcs8(1)|pkcs8(1)>,
+-L<rand(1)|rand(1)>, L<req(1)|req(1)>, L<rsa(1)|rsa(1)>,
++L<sslrand(1)|sslrand(1)>, L<req(1)|req(1)>, L<rsa(1)|rsa(1)>,
+ L<rsautl(1)|rsautl(1)>, L<s_client(1)|s_client(1)>,
+ L<s_server(1)|s_server(1)>, L<s_time(1)|s_time(1)>,
+ L<smime(1)|smime(1)>, L<spkac(1)|spkac(1)>,
+diff -up openssl-1.0.1i/doc/apps/s_client.pod.manfix openssl-1.0.1i/doc/apps/s_client.pod
+--- openssl-1.0.1i/doc/apps/s_client.pod.manfix 2014-08-06 23:10:56.000000000 +0200
++++ openssl-1.0.1i/doc/apps/s_client.pod 2014-08-07 11:24:28.736604443 +0200
+@@ -34,9 +34,14 @@ B<openssl> B<s_client>
+ [B<-ssl2>]
+ [B<-ssl3>]
+ [B<-tls1>]
++[B<-tls1_1>]
++[B<-tls1_2>]
++[B<-dtls1>]
+ [B<-no_ssl2>]
+ [B<-no_ssl3>]
+ [B<-no_tls1>]
++[B<-no_tls1_1>]
++[B<-no_tls1_2>]
+ [B<-bugs>]
+ [B<-cipher cipherlist>]
+ [B<-serverpref>]
+@@ -196,7 +201,7 @@ Use the PSK key B<key> when using a PSK
+ given as a hexadecimal number without leading 0x, for example -psk
+ 1a2b3c4d.
+
+-=item B<-ssl2>, B<-ssl3>, B<-tls1>, B<-no_ssl2>, B<-no_ssl3>, B<-no_tls1>
++=item B<-ssl2>, B<-ssl3>, B<-tls1>, B<-tls1_1>, B<-tls1_2>, B<-dtls1>, B<-no_ssl2>, B<-no_ssl3>, B<-no_tls1>, B<-no_tls1_1>, B<-no_tls1_2>
+
+ these options disable the use of certain SSL or TLS protocols. By default
+ the initial handshake uses a method which should be compatible with all
+diff -up openssl-1.0.1i/doc/apps/s_server.pod.manfix openssl-1.0.1i/doc/apps/s_server.pod
+--- openssl-1.0.1i/doc/apps/s_server.pod.manfix 2014-08-06 23:10:56.000000000 +0200
++++ openssl-1.0.1i/doc/apps/s_server.pod 2014-08-07 11:21:57.259887746 +0200
+@@ -216,7 +216,7 @@ Use the PSK key B<key> when using a PSK
+ given as a hexadecimal number without leading 0x, for example -psk
+ 1a2b3c4d.
+
+-=item B<-ssl2>, B<-ssl3>, B<-tls1>, B<-no_ssl2>, B<-no_ssl3>, B<-no_tls1>
++=item B<-ssl2>, B<-ssl3>, B<-tls1>, B<-tls1_1>, B<-tls1_2>, B<-dtls1>, B<-no_ssl2>, B<-no_ssl3>, B<-no_tls1>, B<-no_tls1_1>, B<-no_tls1_2>
+
+ these options disable the use of certain SSL or TLS protocols. By default
+ the initial handshake uses a method which should be compatible with all
diff --git a/openssl-1.0.1e-new-fips-reqs.patch b/openssl-1.0.1i-new-fips-reqs.patch
similarity index 73%
rename from openssl-1.0.1e-new-fips-reqs.patch
rename to openssl-1.0.1i-new-fips-reqs.patch
index 4afdde1..e33494c 100644
--- a/openssl-1.0.1e-new-fips-reqs.patch
+++ b/openssl-1.0.1i-new-fips-reqs.patch
@@ -1,6 +1,6 @@
-diff -up openssl-1.0.1e/crypto/bn/bn_rand.c.fips-reqs openssl-1.0.1e/crypto/bn/bn_rand.c
---- openssl-1.0.1e/crypto/bn/bn_rand.c.fips-reqs 2013-02-11 16:02:47.000000000 +0100
-+++ openssl-1.0.1e/crypto/bn/bn_rand.c 2014-02-13 16:50:34.280893285 +0100
+diff -up openssl-1.0.1i/crypto/bn/bn_rand.c.fips-reqs openssl-1.0.1i/crypto/bn/bn_rand.c
+--- openssl-1.0.1i/crypto/bn/bn_rand.c.fips-reqs 2014-07-22 21:43:11.000000000 +0200
++++ openssl-1.0.1i/crypto/bn/bn_rand.c 2014-08-13 19:58:06.818832577 +0200
@@ -138,9 +138,12 @@ static int bnrand(int pseudorand, BIGNUM
goto err;
}
@@ -17,9 +17,9 @@ diff -up openssl-1.0.1e/crypto/bn/bn_rand.c.fips-reqs openssl-1.0.1e/crypto/bn/b
if (pseudorand)
{
-diff -up openssl-1.0.1e/crypto/dh/dh_gen.c.fips-reqs openssl-1.0.1e/crypto/dh/dh_gen.c
---- openssl-1.0.1e/crypto/dh/dh_gen.c.fips-reqs 2013-12-18 12:17:09.748636614 +0100
-+++ openssl-1.0.1e/crypto/dh/dh_gen.c 2013-12-18 12:17:09.798637687 +0100
+diff -up openssl-1.0.1i/crypto/dh/dh_gen.c.fips-reqs openssl-1.0.1i/crypto/dh/dh_gen.c
+--- openssl-1.0.1i/crypto/dh/dh_gen.c.fips-reqs 2014-08-13 19:58:06.765831356 +0200
++++ openssl-1.0.1i/crypto/dh/dh_gen.c 2014-08-13 19:58:06.818832577 +0200
@@ -125,7 +125,7 @@ static int dh_builtin_genparams(DH *ret,
return 0;
}
@@ -29,9 +29,9 @@ diff -up openssl-1.0.1e/crypto/dh/dh_gen.c.fips-reqs openssl-1.0.1e/crypto/dh/dh
{
DHerr(DH_F_DH_BUILTIN_GENPARAMS, DH_R_KEY_SIZE_TOO_SMALL);
goto err;
-diff -up openssl-1.0.1e/crypto/dh/dh.h.fips-reqs openssl-1.0.1e/crypto/dh/dh.h
---- openssl-1.0.1e/crypto/dh/dh.h.fips-reqs 2013-12-18 12:17:09.748636614 +0100
-+++ openssl-1.0.1e/crypto/dh/dh.h 2013-12-18 12:17:09.798637687 +0100
+diff -up openssl-1.0.1i/crypto/dh/dh.h.fips-reqs openssl-1.0.1i/crypto/dh/dh.h
+--- openssl-1.0.1i/crypto/dh/dh.h.fips-reqs 2014-08-13 19:58:06.765831356 +0200
++++ openssl-1.0.1i/crypto/dh/dh.h 2014-08-13 19:58:06.818832577 +0200
@@ -78,6 +78,7 @@
#endif
@@ -40,9 +40,9 @@ diff -up openssl-1.0.1e/crypto/dh/dh.h.fips-reqs openssl-1.0.1e/crypto/dh/dh.h
#define DH_FLAG_CACHE_MONT_P 0x01
#define DH_FLAG_NO_EXP_CONSTTIME 0x02 /* new with 0.9.7h; the built-in DH
-diff -up openssl-1.0.1e/crypto/dh/dh_check.c.fips-reqs openssl-1.0.1e/crypto/dh/dh_check.c
---- openssl-1.0.1e/crypto/dh/dh_check.c.fips-reqs 2013-02-11 16:26:04.000000000 +0100
-+++ openssl-1.0.1e/crypto/dh/dh_check.c 2013-12-18 12:17:09.799637708 +0100
+diff -up openssl-1.0.1i/crypto/dh/dh_check.c.fips-reqs openssl-1.0.1i/crypto/dh/dh_check.c
+--- openssl-1.0.1i/crypto/dh/dh_check.c.fips-reqs 2014-08-06 23:10:56.000000000 +0200
++++ openssl-1.0.1i/crypto/dh/dh_check.c 2014-08-13 19:58:06.818832577 +0200
@@ -134,7 +134,33 @@ int DH_check_pub_key(const DH *dh, const
BN_sub_word(q,1);
if (BN_cmp(pub_key,q)>=0)
@@ -77,25 +77,26 @@ diff -up openssl-1.0.1e/crypto/dh/dh_check.c.fips-reqs openssl-1.0.1e/crypto/dh/
ok = 1;
err:
if (q != NULL) BN_free(q);
-diff -up openssl-1.0.1e/crypto/dsa/dsa_gen.c.fips-reqs openssl-1.0.1e/crypto/dsa/dsa_gen.c
---- openssl-1.0.1e/crypto/dsa/dsa_gen.c.fips-reqs 2013-12-18 12:17:09.749636636 +0100
-+++ openssl-1.0.1e/crypto/dsa/dsa_gen.c 2013-12-18 12:17:09.799637708 +0100
-@@ -159,7 +159,6 @@ int dsa_builtin_paramgen(DSA *ret, size_
+diff -up openssl-1.0.1i/crypto/dsa/dsa_gen.c.fips-reqs openssl-1.0.1i/crypto/dsa/dsa_gen.c
+--- openssl-1.0.1i/crypto/dsa/dsa_gen.c.fips-reqs 2014-08-13 19:58:06.766831380 +0200
++++ openssl-1.0.1i/crypto/dsa/dsa_gen.c 2014-08-13 19:58:06.818832577 +0200
+@@ -159,7 +159,7 @@ int dsa_builtin_paramgen(DSA *ret, size_
}
if (FIPS_module_mode() &&
- (bits != 1024 || qbits != 160) &&
++ (getenv("OPENSSL_ENFORCE_MODULUS_BITS") || bits != 1024 || qbits != 160) &&
(bits != 2048 || qbits != 224) &&
(bits != 2048 || qbits != 256) &&
(bits != 3072 || qbits != 256))
-diff -up openssl-1.0.1e/crypto/dsa/dsa.h.fips-reqs openssl-1.0.1e/crypto/dsa/dsa.h
---- openssl-1.0.1e/crypto/dsa/dsa.h.fips-reqs 2013-12-18 12:17:09.749636636 +0100
-+++ openssl-1.0.1e/crypto/dsa/dsa.h 2013-12-18 12:17:09.799637708 +0100
+diff -up openssl-1.0.1i/crypto/dsa/dsa.h.fips-reqs openssl-1.0.1i/crypto/dsa/dsa.h
+--- openssl-1.0.1i/crypto/dsa/dsa.h.fips-reqs 2014-08-13 19:58:06.766831380 +0200
++++ openssl-1.0.1i/crypto/dsa/dsa.h 2014-08-13 19:58:06.818832577 +0200
@@ -89,6 +89,7 @@
#endif
#define OPENSSL_DSA_FIPS_MIN_MODULUS_BITS 1024
-+#define OPENSSL_DSA_FIPS_MIN_MODULUS_BITS_GEN 2048
++#define OPENSSL_DSA_FIPS_MIN_MODULUS_BITS_GEN (getenv("OPENSSL_ENFORCE_MODULUS_BITS")?2048:1024)
#define DSA_FLAG_CACHE_MONT_P 0x01
#define DSA_FLAG_NO_EXP_CONSTTIME 0x02 /* new with 0.9.7h; the built-in DSA
@@ -112,9 +113,9 @@ diff -up openssl-1.0.1e/crypto/dsa/dsa.h.fips-reqs openssl-1.0.1e/crypto/dsa/dsa
#define DSA_is_prime(n, callback, cb_arg) \
BN_is_prime(n, DSS_prime_checks, callback, NULL, cb_arg)
-diff -up openssl-1.0.1e/crypto/dsa/dsa_key.c.fips-reqs openssl-1.0.1e/crypto/dsa/dsa_key.c
---- openssl-1.0.1e/crypto/dsa/dsa_key.c.fips-reqs 2013-12-18 12:17:09.797637665 +0100
-+++ openssl-1.0.1e/crypto/dsa/dsa_key.c 2013-12-18 12:17:09.799637708 +0100
+diff -up openssl-1.0.1i/crypto/dsa/dsa_key.c.fips-reqs openssl-1.0.1i/crypto/dsa/dsa_key.c
+--- openssl-1.0.1i/crypto/dsa/dsa_key.c.fips-reqs 2014-08-13 19:58:06.816832531 +0200
++++ openssl-1.0.1i/crypto/dsa/dsa_key.c 2014-08-13 19:58:06.818832577 +0200
@@ -127,7 +127,7 @@ static int dsa_builtin_keygen(DSA *dsa)
#ifdef OPENSSL_FIPS
@@ -124,9 +125,9 @@ diff -up openssl-1.0.1e/crypto/dsa/dsa_key.c.fips-reqs openssl-1.0.1e/crypto/dsa
{
DSAerr(DSA_F_DSA_BUILTIN_KEYGEN, DSA_R_KEY_SIZE_TOO_SMALL);
goto err;
-diff -up openssl-1.0.1e/crypto/fips/fips_dh_selftest.c.fips-reqs openssl-1.0.1e/crypto/fips/fips_dh_selftest.c
---- openssl-1.0.1e/crypto/fips/fips_dh_selftest.c.fips-reqs 2013-12-18 17:06:36.575114314 +0100
-+++ openssl-1.0.1e/crypto/fips/fips_dh_selftest.c 2013-12-18 17:26:14.409036334 +0100
+diff -up openssl-1.0.1i/crypto/fips/fips_dh_selftest.c.fips-reqs openssl-1.0.1i/crypto/fips/fips_dh_selftest.c
+--- openssl-1.0.1i/crypto/fips/fips_dh_selftest.c.fips-reqs 2014-08-13 19:58:06.819832600 +0200
++++ openssl-1.0.1i/crypto/fips/fips_dh_selftest.c 2014-08-13 19:58:06.819832600 +0200
@@ -0,0 +1,162 @@
+/* ====================================================================
+ * Copyright (c) 2011 The OpenSSL Project. All rights reserved.
@@ -290,92 +291,9 @@ diff -up openssl-1.0.1e/crypto/fips/fips_dh_selftest.c.fips-reqs openssl-1.0.1e/
+ return ret;
+ }
+#endif
-diff -up openssl-1.0.1e/crypto/fips/fips_drbg_rand.c.fips-reqs openssl-1.0.1e/crypto/fips/fips_drbg_rand.c
---- openssl-1.0.1e/crypto/fips/fips_drbg_rand.c.fips-reqs 2013-12-18 12:17:09.757636808 +0100
-+++ openssl-1.0.1e/crypto/fips/fips_drbg_rand.c 2013-12-18 18:53:33.263711297 +0100
-@@ -77,7 +77,8 @@ static int fips_drbg_bytes(unsigned char
- int rv = 0;
- unsigned char *adin = NULL;
- size_t adinlen = 0;
-- CRYPTO_w_lock(CRYPTO_LOCK_RAND);
-+ int locked;
-+ locked = private_RAND_lock(1);
- do
- {
- size_t rcnt;
-@@ -109,7 +110,8 @@ static int fips_drbg_bytes(unsigned char
- while (count);
- rv = 1;
- err:
-- CRYPTO_w_unlock(CRYPTO_LOCK_RAND);
-+ if (locked)
-+ private_RAND_lock(0);
- return rv;
- }
-
-@@ -124,35 +126,51 @@ static int fips_drbg_status(void)
- {
- DRBG_CTX *dctx = &ossl_dctx;
- int rv;
-- CRYPTO_r_lock(CRYPTO_LOCK_RAND);
-+ int locked;
-+ locked = private_RAND_lock(1);
- rv = dctx->status == DRBG_STATUS_READY ? 1 : 0;
-- CRYPTO_r_unlock(CRYPTO_LOCK_RAND);
-+ if (locked)
-+ private_RAND_lock(0);
- return rv;
- }
-
- static void fips_drbg_cleanup(void)
- {
- DRBG_CTX *dctx = &ossl_dctx;
-- CRYPTO_w_lock(CRYPTO_LOCK_RAND);
-+ int locked;
-+ locked = private_RAND_lock(1);
- FIPS_drbg_uninstantiate(dctx);
-- CRYPTO_w_unlock(CRYPTO_LOCK_RAND);
-+ if (locked)
-+ private_RAND_lock(0);
- }
-
- static int fips_drbg_seed(const void *seed, int seedlen)
- {
- DRBG_CTX *dctx = &ossl_dctx;
-+ int locked;
-+ int ret = 1;
-+
-+ locked = private_RAND_lock(1);
- if (dctx->rand_seed_cb)
-- return dctx->rand_seed_cb(dctx, seed, seedlen);
-- return 1;
-+ ret = dctx->rand_seed_cb(dctx, seed, seedlen);
-+ if (locked)
-+ private_RAND_lock(0);
-+ return ret;
- }
-
- static int fips_drbg_add(const void *seed, int seedlen,
- double add_entropy)
- {
- DRBG_CTX *dctx = &ossl_dctx;
-+ int locked;
-+ int ret = 1;
-+
-+ locked = private_RAND_lock(1);
- if (dctx->rand_add_cb)
-- return dctx->rand_add_cb(dctx, seed, seedlen, add_entropy);
-- return 1;
-+ ret = dctx->rand_add_cb(dctx, seed, seedlen, add_entropy);
-+ if (locked)
-+ private_RAND_lock(0);
-+ return ret;
- }
-
- static const RAND_METHOD rand_drbg_meth =
-diff -up openssl-1.0.1e/crypto/fips/fips.h.fips-reqs openssl-1.0.1e/crypto/fips/fips.h
---- openssl-1.0.1e/crypto/fips/fips.h.fips-reqs 2013-12-18 12:17:09.000000000 +0100
-+++ openssl-1.0.1e/crypto/fips/fips.h 2013-12-18 17:13:00.928586689 +0100
+diff -up openssl-1.0.1i/crypto/fips/fips.h.fips-reqs openssl-1.0.1i/crypto/fips/fips.h
+--- openssl-1.0.1i/crypto/fips/fips.h.fips-reqs 2014-08-13 19:58:06.812832439 +0200
++++ openssl-1.0.1i/crypto/fips/fips.h 2014-08-13 19:58:06.819832600 +0200
@@ -96,6 +96,7 @@ void FIPS_corrupt_dsa_keygen(void);
int FIPS_selftest_dsa(void);
int FIPS_selftest_ecdsa(void);
@@ -384,9 +302,9 @@ diff -up openssl-1.0.1e/crypto/fips/fips.h.fips-reqs openssl-1.0.1e/crypto/fips/
void FIPS_corrupt_rng(void);
void FIPS_rng_stick(void);
void FIPS_x931_stick(int onoff);
-diff -up openssl-1.0.1e/crypto/fips/fips_post.c.fips-reqs openssl-1.0.1e/crypto/fips/fips_post.c
---- openssl-1.0.1e/crypto/fips/fips_post.c.fips-reqs 2013-12-18 12:17:09.000000000 +0100
-+++ openssl-1.0.1e/crypto/fips/fips_post.c 2013-12-18 17:12:26.721832716 +0100
+diff -up openssl-1.0.1i/crypto/fips/fips_post.c.fips-reqs openssl-1.0.1i/crypto/fips/fips_post.c
+--- openssl-1.0.1i/crypto/fips/fips_post.c.fips-reqs 2014-08-13 19:58:06.809832370 +0200
++++ openssl-1.0.1i/crypto/fips/fips_post.c 2014-08-13 19:58:06.819832600 +0200
@@ -99,6 +99,8 @@ int FIPS_selftest(void)
rv = 0;
if (!FIPS_selftest_dsa())
@@ -396,9 +314,9 @@ diff -up openssl-1.0.1e/crypto/fips/fips_post.c.fips-reqs openssl-1.0.1e/crypto/
if (!FIPS_selftest_ecdh())
rv = 0;
return rv;
-diff -up openssl-1.0.1e/crypto/fips/fips_rsa_selftest.c.fips-reqs openssl-1.0.1e/crypto/fips/fips_rsa_selftest.c
---- openssl-1.0.1e/crypto/fips/fips_rsa_selftest.c.fips-reqs 2014-03-14 14:47:18.809259727 +0100
-+++ openssl-1.0.1e/crypto/fips/fips_rsa_selftest.c 2014-03-14 15:37:26.295687852 +0100
+diff -up openssl-1.0.1i/crypto/fips/fips_rsa_selftest.c.fips-reqs openssl-1.0.1i/crypto/fips/fips_rsa_selftest.c
+--- openssl-1.0.1i/crypto/fips/fips_rsa_selftest.c.fips-reqs 2014-08-13 19:58:06.779831679 +0200
++++ openssl-1.0.1i/crypto/fips/fips_rsa_selftest.c 2014-08-13 19:59:16.491437297 +0200
@@ -60,69 +60,113 @@
#ifdef OPENSSL_FIPS
@@ -557,7 +475,7 @@ diff -up openssl-1.0.1e/crypto/fips/fips_rsa_selftest.c.fips-reqs openssl-1.0.1e
key->e = BN_bin2bn(e, sizeof(e)-1, key->e);
key->d = BN_bin2bn(d, sizeof(d)-1, key->d);
key->p = BN_bin2bn(p, sizeof(p)-1, key->p);
-@@ -145,201 +189,391 @@ void FIPS_corrupt_rsa()
+@@ -145,201 +189,291 @@ void FIPS_corrupt_rsa()
static const unsigned char kat_tbs[] = "OpenSSL FIPS 140-2 Public Key RSA KAT";
static const unsigned char kat_RSA_PSS_SHA1[] = {
@@ -920,7 +838,7 @@ diff -up openssl-1.0.1e/crypto/fips/fips_rsa_selftest.c.fips-reqs openssl-1.0.1e
+ 0x43, 0xA8, 0x34, 0x0A
};
- static const unsigned char kat_RSA_X931_SHA1[] = {
+-static const unsigned char kat_RSA_X931_SHA1[] = {
- 0x86, 0xB4, 0x18, 0xBA, 0xD1, 0x80, 0xB6, 0x7C, 0x42, 0x45, 0x4D, 0xDF,
- 0xE9, 0x2D, 0xE1, 0x83, 0x5F, 0xB5, 0x2F, 0xC9, 0xCD, 0xC4, 0xB2, 0x75,
- 0x80, 0xA4, 0xF1, 0x4A, 0xE7, 0x83, 0x12, 0x1E, 0x1E, 0x14, 0xB8, 0xAC,
@@ -932,31 +850,14 @@ diff -up openssl-1.0.1e/crypto/fips/fips_rsa_selftest.c.fips-reqs openssl-1.0.1e
- 0x48, 0xAF, 0x82, 0xFE, 0x32, 0x41, 0x9B, 0xB2, 0xDB, 0xEA, 0xED, 0x76,
- 0x8E, 0x6E, 0xCA, 0x7E, 0x4E, 0x14, 0xBA, 0x30, 0x84, 0x1C, 0xB3, 0x67,
- 0xA3, 0x29, 0x80, 0x70, 0x54, 0x68, 0x7D, 0x49
-+ 0xB1, 0x0E, 0x4F, 0xC6, 0xE0, 0x95, 0x85, 0x7B, 0xBE, 0xDE, 0xC4, 0xE6,
-+ 0x1F, 0x12, 0x2E, 0x9B, 0x3E, 0x11, 0xA3, 0xF0, 0xF0, 0xA8, 0x23, 0x1A,
-+ 0x96, 0x6E, 0x99, 0xB5, 0x5F, 0x82, 0xC5, 0x87, 0x75, 0xE9, 0xD4, 0xBF,
-+ 0x9F, 0xE0, 0xA4, 0xED, 0xC7, 0x01, 0x2A, 0x3F, 0x6F, 0x43, 0x1D, 0x4F,
-+ 0xE8, 0x05, 0x34, 0x32, 0x20, 0x36, 0x94, 0xA0, 0x6D, 0xCC, 0xF6, 0x41,
-+ 0x49, 0x56, 0x96, 0xEC, 0x9C, 0x7C, 0xD1, 0x0E, 0x9E, 0xD8, 0x1B, 0x48,
-+ 0xD9, 0xDF, 0x99, 0x9F, 0x92, 0x17, 0x96, 0xA4, 0xF1, 0x87, 0x64, 0x61,
-+ 0x3C, 0xAF, 0x00, 0x24, 0xB3, 0x64, 0x88, 0x8E, 0x41, 0xBF, 0x29, 0x1F,
-+ 0xA3, 0x28, 0xAD, 0x21, 0x1E, 0xA3, 0x96, 0x40, 0x0A, 0x0B, 0x82, 0xCD,
-+ 0x97, 0x58, 0x33, 0xB6, 0x52, 0xAC, 0xC5, 0x3B, 0x14, 0xE7, 0x1E, 0x5D,
-+ 0x09, 0xC9, 0x76, 0xB5, 0x89, 0xC6, 0x9B, 0x4C, 0xC2, 0xC2, 0x31, 0x0E,
-+ 0xBA, 0x1E, 0xB5, 0x11, 0xD0, 0xFD, 0xC1, 0xDA, 0x64, 0x17, 0xA8, 0xCB,
-+ 0xF0, 0x94, 0xF4, 0xDD, 0x84, 0xB7, 0xEF, 0x9C, 0x13, 0x4F, 0xDD, 0x06,
-+ 0x0C, 0xE4, 0xC7, 0xFD, 0x69, 0x10, 0x20, 0xD3, 0x93, 0x5E, 0xF8, 0xBA,
-+ 0x21, 0xFB, 0x62, 0xC4, 0x63, 0x76, 0x43, 0xAA, 0x7E, 0x3C, 0x56, 0x5E,
-+ 0xB4, 0x47, 0x3A, 0x05, 0x0D, 0xBB, 0x13, 0xC4, 0x93, 0xFB, 0x29, 0xA8,
-+ 0x3E, 0x76, 0x41, 0x54, 0x9E, 0x7B, 0xE2, 0xE0, 0x07, 0x1D, 0xA7, 0x9C,
-+ 0x85, 0x11, 0xB5, 0xA5, 0x88, 0x58, 0x02, 0xD8, 0xC0, 0x4B, 0x81, 0xBF,
-+ 0x2B, 0x38, 0xE2, 0x2F, 0x42, 0xCA, 0x63, 0x8A, 0x0A, 0x78, 0xBA, 0x50,
-+ 0xE5, 0x84, 0x35, 0xD3, 0x6A, 0x1E, 0x96, 0x0B, 0x91, 0xB1, 0x0E, 0x85,
-+ 0xA8, 0x5C, 0x6E, 0x46, 0x5C, 0x61, 0x8C, 0x4F, 0x5B, 0x61, 0xB6, 0x3C,
-+ 0xB7, 0x2C, 0xA5, 0x1A
- };
+-};
++static int fips_rsa_encrypt_test(RSA *rsa, const unsigned char *plaintext, int ptlen)
++ {
++ unsigned char *ctbuf = NULL, *ptbuf = NULL;
++ int ret = 0;
++ int len;
- static const unsigned char kat_RSA_X931_SHA256[] = {
+-static const unsigned char kat_RSA_X931_SHA256[] = {
- 0x7E, 0xA2, 0x77, 0xFE, 0xB8, 0x54, 0x8A, 0xC7, 0x7F, 0x64, 0x54, 0x89,
- 0xE5, 0x52, 0x15, 0x8E, 0x52, 0x96, 0x4E, 0xA6, 0x58, 0x92, 0x1C, 0xDD,
- 0xEA, 0xA2, 0x2D, 0x5C, 0xD1, 0x62, 0x00, 0x49, 0x05, 0x95, 0x73, 0xCF,
@@ -968,31 +869,12 @@ diff -up openssl-1.0.1e/crypto/fips/fips_rsa_selftest.c.fips-reqs openssl-1.0.1e
- 0x33, 0x1D, 0x82, 0x8C, 0x03, 0xEA, 0x69, 0x88, 0x35, 0xA1, 0x42, 0xBD,
- 0x21, 0xED, 0x8D, 0xBC, 0xBC, 0xDB, 0x30, 0xFF, 0x86, 0xF0, 0x5B, 0xDC,
- 0xE3, 0xE2, 0xE8, 0x0A, 0x0A, 0x29, 0x94, 0x80
-+ 0xC6, 0x6C, 0x01, 0x7F, 0xB6, 0x8C, 0xD4, 0x61, 0x83, 0xC5, 0xBC, 0x75,
-+ 0x39, 0x22, 0xDD, 0x17, 0x5B, 0x95, 0x4B, 0x4C, 0x46, 0x39, 0x37, 0xA7,
-+ 0x54, 0x6C, 0x49, 0x5A, 0x67, 0x90, 0x47, 0xF6, 0x59, 0xAE, 0xFC, 0xDD,
-+ 0xDF, 0xDB, 0xC7, 0x91, 0xB9, 0xB6, 0xCE, 0xD8, 0xFA, 0x30, 0x01, 0x9F,
-+ 0xCA, 0xE5, 0x4A, 0x51, 0xB7, 0xBE, 0xBD, 0x4E, 0x56, 0x25, 0x0B, 0x49,
-+ 0xE0, 0x46, 0xBB, 0x81, 0x0E, 0x14, 0x47, 0xFF, 0xCB, 0xBB, 0xA1, 0x6D,
-+ 0x44, 0x9B, 0xF7, 0xEE, 0x81, 0xEB, 0xF6, 0x62, 0xEA, 0x0D, 0x76, 0x76,
-+ 0x4E, 0x25, 0xD7, 0x9A, 0x2B, 0xB1, 0x92, 0xED, 0x5C, 0x7F, 0x9D, 0x99,
-+ 0x07, 0x9E, 0xBF, 0x62, 0x83, 0x12, 0x61, 0x99, 0x3E, 0xF5, 0x6A, 0x4C,
-+ 0x58, 0xB0, 0x2A, 0x15, 0x1C, 0xA0, 0xD2, 0x91, 0x87, 0x9C, 0x7D, 0x4F,
-+ 0xEF, 0x3B, 0x0F, 0x60, 0xD7, 0x1E, 0xEF, 0x7C, 0xBE, 0x68, 0x95, 0xE6,
-+ 0xBA, 0xFA, 0xF6, 0xD1, 0x67, 0x3D, 0x9D, 0x39, 0xAE, 0xC2, 0x85, 0xD2,
-+ 0xDE, 0xA5, 0x85, 0x1E, 0x4D, 0x2B, 0x2C, 0x06, 0x44, 0x98, 0x17, 0x46,
-+ 0x89, 0x41, 0x13, 0xFC, 0x99, 0xD6, 0x6C, 0xCF, 0x26, 0xA2, 0x77, 0x8A,
-+ 0x3F, 0x10, 0xF8, 0xC5, 0xC9, 0x4A, 0xB6, 0x93, 0xF5, 0x38, 0x89, 0xBD,
-+ 0xFF, 0xAE, 0x42, 0x06, 0x2D, 0xCD, 0x1B, 0x3D, 0x5A, 0xCD, 0xF2, 0x8A,
-+ 0x65, 0xA4, 0xB7, 0xB6, 0xF6, 0x5B, 0xE8, 0xA4, 0x68, 0xB4, 0x27, 0xDA,
-+ 0xF1, 0x59, 0x37, 0x24, 0x18, 0xB5, 0x5B, 0x15, 0x62, 0x64, 0x6F, 0x78,
-+ 0xBB, 0x17, 0x94, 0x42, 0xAD, 0xB3, 0x0D, 0x18, 0xB0, 0x1B, 0x28, 0x29,
-+ 0x3B, 0x15, 0xBF, 0xD1, 0xC8, 0x28, 0x4F, 0xDF, 0x7F, 0x34, 0x49, 0x2A,
-+ 0x44, 0xD5, 0x4C, 0x59, 0x90, 0x83, 0x8D, 0xFC, 0x58, 0x7E, 0xEC, 0x4B,
-+ 0x54, 0xF0, 0xB5, 0xBD
- };
+-};
++ ctbuf = OPENSSL_malloc(RSA_size(rsa));
++ if (!ctbuf)
++ goto err;
- static const unsigned char kat_RSA_X931_SHA384[] = {
+-static const unsigned char kat_RSA_X931_SHA384[] = {
- 0x5C, 0x7D, 0x96, 0x35, 0xEC, 0x7E, 0x11, 0x38, 0xBB, 0x7B, 0xEC, 0x7B,
- 0xF2, 0x82, 0x8E, 0x99, 0xBD, 0xEF, 0xD8, 0xAE, 0xD7, 0x39, 0x37, 0xCB,
- 0xE6, 0x4F, 0x5E, 0x0A, 0x13, 0xE4, 0x2E, 0x40, 0xB9, 0xBE, 0x2E, 0xE3,
@@ -1004,31 +886,15 @@ diff -up openssl-1.0.1e/crypto/fips/fips_rsa_selftest.c.fips-reqs openssl-1.0.1e
- 0x0E, 0x09, 0xEE, 0x2E, 0xE2, 0x37, 0xB9, 0xDE, 0xC5, 0x12, 0x44, 0x35,
- 0xEF, 0x01, 0xE6, 0x5E, 0x39, 0x31, 0x2D, 0x71, 0xA5, 0xDC, 0xC6, 0x6D,
- 0xE2, 0xCD, 0x85, 0xDB, 0x73, 0x82, 0x65, 0x28
-+ 0x88, 0x85, 0xE1, 0xC1, 0xE2, 0xE5, 0x0B, 0x6C, 0x03, 0x27, 0xAC, 0xC8,
-+ 0x3A, 0x72, 0xB4, 0x9A, 0xF3, 0xAE, 0x9C, 0x88, 0x8C, 0xBE, 0x28, 0x0D,
-+ 0x89, 0x5F, 0x06, 0x0F, 0x5F, 0x08, 0xE3, 0x9C, 0xF9, 0x28, 0x4F, 0xBB,
-+ 0x24, 0xDD, 0x21, 0x4C, 0x44, 0x96, 0x50, 0xB5, 0xD4, 0x8E, 0x13, 0x60,
-+ 0x7C, 0xCB, 0xD9, 0x5E, 0x7C, 0xB6, 0xAD, 0xA5, 0x6A, 0x41, 0x04, 0xA7,
-+ 0x8E, 0xF0, 0x39, 0x08, 0x7E, 0x18, 0x91, 0xF9, 0x46, 0x97, 0xEF, 0xF2,
-+ 0x14, 0xB2, 0x01, 0xFD, 0xB2, 0x2B, 0x3A, 0xF8, 0x4A, 0x59, 0xD1, 0x36,
-+ 0x1A, 0x7D, 0x2D, 0xB9, 0xC6, 0x7F, 0xDE, 0x62, 0xB6, 0x56, 0xBA, 0xFA,
-+ 0x5A, 0xA1, 0x5B, 0x8C, 0x5F, 0x98, 0xEC, 0xF8, 0x93, 0x13, 0x11, 0x42,
-+ 0xEE, 0xC4, 0x6C, 0x4A, 0x87, 0x4E, 0x98, 0x22, 0xB6, 0xBB, 0xB0, 0x3A,
-+ 0x70, 0xA9, 0xCC, 0xBC, 0x31, 0x27, 0xE7, 0xBC, 0xCA, 0xEC, 0x52, 0x81,
-+ 0x76, 0x9A, 0x3F, 0x18, 0xC1, 0x1C, 0x4A, 0xC7, 0x56, 0xE3, 0xF0, 0x6F,
-+ 0x36, 0xBB, 0x9B, 0xF9, 0x43, 0x90, 0xBE, 0x79, 0x59, 0x63, 0x1C, 0xFE,
-+ 0xB6, 0x46, 0x8B, 0xBA, 0xBD, 0xAA, 0x28, 0x71, 0x9B, 0xD6, 0xDD, 0x05,
-+ 0x00, 0x3B, 0xBC, 0x2D, 0x48, 0xE7, 0x6E, 0x6E, 0x42, 0x95, 0x27, 0xAE,
-+ 0x93, 0x92, 0x6D, 0x59, 0x47, 0x10, 0x59, 0xAC, 0xDD, 0x95, 0x29, 0xC3,
-+ 0x1B, 0x86, 0x67, 0x12, 0x98, 0x48, 0x10, 0xA6, 0x90, 0xA3, 0x59, 0x9D,
-+ 0x10, 0x4E, 0xEA, 0xD8, 0xCB, 0xE3, 0x81, 0xBA, 0xA1, 0x52, 0x55, 0x78,
-+ 0xFF, 0x95, 0x40, 0xE0, 0xAE, 0x93, 0x38, 0x5D, 0x21, 0x13, 0x8A, 0xFC,
-+ 0x72, 0xC7, 0xFB, 0x70, 0x1C, 0xEE, 0x5D, 0xB0, 0xE5, 0xFA, 0x44, 0x86,
-+ 0x67, 0x97, 0x66, 0x64, 0xA4, 0x1E, 0xF8, 0x3A, 0x16, 0xF8, 0xC9, 0xE0,
-+ 0x09, 0xF3, 0x61, 0x4F
- };
+-};
++ len = RSA_public_encrypt(ptlen, plaintext, ctbuf, rsa, RSA_PKCS1_PADDING);
++ if (len <= 0)
++ goto err;
++ /* Check ciphertext doesn't match plaintext */
++ if (len >= ptlen && !memcmp(plaintext, ctbuf, ptlen))
++ goto err;
- static const unsigned char kat_RSA_X931_SHA512[] = {
+-static const unsigned char kat_RSA_X931_SHA512[] = {
- 0xA6, 0x65, 0xA2, 0x77, 0x4F, 0xB3, 0x86, 0xCB, 0x64, 0x3A, 0xC1, 0x63,
- 0xFC, 0xA1, 0xAA, 0xCB, 0x9B, 0x79, 0xDD, 0x4B, 0xE1, 0xD9, 0xDA, 0xAC,
- 0xE7, 0x47, 0x09, 0xB2, 0x11, 0x4B, 0x8A, 0xAA, 0x05, 0x9E, 0x77, 0xD7,
@@ -1040,47 +906,7 @@ diff -up openssl-1.0.1e/crypto/fips/fips_rsa_selftest.c.fips-reqs openssl-1.0.1e
- 0x9F, 0x09, 0xCA, 0x84, 0x15, 0x85, 0xE0, 0xED, 0x04, 0x2D, 0xFB, 0x7C,
- 0x36, 0x35, 0x21, 0x31, 0xC3, 0xFD, 0x92, 0x42, 0x11, 0x30, 0x71, 0x1B,
- 0x60, 0x83, 0x18, 0x88, 0xA3, 0xF5, 0x59, 0xC3
-+ 0xC9, 0x2B, 0x6D, 0x50, 0xBB, 0xD8, 0x0B, 0x35, 0xE8, 0x78, 0xF5, 0xFC,
-+ 0xBB, 0x6A, 0xB4, 0x32, 0x63, 0x9C, 0x75, 0x19, 0x1D, 0xFB, 0x68, 0xC0,
-+ 0xFC, 0x34, 0xCE, 0x09, 0xFD, 0xF4, 0x33, 0x42, 0x70, 0x24, 0x57, 0xBC,
-+ 0xB3, 0xBD, 0x24, 0x33, 0x9E, 0x4B, 0x00, 0xCE, 0x15, 0xB3, 0x27, 0xC6,
-+ 0x39, 0x7C, 0xC1, 0x28, 0x75, 0xFE, 0x7B, 0x76, 0x4F, 0xFB, 0x60, 0xA0,
-+ 0x30, 0xBF, 0x74, 0x2C, 0x9D, 0xE4, 0xC8, 0x03, 0xA8, 0xDE, 0xB9, 0x2A,
-+ 0xD9, 0x23, 0x24, 0xDC, 0xEE, 0xF0, 0xC1, 0x8B, 0x4D, 0x12, 0x4A, 0x41,
-+ 0x33, 0x3B, 0x23, 0xFE, 0xDD, 0xE9, 0xE8, 0x55, 0x2B, 0x3E, 0xA4, 0x1B,
-+ 0x95, 0x21, 0x2A, 0xEF, 0x84, 0x2E, 0x13, 0x3D, 0x97, 0x7C, 0x08, 0x86,
-+ 0xB1, 0x60, 0xA4, 0xB9, 0xC4, 0x5A, 0x5B, 0x2D, 0x3F, 0xD7, 0x0D, 0xB2,
-+ 0x41, 0x72, 0x7A, 0x7F, 0xA3, 0x12, 0xB0, 0xAD, 0x80, 0x2E, 0xD6, 0xD3,
-+ 0x8A, 0x71, 0x72, 0x67, 0x94, 0x6F, 0x51, 0x05, 0x39, 0xFD, 0xBE, 0x91,
-+ 0xDE, 0x1D, 0x65, 0xE4, 0xA7, 0xA6, 0x0F, 0xA5, 0x08, 0x1F, 0xFC, 0x53,
-+ 0x48, 0x7B, 0xB8, 0xCE, 0x79, 0xDA, 0xDC, 0x18, 0xD1, 0xD3, 0x8A, 0x73,
-+ 0xCE, 0x5A, 0x62, 0x1E, 0x33, 0xD0, 0x21, 0x9C, 0xF9, 0xDE, 0x9E, 0x7E,
-+ 0x4D, 0x0E, 0x24, 0x30, 0x94, 0xB8, 0xDC, 0x8B, 0x57, 0x7E, 0x3B, 0xC6,
-+ 0xD7, 0x0F, 0xFC, 0xA6, 0x1F, 0xEB, 0xAF, 0x19, 0xD0, 0xFF, 0x3D, 0x63,
-+ 0x03, 0x1D, 0xAB, 0x11, 0x0C, 0xAD, 0x45, 0x46, 0x67, 0x76, 0xC8, 0x26,
-+ 0xD4, 0xD4, 0x70, 0x1F, 0xDF, 0xEB, 0xE5, 0x7D, 0x75, 0xD8, 0x3B, 0x52,
-+ 0x6C, 0xE7, 0x23, 0xCB, 0xB9, 0x1B, 0xA4, 0x2E, 0x5B, 0xEC, 0xB4, 0xB6,
-+ 0xB6, 0x2D, 0x0B, 0x60, 0xE3, 0x7B, 0x05, 0xE8, 0x1E, 0xAD, 0xC7, 0xE7,
-+ 0xBE, 0xF4, 0x71, 0xAE
- };
-
-+static int fips_rsa_encrypt_test(RSA *rsa, const unsigned char *plaintext, int ptlen)
-+ {
-+ unsigned char *ctbuf = NULL, *ptbuf = NULL;
-+ int ret = 0;
-+ int len;
-+
-+ ctbuf = OPENSSL_malloc(RSA_size(rsa));
-+ if (!ctbuf)
-+ goto err;
-+
-+ len = RSA_public_encrypt(ptlen, plaintext, ctbuf, rsa, RSA_PKCS1_PADDING);
-+ if (len <= 0)
-+ goto err;
-+ /* Check ciphertext doesn't match plaintext */
-+ if (len >= ptlen && !memcmp(plaintext, ctbuf, ptlen))
-+ goto err;
-+
+-};
+ ptbuf = OPENSSL_malloc(RSA_size(rsa));
+ if (!ptbuf)
+ goto err;
@@ -1092,7 +918,7 @@ diff -up openssl-1.0.1e/crypto/fips/fips_rsa_selftest.c.fips-reqs openssl-1.0.1e
+ goto err;
+
+ ret = 1;
-+
+
+ err:
+ if (ctbuf)
+ OPENSSL_free(ctbuf);
@@ -1103,7 +929,7 @@ diff -up openssl-1.0.1e/crypto/fips/fips_rsa_selftest.c.fips-reqs openssl-1.0.1e
int FIPS_selftest_rsa()
{
-@@ -353,7 +587,7 @@ int FIPS_selftest_rsa()
+@@ -353,7 +487,7 @@ int FIPS_selftest_rsa()
if ((pk=EVP_PKEY_new()) == NULL)
goto err;
@@ -1112,13 +938,35 @@ diff -up openssl-1.0.1e/crypto/fips/fips_rsa_selftest.c.fips-reqs openssl-1.0.1e
if (!fips_pkey_signature_test(pk, kat_tbs, sizeof(kat_tbs) - 1,
kat_RSA_SHA1, sizeof(kat_RSA_SHA1),
-@@ -430,13 +664,15 @@ int FIPS_selftest_rsa()
- "RSA SHA512 X931"))
+@@ -407,36 +541,15 @@ int FIPS_selftest_rsa()
+ "RSA SHA512 PSS"))
goto err;
+-
+- if (!fips_pkey_signature_test(pk, kat_tbs, sizeof(kat_tbs) - 1,
+- kat_RSA_X931_SHA1, sizeof(kat_RSA_X931_SHA1),
+- EVP_sha1(), EVP_MD_CTX_FLAG_PAD_X931,
+- "RSA SHA1 X931"))
+- goto err;
+- /* NB: SHA224 not supported in X9.31 */
+- if (!fips_pkey_signature_test(pk, kat_tbs, sizeof(kat_tbs) - 1,
+- kat_RSA_X931_SHA256, sizeof(kat_RSA_X931_SHA256),
+- EVP_sha256(), EVP_MD_CTX_FLAG_PAD_X931,
+- "RSA SHA256 X931"))
+- goto err;
+- if (!fips_pkey_signature_test(pk, kat_tbs, sizeof(kat_tbs) - 1,
+- kat_RSA_X931_SHA384, sizeof(kat_RSA_X931_SHA384),
+- EVP_sha384(), EVP_MD_CTX_FLAG_PAD_X931,
+- "RSA SHA384 X931"))
+- goto err;
+- if (!fips_pkey_signature_test(pk, kat_tbs, sizeof(kat_tbs) - 1,
+- kat_RSA_X931_SHA512, sizeof(kat_RSA_X931_SHA512),
+- EVP_sha512(), EVP_MD_CTX_FLAG_PAD_X931,
+- "RSA SHA512 X931"))
+ if (!fips_rsa_encrypt_test(key, kat_tbs, sizeof(kat_tbs) - 1))
-+ goto err;
+ goto err;
+-
ret = 1;
err:
@@ -1129,9 +977,9 @@ diff -up openssl-1.0.1e/crypto/fips/fips_rsa_selftest.c.fips-reqs openssl-1.0.1e
RSA_free(key);
return ret;
}
-diff -up openssl-1.0.1e/crypto/fips/Makefile.fips-reqs openssl-1.0.1e/crypto/fips/Makefile
---- openssl-1.0.1e/crypto/fips/Makefile.fips-reqs 2013-12-18 12:17:20.000000000 +0100
-+++ openssl-1.0.1e/crypto/fips/Makefile 2013-12-18 17:14:20.348337362 +0100
+diff -up openssl-1.0.1i/crypto/fips/Makefile.fips-reqs openssl-1.0.1i/crypto/fips/Makefile
+--- openssl-1.0.1i/crypto/fips/Makefile.fips-reqs 2014-08-13 19:58:06.809832370 +0200
++++ openssl-1.0.1i/crypto/fips/Makefile 2014-08-13 19:58:06.820832624 +0200
@@ -24,13 +24,15 @@ LIBSRC=fips_aes_selftest.c fips_des_self
fips_rsa_selftest.c fips_sha_selftest.c fips.c fips_dsa_selftest.c fips_rand.c \
fips_rsa_x931g.c fips_post.c fips_drbg_ctr.c fips_drbg_hash.c fips_drbg_hmac.c \
@@ -1150,10 +998,10 @@ diff -up openssl-1.0.1e/crypto/fips/Makefile.fips-reqs openssl-1.0.1e/crypto/fip
LIBCRYPTO=-L.. -lcrypto
-diff -up openssl-1.0.1e/crypto/modes/gcm128.c.fips-reqs openssl-1.0.1e/crypto/modes/gcm128.c
---- openssl-1.0.1e/crypto/modes/gcm128.c.fips-reqs 2013-02-11 16:26:04.000000000 +0100
-+++ openssl-1.0.1e/crypto/modes/gcm128.c 2013-12-18 12:17:09.800637730 +0100
-@@ -898,6 +898,10 @@ int CRYPTO_gcm128_encrypt(GCM128_CONTEXT
+diff -up openssl-1.0.1i/crypto/modes/gcm128.c.fips-reqs openssl-1.0.1i/crypto/modes/gcm128.c
+--- openssl-1.0.1i/crypto/modes/gcm128.c.fips-reqs 2014-08-13 19:58:06.740830781 +0200
++++ openssl-1.0.1i/crypto/modes/gcm128.c 2014-08-13 19:58:06.820832624 +0200
+@@ -931,6 +931,10 @@ int CRYPTO_gcm128_encrypt(GCM128_CONTEXT
# endif
#endif
@@ -1164,7 +1012,7 @@ diff -up openssl-1.0.1e/crypto/modes/gcm128.c.fips-reqs openssl-1.0.1e/crypto/mo
#if 0
n = (unsigned int)mlen%16; /* alternative to ctx->mres */
#endif
-@@ -1200,6 +1204,10 @@ int CRYPTO_gcm128_encrypt_ctr32(GCM128_C
+@@ -1294,6 +1298,10 @@ int CRYPTO_gcm128_encrypt_ctr32(GCM128_C
# endif
#endif
@@ -1175,10 +1023,10 @@ diff -up openssl-1.0.1e/crypto/modes/gcm128.c.fips-reqs openssl-1.0.1e/crypto/mo
mlen += len;
if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
return -1;
-diff -up openssl-1.0.1e/crypto/modes/modes_lcl.h.fips-reqs openssl-1.0.1e/crypto/modes/modes_lcl.h
---- openssl-1.0.1e/crypto/modes/modes_lcl.h.fips-reqs 2013-12-18 12:17:09.388628895 +0100
-+++ openssl-1.0.1e/crypto/modes/modes_lcl.h 2013-12-18 12:17:09.800637730 +0100
-@@ -115,6 +115,7 @@ struct gcm128_context {
+diff -up openssl-1.0.1i/crypto/modes/modes_lcl.h.fips-reqs openssl-1.0.1i/crypto/modes/modes_lcl.h
+--- openssl-1.0.1i/crypto/modes/modes_lcl.h.fips-reqs 2014-08-13 19:58:06.410823180 +0200
++++ openssl-1.0.1i/crypto/modes/modes_lcl.h 2014-08-13 19:58:06.820832624 +0200
+@@ -112,6 +112,7 @@ struct gcm128_context {
unsigned int mres, ares;
block128_f block;
void *key;
@@ -1186,196 +1034,9 @@ diff -up openssl-1.0.1e/crypto/modes/modes_lcl.h.fips-reqs openssl-1.0.1e/crypto
};
struct xts128_context {
-diff -up openssl-1.0.1e/crypto/rand/md_rand.c.fips-reqs openssl-1.0.1e/crypto/rand/md_rand.c
---- openssl-1.0.1e/crypto/rand/md_rand.c.fips-reqs 2013-12-18 12:17:09.763636936 +0100
-+++ openssl-1.0.1e/crypto/rand/md_rand.c 2013-12-18 12:17:09.800637730 +0100
-@@ -143,12 +143,6 @@ static long md_count[2]={0,0};
- static double entropy=0;
- static int initialized=0;
-
--static unsigned int crypto_lock_rand = 0; /* may be set only when a thread
-- * holds CRYPTO_LOCK_RAND
-- * (to prevent double locking) */
--/* access to lockin_thread is synchronized by CRYPTO_LOCK_RAND2 */
--static CRYPTO_THREADID locking_threadid; /* valid iff crypto_lock_rand is set */
--
-
- #ifdef PREDICT
- int rand_predictable=0;
-@@ -196,7 +190,7 @@ static void ssleay_rand_add(const void *
- long md_c[2];
- unsigned char local_md[MD_DIGEST_LENGTH];
- EVP_MD_CTX m;
-- int do_not_lock;
-+ int locked;
-
- /*
- * (Based on the rand(3) manpage)
-@@ -213,19 +207,8 @@ static void ssleay_rand_add(const void *
- * hash function.
- */
-
-- /* check if we already have the lock */
-- if (crypto_lock_rand)
-- {
-- CRYPTO_THREADID cur;
-- CRYPTO_THREADID_current(&cur);
-- CRYPTO_r_lock(CRYPTO_LOCK_RAND2);
-- do_not_lock = !CRYPTO_THREADID_cmp(&locking_threadid, &cur);
-- CRYPTO_r_unlock(CRYPTO_LOCK_RAND2);
-- }
-- else
-- do_not_lock = 0;
-+ locked = private_RAND_lock(1);
-
-- if (!do_not_lock) CRYPTO_w_lock(CRYPTO_LOCK_RAND);
- st_idx=state_index;
-
- /* use our own copies of the counters so that even
-@@ -257,7 +240,8 @@ static void ssleay_rand_add(const void *
-
- md_count[1] += (num / MD_DIGEST_LENGTH) + (num % MD_DIGEST_LENGTH > 0);
-
-- if (!do_not_lock) CRYPTO_w_unlock(CRYPTO_LOCK_RAND);
-+ if (locked)
-+ private_RAND_lock(0);
-
- EVP_MD_CTX_init(&m);
- for (i=0; i<num; i+=MD_DIGEST_LENGTH)
-@@ -308,7 +292,7 @@ static void ssleay_rand_add(const void *
- }
- EVP_MD_CTX_cleanup(&m);
-
-- if (!do_not_lock) CRYPTO_w_lock(CRYPTO_LOCK_RAND);
-+ locked = private_RAND_lock(1);
- /* Don't just copy back local_md into md -- this could mean that
- * other thread's seeding remains without effect (except for
- * the incremented counter). By XORing it we keep at least as
-@@ -319,7 +303,8 @@ static void ssleay_rand_add(const void *
- }
- if (entropy < ENTROPY_NEEDED) /* stop counting when we have enough */
- entropy += add;
-- if (!do_not_lock) CRYPTO_w_unlock(CRYPTO_LOCK_RAND);
-+ if (locked)
-+ private_RAND_lock(0);
-
- #if !defined(OPENSSL_THREADS) && !defined(OPENSSL_SYS_WIN32)
- assert(md_c[1] == md_count[1]);
-@@ -344,6 +329,7 @@ static int ssleay_rand_bytes(unsigned ch
- pid_t curr_pid = getpid();
- #endif
- int do_stir_pool = 0;
-+ int locked;
-
- #ifdef PREDICT
- if (rand_predictable)
-@@ -381,13 +367,7 @@ static int ssleay_rand_bytes(unsigned ch
- * global 'md'.
- */
-
-- CRYPTO_w_lock(CRYPTO_LOCK_RAND);
--
-- /* prevent ssleay_rand_bytes() from trying to obtain the lock again */
-- CRYPTO_w_lock(CRYPTO_LOCK_RAND2);
-- CRYPTO_THREADID_current(&locking_threadid);
-- CRYPTO_w_unlock(CRYPTO_LOCK_RAND2);
-- crypto_lock_rand = 1;
-+ locked = private_RAND_lock(1);
-
- /* always poll for external entropy in FIPS mode, drbg provides the
- * expansion
-@@ -461,9 +441,8 @@ static int ssleay_rand_bytes(unsigned ch
-
- md_count[0] += 1;
-
-- /* before unlocking, we must clear 'crypto_lock_rand' */
-- crypto_lock_rand = 0;
-- CRYPTO_w_unlock(CRYPTO_LOCK_RAND);
-+ if (locked)
-+ private_RAND_lock(0);
-
- while (num > 0)
- {
-@@ -515,10 +494,11 @@ static int ssleay_rand_bytes(unsigned ch
- MD_Init(&m);
- MD_Update(&m,(unsigned char *)&(md_c[0]),sizeof(md_c));
- MD_Update(&m,local_md,MD_DIGEST_LENGTH);
-- CRYPTO_w_lock(CRYPTO_LOCK_RAND);
-+ locked = private_RAND_lock(1);
- MD_Update(&m,md,MD_DIGEST_LENGTH);
- MD_Final(&m,md);
-- CRYPTO_w_unlock(CRYPTO_LOCK_RAND);
-+ if (locked)
-+ private_RAND_lock(0);
-
- EVP_MD_CTX_cleanup(&m);
- if (ok)
-@@ -548,32 +528,10 @@ static int ssleay_rand_pseudo_bytes(unsi
-
- static int ssleay_rand_status(void)
- {
-- CRYPTO_THREADID cur;
- int ret;
-- int do_not_lock;
-+ int locked;
-
-- CRYPTO_THREADID_current(&cur);
-- /* check if we already have the lock
-- * (could happen if a RAND_poll() implementation calls RAND_status()) */
-- if (crypto_lock_rand)
-- {
-- CRYPTO_r_lock(CRYPTO_LOCK_RAND2);
-- do_not_lock = !CRYPTO_THREADID_cmp(&locking_threadid, &cur);
-- CRYPTO_r_unlock(CRYPTO_LOCK_RAND2);
-- }
-- else
-- do_not_lock = 0;
--
-- if (!do_not_lock)
-- {
-- CRYPTO_w_lock(CRYPTO_LOCK_RAND);
--
-- /* prevent ssleay_rand_bytes() from trying to obtain the lock again */
-- CRYPTO_w_lock(CRYPTO_LOCK_RAND2);
-- CRYPTO_THREADID_cpy(&locking_threadid, &cur);
-- CRYPTO_w_unlock(CRYPTO_LOCK_RAND2);
-- crypto_lock_rand = 1;
-- }
-+ locked = private_RAND_lock(1);
-
- if (!initialized)
- {
-@@ -583,13 +541,8 @@ static int ssleay_rand_status(void)
-
- ret = entropy >= ENTROPY_NEEDED;
-
-- if (!do_not_lock)
-- {
-- /* before unlocking, we must clear 'crypto_lock_rand' */
-- crypto_lock_rand = 0;
--
-- CRYPTO_w_unlock(CRYPTO_LOCK_RAND);
-- }
-+ if (locked)
-+ private_RAND_lock(0);
-
- return ret;
- }
-diff -up openssl-1.0.1e/crypto/rand/rand.h.fips-reqs openssl-1.0.1e/crypto/rand/rand.h
---- openssl-1.0.1e/crypto/rand/rand.h.fips-reqs 2013-12-18 12:17:09.764636958 +0100
-+++ openssl-1.0.1e/crypto/rand/rand.h 2013-12-18 12:17:09.800637730 +0100
-@@ -124,6 +124,8 @@ void RAND_set_fips_drbg_type(int type, i
- int RAND_init_fips(void);
- #endif
-
-+int private_RAND_lock(int lock);
-+
- /* BEGIN ERROR CODES */
- /* The following lines are auto generated by the script mkerr.pl. Any changes
- * made after this point may be overwritten when the script is next run.
-diff -up openssl-1.0.1e/crypto/rand/rand_lcl.h.fips-reqs openssl-1.0.1e/crypto/rand/rand_lcl.h
---- openssl-1.0.1e/crypto/rand/rand_lcl.h.fips-reqs 2013-12-18 12:17:09.507631447 +0100
-+++ openssl-1.0.1e/crypto/rand/rand_lcl.h 2013-12-18 12:17:09.800637730 +0100
+diff -up openssl-1.0.1i/crypto/rand/rand_lcl.h.fips-reqs openssl-1.0.1i/crypto/rand/rand_lcl.h
+--- openssl-1.0.1i/crypto/rand/rand_lcl.h.fips-reqs 2014-08-13 19:58:06.525825829 +0200
++++ openssl-1.0.1i/crypto/rand/rand_lcl.h 2014-08-13 19:58:06.820832624 +0200
@@ -112,7 +112,7 @@
#ifndef HEADER_RAND_LCL_H
#define HEADER_RAND_LCL_H
@@ -1385,57 +1046,19 @@ diff -up openssl-1.0.1e/crypto/rand/rand_lcl.h.fips-reqs openssl-1.0.1e/crypto/r
#if !defined(USE_MD5_RAND) && !defined(USE_SHA1_RAND) && !defined(USE_MDC2_RAND) && !defined(USE_MD2_RAND)
-diff -up openssl-1.0.1e/crypto/rand/rand_lib.c.fips-reqs openssl-1.0.1e/crypto/rand/rand_lib.c
---- openssl-1.0.1e/crypto/rand/rand_lib.c.fips-reqs 2013-02-11 16:26:04.000000000 +0100
-+++ openssl-1.0.1e/crypto/rand/rand_lib.c 2013-12-18 18:16:45.625850730 +0100
-@@ -181,6 +181,41 @@ int RAND_status(void)
- return 0;
- }
-
-+int private_RAND_lock(int lock)
-+ {
-+ static int crypto_lock_rand;
-+ static CRYPTO_THREADID locking_threadid;
-+ int do_lock;
-+
-+ if (!lock)
-+ {
-+ crypto_lock_rand = 0;
-+ CRYPTO_w_unlock(CRYPTO_LOCK_RAND);
-+ return 0;
-+ }
-+
-+ /* check if we already have the lock */
-+ if (crypto_lock_rand)
-+ {
-+ CRYPTO_THREADID cur;
-+ CRYPTO_THREADID_current(&cur);
-+ CRYPTO_r_lock(CRYPTO_LOCK_RAND2);
-+ do_lock = !!CRYPTO_THREADID_cmp(&locking_threadid, &cur);
-+ CRYPTO_r_unlock(CRYPTO_LOCK_RAND2);
-+ }
-+ else
-+ do_lock = 1;
-+ if (do_lock)
-+ {
-+ CRYPTO_w_lock(CRYPTO_LOCK_RAND);
-+ crypto_lock_rand = 1;
-+ CRYPTO_w_lock(CRYPTO_LOCK_RAND2);
-+ CRYPTO_THREADID_current(&locking_threadid);
-+ CRYPTO_w_unlock(CRYPTO_LOCK_RAND2);
-+ }
-+ return do_lock;
-+ }
-+
- #ifdef OPENSSL_FIPS
-
- /* FIPS DRBG initialisation code. This sets up the DRBG for use by the
-@@ -239,12 +274,16 @@ static int drbg_rand_add(DRBG_CTX *ctx,
+diff -up openssl-1.0.1i/crypto/rand/rand_lib.c.fips-reqs openssl-1.0.1i/crypto/rand/rand_lib.c
+--- openssl-1.0.1i/crypto/rand/rand_lib.c.fips-reqs 2014-08-06 23:10:56.000000000 +0200
++++ openssl-1.0.1i/crypto/rand/rand_lib.c 2014-08-13 19:58:06.820832624 +0200
+@@ -240,12 +240,24 @@ static int drbg_rand_add(DRBG_CTX *ctx,
double entropy)
{
RAND_SSLeay()->add(in, inlen, entropy);
+ if (FIPS_rand_status())
++ {
++ CRYPTO_w_lock(CRYPTO_LOCK_RAND);
+ FIPS_drbg_reseed(ctx, NULL, 0);
++ CRYPTO_w_unlock(CRYPTO_LOCK_RAND);
++ }
return 1;
}
@@ -1443,13 +1066,17 @@ diff -up openssl-1.0.1e/crypto/rand/rand_lib.c.fips-reqs openssl-1.0.1e/crypto/r
{
RAND_SSLeay()->seed(in, inlen);
+ if (FIPS_rand_status())
++ {
++ CRYPTO_w_lock(CRYPTO_LOCK_RAND);
+ FIPS_drbg_reseed(ctx, NULL, 0);
++ CRYPTO_w_unlock(CRYPTO_LOCK_RAND);
++ }
return 1;
}
-diff -up openssl-1.0.1e/crypto/rsa/rsa_gen.c.fips-reqs openssl-1.0.1e/crypto/rsa/rsa_gen.c
---- openssl-1.0.1e/crypto/rsa/rsa_gen.c.fips-reqs 2013-12-18 12:17:09.764636958 +0100
-+++ openssl-1.0.1e/crypto/rsa/rsa_gen.c 2013-12-19 17:40:58.483154314 +0100
+diff -up openssl-1.0.1i/crypto/rsa/rsa_gen.c.fips-reqs openssl-1.0.1i/crypto/rsa/rsa_gen.c
+--- openssl-1.0.1i/crypto/rsa/rsa_gen.c.fips-reqs 2014-08-13 19:58:06.782831748 +0200
++++ openssl-1.0.1i/crypto/rsa/rsa_gen.c 2014-08-13 19:58:06.821832646 +0200
@@ -1,5 +1,6 @@
/* crypto/rsa/rsa_gen.c */
/* Copyright (C) 1995-1998 Eric Young (eay at cryptsoft.com)
@@ -1480,7 +1107,7 @@ diff -up openssl-1.0.1e/crypto/rsa/rsa_gen.c.fips-reqs openssl-1.0.1e/crypto/rsa
+ return 0;
+ }
+
-+ if (bits != 2048 && bits != 3072)
++ if ((pbits & 0xFF) || (getenv("OPENSSL_ENFORCE_MODULUS_BITS") && bits != 2048 && bits != 3072))
+ {
+ FIPSerr(FIPS_F_RSA_BUILTIN_KEYGEN, FIPS_R_INVALID_KEY_LENGTH);
+ return 0;
@@ -1680,7 +1307,7 @@ diff -up openssl-1.0.1e/crypto/rsa/rsa_gen.c.fips-reqs openssl-1.0.1e/crypto/rsa
static int rsa_builtin_keygen(RSA *rsa, int bits, BIGNUM *e_value, BN_GENCB *cb)
{
BIGNUM *r0=NULL,*r1=NULL,*r2=NULL,*r3=NULL,*tmp;
-@@ -176,17 +393,7 @@ static int rsa_builtin_keygen(RSA *rsa,
+@@ -176,17 +393,12 @@ static int rsa_builtin_keygen(RSA *rsa,
#ifdef OPENSSL_FIPS
if (FIPS_module_mode())
{
@@ -1690,16 +1317,16 @@ diff -up openssl-1.0.1e/crypto/rsa/rsa_gen.c.fips-reqs openssl-1.0.1e/crypto/rsa
- return 0;
- }
-
-- if (bits < OPENSSL_RSA_FIPS_MIN_MODULUS_BITS)
-- {
-- FIPSerr(FIPS_F_RSA_BUILTIN_KEYGEN,FIPS_R_KEY_TOO_SHORT);
-- return 0;
-- }
+ if (bits < OPENSSL_RSA_FIPS_MIN_MODULUS_BITS)
+ {
+ FIPSerr(FIPS_F_RSA_BUILTIN_KEYGEN,FIPS_R_KEY_TOO_SHORT);
+ return 0;
+ }
+ return FIPS_rsa_builtin_keygen(rsa, bits, e_value, cb);
}
#endif
-@@ -301,17 +508,6 @@ static int rsa_builtin_keygen(RSA *rsa,
+@@ -301,17 +513,6 @@ static int rsa_builtin_keygen(RSA *rsa,
p = rsa->p;
if (!BN_mod_inverse(rsa->iqmp,rsa->q,p,ctx)) goto err;
@@ -1717,9 +1344,9 @@ diff -up openssl-1.0.1e/crypto/rsa/rsa_gen.c.fips-reqs openssl-1.0.1e/crypto/rsa
ok=1;
err:
if (ok == -1)
-diff -up openssl-1.0.1e/ssl/t1_enc.c.fips-reqs openssl-1.0.1e/ssl/t1_enc.c
---- openssl-1.0.1e/ssl/t1_enc.c.fips-reqs 2013-02-11 16:26:04.000000000 +0100
-+++ openssl-1.0.1e/ssl/t1_enc.c 2013-12-18 12:17:09.801637751 +0100
+diff -up openssl-1.0.1i/ssl/t1_enc.c.fips-reqs openssl-1.0.1i/ssl/t1_enc.c
+--- openssl-1.0.1i/ssl/t1_enc.c.fips-reqs 2014-08-06 23:10:56.000000000 +0200
++++ openssl-1.0.1i/ssl/t1_enc.c 2014-08-13 19:58:06.821832646 +0200
@@ -291,6 +291,27 @@ static int tls1_PRF(long digest_mask,
err:
return ret;
diff --git a/openssl-1.0.1i-ppc-asm-update.patch b/openssl-1.0.1i-ppc-asm-update.patch
new file mode 100644
index 0000000..cbf220c
--- /dev/null
+++ b/openssl-1.0.1i-ppc-asm-update.patch
@@ -0,0 +1,6636 @@
+diff -up openssl-1.0.1i/config.ppc-asm openssl-1.0.1i/config
+--- openssl-1.0.1i/config.ppc-asm 2014-08-06 23:10:56.000000000 +0200
++++ openssl-1.0.1i/config 2014-08-13 19:46:21.092578104 +0200
+@@ -587,13 +587,20 @@ case "$GUESSOS" in
+ fi
+ ;;
+ ppc64-*-linux2)
+- echo "WARNING! If you wish to build 64-bit library, then you have to"
+- echo " invoke './Configure linux-ppc64' *manually*."
+- if [ "$TEST" = "false" -a -t 1 ]; then
+- echo " You have about 5 seconds to press Ctrl-C to abort."
+- (trap "stty `stty -g`" 2 0; stty -icanon min 0 time 50; read waste) <&1
++ if [ -z "$KERNEL_BITS" ]; then
++ echo "WARNING! If you wish to build 64-bit library, then you have to"
++ echo " invoke './Configure linux-ppc64' *manually*."
++ if [ "$TEST" = "false" -a -t 1 ]; then
++ echo " You have about 5 seconds to press Ctrl-C to abort."
++ (trap "stty `stty -g`" 2 0; stty -icanon min 0 time 50; read waste) <&1
++ fi
++ fi
++ if [ "$KERNEL_BITS" = "64" ]; then
++ OUT="linux-ppc64"
++ else
++ OUT="linux-ppc"
++ (echo "__LP64__" | gcc -E -x c - 2>/dev/null | grep "^__LP64__" 2>&1 > /dev/null) || options="$options -m32"
+ fi
+- OUT="linux-ppc"
+ ;;
+ ppc-*-linux2) OUT="linux-ppc" ;;
+ ppc60x-*-vxworks*) OUT="vxworks-ppc60x" ;;
+diff -up openssl-1.0.1i/Configure.ppc-asm openssl-1.0.1i/Configure
+--- openssl-1.0.1i/Configure.ppc-asm 2014-08-06 23:10:56.000000000 +0200
++++ openssl-1.0.1i/Configure 2014-08-13 19:46:21.092578104 +0200
+@@ -139,8 +139,8 @@ my $s390x_asm="s390xcap.o s390xcpuid.o:b
+ my $armv4_asm="armcap.o armv4cpuid.o:bn_asm.o armv4-mont.o armv4-gf2m.o::aes_cbc.o aes-armv4.o:::sha1-armv4-large.o sha256-armv4.o sha512-armv4.o:::::::ghash-armv4.o::void";
+ my $parisc11_asm="pariscid.o:bn_asm.o parisc-mont.o::aes_core.o aes_cbc.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash-parisc.o::32";
+ my $parisc20_asm="pariscid.o:pa-risc2W.o parisc-mont.o::aes_core.o aes_cbc.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash-parisc.o::64";
+-my $ppc32_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.o aes_cbc.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o::::::::";
+-my $ppc64_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.o aes_cbc.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o sha512-ppc.o::::::::";
++my $ppc64_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.o aes_cbc.o aes-ppc.o vpaes-ppc.o aesp8-ppc.o:::sha1-ppc.o sha256-ppc.o sha512-ppc.o sha256p8-ppc.o sha512p8-ppc.o:::::::ghashp8-ppc.o:";
++my $ppc32_asm=$ppc64_asm;
+ my $no_asm=":::::::::::::::void";
+
+ # As for $BSDthreads. Idea is to maintain "collective" set of flags,
+@@ -357,6 +357,7 @@ my %table=(
+ ####
+ "linux-generic64","gcc:-DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
+ "linux-ppc64", "gcc:-m64 -DB_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL:${ppc64_asm}:linux64:dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::64",
++"linux-ppc64le","gcc:-m64 -DL_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL:$ppc64_asm:linux64le:dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::",
+ "linux-ia64", "gcc:-DL_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_UNROLL DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
+ "linux-ia64-ecc","ecc:-DL_ENDIAN -DTERMIO -O2 -Wall -no_cpprt::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
+ "linux-ia64-icc","icc:-DL_ENDIAN -DTERMIO -O2 -Wall -no_cpprt::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_RISC1 DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
+@@ -462,8 +463,8 @@ my %table=(
+
+ #### IBM's AIX.
+ "aix3-cc", "cc:-O -DB_ENDIAN -qmaxmem=16384::(unknown):AIX::BN_LLONG RC4_CHAR:::",
+-"aix-gcc", "gcc:-O -DB_ENDIAN::-pthread:AIX::BN_LLONG RC4_CHAR:${ppc32_asm}:aix32:dlfcn:aix-shared::-shared -Wl,-G:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)::-X32",
+-"aix64-gcc","gcc:-maix64 -O -DB_ENDIAN::-pthread:AIX::SIXTY_FOUR_BIT_LONG RC4_CHAR:${ppc64_asm}:aix64:dlfcn:aix-shared::-maix64 -shared -Wl,-G:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)::-X64",
++"aix-gcc", "gcc:-O -DB_ENDIAN::-pthread:AIX::BN_LLONG RC4_CHAR:$ppc32_asm:aix32:dlfcn:aix-shared::-shared -Wl,-G:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)::-X32",
++"aix64-gcc","gcc:-maix64 -O -DB_ENDIAN::-pthread:AIX::SIXTY_FOUR_BIT_LONG RC4_CHAR:$ppc64_asm:aix64:dlfcn:aix-shared::-maix64 -shared -Wl,-G:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)::-X64",
+ # Below targets assume AIX 5. Idea is to effectively disregard $OBJECT_MODE
+ # at build time. $OBJECT_MODE is respected at ./config stage!
+ "aix-cc", "cc:-q32 -O -DB_ENDIAN -qmaxmem=16384 -qro -qroconst::-qthreaded -D_THREAD_SAFE:AIX::BN_LLONG RC4_CHAR:${ppc32_asm}:aix32:dlfcn:aix-shared::-q32 -G:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)::-X 32",
+@@ -1526,7 +1527,7 @@ else {
+ $wp_obj="wp_block.o";
+ }
+ $cmll_obj=$cmll_enc unless ($cmll_obj =~ /.o$/);
+-if ($modes_obj =~ /ghash/)
++if ($modes_obj =~ /ghash\-/)
+ {
+ $cflags.=" -DGHASH_ASM";
+ }
+diff -up openssl-1.0.1i/crypto/aes/asm/aes-ppc.pl.ppc-asm openssl-1.0.1i/crypto/aes/asm/aes-ppc.pl
+--- openssl-1.0.1i/crypto/aes/asm/aes-ppc.pl.ppc-asm 2014-08-06 23:10:56.000000000 +0200
++++ openssl-1.0.1i/crypto/aes/asm/aes-ppc.pl 2014-08-13 19:46:21.092578104 +0200
+@@ -45,6 +45,8 @@ if ($flavour =~ /64/) {
+ $PUSH ="stw";
+ } else { die "nonsense $flavour"; }
+
++$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;
++
+ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+ ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
+ ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
+@@ -68,7 +70,7 @@ $key="r5";
+ $Tbl0="r3";
+ $Tbl1="r6";
+ $Tbl2="r7";
+-$Tbl3="r2";
++$Tbl3=$out; # stay away from "r2"; $out is offloaded to stack
+
+ $s0="r8";
+ $s1="r9";
+@@ -76,7 +78,7 @@ $s2="r10";
+ $s3="r11";
+
+ $t0="r12";
+-$t1="r13";
++$t1="r0"; # stay away from "r13";
+ $t2="r14";
+ $t3="r15";
+
+@@ -100,9 +102,6 @@ $acc13="r29";
+ $acc14="r30";
+ $acc15="r31";
+
+-# stay away from TLS pointer
+-if ($SIZE_T==8) { die if ($t1 ne "r13"); $t1="r0"; }
+-else { die if ($Tbl3 ne "r2"); $Tbl3=$t0; $t0="r0"; }
+ $mask80=$Tbl2;
+ $mask1b=$Tbl3;
+
+@@ -337,8 +336,7 @@ $code.=<<___;
+ $STU $sp,-$FRAME($sp)
+ mflr r0
+
+- $PUSH $toc,`$FRAME-$SIZE_T*20`($sp)
+- $PUSH r13,`$FRAME-$SIZE_T*19`($sp)
++ $PUSH $out,`$FRAME-$SIZE_T*19`($sp)
+ $PUSH r14,`$FRAME-$SIZE_T*18`($sp)
+ $PUSH r15,`$FRAME-$SIZE_T*17`($sp)
+ $PUSH r16,`$FRAME-$SIZE_T*16`($sp)
+@@ -365,16 +363,61 @@ $code.=<<___;
+ bne Lenc_unaligned
+
+ Lenc_unaligned_ok:
++___
++$code.=<<___ if (!$LITTLE_ENDIAN);
+ lwz $s0,0($inp)
+ lwz $s1,4($inp)
+ lwz $s2,8($inp)
+ lwz $s3,12($inp)
++___
++$code.=<<___ if ($LITTLE_ENDIAN);
++ lwz $t0,0($inp)
++ lwz $t1,4($inp)
++ lwz $t2,8($inp)
++ lwz $t3,12($inp)
++ rotlwi $s0,$t0,8
++ rotlwi $s1,$t1,8
++ rotlwi $s2,$t2,8
++ rotlwi $s3,$t3,8
++ rlwimi $s0,$t0,24,0,7
++ rlwimi $s1,$t1,24,0,7
++ rlwimi $s2,$t2,24,0,7
++ rlwimi $s3,$t3,24,0,7
++ rlwimi $s0,$t0,24,16,23
++ rlwimi $s1,$t1,24,16,23
++ rlwimi $s2,$t2,24,16,23
++ rlwimi $s3,$t3,24,16,23
++___
++$code.=<<___;
+ bl LAES_Te
+ bl Lppc_AES_encrypt_compact
++ $POP $out,`$FRAME-$SIZE_T*19`($sp)
++___
++$code.=<<___ if ($LITTLE_ENDIAN);
++ rotlwi $t0,$s0,8
++ rotlwi $t1,$s1,8
++ rotlwi $t2,$s2,8
++ rotlwi $t3,$s3,8
++ rlwimi $t0,$s0,24,0,7
++ rlwimi $t1,$s1,24,0,7
++ rlwimi $t2,$s2,24,0,7
++ rlwimi $t3,$s3,24,0,7
++ rlwimi $t0,$s0,24,16,23
++ rlwimi $t1,$s1,24,16,23
++ rlwimi $t2,$s2,24,16,23
++ rlwimi $t3,$s3,24,16,23
++ stw $t0,0($out)
++ stw $t1,4($out)
++ stw $t2,8($out)
++ stw $t3,12($out)
++___
++$code.=<<___ if (!$LITTLE_ENDIAN);
+ stw $s0,0($out)
+ stw $s1,4($out)
+ stw $s2,8($out)
+ stw $s3,12($out)
++___
++$code.=<<___;
+ b Lenc_done
+
+ Lenc_unaligned:
+@@ -417,6 +460,7 @@ Lenc_xpage:
+
+ bl LAES_Te
+ bl Lppc_AES_encrypt_compact
++ $POP $out,`$FRAME-$SIZE_T*19`($sp)
+
+ extrwi $acc00,$s0,8,0
+ extrwi $acc01,$s0,8,8
+@@ -449,8 +493,6 @@ Lenc_xpage:
+
+ Lenc_done:
+ $POP r0,`$FRAME+$LRSAVE`($sp)
+- $POP $toc,`$FRAME-$SIZE_T*20`($sp)
+- $POP r13,`$FRAME-$SIZE_T*19`($sp)
+ $POP r14,`$FRAME-$SIZE_T*18`($sp)
+ $POP r15,`$FRAME-$SIZE_T*17`($sp)
+ $POP r16,`$FRAME-$SIZE_T*16`($sp)
+@@ -764,6 +806,7 @@ Lenc_compact_done:
+ blr
+ .long 0
+ .byte 0,12,0x14,0,0,0,0,0
++.size .AES_encrypt,.-.AES_encrypt
+
+ .globl .AES_decrypt
+ .align 7
+@@ -771,8 +814,7 @@ Lenc_compact_done:
+ $STU $sp,-$FRAME($sp)
+ mflr r0
+
+- $PUSH $toc,`$FRAME-$SIZE_T*20`($sp)
+- $PUSH r13,`$FRAME-$SIZE_T*19`($sp)
++ $PUSH $out,`$FRAME-$SIZE_T*19`($sp)
+ $PUSH r14,`$FRAME-$SIZE_T*18`($sp)
+ $PUSH r15,`$FRAME-$SIZE_T*17`($sp)
+ $PUSH r16,`$FRAME-$SIZE_T*16`($sp)
+@@ -799,16 +841,61 @@ Lenc_compact_done:
+ bne Ldec_unaligned
+
+ Ldec_unaligned_ok:
++___
++$code.=<<___ if (!$LITTLE_ENDIAN);
+ lwz $s0,0($inp)
+ lwz $s1,4($inp)
+ lwz $s2,8($inp)
+ lwz $s3,12($inp)
++___
++$code.=<<___ if ($LITTLE_ENDIAN);
++ lwz $t0,0($inp)
++ lwz $t1,4($inp)
++ lwz $t2,8($inp)
++ lwz $t3,12($inp)
++ rotlwi $s0,$t0,8
++ rotlwi $s1,$t1,8
++ rotlwi $s2,$t2,8
++ rotlwi $s3,$t3,8
++ rlwimi $s0,$t0,24,0,7
++ rlwimi $s1,$t1,24,0,7
++ rlwimi $s2,$t2,24,0,7
++ rlwimi $s3,$t3,24,0,7
++ rlwimi $s0,$t0,24,16,23
++ rlwimi $s1,$t1,24,16,23
++ rlwimi $s2,$t2,24,16,23
++ rlwimi $s3,$t3,24,16,23
++___
++$code.=<<___;
+ bl LAES_Td
+ bl Lppc_AES_decrypt_compact
++ $POP $out,`$FRAME-$SIZE_T*19`($sp)
++___
++$code.=<<___ if ($LITTLE_ENDIAN);
++ rotlwi $t0,$s0,8
++ rotlwi $t1,$s1,8
++ rotlwi $t2,$s2,8
++ rotlwi $t3,$s3,8
++ rlwimi $t0,$s0,24,0,7
++ rlwimi $t1,$s1,24,0,7
++ rlwimi $t2,$s2,24,0,7
++ rlwimi $t3,$s3,24,0,7
++ rlwimi $t0,$s0,24,16,23
++ rlwimi $t1,$s1,24,16,23
++ rlwimi $t2,$s2,24,16,23
++ rlwimi $t3,$s3,24,16,23
++ stw $t0,0($out)
++ stw $t1,4($out)
++ stw $t2,8($out)
++ stw $t3,12($out)
++___
++$code.=<<___ if (!$LITTLE_ENDIAN);
+ stw $s0,0($out)
+ stw $s1,4($out)
+ stw $s2,8($out)
+ stw $s3,12($out)
++___
++$code.=<<___;
+ b Ldec_done
+
+ Ldec_unaligned:
+@@ -851,6 +938,7 @@ Ldec_xpage:
+
+ bl LAES_Td
+ bl Lppc_AES_decrypt_compact
++ $POP $out,`$FRAME-$SIZE_T*19`($sp)
+
+ extrwi $acc00,$s0,8,0
+ extrwi $acc01,$s0,8,8
+@@ -883,8 +971,6 @@ Ldec_xpage:
+
+ Ldec_done:
+ $POP r0,`$FRAME+$LRSAVE`($sp)
+- $POP $toc,`$FRAME-$SIZE_T*20`($sp)
+- $POP r13,`$FRAME-$SIZE_T*19`($sp)
+ $POP r14,`$FRAME-$SIZE_T*18`($sp)
+ $POP r15,`$FRAME-$SIZE_T*17`($sp)
+ $POP r16,`$FRAME-$SIZE_T*16`($sp)
+@@ -1355,6 +1441,7 @@ Ldec_compact_done:
+ blr
+ .long 0
+ .byte 0,12,0x14,0,0,0,0,0
++.size .AES_decrypt,.-.AES_decrypt
+
+ .asciz "AES for PPC, CRYPTOGAMS by <appro\@openssl.org>"
+ .align 7
+diff -up openssl-1.0.1i/crypto/aes/asm/aesp8-ppc.pl.ppc-asm openssl-1.0.1i/crypto/aes/asm/aesp8-ppc.pl
+--- openssl-1.0.1i/crypto/aes/asm/aesp8-ppc.pl.ppc-asm 2014-08-13 19:46:21.093578128 +0200
++++ openssl-1.0.1i/crypto/aes/asm/aesp8-ppc.pl 2014-08-13 19:46:21.093578128 +0200
+@@ -0,0 +1,1940 @@
++#!/usr/bin/env perl
++#
++# ====================================================================
++# Written by Andy Polyakov <appro at openssl.org> for the OpenSSL
++# project. The module is, however, dual licensed under OpenSSL and
++# CRYPTOGAMS licenses depending on where you obtain it. For further
++# details see http://www.openssl.org/~appro/cryptogams/.
++# ====================================================================
++#
++# This module implements support for AES instructions as per PowerISA
++# specification version 2.07, first implemented by POWER8 processor.
++# The module is endian-agnostic in sense that it supports both big-
++# and little-endian cases. Data alignment in parallelizable modes is
++# handled with VSX loads and stores, which implies MSR.VSX flag being
++# set. It should also be noted that ISA specification doesn't prohibit
++# alignment exceptions for these instructions on page boundaries.
++# Initially alignment was handled in pure AltiVec/VMX way [when data
++# is aligned programmatically, which in turn guarantees exception-
++# free execution], but it turned to hamper performance when vcipher
++# instructions are interleaved. It's reckoned that eventual
++# misalignment penalties at page boundaries are in average lower
++# than additional overhead in pure AltiVec approach.
++
++$flavour = shift;
++
++if ($flavour =~ /64/) {
++ $SIZE_T =8;
++ $LRSAVE =2*$SIZE_T;
++ $STU ="stdu";
++ $POP ="ld";
++ $PUSH ="std";
++ $UCMP ="cmpld";
++ $SHL ="sldi";
++} elsif ($flavour =~ /32/) {
++ $SIZE_T =4;
++ $LRSAVE =$SIZE_T;
++ $STU ="stwu";
++ $POP ="lwz";
++ $PUSH ="stw";
++ $UCMP ="cmplw";
++ $SHL ="slwi";
++} else { die "nonsense $flavour"; }
++
++$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;
++
++$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
++( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
++( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
++die "can't locate ppc-xlate.pl";
++
++open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!";
++
++$FRAME=8*$SIZE_T;
++$prefix="aes_p8";
++
++$sp="r1";
++$vrsave="r12";
++
++#########################################################################
++{{{ # Key setup procedures #
++my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
++my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
++my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));
++
++$code.=<<___;
++.machine "any"
++
++.text
++
++.align 7
++rcon:
++.long 0x01000000, 0x01000000, 0x01000000, 0x01000000 ?rev
++.long 0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000 ?rev
++.long 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c ?rev
++.long 0,0,0,0 ?asis
++Lconsts:
++ mflr r0
++ bcl 20,31,\$+4
++ mflr $ptr #vvvvv "distance between . and rcon
++ addi $ptr,$ptr,-0x48
++ mtlr r0
++ blr
++ .long 0
++ .byte 0,12,0x14,0,0,0,0,0
++.asciz "AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
++
++.globl .${prefix}_set_encrypt_key
++.align 5
++.${prefix}_set_encrypt_key:
++Lset_encrypt_key:
++ mflr r11
++ $PUSH r11,$LRSAVE($sp)
++
++ li $ptr,-1
++ ${UCMP}i $inp,0
++ beq- Lenc_key_abort # if ($inp==0) return -1;
++ ${UCMP}i $out,0
++ beq- Lenc_key_abort # if ($out==0) return -1;
++ li $ptr,-2
++ cmpwi $bits,128
++ blt- Lenc_key_abort
++ cmpwi $bits,256
++ bgt- Lenc_key_abort
++ andi. r0,$bits,0x3f
++ bne- Lenc_key_abort
++
++ lis r0,0xfff0
++ mfspr $vrsave,256
++ mtspr 256,r0
++
++ bl Lconsts
++ mtlr r11
++
++ neg r9,$inp
++ lvx $in0,0,$inp
++ addi $inp,$inp,15 # 15 is not typo
++ lvsr $key,0,r9 # borrow $key
++ li r8,0x20
++ cmpwi $bits,192
++ lvx $in1,0,$inp
++ le?vspltisb $mask,0x0f # borrow $mask
++ lvx $rcon,0,$ptr
++ le?vxor $key,$key,$mask # adjust for byte swap
++ lvx $mask,r8,$ptr
++ addi $ptr,$ptr,0x10
++ vperm $in0,$in0,$in1,$key # align [and byte swap in LE]
++ li $cnt,8
++ vxor $zero,$zero,$zero
++ mtctr $cnt
++
++ ?lvsr $outperm,0,$out
++ vspltisb $outmask,-1
++ lvx $outhead,0,$out
++ ?vperm $outmask,$zero,$outmask,$outperm
++
++ blt Loop128
++ addi $inp,$inp,8
++ beq L192
++ addi $inp,$inp,8
++ b L256
++
++.align 4
++Loop128:
++ vperm $key,$in0,$in0,$mask # rotate-n-splat
++ vsldoi $tmp,$zero,$in0,12 # >>32
++ vperm $outtail,$in0,$in0,$outperm # rotate
++ vsel $stage,$outhead,$outtail,$outmask
++ vmr $outhead,$outtail
++ vcipherlast $key,$key,$rcon
++ stvx $stage,0,$out
++ addi $out,$out,16
++
++ vxor $in0,$in0,$tmp
++ vsldoi $tmp,$zero,$tmp,12 # >>32
++ vxor $in0,$in0,$tmp
++ vsldoi $tmp,$zero,$tmp,12 # >>32
++ vxor $in0,$in0,$tmp
++ vadduwm $rcon,$rcon,$rcon
++ vxor $in0,$in0,$key
++ bdnz Loop128
++
++ lvx $rcon,0,$ptr # last two round keys
++
++ vperm $key,$in0,$in0,$mask # rotate-n-splat
++ vsldoi $tmp,$zero,$in0,12 # >>32
++ vperm $outtail,$in0,$in0,$outperm # rotate
++ vsel $stage,$outhead,$outtail,$outmask
++ vmr $outhead,$outtail
++ vcipherlast $key,$key,$rcon
++ stvx $stage,0,$out
++ addi $out,$out,16
++
++ vxor $in0,$in0,$tmp
++ vsldoi $tmp,$zero,$tmp,12 # >>32
++ vxor $in0,$in0,$tmp
++ vsldoi $tmp,$zero,$tmp,12 # >>32
++ vxor $in0,$in0,$tmp
++ vadduwm $rcon,$rcon,$rcon
++ vxor $in0,$in0,$key
++
++ vperm $key,$in0,$in0,$mask # rotate-n-splat
++ vsldoi $tmp,$zero,$in0,12 # >>32
++ vperm $outtail,$in0,$in0,$outperm # rotate
++ vsel $stage,$outhead,$outtail,$outmask
++ vmr $outhead,$outtail
++ vcipherlast $key,$key,$rcon
++ stvx $stage,0,$out
++ addi $out,$out,16
++
++ vxor $in0,$in0,$tmp
++ vsldoi $tmp,$zero,$tmp,12 # >>32
++ vxor $in0,$in0,$tmp
++ vsldoi $tmp,$zero,$tmp,12 # >>32
++ vxor $in0,$in0,$tmp
++ vxor $in0,$in0,$key
++ vperm $outtail,$in0,$in0,$outperm # rotate
++ vsel $stage,$outhead,$outtail,$outmask
++ vmr $outhead,$outtail
++ stvx $stage,0,$out
++
++ addi $inp,$out,15 # 15 is not typo
++ addi $out,$out,0x50
++
++ li $rounds,10
++ b Ldone
++
++.align 4
++L192:
++ lvx $tmp,0,$inp
++ li $cnt,4
++ vperm $outtail,$in0,$in0,$outperm # rotate
++ vsel $stage,$outhead,$outtail,$outmask
++ vmr $outhead,$outtail
++ stvx $stage,0,$out
++ addi $out,$out,16
++ vperm $in1,$in1,$tmp,$key # align [and byte swap in LE]
++ vspltisb $key,8 # borrow $key
++ mtctr $cnt
++ vsububm $mask,$mask,$key # adjust the mask
++
++Loop192:
++ vperm $key,$in1,$in1,$mask # roate-n-splat
++ vsldoi $tmp,$zero,$in0,12 # >>32
++ vcipherlast $key,$key,$rcon
++
++ vxor $in0,$in0,$tmp
++ vsldoi $tmp,$zero,$tmp,12 # >>32
++ vxor $in0,$in0,$tmp
++ vsldoi $tmp,$zero,$tmp,12 # >>32
++ vxor $in0,$in0,$tmp
++
++ vsldoi $stage,$zero,$in1,8
++ vspltw $tmp,$in0,3
++ vxor $tmp,$tmp,$in1
++ vsldoi $in1,$zero,$in1,12 # >>32
++ vadduwm $rcon,$rcon,$rcon
++ vxor $in1,$in1,$tmp
++ vxor $in0,$in0,$key
++ vxor $in1,$in1,$key
++ vsldoi $stage,$stage,$in0,8
++
++ vperm $key,$in1,$in1,$mask # rotate-n-splat
++ vsldoi $tmp,$zero,$in0,12 # >>32
++ vperm $outtail,$stage,$stage,$outperm # rotate
++ vsel $stage,$outhead,$outtail,$outmask
++ vmr $outhead,$outtail
++ vcipherlast $key,$key,$rcon
++ stvx $stage,0,$out
++ addi $out,$out,16
++
++ vsldoi $stage,$in0,$in1,8
++ vxor $in0,$in0,$tmp
++ vsldoi $tmp,$zero,$tmp,12 # >>32
++ vperm $outtail,$stage,$stage,$outperm # rotate
++ vsel $stage,$outhead,$outtail,$outmask
++ vmr $outhead,$outtail
++ vxor $in0,$in0,$tmp
++ vsldoi $tmp,$zero,$tmp,12 # >>32
++ vxor $in0,$in0,$tmp
++ stvx $stage,0,$out
++ addi $out,$out,16
++
++ vspltw $tmp,$in0,3
++ vxor $tmp,$tmp,$in1
++ vsldoi $in1,$zero,$in1,12 # >>32
++ vadduwm $rcon,$rcon,$rcon
++ vxor $in1,$in1,$tmp
++ vxor $in0,$in0,$key
++ vxor $in1,$in1,$key
++ vperm $outtail,$in0,$in0,$outperm # rotate
++ vsel $stage,$outhead,$outtail,$outmask
++ vmr $outhead,$outtail
++ stvx $stage,0,$out
++ addi $inp,$out,15 # 15 is not typo
++ addi $out,$out,16
++ bdnz Loop192
++
++ li $rounds,12
++ addi $out,$out,0x20
++ b Ldone
++
++.align 4
++L256:
++ lvx $tmp,0,$inp
++ li $cnt,7
++ li $rounds,14
++ vperm $outtail,$in0,$in0,$outperm # rotate
++ vsel $stage,$outhead,$outtail,$outmask
++ vmr $outhead,$outtail
++ stvx $stage,0,$out
++ addi $out,$out,16
++ vperm $in1,$in1,$tmp,$key # align [and byte swap in LE]
++ mtctr $cnt
++
++Loop256:
++ vperm $key,$in1,$in1,$mask # rotate-n-splat
++ vsldoi $tmp,$zero,$in0,12 # >>32
++ vperm $outtail,$in1,$in1,$outperm # rotate
++ vsel $stage,$outhead,$outtail,$outmask
++ vmr $outhead,$outtail
++ vcipherlast $key,$key,$rcon
++ stvx $stage,0,$out
++ addi $out,$out,16
++
++ vxor $in0,$in0,$tmp
++ vsldoi $tmp,$zero,$tmp,12 # >>32
++ vxor $in0,$in0,$tmp
++ vsldoi $tmp,$zero,$tmp,12 # >>32
++ vxor $in0,$in0,$tmp
++ vadduwm $rcon,$rcon,$rcon
++ vxor $in0,$in0,$key
++ vperm $outtail,$in0,$in0,$outperm # rotate
++ vsel $stage,$outhead,$outtail,$outmask
++ vmr $outhead,$outtail
++ stvx $stage,0,$out
++ addi $inp,$out,15 # 15 is not typo
++ addi $out,$out,16
++ bdz Ldone
++
++ vspltw $key,$in0,3 # just splat
++ vsldoi $tmp,$zero,$in1,12 # >>32
++ vsbox $key,$key
++
++ vxor $in1,$in1,$tmp
++ vsldoi $tmp,$zero,$tmp,12 # >>32
++ vxor $in1,$in1,$tmp
++ vsldoi $tmp,$zero,$tmp,12 # >>32
++ vxor $in1,$in1,$tmp
++
++ vxor $in1,$in1,$key
++ b Loop256
++
++.align 4
++Ldone:
++ lvx $in1,0,$inp # redundant in aligned case
++ vsel $in1,$outhead,$in1,$outmask
++ stvx $in1,0,$inp
++ li $ptr,0
++ mtspr 256,$vrsave
++ stw $rounds,0($out)
++
++Lenc_key_abort:
++ mr r3,$ptr
++ blr
++ .long 0
++ .byte 0,12,0x14,1,0,0,3,0
++ .long 0
++.size .${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key
++
++.globl .${prefix}_set_decrypt_key
++.align 5
++.${prefix}_set_decrypt_key:
++ $STU $sp,-$FRAME($sp)
++ mflr r10
++ $PUSH r10,$FRAME+$LRSAVE($sp)
++ bl Lset_encrypt_key
++ mtlr r10
++
++ cmpwi r3,0
++ bne- Ldec_key_abort
++
++ slwi $cnt,$rounds,4
++ subi $inp,$out,240 # first round key
++ srwi $rounds,$rounds,1
++ add $out,$inp,$cnt # last round key
++ mtctr $rounds
++
++Ldeckey:
++ lwz r0, 0($inp)
++ lwz r6, 4($inp)
++ lwz r7, 8($inp)
++ lwz r8, 12($inp)
++ addi $inp,$inp,16
++ lwz r9, 0($out)
++ lwz r10,4($out)
++ lwz r11,8($out)
++ lwz r12,12($out)
++ stw r0, 0($out)
++ stw r6, 4($out)
++ stw r7, 8($out)
++ stw r8, 12($out)
++ subi $out,$out,16
++ stw r9, -16($inp)
++ stw r10,-12($inp)
++ stw r11,-8($inp)
++ stw r12,-4($inp)
++ bdnz Ldeckey
++
++ xor r3,r3,r3 # return value
++Ldec_key_abort:
++ addi $sp,$sp,$FRAME
++ blr
++ .long 0
++ .byte 0,12,4,1,0x80,0,3,0
++ .long 0
++.size .${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
++___
++}}}
++#########################################################################
++{{{ # Single block en- and decrypt procedures #
++sub gen_block () {
++my $dir = shift;
++my $n = $dir eq "de" ? "n" : "";
++my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));
++
++$code.=<<___;
++.globl .${prefix}_${dir}crypt
++.align 5
++.${prefix}_${dir}crypt:
++ lwz $rounds,240($key)
++ lis r0,0xfc00
++ mfspr $vrsave,256
++ li $idx,15 # 15 is not typo
++ mtspr 256,r0
++
++ lvx v0,0,$inp
++ neg r11,$out
++ lvx v1,$idx,$inp
++ lvsl v2,0,$inp # inpperm
++ le?vspltisb v4,0x0f
++ ?lvsl v3,0,r11 # outperm
++ le?vxor v2,v2,v4
++ li $idx,16
++ vperm v0,v0,v1,v2 # align [and byte swap in LE]
++ lvx v1,0,$key
++ ?lvsl v5,0,$key # keyperm
++ srwi $rounds,$rounds,1
++ lvx v2,$idx,$key
++ addi $idx,$idx,16
++ subi $rounds,$rounds,1
++ ?vperm v1,v1,v2,v5 # align round key
++
++ vxor v0,v0,v1
++ lvx v1,$idx,$key
++ addi $idx,$idx,16
++ mtctr $rounds
++
++Loop_${dir}c:
++ ?vperm v2,v2,v1,v5
++ v${n}cipher v0,v0,v2
++ lvx v2,$idx,$key
++ addi $idx,$idx,16
++ ?vperm v1,v1,v2,v5
++ v${n}cipher v0,v0,v1
++ lvx v1,$idx,$key
++ addi $idx,$idx,16
++ bdnz Loop_${dir}c
++
++ ?vperm v2,v2,v1,v5
++ v${n}cipher v0,v0,v2
++ lvx v2,$idx,$key
++ ?vperm v1,v1,v2,v5
++ v${n}cipherlast v0,v0,v1
++
++ vspltisb v2,-1
++ vxor v1,v1,v1
++ li $idx,15 # 15 is not typo
++ ?vperm v2,v1,v2,v3 # outmask
++ le?vxor v3,v3,v4
++ lvx v1,0,$out # outhead
++ vperm v0,v0,v0,v3 # rotate [and byte swap in LE]
++ vsel v1,v1,v0,v2
++ lvx v4,$idx,$out
++ stvx v1,0,$out
++ vsel v0,v0,v4,v2
++ stvx v0,$idx,$out
++
++ mtspr 256,$vrsave
++ blr
++ .long 0
++ .byte 0,12,0x14,0,0,0,3,0
++ .long 0
++.size .${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
++___
++}
++&gen_block("en");
++&gen_block("de");
++}}}
++#########################################################################
++{{{ # CBC en- and decrypt procedures #
++my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10));
++my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3));
++my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=
++ map("v$_",(4..10));
++$code.=<<___;
++.globl .${prefix}_cbc_encrypt
++.align 5
++.${prefix}_cbc_encrypt:
++ ${UCMP}i $len,16
++ bltlr-
++
++ cmpwi $enc,0 # test direction
++ lis r0,0xffe0
++ mfspr $vrsave,256
++ mtspr 256,r0
++
++ li $idx,15
++ vxor $rndkey0,$rndkey0,$rndkey0
++ le?vspltisb $tmp,0x0f
++
++ lvx $ivec,0,$ivp # load [unaligned] iv
++ lvsl $inpperm,0,$ivp
++ lvx $inptail,$idx,$ivp
++ le?vxor $inpperm,$inpperm,$tmp
++ vperm $ivec,$ivec,$inptail,$inpperm
++
++ neg r11,$inp
++ ?lvsl $keyperm,0,$key # prepare for unaligned key
++ lwz $rounds,240($key)
++
++ lvsr $inpperm,0,r11 # prepare for unaligned load
++ lvx $inptail,0,$inp
++ addi $inp,$inp,15 # 15 is not typo
++ le?vxor $inpperm,$inpperm,$tmp
++
++ ?lvsr $outperm,0,$out # prepare for unaligned store
++ vspltisb $outmask,-1
++ lvx $outhead,0,$out
++ ?vperm $outmask,$rndkey0,$outmask,$outperm
++ le?vxor $outperm,$outperm,$tmp
++
++ srwi $rounds,$rounds,1
++ li $idx,16
++ subi $rounds,$rounds,1
++ beq Lcbc_dec
++
++Lcbc_enc:
++ vmr $inout,$inptail
++ lvx $inptail,0,$inp
++ addi $inp,$inp,16
++ mtctr $rounds
++ subi $len,$len,16 # len-=16
++
++ lvx $rndkey0,0,$key
++ vperm $inout,$inout,$inptail,$inpperm
++ lvx $rndkey1,$idx,$key
++ addi $idx,$idx,16
++ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
++ vxor $inout,$inout,$rndkey0
++ lvx $rndkey0,$idx,$key
++ addi $idx,$idx,16
++ vxor $inout,$inout,$ivec
++
++Loop_cbc_enc:
++ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
++ vcipher $inout,$inout,$rndkey1
++ lvx $rndkey1,$idx,$key
++ addi $idx,$idx,16
++ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
++ vcipher $inout,$inout,$rndkey0
++ lvx $rndkey0,$idx,$key
++ addi $idx,$idx,16
++ bdnz Loop_cbc_enc
++
++ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
++ vcipher $inout,$inout,$rndkey1
++ lvx $rndkey1,$idx,$key
++ li $idx,16
++ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
++ vcipherlast $ivec,$inout,$rndkey0
++ ${UCMP}i $len,16
++
++ vperm $tmp,$ivec,$ivec,$outperm
++ vsel $inout,$outhead,$tmp,$outmask
++ vmr $outhead,$tmp
++ stvx $inout,0,$out
++ addi $out,$out,16
++ bge Lcbc_enc
++
++ b Lcbc_done
++
++.align 4
++Lcbc_dec:
++ ${UCMP}i $len,128
++ bge _aesp8_cbc_decrypt8x
++ vmr $tmp,$inptail
++ lvx $inptail,0,$inp
++ addi $inp,$inp,16
++ mtctr $rounds
++ subi $len,$len,16 # len-=16
++
++ lvx $rndkey0,0,$key
++ vperm $tmp,$tmp,$inptail,$inpperm
++ lvx $rndkey1,$idx,$key
++ addi $idx,$idx,16
++ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
++ vxor $inout,$tmp,$rndkey0
++ lvx $rndkey0,$idx,$key
++ addi $idx,$idx,16
++
++Loop_cbc_dec:
++ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
++ vncipher $inout,$inout,$rndkey1
++ lvx $rndkey1,$idx,$key
++ addi $idx,$idx,16
++ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
++ vncipher $inout,$inout,$rndkey0
++ lvx $rndkey0,$idx,$key
++ addi $idx,$idx,16
++ bdnz Loop_cbc_dec
++
++ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
++ vncipher $inout,$inout,$rndkey1
++ lvx $rndkey1,$idx,$key
++ li $idx,16
++ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
++ vncipherlast $inout,$inout,$rndkey0
++ ${UCMP}i $len,16
++
++ vxor $inout,$inout,$ivec
++ vmr $ivec,$tmp
++ vperm $tmp,$inout,$inout,$outperm
++ vsel $inout,$outhead,$tmp,$outmask
++ vmr $outhead,$tmp
++ stvx $inout,0,$out
++ addi $out,$out,16
++ bge Lcbc_dec
++
++Lcbc_done:
++ addi $out,$out,-1
++ lvx $inout,0,$out # redundant in aligned case
++ vsel $inout,$outhead,$inout,$outmask
++ stvx $inout,0,$out
++
++ neg $enc,$ivp # write [unaligned] iv
++ li $idx,15 # 15 is not typo
++ vxor $rndkey0,$rndkey0,$rndkey0
++ vspltisb $outmask,-1
++ le?vspltisb $tmp,0x0f
++ ?lvsl $outperm,0,$enc
++ ?vperm $outmask,$rndkey0,$outmask,$outperm
++ le?vxor $outperm,$outperm,$tmp
++ lvx $outhead,0,$ivp
++ vperm $ivec,$ivec,$ivec,$outperm
++ vsel $inout,$outhead,$ivec,$outmask
++ lvx $inptail,$idx,$ivp
++ stvx $inout,0,$ivp
++ vsel $inout,$ivec,$inptail,$outmask
++ stvx $inout,$idx,$ivp
++
++ mtspr 256,$vrsave
++ blr
++ .long 0
++ .byte 0,12,0x14,0,0,0,6,0
++ .long 0
++___
++#########################################################################
++{{ # Optimized CBC decrypt procedure #
++my $key_="r11";
++my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
++my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));
++my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));
++my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys
++ # v26-v31 last 6 round keys
++my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
++
++$code.=<<___;
++.align 5
++_aesp8_cbc_decrypt8x:
++ $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
++ li r10,`$FRAME+8*16+15`
++ li r11,`$FRAME+8*16+31`
++ stvx v20,r10,$sp # ABI says so
++ addi r10,r10,32
++ stvx v21,r11,$sp
++ addi r11,r11,32
++ stvx v22,r10,$sp
++ addi r10,r10,32
++ stvx v23,r11,$sp
++ addi r11,r11,32
++ stvx v24,r10,$sp
++ addi r10,r10,32
++ stvx v25,r11,$sp
++ addi r11,r11,32
++ stvx v26,r10,$sp
++ addi r10,r10,32
++ stvx v27,r11,$sp
++ addi r11,r11,32
++ stvx v28,r10,$sp
++ addi r10,r10,32
++ stvx v29,r11,$sp
++ addi r11,r11,32
++ stvx v30,r10,$sp
++ stvx v31,r11,$sp
++ li r0,-1
++ stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
++ li $x10,0x10
++ $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
++ li $x20,0x20
++ $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
++ li $x30,0x30
++ $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
++ li $x40,0x40
++ $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
++ li $x50,0x50
++ $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
++ li $x60,0x60
++ $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
++ li $x70,0x70
++ mtspr 256,r0
++
++ subi $rounds,$rounds,3 # -4 in total
++ subi $len,$len,128 # bias
++
++ lvx $rndkey0,$x00,$key # load key schedule
++ lvx v30,$x10,$key
++ addi $key,$key,0x20
++ lvx v31,$x00,$key
++ ?vperm $rndkey0,$rndkey0,v30,$keyperm
++ addi $key_,$sp,$FRAME+15
++ mtctr $rounds
++
++Load_cbc_dec_key:
++ ?vperm v24,v30,v31,$keyperm
++ lvx v30,$x10,$key
++ addi $key,$key,0x20
++ stvx v24,$x00,$key_ # off-load round[1]
++ ?vperm v25,v31,v30,$keyperm
++ lvx v31,$x00,$key
++ stvx v25,$x10,$key_ # off-load round[2]
++ addi $key_,$key_,0x20
++ bdnz Load_cbc_dec_key
++
++ lvx v26,$x10,$key
++ ?vperm v24,v30,v31,$keyperm
++ lvx v27,$x20,$key
++ stvx v24,$x00,$key_ # off-load round[3]
++ ?vperm v25,v31,v26,$keyperm
++ lvx v28,$x30,$key
++ stvx v25,$x10,$key_ # off-load round[4]
++ addi $key_,$sp,$FRAME+15 # rewind $key_
++ ?vperm v26,v26,v27,$keyperm
++ lvx v29,$x40,$key
++ ?vperm v27,v27,v28,$keyperm
++ lvx v30,$x50,$key
++ ?vperm v28,v28,v29,$keyperm
++ lvx v31,$x60,$key
++ ?vperm v29,v29,v30,$keyperm
++ lvx $out0,$x70,$key # borrow $out0
++ ?vperm v30,v30,v31,$keyperm
++ lvx v24,$x00,$key_ # pre-load round[1]
++ ?vperm v31,v31,$out0,$keyperm
++ lvx v25,$x10,$key_ # pre-load round[2]
++
++ #lvx $inptail,0,$inp # "caller" already did this
++ #addi $inp,$inp,15 # 15 is not typo
++ subi $inp,$inp,15 # undo "caller"
++
++ le?li $idx,8
++ lvx_u $in0,$x00,$inp # load first 8 "words"
++ le?lvsl $inpperm,0,$idx
++ le?vspltisb $tmp,0x0f
++ lvx_u $in1,$x10,$inp
++ le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
++ lvx_u $in2,$x20,$inp
++ le?vperm $in0,$in0,$in0,$inpperm
++ lvx_u $in3,$x30,$inp
++ le?vperm $in1,$in1,$in1,$inpperm
++ lvx_u $in4,$x40,$inp
++ le?vperm $in2,$in2,$in2,$inpperm
++ vxor $out0,$in0,$rndkey0
++ lvx_u $in5,$x50,$inp
++ le?vperm $in3,$in3,$in3,$inpperm
++ vxor $out1,$in1,$rndkey0
++ lvx_u $in6,$x60,$inp
++ le?vperm $in4,$in4,$in4,$inpperm
++ vxor $out2,$in2,$rndkey0
++ lvx_u $in7,$x70,$inp
++ addi $inp,$inp,0x80
++ le?vperm $in5,$in5,$in5,$inpperm
++ vxor $out3,$in3,$rndkey0
++ le?vperm $in6,$in6,$in6,$inpperm
++ vxor $out4,$in4,$rndkey0
++ le?vperm $in7,$in7,$in7,$inpperm
++ vxor $out5,$in5,$rndkey0
++ vxor $out6,$in6,$rndkey0
++ vxor $out7,$in7,$rndkey0
++
++ mtctr $rounds
++ b Loop_cbc_dec8x
++.align 5
++Loop_cbc_dec8x:
++ vncipher $out0,$out0,v24
++ vncipher $out1,$out1,v24
++ vncipher $out2,$out2,v24
++ vncipher $out3,$out3,v24
++ vncipher $out4,$out4,v24
++ vncipher $out5,$out5,v24
++ vncipher $out6,$out6,v24
++ vncipher $out7,$out7,v24
++ lvx v24,$x20,$key_ # round[3]
++ addi $key_,$key_,0x20
++
++ vncipher $out0,$out0,v25
++ vncipher $out1,$out1,v25
++ vncipher $out2,$out2,v25
++ vncipher $out3,$out3,v25
++ vncipher $out4,$out4,v25
++ vncipher $out5,$out5,v25
++ vncipher $out6,$out6,v25
++ vncipher $out7,$out7,v25
++ lvx v25,$x10,$key_ # round[4]
++ bdnz Loop_cbc_dec8x
++
++ subic $len,$len,128 # $len-=128
++ vncipher $out0,$out0,v24
++ vncipher $out1,$out1,v24
++ vncipher $out2,$out2,v24
++ vncipher $out3,$out3,v24
++ vncipher $out4,$out4,v24
++ vncipher $out5,$out5,v24
++ vncipher $out6,$out6,v24
++ vncipher $out7,$out7,v24
++
++ subfe. r0,r0,r0 # borrow?-1:0
++ vncipher $out0,$out0,v25
++ vncipher $out1,$out1,v25
++ vncipher $out2,$out2,v25
++ vncipher $out3,$out3,v25
++ vncipher $out4,$out4,v25
++ vncipher $out5,$out5,v25
++ vncipher $out6,$out6,v25
++ vncipher $out7,$out7,v25
++
++ and r0,r0,$len
++ vncipher $out0,$out0,v26
++ vncipher $out1,$out1,v26
++ vncipher $out2,$out2,v26
++ vncipher $out3,$out3,v26
++ vncipher $out4,$out4,v26
++ vncipher $out5,$out5,v26
++ vncipher $out6,$out6,v26
++ vncipher $out7,$out7,v26
++
++ add $inp,$inp,r0 # $inp is adjusted in such
++ # way that at exit from the
++ # loop inX-in7 are loaded
++ # with last "words"
++ vncipher $out0,$out0,v27
++ vncipher $out1,$out1,v27
++ vncipher $out2,$out2,v27
++ vncipher $out3,$out3,v27
++ vncipher $out4,$out4,v27
++ vncipher $out5,$out5,v27
++ vncipher $out6,$out6,v27
++ vncipher $out7,$out7,v27
++
++ addi $key_,$sp,$FRAME+15 # rewind $key_
++ vncipher $out0,$out0,v28
++ vncipher $out1,$out1,v28
++ vncipher $out2,$out2,v28
++ vncipher $out3,$out3,v28
++ vncipher $out4,$out4,v28
++ vncipher $out5,$out5,v28
++ vncipher $out6,$out6,v28
++ vncipher $out7,$out7,v28
++ lvx v24,$x00,$key_ # re-pre-load round[1]
++
++ vncipher $out0,$out0,v29
++ vncipher $out1,$out1,v29
++ vncipher $out2,$out2,v29
++ vncipher $out3,$out3,v29
++ vncipher $out4,$out4,v29
++ vncipher $out5,$out5,v29
++ vncipher $out6,$out6,v29
++ vncipher $out7,$out7,v29
++ lvx v25,$x10,$key_ # re-pre-load round[2]
++
++ vncipher $out0,$out0,v30
++ vxor $ivec,$ivec,v31 # xor with last round key
++ vncipher $out1,$out1,v30
++ vxor $in0,$in0,v31
++ vncipher $out2,$out2,v30
++ vxor $in1,$in1,v31
++ vncipher $out3,$out3,v30
++ vxor $in2,$in2,v31
++ vncipher $out4,$out4,v30
++ vxor $in3,$in3,v31
++ vncipher $out5,$out5,v30
++ vxor $in4,$in4,v31
++ vncipher $out6,$out6,v30
++ vxor $in5,$in5,v31
++ vncipher $out7,$out7,v30
++ vxor $in6,$in6,v31
++
++ vncipherlast $out0,$out0,$ivec
++ vncipherlast $out1,$out1,$in0
++ lvx_u $in0,$x00,$inp # load next input block
++ vncipherlast $out2,$out2,$in1
++ lvx_u $in1,$x10,$inp
++ vncipherlast $out3,$out3,$in2
++ le?vperm $in0,$in0,$in0,$inpperm
++ lvx_u $in2,$x20,$inp
++ vncipherlast $out4,$out4,$in3
++ le?vperm $in1,$in1,$in1,$inpperm
++ lvx_u $in3,$x30,$inp
++ vncipherlast $out5,$out5,$in4
++ le?vperm $in2,$in2,$in2,$inpperm
++ lvx_u $in4,$x40,$inp
++ vncipherlast $out6,$out6,$in5
++ le?vperm $in3,$in3,$in3,$inpperm
++ lvx_u $in5,$x50,$inp
++ vncipherlast $out7,$out7,$in6
++ le?vperm $in4,$in4,$in4,$inpperm
++ lvx_u $in6,$x60,$inp
++ vmr $ivec,$in7
++ le?vperm $in5,$in5,$in5,$inpperm
++ lvx_u $in7,$x70,$inp
++ addi $inp,$inp,0x80
++
++ le?vperm $out0,$out0,$out0,$inpperm
++ le?vperm $out1,$out1,$out1,$inpperm
++ stvx_u $out0,$x00,$out
++ le?vperm $in6,$in6,$in6,$inpperm
++ vxor $out0,$in0,$rndkey0
++ le?vperm $out2,$out2,$out2,$inpperm
++ stvx_u $out1,$x10,$out
++ le?vperm $in7,$in7,$in7,$inpperm
++ vxor $out1,$in1,$rndkey0
++ le?vperm $out3,$out3,$out3,$inpperm
++ stvx_u $out2,$x20,$out
++ vxor $out2,$in2,$rndkey0
++ le?vperm $out4,$out4,$out4,$inpperm
++ stvx_u $out3,$x30,$out
++ vxor $out3,$in3,$rndkey0
++ le?vperm $out5,$out5,$out5,$inpperm
++ stvx_u $out4,$x40,$out
++ vxor $out4,$in4,$rndkey0
++ le?vperm $out6,$out6,$out6,$inpperm
++ stvx_u $out5,$x50,$out
++ vxor $out5,$in5,$rndkey0
++ le?vperm $out7,$out7,$out7,$inpperm
++ stvx_u $out6,$x60,$out
++ vxor $out6,$in6,$rndkey0
++ stvx_u $out7,$x70,$out
++ addi $out,$out,0x80
++ vxor $out7,$in7,$rndkey0
++
++ mtctr $rounds
++ beq Loop_cbc_dec8x # did $len-=128 borrow?
++
++ addic. $len,$len,128
++ beq Lcbc_dec8x_done
++ nop
++ nop
++
++Loop_cbc_dec8x_tail: # up to 7 "words" tail...
++ vncipher $out1,$out1,v24
++ vncipher $out2,$out2,v24
++ vncipher $out3,$out3,v24
++ vncipher $out4,$out4,v24
++ vncipher $out5,$out5,v24
++ vncipher $out6,$out6,v24
++ vncipher $out7,$out7,v24
++ lvx v24,$x20,$key_ # round[3]
++ addi $key_,$key_,0x20
++
++ vncipher $out1,$out1,v25
++ vncipher $out2,$out2,v25
++ vncipher $out3,$out3,v25
++ vncipher $out4,$out4,v25
++ vncipher $out5,$out5,v25
++ vncipher $out6,$out6,v25
++ vncipher $out7,$out7,v25
++ lvx v25,$x10,$key_ # round[4]
++ bdnz Loop_cbc_dec8x_tail
++
++ vncipher $out1,$out1,v24
++ vncipher $out2,$out2,v24
++ vncipher $out3,$out3,v24
++ vncipher $out4,$out4,v24
++ vncipher $out5,$out5,v24
++ vncipher $out6,$out6,v24
++ vncipher $out7,$out7,v24
++
++ vncipher $out1,$out1,v25
++ vncipher $out2,$out2,v25
++ vncipher $out3,$out3,v25
++ vncipher $out4,$out4,v25
++ vncipher $out5,$out5,v25
++ vncipher $out6,$out6,v25
++ vncipher $out7,$out7,v25
++
++ vncipher $out1,$out1,v26
++ vncipher $out2,$out2,v26
++ vncipher $out3,$out3,v26
++ vncipher $out4,$out4,v26
++ vncipher $out5,$out5,v26
++ vncipher $out6,$out6,v26
++ vncipher $out7,$out7,v26
++
++ vncipher $out1,$out1,v27
++ vncipher $out2,$out2,v27
++ vncipher $out3,$out3,v27
++ vncipher $out4,$out4,v27
++ vncipher $out5,$out5,v27
++ vncipher $out6,$out6,v27
++ vncipher $out7,$out7,v27
++
++ vncipher $out1,$out1,v28
++ vncipher $out2,$out2,v28
++ vncipher $out3,$out3,v28
++ vncipher $out4,$out4,v28
++ vncipher $out5,$out5,v28
++ vncipher $out6,$out6,v28
++ vncipher $out7,$out7,v28
++
++ vncipher $out1,$out1,v29
++ vncipher $out2,$out2,v29
++ vncipher $out3,$out3,v29
++ vncipher $out4,$out4,v29
++ vncipher $out5,$out5,v29
++ vncipher $out6,$out6,v29
++ vncipher $out7,$out7,v29
++
++ vncipher $out1,$out1,v30
++ vxor $ivec,$ivec,v31 # last round key
++ vncipher $out2,$out2,v30
++ vxor $in1,$in1,v31
++ vncipher $out3,$out3,v30
++ vxor $in2,$in2,v31
++ vncipher $out4,$out4,v30
++ vxor $in3,$in3,v31
++ vncipher $out5,$out5,v30
++ vxor $in4,$in4,v31
++ vncipher $out6,$out6,v30
++ vxor $in5,$in5,v31
++ vncipher $out7,$out7,v30
++ vxor $in6,$in6,v31
++
++ cmplwi $len,32 # switch($len)
++ blt Lcbc_dec8x_one
++ nop
++ beq Lcbc_dec8x_two
++ cmplwi $len,64
++ blt Lcbc_dec8x_three
++ nop
++ beq Lcbc_dec8x_four
++ cmplwi $len,96
++ blt Lcbc_dec8x_five
++ nop
++ beq Lcbc_dec8x_six
++
++Lcbc_dec8x_seven:
++ vncipherlast $out1,$out1,$ivec
++ vncipherlast $out2,$out2,$in1
++ vncipherlast $out3,$out3,$in2
++ vncipherlast $out4,$out4,$in3
++ vncipherlast $out5,$out5,$in4
++ vncipherlast $out6,$out6,$in5
++ vncipherlast $out7,$out7,$in6
++ vmr $ivec,$in7
++
++ le?vperm $out1,$out1,$out1,$inpperm
++ le?vperm $out2,$out2,$out2,$inpperm
++ stvx_u $out1,$x00,$out
++ le?vperm $out3,$out3,$out3,$inpperm
++ stvx_u $out2,$x10,$out
++ le?vperm $out4,$out4,$out4,$inpperm
++ stvx_u $out3,$x20,$out
++ le?vperm $out5,$out5,$out5,$inpperm
++ stvx_u $out4,$x30,$out
++ le?vperm $out6,$out6,$out6,$inpperm
++ stvx_u $out5,$x40,$out
++ le?vperm $out7,$out7,$out7,$inpperm
++ stvx_u $out6,$x50,$out
++ stvx_u $out7,$x60,$out
++ addi $out,$out,0x70
++ b Lcbc_dec8x_done
++
++.align 5
++Lcbc_dec8x_six:
++ vncipherlast $out2,$out2,$ivec
++ vncipherlast $out3,$out3,$in2
++ vncipherlast $out4,$out4,$in3
++ vncipherlast $out5,$out5,$in4
++ vncipherlast $out6,$out6,$in5
++ vncipherlast $out7,$out7,$in6
++ vmr $ivec,$in7
++
++ le?vperm $out2,$out2,$out2,$inpperm
++ le?vperm $out3,$out3,$out3,$inpperm
++ stvx_u $out2,$x00,$out
++ le?vperm $out4,$out4,$out4,$inpperm
++ stvx_u $out3,$x10,$out
++ le?vperm $out5,$out5,$out5,$inpperm
++ stvx_u $out4,$x20,$out
++ le?vperm $out6,$out6,$out6,$inpperm
++ stvx_u $out5,$x30,$out
++ le?vperm $out7,$out7,$out7,$inpperm
++ stvx_u $out6,$x40,$out
++ stvx_u $out7,$x50,$out
++ addi $out,$out,0x60
++ b Lcbc_dec8x_done
++
++.align 5
++Lcbc_dec8x_five:
++ vncipherlast $out3,$out3,$ivec
++ vncipherlast $out4,$out4,$in3
++ vncipherlast $out5,$out5,$in4
++ vncipherlast $out6,$out6,$in5
++ vncipherlast $out7,$out7,$in6
++ vmr $ivec,$in7
++
++ le?vperm $out3,$out3,$out3,$inpperm
++ le?vperm $out4,$out4,$out4,$inpperm
++ stvx_u $out3,$x00,$out
++ le?vperm $out5,$out5,$out5,$inpperm
++ stvx_u $out4,$x10,$out
++ le?vperm $out6,$out6,$out6,$inpperm
++ stvx_u $out5,$x20,$out
++ le?vperm $out7,$out7,$out7,$inpperm
++ stvx_u $out6,$x30,$out
++ stvx_u $out7,$x40,$out
++ addi $out,$out,0x50
++ b Lcbc_dec8x_done
++
++.align 5
++Lcbc_dec8x_four:
++ vncipherlast $out4,$out4,$ivec
++ vncipherlast $out5,$out5,$in4
++ vncipherlast $out6,$out6,$in5
++ vncipherlast $out7,$out7,$in6
++ vmr $ivec,$in7
++
++ le?vperm $out4,$out4,$out4,$inpperm
++ le?vperm $out5,$out5,$out5,$inpperm
++ stvx_u $out4,$x00,$out
++ le?vperm $out6,$out6,$out6,$inpperm
++ stvx_u $out5,$x10,$out
++ le?vperm $out7,$out7,$out7,$inpperm
++ stvx_u $out6,$x20,$out
++ stvx_u $out7,$x30,$out
++ addi $out,$out,0x40
++ b Lcbc_dec8x_done
++
++.align 5
++Lcbc_dec8x_three:
++ vncipherlast $out5,$out5,$ivec
++ vncipherlast $out6,$out6,$in5
++ vncipherlast $out7,$out7,$in6
++ vmr $ivec,$in7
++
++ le?vperm $out5,$out5,$out5,$inpperm
++ le?vperm $out6,$out6,$out6,$inpperm
++ stvx_u $out5,$x00,$out
++ le?vperm $out7,$out7,$out7,$inpperm
++ stvx_u $out6,$x10,$out
++ stvx_u $out7,$x20,$out
++ addi $out,$out,0x30
++ b Lcbc_dec8x_done
++
++.align 5
++Lcbc_dec8x_two:
++ vncipherlast $out6,$out6,$ivec
++ vncipherlast $out7,$out7,$in6
++ vmr $ivec,$in7
++
++ le?vperm $out6,$out6,$out6,$inpperm
++ le?vperm $out7,$out7,$out7,$inpperm
++ stvx_u $out6,$x00,$out
++ stvx_u $out7,$x10,$out
++ addi $out,$out,0x20
++ b Lcbc_dec8x_done
++
++.align 5
++Lcbc_dec8x_one:
++ vncipherlast $out7,$out7,$ivec
++ vmr $ivec,$in7
++
++ le?vperm $out7,$out7,$out7,$inpperm
++ stvx_u $out7,0,$out
++ addi $out,$out,0x10
++
++Lcbc_dec8x_done:
++ le?vperm $ivec,$ivec,$ivec,$inpperm
++ stvx_u $ivec,0,$ivp # write [unaligned] iv
++
++ li r10,`$FRAME+15`
++ li r11,`$FRAME+31`
++ stvx $inpperm,r10,$sp # wipe copies of round keys
++ addi r10,r10,32
++ stvx $inpperm,r11,$sp
++ addi r11,r11,32
++ stvx $inpperm,r10,$sp
++ addi r10,r10,32
++ stvx $inpperm,r11,$sp
++ addi r11,r11,32
++ stvx $inpperm,r10,$sp
++ addi r10,r10,32
++ stvx $inpperm,r11,$sp
++ addi r11,r11,32
++ stvx $inpperm,r10,$sp
++ addi r10,r10,32
++ stvx $inpperm,r11,$sp
++ addi r11,r11,32
++
++ mtspr 256,$vrsave
++ lvx v20,r10,$sp # ABI says so
++ addi r10,r10,32
++ lvx v21,r11,$sp
++ addi r11,r11,32
++ lvx v22,r10,$sp
++ addi r10,r10,32
++ lvx v23,r11,$sp
++ addi r11,r11,32
++ lvx v24,r10,$sp
++ addi r10,r10,32
++ lvx v25,r11,$sp
++ addi r11,r11,32
++ lvx v26,r10,$sp
++ addi r10,r10,32
++ lvx v27,r11,$sp
++ addi r11,r11,32
++ lvx v28,r10,$sp
++ addi r10,r10,32
++ lvx v29,r11,$sp
++ addi r11,r11,32
++ lvx v30,r10,$sp
++ lvx v31,r11,$sp
++ $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
++ $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
++ $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
++ $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
++ $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
++ $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
++ addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
++ blr
++ .long 0
++ .byte 0,12,0x14,0,0x80,6,6,0
++ .long 0
++.size .${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt
++___
++}} }}}
++
++#########################################################################
++{{{ # CTR procedure[s] #
++my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10));
++my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3));
++my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)=
++ map("v$_",(4..11));
++my $dat=$tmp;
++
++$code.=<<___;
++.globl .${prefix}_ctr32_encrypt_blocks
++.align 5
++.${prefix}_ctr32_encrypt_blocks:
++ ${UCMP}i $len,1
++ bltlr-
++
++ lis r0,0xfff0
++ mfspr $vrsave,256
++ mtspr 256,r0
++
++ li $idx,15
++ vxor $rndkey0,$rndkey0,$rndkey0
++ le?vspltisb $tmp,0x0f
++
++ lvx $ivec,0,$ivp # load [unaligned] iv
++ lvsl $inpperm,0,$ivp
++ lvx $inptail,$idx,$ivp
++ vspltisb $one,1
++ le?vxor $inpperm,$inpperm,$tmp
++ vperm $ivec,$ivec,$inptail,$inpperm
++ vsldoi $one,$rndkey0,$one,1
++
++ neg r11,$inp
++ ?lvsl $keyperm,0,$key # prepare for unaligned key
++ lwz $rounds,240($key)
++
++ lvsr $inpperm,0,r11 # prepare for unaligned load
++ lvx $inptail,0,$inp
++ addi $inp,$inp,15 # 15 is not typo
++ le?vxor $inpperm,$inpperm,$tmp
++
++ srwi $rounds,$rounds,1
++ li $idx,16
++ subi $rounds,$rounds,1
++
++ ${UCMP}i $len,8
++ bge _aesp8_ctr32_encrypt8x
++
++ ?lvsr $outperm,0,$out # prepare for unaligned store
++ vspltisb $outmask,-1
++ lvx $outhead,0,$out
++ ?vperm $outmask,$rndkey0,$outmask,$outperm
++ le?vxor $outperm,$outperm,$tmp
++
++ lvx $rndkey0,0,$key
++ mtctr $rounds
++ lvx $rndkey1,$idx,$key
++ addi $idx,$idx,16
++ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
++ vxor $inout,$ivec,$rndkey0
++ lvx $rndkey0,$idx,$key
++ addi $idx,$idx,16
++ b Loop_ctr32_enc
++
++.align 5
++Loop_ctr32_enc:
++ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
++ vcipher $inout,$inout,$rndkey1
++ lvx $rndkey1,$idx,$key
++ addi $idx,$idx,16
++ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
++ vcipher $inout,$inout,$rndkey0
++ lvx $rndkey0,$idx,$key
++ addi $idx,$idx,16
++ bdnz Loop_ctr32_enc
++
++ vadduwm $ivec,$ivec,$one
++ vmr $dat,$inptail
++ lvx $inptail,0,$inp
++ addi $inp,$inp,16
++ subic. $len,$len,1 # blocks--
++
++ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
++ vcipher $inout,$inout,$rndkey1
++ lvx $rndkey1,$idx,$key
++ vperm $dat,$dat,$inptail,$inpperm
++ li $idx,16
++ ?vperm $rndkey1,$rndkey0,$rndkey1,$keyperm
++ lvx $rndkey0,0,$key
++ vxor $dat,$dat,$rndkey1 # last round key
++ vcipherlast $inout,$inout,$dat
++
++ lvx $rndkey1,$idx,$key
++ addi $idx,$idx,16
++ vperm $inout,$inout,$inout,$outperm
++ vsel $dat,$outhead,$inout,$outmask
++ mtctr $rounds
++ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
++ vmr $outhead,$inout
++ vxor $inout,$ivec,$rndkey0
++ lvx $rndkey0,$idx,$key
++ addi $idx,$idx,16
++ stvx $dat,0,$out
++ addi $out,$out,16
++ bne Loop_ctr32_enc
++
++ addi $out,$out,-1
++ lvx $inout,0,$out # redundant in aligned case
++ vsel $inout,$outhead,$inout,$outmask
++ stvx $inout,0,$out
++
++ mtspr 256,$vrsave
++ blr
++ .long 0
++ .byte 0,12,0x14,0,0,0,6,0
++ .long 0
++___
++#########################################################################
++{{ # Optimized CTR procedure #
++my $key_="r11";
++my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
++my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14));
++my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22));
++my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys
++ # v26-v31 last 6 round keys
++my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
++my ($two,$three,$four)=($outhead,$outperm,$outmask);
++
++$code.=<<___;
++.align 5
++_aesp8_ctr32_encrypt8x:
++ $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
++ li r10,`$FRAME+8*16+15`
++ li r11,`$FRAME+8*16+31`
++ stvx v20,r10,$sp # ABI says so
++ addi r10,r10,32
++ stvx v21,r11,$sp
++ addi r11,r11,32
++ stvx v22,r10,$sp
++ addi r10,r10,32
++ stvx v23,r11,$sp
++ addi r11,r11,32
++ stvx v24,r10,$sp
++ addi r10,r10,32
++ stvx v25,r11,$sp
++ addi r11,r11,32
++ stvx v26,r10,$sp
++ addi r10,r10,32
++ stvx v27,r11,$sp
++ addi r11,r11,32
++ stvx v28,r10,$sp
++ addi r10,r10,32
++ stvx v29,r11,$sp
++ addi r11,r11,32
++ stvx v30,r10,$sp
++ stvx v31,r11,$sp
++ li r0,-1
++ stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
++ li $x10,0x10
++ $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
++ li $x20,0x20
++ $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
++ li $x30,0x30
++ $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
++ li $x40,0x40
++ $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
++ li $x50,0x50
++ $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
++ li $x60,0x60
++ $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
++ li $x70,0x70
++ mtspr 256,r0
++
++ subi $rounds,$rounds,3 # -4 in total
++
++ lvx $rndkey0,$x00,$key # load key schedule
++ lvx v30,$x10,$key
++ addi $key,$key,0x20
++ lvx v31,$x00,$key
++ ?vperm $rndkey0,$rndkey0,v30,$keyperm
++ addi $key_,$sp,$FRAME+15
++ mtctr $rounds
++
++Load_ctr32_enc_key:
++ ?vperm v24,v30,v31,$keyperm
++ lvx v30,$x10,$key
++ addi $key,$key,0x20
++ stvx v24,$x00,$key_ # off-load round[1]
++ ?vperm v25,v31,v30,$keyperm
++ lvx v31,$x00,$key
++ stvx v25,$x10,$key_ # off-load round[2]
++ addi $key_,$key_,0x20
++ bdnz Load_ctr32_enc_key
++
++ lvx v26,$x10,$key
++ ?vperm v24,v30,v31,$keyperm
++ lvx v27,$x20,$key
++ stvx v24,$x00,$key_ # off-load round[3]
++ ?vperm v25,v31,v26,$keyperm
++ lvx v28,$x30,$key
++ stvx v25,$x10,$key_ # off-load round[4]
++ addi $key_,$sp,$FRAME+15 # rewind $key_
++ ?vperm v26,v26,v27,$keyperm
++ lvx v29,$x40,$key
++ ?vperm v27,v27,v28,$keyperm
++ lvx v30,$x50,$key
++ ?vperm v28,v28,v29,$keyperm
++ lvx v31,$x60,$key
++ ?vperm v29,v29,v30,$keyperm
++ lvx $out0,$x70,$key # borrow $out0
++ ?vperm v30,v30,v31,$keyperm
++ lvx v24,$x00,$key_ # pre-load round[1]
++ ?vperm v31,v31,$out0,$keyperm
++ lvx v25,$x10,$key_ # pre-load round[2]
++
++ vadduwm $two,$one,$one
++ subi $inp,$inp,15 # undo "caller"
++ $SHL $len,$len,4
++
++ vadduwm $out1,$ivec,$one # counter values ...
++ vadduwm $out2,$ivec,$two
++ vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0]
++ le?li $idx,8
++ vadduwm $out3,$out1,$two
++ vxor $out1,$out1,$rndkey0
++ le?lvsl $inpperm,0,$idx
++ vadduwm $out4,$out2,$two
++ vxor $out2,$out2,$rndkey0
++ le?vspltisb $tmp,0x0f
++ vadduwm $out5,$out3,$two
++ vxor $out3,$out3,$rndkey0
++ le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
++ vadduwm $out6,$out4,$two
++ vxor $out4,$out4,$rndkey0
++ vadduwm $out7,$out5,$two
++ vxor $out5,$out5,$rndkey0
++ vadduwm $ivec,$out6,$two # next counter value
++ vxor $out6,$out6,$rndkey0
++ vxor $out7,$out7,$rndkey0
++
++ mtctr $rounds
++ b Loop_ctr32_enc8x
++.align 5
++Loop_ctr32_enc8x:
++ vcipher $out0,$out0,v24
++ vcipher $out1,$out1,v24
++ vcipher $out2,$out2,v24
++ vcipher $out3,$out3,v24
++ vcipher $out4,$out4,v24
++ vcipher $out5,$out5,v24
++ vcipher $out6,$out6,v24
++ vcipher $out7,$out7,v24
++Loop_ctr32_enc8x_middle:
++ lvx v24,$x20,$key_ # round[3]
++ addi $key_,$key_,0x20
++
++ vcipher $out0,$out0,v25
++ vcipher $out1,$out1,v25
++ vcipher $out2,$out2,v25
++ vcipher $out3,$out3,v25
++ vcipher $out4,$out4,v25
++ vcipher $out5,$out5,v25
++ vcipher $out6,$out6,v25
++ vcipher $out7,$out7,v25
++ lvx v25,$x10,$key_ # round[4]
++ bdnz Loop_ctr32_enc8x
++
++ subic r11,$len,256 # $len-256, borrow $key_
++ vcipher $out0,$out0,v24
++ vcipher $out1,$out1,v24
++ vcipher $out2,$out2,v24
++ vcipher $out3,$out3,v24
++ vcipher $out4,$out4,v24
++ vcipher $out5,$out5,v24
++ vcipher $out6,$out6,v24
++ vcipher $out7,$out7,v24
++
++ subfe r0,r0,r0 # borrow?-1:0
++ vcipher $out0,$out0,v25
++ vcipher $out1,$out1,v25
++ vcipher $out2,$out2,v25
++ vcipher $out3,$out3,v25
++ vcipher $out4,$out4,v25
++ vcipher $out5,$out5,v25
++ vcipher $out6,$out6,v25
++ vcipher $out7,$out7,v25
++
++ and r0,r0,r11
++ addi $key_,$sp,$FRAME+15 # rewind $key_
++ vcipher $out0,$out0,v26
++ vcipher $out1,$out1,v26
++ vcipher $out2,$out2,v26
++ vcipher $out3,$out3,v26
++ vcipher $out4,$out4,v26
++ vcipher $out5,$out5,v26
++ vcipher $out6,$out6,v26
++ vcipher $out7,$out7,v26
++ lvx v24,$x00,$key_ # re-pre-load round[1]
++
++ subic $len,$len,129 # $len-=129
++ vcipher $out0,$out0,v27
++ addi $len,$len,1 # $len-=128 really
++ vcipher $out1,$out1,v27
++ vcipher $out2,$out2,v27
++ vcipher $out3,$out3,v27
++ vcipher $out4,$out4,v27
++ vcipher $out5,$out5,v27
++ vcipher $out6,$out6,v27
++ vcipher $out7,$out7,v27
++ lvx v25,$x10,$key_ # re-pre-load round[2]
++
++ vcipher $out0,$out0,v28
++ lvx_u $in0,$x00,$inp # load input
++ vcipher $out1,$out1,v28
++ lvx_u $in1,$x10,$inp
++ vcipher $out2,$out2,v28
++ lvx_u $in2,$x20,$inp
++ vcipher $out3,$out3,v28
++ lvx_u $in3,$x30,$inp
++ vcipher $out4,$out4,v28
++ lvx_u $in4,$x40,$inp
++ vcipher $out5,$out5,v28
++ lvx_u $in5,$x50,$inp
++ vcipher $out6,$out6,v28
++ lvx_u $in6,$x60,$inp
++ vcipher $out7,$out7,v28
++ lvx_u $in7,$x70,$inp
++ addi $inp,$inp,0x80
++
++ vcipher $out0,$out0,v29
++ le?vperm $in0,$in0,$in0,$inpperm
++ vcipher $out1,$out1,v29
++ le?vperm $in1,$in1,$in1,$inpperm
++ vcipher $out2,$out2,v29
++ le?vperm $in2,$in2,$in2,$inpperm
++ vcipher $out3,$out3,v29
++ le?vperm $in3,$in3,$in3,$inpperm
++ vcipher $out4,$out4,v29
++ le?vperm $in4,$in4,$in4,$inpperm
++ vcipher $out5,$out5,v29
++ le?vperm $in5,$in5,$in5,$inpperm
++ vcipher $out6,$out6,v29
++ le?vperm $in6,$in6,$in6,$inpperm
++ vcipher $out7,$out7,v29
++ le?vperm $in7,$in7,$in7,$inpperm
++
++ add $inp,$inp,r0 # $inp is adjusted in such
++ # way that at exit from the
++ # loop inX-in7 are loaded
++ # with last "words"
++ subfe. r0,r0,r0 # borrow?-1:0
++ vcipher $out0,$out0,v30
++ vxor $in0,$in0,v31 # xor with last round key
++ vcipher $out1,$out1,v30
++ vxor $in1,$in1,v31
++ vcipher $out2,$out2,v30
++ vxor $in2,$in2,v31
++ vcipher $out3,$out3,v30
++ vxor $in3,$in3,v31
++ vcipher $out4,$out4,v30
++ vxor $in4,$in4,v31
++ vcipher $out5,$out5,v30
++ vxor $in5,$in5,v31
++ vcipher $out6,$out6,v30
++ vxor $in6,$in6,v31
++ vcipher $out7,$out7,v30
++ vxor $in7,$in7,v31
++
++ bne Lctr32_enc8x_break # did $len-129 borrow?
++
++ vcipherlast $in0,$out0,$in0
++ vcipherlast $in1,$out1,$in1
++ vadduwm $out1,$ivec,$one # counter values ...
++ vcipherlast $in2,$out2,$in2
++ vadduwm $out2,$ivec,$two
++ vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0]
++ vcipherlast $in3,$out3,$in3
++ vadduwm $out3,$out1,$two
++ vxor $out1,$out1,$rndkey0
++ vcipherlast $in4,$out4,$in4
++ vadduwm $out4,$out2,$two
++ vxor $out2,$out2,$rndkey0
++ vcipherlast $in5,$out5,$in5
++ vadduwm $out5,$out3,$two
++ vxor $out3,$out3,$rndkey0
++ vcipherlast $in6,$out6,$in6
++ vadduwm $out6,$out4,$two
++ vxor $out4,$out4,$rndkey0
++ vcipherlast $in7,$out7,$in7
++ vadduwm $out7,$out5,$two
++ vxor $out5,$out5,$rndkey0
++ le?vperm $in0,$in0,$in0,$inpperm
++ vadduwm $ivec,$out6,$two # next counter value
++ vxor $out6,$out6,$rndkey0
++ le?vperm $in1,$in1,$in1,$inpperm
++ vxor $out7,$out7,$rndkey0
++ mtctr $rounds
++
++ vcipher $out0,$out0,v24
++ stvx_u $in0,$x00,$out
++ le?vperm $in2,$in2,$in2,$inpperm
++ vcipher $out1,$out1,v24
++ stvx_u $in1,$x10,$out
++ le?vperm $in3,$in3,$in3,$inpperm
++ vcipher $out2,$out2,v24
++ stvx_u $in2,$x20,$out
++ le?vperm $in4,$in4,$in4,$inpperm
++ vcipher $out3,$out3,v24
++ stvx_u $in3,$x30,$out
++ le?vperm $in5,$in5,$in5,$inpperm
++ vcipher $out4,$out4,v24
++ stvx_u $in4,$x40,$out
++ le?vperm $in6,$in6,$in6,$inpperm
++ vcipher $out5,$out5,v24
++ stvx_u $in5,$x50,$out
++ le?vperm $in7,$in7,$in7,$inpperm
++ vcipher $out6,$out6,v24
++ stvx_u $in6,$x60,$out
++ vcipher $out7,$out7,v24
++ stvx_u $in7,$x70,$out
++ addi $out,$out,0x80
++
++ b Loop_ctr32_enc8x_middle
++
++.align 5
++Lctr32_enc8x_break:
++ cmpwi $len,-0x60 # here $len = remaining_bytes - 128 (see $len-=128 in loop); switch(remaining)
++ blt Lctr32_enc8x_one # remaining < 0x20: one block left
++ nop
++ beq Lctr32_enc8x_two # remaining == 0x20: two blocks
++ cmpwi $len,-0x40
++ blt Lctr32_enc8x_three # remaining == 0x30: three blocks
++ nop
++ beq Lctr32_enc8x_four # remaining == 0x40: four blocks
++ cmpwi $len,-0x20
++ blt Lctr32_enc8x_five # remaining == 0x50: five blocks
++ nop
++ beq Lctr32_enc8x_six # remaining == 0x60: six blocks
++ cmpwi $len,0x00
++ blt Lctr32_enc8x_seven # remaining == 0x70: seven; else fall through: full eight
++
++Lctr32_enc8x_eight:
++ vcipherlast $out0,$out0,$in0
++ vcipherlast $out1,$out1,$in1
++ vcipherlast $out2,$out2,$in2
++ vcipherlast $out3,$out3,$in3
++ vcipherlast $out4,$out4,$in4
++ vcipherlast $out5,$out5,$in5
++ vcipherlast $out6,$out6,$in6
++ vcipherlast $out7,$out7,$in7
++
++ le?vperm $out0,$out0,$out0,$inpperm
++ le?vperm $out1,$out1,$out1,$inpperm
++ stvx_u $out0,$x00,$out
++ le?vperm $out2,$out2,$out2,$inpperm
++ stvx_u $out1,$x10,$out
++ le?vperm $out3,$out3,$out3,$inpperm
++ stvx_u $out2,$x20,$out
++ le?vperm $out4,$out4,$out4,$inpperm
++ stvx_u $out3,$x30,$out
++ le?vperm $out5,$out5,$out5,$inpperm
++ stvx_u $out4,$x40,$out
++ le?vperm $out6,$out6,$out6,$inpperm
++ stvx_u $out5,$x50,$out
++ le?vperm $out7,$out7,$out7,$inpperm
++ stvx_u $out6,$x60,$out
++ stvx_u $out7,$x70,$out
++ addi $out,$out,0x80
++ b Lctr32_enc8x_done
++
++.align 5
++Lctr32_enc8x_seven:
++ vcipherlast $out0,$out0,$in1
++ vcipherlast $out1,$out1,$in2
++ vcipherlast $out2,$out2,$in3
++ vcipherlast $out3,$out3,$in4
++ vcipherlast $out4,$out4,$in5
++ vcipherlast $out5,$out5,$in6
++ vcipherlast $out6,$out6,$in7
++
++ le?vperm $out0,$out0,$out0,$inpperm
++ le?vperm $out1,$out1,$out1,$inpperm
++ stvx_u $out0,$x00,$out
++ le?vperm $out2,$out2,$out2,$inpperm
++ stvx_u $out1,$x10,$out
++ le?vperm $out3,$out3,$out3,$inpperm
++ stvx_u $out2,$x20,$out
++ le?vperm $out4,$out4,$out4,$inpperm
++ stvx_u $out3,$x30,$out
++ le?vperm $out5,$out5,$out5,$inpperm
++ stvx_u $out4,$x40,$out
++ le?vperm $out6,$out6,$out6,$inpperm
++ stvx_u $out5,$x50,$out
++ stvx_u $out6,$x60,$out
++ addi $out,$out,0x70
++ b Lctr32_enc8x_done
++
++.align 5
++Lctr32_enc8x_six:
++ vcipherlast $out0,$out0,$in2
++ vcipherlast $out1,$out1,$in3
++ vcipherlast $out2,$out2,$in4
++ vcipherlast $out3,$out3,$in5
++ vcipherlast $out4,$out4,$in6
++ vcipherlast $out5,$out5,$in7
++
++ le?vperm $out0,$out0,$out0,$inpperm
++ le?vperm $out1,$out1,$out1,$inpperm
++ stvx_u $out0,$x00,$out
++ le?vperm $out2,$out2,$out2,$inpperm
++ stvx_u $out1,$x10,$out
++ le?vperm $out3,$out3,$out3,$inpperm
++ stvx_u $out2,$x20,$out
++ le?vperm $out4,$out4,$out4,$inpperm
++ stvx_u $out3,$x30,$out
++ le?vperm $out5,$out5,$out5,$inpperm
++ stvx_u $out4,$x40,$out
++ stvx_u $out5,$x50,$out
++ addi $out,$out,0x60
++ b Lctr32_enc8x_done
++
++.align 5
++Lctr32_enc8x_five:
++ vcipherlast $out0,$out0,$in3
++ vcipherlast $out1,$out1,$in4
++ vcipherlast $out2,$out2,$in5
++ vcipherlast $out3,$out3,$in6
++ vcipherlast $out4,$out4,$in7
++
++ le?vperm $out0,$out0,$out0,$inpperm
++ le?vperm $out1,$out1,$out1,$inpperm
++ stvx_u $out0,$x00,$out
++ le?vperm $out2,$out2,$out2,$inpperm
++ stvx_u $out1,$x10,$out
++ le?vperm $out3,$out3,$out3,$inpperm
++ stvx_u $out2,$x20,$out
++ le?vperm $out4,$out4,$out4,$inpperm
++ stvx_u $out3,$x30,$out
++ stvx_u $out4,$x40,$out
++ addi $out,$out,0x50
++ b Lctr32_enc8x_done
++
++.align 5
++Lctr32_enc8x_four:
++ vcipherlast $out0,$out0,$in4
++ vcipherlast $out1,$out1,$in5
++ vcipherlast $out2,$out2,$in6
++ vcipherlast $out3,$out3,$in7
++
++ le?vperm $out0,$out0,$out0,$inpperm
++ le?vperm $out1,$out1,$out1,$inpperm
++ stvx_u $out0,$x00,$out
++ le?vperm $out2,$out2,$out2,$inpperm
++ stvx_u $out1,$x10,$out
++ le?vperm $out3,$out3,$out3,$inpperm
++ stvx_u $out2,$x20,$out
++ stvx_u $out3,$x30,$out
++ addi $out,$out,0x40
++ b Lctr32_enc8x_done
++
++.align 5
++Lctr32_enc8x_three:
++ vcipherlast $out0,$out0,$in5
++ vcipherlast $out1,$out1,$in6
++ vcipherlast $out2,$out2,$in7
++
++ le?vperm $out0,$out0,$out0,$inpperm
++ le?vperm $out1,$out1,$out1,$inpperm
++ stvx_u $out0,$x00,$out
++ le?vperm $out2,$out2,$out2,$inpperm
++ stvx_u $out1,$x10,$out
++ stvx_u $out2,$x20,$out
++ addi $out,$out,0x30
++ b Lctr32_enc8x_done # fixed: was Lcbc_dec8x_done (CBC epilogue writes IV via $ivp)
++
++.align 5
++Lctr32_enc8x_two:
++ vcipherlast $out0,$out0,$in6
++ vcipherlast $out1,$out1,$in7
++
++ le?vperm $out0,$out0,$out0,$inpperm
++ le?vperm $out1,$out1,$out1,$inpperm
++ stvx_u $out0,$x00,$out
++ stvx_u $out1,$x10,$out
++ addi $out,$out,0x20
++ b Lctr32_enc8x_done # fixed: was Lcbc_dec8x_done (CBC epilogue writes IV via $ivp)
++
++.align 5
++Lctr32_enc8x_one:
++ vcipherlast $out0,$out0,$in7
++
++ le?vperm $out0,$out0,$out0,$inpperm
++ stvx_u $out0,0,$out
++ addi $out,$out,0x10
++
++Lctr32_enc8x_done:
++ li r10,`$FRAME+15`
++ li r11,`$FRAME+31`
++ stvx $inpperm,r10,$sp # wipe copies of round keys
++ addi r10,r10,32
++ stvx $inpperm,r11,$sp
++ addi r11,r11,32
++ stvx $inpperm,r10,$sp
++ addi r10,r10,32
++ stvx $inpperm,r11,$sp
++ addi r11,r11,32
++ stvx $inpperm,r10,$sp
++ addi r10,r10,32
++ stvx $inpperm,r11,$sp
++ addi r11,r11,32
++ stvx $inpperm,r10,$sp
++ addi r10,r10,32
++ stvx $inpperm,r11,$sp
++ addi r11,r11,32
++
++ mtspr 256,$vrsave
++ lvx v20,r10,$sp # ABI says so
++ addi r10,r10,32
++ lvx v21,r11,$sp
++ addi r11,r11,32
++ lvx v22,r10,$sp
++ addi r10,r10,32
++ lvx v23,r11,$sp
++ addi r11,r11,32
++ lvx v24,r10,$sp
++ addi r10,r10,32
++ lvx v25,r11,$sp
++ addi r11,r11,32
++ lvx v26,r10,$sp
++ addi r10,r10,32
++ lvx v27,r11,$sp
++ addi r11,r11,32
++ lvx v28,r10,$sp
++ addi r10,r10,32
++ lvx v29,r11,$sp
++ addi r11,r11,32
++ lvx v30,r10,$sp
++ lvx v31,r11,$sp
++ $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
++ $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
++ $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
++ $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
++ $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
++ $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
++ addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
++ blr
++ .long 0
++ .byte 0,12,0x14,0,0x80,6,6,0
++ .long 0
++.size .${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks
++___
++}} }}}
++
++my $consts=1; # true until "Lconsts:" is seen, i.e. while emitting the constants table
++foreach(split("\n",$code)) {
++ s/\`([^\`]*)\`/eval($1)/geo;
++
++ # constants table endian-specific conversion
++ if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
++ my $conv=$3;
++ my @bytes=();
++
++ # convert to endian-agnostic format
++ if ($1 eq "long") {
++ foreach (split(/,\s*/,$2)) {
++ my $l = /^0/?oct:int;
++ push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
++ }
++ } else {
++ @bytes = map(/^0/?oct:int,split(/,\s*/,$2));
++ }
++
++ # little-endian conversion
++ if ($flavour =~ /le$/o) {
++ SWITCH: for($conv) {
++ /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; };
++ /\?rev/ && do { @bytes=reverse(@bytes); last; };
++ }
++ }
++
++ #emit
++ print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
++ next;
++ }
++ $consts=0 if (m/Lconsts:/o); # end of table
++
++ # instructions prefixed with '?' are endian-specific and need
++ # to be adjusted accordingly...
++ if ($flavour =~ /le$/o) { # little-endian
++ s/le\?//o or
++ s/be\?/#be#/o or
++ s/\?lvsr/lvsl/o or
++ s/\?lvsl/lvsr/o or
++ s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
++ s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
++ s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
++ } else { # big-endian
++ s/le\?/#le#/o or
++ s/be\?//o or
++ s/\?([a-z]+)/$1/o;
++ }
++
++ print $_,"\n";
++}
++
++close STDOUT;
+diff -up openssl-1.0.1i/crypto/aes/asm/vpaes-ppc.pl.ppc-asm openssl-1.0.1i/crypto/aes/asm/vpaes-ppc.pl
+--- openssl-1.0.1i/crypto/aes/asm/vpaes-ppc.pl.ppc-asm 2014-08-13 19:46:21.093578128 +0200
++++ openssl-1.0.1i/crypto/aes/asm/vpaes-ppc.pl 2014-08-13 19:46:21.093578128 +0200
+@@ -0,0 +1,1512 @@
++#!/usr/bin/env perl
++
++######################################################################
++## Constant-time SSSE3 AES core implementation.
++## version 0.1
++##
++## By Mike Hamburg (Stanford University), 2009
++## Public domain.
++##
++## For details see http://shiftleft.org/papers/vector_aes/ and
++## http://crypto.stanford.edu/vpaes/.
++
++# CBC encrypt/decrypt performance in cycles per byte processed with
++# 128-bit key.
++#
++# aes-ppc.pl this
++# G4e 35.5/52.1/(23.8) 11.9(*)/15.4
++# POWER6 42.7/54.3/(28.2) 63.0/92.8(**)
++# POWER7 32.3/42.9/(18.4) 18.5/23.3
++#
++# (*) This is ~10% worse than reported in paper. The reason is
++# twofold. This module doesn't make any assumption about
++# key schedule (or data for that matter) alignment and handles
++# it in-line. Secondly it, being transliterated from
++# vpaes-x86_64.pl, relies on "nested inversion" better suited
++# for Intel CPUs.
++# (**) Inadequate POWER6 performance is due to astronomic AltiVec
++# latency, 9 cycles per simple logical operation.
++
++$flavour = shift;
++
++if ($flavour =~ /64/) {
++ $SIZE_T =8;
++ $LRSAVE =2*$SIZE_T;
++ $STU ="stdu";
++ $POP ="ld";
++ $PUSH ="std";
++ $UCMP ="cmpld";
++} elsif ($flavour =~ /32/) {
++ $SIZE_T =4;
++ $LRSAVE =$SIZE_T;
++ $STU ="stwu";
++ $POP ="lwz";
++ $PUSH ="stw";
++ $UCMP ="cmplw";
++} else { die "nonsense $flavour"; }
++
++$sp="r1";
++$FRAME=6*$SIZE_T+13*16; # 13*16 is for v20-v31 offload
++
++$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
++( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
++( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
++die "can't locate ppc-xlate.pl";
++
++open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!";
++
++$code.=<<___;
++.machine "any"
++
++.text
++
++.align 7 # totally strategic alignment
++_vpaes_consts:	# table base; the "?inv/?rev/?asis" tags appear to be endian fix-up markers consumed by this script - TODO confirm
++Lk_mc_forward: # mc_forward
++ .long 0x01020300, 0x05060704, 0x090a0b08, 0x0d0e0f0c ?inv
++ .long 0x05060704, 0x090a0b08, 0x0d0e0f0c, 0x01020300 ?inv
++ .long 0x090a0b08, 0x0d0e0f0c, 0x01020300, 0x05060704 ?inv
++ .long 0x0d0e0f0c, 0x01020300, 0x05060704, 0x090a0b08 ?inv
++Lk_mc_backward: # mc_backward
++ .long 0x03000102, 0x07040506, 0x0b08090a, 0x0f0c0d0e ?inv
++ .long 0x0f0c0d0e, 0x03000102, 0x07040506, 0x0b08090a ?inv
++ .long 0x0b08090a, 0x0f0c0d0e, 0x03000102, 0x07040506 ?inv
++ .long 0x07040506, 0x0b08090a, 0x0f0c0d0e, 0x03000102 ?inv
++Lk_sr: # sr
++ .long 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f ?inv
++ .long 0x00050a0f, 0x04090e03, 0x080d0207, 0x0c01060b ?inv
++ .long 0x0009020b, 0x040d060f, 0x08010a03, 0x0c050e07 ?inv
++ .long 0x000d0a07, 0x04010e0b, 0x0805020f, 0x0c090603 ?inv
++
++##
++## "Hot" constants
++##
++Lk_inv: # inv, inva
++ .long 0xf001080d, 0x0f06050e, 0x020c0b0a, 0x09030704 ?rev
++ .long 0xf0070b0f, 0x060a0401, 0x09080502, 0x0c0e0d03 ?rev
++Lk_ipt: # input transform (lo, hi)
++ .long 0x00702a5a, 0x98e8b2c2, 0x08782252, 0x90e0baca ?rev
++ .long 0x004d7c31, 0x7d30014c, 0x81ccfdb0, 0xfcb180cd ?rev
++Lk_sbo: # sbou, sbot
++ .long 0x00c7bd6f, 0x176dd2d0, 0x78a802c5, 0x7abfaa15 ?rev
++ .long 0x006abb5f, 0xa574e4cf, 0xfa352b41, 0xd1901e8e ?rev
++Lk_sb1: # sb1u, sb1t
++ .long 0x0023e2fa, 0x15d41836, 0xefd92e0d, 0xc1ccf73b ?rev
++ .long 0x003e50cb, 0x8fe19bb1, 0x44f52a14, 0x6e7adfa5 ?rev
++Lk_sb2: # sb2u, sb2t
++ .long 0x0029e10a, 0x4088eb69, 0x4a2382ab, 0xc863a1c2 ?rev
++ .long 0x0024710b, 0xc6937ae2, 0xcd2f98bc, 0x55e9b75e ?rev
++
++##
++## Decryption stuff
++##
++Lk_dipt: # decryption input transform
++ .long 0x005f540b, 0x045b500f, 0x1a454e11, 0x1e414a15 ?rev
++ .long 0x00650560, 0xe683e386, 0x94f191f4, 0x72177712 ?rev
++Lk_dsbo: # decryption sbox final output
++ .long 0x0040f97e, 0x53ea8713, 0x2d3e94d4, 0xb96daac7 ?rev
++ .long 0x001d4493, 0x0f56d712, 0x9c8ec5d8, 0x59814bca ?rev
++Lk_dsb9: # decryption sbox output *9*u, *9*t
++ .long 0x00d6869a, 0x53031c85, 0xc94c994f, 0x501fd5ca ?rev
++ .long 0x0049d7ec, 0x89173bc0, 0x65a5fbb2, 0x9e2c5e72 ?rev
++Lk_dsbd: # decryption sbox output *D*u, *D*t
++ .long 0x00a2b1e6, 0xdfcc577d, 0x39442a88, 0x139b6ef5 ?rev
++ .long 0x00cbc624, 0xf7fae23c, 0xd3efde15, 0x0d183129 ?rev
++Lk_dsbb: # decryption sbox output *B*u, *B*t
++ .long 0x0042b496, 0x926422d0, 0x04d4f2b0, 0xf6462660 ?rev
++ .long 0x006759cd, 0xa69894c1, 0x6baa5532, 0x3e0cfff3 ?rev
++Lk_dsbe: # decryption sbox output *E*u, *E*t
++ .long 0x00d0d426, 0x9692f246, 0xb0f6b464, 0x04604222 ?rev
++ .long 0x00c1aaff, 0xcda6550c, 0x323e5998, 0x6bf36794 ?rev
++
++##
++## Key schedule constants
++##
++Lk_dksd: # decryption key schedule: invskew x*D
++ .long 0x0047e4a3, 0x5d1ab9fe, 0xf9be1d5a, 0xa4e34007 ?rev
++ .long 0x008336b5, 0xf477c241, 0x1e9d28ab, 0xea69dc5f ?rev
++Lk_dksb: # decryption key schedule: invskew x*B
++ .long 0x00d55085, 0x1fca4f9a, 0x994cc91c, 0x8653d603 ?rev
++ .long 0x004afcb6, 0xa7ed5b11, 0xc882347e, 0x6f2593d9 ?rev
++Lk_dkse: # decryption key schedule: invskew x*E + 0x63
++ .long 0x00d6c91f, 0xca1c03d5, 0x86504f99, 0x4c9a8553 ?rev
++ .long 0xe87bdc4f, 0x059631a2, 0x8714b320, 0x6af95ecd ?rev
++Lk_dks9: # decryption key schedule: invskew x*9
++ .long 0x00a7d97e, 0xc86f11b6, 0xfc5b2582, 0x3493ed4a ?rev
++ .long 0x00331427, 0x62517645, 0xcefddae9, 0xac9fb88b ?rev
++
++Lk_rcon: # rcon
++ .long 0xb6ee9daf, 0xb991831f, 0x817d7c4d, 0x08982a70 ?asis
++Lk_s63:	# 0x5b = 0x63 with the low-bit skew; xored in during key mangling
++ .long 0x5b5b5b5b, 0x5b5b5b5b, 0x5b5b5b5b, 0x5b5b5b5b ?asis
++
++Lk_opt: # output transform
++ .long 0x0060b6d6, 0x29499fff, 0x0868bede, 0x214197f7 ?rev
++ .long 0x00ecbc50, 0x51bded01, 0xe00c5cb0, 0xb15d0de1 ?rev
++Lk_deskew: # deskew tables: inverts the sbox's "skew"
++ .long 0x00e3a447, 0x40a3e407, 0x1af9be5d, 0x5ab9fe1d ?rev
++ .long 0x0069ea83, 0xdcb5365f, 0x771e9df4, 0xabc24128 ?rev
++.align 5
++Lconsts:	# position-independent helper: returns r12 = address of _vpaes_consts
++ mflr r0	# preserve caller's LR
++ bcl 20,31,\$+4	# branch-always-and-link to the next insn: loads PC into LR
++ mflr r12 # r12 = runtime address of the insn after the bcl
++ addi r12,r12,-0x308	# rewind to _vpaes_consts - presumably 0x308 bytes back; TODO confirm distance
++ mtlr r0
++ blr
++ .long 0
++ .byte 0,12,0x14,0,0,0,0,0
++.asciz "Vector Permutation AES for AltiVec, Mike Hamburg (Stanford University)"
++.align 6
++___
++
++my ($inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm) = map("v$_",(26..31));	# unaligned-I/O helpers, kept live across subroutine calls
++{
++my ($inp,$out,$key) = map("r$_",(3..5));	# C-ABI argument registers: input ptr, output ptr, key schedule ptr
++
++my ($invlo,$invhi,$iptlo,$ipthi,$sbou,$sbot) = map("v$_",(10..15));	# "preheated" lookup tables
++my ($sb1u,$sb1t,$sb2u,$sb2t) = map("v$_",(16..19));	# encryption s-box tables
++my ($sb9u,$sb9t,$sbdu,$sbdt,$sbbu,$sbbt,$sbeu,$sbet)=map("v$_",(16..23));	# decryption s-box tables (overlap v16-v19 with the enc set)
++
++$code.=<<___;
++##
++## _vpaes_encrypt_preheat
++##
++## Loads the encryption lookup tables (inv, ipt, sbo, sb1, sb2)
++## from _vpaes_consts into v10-v19 and sets up the helper
++## constants v7 (zero), v8 (0x04..04), v9 (0x0f..0f).
++##
++.align 4
++_vpaes_encrypt_preheat:
++ mflr r8	# preserve caller's LR across the bl below
++ bl Lconsts	# r12 = _vpaes_consts
++ mtlr r8
++ li r11, 0xc0 # Lk_inv
++ li r10, 0xd0
++ li r9, 0xe0 # Lk_ipt
++ li r8, 0xf0
++ vxor v7, v7, v7 # 0x00..00
++ vspltisb v8,4 # 0x04..04
++ vspltisb v9,0x0f # 0x0f..0f
++ lvx $invlo, r12, r11
++ li r11, 0x100
++ lvx $invhi, r12, r10
++ li r10, 0x110
++ lvx $iptlo, r12, r9
++ li r9, 0x120
++ lvx $ipthi, r12, r8
++ li r8, 0x130
++ lvx $sbou, r12, r11
++ li r11, 0x140
++ lvx $sbot, r12, r10
++ li r10, 0x150
++ lvx $sb1u, r12, r9
++ lvx $sb1t, r12, r8
++ lvx $sb2u, r12, r11
++ lvx $sb2t, r12, r10
++ blr
++ .long 0
++ .byte 0,12,0x14,0,0,0,0,0
++
++##
++## _vpaes_encrypt_core
++##
++## AES-encrypt one 128-bit block held in v0.
++##
++## Inputs:
++## v0 = input block
++## v7-v9 and table registers as set by _vpaes_encrypt_preheat
++## $key = scheduled keys; 240($key) = round count
++##
++## Output in v0
++## Clobbers v1-v6, r8-r11, CTR (x86 %xmm names in the line
++## comments below refer to the vpaes-x86_64.pl original)
++##
++.align 5
++_vpaes_encrypt_core:
++ lwz r8, 240($key) # pull rounds
++ li r9, 16
++ lvx v5, 0, $key # vmovdqu (%r9), %xmm5 # round0 key
++ li r11, 0x10
++ lvx v6, r9, $key
++ addi r9, r9, 16
++ ?vperm v5, v5, v6, $keyperm # align round key
++ addi r10, r11, 0x40
++ vsrb v1, v0, v8 # vpsrlb \$4, %xmm0, %xmm0
++ vperm v0, $iptlo, $iptlo, v0 # vpshufb %xmm1, %xmm2, %xmm1
++ vperm v1, $ipthi, $ipthi, v1 # vpshufb %xmm0, %xmm3, %xmm2
++ vxor v0, v0, v5 # vpxor %xmm5, %xmm1, %xmm0
++ vxor v0, v0, v1 # vpxor %xmm2, %xmm0, %xmm0
++ mtctr r8	# loop "rounds" times via CTR
++ b Lenc_entry
++
++.align 4
++Lenc_loop:
++ # middle of middle round
++ vperm v4, $sb1t, v7, v2 # vpshufb %xmm2, %xmm13, %xmm4 # 4 = sb1u
++ lvx v1, r12, r11 # vmovdqa -0x40(%r11,%r10), %xmm1 # .Lk_mc_forward[]
++ addi r11, r11, 16
++ vperm v0, $sb1u, v7, v3 # vpshufb %xmm3, %xmm12, %xmm0 # 0 = sb1t
++ vxor v4, v4, v5 # vpxor %xmm5, %xmm4, %xmm4 # 4 = sb1u + k
++ andi. r11, r11, 0x30 # and \$0x30, %r11 # ... mod 4
++ vperm v5, $sb2t, v7, v2 # vpshufb %xmm2, %xmm15, %xmm5 # 4 = sb2u
++ vxor v0, v0, v4 # vpxor %xmm4, %xmm0, %xmm0 # 0 = A
++ vperm v2, $sb2u, v7, v3 # vpshufb %xmm3, %xmm14, %xmm2 # 2 = sb2t
++ lvx v4, r12, r10 # vmovdqa (%r11,%r10), %xmm4 # .Lk_mc_backward[]
++ addi r10, r11, 0x40
++ vperm v3, v0, v7, v1 # vpshufb %xmm1, %xmm0, %xmm3 # 0 = B
++ vxor v2, v2, v5 # vpxor %xmm5, %xmm2, %xmm2 # 2 = 2A
++ vperm v0, v0, v7, v4 # vpshufb %xmm4, %xmm0, %xmm0 # 3 = D
++ vxor v3, v3, v2 # vpxor %xmm2, %xmm3, %xmm3 # 0 = 2A+B
++ vperm v4, v3, v7, v1 # vpshufb %xmm1, %xmm3, %xmm4 # 0 = 2B+C
++ vxor v0, v0, v3 # vpxor %xmm3, %xmm0, %xmm0 # 3 = 2A+B+D
++ vxor v0, v0, v4 # vpxor %xmm4, %xmm0, %xmm0 # 0 = 2A+3B+C+D
++
++Lenc_entry:
++ # top of round
++ vsrb v1, v0, v8 # vpsrlb \$4, %xmm0, %xmm0 # 1 = i
++ vperm v5, $invhi, $invhi, v0 # vpshufb %xmm1, %xmm11, %xmm5 # 2 = a/k
++ vxor v0, v0, v1 # vpxor %xmm0, %xmm1, %xmm1 # 0 = j
++ vperm v3, $invlo, $invlo, v1 # vpshufb %xmm0, %xmm10, %xmm3 # 3 = 1/i
++ vperm v4, $invlo, $invlo, v0 # vpshufb %xmm1, %xmm10, %xmm4 # 4 = 1/j
++ vand v0, v0, v9	# keep low nibbles only
++ vxor v3, v3, v5 # vpxor %xmm5, %xmm3, %xmm3 # 3 = iak = 1/i + a/k
++ vxor v4, v4, v5 # vpxor %xmm5, %xmm4, %xmm4 # 4 = jak = 1/j + a/k
++ vperm v2, $invlo, v7, v3 # vpshufb %xmm3, %xmm10, %xmm2 # 2 = 1/iak
++ vmr v5, v6	# next round key: shift pipeline
++ lvx v6, r9, $key # vmovdqu (%r9), %xmm5
++ vperm v3, $invlo, v7, v4 # vpshufb %xmm4, %xmm10, %xmm3 # 3 = 1/jak
++ addi r9, r9, 16
++ vxor v2, v2, v0 # vpxor %xmm1, %xmm2, %xmm2 # 2 = io
++ ?vperm v5, v5, v6, $keyperm # align round key
++ vxor v3, v3, v1 # vpxor %xmm0, %xmm3, %xmm3 # 3 = jo
++ bdnz Lenc_loop
++
++ # middle of last round
++ addi r10, r11, 0x80
++ # vmovdqa -0x60(%r10), %xmm4 # 3 : sbou .Lk_sbo
++ # vmovdqa -0x50(%r10), %xmm0 # 0 : sbot .Lk_sbo+16
++ vperm v4, $sbou, v7, v2 # vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbou
++ lvx v1, r12, r10 # vmovdqa 0x40(%r11,%r10), %xmm1 # .Lk_sr[]
++ vperm v0, $sbot, v7, v3 # vpshufb %xmm3, %xmm0, %xmm0 # 0 = sb1t
++ vxor v4, v4, v5 # vpxor %xmm5, %xmm4, %xmm4 # 4 = sb1u + k
++ vxor v0, v0, v4 # vpxor %xmm4, %xmm0, %xmm0 # 0 = A
++ vperm v0, v0, v7, v1 # vpshufb %xmm1, %xmm0, %xmm0
++ blr
++ .long 0
++ .byte 0,12,0x14,0,0,0,0,0
++
++.globl .vpaes_encrypt
++.align 5
++.vpaes_encrypt:	# public entry: encrypt one block, inp=r3 out=r4 key=r5; handles unaligned ptrs
++ $STU $sp,-$FRAME($sp)	# push frame (stack update form keeps back-chain)
++ li r10,`15+6*$SIZE_T`
++ li r11,`31+6*$SIZE_T`
++ mflr r6
++ mfspr r7, 256 # save vrsave
++ stvx v20,r10,$sp	# offload callee-saved v20-v31 to the frame
++ addi r10,r10,32
++ stvx v21,r11,$sp
++ addi r11,r11,32
++ stvx v22,r10,$sp
++ addi r10,r10,32
++ stvx v23,r11,$sp
++ addi r11,r11,32
++ stvx v24,r10,$sp
++ addi r10,r10,32
++ stvx v25,r11,$sp
++ addi r11,r11,32
++ stvx v26,r10,$sp
++ addi r10,r10,32
++ stvx v27,r11,$sp
++ addi r11,r11,32
++ stvx v28,r10,$sp
++ addi r10,r10,32
++ stvx v29,r11,$sp
++ addi r11,r11,32
++ stvx v30,r10,$sp
++ stvx v31,r11,$sp
++ stw r7,`$FRAME-4`($sp) # save vrsave
++ li r0, -1
++ $PUSH r6,`$FRAME+$LRSAVE`($sp)	# save LR in caller's frame
++ mtspr 256, r0 # preserve all AltiVec registers
++
++ bl _vpaes_encrypt_preheat
++
++ ?lvsl $inpperm, 0, $inp # prepare for unaligned access
++ lvx v0, 0, $inp
++ addi $inp, $inp, 15 # 15 is not a typo
++ ?lvsr $outperm, 0, $out
++ ?lvsl $keyperm, 0, $key # prepare for unaligned access
++ vnor $outmask, v7, v7 # 0xff..ff
++ lvx $inptail, 0, $inp # redundant in aligned case
++ ?vperm $outmask, v7, $outmask, $outperm
++ lvx $outhead, 0, $out
++ ?vperm v0, v0, $inptail, $inpperm
++
++ bl _vpaes_encrypt_core
++
++ vperm v0, v0, v0, $outperm # rotate right/left
++ vsel v1, $outhead, v0, $outmask
++ vmr $outhead, v0
++ stvx v1, 0, $out
++ addi $out, $out, 15 # 15 is not a typo
++ ########	flush the straddling tail of the output block
++
++ lvx v1, 0, $out # redundant in aligned case
++ vsel v1, $outhead, v1, $outmask
++ stvx v1, 0, $out
++
++ li r10,`15+6*$SIZE_T`
++ li r11,`31+6*$SIZE_T`
++ mtlr r6
++ mtspr 256, r7 # restore vrsave
++ lvx v20,r10,$sp	# reload callee-saved v20-v31
++ addi r10,r10,32
++ lvx v21,r11,$sp
++ addi r11,r11,32
++ lvx v22,r10,$sp
++ addi r10,r10,32
++ lvx v23,r11,$sp
++ addi r11,r11,32
++ lvx v24,r10,$sp
++ addi r10,r10,32
++ lvx v25,r11,$sp
++ addi r11,r11,32
++ lvx v26,r10,$sp
++ addi r10,r10,32
++ lvx v27,r11,$sp
++ addi r11,r11,32
++ lvx v28,r10,$sp
++ addi r10,r10,32
++ lvx v29,r11,$sp
++ addi r11,r11,32
++ lvx v30,r10,$sp
++ lvx v31,r11,$sp
++ addi $sp,$sp,$FRAME
++ blr
++ .long 0
++ .byte 0,12,0x04,1,0x80,0,3,0
++ .long 0
++.size .vpaes_encrypt,.-.vpaes_encrypt
++
++.align 4
++_vpaes_decrypt_preheat:	# load decryption tables (inv, dipt, dsbo, dsb9/d/b/e) into v10-v23
++ mflr r8	# preserve caller's LR across the bl below
++ bl Lconsts	# r12 = _vpaes_consts
++ mtlr r8
++ li r11, 0xc0 # Lk_inv
++ li r10, 0xd0
++ li r9, 0x160 # Ldipt
++ li r8, 0x170
++ vxor v7, v7, v7 # 0x00..00
++ vspltisb v8,4 # 0x04..04
++ vspltisb v9,0x0f # 0x0f..0f
++ lvx $invlo, r12, r11
++ li r11, 0x180
++ lvx $invhi, r12, r10
++ li r10, 0x190
++ lvx $iptlo, r12, r9
++ li r9, 0x1a0
++ lvx $ipthi, r12, r8
++ li r8, 0x1b0
++ lvx $sbou, r12, r11
++ li r11, 0x1c0
++ lvx $sbot, r12, r10
++ li r10, 0x1d0
++ lvx $sb9u, r12, r9
++ li r9, 0x1e0
++ lvx $sb9t, r12, r8
++ li r8, 0x1f0
++ lvx $sbdu, r12, r11
++ li r11, 0x200
++ lvx $sbdt, r12, r10
++ li r10, 0x210
++ lvx $sbbu, r12, r9
++ lvx $sbbt, r12, r8
++ lvx $sbeu, r12, r11
++ lvx $sbet, r12, r10
++ blr
++ .long 0
++ .byte 0,12,0x14,0,0,0,0,0
++
++##
++## Decryption core
++##
++## Same API as encryption core: block in/out in v0, key in $key,
++## tables as loaded by _vpaes_decrypt_preheat. Clobbers v1-v6,
++## r8-r11, CTR. (%xmm comments refer to the x86_64 original.)
++##
++.align 4
++_vpaes_decrypt_core:
++ lwz r8, 240($key) # pull rounds
++ li r9, 16
++ lvx v5, 0, $key # vmovdqu (%r9), %xmm4 # round0 key
++ li r11, 0x30
++ lvx v6, r9, $key
++ addi r9, r9, 16
++ ?vperm v5, v5, v6, $keyperm # align round key
++ vsrb v1, v0, v8 # vpsrlb \$4, %xmm0, %xmm0
++ vperm v0, $iptlo, $iptlo, v0 # vpshufb %xmm1, %xmm2, %xmm2
++ vperm v1, $ipthi, $ipthi, v1 # vpshufb %xmm0, %xmm1, %xmm0
++ vxor v0, v0, v5 # vpxor %xmm4, %xmm2, %xmm2
++ vxor v0, v0, v1 # vpxor %xmm2, %xmm0, %xmm0
++ mtctr r8	# loop "rounds" times via CTR
++ b Ldec_entry
++
++.align 4
++Ldec_loop:
++#
++# Inverse mix columns
++#
++ lvx v0, r12, r11 # v5 and v0 are flipped
++ # vmovdqa -0x20(%r10),%xmm4 # 4 : sb9u
++ # vmovdqa -0x10(%r10),%xmm1 # 0 : sb9t
++ vperm v4, $sb9u, v7, v2 # vpshufb %xmm2, %xmm4, %xmm4 # 4 = sb9u
++ subi r11, r11, 16
++ vperm v1, $sb9t, v7, v3 # vpshufb %xmm3, %xmm1, %xmm1 # 0 = sb9t
++ andi. r11, r11, 0x30	# step Lk_mc_forward index mod 4 entries
++ vxor v5, v5, v4 # vpxor %xmm4, %xmm0, %xmm0
++ # vmovdqa 0x00(%r10),%xmm4 # 4 : sbdu
++ vxor v5, v5, v1 # vpxor %xmm1, %xmm0, %xmm0 # 0 = ch
++ # vmovdqa 0x10(%r10),%xmm1 # 0 : sbdt
++
++ vperm v4, $sbdu, v7, v2 # vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbdu
++ vperm v5, v5, v7, v0 # vpshufb %xmm5, %xmm0, %xmm0 # MC ch
++ vperm v1, $sbdt, v7, v3 # vpshufb %xmm3, %xmm1, %xmm1 # 0 = sbdt
++ vxor v5, v5, v4 # vpxor %xmm4, %xmm0, %xmm0 # 4 = ch
++ # vmovdqa 0x20(%r10), %xmm4 # 4 : sbbu
++ vxor v5, v5, v1 # vpxor %xmm1, %xmm0, %xmm0 # 0 = ch
++ # vmovdqa 0x30(%r10), %xmm1 # 0 : sbbt
++
++ vperm v4, $sbbu, v7, v2 # vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbbu
++ vperm v5, v5, v7, v0 # vpshufb %xmm5, %xmm0, %xmm0 # MC ch
++ vperm v1, $sbbt, v7, v3 # vpshufb %xmm3, %xmm1, %xmm1 # 0 = sbbt
++ vxor v5, v5, v4 # vpxor %xmm4, %xmm0, %xmm0 # 4 = ch
++ # vmovdqa 0x40(%r10), %xmm4 # 4 : sbeu
++ vxor v5, v5, v1 # vpxor %xmm1, %xmm0, %xmm0 # 0 = ch
++ # vmovdqa 0x50(%r10), %xmm1 # 0 : sbet
++
++ vperm v4, $sbeu, v7, v2 # vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbeu
++ vperm v5, v5, v7, v0 # vpshufb %xmm5, %xmm0, %xmm0 # MC ch
++ vperm v1, $sbet, v7, v3 # vpshufb %xmm3, %xmm1, %xmm1 # 0 = sbet
++ vxor v0, v5, v4 # vpxor %xmm4, %xmm0, %xmm0 # 4 = ch
++ vxor v0, v0, v1 # vpxor %xmm1, %xmm0, %xmm0 # 0 = ch
++
++Ldec_entry:
++ # top of round
++ vsrb v1, v0, v8 # vpsrlb \$4, %xmm0, %xmm0 # 1 = i
++ vperm v2, $invhi, $invhi, v0 # vpshufb %xmm1, %xmm11, %xmm2 # 2 = a/k
++ vxor v0, v0, v1 # vpxor %xmm0, %xmm1, %xmm1 # 0 = j
++ vperm v3, $invlo, $invlo, v1 # vpshufb %xmm0, %xmm10, %xmm3 # 3 = 1/i
++ vperm v4, $invlo, $invlo, v0 # vpshufb %xmm1, %xmm10, %xmm4 # 4 = 1/j
++ vand v0, v0, v9	# keep low nibbles only
++ vxor v3, v3, v2 # vpxor %xmm2, %xmm3, %xmm3 # 3 = iak = 1/i + a/k
++ vxor v4, v4, v2 # vpxor %xmm2, %xmm4, %xmm4 # 4 = jak = 1/j + a/k
++ vperm v2, $invlo, v7, v3 # vpshufb %xmm3, %xmm10, %xmm2 # 2 = 1/iak
++ vmr v5, v6	# next round key: shift pipeline
++ lvx v6, r9, $key # vmovdqu (%r9), %xmm0
++ vperm v3, $invlo, v7, v4 # vpshufb %xmm4, %xmm10, %xmm3 # 3 = 1/jak
++ addi r9, r9, 16
++ vxor v2, v2, v0 # vpxor %xmm1, %xmm2, %xmm2 # 2 = io
++ ?vperm v5, v5, v6, $keyperm # align round key
++ vxor v3, v3, v1 # vpxor %xmm0, %xmm3, %xmm3 # 3 = jo
++ bdnz Ldec_loop
++
++ # middle of last round
++ addi r10, r11, 0x80
++ # vmovdqa 0x60(%r10), %xmm4 # 3 : sbou
++ vperm v4, $sbou, v7, v2 # vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbou
++ # vmovdqa 0x70(%r10), %xmm1 # 0 : sbot
++ lvx v2, r12, r10 # vmovdqa -0x160(%r11), %xmm2 # .Lk_sr-.Lk_dsbd=-0x160
++ vperm v1, $sbot, v7, v3 # vpshufb %xmm3, %xmm1, %xmm1 # 0 = sb1t
++ vxor v4, v4, v5 # vpxor %xmm0, %xmm4, %xmm4 # 4 = sb1u + k
++ vxor v0, v1, v4 # vpxor %xmm4, %xmm1, %xmm0 # 0 = A
++ vperm v0, v0, v7, v2 # vpshufb %xmm2, %xmm0, %xmm0
++ blr
++ .long 0
++ .byte 0,12,0x14,0,0,0,0,0
++
++.globl .vpaes_decrypt
++.align 5
++.vpaes_decrypt:	# public entry: decrypt one block, inp=r3 out=r4 key=r5; mirrors .vpaes_encrypt
++ $STU $sp,-$FRAME($sp)	# push frame
++ li r10,`15+6*$SIZE_T`
++ li r11,`31+6*$SIZE_T`
++ mflr r6
++ mfspr r7, 256 # save vrsave
++ stvx v20,r10,$sp	# offload callee-saved v20-v31
++ addi r10,r10,32
++ stvx v21,r11,$sp
++ addi r11,r11,32
++ stvx v22,r10,$sp
++ addi r10,r10,32
++ stvx v23,r11,$sp
++ addi r11,r11,32
++ stvx v24,r10,$sp
++ addi r10,r10,32
++ stvx v25,r11,$sp
++ addi r11,r11,32
++ stvx v26,r10,$sp
++ addi r10,r10,32
++ stvx v27,r11,$sp
++ addi r11,r11,32
++ stvx v28,r10,$sp
++ addi r10,r10,32
++ stvx v29,r11,$sp
++ addi r11,r11,32
++ stvx v30,r10,$sp
++ stvx v31,r11,$sp
++ stw r7,`$FRAME-4`($sp) # save vrsave
++ li r0, -1
++ $PUSH r6,`$FRAME+$LRSAVE`($sp)	# save LR in caller's frame
++ mtspr 256, r0 # preserve all AltiVec registers
++
++ bl _vpaes_decrypt_preheat
++
++ ?lvsl $inpperm, 0, $inp # prepare for unaligned access
++ lvx v0, 0, $inp
++ addi $inp, $inp, 15 # 15 is not a typo
++ ?lvsr $outperm, 0, $out
++ ?lvsl $keyperm, 0, $key
++ vnor $outmask, v7, v7 # 0xff..ff
++ lvx $inptail, 0, $inp # redundant in aligned case
++ ?vperm $outmask, v7, $outmask, $outperm
++ lvx $outhead, 0, $out
++ ?vperm v0, v0, $inptail, $inpperm
++
++ bl _vpaes_decrypt_core
++
++ vperm v0, v0, v0, $outperm # rotate right/left
++ vsel v1, $outhead, v0, $outmask
++ vmr $outhead, v0
++ stvx v1, 0, $out
++ addi $out, $out, 15 # 15 is not a typo
++ ########	flush the straddling tail of the output block
++
++ lvx v1, 0, $out # redundant in aligned case
++ vsel v1, $outhead, v1, $outmask
++ stvx v1, 0, $out
++
++ li r10,`15+6*$SIZE_T`
++ li r11,`31+6*$SIZE_T`
++ mtlr r6
++ mtspr 256, r7 # restore vrsave
++ lvx v20,r10,$sp	# reload callee-saved v20-v31
++ addi r10,r10,32
++ lvx v21,r11,$sp
++ addi r11,r11,32
++ lvx v22,r10,$sp
++ addi r10,r10,32
++ lvx v23,r11,$sp
++ addi r11,r11,32
++ lvx v24,r10,$sp
++ addi r10,r10,32
++ lvx v25,r11,$sp
++ addi r11,r11,32
++ lvx v26,r10,$sp
++ addi r10,r10,32
++ lvx v27,r11,$sp
++ addi r11,r11,32
++ lvx v28,r10,$sp
++ addi r10,r10,32
++ lvx v29,r11,$sp
++ addi r11,r11,32
++ lvx v30,r10,$sp
++ lvx v31,r11,$sp
++ addi $sp,$sp,$FRAME
++ blr
++ .long 0
++ .byte 0,12,0x04,1,0x80,0,3,0
++ .long 0
++.size .vpaes_decrypt,.-.vpaes_decrypt
++
++.globl .vpaes_cbc_encrypt
++.align 5
++.vpaes_cbc_encrypt:	# (inp=r3, out=r4, len=r5, key=r6, iv=r7, enc=r8)
++ ${UCMP}i r5,16
++ bltlr-	# nothing to do for len < 16: return (hint: not-taken)
++
++ $STU $sp,-`($FRAME+2*$SIZE_T)`($sp)	# frame + 2 slots for r30/r31
++ mflr r0
++ li r10,`15+6*$SIZE_T`
++ li r11,`31+6*$SIZE_T`
++ mfspr r12, 256
++ stvx v20,r10,$sp	# offload callee-saved v20-v31
++ addi r10,r10,32
++ stvx v21,r11,$sp
++ addi r11,r11,32
++ stvx v22,r10,$sp
++ addi r10,r10,32
++ stvx v23,r11,$sp
++ addi r11,r11,32
++ stvx v24,r10,$sp
++ addi r10,r10,32
++ stvx v25,r11,$sp
++ addi r11,r11,32
++ stvx v26,r10,$sp
++ addi r10,r10,32
++ stvx v27,r11,$sp
++ addi r11,r11,32
++ stvx v28,r10,$sp
++ addi r10,r10,32
++ stvx v29,r11,$sp
++ addi r11,r11,32
++ stvx v30,r10,$sp
++ stvx v31,r11,$sp
++ stw r12,`$FRAME-4`($sp) # save vrsave
++ $PUSH r30,`$FRAME+$SIZE_T*0`($sp)	# r30/r31 are callee-saved GPRs used below
++ $PUSH r31,`$FRAME+$SIZE_T*1`($sp)
++ li r9, -16
++ $PUSH r0, `$FRAME+$SIZE_T*2+$LRSAVE`($sp)
++
++ and r30, r5, r9 # copy length&-16
++ mr r5, r6 # copy pointer to key
++ mr r31, r7 # copy pointer to iv
++ blt Lcbc_abort	# CR0 still holds the len<16 test from entry
++ cmpwi r8, 0 # test direction
++ li r6, -1
++ mr r7, r12 # copy vrsave
++ mtspr 256, r6 # preserve all AltiVec registers
++
++ lvx v24, 0, r31 # load [potentially unaligned] iv
++ li r9, 15
++ ?lvsl $inpperm, 0, r31
++ lvx v25, r9, r31
++ ?vperm v24, v24, v25, $inpperm
++
++ neg r8, $inp # prepare for unaligned access
++ vxor v7, v7, v7	# zero vector, required by the cores
++ ?lvsl $keyperm, 0, $key
++ ?lvsr $outperm, 0, $out
++ ?lvsr $inpperm, 0, r8 # -$inp
++ vnor $outmask, v7, v7 # 0xff..ff
++ lvx $inptail, 0, $inp
++ ?vperm $outmask, v7, $outmask, $outperm
++ addi $inp, $inp, 15 # 15 is not a typo
++ lvx $outhead, 0, $out
++
++ beq Lcbc_decrypt	# enc==0 means decrypt
++
++ bl _vpaes_encrypt_preheat
++ li r0, 16	# bytes consumed per iteration
++
++Lcbc_enc_loop:
++ vmr v0, $inptail
++ lvx $inptail, 0, $inp
++ addi $inp, $inp, 16
++ ?vperm v0, v0, $inptail, $inpperm
++ vxor v0, v0, v24 # ^= iv
++
++ bl _vpaes_encrypt_core
++
++ vmr v24, v0 # put aside iv
++ sub. r30, r30, r0 # len -= 16
++ vperm v0, v0, v0, $outperm # rotate right/left
++ vsel v1, $outhead, v0, $outmask
++ vmr $outhead, v0
++ stvx v1, 0, $out
++ addi $out, $out, 16
++ bne Lcbc_enc_loop
++
++ b Lcbc_done
++
++.align 5
++Lcbc_decrypt:
++ bl _vpaes_decrypt_preheat
++ li r0, 16	# bytes consumed per iteration
++
++Lcbc_dec_loop:
++ vmr v0, $inptail
++ lvx $inptail, 0, $inp
++ addi $inp, $inp, 16
++ ?vperm v0, v0, $inptail, $inpperm
++ vmr v25, v0 # put aside input
++
++ bl _vpaes_decrypt_core
++
++ vxor v0, v0, v24 # ^= iv
++ vmr v24, v25	# ciphertext block becomes next iv
++ sub. r30, r30, r0 # len -= 16
++ vperm v0, v0, v0, $outperm # rotate right/left
++ vsel v1, $outhead, v0, $outmask
++ vmr $outhead, v0
++ stvx v1, 0, $out
++ addi $out, $out, 16
++ bne Lcbc_dec_loop
++
++Lcbc_done:
++ addi $out, $out, -1	# flush straddling tail of last output block
++ lvx v1, 0, $out # redundant in aligned case
++ vsel v1, $outhead, v1, $outmask
++ stvx v1, 0, $out
++
++ neg r8, r31 # write [potentially unaligned] iv
++ ?lvsl $outperm, 0, r8
++ li r6, 15
++ vnor $outmask, v7, v7 # 0xff..ff
++ ?vperm $outmask, v7, $outmask, $outperm
++ lvx $outhead, 0, r31
++ vperm v24, v24, v24, $outperm # rotate right/left
++ vsel v0, $outhead, v24, $outmask
++ lvx v1, r6, r31
++ stvx v0, 0, r31
++ vsel v1, v24, v1, $outmask
++ stvx v1, r6, r31
++
++ mtspr 256, r7 # restore vrsave
++ li r10,`15+6*$SIZE_T`
++ li r11,`31+6*$SIZE_T`
++ lvx v20,r10,$sp	# reload callee-saved v20-v31
++ addi r10,r10,32
++ lvx v21,r11,$sp
++ addi r11,r11,32
++ lvx v22,r10,$sp
++ addi r10,r10,32
++ lvx v23,r11,$sp
++ addi r11,r11,32
++ lvx v24,r10,$sp
++ addi r10,r10,32
++ lvx v25,r11,$sp
++ addi r11,r11,32
++ lvx v26,r10,$sp
++ addi r10,r10,32
++ lvx v27,r11,$sp
++ addi r11,r11,32
++ lvx v28,r10,$sp
++ addi r10,r10,32
++ lvx v29,r11,$sp
++ addi r11,r11,32
++ lvx v30,r10,$sp
++ lvx v31,r11,$sp
++Lcbc_abort:
++ $POP r0, `$FRAME+$SIZE_T*2+$LRSAVE`($sp)
++ $POP r30,`$FRAME+$SIZE_T*0`($sp)
++ $POP r31,`$FRAME+$SIZE_T*1`($sp)
++ mtlr r0
++ addi $sp,$sp,`$FRAME+$SIZE_T*2`
++ blr
++ .long 0
++ .byte 0,12,0x04,1,0x80,2,6,0
++ .long 0
++.size .vpaes_cbc_encrypt,.-.vpaes_cbc_encrypt
++___
++}
++{
++my ($inp,$bits,$out)=map("r$_",(3..5));	# key-schedule arguments: raw key, key bits, output schedule
++my $dir="cr1";	# direction flag lives in CR field 1 (eq=encrypt, ne=decrypt - TODO confirm against caller)
++my ($invlo,$invhi,$iptlo,$ipthi,$rcon) = map("v$_",(10..13,24));
++
++$code.=<<___;
++########################################################
++## ##
++## AES key schedule ##
++## ##
++########################################################
++.align 4
++_vpaes_key_preheat:	# load every table the key schedule needs into v10-v26
++ mflr r8	# preserve caller's LR across the bl below
++ bl Lconsts	# r12 = _vpaes_consts
++ mtlr r8
++ li r11, 0xc0 # Lk_inv
++ li r10, 0xd0
++ li r9, 0xe0 # L_ipt
++ li r8, 0xf0
++
++ vspltisb v8,4 # 0x04..04
++ vxor v9,v9,v9 # 0x00..00
++ lvx $invlo, r12, r11 # Lk_inv
++ li r11, 0x120
++ lvx $invhi, r12, r10
++ li r10, 0x130
++ lvx $iptlo, r12, r9 # Lk_ipt
++ li r9, 0x220
++ lvx $ipthi, r12, r8
++ li r8, 0x230
++
++ lvx v14, r12, r11 # Lk_sb1
++ li r11, 0x240
++ lvx v15, r12, r10
++ li r10, 0x250
++
++ lvx v16, r12, r9 # Lk_dksd
++ li r9, 0x260
++ lvx v17, r12, r8
++ li r8, 0x270
++ lvx v18, r12, r11 # Lk_dksb
++ li r11, 0x280
++ lvx v19, r12, r10
++ li r10, 0x290
++ lvx v20, r12, r9 # Lk_dkse
++ li r9, 0x2a0
++ lvx v21, r12, r8
++ li r8, 0x2b0
++ lvx v22, r12, r11 # Lk_dks9
++ lvx v23, r12, r10
++
++ lvx v24, r12, r9 # Lk_rcon
++ lvx v25, 0, r12 # Lk_mc_forward[0]
++ lvx v26, r12, r8 # Lk_s63
++ blr
++ .long 0
++ .byte 0,12,0x14,0,0,0,0,0
++
++.align 4
++_vpaes_schedule_core:	# shared key-schedule body for enc/dec and all key sizes
++ mflr r7	# keep LR in r7 (callee-saved here) across nested bl's
++
++ bl _vpaes_key_preheat # load the tables
++
++ #lvx v0, 0, $inp # vmovdqu (%rdi), %xmm0 # load key (unaligned)
++ neg r8, $inp # prepare for unaligned access
++ lvx v0, 0, $inp
++ addi $inp, $inp, 15 # 15 is not typo
++ ?lvsr $inpperm, 0, r8 # -$inp
++ lvx v6, 0, $inp # v6 serves as inptail
++ addi $inp, $inp, 8
++ ?vperm v0, v0, v6, $inpperm
++
++ # input transform
++ vmr v3, v0 # vmovdqa %xmm0, %xmm3
++ bl _vpaes_schedule_transform
++ vmr v7, v0 # vmovdqa %xmm0, %xmm7
++
++ bne $dir, Lschedule_am_decrypting	# cr1 holds direction set by caller - TODO confirm eq=encrypt
++
++ # encrypting, output zeroth round key after transform
++ li r8, 0x30 # mov \$0x30,%r8d
++ addi r10, r12, 0x80 # lea .Lk_sr(%rip),%r10
++
++ ?lvsr $outperm, 0, $out # prepare for unaligned access
++ vnor $outmask, v9, v9 # 0xff..ff
++ lvx $outhead, 0, $out
++ ?vperm $outmask, v9, $outmask, $outperm
++
++ #stvx v0, 0, $out # vmovdqu %xmm0, (%rdx)
++ vperm v1, v0, v0, $outperm # rotate right/left
++ vsel v2, $outhead, v1, $outmask
++ vmr $outhead, v1
++ stvx v2, 0, $out
++ b Lschedule_go
++
++Lschedule_am_decrypting:
++ srwi r8, $bits, 1 # shr \$1,%r8d
++ andi. r8, r8, 32 # and \$32,%r8d
++ xori r8, r8, 32 # xor \$32,%r8d # nbits==192?0:32
++ addi r10, r12, 0x80 # lea .Lk_sr(%rip),%r10
++ # decrypting, output zeroth round key after shiftrows
++ lvx v1, r8, r10 # vmovdqa (%r8,%r10), %xmm1
++ vperm v4, v3, v3, v1 # vpshufb %xmm1, %xmm3, %xmm3
++
++ neg r0, $out # prepare for unaligned access
++ ?lvsl $outperm, 0, r0
++ addi $out, $out, 15 # 15 is not typo
++ vnor $outmask, v9, v9 # 0xff..ff
++ lvx $outhead, 0, $out
++ ?vperm $outmask, $outmask, v9, $outperm
++
++ #stvx v4, 0, $out # vmovdqu %xmm3, (%rdx)
++ vperm v4, v4, v4, $outperm # rotate right/left
++ vsel v2, $outhead, v4, $outmask
++ vmr $outhead, v4
++ stvx v2, 0, $out
++ xori r8, r8, 0x30 # xor \$0x30, %r8
++
++Lschedule_go:
++ cmplwi $bits, 192 # cmp \$192, %esi
++ bgt Lschedule_256
++ beq Lschedule_192
++ # 128: fall through
++
++##
++## .schedule_128
++##
++## 128-bit specific part of key schedule.
++##
++## This schedule is really simple, because all its parts
++## are accomplished by the subroutines.
++##
++Lschedule_128:
++ li r0, 10 # mov \$10, %esi
++ mtctr r0	# 10 rounds via CTR
++
++Loop_schedule_128:
++ bl _vpaes_schedule_round
++ bdz Lschedule_mangle_last # dec %esi
++ bl _vpaes_schedule_mangle # write output
++ b Loop_schedule_128
++
++##
++## .aes_schedule_192
++##
++## 192-bit specific part of key schedule.
++##
++## The main body of this schedule is the same as the 128-bit
++## schedule, but with more smearing. The long, high side is
++## stored in %xmm7 as before, and the short, low side is in
++## the high bits of %xmm6.
++##
++## This schedule is somewhat nastier, however, because each
++## round produces 192 bits of key material, or 1.5 round keys.
++## Therefore, on each cycle we do 2 rounds and produce 3 round
++## keys.
++##
++.align 4
++Lschedule_192:
++ li r0, 4 # mov \$4, %esi
++ lvx v0, 0, $inp
++ ?vperm v0, v6, v0, $inpperm
++ ?vsldoi v0, v3, v0, 8 # vmovdqu 8(%rdi),%xmm0 # load key part 2 (very unaligned)
++ bl _vpaes_schedule_transform # input transform
++ ?vsldoi v6, v0, v9, 8
++ ?vsldoi v6, v9, v6, 8 # clobber "low" side with zeros
++ mtctr r0	# 4 double-round cycles via CTR
++
++Loop_schedule_192:
++ bl _vpaes_schedule_round
++ ?vsldoi v0, v6, v0, 8 # vpalignr \$8,%xmm6,%xmm0,%xmm0
++ bl _vpaes_schedule_mangle # save key n
++ bl _vpaes_schedule_192_smear
++ bl _vpaes_schedule_mangle # save key n+1
++ bl _vpaes_schedule_round
++ bdz Lschedule_mangle_last # dec %esi
++ bl _vpaes_schedule_mangle # save key n+2
++ bl _vpaes_schedule_192_smear
++ b Loop_schedule_192
++
++##
++## .aes_schedule_256
++##
++## 256-bit specific part of key schedule.
++##
++## The structure here is very similar to the 128-bit
++## schedule, but with an additional "low side" in
++## %xmm6. The low side's rounds are the same as the
++## high side's, except no rcon and no rotation.
++##
++.align 4
++Lschedule_256:
++ li r0, 7 # mov \$7, %esi
++ addi $inp, $inp, 8
++ lvx v0, 0, $inp # vmovdqu 16(%rdi),%xmm0 # load key part 2 (unaligned)
++ ?vperm v0, v6, v0, $inpperm
++ bl _vpaes_schedule_transform # input transform
++ mtctr r0	# 7 high/low round pairs via CTR
++
++Loop_schedule_256:
++ bl _vpaes_schedule_mangle # output low result
++ vmr v6, v0 # vmovdqa %xmm0, %xmm6 # save cur_lo in xmm6
++
++ # high round
++ bl _vpaes_schedule_round
++ bdz Lschedule_mangle_last # dec %esi
++ bl _vpaes_schedule_mangle
++
++ # low round. swap xmm7 and xmm6
++ ?vspltw v0, v0, 3 # vpshufd \$0xFF, %xmm0, %xmm0
++ vmr v5, v7 # vmovdqa %xmm7, %xmm5
++ vmr v7, v6 # vmovdqa %xmm6, %xmm7
++ bl _vpaes_schedule_low_round
++ vmr v7, v5 # vmovdqa %xmm5, %xmm7
++
++ b Loop_schedule_256
++##
++## .aes_schedule_mangle_last
++##
++## Mangler for last round of key schedule
++## Mangles %xmm0
++## when encrypting, outputs out(%xmm0) ^ 63
++## when decrypting, outputs unskew(%xmm0)
++##
++## Always called right before return... jumps to cleanup and exits
++##
++.align 4
++Lschedule_mangle_last:
++ # schedule last round key from xmm0
++ li r11, 0x2e0 # lea .Lk_deskew(%rip),%r11
++ li r9, 0x2f0
++ bne $dir, Lschedule_mangle_last_dec
++
++ # encrypting
++ lvx v1, r8, r10 # vmovdqa (%r8,%r10),%xmm1
++ li r11, 0x2c0 # lea .Lk_opt(%rip), %r11 # prepare to output transform
++ li r9, 0x2d0 # prepare to output transform
++ vperm v0, v0, v0, v1 # vpshufb %xmm1, %xmm0, %xmm0 # output permute
++
++ lvx $iptlo, r11, r12 # reload $ipt
++ lvx $ipthi, r9, r12
++ addi $out, $out, 16 # add \$16, %rdx
++ vxor v0, v0, v26 # vpxor .Lk_s63(%rip), %xmm0, %xmm0
++ bl _vpaes_schedule_transform # output transform
++
++ #stvx v0, r0, $out # vmovdqu %xmm0, (%rdx) # save last key
++ vperm v0, v0, v0, $outperm # rotate right/left
++ vsel v2, $outhead, v0, $outmask
++ vmr $outhead, v0
++ stvx v2, 0, $out
++
++ addi $out, $out, 15 # 15 is not typo
++ lvx v1, 0, $out # redundant in aligned case
++ vsel v1, $outhead, v1, $outmask
++ stvx v1, 0, $out
++ b Lschedule_mangle_done
++
++.align 4
++Lschedule_mangle_last_dec:
++ lvx $iptlo, r11, r12 # reload $ipt
++ lvx $ipthi, r9, r12
++ addi $out, $out, -16 # add \$-16, %rdx
++ vxor v0, v0, v26 # vpxor .Lk_s63(%rip), %xmm0, %xmm0
++ bl _vpaes_schedule_transform # output transform
++
++ #stvx v0, r0, $out # vmovdqu %xmm0, (%rdx) # save last key
++ vperm v0, v0, v0, $outperm # rotate right/left
++ vsel v2, $outhead, v0, $outmask
++ vmr $outhead, v0
++ stvx v2, 0, $out
++
++ addi $out, $out, -15 # -15 is not typo
++ lvx v1, 0, $out # redundant in aligned case
++ vsel v1, $outhead, v1, $outmask
++ stvx v1, 0, $out
++
++Lschedule_mangle_done:
++ mtlr r7	# restore return address saved on entry
++ # cleanup: scrub key material from scratch registers
++ vxor v0, v0, v0 # vpxor %xmm0, %xmm0, %xmm0
++ vxor v1, v1, v1 # vpxor %xmm1, %xmm1, %xmm1
++ vxor v2, v2, v2 # vpxor %xmm2, %xmm2, %xmm2
++ vxor v3, v3, v3 # vpxor %xmm3, %xmm3, %xmm3
++ vxor v4, v4, v4 # vpxor %xmm4, %xmm4, %xmm4
++ vxor v5, v5, v5 # vpxor %xmm5, %xmm5, %xmm5
++ vxor v6, v6, v6 # vpxor %xmm6, %xmm6, %xmm6
++ vxor v7, v7, v7 # vpxor %xmm7, %xmm7, %xmm7
++
++ blr
++ .long 0
++ .byte 0,12,0x14,0,0,0,0,0
++
++##
++## .aes_schedule_192_smear
++##
++## Smear the short, low side in the 192-bit key schedule.
++## (%xmm7/%xmm6 below correspond to v7/v6 here.)
++##
++## Inputs:
++## %xmm7: high side, b a x y
++## %xmm6: low side, d c 0 0
++## %xmm13: 0
++##
++## Outputs:
++## %xmm6: b+c+d b+c 0 0
++## %xmm0: b+c+d b+c b a
++##
++.align 4
++_vpaes_schedule_192_smear:
++ ?vspltw v0, v7, 3
++ ?vsldoi v1, v9, v6, 12 # vpshufd \$0x80, %xmm6, %xmm1 # d c 0 0 -> c 0 0 0
++ ?vsldoi v0, v7, v0, 8 # vpshufd \$0xFE, %xmm7, %xmm0 # b a _ _ -> b b b a
++ vxor v6, v6, v1 # vpxor %xmm1, %xmm6, %xmm6 # -> c+d c 0 0
++ vxor v6, v6, v0 # vpxor %xmm0, %xmm6, %xmm6 # -> b+c+d b+c b a
++ vmr v0, v6	# return smeared value in v0 too
++ ?vsldoi v6, v6, v9, 8
++ ?vsldoi v6, v9, v6, 8 # clobber low side with zeros
++ blr
++ .long 0
++ .byte 0,12,0x14,0,0,0,0,0
++
++##
++## .aes_schedule_round
++##
++## Runs one main round of the key schedule on %xmm0, %xmm7
++## (v0/v7 here; v24 = rcon, v9 = zero).
++##
++## Specifically, runs subbytes on the high dword of %xmm0
++## then rotates it by one byte and xors into the low dword of
++## %xmm7.
++##
++## Adds rcon from low byte of %xmm8, then rotates %xmm8 for
++## next rcon.
++##
++## Smears the dwords of %xmm7 by xoring the low into the
++## second low, result into third, result into highest.
++##
++## Returns results in %xmm7 = %xmm0.
++## Clobbers %xmm1-%xmm4, %r11.
++##
++.align 4
++_vpaes_schedule_round:
++ # extract rcon from xmm8
++ #vxor v4, v4, v4 # vpxor %xmm4, %xmm4, %xmm4
++ ?vsldoi v1, $rcon, v9, 15 # vpalignr \$15, %xmm8, %xmm4, %xmm1
++ ?vsldoi $rcon, $rcon, $rcon, 15 # vpalignr \$15, %xmm8, %xmm8, %xmm8
++ vxor v7, v7, v1 # vpxor %xmm1, %xmm7, %xmm7
++
++ # rotate
++ ?vspltw v0, v0, 3 # vpshufd \$0xFF, %xmm0, %xmm0
++ ?vsldoi v0, v0, v0, 1 # vpalignr \$1, %xmm0, %xmm0, %xmm0
++
++ # fall through...
++
++ # low round: same as high round, but no rotation and no rcon.
++_vpaes_schedule_low_round:
++ # smear xmm7
++ ?vsldoi v1, v9, v7, 12 # vpslldq \$4, %xmm7, %xmm1
++ vxor v7, v7, v1 # vpxor %xmm1, %xmm7, %xmm7
++ vspltisb v1, 0x0f # 0x0f..0f
++ ?vsldoi v4, v9, v7, 8 # vpslldq \$8, %xmm7, %xmm4
++
++ # subbytes
++ vand v1, v1, v0 # vpand %xmm9, %xmm0, %xmm1 # 0 = k
++ vsrb v0, v0, v8 # vpsrlb \$4, %xmm0, %xmm0 # 1 = i
++ vxor v7, v7, v4 # vpxor %xmm4, %xmm7, %xmm7
++ vperm v2, $invhi, v9, v1 # vpshufb %xmm1, %xmm11, %xmm2 # 2 = a/k
++ vxor v1, v1, v0 # vpxor %xmm0, %xmm1, %xmm1 # 0 = j
++ vperm v3, $invlo, v9, v0 # vpshufb %xmm0, %xmm10, %xmm3 # 3 = 1/i
++ vxor v3, v3, v2 # vpxor %xmm2, %xmm3, %xmm3 # 3 = iak = 1/i + a/k
++ vperm v4, $invlo, v9, v1 # vpshufb %xmm1, %xmm10, %xmm4 # 4 = 1/j
++ vxor v7, v7, v26 # vpxor .Lk_s63(%rip), %xmm7, %xmm7
++ vperm v3, $invlo, v9, v3 # vpshufb %xmm3, %xmm10, %xmm3 # 2 = 1/iak
++ vxor v4, v4, v2 # vpxor %xmm2, %xmm4, %xmm4 # 4 = jak = 1/j + a/k
++ vperm v2, $invlo, v9, v4 # vpshufb %xmm4, %xmm10, %xmm2 # 3 = 1/jak
++ vxor v3, v3, v1 # vpxor %xmm1, %xmm3, %xmm3 # 2 = io
++ vxor v2, v2, v0 # vpxor %xmm0, %xmm2, %xmm2 # 3 = jo
++ vperm v4, v15, v9, v3 # vpshufb %xmm3, %xmm13, %xmm4 # 4 = sbou
++ vperm v1, v14, v9, v2 # vpshufb %xmm2, %xmm12, %xmm1 # 0 = sb1t
++ vxor v1, v1, v4 # vpxor %xmm4, %xmm1, %xmm1 # 0 = sbox output
++
++ # add in smeared stuff
++ vxor v0, v1, v7 # vpxor %xmm7, %xmm1, %xmm0
++ vxor v7, v1, v7 # vmovdqa %xmm0, %xmm7
++ blr
++ .long 0
++ .byte 0,12,0x14,0,0,0,0,0
++
++##
++## .aes_schedule_transform
++##
++## Linear-transform %xmm0 according to tables at (%r11)
++##
++## Requires that %xmm9 = 0x0F0F... as in preheat
++## Output in %xmm0
++## Clobbers %xmm2
++##
++.align 4
++_vpaes_schedule_transform:
++ #vand v1, v0, v9 # vpand %xmm9, %xmm0, %xmm1
++ vsrb v2, v0, v8 # vpsrlb \$4, %xmm0, %xmm0
++ # vmovdqa (%r11), %xmm2 # lo
++ vperm v0, $iptlo, $iptlo, v0 # vpshufb %xmm1, %xmm2, %xmm2
++ # vmovdqa 16(%r11), %xmm1 # hi
++ vperm v2, $ipthi, $ipthi, v2 # vpshufb %xmm0, %xmm1, %xmm0
++ vxor v0, v0, v2 # vpxor %xmm2, %xmm0, %xmm0
++ blr
++ .long 0
++ .byte 0,12,0x14,0,0,0,0,0
++
++##
++## .aes_schedule_mangle
++##
++## Mangle xmm0 from (basis-transformed) standard version
++## to our version.
++##
++## On encrypt,
++## xor with 0x63
++## multiply by circulant 0,1,1,1
++## apply shiftrows transform
++##
++## On decrypt,
++## xor with 0x63
++## multiply by "inverse mixcolumns" circulant E,B,D,9
++## deskew
++## apply shiftrows transform
++##
++##
++## Writes out to (%rdx), and increments or decrements it
++## Keeps track of round number mod 4 in %r8
++## Preserves xmm0
++## Clobbers xmm1-xmm5
++##
++.align 4
++_vpaes_schedule_mangle:
++ #vmr v4, v0 # vmovdqa %xmm0, %xmm4 # save xmm0 for later
++ # vmovdqa .Lk_mc_forward(%rip),%xmm5
++ bne $dir, Lschedule_mangle_dec
++
++ # encrypting
++ vxor v4, v0, v26 # vpxor .Lk_s63(%rip), %xmm0, %xmm4
++ addi $out, $out, 16 # add \$16, %rdx
++ vperm v4, v4, v4, v25 # vpshufb %xmm5, %xmm4, %xmm4
++ vperm v1, v4, v4, v25 # vpshufb %xmm5, %xmm4, %xmm1
++ vperm v3, v1, v1, v25 # vpshufb %xmm5, %xmm1, %xmm3
++ vxor v4, v4, v1 # vpxor %xmm1, %xmm4, %xmm4
++ lvx v1, r8, r10 # vmovdqa (%r8,%r10), %xmm1
++ vxor v3, v3, v4 # vpxor %xmm4, %xmm3, %xmm3
++
++ vperm v3, v3, v3, v1 # vpshufb %xmm1, %xmm3, %xmm3
++ addi r8, r8, -16 # add \$-16, %r8
++ andi. r8, r8, 0x30 # and \$0x30, %r8
++
++ #stvx v3, 0, $out # vmovdqu %xmm3, (%rdx)
++ vperm v1, v3, v3, $outperm # rotate right/left
++ vsel v2, $outhead, v1, $outmask
++ vmr $outhead, v1
++ stvx v2, 0, $out
++ blr
++
++.align 4
++Lschedule_mangle_dec:
++ # inverse mix columns
++ # lea .Lk_dksd(%rip),%r11
++ vsrb v1, v0, v8 # vpsrlb \$4, %xmm4, %xmm1 # 1 = hi
++ #and v4, v0, v9 # vpand %xmm9, %xmm4, %xmm4 # 4 = lo
++
++ # vmovdqa 0x00(%r11), %xmm2
++ vperm v2, v16, v16, v0 # vpshufb %xmm4, %xmm2, %xmm2
++ # vmovdqa 0x10(%r11), %xmm3
++ vperm v3, v17, v17, v1 # vpshufb %xmm1, %xmm3, %xmm3
++ vxor v3, v3, v2 # vpxor %xmm2, %xmm3, %xmm3
++ vperm v3, v3, v9, v25 # vpshufb %xmm5, %xmm3, %xmm3
++
++ # vmovdqa 0x20(%r11), %xmm2
++ vperm v2, v18, v18, v0 # vpshufb %xmm4, %xmm2, %xmm2
++ vxor v2, v2, v3 # vpxor %xmm3, %xmm2, %xmm2
++ # vmovdqa 0x30(%r11), %xmm3
++ vperm v3, v19, v19, v1 # vpshufb %xmm1, %xmm3, %xmm3
++ vxor v3, v3, v2 # vpxor %xmm2, %xmm3, %xmm3
++ vperm v3, v3, v9, v25 # vpshufb %xmm5, %xmm3, %xmm3
++
++ # vmovdqa 0x40(%r11), %xmm2
++ vperm v2, v20, v20, v0 # vpshufb %xmm4, %xmm2, %xmm2
++ vxor v2, v2, v3 # vpxor %xmm3, %xmm2, %xmm2
++ # vmovdqa 0x50(%r11), %xmm3
++ vperm v3, v21, v21, v1 # vpshufb %xmm1, %xmm3, %xmm3
++ vxor v3, v3, v2 # vpxor %xmm2, %xmm3, %xmm3
++
++ # vmovdqa 0x60(%r11), %xmm2
++ vperm v2, v22, v22, v0 # vpshufb %xmm4, %xmm2, %xmm2
++ vperm v3, v3, v9, v25 # vpshufb %xmm5, %xmm3, %xmm3
++ # vmovdqa 0x70(%r11), %xmm4
++ vperm v4, v23, v23, v1 # vpshufb %xmm1, %xmm4, %xmm4
++ lvx v1, r8, r10 # vmovdqa (%r8,%r10), %xmm1
++ vxor v2, v2, v3 # vpxor %xmm3, %xmm2, %xmm2
++ vxor v3, v4, v2 # vpxor %xmm2, %xmm4, %xmm3
++
++ addi $out, $out, -16 # add \$-16, %rdx
++
++ vperm v3, v3, v3, v1 # vpshufb %xmm1, %xmm3, %xmm3
++ addi r8, r8, -16 # add \$-16, %r8
++ andi. r8, r8, 0x30 # and \$0x30, %r8
++
++ #stvx v3, 0, $out # vmovdqu %xmm3, (%rdx)
++ vperm v1, v3, v3, $outperm # rotate right/left
++ vsel v2, $outhead, v1, $outmask
++ vmr $outhead, v1
++ stvx v2, 0, $out
++ blr
++ .long 0
++ .byte 0,12,0x14,0,0,0,0,0
++
++.globl .vpaes_set_encrypt_key
++.align 5
++.vpaes_set_encrypt_key:
++ $STU $sp,-$FRAME($sp)
++ li r10,`15+6*$SIZE_T`
++ li r11,`31+6*$SIZE_T`
++ mflr r0
++ mfspr r6, 256 # save vrsave
++ stvx v20,r10,$sp
++ addi r10,r10,32
++ stvx v21,r11,$sp
++ addi r11,r11,32
++ stvx v22,r10,$sp
++ addi r10,r10,32
++ stvx v23,r11,$sp
++ addi r11,r11,32
++ stvx v24,r10,$sp
++ addi r10,r10,32
++ stvx v25,r11,$sp
++ addi r11,r11,32
++ stvx v26,r10,$sp
++ addi r10,r10,32
++ stvx v27,r11,$sp
++ addi r11,r11,32
++ stvx v28,r10,$sp
++ addi r10,r10,32
++ stvx v29,r11,$sp
++ addi r11,r11,32
++ stvx v30,r10,$sp
++ stvx v31,r11,$sp
++ stw r6,`$FRAME-4`($sp) # save vrsave
++ li r7, -1
++ $PUSH r0, `$FRAME+$LRSAVE`($sp)
++ mtspr 256, r7 # preserve all AltiVec registers
++
++ srwi r9, $bits, 5 # shr \$5,%eax
++ addi r9, r9, 6 # add \$5,%eax
++ stw r9, 240($out) # mov %eax,240(%rdx) # AES_KEY->rounds = nbits/32+5;
++
++ cmplw $dir, $bits, $bits # set encrypt direction
++ li r8, 0x30 # mov \$0x30,%r8d
++ bl _vpaes_schedule_core
++
++ $POP r0, `$FRAME+$LRSAVE`($sp)
++ li r10,`15+6*$SIZE_T`
++ li r11,`31+6*$SIZE_T`
++ mtspr 256, r6 # restore vrsave
++ mtlr r0
++ xor r3, r3, r3
++ lvx v20,r10,$sp
++ addi r10,r10,32
++ lvx v21,r11,$sp
++ addi r11,r11,32
++ lvx v22,r10,$sp
++ addi r10,r10,32
++ lvx v23,r11,$sp
++ addi r11,r11,32
++ lvx v24,r10,$sp
++ addi r10,r10,32
++ lvx v25,r11,$sp
++ addi r11,r11,32
++ lvx v26,r10,$sp
++ addi r10,r10,32
++ lvx v27,r11,$sp
++ addi r11,r11,32
++ lvx v28,r10,$sp
++ addi r10,r10,32
++ lvx v29,r11,$sp
++ addi r11,r11,32
++ lvx v30,r10,$sp
++ lvx v31,r11,$sp
++ addi $sp,$sp,$FRAME
++ blr
++ .long 0
++ .byte 0,12,0x04,1,0x80,0,3,0
++ .long 0
++.size .vpaes_set_encrypt_key,.-.vpaes_set_encrypt_key
++
++.globl .vpaes_set_decrypt_key
++.align 4
++.vpaes_set_decrypt_key:
++ $STU $sp,-$FRAME($sp)
++ li r10,`15+6*$SIZE_T`
++ li r11,`31+6*$SIZE_T`
++ mflr r0
++ mfspr r6, 256 # save vrsave
++ stvx v20,r10,$sp
++ addi r10,r10,32
++ stvx v21,r11,$sp
++ addi r11,r11,32
++ stvx v22,r10,$sp
++ addi r10,r10,32
++ stvx v23,r11,$sp
++ addi r11,r11,32
++ stvx v24,r10,$sp
++ addi r10,r10,32
++ stvx v25,r11,$sp
++ addi r11,r11,32
++ stvx v26,r10,$sp
++ addi r10,r10,32
++ stvx v27,r11,$sp
++ addi r11,r11,32
++ stvx v28,r10,$sp
++ addi r10,r10,32
++ stvx v29,r11,$sp
++ addi r11,r11,32
++ stvx v30,r10,$sp
++ stvx v31,r11,$sp
++ stw r6,`$FRAME-4`($sp) # save vrsave
++ li r7, -1
++ $PUSH r0, `$FRAME+$LRSAVE`($sp)
++ mtspr 256, r7 # preserve all AltiVec registers
++
++ srwi r9, $bits, 5 # shr \$5,%eax
++ addi r9, r9, 6 # add \$5,%eax
++ stw r9, 240($out) # mov %eax,240(%rdx) # AES_KEY->rounds = nbits/32+5;
++
++ slwi r9, r9, 4 # shl \$4,%eax
++ add $out, $out, r9 # lea (%rdx,%rax),%rdx
++
++ cmplwi $dir, $bits, 0 # set decrypt direction
++ srwi r8, $bits, 1 # shr \$1,%r8d
++ andi. r8, r8, 32 # and \$32,%r8d
++ xori r8, r8, 32 # xor \$32,%r8d # nbits==192?0:32
++ bl _vpaes_schedule_core
++
++ $POP r0, `$FRAME+$LRSAVE`($sp)
++ li r10,`15+6*$SIZE_T`
++ li r11,`31+6*$SIZE_T`
++ mtspr 256, r6 # restore vrsave
++ mtlr r0
++ xor r3, r3, r3
++ lvx v20,r10,$sp
++ addi r10,r10,32
++ lvx v21,r11,$sp
++ addi r11,r11,32
++ lvx v22,r10,$sp
++ addi r10,r10,32
++ lvx v23,r11,$sp
++ addi r11,r11,32
++ lvx v24,r10,$sp
++ addi r10,r10,32
++ lvx v25,r11,$sp
++ addi r11,r11,32
++ lvx v26,r10,$sp
++ addi r10,r10,32
++ lvx v27,r11,$sp
++ addi r11,r11,32
++ lvx v28,r10,$sp
++ addi r10,r10,32
++ lvx v29,r11,$sp
++ addi r11,r11,32
++ lvx v30,r10,$sp
++ lvx v31,r11,$sp
++ addi $sp,$sp,$FRAME
++ blr
++ .long 0
++ .byte 0,12,0x04,1,0x80,0,3,0
++ .long 0
++.size .vpaes_set_decrypt_key,.-.vpaes_set_decrypt_key
++___
++}
++
++my $consts=1;
++foreach (split("\n",$code)) {
++ s/\`([^\`]*)\`/eval $1/geo;
++
++ # constants table endian-specific conversion
++ if ($consts && m/\.long\s+(.+)\s+(\?[a-z]*)$/o) {
++ my $conv=$2;
++ my @bytes=();
++
++ # convert to endian-agnostic format
++ foreach (split(/,\s+/,$1)) {
++ my $l = /^0/?oct:int;
++ push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
++ }
++
++ # little-endian conversion
++ if ($flavour =~ /le$/o) {
++ SWITCH: for($conv) {
++ /\?inv/ && do { @bytes=map($_^0xf, at bytes); last; };
++ /\?rev/ && do { @bytes=reverse(@bytes); last; };
++ }
++ }
++
++ #emit
++ print ".byte\t",join(',',map (sprintf("0x%02x",$_), at bytes)),"\n";
++ next;
++ }
++ $consts=0 if (m/Lconsts:/o); # end of table
++
++ # instructions prefixed with '?' are endian-specific and need
++ # to be adjusted accordingly...
++ if ($flavour =~ /le$/o) { # little-endian
++ s/\?lvsr/lvsl/o or
++ s/\?lvsl/lvsr/o or
++ s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
++ s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
++ s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
++ } else { # big-endian
++ s/\?([a-z]+)/$1/o;
++ }
++
++ print $_,"\n";
++}
++
++close STDOUT;
+diff -up openssl-1.0.1i/crypto/aes/Makefile.ppc-asm openssl-1.0.1i/crypto/aes/Makefile
+--- openssl-1.0.1i/crypto/aes/Makefile.ppc-asm 2014-08-06 23:18:31.000000000 +0200
++++ openssl-1.0.1i/crypto/aes/Makefile 2014-08-13 19:46:21.092578104 +0200
+@@ -71,6 +71,10 @@ aes-sparcv9.s: asm/aes-sparcv9.pl
+
+ aes-ppc.s: asm/aes-ppc.pl
+ $(PERL) asm/aes-ppc.pl $(PERLASM_SCHEME) $@
++vpaes-ppc.s: asm/vpaes-ppc.pl
++ $(PERL) asm/vpaes-ppc.pl $(PERLASM_SCHEME) $@
++aesp8-ppc.s: asm/aesp8-ppc.pl
++ $(PERL) asm/aesp8-ppc.pl $(PERLASM_SCHEME) $@
+
+ aes-parisc.s: asm/aes-parisc.pl
+ $(PERL) asm/aes-parisc.pl $(PERLASM_SCHEME) $@
+diff -up openssl-1.0.1i/crypto/bn/asm/ppc-mont.pl.ppc-asm openssl-1.0.1i/crypto/bn/asm/ppc-mont.pl
+--- openssl-1.0.1i/crypto/bn/asm/ppc-mont.pl.ppc-asm 2014-08-06 23:10:56.000000000 +0200
++++ openssl-1.0.1i/crypto/bn/asm/ppc-mont.pl 2014-08-13 19:46:21.093578128 +0200
+@@ -325,6 +325,7 @@ Lcopy: ; copy or in-place refresh
+ .long 0
+ .byte 0,12,4,0,0x80,12,6,0
+ .long 0
++.size .bn_mul_mont_int,.-.bn_mul_mont_int
+
+ .asciz "Montgomery Multiplication for PPC, CRYPTOGAMS by <appro\@openssl.org>"
+ ___
+diff -up openssl-1.0.1i/crypto/bn/asm/ppc.pl.ppc-asm openssl-1.0.1i/crypto/bn/asm/ppc.pl
+--- openssl-1.0.1i/crypto/bn/asm/ppc.pl.ppc-asm 2014-08-06 23:10:56.000000000 +0200
++++ openssl-1.0.1i/crypto/bn/asm/ppc.pl 2014-08-13 19:46:21.094578151 +0200
+@@ -392,6 +392,7 @@ $data=<<EOF;
+ .long 0
+ .byte 0,12,0x14,0,0,0,2,0
+ .long 0
++.size .bn_sqr_comba4,.-.bn_sqr_comba4
+
+ #
+ # NOTE: The following label name should be changed to
+@@ -819,6 +820,7 @@ $data=<<EOF;
+ .long 0
+ .byte 0,12,0x14,0,0,0,2,0
+ .long 0
++.size .bn_sqr_comba8,.-.bn_sqr_comba8
+
+ #
+ # NOTE: The following label name should be changed to
+@@ -972,6 +974,7 @@ $data=<<EOF;
+ .long 0
+ .byte 0,12,0x14,0,0,0,3,0
+ .long 0
++.size .bn_mul_comba4,.-.bn_mul_comba4
+
+ #
+ # NOTE: The following label name should be changed to
+@@ -1510,6 +1513,7 @@ $data=<<EOF;
+ .long 0
+ .byte 0,12,0x14,0,0,0,3,0
+ .long 0
++.size .bn_mul_comba8,.-.bn_mul_comba8
+
+ #
+ # NOTE: The following label name should be changed to
+@@ -1560,6 +1564,7 @@ Lppcasm_sub_adios:
+ .long 0
+ .byte 0,12,0x14,0,0,0,4,0
+ .long 0
++.size .bn_sub_words,.-.bn_sub_words
+
+ #
+ # NOTE: The following label name should be changed to
+@@ -1605,6 +1610,7 @@ Lppcasm_add_adios:
+ .long 0
+ .byte 0,12,0x14,0,0,0,4,0
+ .long 0
++.size .bn_add_words,.-.bn_add_words
+
+ #
+ # NOTE: The following label name should be changed to
+@@ -1720,6 +1726,7 @@ Lppcasm_div9:
+ .long 0
+ .byte 0,12,0x14,0,0,0,3,0
+ .long 0
++.size .bn_div_words,.-.bn_div_words
+
+ #
+ # NOTE: The following label name should be changed to
+@@ -1761,6 +1768,7 @@ Lppcasm_sqr_adios:
+ .long 0
+ .byte 0,12,0x14,0,0,0,3,0
+ .long 0
++.size .bn_sqr_words,.-.bn_sqr_words
+
+ #
+ # NOTE: The following label name should be changed to
+@@ -1866,6 +1874,7 @@ Lppcasm_mw_OVER:
+ .long 0
+ .byte 0,12,0x14,0,0,0,4,0
+ .long 0
++.size bn_mul_words,.-bn_mul_words
+
+ #
+ # NOTE: The following label name should be changed to
+@@ -1991,6 +2000,7 @@ Lppcasm_maw_adios:
+ .long 0
+ .byte 0,12,0x14,0,0,0,4,0
+ .long 0
++.size .bn_mul_add_words,.-.bn_mul_add_words
+ .align 4
+ EOF
+ $data =~ s/\`([^\`]*)\`/eval $1/gem;
+diff -up openssl-1.0.1i/crypto/bn/asm/ppc64-mont.pl.ppc-asm openssl-1.0.1i/crypto/bn/asm/ppc64-mont.pl
+--- openssl-1.0.1i/crypto/bn/asm/ppc64-mont.pl.ppc-asm 2014-08-06 23:10:56.000000000 +0200
++++ openssl-1.0.1i/crypto/bn/asm/ppc64-mont.pl 2014-08-13 19:46:21.094578151 +0200
+@@ -1,7 +1,7 @@
+ #!/usr/bin/env perl
+
+ # ====================================================================
+-# Written by Andy Polyakov <appro at fy.chalmers.se> for the OpenSSL
++# Written by Andy Polyakov <appro at openssl.org> for the OpenSSL
+ # project. The module is, however, dual licensed under OpenSSL and
+ # CRYPTOGAMS licenses depending on where you obtain it. For further
+ # details see http://www.openssl.org/~appro/cryptogams/.
+@@ -65,6 +65,14 @@
+ # others alternative would be to break dependence on upper halves of
+ # GPRs by sticking to 32-bit integer operations...
+
++# December 2012
++
++# Remove above mentioned dependence on GPRs' upper halves in 32-bit
++# build. No signal masking overhead, but integer instructions are
++# *more* numerous... It's still "universally" faster than 32-bit
++# ppc-mont.pl, but improvement coefficient is not as impressive
++# for longer keys...
++
+ $flavour = shift;
+
+ if ($flavour =~ /32/) {
+@@ -110,6 +118,9 @@ $tp="r10";
+ $j="r11";
+ $i="r12";
+ # non-volatile registers
++$c1="r19";
++$n1="r20";
++$a1="r21";
+ $nap_d="r22"; # interleaved ap and np in double format
+ $a0="r23"; # ap[0]
+ $t0="r24"; # temporary registers
+@@ -180,8 +191,8 @@ $T3a="f30"; $T3b="f31";
+ # . .
+ # +-------------------------------+
+ # . .
+-# -12*size_t +-------------------------------+
+-# | 10 saved gpr, r22-r31 |
++# -13*size_t +-------------------------------+
++# | 13 saved gpr, r19-r31 |
+ # . .
+ # . .
+ # -12*8 +-------------------------------+
+@@ -215,6 +226,9 @@ $code=<<___;
+ mr $i,$sp
+ $STUX $sp,$sp,$tp ; alloca
+
++ $PUSH r19,`-12*8-13*$SIZE_T`($i)
++ $PUSH r20,`-12*8-12*$SIZE_T`($i)
++ $PUSH r21,`-12*8-11*$SIZE_T`($i)
+ $PUSH r22,`-12*8-10*$SIZE_T`($i)
+ $PUSH r23,`-12*8-9*$SIZE_T`($i)
+ $PUSH r24,`-12*8-8*$SIZE_T`($i)
+@@ -237,40 +251,26 @@ $code=<<___;
+ stfd f29,`-3*8`($i)
+ stfd f30,`-2*8`($i)
+ stfd f31,`-1*8`($i)
+-___
+-$code.=<<___ if ($SIZE_T==8);
+- ld $a0,0($ap) ; pull ap[0] value
+- ld $n0,0($n0) ; pull n0[0] value
+- ld $t3,0($bp) ; bp[0]
+-___
+-$code.=<<___ if ($SIZE_T==4);
+- mr $t1,$n0
+- lwz $a0,0($ap) ; pull ap[0,1] value
+- lwz $t0,4($ap)
+- lwz $n0,0($t1) ; pull n0[0,1] value
+- lwz $t1,4($t1)
+- lwz $t3,0($bp) ; bp[0,1]
+- lwz $t2,4($bp)
+- insrdi $a0,$t0,32,0
+- insrdi $n0,$t1,32,0
+- insrdi $t3,$t2,32,0
+-___
+-$code.=<<___;
++
+ addi $tp,$sp,`$FRAME+$TRANSFER+8+64`
+ li $i,-64
+ add $nap_d,$tp,$num
+ and $nap_d,$nap_d,$i ; align to 64 bytes
+-
+- mulld $t7,$a0,$t3 ; ap[0]*bp[0]
+ ; nap_d is off by 1, because it's used with stfdu/lfdu
+ addi $nap_d,$nap_d,-8
+ srwi $j,$num,`3+1` ; counter register, num/2
+- mulld $t7,$t7,$n0 ; tp[0]*n0
+ addi $j,$j,-1
+ addi $tp,$sp,`$FRAME+$TRANSFER-8`
+ li $carry,0
+ mtctr $j
++___
++
++$code.=<<___ if ($SIZE_T==8);
++ ld $a0,0($ap) ; pull ap[0] value
++ ld $t3,0($bp) ; bp[0]
++ ld $n0,0($n0) ; pull n0[0] value
+
++ mulld $t7,$a0,$t3 ; ap[0]*bp[0]
+ ; transfer bp[0] to FPU as 4x16-bit values
+ extrdi $t0,$t3,16,48
+ extrdi $t1,$t3,16,32
+@@ -280,6 +280,8 @@ $code.=<<___;
+ std $t1,`$FRAME+8`($sp)
+ std $t2,`$FRAME+16`($sp)
+ std $t3,`$FRAME+24`($sp)
++
++ mulld $t7,$t7,$n0 ; tp[0]*n0
+ ; transfer (ap[0]*bp[0])*n0 to FPU as 4x16-bit values
+ extrdi $t4,$t7,16,48
+ extrdi $t5,$t7,16,32
+@@ -289,21 +291,61 @@ $code.=<<___;
+ std $t5,`$FRAME+40`($sp)
+ std $t6,`$FRAME+48`($sp)
+ std $t7,`$FRAME+56`($sp)
+-___
+-$code.=<<___ if ($SIZE_T==8);
+- lwz $t0,4($ap) ; load a[j] as 32-bit word pair
+- lwz $t1,0($ap)
+- lwz $t2,12($ap) ; load a[j+1] as 32-bit word pair
++
++ extrdi $t0,$a0,32,32 ; lwz $t0,4($ap)
++ extrdi $t1,$a0,32,0 ; lwz $t1,0($ap)
++ lwz $t2,12($ap) ; load a[1] as 32-bit word pair
+ lwz $t3,8($ap)
+- lwz $t4,4($np) ; load n[j] as 32-bit word pair
++ lwz $t4,4($np) ; load n[0] as 32-bit word pair
+ lwz $t5,0($np)
+- lwz $t6,12($np) ; load n[j+1] as 32-bit word pair
++ lwz $t6,12($np) ; load n[1] as 32-bit word pair
+ lwz $t7,8($np)
+ ___
+ $code.=<<___ if ($SIZE_T==4);
+- lwz $t0,0($ap) ; load a[j..j+3] as 32-bit word pairs
+- lwz $t1,4($ap)
+- lwz $t2,8($ap)
++ lwz $a0,0($ap) ; pull ap[0,1] value
++ mr $n1,$n0
++ lwz $a1,4($ap)
++ li $c1,0
++ lwz $t1,0($bp) ; bp[0,1]
++ lwz $t3,4($bp)
++ lwz $n0,0($n1) ; pull n0[0,1] value
++ lwz $n1,4($n1)
++
++ mullw $t4,$a0,$t1 ; mulld ap[0]*bp[0]
++ mulhwu $t5,$a0,$t1
++ mullw $t6,$a1,$t1
++ mullw $t7,$a0,$t3
++ add $t5,$t5,$t6
++ add $t5,$t5,$t7
++ ; transfer bp[0] to FPU as 4x16-bit values
++ extrwi $t0,$t1,16,16
++ extrwi $t1,$t1,16,0
++ extrwi $t2,$t3,16,16
++ extrwi $t3,$t3,16,0
++ std $t0,`$FRAME+0`($sp) ; yes, std in 32-bit build
++ std $t1,`$FRAME+8`($sp)
++ std $t2,`$FRAME+16`($sp)
++ std $t3,`$FRAME+24`($sp)
++
++ mullw $t0,$t4,$n0 ; mulld tp[0]*n0
++ mulhwu $t1,$t4,$n0
++ mullw $t2,$t5,$n0
++ mullw $t3,$t4,$n1
++ add $t1,$t1,$t2
++ add $t1,$t1,$t3
++ ; transfer (ap[0]*bp[0])*n0 to FPU as 4x16-bit values
++ extrwi $t4,$t0,16,16
++ extrwi $t5,$t0,16,0
++ extrwi $t6,$t1,16,16
++ extrwi $t7,$t1,16,0
++ std $t4,`$FRAME+32`($sp) ; yes, std in 32-bit build
++ std $t5,`$FRAME+40`($sp)
++ std $t6,`$FRAME+48`($sp)
++ std $t7,`$FRAME+56`($sp)
++
++ mr $t0,$a0 ; lwz $t0,0($ap)
++ mr $t1,$a1 ; lwz $t1,4($ap)
++ lwz $t2,8($ap) ; load a[j..j+3] as 32-bit word pairs
+ lwz $t3,12($ap)
+ lwz $t4,0($np) ; load n[j..j+3] as 32-bit word pairs
+ lwz $t5,4($np)
+@@ -319,7 +361,7 @@ $code.=<<___;
+ lfd $nb,`$FRAME+40`($sp)
+ lfd $nc,`$FRAME+48`($sp)
+ lfd $nd,`$FRAME+56`($sp)
+- std $t0,`$FRAME+64`($sp)
++ std $t0,`$FRAME+64`($sp) ; yes, std even in 32-bit build
+ std $t1,`$FRAME+72`($sp)
+ std $t2,`$FRAME+80`($sp)
+ std $t3,`$FRAME+88`($sp)
+@@ -441,7 +483,7 @@ $code.=<<___ if ($SIZE_T==4);
+ lwz $t7,12($np)
+ ___
+ $code.=<<___;
+- std $t0,`$FRAME+64`($sp)
++ std $t0,`$FRAME+64`($sp) ; yes, std even in 32-bit build
+ std $t1,`$FRAME+72`($sp)
+ std $t2,`$FRAME+80`($sp)
+ std $t3,`$FRAME+88`($sp)
+@@ -449,6 +491,9 @@ $code.=<<___;
+ std $t5,`$FRAME+104`($sp)
+ std $t6,`$FRAME+112`($sp)
+ std $t7,`$FRAME+120`($sp)
++___
++if ($SIZE_T==8 or $flavour =~ /osx/) {
++$code.=<<___;
+ ld $t0,`$FRAME+0`($sp)
+ ld $t1,`$FRAME+8`($sp)
+ ld $t2,`$FRAME+16`($sp)
+@@ -457,6 +502,20 @@ $code.=<<___;
+ ld $t5,`$FRAME+40`($sp)
+ ld $t6,`$FRAME+48`($sp)
+ ld $t7,`$FRAME+56`($sp)
++___
++} else {
++$code.=<<___;
++ lwz $t1,`$FRAME+0`($sp)
++ lwz $t0,`$FRAME+4`($sp)
++ lwz $t3,`$FRAME+8`($sp)
++ lwz $t2,`$FRAME+12`($sp)
++ lwz $t5,`$FRAME+16`($sp)
++ lwz $t4,`$FRAME+20`($sp)
++ lwz $t7,`$FRAME+24`($sp)
++ lwz $t6,`$FRAME+28`($sp)
++___
++}
++$code.=<<___;
+ lfd $A0,`$FRAME+64`($sp)
+ lfd $A1,`$FRAME+72`($sp)
+ lfd $A2,`$FRAME+80`($sp)
+@@ -488,7 +547,9 @@ $code.=<<___;
+ fmadd $T0b,$A0,$bb,$dotb
+ stfd $A2,24($nap_d) ; save a[j+1] in double format
+ stfd $A3,32($nap_d)
+-
++___
++if ($SIZE_T==8 or $flavour =~ /osx/) {
++$code.=<<___;
+ fmadd $T1a,$A0,$bc,$T1a
+ fmadd $T1b,$A0,$bd,$T1b
+ fmadd $T2a,$A1,$bc,$T2a
+@@ -561,11 +622,123 @@ $code.=<<___;
+ stfd $T3b,`$FRAME+56`($sp)
+ std $t0,8($tp) ; tp[j-1]
+ stdu $t4,16($tp) ; tp[j]
++___
++} else {
++$code.=<<___;
++ fmadd $T1a,$A0,$bc,$T1a
++ fmadd $T1b,$A0,$bd,$T1b
++ addc $t0,$t0,$carry
++ adde $t1,$t1,$c1
++ srwi $carry,$t0,16
++ fmadd $T2a,$A1,$bc,$T2a
++ fmadd $T2b,$A1,$bd,$T2b
++ stfd $N0,40($nap_d) ; save n[j] in double format
++ stfd $N1,48($nap_d)
++ srwi $c1,$t1,16
++ insrwi $carry,$t1,16,0
++ fmadd $T3a,$A2,$bc,$T3a
++ fmadd $T3b,$A2,$bd,$T3b
++ addc $t2,$t2,$carry
++ adde $t3,$t3,$c1
++ srwi $carry,$t2,16
++ fmul $dota,$A3,$bc
++ fmul $dotb,$A3,$bd
++ stfd $N2,56($nap_d) ; save n[j+1] in double format
++ stfdu $N3,64($nap_d)
++ insrwi $t0,$t2,16,0 ; 0..31 bits
++ srwi $c1,$t3,16
++ insrwi $carry,$t3,16,0
++
++ fmadd $T1a,$N1,$na,$T1a
++ fmadd $T1b,$N1,$nb,$T1b
++ lwz $t3,`$FRAME+32`($sp) ; permuted $t1
++ lwz $t2,`$FRAME+36`($sp) ; permuted $t0
++ addc $t4,$t4,$carry
++ adde $t5,$t5,$c1
++ srwi $carry,$t4,16
++ fmadd $T2a,$N2,$na,$T2a
++ fmadd $T2b,$N2,$nb,$T2b
++ srwi $c1,$t5,16
++ insrwi $carry,$t5,16,0
++ fmadd $T3a,$N3,$na,$T3a
++ fmadd $T3b,$N3,$nb,$T3b
++ addc $t6,$t6,$carry
++ adde $t7,$t7,$c1
++ srwi $carry,$t6,16
++ fmadd $T0a,$N0,$na,$T0a
++ fmadd $T0b,$N0,$nb,$T0b
++ insrwi $t4,$t6,16,0 ; 32..63 bits
++ srwi $c1,$t7,16
++ insrwi $carry,$t7,16,0
++
++ fmadd $T1a,$N0,$nc,$T1a
++ fmadd $T1b,$N0,$nd,$T1b
++ lwz $t7,`$FRAME+40`($sp) ; permuted $t3
++ lwz $t6,`$FRAME+44`($sp) ; permuted $t2
++ addc $t2,$t2,$carry
++ adde $t3,$t3,$c1
++ srwi $carry,$t2,16
++ fmadd $T2a,$N1,$nc,$T2a
++ fmadd $T2b,$N1,$nd,$T2b
++ stw $t0,12($tp) ; tp[j-1]
++ stw $t4,8($tp)
++ srwi $c1,$t3,16
++ insrwi $carry,$t3,16,0
++ fmadd $T3a,$N2,$nc,$T3a
++ fmadd $T3b,$N2,$nd,$T3b
++ lwz $t1,`$FRAME+48`($sp) ; permuted $t5
++ lwz $t0,`$FRAME+52`($sp) ; permuted $t4
++ addc $t6,$t6,$carry
++ adde $t7,$t7,$c1
++ srwi $carry,$t6,16
++ fmadd $dota,$N3,$nc,$dota
++ fmadd $dotb,$N3,$nd,$dotb
++ insrwi $t2,$t6,16,0 ; 64..95 bits
++ srwi $c1,$t7,16
++ insrwi $carry,$t7,16,0
++
++ fctid $T0a,$T0a
++ fctid $T0b,$T0b
++ lwz $t5,`$FRAME+56`($sp) ; permuted $t7
++ lwz $t4,`$FRAME+60`($sp) ; permuted $t6
++ addc $t0,$t0,$carry
++ adde $t1,$t1,$c1
++ srwi $carry,$t0,16
++ fctid $T1a,$T1a
++ fctid $T1b,$T1b
++ srwi $c1,$t1,16
++ insrwi $carry,$t1,16,0
++ fctid $T2a,$T2a
++ fctid $T2b,$T2b
++ addc $t4,$t4,$carry
++ adde $t5,$t5,$c1
++ srwi $carry,$t4,16
++ fctid $T3a,$T3a
++ fctid $T3b,$T3b
++ insrwi $t0,$t4,16,0 ; 96..127 bits
++ srwi $c1,$t5,16
++ insrwi $carry,$t5,16,0
++
++ stfd $T0a,`$FRAME+0`($sp)
++ stfd $T0b,`$FRAME+8`($sp)
++ stfd $T1a,`$FRAME+16`($sp)
++ stfd $T1b,`$FRAME+24`($sp)
++ stfd $T2a,`$FRAME+32`($sp)
++ stfd $T2b,`$FRAME+40`($sp)
++ stfd $T3a,`$FRAME+48`($sp)
++ stfd $T3b,`$FRAME+56`($sp)
++ stw $t2,20($tp) ; tp[j]
++ stwu $t0,16($tp)
++___
++}
++$code.=<<___;
+ bdnz- L1st
+
+ fctid $dota,$dota
+ fctid $dotb,$dotb
+-
++___
++if ($SIZE_T==8 or $flavour =~ /osx/) {
++$code.=<<___;
+ ld $t0,`$FRAME+0`($sp)
+ ld $t1,`$FRAME+8`($sp)
+ ld $t2,`$FRAME+16`($sp)
+@@ -611,33 +784,117 @@ $code.=<<___;
+ insrdi $t6,$t7,48,0
+ srdi $ovf,$t7,48
+ std $t6,8($tp) ; tp[num-1]
++___
++} else {
++$code.=<<___;
++ lwz $t1,`$FRAME+0`($sp)
++ lwz $t0,`$FRAME+4`($sp)
++ lwz $t3,`$FRAME+8`($sp)
++ lwz $t2,`$FRAME+12`($sp)
++ lwz $t5,`$FRAME+16`($sp)
++ lwz $t4,`$FRAME+20`($sp)
++ lwz $t7,`$FRAME+24`($sp)
++ lwz $t6,`$FRAME+28`($sp)
++ stfd $dota,`$FRAME+64`($sp)
++ stfd $dotb,`$FRAME+72`($sp)
+
++ addc $t0,$t0,$carry
++ adde $t1,$t1,$c1
++ srwi $carry,$t0,16
++ insrwi $carry,$t1,16,0
++ srwi $c1,$t1,16
++ addc $t2,$t2,$carry
++ adde $t3,$t3,$c1
++ srwi $carry,$t2,16
++ insrwi $t0,$t2,16,0 ; 0..31 bits
++ insrwi $carry,$t3,16,0
++ srwi $c1,$t3,16
++ addc $t4,$t4,$carry
++ adde $t5,$t5,$c1
++ srwi $carry,$t4,16
++ insrwi $carry,$t5,16,0
++ srwi $c1,$t5,16
++ addc $t6,$t6,$carry
++ adde $t7,$t7,$c1
++ srwi $carry,$t6,16
++ insrwi $t4,$t6,16,0 ; 32..63 bits
++ insrwi $carry,$t7,16,0
++ srwi $c1,$t7,16
++ stw $t0,12($tp) ; tp[j-1]
++ stw $t4,8($tp)
++
++ lwz $t3,`$FRAME+32`($sp) ; permuted $t1
++ lwz $t2,`$FRAME+36`($sp) ; permuted $t0
++ lwz $t7,`$FRAME+40`($sp) ; permuted $t3
++ lwz $t6,`$FRAME+44`($sp) ; permuted $t2
++ lwz $t1,`$FRAME+48`($sp) ; permuted $t5
++ lwz $t0,`$FRAME+52`($sp) ; permuted $t4
++ lwz $t5,`$FRAME+56`($sp) ; permuted $t7
++ lwz $t4,`$FRAME+60`($sp) ; permuted $t6
++
++ addc $t2,$t2,$carry
++ adde $t3,$t3,$c1
++ srwi $carry,$t2,16
++ insrwi $carry,$t3,16,0
++ srwi $c1,$t3,16
++ addc $t6,$t6,$carry
++ adde $t7,$t7,$c1
++ srwi $carry,$t6,16
++ insrwi $t2,$t6,16,0 ; 64..95 bits
++ insrwi $carry,$t7,16,0
++ srwi $c1,$t7,16
++ addc $t0,$t0,$carry
++ adde $t1,$t1,$c1
++ srwi $carry,$t0,16
++ insrwi $carry,$t1,16,0
++ srwi $c1,$t1,16
++ addc $t4,$t4,$carry
++ adde $t5,$t5,$c1
++ srwi $carry,$t4,16
++ insrwi $t0,$t4,16,0 ; 96..127 bits
++ insrwi $carry,$t5,16,0
++ srwi $c1,$t5,16
++ stw $t2,20($tp) ; tp[j]
++ stwu $t0,16($tp)
++
++ lwz $t7,`$FRAME+64`($sp)
++ lwz $t6,`$FRAME+68`($sp)
++ lwz $t5,`$FRAME+72`($sp)
++ lwz $t4,`$FRAME+76`($sp)
++
++ addc $t6,$t6,$carry
++ adde $t7,$t7,$c1
++ srwi $carry,$t6,16
++ insrwi $carry,$t7,16,0
++ srwi $c1,$t7,16
++ addc $t4,$t4,$carry
++ adde $t5,$t5,$c1
++
++ insrwi $t6,$t4,16,0
++ srwi $t4,$t4,16
++ insrwi $t4,$t5,16,0
++ srwi $ovf,$t5,16
++ stw $t6,12($tp) ; tp[num-1]
++ stw $t4,8($tp)
++___
++}
++$code.=<<___;
+ slwi $t7,$num,2
+ subf $nap_d,$t7,$nap_d ; rewind pointer
+
+ li $i,8 ; i=1
+ .align 5
+ Louter:
+-___
+-$code.=<<___ if ($SIZE_T==8);
+- ldx $t3,$bp,$i ; bp[i]
+-___
+-$code.=<<___ if ($SIZE_T==4);
+- add $t0,$bp,$i
+- lwz $t3,0($t0) ; bp[i,i+1]
+- lwz $t0,4($t0)
+- insrdi $t3,$t0,32,0
+-___
+-$code.=<<___;
+- ld $t6,`$FRAME+$TRANSFER+8`($sp) ; tp[0]
+- mulld $t7,$a0,$t3 ; ap[0]*bp[i]
+-
+ addi $tp,$sp,`$FRAME+$TRANSFER`
+- add $t7,$t7,$t6 ; ap[0]*bp[i]+tp[0]
+ li $carry,0
+- mulld $t7,$t7,$n0 ; tp[0]*n0
+ mtctr $j
++___
++$code.=<<___ if ($SIZE_T==8);
++ ldx $t3,$bp,$i ; bp[i]
+
++ ld $t6,`$FRAME+$TRANSFER+8`($sp) ; tp[0]
++ mulld $t7,$a0,$t3 ; ap[0]*bp[i]
++ add $t7,$t7,$t6 ; ap[0]*bp[i]+tp[0]
+ ; transfer bp[i] to FPU as 4x16-bit values
+ extrdi $t0,$t3,16,48
+ extrdi $t1,$t3,16,32
+@@ -647,6 +904,8 @@ $code.=<<___;
+ std $t1,`$FRAME+8`($sp)
+ std $t2,`$FRAME+16`($sp)
+ std $t3,`$FRAME+24`($sp)
++
++ mulld $t7,$t7,$n0 ; tp[0]*n0
+ ; transfer (ap[0]*bp[i]+tp[0])*n0 to FPU as 4x16-bit values
+ extrdi $t4,$t7,16,48
+ extrdi $t5,$t7,16,32
+@@ -656,7 +915,50 @@ $code.=<<___;
+ std $t5,`$FRAME+40`($sp)
+ std $t6,`$FRAME+48`($sp)
+ std $t7,`$FRAME+56`($sp)
++___
++$code.=<<___ if ($SIZE_T==4);
++ add $t0,$bp,$i
++ li $c1,0
++ lwz $t1,0($t0) ; bp[i,i+1]
++ lwz $t3,4($t0)
++
++ mullw $t4,$a0,$t1 ; ap[0]*bp[i]
++ lwz $t0,`$FRAME+$TRANSFER+8+4`($sp) ; tp[0]
++ mulhwu $t5,$a0,$t1
++ lwz $t2,`$FRAME+$TRANSFER+8`($sp) ; tp[0]
++ mullw $t6,$a1,$t1
++ mullw $t7,$a0,$t3
++ add $t5,$t5,$t6
++ add $t5,$t5,$t7
++ addc $t4,$t4,$t0 ; ap[0]*bp[i]+tp[0]
++ adde $t5,$t5,$t2
++ ; transfer bp[i] to FPU as 4x16-bit values
++ extrwi $t0,$t1,16,16
++ extrwi $t1,$t1,16,0
++ extrwi $t2,$t3,16,16
++ extrwi $t3,$t3,16,0
++ std $t0,`$FRAME+0`($sp) ; yes, std in 32-bit build
++ std $t1,`$FRAME+8`($sp)
++ std $t2,`$FRAME+16`($sp)
++ std $t3,`$FRAME+24`($sp)
+
++ mullw $t0,$t4,$n0 ; mulld tp[0]*n0
++ mulhwu $t1,$t4,$n0
++ mullw $t2,$t5,$n0
++ mullw $t3,$t4,$n1
++ add $t1,$t1,$t2
++ add $t1,$t1,$t3
++ ; transfer (ap[0]*bp[i]+tp[0])*n0 to FPU as 4x16-bit values
++ extrwi $t4,$t0,16,16
++ extrwi $t5,$t0,16,0
++ extrwi $t6,$t1,16,16
++ extrwi $t7,$t1,16,0
++ std $t4,`$FRAME+32`($sp) ; yes, std in 32-bit build
++ std $t5,`$FRAME+40`($sp)
++ std $t6,`$FRAME+48`($sp)
++ std $t7,`$FRAME+56`($sp)
++___
++$code.=<<___;
+ lfd $A0,8($nap_d) ; load a[j] in double format
+ lfd $A1,16($nap_d)
+ lfd $A2,24($nap_d) ; load a[j+1] in double format
+@@ -769,7 +1071,9 @@ Linner:
+ fmul $dotb,$A3,$bd
+ lfd $A2,24($nap_d) ; load a[j+1] in double format
+ lfd $A3,32($nap_d)
+-
++___
++if ($SIZE_T==8 or $flavour =~ /osx/) {
++$code.=<<___;
+ fmadd $T1a,$N1,$na,$T1a
+ fmadd $T1b,$N1,$nb,$T1b
+ ld $t0,`$FRAME+0`($sp)
+@@ -856,10 +1160,131 @@ $code.=<<___;
+ addze $carry,$carry
+ std $t3,-16($tp) ; tp[j-1]
+ std $t5,-8($tp) ; tp[j]
++___
++} else {
++$code.=<<___;
++ fmadd $T1a,$N1,$na,$T1a
++ fmadd $T1b,$N1,$nb,$T1b
++ lwz $t1,`$FRAME+0`($sp)
++ lwz $t0,`$FRAME+4`($sp)
++ fmadd $T2a,$N2,$na,$T2a
++ fmadd $T2b,$N2,$nb,$T2b
++ lwz $t3,`$FRAME+8`($sp)
++ lwz $t2,`$FRAME+12`($sp)
++ fmadd $T3a,$N3,$na,$T3a
++ fmadd $T3b,$N3,$nb,$T3b
++ lwz $t5,`$FRAME+16`($sp)
++ lwz $t4,`$FRAME+20`($sp)
++ addc $t0,$t0,$carry
++ adde $t1,$t1,$c1
++ srwi $carry,$t0,16
++ fmadd $T0a,$N0,$na,$T0a
++ fmadd $T0b,$N0,$nb,$T0b
++ lwz $t7,`$FRAME+24`($sp)
++ lwz $t6,`$FRAME+28`($sp)
++ srwi $c1,$t1,16
++ insrwi $carry,$t1,16,0
++
++ fmadd $T1a,$N0,$nc,$T1a
++ fmadd $T1b,$N0,$nd,$T1b
++ addc $t2,$t2,$carry
++ adde $t3,$t3,$c1
++ srwi $carry,$t2,16
++ fmadd $T2a,$N1,$nc,$T2a
++ fmadd $T2b,$N1,$nd,$T2b
++ insrwi $t0,$t2,16,0 ; 0..31 bits
++ srwi $c1,$t3,16
++ insrwi $carry,$t3,16,0
++ fmadd $T3a,$N2,$nc,$T3a
++ fmadd $T3b,$N2,$nd,$T3b
++ lwz $t2,12($tp) ; tp[j]
++ lwz $t3,8($tp)
++ addc $t4,$t4,$carry
++ adde $t5,$t5,$c1
++ srwi $carry,$t4,16
++ fmadd $dota,$N3,$nc,$dota
++ fmadd $dotb,$N3,$nd,$dotb
++ srwi $c1,$t5,16
++ insrwi $carry,$t5,16,0
++
++ fctid $T0a,$T0a
++ addc $t6,$t6,$carry
++ adde $t7,$t7,$c1
++ srwi $carry,$t6,16
++ fctid $T0b,$T0b
++ insrwi $t4,$t6,16,0 ; 32..63 bits
++ srwi $c1,$t7,16
++ insrwi $carry,$t7,16,0
++ fctid $T1a,$T1a
++ addc $t0,$t0,$t2
++ adde $t4,$t4,$t3
++ lwz $t3,`$FRAME+32`($sp) ; permuted $t1
++ lwz $t2,`$FRAME+36`($sp) ; permuted $t0
++ fctid $T1b,$T1b
++ addze $carry,$carry
++ addze $c1,$c1
++ stw $t0,4($tp) ; tp[j-1]
++ stw $t4,0($tp)
++ fctid $T2a,$T2a
++ addc $t2,$t2,$carry
++ adde $t3,$t3,$c1
++ srwi $carry,$t2,16
++ lwz $t7,`$FRAME+40`($sp) ; permuted $t3
++ lwz $t6,`$FRAME+44`($sp) ; permuted $t2
++ fctid $T2b,$T2b
++ srwi $c1,$t3,16
++ insrwi $carry,$t3,16,0
++ lwz $t1,`$FRAME+48`($sp) ; permuted $t5
++ lwz $t0,`$FRAME+52`($sp) ; permuted $t4
++ fctid $T3a,$T3a
++ addc $t6,$t6,$carry
++ adde $t7,$t7,$c1
++ srwi $carry,$t6,16
++ lwz $t5,`$FRAME+56`($sp) ; permuted $t7
++ lwz $t4,`$FRAME+60`($sp) ; permuted $t6
++ fctid $T3b,$T3b
++
++ insrwi $t2,$t6,16,0 ; 64..95 bits
++ insrwi $carry,$t7,16,0
++ srwi $c1,$t7,16
++ lwz $t6,20($tp)
++ lwzu $t7,16($tp)
++ addc $t0,$t0,$carry
++ stfd $T0a,`$FRAME+0`($sp)
++ adde $t1,$t1,$c1
++ srwi $carry,$t0,16
++ stfd $T0b,`$FRAME+8`($sp)
++ insrwi $carry,$t1,16,0
++ srwi $c1,$t1,16
++ addc $t4,$t4,$carry
++ stfd $T1a,`$FRAME+16`($sp)
++ adde $t5,$t5,$c1
++ srwi $carry,$t4,16
++ insrwi $t0,$t4,16,0 ; 96..127 bits
++ stfd $T1b,`$FRAME+24`($sp)
++ insrwi $carry,$t5,16,0
++ srwi $c1,$t5,16
++
++ addc $t2,$t2,$t6
++ stfd $T2a,`$FRAME+32`($sp)
++ adde $t0,$t0,$t7
++ stfd $T2b,`$FRAME+40`($sp)
++ addze $carry,$carry
++ stfd $T3a,`$FRAME+48`($sp)
++ addze $c1,$c1
++ stfd $T3b,`$FRAME+56`($sp)
++ stw $t2,-4($tp) ; tp[j]
++ stw $t0,-8($tp)
++___
++}
++$code.=<<___;
+ bdnz- Linner
+
+ fctid $dota,$dota
+ fctid $dotb,$dotb
++___
++if ($SIZE_T==8 or $flavour =~ /osx/) {
++$code.=<<___;
+ ld $t0,`$FRAME+0`($sp)
+ ld $t1,`$FRAME+8`($sp)
+ ld $t2,`$FRAME+16`($sp)
+@@ -926,7 +1351,116 @@ $code.=<<___;
+ insrdi $t6,$t7,48,0
+ srdi $ovf,$t7,48
+ std $t6,0($tp) ; tp[num-1]
++___
++} else {
++$code.=<<___;
++ lwz $t1,`$FRAME+0`($sp)
++ lwz $t0,`$FRAME+4`($sp)
++ lwz $t3,`$FRAME+8`($sp)
++ lwz $t2,`$FRAME+12`($sp)
++ lwz $t5,`$FRAME+16`($sp)
++ lwz $t4,`$FRAME+20`($sp)
++ lwz $t7,`$FRAME+24`($sp)
++ lwz $t6,`$FRAME+28`($sp)
++ stfd $dota,`$FRAME+64`($sp)
++ stfd $dotb,`$FRAME+72`($sp)
+
++ addc $t0,$t0,$carry
++ adde $t1,$t1,$c1
++ srwi $carry,$t0,16
++ insrwi $carry,$t1,16,0
++ srwi $c1,$t1,16
++ addc $t2,$t2,$carry
++ adde $t3,$t3,$c1
++ srwi $carry,$t2,16
++ insrwi $t0,$t2,16,0 ; 0..31 bits
++ lwz $t2,12($tp) ; tp[j]
++ insrwi $carry,$t3,16,0
++ srwi $c1,$t3,16
++ lwz $t3,8($tp)
++ addc $t4,$t4,$carry
++ adde $t5,$t5,$c1
++ srwi $carry,$t4,16
++ insrwi $carry,$t5,16,0
++ srwi $c1,$t5,16
++ addc $t6,$t6,$carry
++ adde $t7,$t7,$c1
++ srwi $carry,$t6,16
++ insrwi $t4,$t6,16,0 ; 32..63 bits
++ insrwi $carry,$t7,16,0
++ srwi $c1,$t7,16
++
++ addc $t0,$t0,$t2
++ adde $t4,$t4,$t3
++ addze $carry,$carry
++ addze $c1,$c1
++ stw $t0,4($tp) ; tp[j-1]
++ stw $t4,0($tp)
++
++ lwz $t3,`$FRAME+32`($sp) ; permuted $t1
++ lwz $t2,`$FRAME+36`($sp) ; permuted $t0
++ lwz $t7,`$FRAME+40`($sp) ; permuted $t3
++ lwz $t6,`$FRAME+44`($sp) ; permuted $t2
++ lwz $t1,`$FRAME+48`($sp) ; permuted $t5
++ lwz $t0,`$FRAME+52`($sp) ; permuted $t4
++ lwz $t5,`$FRAME+56`($sp) ; permuted $t7
++ lwz $t4,`$FRAME+60`($sp) ; permuted $t6
++
++ addc $t2,$t2,$carry
++ adde $t3,$t3,$c1
++ srwi $carry,$t2,16
++ insrwi $carry,$t3,16,0
++ srwi $c1,$t3,16
++ addc $t6,$t6,$carry
++ adde $t7,$t7,$c1
++ srwi $carry,$t6,16
++ insrwi $t2,$t6,16,0 ; 64..95 bits
++ lwz $t6,20($tp)
++ insrwi $carry,$t7,16,0
++ srwi $c1,$t7,16
++ lwzu $t7,16($tp)
++ addc $t0,$t0,$carry
++ adde $t1,$t1,$c1
++ srwi $carry,$t0,16
++ insrwi $carry,$t1,16,0
++ srwi $c1,$t1,16
++ addc $t4,$t4,$carry
++ adde $t5,$t5,$c1
++ srwi $carry,$t4,16
++ insrwi $t0,$t4,16,0 ; 96..127 bits
++ insrwi $carry,$t5,16,0
++ srwi $c1,$t5,16
++
++ addc $t2,$t2,$t6
++ adde $t0,$t0,$t7
++ lwz $t7,`$FRAME+64`($sp)
++ lwz $t6,`$FRAME+68`($sp)
++ addze $carry,$carry
++ addze $c1,$c1
++ lwz $t5,`$FRAME+72`($sp)
++ lwz $t4,`$FRAME+76`($sp)
++
++ addc $t6,$t6,$carry
++ adde $t7,$t7,$c1
++ stw $t2,-4($tp) ; tp[j]
++ stw $t0,-8($tp)
++ addc $t6,$t6,$ovf
++ addze $t7,$t7
++ srwi $carry,$t6,16
++ insrwi $carry,$t7,16,0
++ srwi $c1,$t7,16
++ addc $t4,$t4,$carry
++ adde $t5,$t5,$c1
++
++ insrwi $t6,$t4,16,0
++ srwi $t4,$t4,16
++ insrwi $t4,$t5,16,0
++ srwi $ovf,$t5,16
++ stw $t6,4($tp) ; tp[num-1]
++ stw $t4,0($tp)
++___
++}
++$code.=<<___;
+ slwi $t7,$num,2
+ addi $i,$i,8
+ subf $nap_d,$t7,$nap_d ; rewind pointer
+@@ -994,14 +1528,14 @@ $code.=<<___ if ($SIZE_T==4);
+ mtctr $j
+
+ .align 4
+-Lsub: ld $t0,8($tp) ; load tp[j..j+3] in 64-bit word order
+- ldu $t2,16($tp)
++Lsub: lwz $t0,12($tp) ; load tp[j..j+3] in 64-bit word order
++ lwz $t1,8($tp)
++ lwz $t2,20($tp)
++ lwzu $t3,16($tp)
+ lwz $t4,4($np) ; load np[j..j+3] in 32-bit word order
+ lwz $t5,8($np)
+ lwz $t6,12($np)
+ lwzu $t7,16($np)
+- extrdi $t1,$t0,32,0
+- extrdi $t3,$t2,32,0
+ subfe $t4,$t4,$t0 ; tp[j]-np[j]
+ stw $t0,4($ap) ; save tp[j..j+3] in 32-bit word order
+ subfe $t5,$t5,$t1 ; tp[j+1]-np[j+1]
+@@ -1052,6 +1586,9 @@ ___
+ $code.=<<___;
+ $POP $i,0($sp)
+ li r3,1 ; signal "handled"
++ $POP r19,`-12*8-13*$SIZE_T`($i)
++ $POP r20,`-12*8-12*$SIZE_T`($i)
++ $POP r21,`-12*8-11*$SIZE_T`($i)
+ $POP r22,`-12*8-10*$SIZE_T`($i)
+ $POP r23,`-12*8-9*$SIZE_T`($i)
+ $POP r24,`-12*8-8*$SIZE_T`($i)
+@@ -1077,8 +1614,9 @@ $code.=<<___;
+ mr $sp,$i
+ blr
+ .long 0
+- .byte 0,12,4,0,0x8c,10,6,0
++ .byte 0,12,4,0,0x8c,13,6,0
+ .long 0
++.size .$fname,.-.$fname
+
+ .asciz "Montgomery Multiplication for PPC64, CRYPTOGAMS by <appro\@openssl.org>"
+ ___
+diff -up openssl-1.0.1i/crypto/evp/e_aes.c.ppc-asm openssl-1.0.1i/crypto/evp/e_aes.c
+--- openssl-1.0.1i/crypto/evp/e_aes.c.ppc-asm 2014-08-06 23:10:56.000000000 +0200
++++ openssl-1.0.1i/crypto/evp/e_aes.c 2014-08-13 19:46:21.094578151 +0200
+@@ -153,6 +153,20 @@ void AES_xts_decrypt(const char *inp,cha
+ const unsigned char iv[16]);
+ #endif
+
++#if defined(OPENSSL_CPUID_OBJ) && (defined(__powerpc__) || defined(__ppc__) || defined(_ARCH_PPC))
++# include "ppc_arch.h"
++# ifdef VPAES_ASM
++# define VPAES_CAPABLE (OPENSSL_ppccap_P & PPC_ALTIVEC)
++# endif
++# define HWAES_CAPABLE (OPENSSL_ppccap_P & PPC_CRYPTO207)
++# define HWAES_set_encrypt_key aes_p8_set_encrypt_key
++# define HWAES_set_decrypt_key aes_p8_set_decrypt_key
++# define HWAES_encrypt aes_p8_encrypt
++# define HWAES_decrypt aes_p8_decrypt
++# define HWAES_cbc_encrypt aes_p8_cbc_encrypt
++# define HWAES_ctr32_encrypt_blocks aes_p8_ctr32_encrypt_blocks
++#endif
++
+ #if defined(AES_ASM) && !defined(I386_ONLY) && ( \
+ ((defined(__i386) || defined(__i386__) || \
+ defined(_M_IX86)) && defined(OPENSSL_IA32_SSE2))|| \
+diff -up openssl-1.0.1i/crypto/modes/asm/ghashp8-ppc.pl.ppc-asm openssl-1.0.1i/crypto/modes/asm/ghashp8-ppc.pl
+--- openssl-1.0.1i/crypto/modes/asm/ghashp8-ppc.pl.ppc-asm 2014-08-13 19:46:21.095578174 +0200
++++ openssl-1.0.1i/crypto/modes/asm/ghashp8-ppc.pl 2014-08-13 19:46:21.095578174 +0200
+@@ -0,0 +1,234 @@
++#!/usr/bin/env perl
++#
++# ====================================================================
++# Written by Andy Polyakov <appro at openssl.org> for the OpenSSL
++# project. The module is, however, dual licensed under OpenSSL and
++# CRYPTOGAMS licenses depending on where you obtain it. For further
++# details see http://www.openssl.org/~appro/cryptogams/.
++# ====================================================================
++#
++# GHASH for for PowerISA v2.07.
++#
++# July 2014
++#
++# Accurate performance measurements are problematic, because it's
++# always virtualized setup with possibly throttled processor.
++# Relative comparison is therefore more informative. This initial
++# version is ~2.1x slower than hardware-assisted AES-128-CTR, ~12x
++# faster than "4-bit" integer-only compiler-generated 64-bit code.
++# "Initial version" means that there is room for futher improvement.
++
++$flavour=shift;
++$output =shift;
++
++if ($flavour =~ /64/) {
++ $SIZE_T=8;
++ $LRSAVE=2*$SIZE_T;
++ $STU="stdu";
++ $POP="ld";
++ $PUSH="std";
++} elsif ($flavour =~ /32/) {
++ $SIZE_T=4;
++ $LRSAVE=$SIZE_T;
++ $STU="stwu";
++ $POP="lwz";
++ $PUSH="stw";
++} else { die "nonsense $flavour"; }
++
++$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
++( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
++( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
++die "can't locate ppc-xlate.pl";
++
++open STDOUT,"| $^X $xlate $flavour $output" || die "can't call $xlate: $!";
++
++my ($Xip,$Htbl,$inp,$len)=map("r$_",(3..6)); # argument block
++
++my ($Xl,$Xm,$Xh,$IN)=map("v$_",(0..3));
++my ($zero,$t0,$t1,$t2,$xC2,$H,$Hh,$Hl,$lemask)=map("v$_",(4..12));
++my $vrsave="r12";
++
++$code=<<___;
++.machine "any"
++
++.text
++
++.globl .gcm_init_p8
++.align 5
++.gcm_init_p8:
++ lis r0,0xfff0
++ li r8,0x10
++ mfspr $vrsave,256
++ li r9,0x20
++ mtspr 256,r0
++ li r10,0x30
++ lvx_u $H,0,r4 # load H
++
++ vspltisb $xC2,-16 # 0xf0
++ vspltisb $t0,1 # one
++ vaddubm $xC2,$xC2,$xC2 # 0xe0
++ vxor $zero,$zero,$zero
++ vor $xC2,$xC2,$t0 # 0xe1
++ vsldoi $xC2,$xC2,$zero,15 # 0xe1...
++ vsldoi $t1,$zero,$t0,1 # ...1
++ vaddubm $xC2,$xC2,$xC2 # 0xc2...
++ vspltisb $t2,7
++ vor $xC2,$xC2,$t1 # 0xc2....01
++ vspltb $t1,$H,0 # most significant byte
++ vsl $H,$H,$t0 # H<<=1
++ vsrab $t1,$t1,$t2 # broadcast carry bit
++ vand $t1,$t1,$xC2
++ vxor $H,$H,$t1 # twisted H
++
++ vsldoi $H,$H,$H,8 # twist even more ...
++ vsldoi $xC2,$zero,$xC2,8 # 0xc2.0
++ vsldoi $Hl,$zero,$H,8 # ... and split
++ vsldoi $Hh,$H,$zero,8
++
++ stvx_u $xC2,0,r3 # save pre-computed table
++ stvx_u $Hl,r8,r3
++ stvx_u $H, r9,r3
++ stvx_u $Hh,r10,r3
++
++ mtspr 256,$vrsave
++ blr
++ .long 0
++ .byte 0,12,0x14,0,0,0,2,0
++ .long 0
++.size .gcm_init_p8,.-.gcm_init_p8
++
++.globl .gcm_gmult_p8
++.align 5
++.gcm_gmult_p8:
++ lis r0,0xfff8
++ li r8,0x10
++ mfspr $vrsave,256
++ li r9,0x20
++ mtspr 256,r0
++ li r10,0x30
++ lvx_u $IN,0,$Xip # load Xi
++
++ lvx_u $Hl,r8,$Htbl # load pre-computed table
++ le?lvsl $lemask,r0,r0
++ lvx_u $H, r9,$Htbl
++ le?vspltisb $t0,0x07
++ lvx_u $Hh,r10,$Htbl
++ le?vxor $lemask,$lemask,$t0
++ lvx_u $xC2,0,$Htbl
++ le?vperm $IN,$IN,$IN,$lemask
++ vxor $zero,$zero,$zero
++
++ vpmsumd $Xl,$IN,$Hl # H.lo·Xi.lo
++ vpmsumd $Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi
++ vpmsumd $Xh,$IN,$Hh # H.hi·Xi.hi
++
++ vpmsumd $t2,$Xl,$xC2 # 1st phase
++
++ vsldoi $t0,$Xm,$zero,8
++ vsldoi $t1,$zero,$Xm,8
++ vxor $Xl,$Xl,$t0
++ vxor $Xh,$Xh,$t1
++
++ vsldoi $Xl,$Xl,$Xl,8
++ vxor $Xl,$Xl,$t2
++
++ vsldoi $t1,$Xl,$Xl,8 # 2nd phase
++ vpmsumd $Xl,$Xl,$xC2
++ vxor $t1,$t1,$Xh
++ vxor $Xl,$Xl,$t1
++
++ le?vperm $Xl,$Xl,$Xl,$lemask
++ stvx_u $Xl,0,$Xip # write out Xi
++
++ mtspr 256,$vrsave
++ blr
++ .long 0
++ .byte 0,12,0x14,0,0,0,2,0
++ .long 0
++.size .gcm_gmult_p8,.-.gcm_gmult_p8
++
++.globl .gcm_ghash_p8
++.align 5
++.gcm_ghash_p8:
++ lis r0,0xfff8
++ li r8,0x10
++ mfspr $vrsave,256
++ li r9,0x20
++ mtspr 256,r0
++ li r10,0x30
++ lvx_u $Xl,0,$Xip # load Xi
++
++ lvx_u $Hl,r8,$Htbl # load pre-computed table
++ le?lvsl $lemask,r0,r0
++ lvx_u $H, r9,$Htbl
++ le?vspltisb $t0,0x07
++ lvx_u $Hh,r10,$Htbl
++ le?vxor $lemask,$lemask,$t0
++ lvx_u $xC2,0,$Htbl
++ le?vperm $Xl,$Xl,$Xl,$lemask
++ vxor $zero,$zero,$zero
++
++ lvx_u $IN,0,$inp
++ addi $inp,$inp,16
++ subi $len,$len,16
++ le?vperm $IN,$IN,$IN,$lemask
++ vxor $IN,$IN,$Xl
++ b Loop
++
++.align 5
++Loop:
++ subic $len,$len,16
++ vpmsumd $Xl,$IN,$Hl # H.lo·Xi.lo
++ subfe. r0,r0,r0 # borrow?-1:0
++ vpmsumd $Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi
++ and r0,r0,$len
++ vpmsumd $Xh,$IN,$Hh # H.hi·Xi.hi
++ add $inp,$inp,r0
++
++ vpmsumd $t2,$Xl,$xC2 # 1st phase
++
++ vsldoi $t0,$Xm,$zero,8
++ vsldoi $t1,$zero,$Xm,8
++ vxor $Xl,$Xl,$t0
++ vxor $Xh,$Xh,$t1
++
++ vsldoi $Xl,$Xl,$Xl,8
++ vxor $Xl,$Xl,$t2
++ lvx_u $IN,0,$inp
++ addi $inp,$inp,16
++
++ vsldoi $t1,$Xl,$Xl,8 # 2nd phase
++ vpmsumd $Xl,$Xl,$xC2
++ le?vperm $IN,$IN,$IN,$lemask
++ vxor $t1,$t1,$Xh
++ vxor $IN,$IN,$t1
++ vxor $IN,$IN,$Xl
++ beq Loop # did $len-=16 borrow?
++
++ vxor $Xl,$Xl,$t1
++ le?vperm $Xl,$Xl,$Xl,$lemask
++ stvx_u $Xl,0,$Xip # write out Xi
++
++ mtspr 256,$vrsave
++ blr
++ .long 0
++ .byte 0,12,0x14,0,0,0,4,0
++ .long 0
++.size .gcm_ghash_p8,.-.gcm_ghash_p8
++
++.asciz "GHASH for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
++.align 2
++___
++
++foreach (split("\n",$code)) {
++ if ($flavour =~ /le$/o) { # little-endian
++ s/le\?//o or
++ s/be\?/#be#/o;
++ } else {
++ s/le\?/#le#/o or
++ s/be\?//o;
++ }
++ print $_,"\n";
++}
++
++close STDOUT; # enforce flush
+diff -up openssl-1.0.1i/crypto/modes/gcm128.c.ppc-asm openssl-1.0.1i/crypto/modes/gcm128.c
+--- openssl-1.0.1i/crypto/modes/gcm128.c.ppc-asm 2014-08-06 23:10:56.000000000 +0200
++++ openssl-1.0.1i/crypto/modes/gcm128.c 2014-08-13 19:46:21.095578174 +0200
+@@ -671,6 +671,21 @@ void gcm_ghash_4bit_x86(u64 Xi[2],const
+ void gcm_gmult_neon(u64 Xi[2],const u128 Htable[16]);
+ void gcm_ghash_neon(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
+ # endif
++# elif defined(__sparc__) || defined(__sparc)
++# include "sparc_arch.h"
++# define GHASH_ASM_SPARC
++# define GCM_FUNCREF_4BIT
++extern unsigned int OPENSSL_sparcv9cap_P[];
++void gcm_init_vis3(u128 Htable[16],const u64 Xi[2]);
++void gcm_gmult_vis3(u64 Xi[2],const u128 Htable[16]);
++void gcm_ghash_vis3(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
++#elif defined(OPENSSL_CPUID_OBJ) && (defined(__powerpc__) || defined(__ppc__) || defined(_ARCH_PPC))
++# include "ppc_arch.h"
++# define GHASH_ASM_PPC
++# define GCM_FUNCREF_4BIT
++void gcm_init_p8(u128 Htable[16],const u64 Xi[2]);
++void gcm_gmult_p8(u64 Xi[2],const u128 Htable[16]);
++void gcm_ghash_p8(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
+ # endif
+ #endif
+
+@@ -745,6 +760,16 @@ void CRYPTO_gcm128_init(GCM128_CONTEXT *
+ } else {
+ gcm_init_4bit(ctx->Htable,ctx->H.u);
+ ctx->gmult = gcm_gmult_4bit;
++ ctx->ghash = gcm_ghash_4bit;
++ }
++# elif defined(GHASH_ASM_PPC)
++ if (OPENSSL_ppccap_P & PPC_CRYPTO207) {
++ gcm_init_p8(ctx->Htable,ctx->H.u);
++ ctx->gmult = gcm_gmult_p8;
++ ctx->ghash = gcm_ghash_p8;
++ } else {
++ gcm_init_4bit(ctx->Htable,ctx->H.u);
++ ctx->gmult = gcm_gmult_4bit;
+ ctx->ghash = gcm_ghash_4bit;
+ }
+ # else
+diff -up openssl-1.0.1i/crypto/modes/Makefile.ppc-asm openssl-1.0.1i/crypto/modes/Makefile
+--- openssl-1.0.1i/crypto/modes/Makefile.ppc-asm 2014-08-13 19:48:28.435511100 +0200
++++ openssl-1.0.1i/crypto/modes/Makefile 2014-08-13 19:48:04.641963082 +0200
+@@ -59,6 +59,8 @@ ghash-alpha.s: asm/ghash-alpha.pl
+
+ ghash-parisc.s: asm/ghash-parisc.pl
+ $(PERL) asm/ghash-parisc.pl $(PERLASM_SCHEME) $@
++ghashp8-ppc.s: asm/ghashp8-ppc.pl
++ $(PERL) asm/ghashp8-ppc.pl $(PERLASM_SCHEME) $@
+
+ # GNU make "catch all"
+ ghash-%.S: asm/ghash-%.pl; $(PERL) $< $(PERLASM_SCHEME) $@
+diff -up openssl-1.0.1i/crypto/perlasm/ppc-xlate.pl.ppc-asm openssl-1.0.1i/crypto/perlasm/ppc-xlate.pl
+--- openssl-1.0.1i/crypto/perlasm/ppc-xlate.pl.ppc-asm 2014-08-06 23:10:56.000000000 +0200
++++ openssl-1.0.1i/crypto/perlasm/ppc-xlate.pl 2014-08-13 19:46:21.095578174 +0200
+@@ -27,7 +27,8 @@ my $globl = sub {
+ /osx/ && do { $name = "_$name";
+ last;
+ };
+- /linux.*32/ && do { $ret .= ".globl $name\n";
++ /linux.*(32|64le)/
++ && do { $ret .= ".globl $name\n";
+ $ret .= ".type $name,\@function";
+ last;
+ };
+@@ -37,7 +38,6 @@ my $globl = sub {
+ $ret .= ".align 3\n";
+ $ret .= "$name:\n";
+ $ret .= ".quad .$name,.TOC.\@tocbase,0\n";
+- $ret .= ".size $name,24\n";
+ $ret .= ".previous\n";
+
+ $name = ".$name";
+@@ -50,7 +50,9 @@ my $globl = sub {
+ $ret;
+ };
+ my $text = sub {
+- ($flavour =~ /aix/) ? ".csect" : ".text";
++ my $ret = ($flavour =~ /aix/) ? ".csect\t.text[PR],7" : ".text";
++ $ret = ".abiversion 2\n".$ret if ($flavour =~ /linux.*64le/);
++ $ret;
+ };
+ my $machine = sub {
+ my $junk = shift;
+@@ -62,9 +64,12 @@ my $machine = sub {
+ ".machine $arch";
+ };
+ my $size = sub {
+- if ($flavour =~ /linux.*32/)
++ if ($flavour =~ /linux/)
+ { shift;
+- ".size " . join(",", at _);
++ my $name = shift; $name =~ s|^[\.\_]||;
++ my $ret = ".size $name,.-".($flavour=~/64$/?".":"").$name;
++ $ret .= "\n.size .$name,.-.$name" if ($flavour=~/64$/);
++ $ret;
+ }
+ else
+ { ""; }
+@@ -77,6 +82,25 @@ my $asciz = sub {
+ else
+ { ""; }
+ };
++my $quad = sub {
++ shift;
++ my @ret;
++ my ($hi,$lo);
++ for (@_) {
++ if (/^0x([0-9a-f]*?)([0-9a-f]{1,8})$/io)
++ { $hi=$1?"0x$1":"0"; $lo="0x$2"; }
++ elsif (/^([0-9]+)$/o)
++ { $hi=$1>>32; $lo=$1&0xffffffff; } # error-prone with 32-bit perl
++ else
++ { $hi=undef; $lo=$_; }
++
++ if (defined($hi))
++ { push(@ret,$flavour=~/le$/o?".long\t$lo,$hi":".long\t$hi,$lo"); }
++ else
++ { push(@ret,".quad $lo"); }
++ }
++ join("\n", at ret);
++};
+
+ ################################################################
+ # simplified mnemonics not handled by at least one assembler
+@@ -122,6 +146,46 @@ my $extrdi = sub {
+ $b = ($b+$n)&63; $n = 64-$n;
+ " rldicl $ra,$rs,$b,$n";
+ };
++my $vmr = sub {
++ my ($f,$vx,$vy) = @_;
++ " vor $vx,$vy,$vy";
++};
++
++# PowerISA 2.06 stuff
++sub vsxmem_op {
++ my ($f, $vrt, $ra, $rb, $op) = @_;
++ " .long ".sprintf "0x%X",(31<<26)|($vrt<<21)|($ra<<16)|($rb<<11)|($op*2+1);
++}
++# made-up unaligned memory reference AltiVec/VMX instructions
++my $lvx_u = sub { vsxmem_op(@_, 844); }; # lxvd2x
++my $stvx_u = sub { vsxmem_op(@_, 972); }; # stxvd2x
++my $lvdx_u = sub { vsxmem_op(@_, 588); }; # lxsdx
++my $stvdx_u = sub { vsxmem_op(@_, 716); }; # stxsdx
++my $lvx_4w = sub { vsxmem_op(@_, 780); }; # lxvw4x
++my $stvx_4w = sub { vsxmem_op(@_, 908); }; # stxvw4x
++
++# PowerISA 2.07 stuff
++sub vcrypto_op {
++ my ($f, $vrt, $vra, $vrb, $op) = @_;
++ " .long ".sprintf "0x%X",(4<<26)|($vrt<<21)|($vra<<16)|($vrb<<11)|$op;
++}
++my $vcipher = sub { vcrypto_op(@_, 1288); };
++my $vcipherlast = sub { vcrypto_op(@_, 1289); };
++my $vncipher = sub { vcrypto_op(@_, 1352); };
++my $vncipherlast= sub { vcrypto_op(@_, 1353); };
++my $vsbox = sub { vcrypto_op(@_, 0, 1480); };
++my $vshasigmad = sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1730); };
++my $vshasigmaw = sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1666); };
++my $vpmsumb = sub { vcrypto_op(@_, 1032); };
++my $vpmsumd = sub { vcrypto_op(@_, 1224); };
++my $vpmsubh = sub { vcrypto_op(@_, 1096); };
++my $vpmsumw = sub { vcrypto_op(@_, 1160); };
++my $vaddudm = sub { vcrypto_op(@_, 192); };
++
++my $mtsle = sub {
++ my ($f, $arg) = @_;
++ " .long ".sprintf "0x%X",(31<<26)|($arg<<21)|(147*2);
++};
+
+ while($line=<>) {
+
+@@ -138,7 +202,10 @@ while($line=<>) {
+ {
+ $line =~ s|(^[\.\w]+)\:\s*||;
+ my $label = $1;
+- printf "%s:",($GLOBALS{$label} or $label) if ($label);
++ if ($label) {
++ printf "%s:",($GLOBALS{$label} or $label);
++ printf "\n.localentry\t$GLOBALS{$label},0" if ($GLOBALS{$label} && $flavour =~ /linux.*64le/);
++ }
+ }
+
+ {
+@@ -147,7 +214,7 @@ while($line=<>) {
+ my $mnemonic = $2;
+ my $f = $3;
+ my $opcode = eval("\$$mnemonic");
+- $line =~ s|\bc?[rf]([0-9]+)\b|$1|g if ($c ne "." and $flavour !~ /osx/);
++ $line =~ s/\b(c?[rf]|v|vs)([0-9]+)\b/$2/g if ($c ne "." and $flavour !~ /osx/);
+ if (ref($opcode) eq 'CODE') { $line = &$opcode($f,split(',',$line)); }
+ elsif ($mnemonic) { $line = $c.$mnemonic.$f."\t".$line; }
+ }
+diff -up openssl-1.0.1i/crypto/ppc_arch.h.ppc-asm openssl-1.0.1i/crypto/ppc_arch.h
+--- openssl-1.0.1i/crypto/ppc_arch.h.ppc-asm 2014-08-13 19:46:21.095578174 +0200
++++ openssl-1.0.1i/crypto/ppc_arch.h 2014-08-13 19:46:21.095578174 +0200
+@@ -0,0 +1,10 @@
++#ifndef __PPC_ARCH_H__
++#define __PPC_ARCH_H__
++
++extern unsigned int OPENSSL_ppccap_P;
++
++#define PPC_FPU64 (1<<0)
++#define PPC_ALTIVEC (1<<1)
++#define PPC_CRYPTO207 (1<<2)
++
++#endif
+diff -up openssl-1.0.1i/crypto/ppccap.c.ppc-asm openssl-1.0.1i/crypto/ppccap.c
+--- openssl-1.0.1i/crypto/ppccap.c.ppc-asm 2014-08-06 23:10:56.000000000 +0200
++++ openssl-1.0.1i/crypto/ppccap.c 2014-08-13 19:46:21.095578174 +0200
+@@ -4,13 +4,15 @@
+ #include <setjmp.h>
+ #include <signal.h>
+ #include <unistd.h>
++#if defined(__linux) || defined(_AIX)
++#include <sys/utsname.h>
++#endif
+ #include <crypto.h>
+ #include <openssl/bn.h>
+
+-#define PPC_FPU64 (1<<0)
+-#define PPC_ALTIVEC (1<<1)
++#include "ppc_arch.h"
+
+-static int OPENSSL_ppccap_P = 0;
++unsigned int OPENSSL_ppccap_P = 0;
+
+ static sigset_t all_masked;
+
+@@ -22,7 +24,7 @@ int bn_mul_mont(BN_ULONG *rp, const BN_U
+
+ if (sizeof(size_t)==4)
+ {
+-#if (defined(__APPLE__) && defined(__MACH__))
++#if 1 || (defined(__APPLE__) && defined(__MACH__))
+ if (num>=8 && (num&3)==0 && (OPENSSL_ppccap_P&PPC_FPU64))
+ return bn_mul_mont_fpu64(rp,ap,bp,np,n0,num);
+ #else
+@@ -50,11 +52,28 @@ int bn_mul_mont(BN_ULONG *rp, const BN_U
+ }
+ #endif
+
++void sha256_block_p8(void *ctx,const void *inp,size_t len);
++void sha256_block_ppc(void *ctx,const void *inp,size_t len);
++void sha256_block_data_order(void *ctx,const void *inp,size_t len)
++ {
++ OPENSSL_ppccap_P&PPC_CRYPTO207? sha256_block_p8(ctx,inp,len):
++ sha256_block_ppc(ctx,inp,len);
++ }
++
++void sha512_block_p8(void *ctx,const void *inp,size_t len);
++void sha512_block_ppc(void *ctx,const void *inp,size_t len);
++void sha512_block_data_order(void *ctx,const void *inp,size_t len)
++ {
++ OPENSSL_ppccap_P&PPC_CRYPTO207? sha512_block_p8(ctx,inp,len):
++ sha512_block_ppc(ctx,inp,len);
++ }
++
+ static sigjmp_buf ill_jmp;
+ static void ill_handler (int sig) { siglongjmp(ill_jmp,sig); }
+
+ void OPENSSL_ppc64_probe(void);
+ void OPENSSL_altivec_probe(void);
++void OPENSSL_crypto207_probe(void);
+
+ void OPENSSL_cpuid_setup(void)
+ {
+@@ -85,12 +104,14 @@ void OPENSSL_cpuid_setup(void)
+ OPENSSL_ppccap_P = 0;
+
+ #if defined(_AIX)
+- if (sizeof(size_t)==4
++ if (sizeof(size_t)==4)
++ {
++ struct utsname uts;
+ # if defined(_SC_AIX_KERNEL_BITMODE)
+- && sysconf(_SC_AIX_KERNEL_BITMODE)!=64
++ if (sysconf(_SC_AIX_KERNEL_BITMODE)!=64) return;
+ # endif
+- )
+- return;
++ if (uname(&uts)!=0 || atoi(uts.version)<6) return;
++ }
+ #endif
+
+ memset(&ill_act,0,sizeof(ill_act));
+@@ -102,6 +123,10 @@ void OPENSSL_cpuid_setup(void)
+
+ if (sizeof(size_t)==4)
+ {
++#ifdef __linux
++ struct utsname uts;
++ if (uname(&uts)==0 && strcmp(uts.machine,"ppc64")==0)
++#endif
+ if (sigsetjmp(ill_jmp,1) == 0)
+ {
+ OPENSSL_ppc64_probe();
+@@ -119,6 +144,11 @@ void OPENSSL_cpuid_setup(void)
+ {
+ OPENSSL_altivec_probe();
+ OPENSSL_ppccap_P |= PPC_ALTIVEC;
++ if (sigsetjmp(ill_jmp,1) == 0)
++ {
++ OPENSSL_crypto207_probe();
++ OPENSSL_ppccap_P |= PPC_CRYPTO207;
++ }
+ }
+
+ sigaction (SIGILL,&ill_oact,NULL);
+diff -up openssl-1.0.1i/crypto/ppccpuid.pl.ppc-asm openssl-1.0.1i/crypto/ppccpuid.pl
+--- openssl-1.0.1i/crypto/ppccpuid.pl.ppc-asm 2014-08-06 23:10:56.000000000 +0200
++++ openssl-1.0.1i/crypto/ppccpuid.pl 2014-08-13 19:46:21.096578196 +0200
+@@ -31,6 +31,7 @@ $code=<<___;
+ blr
+ .long 0
+ .byte 0,12,0x14,0,0,0,0,0
++.size .OPENSSL_ppc64_probe,.-.OPENSSL_ppc64_probe
+
+ .globl .OPENSSL_altivec_probe
+ .align 4
+@@ -39,6 +40,17 @@ $code=<<___;
+ blr
+ .long 0
+ .byte 0,12,0x14,0,0,0,0,0
++.size .OPENSSL_altivec_probe,.-..OPENSSL_altivec_probe
++
++.globl .OPENSSL_crypto207_probe
++.align 4
++.OPENSSL_crypto207_probe:
++ lvx_u v0,0,r1
++ vcipher v0,v0,v0
++ blr
++ .long 0
++ .byte 0,12,0x14,0,0,0,0,0
++.size .OPENSSL_crypto207_probe,.-.OPENSSL_crypto207_probe
+
+ .globl .OPENSSL_wipe_cpu
+ .align 4
+@@ -71,6 +83,7 @@ $code=<<___;
+ blr
+ .long 0
+ .byte 0,12,0x14,0,0,0,0,0
++.size .OPENSSL_wipe_cpu,.-.OPENSSL_wipe_cpu
+
+ .globl .OPENSSL_atomic_add
+ .align 4
+@@ -84,6 +97,7 @@ Ladd: lwarx r5,0,r3
+ .long 0
+ .byte 0,12,0x14,0,0,0,2,0
+ .long 0
++.size .OPENSSL_atomic_add,.-.OPENSSL_atomic_add
+
+ .globl .OPENSSL_rdtsc
+ .align 4
+@@ -93,6 +107,7 @@ Ladd: lwarx r5,0,r3
+ blr
+ .long 0
+ .byte 0,12,0x14,0,0,0,0,0
++.size .OPENSSL_rdtsc,.-.OPENSSL_rdtsc
+
+ .globl .OPENSSL_cleanse
+ .align 4
+@@ -125,7 +140,99 @@ Laligned:
+ .long 0
+ .byte 0,12,0x14,0,0,0,2,0
+ .long 0
++.size .OPENSSL_cleanse,.-.OPENSSL_cleanse
++___
++{
++my ($out,$cnt,$max)=("r3","r4","r5");
++my ($tick,$lasttick)=("r6","r7");
++my ($diff,$lastdiff)=("r8","r9");
++
++$code.=<<___;
++.globl .OPENSSL_instrument_bus
++.align 4
++.OPENSSL_instrument_bus:
++ mtctr $cnt
++
++ mftb $lasttick # collect 1st tick
++ li $diff,0
++
++ dcbf 0,$out # flush cache line
++ lwarx $tick,0,$out # load and lock
++ add $tick,$tick,$diff
++ stwcx. $tick,0,$out
++ stwx $tick,0,$out
++
++Loop: mftb $tick
++ sub $diff,$tick,$lasttick
++ mr $lasttick,$tick
++ dcbf 0,$out # flush cache line
++ lwarx $tick,0,$out # load and lock
++ add $tick,$tick,$diff
++ stwcx. $tick,0,$out
++ stwx $tick,0,$out
++ addi $out,$out,4 # ++$out
++ bdnz Loop
++
++ mr r3,$cnt
++ blr
++ .long 0
++ .byte 0,12,0x14,0,0,0,2,0
++ .long 0
++.size .OPENSSL_instrument_bus,.-.OPENSSL_instrument_bus
++
++.globl .OPENSSL_instrument_bus2
++.align 4
++.OPENSSL_instrument_bus2:
++ mr r0,$cnt
++ slwi $cnt,$cnt,2
++
++ mftb $lasttick # collect 1st tick
++ li $diff,0
++
++ dcbf 0,$out # flush cache line
++ lwarx $tick,0,$out # load and lock
++ add $tick,$tick,$diff
++ stwcx. $tick,0,$out
++ stwx $tick,0,$out
++
++ mftb $tick # collect 1st diff
++ sub $diff,$tick,$lasttick
++ mr $lasttick,$tick
++ mr $lastdiff,$diff
++Loop2:
++ dcbf 0,$out # flush cache line
++ lwarx $tick,0,$out # load and lock
++ add $tick,$tick,$diff
++ stwcx. $tick,0,$out
++ stwx $tick,0,$out
++
++ addic. $max,$max,-1
++ beq Ldone2
++
++ mftb $tick
++ sub $diff,$tick,$lasttick
++ mr $lasttick,$tick
++ cmplw 7,$diff,$lastdiff
++ mr $lastdiff,$diff
++
++ mfcr $tick # pull cr
++ not $tick,$tick # flip bits
++ rlwinm $tick,$tick,1,29,29 # isolate flipped eq bit and scale
++
++ sub. $cnt,$cnt,$tick # conditional --$cnt
++ add $out,$out,$tick # conditional ++$out
++ bne Loop2
++
++Ldone2:
++ srwi $cnt,$cnt,2
++ sub r3,r0,$cnt
++ blr
++ .long 0
++ .byte 0,12,0x14,0,0,0,3,0
++ .long 0
++.size .OPENSSL_instrument_bus2,.-.OPENSSL_instrument_bus2
+ ___
++}
+
+ $code =~ s/\`([^\`]*)\`/eval $1/gem;
+ print $code;
+diff -up openssl-1.0.1i/crypto/sha/asm/sha1-ppc.pl.ppc-asm openssl-1.0.1i/crypto/sha/asm/sha1-ppc.pl
+--- openssl-1.0.1i/crypto/sha/asm/sha1-ppc.pl.ppc-asm 2014-08-06 23:10:56.000000000 +0200
++++ openssl-1.0.1i/crypto/sha/asm/sha1-ppc.pl 2014-08-13 19:46:21.096578196 +0200
+@@ -9,8 +9,7 @@
+
+ # I let hardware handle unaligned input(*), except on page boundaries
+ # (see below for details). Otherwise straightforward implementation
+-# with X vector in register bank. The module is big-endian [which is
+-# not big deal as there're no little-endian targets left around].
++# with X vector in register bank.
+ #
+ # (*) this means that this module is inappropriate for PPC403? Does
+ # anybody know if pre-POWER3 can sustain unaligned load?
+@@ -38,6 +37,10 @@ if ($flavour =~ /64/) {
+ $PUSH ="stw";
+ } else { die "nonsense $flavour"; }
+
++# Define endianess based on flavour
++# i.e.: linux64le
++$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;
++
+ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+ ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
+ ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
+@@ -68,14 +71,28 @@ $T ="r12";
+ @X=("r16","r17","r18","r19","r20","r21","r22","r23",
+ "r24","r25","r26","r27","r28","r29","r30","r31");
+
++sub loadbe {
++my ($dst, $src, $temp_reg) = @_;
++$code.=<<___ if (!$LITTLE_ENDIAN);
++ lwz $dst,$src
++___
++$code.=<<___ if ($LITTLE_ENDIAN);
++ lwz $temp_reg,$src
++ rotlwi $dst,$temp_reg,8
++ rlwimi $dst,$temp_reg,24,0,7
++ rlwimi $dst,$temp_reg,24,16,23
++___
++}
++
+ sub BODY_00_19 {
+ my ($i,$a,$b,$c,$d,$e,$f)=@_;
+ my $j=$i+1;
+-$code.=<<___ if ($i==0);
+- lwz @X[$i],`$i*4`($inp)
+-___
++
++ # Since the last value of $f is discarded, we can use
++ # it as a temp reg to swap byte-order when needed.
++ loadbe("@X[$i]","`$i*4`($inp)",$f) if ($i==0);
++ loadbe("@X[$j]","`$j*4`($inp)",$f) if ($i<15);
+ $code.=<<___ if ($i<15);
+- lwz @X[$j],`$j*4`($inp)
+ add $f,$K,$e
+ rotlwi $e,$a,5
+ add $f,$f, at X[$i]
+@@ -108,31 +125,31 @@ my ($i,$a,$b,$c,$d,$e,$f)=@_;
+ my $j=$i+1;
+ $code.=<<___ if ($i<79);
+ add $f,$K,$e
++ xor $t0,$b,$d
+ rotlwi $e,$a,5
+ xor @X[$j%16], at X[$j%16], at X[($j+2)%16]
+ add $f,$f, at X[$i%16]
+- xor $t0,$b,$c
++ xor $t0,$t0,$c
+ xor @X[$j%16], at X[$j%16], at X[($j+8)%16]
+- add $f,$f,$e
++ add $f,$f,$t0
+ rotlwi $b,$b,30
+- xor $t0,$t0,$d
+ xor @X[$j%16], at X[$j%16], at X[($j+13)%16]
+- add $f,$f,$t0
++ add $f,$f,$e
+ rotlwi @X[$j%16], at X[$j%16],1
+ ___
+ $code.=<<___ if ($i==79);
+ add $f,$K,$e
++ xor $t0,$b,$d
+ rotlwi $e,$a,5
+ lwz r16,0($ctx)
+ add $f,$f, at X[$i%16]
+- xor $t0,$b,$c
++ xor $t0,$t0,$c
+ lwz r17,4($ctx)
+- add $f,$f,$e
++ add $f,$f,$t0
+ rotlwi $b,$b,30
+ lwz r18,8($ctx)
+- xor $t0,$t0,$d
+ lwz r19,12($ctx)
+- add $f,$f,$t0
++ add $f,$f,$e
+ lwz r20,16($ctx)
+ ___
+ }
+@@ -316,6 +333,7 @@ $code.=<<___;
+ blr
+ .long 0
+ .byte 0,12,0x14,0,0,0,0,0
++.size .sha1_block_data_order,.-.sha1_block_data_order
+ ___
+ $code.=<<___;
+ .asciz "SHA1 block transform for PPC, CRYPTOGAMS by <appro\@fy.chalmers.se>"
+diff -up openssl-1.0.1i/crypto/sha/asm/sha512-ppc.pl.ppc-asm openssl-1.0.1i/crypto/sha/asm/sha512-ppc.pl
+--- openssl-1.0.1i/crypto/sha/asm/sha512-ppc.pl.ppc-asm 2014-08-06 23:10:56.000000000 +0200
++++ openssl-1.0.1i/crypto/sha/asm/sha512-ppc.pl 2014-08-13 19:46:21.096578196 +0200
+@@ -1,7 +1,7 @@
+ #!/usr/bin/env perl
+
+ # ====================================================================
+-# Written by Andy Polyakov <appro at fy.chalmers.se> for the OpenSSL
++# Written by Andy Polyakov <appro at openssl.org> for the OpenSSL
+ # project. The module is, however, dual licensed under OpenSSL and
+ # CRYPTOGAMS licenses depending on where you obtain it. For further
+ # details see http://www.openssl.org/~appro/cryptogams/.
+@@ -9,8 +9,7 @@
+
+ # I let hardware handle unaligned input, except on page boundaries
+ # (see below for details). Otherwise straightforward implementation
+-# with X vector in register bank. The module is big-endian [which is
+-# not big deal as there're no little-endian targets left around].
++# with X vector in register bank.
+
+ # sha256 | sha512
+ # -m64 -m32 | -m64 -m32
+@@ -56,6 +55,8 @@ if ($flavour =~ /64/) {
+ $PUSH="stw";
+ } else { die "nonsense $flavour"; }
+
++$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;
++
+ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+ ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
+ ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
+@@ -64,7 +65,7 @@ die "can't locate ppc-xlate.pl";
+ open STDOUT,"| $^X $xlate $flavour $output" || die "can't call $xlate: $!";
+
+ if ($output =~ /512/) {
+- $func="sha512_block_data_order";
++ $func="sha512_block_ppc";
+ $SZ=8;
+ @Sigma0=(28,34,39);
+ @Sigma1=(14,18,41);
+@@ -76,7 +77,7 @@ if ($output =~ /512/) {
+ $ROR="rotrdi";
+ $SHR="srdi";
+ } else {
+- $func="sha256_block_data_order";
++ $func="sha256_block_ppc";
+ $SZ=4;
+ @Sigma0=( 2,13,22);
+ @Sigma1=( 6,11,25);
+@@ -110,7 +111,7 @@ $B ="r9";
+ $C ="r10";
+ $D ="r11";
+ $E ="r12";
+-$F ="r13"; $F="r2" if ($SIZE_T==8);# reassigned to exempt TLS pointer
++$F =$t1; $t1 = "r0"; # stay away from "r13";
+ $G ="r14";
+ $H ="r15";
+
+@@ -118,24 +119,23 @@ $H ="r15";
+ @X=("r16","r17","r18","r19","r20","r21","r22","r23",
+ "r24","r25","r26","r27","r28","r29","r30","r31");
+
+-$inp="r31"; # reassigned $inp! aliases with @X[15]
++$inp="r31" if($SZ==4 || $SIZE_T==8); # reassigned $inp! aliases with @X[15]
+
+ sub ROUND_00_15 {
+ my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_;
+ $code.=<<___;
+- $LD $T,`$i*$SZ`($Tbl)
+ $ROR $a0,$e,$Sigma1[0]
+ $ROR $a1,$e,$Sigma1[1]
+ and $t0,$f,$e
+- andc $t1,$g,$e
+- add $T,$T,$h
+ xor $a0,$a0,$a1
++ add $h,$h,$t1
++ andc $t1,$g,$e
+ $ROR $a1,$a1,`$Sigma1[2]-$Sigma1[1]`
+ or $t0,$t0,$t1 ; Ch(e,f,g)
+- add $T,$T, at X[$i]
++ add $h,$h, at X[$i%16]
+ xor $a0,$a0,$a1 ; Sigma1(e)
+- add $T,$T,$t0
+- add $T,$T,$a0
++ add $h,$h,$t0
++ add $h,$h,$a0
+
+ $ROR $a0,$a,$Sigma0[0]
+ $ROR $a1,$a,$Sigma0[1]
+@@ -146,9 +146,14 @@ $code.=<<___;
+ xor $t0,$t0,$t1
+ and $t1,$b,$c
+ xor $a0,$a0,$a1 ; Sigma0(a)
+- add $d,$d,$T
++ add $d,$d,$h
+ xor $t0,$t0,$t1 ; Maj(a,b,c)
+- add $h,$T,$a0
++___
++$code.=<<___ if ($i<15);
++ $LD $t1,`($i+1)*$SZ`($Tbl)
++___
++$code.=<<___;
++ add $h,$h,$a0
+ add $h,$h,$t0
+
+ ___
+@@ -169,10 +174,11 @@ $code.=<<___;
+ add @X[$i], at X[$i], at X[($i+9)%16]
+ xor $a0,$a0,$a1 ; sigma0(X[(i+1)&0x0f])
+ xor $t0,$t0,$t1 ; sigma1(X[(i+14)&0x0f])
++ $LD $t1,`$i*$SZ`($Tbl)
+ add @X[$i], at X[$i],$a0
+ add @X[$i], at X[$i],$t0
+ ___
+-&ROUND_00_15($i,$a,$b,$c,$d,$e,$f,$g,$h);
++&ROUND_00_15($i+16,$a,$b,$c,$d,$e,$f,$g,$h);
+ }
+
+ $code=<<___;
+@@ -188,8 +194,6 @@ $func:
+
+ $PUSH $ctx,`$FRAME-$SIZE_T*22`($sp)
+
+- $PUSH $toc,`$FRAME-$SIZE_T*20`($sp)
+- $PUSH r13,`$FRAME-$SIZE_T*19`($sp)
+ $PUSH r14,`$FRAME-$SIZE_T*18`($sp)
+ $PUSH r15,`$FRAME-$SIZE_T*17`($sp)
+ $PUSH r16,`$FRAME-$SIZE_T*16`($sp)
+@@ -209,7 +213,10 @@ $func:
+ $PUSH r30,`$FRAME-$SIZE_T*2`($sp)
+ $PUSH r31,`$FRAME-$SIZE_T*1`($sp)
+ $PUSH r0,`$FRAME+$LRSAVE`($sp)
++___
+
++if ($SZ==4 || $SIZE_T==8) {
++$code.=<<___;
+ $LD $A,`0*$SZ`($ctx)
+ mr $inp,r4 ; incarnate $inp
+ $LD $B,`1*$SZ`($ctx)
+@@ -219,7 +226,16 @@ $func:
+ $LD $F,`5*$SZ`($ctx)
+ $LD $G,`6*$SZ`($ctx)
+ $LD $H,`7*$SZ`($ctx)
++___
++} else {
++ for ($i=16;$i<32;$i++) {
++ $code.=<<___;
++ lwz r$i,`$LITTLE_ENDIAN^(4*($i-16))`($ctx)
++___
++ }
++}
+
++$code.=<<___;
+ bl LPICmeup
+ LPICedup:
+ andi. r0,$inp,3
+@@ -255,6 +271,9 @@ Lunaligned:
+ Lcross_page:
+ li $t1,`16*$SZ/4`
+ mtctr $t1
++___
++if ($SZ==4 || $SIZE_T==8) {
++$code.=<<___;
+ addi r20,$sp,$LOCALS ; aligned spot below the frame
+ Lmemcpy:
+ lbz r16,0($inp)
+@@ -268,7 +287,26 @@ Lmemcpy:
+ stb r19,3(r20)
+ addi r20,r20,4
+ bdnz Lmemcpy
++___
++} else {
++$code.=<<___;
++ addi r12,$sp,$LOCALS ; aligned spot below the frame
++Lmemcpy:
++ lbz r8,0($inp)
++ lbz r9,1($inp)
++ lbz r10,2($inp)
++ lbz r11,3($inp)
++ addi $inp,$inp,4
++ stb r8,0(r12)
++ stb r9,1(r12)
++ stb r10,2(r12)
++ stb r11,3(r12)
++ addi r12,r12,4
++ bdnz Lmemcpy
++___
++}
+
++$code.=<<___;
+ $PUSH $inp,`$FRAME-$SIZE_T*26`($sp) ; save real inp
+ addi $t1,$sp,`$LOCALS+16*$SZ` ; fictitious end pointer
+ addi $inp,$sp,$LOCALS ; fictitious inp pointer
+@@ -283,8 +321,6 @@ Lmemcpy:
+
+ Ldone:
+ $POP r0,`$FRAME+$LRSAVE`($sp)
+- $POP $toc,`$FRAME-$SIZE_T*20`($sp)
+- $POP r13,`$FRAME-$SIZE_T*19`($sp)
+ $POP r14,`$FRAME-$SIZE_T*18`($sp)
+ $POP r15,`$FRAME-$SIZE_T*17`($sp)
+ $POP r16,`$FRAME-$SIZE_T*16`($sp)
+@@ -309,27 +345,48 @@ Ldone:
+ .long 0
+ .byte 0,12,4,1,0x80,18,3,0
+ .long 0
++___
+
++if ($SZ==4 || $SIZE_T==8) {
++$code.=<<___;
+ .align 4
+ Lsha2_block_private:
++ $LD $t1,0($Tbl)
+ ___
+ for($i=0;$i<16;$i++) {
+-$code.=<<___ if ($SZ==4);
++$code.=<<___ if ($SZ==4 && !$LITTLE_ENDIAN);
+ lwz @X[$i],`$i*$SZ`($inp)
+ ___
++$code.=<<___ if ($SZ==4 && $LITTLE_ENDIAN);
++ lwz $a0,`$i*$SZ`($inp)
++ rotlwi @X[$i],$a0,8
++ rlwimi @X[$i],$a0,24,0,7
++ rlwimi @X[$i],$a0,24,16,23
++___
+ # 64-bit loads are split to 2x32-bit ones, as CPU can't handle
+ # unaligned 64-bit loads, only 32-bit ones...
+-$code.=<<___ if ($SZ==8);
++$code.=<<___ if ($SZ==8 && !$LITTLE_ENDIAN);
+ lwz $t0,`$i*$SZ`($inp)
+ lwz @X[$i],`$i*$SZ+4`($inp)
+ insrdi @X[$i],$t0,32,0
+ ___
++$code.=<<___ if ($SZ==8 && $LITTLE_ENDIAN);
++ lwz $a0,`$i*$SZ`($inp)
++ lwz $a1,`$i*$SZ+4`($inp)
++ rotlwi $t0,$a0,8
++ rotlwi @X[$i],$a1,8
++ rlwimi $t0,$a0,24,0,7
++ rlwimi @X[$i],$a1,24,0,7
++ rlwimi $t0,$a0,24,16,23
++ rlwimi @X[$i],$a1,24,16,23
++ insrdi @X[$i],$t0,32,0
++___
+ &ROUND_00_15($i, at V);
+ unshift(@V,pop(@V));
+ }
+ $code.=<<___;
+- li $T,`$rounds/16-1`
+- mtctr $T
++ li $t0,`$rounds/16-1`
++ mtctr $t0
+ .align 4
+ Lrounds:
+ addi $Tbl,$Tbl,`16*$SZ`
+@@ -377,7 +434,282 @@ $code.=<<___;
+ blr
+ .long 0
+ .byte 0,12,0x14,0,0,0,0,0
++.size $func,.-$func
++___
++} else {
++########################################################################
++# SHA512 for PPC32, X vector is off-loaded to stack...
++#
++# | sha512
++# | -m32
++# ----------------------+-----------------------
++# PPC74x0,gcc-4.0.1 | +48%
++# POWER6,gcc-4.4.6 | +124%(*)
++# POWER7,gcc-4.4.6 | +79%(*)
++# e300,gcc-4.1.0 | +167%
++#
++# (*) ~1/3 of -m64 result [and ~20% better than -m32 code generated
++# by xlc-12.1]
++
++my $XOFF=$LOCALS;
++
++my @V=map("r$_",(16..31)); # A..H
++
++my ($s0,$s1,$t0,$t1,$t2,$t3,$a0,$a1,$a2,$a3)=map("r$_",(0,5,6,8..12,14,15));
++my ($x0,$x1)=("r3","r4"); # zaps $ctx and $inp
++
++sub ROUND_00_15_ppc32 {
++my ($i, $ahi,$alo,$bhi,$blo,$chi,$clo,$dhi,$dlo,
++ $ehi,$elo,$fhi,$flo,$ghi,$glo,$hhi,$hlo)=@_;
++
++$code.=<<___;
++ lwz $t2,`$SZ*($i%16)+($LITTLE_ENDIAN^4)`($Tbl)
++ xor $a0,$flo,$glo
++ lwz $t3,`$SZ*($i%16)+($LITTLE_ENDIAN^0)`($Tbl)
++ xor $a1,$fhi,$ghi
++ addc $hlo,$hlo,$t0 ; h+=x[i]
++ stw $t0,`$XOFF+0+$SZ*($i%16)`($sp) ; save x[i]
++
++ srwi $s0,$elo,$Sigma1[0]
++ srwi $s1,$ehi,$Sigma1[0]
++ and $a0,$a0,$elo
++ adde $hhi,$hhi,$t1
++ and $a1,$a1,$ehi
++ stw $t1,`$XOFF+4+$SZ*($i%16)`($sp)
++ srwi $t0,$elo,$Sigma1[1]
++ srwi $t1,$ehi,$Sigma1[1]
++ addc $hlo,$hlo,$t2 ; h+=K512[i]
++ insrwi $s0,$ehi,$Sigma1[0],0
++ insrwi $s1,$elo,$Sigma1[0],0
++ xor $a0,$a0,$glo ; Ch(e,f,g)
++ adde $hhi,$hhi,$t3
++ xor $a1,$a1,$ghi
++ insrwi $t0,$ehi,$Sigma1[1],0
++ insrwi $t1,$elo,$Sigma1[1],0
++ addc $hlo,$hlo,$a0 ; h+=Ch(e,f,g)
++ srwi $t2,$ehi,$Sigma1[2]-32
++ srwi $t3,$elo,$Sigma1[2]-32
++ xor $s0,$s0,$t0
++ xor $s1,$s1,$t1
++ insrwi $t2,$elo,$Sigma1[2]-32,0
++ insrwi $t3,$ehi,$Sigma1[2]-32,0
++ xor $a0,$alo,$blo ; a^b, b^c in next round
++ adde $hhi,$hhi,$a1
++ xor $a1,$ahi,$bhi
++ xor $s0,$s0,$t2 ; Sigma1(e)
++ xor $s1,$s1,$t3
++
++ srwi $t0,$alo,$Sigma0[0]
++ and $a2,$a2,$a0
++ addc $hlo,$hlo,$s0 ; h+=Sigma1(e)
++ and $a3,$a3,$a1
++ srwi $t1,$ahi,$Sigma0[0]
++ srwi $s0,$ahi,$Sigma0[1]-32
++ adde $hhi,$hhi,$s1
++ srwi $s1,$alo,$Sigma0[1]-32
++ insrwi $t0,$ahi,$Sigma0[0],0
++ insrwi $t1,$alo,$Sigma0[0],0
++ xor $a2,$a2,$blo ; Maj(a,b,c)
++ addc $dlo,$dlo,$hlo ; d+=h
++ xor $a3,$a3,$bhi
++ insrwi $s0,$alo,$Sigma0[1]-32,0
++ insrwi $s1,$ahi,$Sigma0[1]-32,0
++ adde $dhi,$dhi,$hhi
++ srwi $t2,$ahi,$Sigma0[2]-32
++ srwi $t3,$alo,$Sigma0[2]-32
++ xor $s0,$s0,$t0
++ addc $hlo,$hlo,$a2 ; h+=Maj(a,b,c)
++ xor $s1,$s1,$t1
++ insrwi $t2,$alo,$Sigma0[2]-32,0
++ insrwi $t3,$ahi,$Sigma0[2]-32,0
++ adde $hhi,$hhi,$a3
++___
++$code.=<<___ if ($i>=15);
++ lwz $t0,`$XOFF+0+$SZ*(($i+2)%16)`($sp)
++ lwz $t1,`$XOFF+4+$SZ*(($i+2)%16)`($sp)
++___
++$code.=<<___ if ($i<15 && !$LITTLE_ENDIAN);
++ lwz $t1,`$SZ*($i+1)+0`($inp)
++ lwz $t0,`$SZ*($i+1)+4`($inp)
+ ___
++$code.=<<___ if ($i<15 && $LITTLE_ENDIAN);
++ lwz $a2,`$SZ*($i+1)+0`($inp)
++ lwz $a3,`$SZ*($i+1)+4`($inp)
++ rotlwi $t1,$a2,8
++ rotlwi $t0,$a3,8
++ rlwimi $t1,$a2,24,0,7
++ rlwimi $t0,$a3,24,0,7
++ rlwimi $t1,$a2,24,16,23
++ rlwimi $t0,$a3,24,16,23
++___
++$code.=<<___;
++ xor $s0,$s0,$t2 ; Sigma0(a)
++ xor $s1,$s1,$t3
++ addc $hlo,$hlo,$s0 ; h+=Sigma0(a)
++ adde $hhi,$hhi,$s1
++___
++$code.=<<___ if ($i==15);
++ lwz $x0,`$XOFF+0+$SZ*(($i+1)%16)`($sp)
++ lwz $x1,`$XOFF+4+$SZ*(($i+1)%16)`($sp)
++___
++}
++sub ROUND_16_xx_ppc32 {
++my ($i, $ahi,$alo,$bhi,$blo,$chi,$clo,$dhi,$dlo,
++ $ehi,$elo,$fhi,$flo,$ghi,$glo,$hhi,$hlo)=@_;
++
++$code.=<<___;
++ srwi $s0,$t0,$sigma0[0]
++ srwi $s1,$t1,$sigma0[0]
++ srwi $t2,$t0,$sigma0[1]
++ srwi $t3,$t1,$sigma0[1]
++ insrwi $s0,$t1,$sigma0[0],0
++ insrwi $s1,$t0,$sigma0[0],0
++ srwi $a0,$t0,$sigma0[2]
++ insrwi $t2,$t1,$sigma0[1],0
++ insrwi $t3,$t0,$sigma0[1],0
++ insrwi $a0,$t1,$sigma0[2],0
++ xor $s0,$s0,$t2
++ lwz $t2,`$XOFF+0+$SZ*(($i+14)%16)`($sp)
++ srwi $a1,$t1,$sigma0[2]
++ xor $s1,$s1,$t3
++ lwz $t3,`$XOFF+4+$SZ*(($i+14)%16)`($sp)
++ xor $a0,$a0,$s0
++ srwi $s0,$t2,$sigma1[0]
++ xor $a1,$a1,$s1
++ srwi $s1,$t3,$sigma1[0]
++ addc $x0,$x0,$a0 ; x[i]+=sigma0(x[i+1])
++ srwi $a0,$t3,$sigma1[1]-32
++ insrwi $s0,$t3,$sigma1[0],0
++ insrwi $s1,$t2,$sigma1[0],0
++ adde $x1,$x1,$a1
++ srwi $a1,$t2,$sigma1[1]-32
++
++ insrwi $a0,$t2,$sigma1[1]-32,0
++ srwi $t2,$t2,$sigma1[2]
++ insrwi $a1,$t3,$sigma1[1]-32,0
++ insrwi $t2,$t3,$sigma1[2],0
++ xor $s0,$s0,$a0
++ lwz $a0,`$XOFF+0+$SZ*(($i+9)%16)`($sp)
++ srwi $t3,$t3,$sigma1[2]
++ xor $s1,$s1,$a1
++ lwz $a1,`$XOFF+4+$SZ*(($i+9)%16)`($sp)
++ xor $s0,$s0,$t2
++ addc $x0,$x0,$a0 ; x[i]+=x[i+9]
++ xor $s1,$s1,$t3
++ adde $x1,$x1,$a1
++ addc $x0,$x0,$s0 ; x[i]+=sigma1(x[i+14])
++ adde $x1,$x1,$s1
++___
++ ($t0,$t1,$x0,$x1) = ($x0,$x1,$t0,$t1);
++ &ROUND_00_15_ppc32(@_);
++}
++
++$code.=<<___;
++.align 4
++Lsha2_block_private:
++___
++$code.=<<___ if (!$LITTLE_ENDIAN);
++ lwz $t1,0($inp)
++ xor $a2, at V[3], at V[5] ; B^C, magic seed
++ lwz $t0,4($inp)
++ xor $a3, at V[2], at V[4]
++___
++$code.=<<___ if ($LITTLE_ENDIAN);
++ lwz $a1,0($inp)
++ xor $a2, at V[3], at V[5] ; B^C, magic seed
++ lwz $a0,4($inp)
++ xor $a3, at V[2], at V[4]
++ rotlwi $t1,$a1,8
++ rotlwi $t0,$a0,8
++ rlwimi $t1,$a1,24,0,7
++ rlwimi $t0,$a0,24,0,7
++ rlwimi $t1,$a1,24,16,23
++ rlwimi $t0,$a0,24,16,23
++___
++for($i=0;$i<16;$i++) {
++ &ROUND_00_15_ppc32($i, at V);
++ unshift(@V,pop(@V)); unshift(@V,pop(@V));
++ ($a0,$a1,$a2,$a3) = ($a2,$a3,$a0,$a1);
++}
++$code.=<<___;
++ li $a0,`$rounds/16-1`
++ mtctr $a0
++.align 4
++Lrounds:
++ addi $Tbl,$Tbl,`16*$SZ`
++___
++for(;$i<32;$i++) {
++ &ROUND_16_xx_ppc32($i, at V);
++ unshift(@V,pop(@V)); unshift(@V,pop(@V));
++ ($a0,$a1,$a2,$a3) = ($a2,$a3,$a0,$a1);
++}
++$code.=<<___;
++ bdnz- Lrounds
++
++ $POP $ctx,`$FRAME-$SIZE_T*22`($sp)
++ $POP $inp,`$FRAME-$SIZE_T*23`($sp) ; inp pointer
++ $POP $num,`$FRAME-$SIZE_T*24`($sp) ; end pointer
++ subi $Tbl,$Tbl,`($rounds-16)*$SZ` ; rewind Tbl
++
++ lwz $t0,`$LITTLE_ENDIAN^0`($ctx)
++ lwz $t1,`$LITTLE_ENDIAN^4`($ctx)
++ lwz $t2,`$LITTLE_ENDIAN^8`($ctx)
++ lwz $t3,`$LITTLE_ENDIAN^12`($ctx)
++ lwz $a0,`$LITTLE_ENDIAN^16`($ctx)
++ lwz $a1,`$LITTLE_ENDIAN^20`($ctx)
++ lwz $a2,`$LITTLE_ENDIAN^24`($ctx)
++ addc @V[1], at V[1],$t1
++ lwz $a3,`$LITTLE_ENDIAN^28`($ctx)
++ adde @V[0], at V[0],$t0
++ lwz $t0,`$LITTLE_ENDIAN^32`($ctx)
++ addc @V[3], at V[3],$t3
++ lwz $t1,`$LITTLE_ENDIAN^36`($ctx)
++ adde @V[2], at V[2],$t2
++ lwz $t2,`$LITTLE_ENDIAN^40`($ctx)
++ addc @V[5], at V[5],$a1
++ lwz $t3,`$LITTLE_ENDIAN^44`($ctx)
++ adde @V[4], at V[4],$a0
++ lwz $a0,`$LITTLE_ENDIAN^48`($ctx)
++ addc @V[7], at V[7],$a3
++ lwz $a1,`$LITTLE_ENDIAN^52`($ctx)
++ adde @V[6], at V[6],$a2
++ lwz $a2,`$LITTLE_ENDIAN^56`($ctx)
++ addc @V[9], at V[9],$t1
++ lwz $a3,`$LITTLE_ENDIAN^60`($ctx)
++ adde @V[8], at V[8],$t0
++ stw @V[0],`$LITTLE_ENDIAN^0`($ctx)
++ stw @V[1],`$LITTLE_ENDIAN^4`($ctx)
++ addc @V[11], at V[11],$t3
++ stw @V[2],`$LITTLE_ENDIAN^8`($ctx)
++ stw @V[3],`$LITTLE_ENDIAN^12`($ctx)
++ adde @V[10], at V[10],$t2
++ stw @V[4],`$LITTLE_ENDIAN^16`($ctx)
++ stw @V[5],`$LITTLE_ENDIAN^20`($ctx)
++ addc @V[13], at V[13],$a1
++ stw @V[6],`$LITTLE_ENDIAN^24`($ctx)
++ stw @V[7],`$LITTLE_ENDIAN^28`($ctx)
++ adde @V[12], at V[12],$a0
++ stw @V[8],`$LITTLE_ENDIAN^32`($ctx)
++ stw @V[9],`$LITTLE_ENDIAN^36`($ctx)
++ addc @V[15], at V[15],$a3
++ stw @V[10],`$LITTLE_ENDIAN^40`($ctx)
++ stw @V[11],`$LITTLE_ENDIAN^44`($ctx)
++ adde @V[14], at V[14],$a2
++ stw @V[12],`$LITTLE_ENDIAN^48`($ctx)
++ stw @V[13],`$LITTLE_ENDIAN^52`($ctx)
++ stw @V[14],`$LITTLE_ENDIAN^56`($ctx)
++ stw @V[15],`$LITTLE_ENDIAN^60`($ctx)
++
++ addi $inp,$inp,`16*$SZ` ; advance inp
++ $PUSH $inp,`$FRAME-$SIZE_T*23`($sp)
++ $UCMP $inp,$num
++ bne Lsha2_block_private
++ blr
++ .long 0
++ .byte 0,12,0x14,0,0,0,0,0
++.size $func,.-$func
++___
++}
+
+ # Ugly hack here, because PPC assembler syntax seem to vary too
+ # much from platforms to platform...
+@@ -395,46 +727,46 @@ LPICmeup:
+ .space `64-9*4`
+ ___
+ $code.=<<___ if ($SZ==8);
+- .long 0x428a2f98,0xd728ae22,0x71374491,0x23ef65cd
+- .long 0xb5c0fbcf,0xec4d3b2f,0xe9b5dba5,0x8189dbbc
+- .long 0x3956c25b,0xf348b538,0x59f111f1,0xb605d019
+- .long 0x923f82a4,0xaf194f9b,0xab1c5ed5,0xda6d8118
+- .long 0xd807aa98,0xa3030242,0x12835b01,0x45706fbe
+- .long 0x243185be,0x4ee4b28c,0x550c7dc3,0xd5ffb4e2
+- .long 0x72be5d74,0xf27b896f,0x80deb1fe,0x3b1696b1
+- .long 0x9bdc06a7,0x25c71235,0xc19bf174,0xcf692694
+- .long 0xe49b69c1,0x9ef14ad2,0xefbe4786,0x384f25e3
+- .long 0x0fc19dc6,0x8b8cd5b5,0x240ca1cc,0x77ac9c65
+- .long 0x2de92c6f,0x592b0275,0x4a7484aa,0x6ea6e483
+- .long 0x5cb0a9dc,0xbd41fbd4,0x76f988da,0x831153b5
+- .long 0x983e5152,0xee66dfab,0xa831c66d,0x2db43210
+- .long 0xb00327c8,0x98fb213f,0xbf597fc7,0xbeef0ee4
+- .long 0xc6e00bf3,0x3da88fc2,0xd5a79147,0x930aa725
+- .long 0x06ca6351,0xe003826f,0x14292967,0x0a0e6e70
+- .long 0x27b70a85,0x46d22ffc,0x2e1b2138,0x5c26c926
+- .long 0x4d2c6dfc,0x5ac42aed,0x53380d13,0x9d95b3df
+- .long 0x650a7354,0x8baf63de,0x766a0abb,0x3c77b2a8
+- .long 0x81c2c92e,0x47edaee6,0x92722c85,0x1482353b
+- .long 0xa2bfe8a1,0x4cf10364,0xa81a664b,0xbc423001
+- .long 0xc24b8b70,0xd0f89791,0xc76c51a3,0x0654be30
+- .long 0xd192e819,0xd6ef5218,0xd6990624,0x5565a910
+- .long 0xf40e3585,0x5771202a,0x106aa070,0x32bbd1b8
+- .long 0x19a4c116,0xb8d2d0c8,0x1e376c08,0x5141ab53
+- .long 0x2748774c,0xdf8eeb99,0x34b0bcb5,0xe19b48a8
+- .long 0x391c0cb3,0xc5c95a63,0x4ed8aa4a,0xe3418acb
+- .long 0x5b9cca4f,0x7763e373,0x682e6ff3,0xd6b2b8a3
+- .long 0x748f82ee,0x5defb2fc,0x78a5636f,0x43172f60
+- .long 0x84c87814,0xa1f0ab72,0x8cc70208,0x1a6439ec
+- .long 0x90befffa,0x23631e28,0xa4506ceb,0xde82bde9
+- .long 0xbef9a3f7,0xb2c67915,0xc67178f2,0xe372532b
+- .long 0xca273ece,0xea26619c,0xd186b8c7,0x21c0c207
+- .long 0xeada7dd6,0xcde0eb1e,0xf57d4f7f,0xee6ed178
+- .long 0x06f067aa,0x72176fba,0x0a637dc5,0xa2c898a6
+- .long 0x113f9804,0xbef90dae,0x1b710b35,0x131c471b
+- .long 0x28db77f5,0x23047d84,0x32caab7b,0x40c72493
+- .long 0x3c9ebe0a,0x15c9bebc,0x431d67c4,0x9c100d4c
+- .long 0x4cc5d4be,0xcb3e42b6,0x597f299c,0xfc657e2a
+- .long 0x5fcb6fab,0x3ad6faec,0x6c44198c,0x4a475817
++ .quad 0x428a2f98d728ae22,0x7137449123ef65cd
++ .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
++ .quad 0x3956c25bf348b538,0x59f111f1b605d019
++ .quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118
++ .quad 0xd807aa98a3030242,0x12835b0145706fbe
++ .quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
++ .quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1
++ .quad 0x9bdc06a725c71235,0xc19bf174cf692694
++ .quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3
++ .quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
++ .quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483
++ .quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5
++ .quad 0x983e5152ee66dfab,0xa831c66d2db43210
++ .quad 0xb00327c898fb213f,0xbf597fc7beef0ee4
++ .quad 0xc6e00bf33da88fc2,0xd5a79147930aa725
++ .quad 0x06ca6351e003826f,0x142929670a0e6e70
++ .quad 0x27b70a8546d22ffc,0x2e1b21385c26c926
++ .quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df
++ .quad 0x650a73548baf63de,0x766a0abb3c77b2a8
++ .quad 0x81c2c92e47edaee6,0x92722c851482353b
++ .quad 0xa2bfe8a14cf10364,0xa81a664bbc423001
++ .quad 0xc24b8b70d0f89791,0xc76c51a30654be30
++ .quad 0xd192e819d6ef5218,0xd69906245565a910
++ .quad 0xf40e35855771202a,0x106aa07032bbd1b8
++ .quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53
++ .quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
++ .quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
++ .quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
++ .quad 0x748f82ee5defb2fc,0x78a5636f43172f60
++ .quad 0x84c87814a1f0ab72,0x8cc702081a6439ec
++ .quad 0x90befffa23631e28,0xa4506cebde82bde9
++ .quad 0xbef9a3f7b2c67915,0xc67178f2e372532b
++ .quad 0xca273eceea26619c,0xd186b8c721c0c207
++ .quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
++ .quad 0x06f067aa72176fba,0x0a637dc5a2c898a6
++ .quad 0x113f9804bef90dae,0x1b710b35131c471b
++ .quad 0x28db77f523047d84,0x32caab7b40c72493
++ .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
++ .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a
++ .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817
+ ___
+ $code.=<<___ if ($SZ==4);
+ .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
+diff -up openssl-1.0.1i/crypto/sha/asm/sha512p8-ppc.pl.ppc-asm openssl-1.0.1i/crypto/sha/asm/sha512p8-ppc.pl
+--- openssl-1.0.1i/crypto/sha/asm/sha512p8-ppc.pl.ppc-asm 2014-08-13 19:46:21.096578196 +0200
++++ openssl-1.0.1i/crypto/sha/asm/sha512p8-ppc.pl 2014-08-13 19:46:21.096578196 +0200
+@@ -0,0 +1,423 @@
++#!/usr/bin/env perl
++
++# ====================================================================
++# Written by Andy Polyakov <appro at openssl.org> for the OpenSSL
++# project. The module is, however, dual licensed under OpenSSL and
++# CRYPTOGAMS licenses depending on where you obtain it. For further
++# details see http://www.openssl.org/~appro/cryptogams/.
++# ====================================================================
++
++# SHA256/512 for PowerISA v2.07.
++#
++# Accurate performance measurements are problematic, because it's
++# always virtualized setup with possibly throttled processor.
++# Relative comparison is therefore more informative. This module is
++# ~60% faster than integer-only sha512-ppc.pl. To anchor to something
++# else, SHA256 is 24% slower than sha1-ppc.pl and 2.5x slower than
++# hardware-assisted aes-128-cbc encrypt. SHA512 is 20% faster than
++# sha1-ppc.pl and 1.6x slower than aes-128-cbc. Another interesting
++# result is degree of computational resources' utilization. POWER8 is
++# "massively multi-threaded chip" and difference between single- and
++# maximum multi-process benchmark results tells that utlization is
++# whooping 94%. For sha512-ppc.pl we get [not unimpressive] 84% and
++# for sha1-ppc.pl - 73%. 100% means that multi-process result equals
++# to single-process one, given that all threads end up on the same
++# physical core.
++
++$flavour=shift;
++$output =shift;
++
++if ($flavour =~ /64/) {
++ $SIZE_T=8;
++ $LRSAVE=2*$SIZE_T;
++ $STU="stdu";
++ $POP="ld";
++ $PUSH="std";
++} elsif ($flavour =~ /32/) {
++ $SIZE_T=4;
++ $LRSAVE=$SIZE_T;
++ $STU="stwu";
++ $POP="lwz";
++ $PUSH="stw";
++} else { die "nonsense $flavour"; }
++
++$LENDIAN=($flavour=~/le/);
++
++$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
++( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
++( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
++die "can't locate ppc-xlate.pl";
++
++open STDOUT,"| $^X $xlate $flavour $output" || die "can't call $xlate: $!";
++
++if ($output =~ /512/) {
++ $bits=512;
++ $SZ=8;
++ $sz="d";
++ $rounds=80;
++} else {
++ $bits=256;
++ $SZ=4;
++ $sz="w";
++ $rounds=64;
++}
++
++$func="sha${bits}_block_p8";
++$FRAME=8*$SIZE_T;
++
++$sp ="r1";
++$toc="r2";
++$ctx="r3";
++$inp="r4";
++$num="r5";
++$Tbl="r6";
++$idx="r7";
++$lrsave="r8";
++$offload="r11";
++$vrsave="r12";
++($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,10,26..31));
++
++ at V=($A,$B,$C,$D,$E,$F,$G,$H)=map("v$_",(0..7));
++ at X=map("v$_",(8..23));
++($Ki,$Func,$S0,$S1,$s0,$s1,$lemask)=map("v$_",(24..31));
++
++sub ROUND {
++my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_;
++my $j=($i+1)%16;
++
++$code.=<<___ if ($i<15 && ($i%(16/$SZ))==(16/$SZ-1));
++ lvx_u @X[$i+1],0,$inp ; load X[i] in advance
++ addi $inp,$inp,16
++___
++$code.=<<___ if ($i<16 && ($i%(16/$SZ)));
++ vsldoi @X[$i], at X[$i-1], at X[$i-1],$SZ
++___
++$code.=<<___ if ($LENDIAN && $i<16 && ($i%(16/$SZ))==0);
++ vperm @X[$i], at X[$i], at X[$i],$lemask
++___
++$code.=<<___;
++ `"vshasigma${sz} $s0, at X[($j+1)%16],0,0" if ($i>=15)`
++ vsel $Func,$g,$f,$e ; Ch(e,f,g)
++ vshasigma${sz} $S1,$e,1,15 ; Sigma1(e)
++ vaddu${sz}m $h,$h, at X[$i%16] ; h+=X[i]
++ vshasigma${sz} $S0,$a,1,0 ; Sigma0(a)
++ `"vshasigma${sz} $s1, at X[($j+14)%16],0,15" if ($i>=15)`
++ vaddu${sz}m $h,$h,$Func ; h+=Ch(e,f,g)
++ vxor $Func,$a,$b
++ `"vaddu${sz}m @X[$j], at X[$j], at X[($j+9)%16]" if ($i>=15)`
++ vaddu${sz}m $h,$h,$S1 ; h+=Sigma1(e)
++ vsel $Func,$b,$c,$Func ; Maj(a,b,c)
++ vaddu${sz}m $g,$g,$Ki ; future h+=K[i]
++ vaddu${sz}m $d,$d,$h ; d+=h
++ vaddu${sz}m $S0,$S0,$Func ; Sigma0(a)+Maj(a,b,c)
++ `"vaddu${sz}m @X[$j], at X[$j],$s0" if ($i>=15)`
++ lvx $Ki,$idx,$Tbl ; load next K[i]
++ addi $idx,$idx,16
++ vaddu${sz}m $h,$h,$S0 ; h+=Sigma0(a)+Maj(a,b,c)
++ `"vaddu${sz}m @X[$j], at X[$j],$s1" if ($i>=15)`
++___
++}
++
++$code=<<___;
++.machine "any"
++.text
++
++.globl $func
++.align 6
++$func:
++ $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
++ mflr $lrsave
++ li r10,`$FRAME+8*16+15`
++ li r11,`$FRAME+8*16+31`
++ stvx v20,r10,$sp # ABI says so
++ addi r10,r10,32
++ mfspr $vrsave,256
++ stvx v21,r11,$sp
++ addi r11,r11,32
++ stvx v22,r10,$sp
++ addi r10,r10,32
++ stvx v23,r11,$sp
++ addi r11,r11,32
++ stvx v24,r10,$sp
++ addi r10,r10,32
++ stvx v25,r11,$sp
++ addi r11,r11,32
++ stvx v26,r10,$sp
++ addi r10,r10,32
++ stvx v27,r11,$sp
++ addi r11,r11,32
++ stvx v28,r10,$sp
++ addi r10,r10,32
++ stvx v29,r11,$sp
++ addi r11,r11,32
++ stvx v30,r10,$sp
++ stvx v31,r11,$sp
++ li r11,-1
++ stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
++ li $x10,0x10
++ $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
++ li $x20,0x20
++ $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
++ li $x30,0x30
++ $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
++ li $x40,0x40
++ $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
++ li $x50,0x50
++ $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
++ li $x60,0x60
++ $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
++ li $x70,0x70
++ $PUSH $lrsave,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
++ mtspr 256,r11
++
++ bl LPICmeup
++ addi $offload,$sp,$FRAME+15
++___
++$code.=<<___ if ($LENDIAN);
++ li $idx,8
++ lvsl $lemask,0,$idx
++ vspltisb $Ki,0x0f
++ vxor $lemask,$lemask,$Ki
++___
++$code.=<<___ if ($SZ==4);
++ lvx_4w $A,$x00,$ctx
++ lvx_4w $E,$x10,$ctx
++ vsldoi $B,$A,$A,4 # unpack
++ vsldoi $C,$A,$A,8
++ vsldoi $D,$A,$A,12
++ vsldoi $F,$E,$E,4
++ vsldoi $G,$E,$E,8
++ vsldoi $H,$E,$E,12
++___
++$code.=<<___ if ($SZ==8);
++ lvx_u $A,$x00,$ctx
++ lvx_u $C,$x10,$ctx
++ lvx_u $E,$x20,$ctx
++ vsldoi $B,$A,$A,8 # unpack
++ lvx_u $G,$x30,$ctx
++ vsldoi $D,$C,$C,8
++ vsldoi $F,$E,$E,8
++ vsldoi $H,$G,$G,8
++___
++$code.=<<___;
++ li r0,`($rounds-16)/16` # inner loop counter
++ b Loop
++.align 5
++Loop:
++ lvx $Ki,$x00,$Tbl
++ li $idx,16
++ lvx_u @X[0],0,$inp
++ addi $inp,$inp,16
++ stvx $A,$x00,$offload # offload $A-$H
++ stvx $B,$x10,$offload
++ stvx $C,$x20,$offload
++ stvx $D,$x30,$offload
++ stvx $E,$x40,$offload
++ stvx $F,$x50,$offload
++ stvx $G,$x60,$offload
++ stvx $H,$x70,$offload
++ vaddu${sz}m $H,$H,$Ki # h+K[i]
++ lvx $Ki,$idx,$Tbl
++ addi $idx,$idx,16
++___
++for ($i=0;$i<16;$i++) { &ROUND($i, at V); unshift(@V,pop(@V)); }
++$code.=<<___;
++ mtctr r0
++ b L16_xx
++.align 5
++L16_xx:
++___
++for (;$i<32;$i++) { &ROUND($i, at V); unshift(@V,pop(@V)); }
++$code.=<<___;
++ bdnz L16_xx
++
++ lvx @X[2],$x00,$offload
++ subic. $num,$num,1
++ lvx @X[3],$x10,$offload
++ vaddu${sz}m $A,$A, at X[2]
++ lvx @X[4],$x20,$offload
++ vaddu${sz}m $B,$B, at X[3]
++ lvx @X[5],$x30,$offload
++ vaddu${sz}m $C,$C, at X[4]
++ lvx @X[6],$x40,$offload
++ vaddu${sz}m $D,$D, at X[5]
++ lvx @X[7],$x50,$offload
++ vaddu${sz}m $E,$E, at X[6]
++ lvx @X[8],$x60,$offload
++ vaddu${sz}m $F,$F, at X[7]
++ lvx @X[9],$x70,$offload
++ vaddu${sz}m $G,$G, at X[8]
++ vaddu${sz}m $H,$H, at X[9]
++ bne Loop
++___
++$code.=<<___ if ($SZ==4);
++ lvx @X[0],$idx,$Tbl
++ addi $idx,$idx,16
++ vperm $A,$A,$B,$Ki # pack the answer
++ lvx @X[1],$idx,$Tbl
++ vperm $E,$E,$F,$Ki
++ vperm $A,$A,$C, at X[0]
++ vperm $E,$E,$G, at X[0]
++ vperm $A,$A,$D, at X[1]
++ vperm $E,$E,$H, at X[1]
++ stvx_4w $A,$x00,$ctx
++ stvx_4w $E,$x10,$ctx
++___
++$code.=<<___ if ($SZ==8);
++ vperm $A,$A,$B,$Ki # pack the answer
++ vperm $C,$C,$D,$Ki
++ vperm $E,$E,$F,$Ki
++ vperm $G,$G,$H,$Ki
++ stvx_u $A,$x00,$ctx
++ stvx_u $C,$x10,$ctx
++ stvx_u $E,$x20,$ctx
++ stvx_u $G,$x30,$ctx
++___
++$code.=<<___;
++ li r10,`$FRAME+8*16+15`
++ mtlr $lrsave
++ li r11,`$FRAME+8*16+31`
++ mtspr 256,$vrsave
++ lvx v20,r10,$sp # ABI says so
++ addi r10,r10,32
++ lvx v21,r11,$sp
++ addi r11,r11,32
++ lvx v22,r10,$sp
++ addi r10,r10,32
++ lvx v23,r11,$sp
++ addi r11,r11,32
++ lvx v24,r10,$sp
++ addi r10,r10,32
++ lvx v25,r11,$sp
++ addi r11,r11,32
++ lvx v26,r10,$sp
++ addi r10,r10,32
++ lvx v27,r11,$sp
++ addi r11,r11,32
++ lvx v28,r10,$sp
++ addi r10,r10,32
++ lvx v29,r11,$sp
++ addi r11,r11,32
++ lvx v30,r10,$sp
++ lvx v31,r11,$sp
++ $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
++ $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
++ $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
++ $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
++ $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
++ $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
++ addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
++ blr
++ .long 0
++ .byte 0,12,4,1,0x80,6,3,0
++ .long 0
++.size $func,.-$func
++___
++
++# Ugly hack here, because PPC assembler syntax seem to vary too
++# much from platforms to platform...
++$code.=<<___;
++.align 6
++LPICmeup:
++ mflr r0
++ bcl 20,31,\$+4
++ mflr $Tbl ; vvvvvv "distance" between . and 1st data entry
++ addi $Tbl,$Tbl,`64-8`
++ mtlr r0
++ blr
++ .long 0
++ .byte 0,12,0x14,0,0,0,0,0
++ .space `64-9*4`
++___
++
++if ($SZ==8) {
++ local *table = sub {
++ foreach(@_) { $code.=".quad $_,$_\n"; }
++ };
++ table(
++ "0x428a2f98d728ae22","0x7137449123ef65cd",
++ "0xb5c0fbcfec4d3b2f","0xe9b5dba58189dbbc",
++ "0x3956c25bf348b538","0x59f111f1b605d019",
++ "0x923f82a4af194f9b","0xab1c5ed5da6d8118",
++ "0xd807aa98a3030242","0x12835b0145706fbe",
++ "0x243185be4ee4b28c","0x550c7dc3d5ffb4e2",
++ "0x72be5d74f27b896f","0x80deb1fe3b1696b1",
++ "0x9bdc06a725c71235","0xc19bf174cf692694",
++ "0xe49b69c19ef14ad2","0xefbe4786384f25e3",
++ "0x0fc19dc68b8cd5b5","0x240ca1cc77ac9c65",
++ "0x2de92c6f592b0275","0x4a7484aa6ea6e483",
++ "0x5cb0a9dcbd41fbd4","0x76f988da831153b5",
++ "0x983e5152ee66dfab","0xa831c66d2db43210",
++ "0xb00327c898fb213f","0xbf597fc7beef0ee4",
++ "0xc6e00bf33da88fc2","0xd5a79147930aa725",
++ "0x06ca6351e003826f","0x142929670a0e6e70",
++ "0x27b70a8546d22ffc","0x2e1b21385c26c926",
++ "0x4d2c6dfc5ac42aed","0x53380d139d95b3df",
++ "0x650a73548baf63de","0x766a0abb3c77b2a8",
++ "0x81c2c92e47edaee6","0x92722c851482353b",
++ "0xa2bfe8a14cf10364","0xa81a664bbc423001",
++ "0xc24b8b70d0f89791","0xc76c51a30654be30",
++ "0xd192e819d6ef5218","0xd69906245565a910",
++ "0xf40e35855771202a","0x106aa07032bbd1b8",
++ "0x19a4c116b8d2d0c8","0x1e376c085141ab53",
++ "0x2748774cdf8eeb99","0x34b0bcb5e19b48a8",
++ "0x391c0cb3c5c95a63","0x4ed8aa4ae3418acb",
++ "0x5b9cca4f7763e373","0x682e6ff3d6b2b8a3",
++ "0x748f82ee5defb2fc","0x78a5636f43172f60",
++ "0x84c87814a1f0ab72","0x8cc702081a6439ec",
++ "0x90befffa23631e28","0xa4506cebde82bde9",
++ "0xbef9a3f7b2c67915","0xc67178f2e372532b",
++ "0xca273eceea26619c","0xd186b8c721c0c207",
++ "0xeada7dd6cde0eb1e","0xf57d4f7fee6ed178",
++ "0x06f067aa72176fba","0x0a637dc5a2c898a6",
++ "0x113f9804bef90dae","0x1b710b35131c471b",
++ "0x28db77f523047d84","0x32caab7b40c72493",
++ "0x3c9ebe0a15c9bebc","0x431d67c49c100d4c",
++ "0x4cc5d4becb3e42b6","0x597f299cfc657e2a",
++ "0x5fcb6fab3ad6faec","0x6c44198c4a475817","0");
++$code.=<<___ if (!$LENDIAN);
++.quad 0x0001020304050607,0x1011121314151617
++___
++$code.=<<___ if ($LENDIAN); # quad-swapped
++.quad 0x1011121314151617,0x0001020304050607
++___
++} else {
++ local *table = sub {
++ foreach(@_) { $code.=".long $_,$_,$_,$_\n"; }
++ };
++ table(
++ "0x428a2f98","0x71374491","0xb5c0fbcf","0xe9b5dba5",
++ "0x3956c25b","0x59f111f1","0x923f82a4","0xab1c5ed5",
++ "0xd807aa98","0x12835b01","0x243185be","0x550c7dc3",
++ "0x72be5d74","0x80deb1fe","0x9bdc06a7","0xc19bf174",
++ "0xe49b69c1","0xefbe4786","0x0fc19dc6","0x240ca1cc",
++ "0x2de92c6f","0x4a7484aa","0x5cb0a9dc","0x76f988da",
++ "0x983e5152","0xa831c66d","0xb00327c8","0xbf597fc7",
++ "0xc6e00bf3","0xd5a79147","0x06ca6351","0x14292967",
++ "0x27b70a85","0x2e1b2138","0x4d2c6dfc","0x53380d13",
++ "0x650a7354","0x766a0abb","0x81c2c92e","0x92722c85",
++ "0xa2bfe8a1","0xa81a664b","0xc24b8b70","0xc76c51a3",
++ "0xd192e819","0xd6990624","0xf40e3585","0x106aa070",
++ "0x19a4c116","0x1e376c08","0x2748774c","0x34b0bcb5",
++ "0x391c0cb3","0x4ed8aa4a","0x5b9cca4f","0x682e6ff3",
++ "0x748f82ee","0x78a5636f","0x84c87814","0x8cc70208",
++ "0x90befffa","0xa4506ceb","0xbef9a3f7","0xc67178f2","0");
++$code.=<<___ if (!$LENDIAN);
++.long 0x00010203,0x10111213,0x10111213,0x10111213
++.long 0x00010203,0x04050607,0x10111213,0x10111213
++.long 0x00010203,0x04050607,0x08090a0b,0x10111213
++___
++$code.=<<___ if ($LENDIAN); # word-swapped
++.long 0x10111213,0x10111213,0x10111213,0x00010203
++.long 0x10111213,0x10111213,0x04050607,0x00010203
++.long 0x10111213,0x08090a0b,0x04050607,0x00010203
++___
++}
++$code.=<<___;
++.asciz "SHA${bits} for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
++.align 2
++___
++
++$code =~ s/\`([^\`]*)\`/eval $1/gem;
++print $code;
++close STDOUT;
+diff -up openssl-1.0.1i/crypto/sha/Makefile.ppc-asm openssl-1.0.1i/crypto/sha/Makefile
+--- openssl-1.0.1i/crypto/sha/Makefile.ppc-asm 2014-08-06 23:18:30.000000000 +0200
++++ openssl-1.0.1i/crypto/sha/Makefile 2014-08-13 19:46:21.096578196 +0200
+@@ -75,6 +75,8 @@ sha512-sparcv9.s:asm/sha512-sparcv9.pl;
+ sha1-ppc.s: asm/sha1-ppc.pl; $(PERL) asm/sha1-ppc.pl $(PERLASM_SCHEME) $@
+ sha256-ppc.s: asm/sha512-ppc.pl; $(PERL) asm/sha512-ppc.pl $(PERLASM_SCHEME) $@
+ sha512-ppc.s: asm/sha512-ppc.pl; $(PERL) asm/sha512-ppc.pl $(PERLASM_SCHEME) $@
++sha256p8-ppc.s: asm/sha512p8-ppc.pl; $(PERL) asm/sha512p8-ppc.pl $(PERLASM_SCHEME) $@
++sha512p8-ppc.s: asm/sha512p8-ppc.pl; $(PERL) asm/sha512p8-ppc.pl $(PERLASM_SCHEME) $@
+
+ sha1-parisc.s: asm/sha1-parisc.pl; $(PERL) asm/sha1-parisc.pl $(PERLASM_SCHEME) $@
+ sha256-parisc.s:asm/sha512-parisc.pl; $(PERL) asm/sha512-parisc.pl $(PERLASM_SCHEME) $@
diff --git a/openssl-1.0.1e-trusted-first.patch b/openssl-1.0.1i-trusted-first.patch
similarity index 67%
rename from openssl-1.0.1e-trusted-first.patch
rename to openssl-1.0.1i-trusted-first.patch
index 08ab639..f11f36d 100644
--- a/openssl-1.0.1e-trusted-first.patch
+++ b/openssl-1.0.1i-trusted-first.patch
@@ -1,7 +1,7 @@
-diff -up openssl-1.0.1e/apps/apps.c.trusted-first openssl-1.0.1e/apps/apps.c
---- openssl-1.0.1e/apps/apps.c.trusted-first 2013-02-11 16:26:04.000000000 +0100
-+++ openssl-1.0.1e/apps/apps.c 2013-08-16 15:42:39.920534769 +0200
-@@ -2361,6 +2361,8 @@ int args_verify(char ***pargs, int *parg
+diff -up openssl-1.0.1i/apps/apps.c.trusted-first openssl-1.0.1i/apps/apps.c
+--- openssl-1.0.1i/apps/apps.c.trusted-first 2014-08-06 23:10:56.000000000 +0200
++++ openssl-1.0.1i/apps/apps.c 2014-08-07 13:54:27.751103405 +0200
+@@ -2365,6 +2365,8 @@ int args_verify(char ***pargs, int *parg
flags |= X509_V_FLAG_NOTIFY_POLICY;
else if (!strcmp(arg, "-check_ss_sig"))
flags |= X509_V_FLAG_CHECK_SS_SIGNATURE;
@@ -10,9 +10,9 @@ diff -up openssl-1.0.1e/apps/apps.c.trusted-first openssl-1.0.1e/apps/apps.c
else
return 0;
-diff -up openssl-1.0.1e/apps/cms.c.trusted-first openssl-1.0.1e/apps/cms.c
---- openssl-1.0.1e/apps/cms.c.trusted-first 2013-02-11 16:26:04.000000000 +0100
-+++ openssl-1.0.1e/apps/cms.c 2013-08-16 15:43:56.671213879 +0200
+diff -up openssl-1.0.1i/apps/cms.c.trusted-first openssl-1.0.1i/apps/cms.c
+--- openssl-1.0.1i/apps/cms.c.trusted-first 2014-08-06 23:10:56.000000000 +0200
++++ openssl-1.0.1i/apps/cms.c 2014-08-07 13:54:27.751103405 +0200
@@ -642,6 +642,7 @@ int MAIN(int argc, char **argv)
BIO_printf (bio_err, "-text include or delete text MIME headers\n");
BIO_printf (bio_err, "-CApath dir trusted certificates directory\n");
@@ -21,10 +21,10 @@ diff -up openssl-1.0.1e/apps/cms.c.trusted-first openssl-1.0.1e/apps/cms.c
BIO_printf (bio_err, "-crl_check check revocation status of signer's certificate using CRLs\n");
BIO_printf (bio_err, "-crl_check_all check revocation status of signer's certificate chain using CRLs\n");
#ifndef OPENSSL_NO_ENGINE
-diff -up openssl-1.0.1e/apps/ocsp.c.trusted-first openssl-1.0.1e/apps/ocsp.c
---- openssl-1.0.1e/apps/ocsp.c.trusted-first 2013-02-11 16:26:04.000000000 +0100
-+++ openssl-1.0.1e/apps/ocsp.c 2013-08-16 15:49:47.477572414 +0200
-@@ -595,6 +595,7 @@ int MAIN(int argc, char **argv)
+diff -up openssl-1.0.1i/apps/ocsp.c.trusted-first openssl-1.0.1i/apps/ocsp.c
+--- openssl-1.0.1i/apps/ocsp.c.trusted-first 2014-08-06 23:10:56.000000000 +0200
++++ openssl-1.0.1i/apps/ocsp.c 2014-08-07 13:54:27.752103409 +0200
+@@ -605,6 +605,7 @@ int MAIN(int argc, char **argv)
BIO_printf (bio_err, "-path path to use in OCSP request\n");
BIO_printf (bio_err, "-CApath dir trusted certificates directory\n");
BIO_printf (bio_err, "-CAfile file trusted certificates file\n");
@@ -32,20 +32,20 @@ diff -up openssl-1.0.1e/apps/ocsp.c.trusted-first openssl-1.0.1e/apps/ocsp.c
BIO_printf (bio_err, "-VAfile file validator certificates file\n");
BIO_printf (bio_err, "-validity_period n maximum validity discrepancy in seconds\n");
BIO_printf (bio_err, "-status_age n maximum status age in seconds\n");
-diff -up openssl-1.0.1e/apps/s_client.c.trusted-first openssl-1.0.1e/apps/s_client.c
---- openssl-1.0.1e/apps/s_client.c.trusted-first 2013-08-16 15:42:39.000000000 +0200
-+++ openssl-1.0.1e/apps/s_client.c 2013-08-16 15:49:00.727542994 +0200
-@@ -298,6 +298,7 @@ static void sc_usage(void)
+diff -up openssl-1.0.1i/apps/s_client.c.trusted-first openssl-1.0.1i/apps/s_client.c
+--- openssl-1.0.1i/apps/s_client.c.trusted-first 2014-08-07 13:54:27.752103409 +0200
++++ openssl-1.0.1i/apps/s_client.c 2014-08-07 15:06:28.443918055 +0200
+@@ -299,6 +299,7 @@ static void sc_usage(void)
BIO_printf(bio_err," -pass arg - private key file pass phrase source\n");
BIO_printf(bio_err," -CApath arg - PEM format directory of CA's\n");
BIO_printf(bio_err," -CAfile arg - PEM format file of CA's\n");
+ BIO_printf(bio_err," -trusted_first - Use trusted CA's first when building the trust chain\n");
BIO_printf(bio_err," -reconnect - Drop and re-make the connection with the same Session-ID\n");
BIO_printf(bio_err," -pause - sleep(1) after each read(2) and write(2) system call\n");
- BIO_printf(bio_err," -showcerts - show all certificates in the chain\n");
-diff -up openssl-1.0.1e/apps/smime.c.trusted-first openssl-1.0.1e/apps/smime.c
---- openssl-1.0.1e/apps/smime.c.trusted-first 2013-02-11 16:26:04.000000000 +0100
-+++ openssl-1.0.1e/apps/smime.c 2013-08-16 15:46:44.024875150 +0200
+ BIO_printf(bio_err," -prexit - print session information even on connection failure\n");
+diff -up openssl-1.0.1i/apps/smime.c.trusted-first openssl-1.0.1i/apps/smime.c
+--- openssl-1.0.1i/apps/smime.c.trusted-first 2014-08-06 23:10:56.000000000 +0200
++++ openssl-1.0.1i/apps/smime.c 2014-08-07 13:54:27.753103414 +0200
@@ -479,6 +479,7 @@ int MAIN(int argc, char **argv)
BIO_printf (bio_err, "-text include or delete text MIME headers\n");
BIO_printf (bio_err, "-CApath dir trusted certificates directory\n");
@@ -54,10 +54,10 @@ diff -up openssl-1.0.1e/apps/smime.c.trusted-first openssl-1.0.1e/apps/smime.c
BIO_printf (bio_err, "-crl_check check revocation status of signer's certificate using CRLs\n");
BIO_printf (bio_err, "-crl_check_all check revocation status of signer's certificate chain using CRLs\n");
#ifndef OPENSSL_NO_ENGINE
-diff -up openssl-1.0.1e/apps/s_server.c.trusted-first openssl-1.0.1e/apps/s_server.c
---- openssl-1.0.1e/apps/s_server.c.trusted-first 2013-08-16 15:42:39.000000000 +0200
-+++ openssl-1.0.1e/apps/s_server.c 2013-08-16 15:48:19.469634430 +0200
-@@ -501,6 +501,7 @@ static void sv_usage(void)
+diff -up openssl-1.0.1i/apps/s_server.c.trusted-first openssl-1.0.1i/apps/s_server.c
+--- openssl-1.0.1i/apps/s_server.c.trusted-first 2014-08-07 13:54:27.718103241 +0200
++++ openssl-1.0.1i/apps/s_server.c 2014-08-07 13:54:27.753103414 +0200
+@@ -502,6 +502,7 @@ static void sv_usage(void)
BIO_printf(bio_err," -state - Print the SSL states\n");
BIO_printf(bio_err," -CApath arg - PEM format directory of CA's\n");
BIO_printf(bio_err," -CAfile arg - PEM format file of CA's\n");
@@ -65,9 +65,9 @@ diff -up openssl-1.0.1e/apps/s_server.c.trusted-first openssl-1.0.1e/apps/s_serv
BIO_printf(bio_err," -nocert - Don't use any certificates (Anon-DH)\n");
BIO_printf(bio_err," -cipher arg - play with 'openssl ciphers' to see what goes here\n");
BIO_printf(bio_err," -serverpref - Use server's cipher preferences\n");
-diff -up openssl-1.0.1e/apps/s_time.c.trusted-first openssl-1.0.1e/apps/s_time.c
---- openssl-1.0.1e/apps/s_time.c.trusted-first 2013-08-16 15:42:39.000000000 +0200
-+++ openssl-1.0.1e/apps/s_time.c 2013-08-16 15:47:35.862674188 +0200
+diff -up openssl-1.0.1i/apps/s_time.c.trusted-first openssl-1.0.1i/apps/s_time.c
+--- openssl-1.0.1i/apps/s_time.c.trusted-first 2014-08-07 13:54:27.432101823 +0200
++++ openssl-1.0.1i/apps/s_time.c 2014-08-07 13:54:27.753103414 +0200
@@ -179,6 +179,7 @@ static void s_time_usage(void)
file if not specified by this option\n\
-CApath arg - PEM format directory of CA's\n\
@@ -76,9 +76,9 @@ diff -up openssl-1.0.1e/apps/s_time.c.trusted-first openssl-1.0.1e/apps/s_time.c
-cipher - preferred cipher to use, play with 'openssl ciphers'\n\n";
printf( "usage: s_time <args>\n\n" );
-diff -up openssl-1.0.1e/apps/ts.c.trusted-first openssl-1.0.1e/apps/ts.c
---- openssl-1.0.1e/apps/ts.c.trusted-first 2013-08-16 15:42:39.000000000 +0200
-+++ openssl-1.0.1e/apps/ts.c 2013-08-16 15:45:27.766206812 +0200
+diff -up openssl-1.0.1i/apps/ts.c.trusted-first openssl-1.0.1i/apps/ts.c
+--- openssl-1.0.1i/apps/ts.c.trusted-first 2014-08-07 13:54:27.707103186 +0200
++++ openssl-1.0.1i/apps/ts.c 2014-08-07 13:54:27.753103414 +0200
@@ -383,7 +383,7 @@ int MAIN(int argc, char **argv)
"ts -verify [-data file_to_hash] [-digest digest_bytes] "
"[-queryfile request.tsq] "
@@ -88,9 +88,9 @@ diff -up openssl-1.0.1e/apps/ts.c.trusted-first openssl-1.0.1e/apps/ts.c
"-untrusted cert_file.pem\n");
cleanup:
/* Clean up. */
-diff -up openssl-1.0.1e/apps/verify.c.trusted-first openssl-1.0.1e/apps/verify.c
---- openssl-1.0.1e/apps/verify.c.trusted-first 2013-02-11 16:26:04.000000000 +0100
-+++ openssl-1.0.1e/apps/verify.c 2013-08-16 15:46:09.720124654 +0200
+diff -up openssl-1.0.1i/apps/verify.c.trusted-first openssl-1.0.1i/apps/verify.c
+--- openssl-1.0.1i/apps/verify.c.trusted-first 2014-08-06 23:10:56.000000000 +0200
++++ openssl-1.0.1i/apps/verify.c 2014-08-07 13:54:27.754103419 +0200
@@ -237,7 +237,7 @@ int MAIN(int argc, char **argv)
end:
@@ -100,9 +100,9 @@ diff -up openssl-1.0.1e/apps/verify.c.trusted-first openssl-1.0.1e/apps/verify.c
BIO_printf(bio_err," [-attime timestamp]");
#ifndef OPENSSL_NO_ENGINE
BIO_printf(bio_err," [-engine e]");
-diff -up openssl-1.0.1e/crypto/x509/x509_vfy.c.trusted-first openssl-1.0.1e/crypto/x509/x509_vfy.c
---- openssl-1.0.1e/crypto/x509/x509_vfy.c.trusted-first 2013-08-16 15:42:39.864533545 +0200
-+++ openssl-1.0.1e/crypto/x509/x509_vfy.c 2013-08-16 15:42:39.921534791 +0200
+diff -up openssl-1.0.1i/crypto/x509/x509_vfy.c.trusted-first openssl-1.0.1i/crypto/x509/x509_vfy.c
+--- openssl-1.0.1i/crypto/x509/x509_vfy.c.trusted-first 2014-08-07 13:54:27.716103231 +0200
++++ openssl-1.0.1i/crypto/x509/x509_vfy.c 2014-08-07 13:54:27.754103419 +0200
@@ -207,6 +207,21 @@ int X509_verify_cert(X509_STORE_CTX *ctx
/* If we are self signed, we break */
@@ -125,9 +125,9 @@ diff -up openssl-1.0.1e/crypto/x509/x509_vfy.c.trusted-first openssl-1.0.1e/cryp
/* If we were passed a cert chain, use it first */
if (ctx->untrusted != NULL)
-diff -up openssl-1.0.1e/crypto/x509/x509_vfy.h.trusted-first openssl-1.0.1e/crypto/x509/x509_vfy.h
---- openssl-1.0.1e/crypto/x509/x509_vfy.h.trusted-first 2013-08-16 15:42:39.356522432 +0200
-+++ openssl-1.0.1e/crypto/x509/x509_vfy.h 2013-08-16 15:42:39.922534813 +0200
+diff -up openssl-1.0.1i/crypto/x509/x509_vfy.h.trusted-first openssl-1.0.1i/crypto/x509/x509_vfy.h
+--- openssl-1.0.1i/crypto/x509/x509_vfy.h.trusted-first 2014-08-07 13:54:27.360101466 +0200
++++ openssl-1.0.1i/crypto/x509/x509_vfy.h 2014-08-07 13:54:27.754103419 +0200
@@ -389,6 +389,8 @@ void X509_STORE_CTX_set_depth(X509_STORE
#define X509_V_FLAG_USE_DELTAS 0x2000
/* Check selfsigned CA signature */
@@ -137,9 +137,9 @@ diff -up openssl-1.0.1e/crypto/x509/x509_vfy.h.trusted-first openssl-1.0.1e/cryp
#define X509_VP_FLAG_DEFAULT 0x1
-diff -up openssl-1.0.1e/doc/apps/cms.pod.trusted-first openssl-1.0.1e/doc/apps/cms.pod
---- openssl-1.0.1e/doc/apps/cms.pod.trusted-first 2013-08-16 15:42:39.000000000 +0200
-+++ openssl-1.0.1e/doc/apps/cms.pod 2013-08-16 15:50:48.723921117 +0200
+diff -up openssl-1.0.1i/doc/apps/cms.pod.trusted-first openssl-1.0.1i/doc/apps/cms.pod
+--- openssl-1.0.1i/doc/apps/cms.pod.trusted-first 2014-08-06 23:10:56.000000000 +0200
++++ openssl-1.0.1i/doc/apps/cms.pod 2014-08-07 13:54:27.754103419 +0200
@@ -35,6 +35,7 @@ B<openssl> B<cms>
[B<-print>]
[B<-CAfile file>]
@@ -148,7 +148,7 @@ diff -up openssl-1.0.1e/doc/apps/cms.pod.trusted-first openssl-1.0.1e/doc/apps/c
[B<-md digest>]
[B<-[cipher]>]
[B<-nointern>]
-@@ -238,6 +239,12 @@ B<-verify>. This directory must be a sta
+@@ -243,6 +244,12 @@ B<-verify>. This directory must be a sta
is a hash of each subject name (using B<x509 -hash>) should be linked
to each certificate.
@@ -161,9 +161,9 @@ diff -up openssl-1.0.1e/doc/apps/cms.pod.trusted-first openssl-1.0.1e/doc/apps/c
=item B<-md digest>
digest algorithm to use when signing or resigning. If not present then the
-diff -up openssl-1.0.1e/doc/apps/ocsp.pod.trusted-first openssl-1.0.1e/doc/apps/ocsp.pod
---- openssl-1.0.1e/doc/apps/ocsp.pod.trusted-first 2013-08-16 15:42:39.000000000 +0200
-+++ openssl-1.0.1e/doc/apps/ocsp.pod 2013-08-16 15:52:20.106933403 +0200
+diff -up openssl-1.0.1i/doc/apps/ocsp.pod.trusted-first openssl-1.0.1i/doc/apps/ocsp.pod
+--- openssl-1.0.1i/doc/apps/ocsp.pod.trusted-first 2014-08-07 13:54:27.708103191 +0200
++++ openssl-1.0.1i/doc/apps/ocsp.pod 2014-08-07 13:54:27.755103424 +0200
@@ -29,6 +29,7 @@ B<openssl> B<ocsp>
[B<-path>]
[B<-CApath dir>]
@@ -186,10 +186,10 @@ diff -up openssl-1.0.1e/doc/apps/ocsp.pod.trusted-first openssl-1.0.1e/doc/apps/
=item B<-verify_other file>
file containing additional certificates to search when attempting to locate
-diff -up openssl-1.0.1e/doc/apps/s_client.pod.trusted-first openssl-1.0.1e/doc/apps/s_client.pod
---- openssl-1.0.1e/doc/apps/s_client.pod.trusted-first 2013-08-16 15:42:39.000000000 +0200
-+++ openssl-1.0.1e/doc/apps/s_client.pod 2013-08-16 15:53:17.364194159 +0200
-@@ -17,6 +17,7 @@ B<openssl> B<s_client>
+diff -up openssl-1.0.1i/doc/apps/s_client.pod.trusted-first openssl-1.0.1i/doc/apps/s_client.pod
+--- openssl-1.0.1i/doc/apps/s_client.pod.trusted-first 2014-08-07 13:54:27.726103281 +0200
++++ openssl-1.0.1i/doc/apps/s_client.pod 2014-08-07 13:54:27.755103424 +0200
+@@ -19,6 +19,7 @@ B<openssl> B<s_client>
[B<-pass arg>]
[B<-CApath directory>]
[B<-CAfile filename>]
@@ -197,7 +197,7 @@ diff -up openssl-1.0.1e/doc/apps/s_client.pod.trusted-first openssl-1.0.1e/doc/a
[B<-reconnect>]
[B<-pause>]
[B<-showcerts>]
-@@ -107,7 +108,7 @@ also used when building the client certi
+@@ -121,7 +122,7 @@ also used when building the client certi
A file containing trusted certificates to use during server authentication
and to use when attempting to build the client certificate chain.
@@ -206,9 +206,9 @@ diff -up openssl-1.0.1e/doc/apps/s_client.pod.trusted-first openssl-1.0.1e/doc/a
Set various certificate chain valiadition option. See the
L<B<verify>|verify(1)> manual page for details.
-diff -up openssl-1.0.1e/doc/apps/smime.pod.trusted-first openssl-1.0.1e/doc/apps/smime.pod
---- openssl-1.0.1e/doc/apps/smime.pod.trusted-first 2013-08-16 15:42:39.000000000 +0200
-+++ openssl-1.0.1e/doc/apps/smime.pod 2013-08-16 15:56:12.497050767 +0200
+diff -up openssl-1.0.1i/doc/apps/smime.pod.trusted-first openssl-1.0.1i/doc/apps/smime.pod
+--- openssl-1.0.1i/doc/apps/smime.pod.trusted-first 2014-07-22 21:43:11.000000000 +0200
++++ openssl-1.0.1i/doc/apps/smime.pod 2014-08-07 13:54:27.755103424 +0200
@@ -15,6 +15,9 @@ B<openssl> B<smime>
[B<-pk7out>]
[B<-[cipher]>]
@@ -232,9 +232,9 @@ diff -up openssl-1.0.1e/doc/apps/smime.pod.trusted-first openssl-1.0.1e/doc/apps
=item B<-md digest>
digest algorithm to use when signing or resigning. If not present then the
-diff -up openssl-1.0.1e/doc/apps/s_server.pod.trusted-first openssl-1.0.1e/doc/apps/s_server.pod
---- openssl-1.0.1e/doc/apps/s_server.pod.trusted-first 2013-08-16 15:42:39.000000000 +0200
-+++ openssl-1.0.1e/doc/apps/s_server.pod 2013-08-16 15:54:33.609873214 +0200
+diff -up openssl-1.0.1i/doc/apps/s_server.pod.trusted-first openssl-1.0.1i/doc/apps/s_server.pod
+--- openssl-1.0.1i/doc/apps/s_server.pod.trusted-first 2014-08-07 13:54:27.726103281 +0200
++++ openssl-1.0.1i/doc/apps/s_server.pod 2014-08-07 15:07:12.315099577 +0200
@@ -33,6 +33,7 @@ B<openssl> B<s_server>
[B<-state>]
[B<-CApath directory>]
@@ -242,8 +242,8 @@ diff -up openssl-1.0.1e/doc/apps/s_server.pod.trusted-first openssl-1.0.1e/doc/a
+[B<-trusted_first>]
[B<-nocert>]
[B<-cipher cipherlist>]
- [B<-quiet>]
-@@ -168,6 +169,12 @@ and to use when attempting to build the
+ [B<-serverpref>]
+@@ -178,6 +179,12 @@ and to use when attempting to build the
is also used in the list of acceptable client CAs passed to the client when
a certificate is requested.
@@ -256,9 +256,9 @@ diff -up openssl-1.0.1e/doc/apps/s_server.pod.trusted-first openssl-1.0.1e/doc/a
=item B<-state>
prints out the SSL session states.
-diff -up openssl-1.0.1e/doc/apps/s_time.pod.trusted-first openssl-1.0.1e/doc/apps/s_time.pod
---- openssl-1.0.1e/doc/apps/s_time.pod.trusted-first 2013-02-11 16:02:48.000000000 +0100
-+++ openssl-1.0.1e/doc/apps/s_time.pod 2013-08-16 15:55:12.651732938 +0200
+diff -up openssl-1.0.1i/doc/apps/s_time.pod.trusted-first openssl-1.0.1i/doc/apps/s_time.pod
+--- openssl-1.0.1i/doc/apps/s_time.pod.trusted-first 2014-07-22 21:41:23.000000000 +0200
++++ openssl-1.0.1i/doc/apps/s_time.pod 2014-08-07 13:54:27.755103424 +0200
@@ -14,6 +14,7 @@ B<openssl> B<s_time>
[B<-key filename>]
[B<-CApath directory>]
@@ -280,9 +280,9 @@ diff -up openssl-1.0.1e/doc/apps/s_time.pod.trusted-first openssl-1.0.1e/doc/app
=item B<-new>
performs the timing test using a new session ID for each connection.
-diff -up openssl-1.0.1e/doc/apps/ts.pod.trusted-first openssl-1.0.1e/doc/apps/ts.pod
---- openssl-1.0.1e/doc/apps/ts.pod.trusted-first 2013-02-11 16:26:04.000000000 +0100
-+++ openssl-1.0.1e/doc/apps/ts.pod 2013-08-16 15:57:17.399479957 +0200
+diff -up openssl-1.0.1i/doc/apps/ts.pod.trusted-first openssl-1.0.1i/doc/apps/ts.pod
+--- openssl-1.0.1i/doc/apps/ts.pod.trusted-first 2014-07-22 21:41:23.000000000 +0200
++++ openssl-1.0.1i/doc/apps/ts.pod 2014-08-07 13:54:27.756103429 +0200
@@ -46,6 +46,7 @@ B<-verify>
[B<-token_in>]
[B<-CApath> trusted_cert_path]
@@ -304,9 +304,9 @@ diff -up openssl-1.0.1e/doc/apps/ts.pod.trusted-first openssl-1.0.1e/doc/apps/ts
=item B<-untrusted> cert_file.pem
Set of additional untrusted certificates in PEM format which may be
-diff -up openssl-1.0.1e/doc/apps/verify.pod.trusted-first openssl-1.0.1e/doc/apps/verify.pod
---- openssl-1.0.1e/doc/apps/verify.pod.trusted-first 2013-02-11 16:26:04.000000000 +0100
-+++ openssl-1.0.1e/doc/apps/verify.pod 2013-08-16 15:58:00.267423925 +0200
+diff -up openssl-1.0.1i/doc/apps/verify.pod.trusted-first openssl-1.0.1i/doc/apps/verify.pod
+--- openssl-1.0.1i/doc/apps/verify.pod.trusted-first 2014-08-06 23:10:56.000000000 +0200
++++ openssl-1.0.1i/doc/apps/verify.pod 2014-08-07 13:54:27.756103429 +0200
@@ -9,6 +9,7 @@ verify - Utility to verify certificates.
B<openssl> B<verify>
[B<-CApath directory>]
diff --git a/sources b/sources
index 556dcb4..b97a288 100644
--- a/sources
+++ b/sources
@@ -1 +1 @@
-6115ae0bb61b481a9195baef72514c2e openssl-1.0.1e-hobbled.tar.xz
+c152e5284765c3325301a62b01a48fc0 openssl-1.0.1i-hobbled.tar.xz
More information about the scm-commits
mailing list