[openssl/f20] Multiple security and bug fixes.

Tomáš Mráz tmraz at fedoraproject.org
Tue Jan 13 17:24:43 UTC 2015


commit 31eac0f57b5e28f5d2ca8acea159b6125951bead
Author: Tomas Mraz <tmraz at fedoraproject.org>
Date:   Tue Jan 13 18:25:03 2015 +0100

    Multiple security and bug fixes.
    
    - fix CVE-2014-3570 - incorrect computation in BN_sqr()
    - fix CVE-2014-3571 - possible crash in dtls1_get_record()
    - fix CVE-2014-3572 - possible downgrade of ECDH ciphersuite to non-PFS state
    - fix CVE-2014-8275 - various certificate fingerprint issues
    - fix CVE-2015-0204 - remove support for RSA ephemeral keys for non-export
      ciphersuites and on server
    - fix CVE-2015-0205 - do not allow unauthenticated client DH certificate
    - fix CVE-2015-0206 - possible memory leak when buffering DTLS records
    - add ECC TLS extensions to DTLS (#1119803)
    - do not send ECC ciphersuites in SSLv2 client hello (#1090955)
    - copy digest algorithm when handling SNI context switch

 openssl-1.0.1e-copy-algo.patch     |   33 +
 openssl-1.0.1e-cve-2014-3570.patch | 3155 ++++++++++++++++++++++++++++++++++++
 openssl-1.0.1e-cve-2014-3571.patch |   34 +
 openssl-1.0.1e-cve-2014-3572.patch |   51 +
 openssl-1.0.1e-cve-2015-0204.patch |  158 ++
 openssl-1.0.1e-cve-2015-0205.patch |   12 +
 openssl-1.0.1e-cve-2015-0206.patch |  116 ++
 openssl-1.0.1e-dtls-ecc-ext.patch  |  119 ++
 openssl-1.0.1e-ssl2-no-ec.patch    |   17 +
 openssl.spec                       |   35 +-
 10 files changed, 3729 insertions(+), 1 deletions(-)
---
diff --git a/openssl-1.0.1e-copy-algo.patch b/openssl-1.0.1e-copy-algo.patch
new file mode 100644
index 0000000..927c584
--- /dev/null
+++ b/openssl-1.0.1e-copy-algo.patch
@@ -0,0 +1,33 @@
+diff --git a/ssl/ssl_lib.c b/ssl/ssl_lib.c
+index 6a33b9d..76a5f9e 100644
+--- a/ssl/ssl_lib.c
++++ b/ssl/ssl_lib.c
+@@ -3177,15 +3177,26 @@ SSL_CTX *SSL_get_SSL_CTX(const SSL *ssl)
+ 
+ SSL_CTX *SSL_set_SSL_CTX(SSL *ssl, SSL_CTX* ctx)
+ 	{
++	CERT *ocert = ssl->cert;
+ 	if (ssl->ctx == ctx)
+ 		return ssl->ctx;
+ #ifndef OPENSSL_NO_TLSEXT
+ 	if (ctx == NULL)
+ 		ctx = ssl->initial_ctx;
+ #endif
+-	if (ssl->cert != NULL)
+-		ssl_cert_free(ssl->cert);
+ 	ssl->cert = ssl_cert_dup(ctx->cert);
++	if (ocert)
++		{
++		int i;
++		/* Copy negotiated digests from original */
++		for (i = 0; i < SSL_PKEY_NUM; i++)
++			{
++			CERT_PKEY *cpk = ocert->pkeys + i;
++			CERT_PKEY *rpk = ssl->cert->pkeys + i;
++			rpk->digest = cpk->digest;
++			}
++		ssl_cert_free(ocert);
++		}
+ 	CRYPTO_add(&ctx->references,1,CRYPTO_LOCK_SSL_CTX);
+ 	if (ssl->ctx != NULL)
+ 		SSL_CTX_free(ssl->ctx); /* decrement reference count */
diff --git a/openssl-1.0.1e-cve-2014-3570.patch b/openssl-1.0.1e-cve-2014-3570.patch
new file mode 100644
index 0000000..aff2f02
--- /dev/null
+++ b/openssl-1.0.1e-cve-2014-3570.patch
@@ -0,0 +1,3155 @@
+From e078642ddea29bbb6ba29788a6a513796387fbbb Mon Sep 17 00:00:00 2001
+From: Andy Polyakov <appro at openssl.org>
+Date: Mon, 5 Jan 2015 14:52:56 +0100
+Subject: [PATCH] Fix for CVE-2014-3570.
+
+Reviewed-by: Emilia Kasper <emilia at openssl.org>
+(cherry picked from commit e793809ba50c1e90ab592fb640a856168e50f3de)
+(with 1.0.1-specific addendum)
+---
+ crypto/bn/asm/mips.pl      |  611 +++---------
+ crypto/bn/asm/mips3.s      | 2201 --------------------------------------------
+ crypto/bn/asm/x86_64-gcc.c |   34 +-
+ crypto/bn/bn_asm.c         |   16 +-
+ crypto/bn/bntest.c         |  102 +-
+ 5 files changed, 234 insertions(+), 2730 deletions(-)
+ delete mode 100644 crypto/bn/asm/mips3.s
+
+diff --git a/crypto/bn/asm/mips.pl b/crypto/bn/asm/mips.pl
+index d2f3ef7..215c9a7 100644
+--- a/crypto/bn/asm/mips.pl
++++ b/crypto/bn/asm/mips.pl
+@@ -1872,6 +1872,41 @@ ___
+ 
+ ($a_4,$a_5,$a_6,$a_7)=($b_0,$b_1,$b_2,$b_3);
+ 
++sub add_c2 () {
++my ($hi,$lo,$c0,$c1,$c2,
++    $warm,      # !$warm denotes first call with specific sequence of
++                # $c_[XYZ] when there is no Z-carry to accumulate yet;
++    $an,$bn     # these two are arguments for multiplication which
++                # result is used in *next* step [which is why it's
++                # commented as "forward multiplication" below];
++    )=@_;
++$code.=<<___;
++	mflo	$lo
++	mfhi	$hi
++	$ADDU	$c0,$lo
++	sltu	$at,$c0,$lo
++	 $MULTU	$an,$bn			# forward multiplication
++	$ADDU	$c0,$lo
++	$ADDU	$at,$hi
++	sltu	$lo,$c0,$lo
++	$ADDU	$c1,$at
++	$ADDU	$hi,$lo
++___
++$code.=<<___	if (!$warm);
++	sltu	$c2,$c1,$at
++	$ADDU	$c1,$hi
++	sltu	$hi,$c1,$hi
++	$ADDU	$c2,$hi
++___
++$code.=<<___	if ($warm);
++	sltu	$at,$c1,$at
++	$ADDU	$c1,$hi
++	$ADDU	$c2,$at
++	sltu	$hi,$c1,$hi
++	$ADDU	$c2,$hi
++___
++}
++
+ $code.=<<___;
+ 
+ .align	5
+@@ -1920,21 +1955,10 @@ $code.=<<___;
+ 	sltu	$at,$c_2,$t_1
+ 	$ADDU	$c_3,$t_2,$at
+ 	$ST	$c_2,$BNSZ($a0)
+-
+-	mflo	$t_1
+-	mfhi	$t_2
+-	slt	$c_2,$t_2,$zero
+-	$SLL	$t_2,1
+-	$MULTU	$a_1,$a_1		# mul_add_c(a[1],b[1],c3,c1,c2);
+-	slt	$a2,$t_1,$zero
+-	$ADDU	$t_2,$a2
+-	$SLL	$t_1,1
+-	$ADDU	$c_3,$t_1
+-	sltu	$at,$c_3,$t_1
+-	$ADDU	$t_2,$at
+-	$ADDU	$c_1,$t_2
+-	sltu	$at,$c_1,$t_2
+-	$ADDU	$c_2,$at
++___
++	&add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0,
++		$a_1,$a_1);		# mul_add_c(a[1],b[1],c3,c1,c2);
++$code.=<<___;
+ 	mflo	$t_1
+ 	mfhi	$t_2
+ 	$ADDU	$c_3,$t_1
+@@ -1945,67 +1969,19 @@ $code.=<<___;
+ 	sltu	$at,$c_1,$t_2
+ 	$ADDU	$c_2,$at
+ 	$ST	$c_3,2*$BNSZ($a0)
+-
+-	mflo	$t_1
+-	mfhi	$t_2
+-	slt	$c_3,$t_2,$zero
+-	$SLL	$t_2,1
+-	$MULTU	$a_1,$a_2		# mul_add_c2(a[1],b[2],c1,c2,c3);
+-	slt	$a2,$t_1,$zero
+-	$ADDU	$t_2,$a2
+-	$SLL	$t_1,1
+-	$ADDU	$c_1,$t_1
+-	sltu	$at,$c_1,$t_1
+-	$ADDU	$t_2,$at
+-	$ADDU	$c_2,$t_2
+-	sltu	$at,$c_2,$t_2
+-	$ADDU	$c_3,$at
+-	mflo	$t_1
+-	mfhi	$t_2
+-	slt	$at,$t_2,$zero
+-	$ADDU	$c_3,$at
+-	 $MULTU	$a_4,$a_0		# mul_add_c2(a[4],b[0],c2,c3,c1);
+-	$SLL	$t_2,1
+-	slt	$a2,$t_1,$zero
+-	$ADDU	$t_2,$a2
+-	$SLL	$t_1,1
+-	$ADDU	$c_1,$t_1
+-	sltu	$at,$c_1,$t_1
+-	$ADDU	$t_2,$at
+-	$ADDU	$c_2,$t_2
+-	sltu	$at,$c_2,$t_2
+-	$ADDU	$c_3,$at
++___
++	&add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0,
++		$a_1,$a_2);		# mul_add_c2(a[1],b[2],c1,c2,c3);
++	&add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1,
++		$a_4,$a_0);		# mul_add_c2(a[4],b[0],c2,c3,c1);
++$code.=<<___;
+ 	$ST	$c_1,3*$BNSZ($a0)
+-
+-	mflo	$t_1
+-	mfhi	$t_2
+-	slt	$c_1,$t_2,$zero
+-	$SLL	$t_2,1
+-	$MULTU	$a_3,$a_1		# mul_add_c2(a[3],b[1],c2,c3,c1);
+-	slt	$a2,$t_1,$zero
+-	$ADDU	$t_2,$a2
+-	$SLL	$t_1,1
+-	$ADDU	$c_2,$t_1
+-	sltu	$at,$c_2,$t_1
+-	$ADDU	$t_2,$at
+-	$ADDU	$c_3,$t_2
+-	sltu	$at,$c_3,$t_2
+-	$ADDU	$c_1,$at
+-	mflo	$t_1
+-	mfhi	$t_2
+-	slt	$at,$t_2,$zero
+-	$ADDU	$c_1,$at
+-	$MULTU	$a_2,$a_2		# mul_add_c(a[2],b[2],c2,c3,c1);
+-	$SLL	$t_2,1
+-	slt	$a2,$t_1,$zero
+-	$ADDU	$t_2,$a2
+-	$SLL	$t_1,1
+-	$ADDU	$c_2,$t_1
+-	sltu	$at,$c_2,$t_1
+-	$ADDU	$t_2,$at
+-	$ADDU	$c_3,$t_2
+-	sltu	$at,$c_3,$t_2
+-	$ADDU	$c_1,$at
++___
++	&add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0,
++		$a_3,$a_1);		# mul_add_c2(a[3],b[1],c2,c3,c1);
++	&add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1,
++		$a_2,$a_2);		# mul_add_c(a[2],b[2],c2,c3,c1);
++$code.=<<___;
+ 	mflo	$t_1
+ 	mfhi	$t_2
+ 	$ADDU	$c_2,$t_1
+@@ -2016,97 +1992,23 @@ $code.=<<___;
+ 	sltu	$at,$c_3,$t_2
+ 	$ADDU	$c_1,$at
+ 	$ST	$c_2,4*$BNSZ($a0)
+-
+-	mflo	$t_1
+-	mfhi	$t_2
+-	slt	$c_2,$t_2,$zero
+-	$SLL	$t_2,1
+-	$MULTU	$a_1,$a_4		# mul_add_c2(a[1],b[4],c3,c1,c2);
+-	slt	$a2,$t_1,$zero
+-	$ADDU	$t_2,$a2
+-	$SLL	$t_1,1
+-	$ADDU	$c_3,$t_1
+-	sltu	$at,$c_3,$t_1
+-	$ADDU	$t_2,$at
+-	$ADDU	$c_1,$t_2
+-	sltu	$at,$c_1,$t_2
+-	$ADDU	$c_2,$at
+-	mflo	$t_1
+-	mfhi	$t_2
+-	slt	$at,$t_2,$zero
+-	$ADDU	$c_2,$at
+-	$MULTU	$a_2,$a_3		# mul_add_c2(a[2],b[3],c3,c1,c2);
+-	$SLL	$t_2,1
+-	slt	$a2,$t_1,$zero
+-	$ADDU	$t_2,$a2
+-	$SLL	$t_1,1
+-	$ADDU	$c_3,$t_1
+-	sltu	$at,$c_3,$t_1
+-	$ADDU	$t_2,$at
+-	$ADDU	$c_1,$t_2
+-	sltu	$at,$c_1,$t_2
+-	$ADDU	$c_2,$at
+-	mflo	$t_1
+-	mfhi	$t_2
+-	slt	$at,$t_2,$zero
+-	 $MULTU	$a_6,$a_0		# mul_add_c2(a[6],b[0],c1,c2,c3);
+-	$ADDU	$c_2,$at
+-	$SLL	$t_2,1
+-	slt	$a2,$t_1,$zero
+-	$ADDU	$t_2,$a2
+-	$SLL	$t_1,1
+-	$ADDU	$c_3,$t_1
+-	sltu	$at,$c_3,$t_1
+-	$ADDU	$t_2,$at
+-	$ADDU	$c_1,$t_2
+-	sltu	$at,$c_1,$t_2
+-	$ADDU	$c_2,$at
++___
++	&add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0,
++		$a_1,$a_4);		# mul_add_c2(a[1],b[4],c3,c1,c2);
++	&add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1,
++		$a_2,$a_3);		# mul_add_c2(a[2],b[3],c3,c1,c2);
++	&add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1,
++		$a_6,$a_0);		# mul_add_c2(a[6],b[0],c1,c2,c3);
++$code.=<<___;
+ 	$ST	$c_3,5*$BNSZ($a0)
+-
+-	mflo	$t_1
+-	mfhi	$t_2
+-	slt	$c_3,$t_2,$zero
+-	$SLL	$t_2,1
+-	$MULTU	$a_5,$a_1		# mul_add_c2(a[5],b[1],c1,c2,c3);
+-	slt	$a2,$t_1,$zero
+-	$ADDU	$t_2,$a2
+-	$SLL	$t_1,1
+-	$ADDU	$c_1,$t_1
+-	sltu	$at,$c_1,$t_1
+-	$ADDU	$t_2,$at
+-	$ADDU	$c_2,$t_2
+-	sltu	$at,$c_2,$t_2
+-	$ADDU	$c_3,$at
+-	mflo	$t_1
+-	mfhi	$t_2
+-	slt	$at,$t_2,$zero
+-	$ADDU	$c_3,$at
+-	$MULTU	$a_4,$a_2		# mul_add_c2(a[4],b[2],c1,c2,c3);
+-	$SLL	$t_2,1
+-	slt	$a2,$t_1,$zero
+-	$ADDU	$t_2,$a2
+-	$SLL	$t_1,1
+-	$ADDU	$c_1,$t_1
+-	sltu	$at,$c_1,$t_1
+-	$ADDU	$t_2,$at
+-	$ADDU	$c_2,$t_2
+-	sltu	$at,$c_2,$t_2
+-	$ADDU	$c_3,$at
+-	mflo	$t_1
+-	mfhi	$t_2
+-	slt	$at,$t_2,$zero
+-	$ADDU	$c_3,$at
+-	$MULTU	$a_3,$a_3		# mul_add_c(a[3],b[3],c1,c2,c3);
+-	$SLL	$t_2,1
+-	slt	$a2,$t_1,$zero
+-	$ADDU	$t_2,$a2
+-	$SLL	$t_1,1
+-	$ADDU	$c_1,$t_1
+-	sltu	$at,$c_1,$t_1
+-	$ADDU	$t_2,$at
+-	$ADDU	$c_2,$t_2
+-	sltu	$at,$c_2,$t_2
+-	$ADDU	$c_3,$at
++___
++	&add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0,
++		$a_5,$a_1);		# mul_add_c2(a[5],b[1],c1,c2,c3);
++	&add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1,
++		$a_4,$a_2);		# mul_add_c2(a[4],b[2],c1,c2,c3);
++	&add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1,
++		$a_3,$a_3);		# mul_add_c(a[3],b[3],c1,c2,c3);
++$code.=<<___;
+ 	mflo	$t_1
+ 	mfhi	$t_2
+ 	$ADDU	$c_1,$t_1
+@@ -2117,112 +2019,25 @@ $code.=<<___;
+ 	sltu	$at,$c_2,$t_2
+ 	$ADDU	$c_3,$at
+ 	$ST	$c_1,6*$BNSZ($a0)
+-
+-	mflo	$t_1
+-	mfhi	$t_2
+-	slt	$c_1,$t_2,$zero
+-	$SLL	$t_2,1
+-	$MULTU	$a_1,$a_6		# mul_add_c2(a[1],b[6],c2,c3,c1);
+-	slt	$a2,$t_1,$zero
+-	$ADDU	$t_2,$a2
+-	$SLL	$t_1,1
+-	$ADDU	$c_2,$t_1
+-	sltu	$at,$c_2,$t_1
+-	$ADDU	$t_2,$at
+-	$ADDU	$c_3,$t_2
+-	sltu	$at,$c_3,$t_2
+-	$ADDU	$c_1,$at
+-	mflo	$t_1
+-	mfhi	$t_2
+-	slt	$at,$t_2,$zero
+-	$ADDU	$c_1,$at
+-	$MULTU	$a_2,$a_5		# mul_add_c2(a[2],b[5],c2,c3,c1);
+-	$SLL	$t_2,1
+-	slt	$a2,$t_1,$zero
+-	$ADDU	$t_2,$a2
+-	$SLL	$t_1,1
+-	$ADDU	$c_2,$t_1
+-	sltu	$at,$c_2,$t_1
+-	$ADDU	$t_2,$at
+-	$ADDU	$c_3,$t_2
+-	sltu	$at,$c_3,$t_2
+-	$ADDU	$c_1,$at
+-	mflo	$t_1
+-	mfhi	$t_2
+-	slt	$at,$t_2,$zero
+-	$ADDU	$c_1,$at
+-	$MULTU	$a_3,$a_4		# mul_add_c2(a[3],b[4],c2,c3,c1);
+-	$SLL	$t_2,1
+-	slt	$a2,$t_1,$zero
+-	$ADDU	$t_2,$a2
+-	$SLL	$t_1,1
+-	$ADDU	$c_2,$t_1
+-	sltu	$at,$c_2,$t_1
+-	$ADDU	$t_2,$at
+-	$ADDU	$c_3,$t_2
+-	sltu	$at,$c_3,$t_2
+-	$ADDU	$c_1,$at
+-	mflo	$t_1
+-	mfhi	$t_2
+-	slt	$at,$t_2,$zero
+-	$ADDU	$c_1,$at
+-	 $MULTU	$a_7,$a_1		# mul_add_c2(a[7],b[1],c3,c1,c2);
+-	$SLL	$t_2,1
+-	slt	$a2,$t_1,$zero
+-	$ADDU	$t_2,$a2
+-	$SLL	$t_1,1
+-	$ADDU	$c_2,$t_1
+-	sltu	$at,$c_2,$t_1
+-	$ADDU	$t_2,$at
+-	$ADDU	$c_3,$t_2
+-	sltu	$at,$c_3,$t_2
+-	$ADDU	$c_1,$at
++___
++	&add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0,
++		$a_1,$a_6);		# mul_add_c2(a[1],b[6],c2,c3,c1);
++	&add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1,
++		$a_2,$a_5);		# mul_add_c2(a[2],b[5],c2,c3,c1);
++	&add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1,
++		$a_3,$a_4);		# mul_add_c2(a[3],b[4],c2,c3,c1);
++	&add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1,
++		$a_7,$a_1);		# mul_add_c2(a[7],b[1],c3,c1,c2);
++$code.=<<___;
+ 	$ST	$c_2,7*$BNSZ($a0)
+-
+-	mflo	$t_1
+-	mfhi	$t_2
+-	slt	$c_2,$t_2,$zero
+-	$SLL	$t_2,1
+-	$MULTU	$a_6,$a_2		# mul_add_c2(a[6],b[2],c3,c1,c2);
+-	slt	$a2,$t_1,$zero
+-	$ADDU	$t_2,$a2
+-	$SLL	$t_1,1
+-	$ADDU	$c_3,$t_1
+-	sltu	$at,$c_3,$t_1
+-	$ADDU	$t_2,$at
+-	$ADDU	$c_1,$t_2
+-	sltu	$at,$c_1,$t_2
+-	$ADDU	$c_2,$at
+-	mflo	$t_1
+-	mfhi	$t_2
+-	slt	$at,$t_2,$zero
+-	$ADDU	$c_2,$at
+-	$MULTU	$a_5,$a_3		# mul_add_c2(a[5],b[3],c3,c1,c2);
+-	$SLL	$t_2,1
+-	slt	$a2,$t_1,$zero
+-	$ADDU	$t_2,$a2
+-	$SLL	$t_1,1
+-	$ADDU	$c_3,$t_1
+-	sltu	$at,$c_3,$t_1
+-	$ADDU	$t_2,$at
+-	$ADDU	$c_1,$t_2
+-	sltu	$at,$c_1,$t_2
+-	$ADDU	$c_2,$at
+-	mflo	$t_1
+-	mfhi	$t_2
+-	slt	$at,$t_2,$zero
+-	$ADDU	$c_2,$at
+-	$MULTU	$a_4,$a_4		# mul_add_c(a[4],b[4],c3,c1,c2);
+-	$SLL	$t_2,1
+-	slt	$a2,$t_1,$zero
+-	$ADDU	$t_2,$a2
+-	$SLL	$t_1,1
+-	$ADDU	$c_3,$t_1
+-	sltu	$at,$c_3,$t_1
+-	$ADDU	$t_2,$at
+-	$ADDU	$c_1,$t_2
+-	sltu	$at,$c_1,$t_2
+-	$ADDU	$c_2,$at
++___
++	&add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0,
++		$a_6,$a_2);		# mul_add_c2(a[6],b[2],c3,c1,c2);
++	&add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1,
++		$a_5,$a_3);		# mul_add_c2(a[5],b[3],c3,c1,c2);
++	&add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1,
++		$a_4,$a_4);		# mul_add_c(a[4],b[4],c3,c1,c2);
++$code.=<<___;
+ 	mflo	$t_1
+ 	mfhi	$t_2
+ 	$ADDU	$c_3,$t_1
+@@ -2233,82 +2048,21 @@ $code.=<<___;
+ 	sltu	$at,$c_1,$t_2
+ 	$ADDU	$c_2,$at
+ 	$ST	$c_3,8*$BNSZ($a0)
+-
+-	mflo	$t_1
+-	mfhi	$t_2
+-	slt	$c_3,$t_2,$zero
+-	$SLL	$t_2,1
+-	$MULTU	$a_3,$a_6		# mul_add_c2(a[3],b[6],c1,c2,c3);
+-	slt	$a2,$t_1,$zero
+-	$ADDU	$t_2,$a2
+-	$SLL	$t_1,1
+-	$ADDU	$c_1,$t_1
+-	sltu	$at,$c_1,$t_1
+-	$ADDU	$t_2,$at
+-	$ADDU	$c_2,$t_2
+-	sltu	$at,$c_2,$t_2
+-	$ADDU	$c_3,$at
+-	mflo	$t_1
+-	mfhi	$t_2
+-	slt	$at,$t_2,$zero
+-	$ADDU	$c_3,$at
+-	$MULTU	$a_4,$a_5		# mul_add_c2(a[4],b[5],c1,c2,c3);
+-	$SLL	$t_2,1
+-	slt	$a2,$t_1,$zero
+-	$ADDU	$t_2,$a2
+-	$SLL	$t_1,1
+-	$ADDU	$c_1,$t_1
+-	sltu	$at,$c_1,$t_1
+-	$ADDU	$t_2,$at
+-	$ADDU	$c_2,$t_2
+-	sltu	$at,$c_2,$t_2
+-	$ADDU	$c_3,$at
+-	mflo	$t_1
+-	mfhi	$t_2
+-	slt	$at,$t_2,$zero
+-	$ADDU	$c_3,$at
+-	 $MULTU	$a_7,$a_3		# mul_add_c2(a[7],b[3],c2,c3,c1);
+-	$SLL	$t_2,1
+-	slt	$a2,$t_1,$zero
+-	$ADDU	$t_2,$a2
+-	$SLL	$t_1,1
+-	$ADDU	$c_1,$t_1
+-	sltu	$at,$c_1,$t_1
+-	$ADDU	$t_2,$at
+-	$ADDU	$c_2,$t_2
+-	sltu	$at,$c_2,$t_2
+-	$ADDU	$c_3,$at
++___
++	&add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0,
++		$a_3,$a_6);		# mul_add_c2(a[3],b[6],c1,c2,c3);
++	&add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1,
++		$a_4,$a_5);		# mul_add_c2(a[4],b[5],c1,c2,c3);
++	&add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1,
++		$a_7,$a_3);		# mul_add_c2(a[7],b[3],c2,c3,c1);
++$code.=<<___;
+ 	$ST	$c_1,9*$BNSZ($a0)
+-
+-	mflo	$t_1
+-	mfhi	$t_2
+-	slt	$c_1,$t_2,$zero
+-	$SLL	$t_2,1
+-	$MULTU	$a_6,$a_4		# mul_add_c2(a[6],b[4],c2,c3,c1);
+-	slt	$a2,$t_1,$zero
+-	$ADDU	$t_2,$a2
+-	$SLL	$t_1,1
+-	$ADDU	$c_2,$t_1
+-	sltu	$at,$c_2,$t_1
+-	$ADDU	$t_2,$at
+-	$ADDU	$c_3,$t_2
+-	sltu	$at,$c_3,$t_2
+-	$ADDU	$c_1,$at
+-	mflo	$t_1
+-	mfhi	$t_2
+-	slt	$at,$t_2,$zero
+-	$ADDU	$c_1,$at
+-	$MULTU	$a_5,$a_5		# mul_add_c(a[5],b[5],c2,c3,c1);
+-	$SLL	$t_2,1
+-	slt	$a2,$t_1,$zero
+-	$ADDU	$t_2,$a2
+-	$SLL	$t_1,1
+-	$ADDU	$c_2,$t_1
+-	sltu	$at,$c_2,$t_1
+-	$ADDU	$t_2,$at
+-	$ADDU	$c_3,$t_2
+-	sltu	$at,$c_3,$t_2
+-	$ADDU	$c_1,$at
++___
++	&add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0,
++		$a_6,$a_4);		# mul_add_c2(a[6],b[4],c2,c3,c1);
++	&add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1,
++		$a_5,$a_5);		# mul_add_c(a[5],b[5],c2,c3,c1);
++$code.=<<___;
+ 	mflo	$t_1
+ 	mfhi	$t_2
+ 	$ADDU	$c_2,$t_1
+@@ -2319,52 +2073,17 @@ $code.=<<___;
+ 	sltu	$at,$c_3,$t_2
+ 	$ADDU	$c_1,$at
+ 	$ST	$c_2,10*$BNSZ($a0)
+-
+-	mflo	$t_1
+-	mfhi	$t_2
+-	slt	$c_2,$t_2,$zero
+-	$SLL	$t_2,1
+-	$MULTU	$a_5,$a_6		# mul_add_c2(a[5],b[6],c3,c1,c2);
+-	slt	$a2,$t_1,$zero
+-	$ADDU	$t_2,$a2
+-	$SLL	$t_1,1
+-	$ADDU	$c_3,$t_1
+-	sltu	$at,$c_3,$t_1
+-	$ADDU	$t_2,$at
+-	$ADDU	$c_1,$t_2
+-	sltu	$at,$c_1,$t_2
+-	$ADDU	$c_2,$at
+-	mflo	$t_1
+-	mfhi	$t_2
+-	slt	$at,$t_2,$zero
+-	$ADDU	$c_2,$at
+-	 $MULTU	$a_7,$a_5		# mul_add_c2(a[7],b[5],c1,c2,c3);
+-	$SLL	$t_2,1
+-	slt	$a2,$t_1,$zero
+-	$ADDU	$t_2,$a2
+-	$SLL	$t_1,1
+-	$ADDU	$c_3,$t_1
+-	sltu	$at,$c_3,$t_1
+-	$ADDU	$t_2,$at
+-	$ADDU	$c_1,$t_2
+-	sltu	$at,$c_1,$t_2
+-	$ADDU	$c_2,$at
++___
++	&add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0,
++		$a_5,$a_6);		# mul_add_c2(a[5],b[6],c3,c1,c2);
++	&add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1,
++		$a_7,$a_5);		# mul_add_c2(a[7],b[5],c1,c2,c3);
++$code.=<<___;
+ 	$ST	$c_3,11*$BNSZ($a0)
+-
+-	mflo	$t_1
+-	mfhi	$t_2
+-	slt	$c_3,$t_2,$zero
+-	$SLL	$t_2,1
+-	$MULTU	$a_6,$a_6		# mul_add_c(a[6],b[6],c1,c2,c3);
+-	slt	$a2,$t_1,$zero
+-	$ADDU	$t_2,$a2
+-	$SLL	$t_1,1
+-	$ADDU	$c_1,$t_1
+-	sltu	$at,$c_1,$t_1
+-	$ADDU	$t_2,$at
+-	$ADDU	$c_2,$t_2
+-	sltu	$at,$c_2,$t_2
+-	$ADDU	$c_3,$at
++___
++	&add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0,
++		$a_6,$a_6);		# mul_add_c(a[6],b[6],c1,c2,c3);
++$code.=<<___;
+ 	mflo	$t_1
+ 	mfhi	$t_2
+ 	$ADDU	$c_1,$t_1
+@@ -2375,21 +2094,10 @@ $code.=<<___;
+ 	sltu	$at,$c_2,$t_2
+ 	$ADDU	$c_3,$at
+ 	$ST	$c_1,12*$BNSZ($a0)
+-
+-	mflo	$t_1
+-	mfhi	$t_2
+-	slt	$c_1,$t_2,$zero
+-	$SLL	$t_2,1
+-	 $MULTU	$a_7,$a_7		# mul_add_c(a[7],b[7],c3,c1,c2);
+-	slt	$a2,$t_1,$zero
+-	$ADDU	$t_2,$a2
+-	$SLL	$t_1,1
+-	$ADDU	$c_2,$t_1
+-	sltu	$at,$c_2,$t_1
+-	$ADDU	$t_2,$at
+-	$ADDU	$c_3,$t_2
+-	sltu	$at,$c_3,$t_2
+-	$ADDU	$c_1,$at
++___
++	&add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0,
++		$a_7,$a_7);		# mul_add_c(a[7],b[7],c3,c1,c2);
++$code.=<<___;
+ 	$ST	$c_2,13*$BNSZ($a0)
+ 
+ 	mflo	$t_1
+@@ -2457,21 +2165,10 @@ $code.=<<___;
+ 	sltu	$at,$c_2,$t_1
+ 	$ADDU	$c_3,$t_2,$at
+ 	$ST	$c_2,$BNSZ($a0)
+-
+-	mflo	$t_1
+-	mfhi	$t_2
+-	slt	$c_2,$t_2,$zero
+-	$SLL	$t_2,1
+-	$MULTU	$a_1,$a_1		# mul_add_c(a[1],b[1],c3,c1,c2);
+-	slt	$a2,$t_1,$zero
+-	$ADDU	$t_2,$a2
+-	$SLL	$t_1,1
+-	$ADDU	$c_3,$t_1
+-	sltu	$at,$c_3,$t_1
+-	$ADDU	$t_2,$at
+-	$ADDU	$c_1,$t_2
+-	sltu	$at,$c_1,$t_2
+-	$ADDU	$c_2,$at
++___
++	&add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0,
++		$a_1,$a_1);		# mul_add_c(a[1],b[1],c3,c1,c2);
++$code.=<<___;
+ 	mflo	$t_1
+ 	mfhi	$t_2
+ 	$ADDU	$c_3,$t_1
+@@ -2482,52 +2179,17 @@ $code.=<<___;
+ 	sltu	$at,$c_1,$t_2
+ 	$ADDU	$c_2,$at
+ 	$ST	$c_3,2*$BNSZ($a0)
+-
+-	mflo	$t_1
+-	mfhi	$t_2
+-	slt	$c_3,$t_2,$zero
+-	$SLL	$t_2,1
+-	$MULTU	$a_1,$a_2		# mul_add_c(a2[1],b[2],c1,c2,c3);
+-	slt	$a2,$t_1,$zero
+-	$ADDU	$t_2,$a2
+-	$SLL	$t_1,1
+-	$ADDU	$c_1,$t_1
+-	sltu	$at,$c_1,$t_1
+-	$ADDU	$t_2,$at
+-	$ADDU	$c_2,$t_2
+-	sltu	$at,$c_2,$t_2
+-	$ADDU	$c_3,$at
+-	mflo	$t_1
+-	mfhi	$t_2
+-	slt	$at,$t_2,$zero
+-	$ADDU	$c_3,$at
+-	 $MULTU	$a_3,$a_1		# mul_add_c2(a[3],b[1],c2,c3,c1);
+-	$SLL	$t_2,1
+-	slt	$a2,$t_1,$zero
+-	$ADDU	$t_2,$a2
+-	$SLL	$t_1,1
+-	$ADDU	$c_1,$t_1
+-	sltu	$at,$c_1,$t_1
+-	$ADDU	$t_2,$at
+-	$ADDU	$c_2,$t_2
+-	sltu	$at,$c_2,$t_2
+-	$ADDU	$c_3,$at
++___
++	&add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0,
++		$a_1,$a_2);		# mul_add_c2(a2[1],b[2],c1,c2,c3);
++	&add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1,
++		$a_3,$a_1);		# mul_add_c2(a[3],b[1],c2,c3,c1);
++$code.=<<___;
+ 	$ST	$c_1,3*$BNSZ($a0)
+-
+-	mflo	$t_1
+-	mfhi	$t_2
+-	slt	$c_1,$t_2,$zero
+-	$SLL	$t_2,1
+-	$MULTU	$a_2,$a_2		# mul_add_c(a[2],b[2],c2,c3,c1);
+-	slt	$a2,$t_1,$zero
+-	$ADDU	$t_2,$a2
+-	$SLL	$t_1,1
+-	$ADDU	$c_2,$t_1
+-	sltu	$at,$c_2,$t_1
+-	$ADDU	$t_2,$at
+-	$ADDU	$c_3,$t_2
+-	sltu	$at,$c_3,$t_2
+-	$ADDU	$c_1,$at
++___
++	&add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0,
++		$a_2,$a_2);		# mul_add_c(a[2],b[2],c2,c3,c1);
++$code.=<<___;
+ 	mflo	$t_1
+ 	mfhi	$t_2
+ 	$ADDU	$c_2,$t_1
+@@ -2538,21 +2200,10 @@ $code.=<<___;
+ 	sltu	$at,$c_3,$t_2
+ 	$ADDU	$c_1,$at
+ 	$ST	$c_2,4*$BNSZ($a0)
+-
+-	mflo	$t_1
+-	mfhi	$t_2
+-	slt	$c_2,$t_2,$zero
+-	$SLL	$t_2,1
+-	 $MULTU	$a_3,$a_3		# mul_add_c(a[3],b[3],c1,c2,c3);
+-	slt	$a2,$t_1,$zero
+-	$ADDU	$t_2,$a2
+-	$SLL	$t_1,1
+-	$ADDU	$c_3,$t_1
+-	sltu	$at,$c_3,$t_1
+-	$ADDU	$t_2,$at
+-	$ADDU	$c_1,$t_2
+-	sltu	$at,$c_1,$t_2
+-	$ADDU	$c_2,$at
++___
++	&add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0,
++		$a_3,$a_3);		# mul_add_c(a[3],b[3],c1,c2,c3);
++$code.=<<___;
+ 	$ST	$c_3,5*$BNSZ($a0)
+ 
+ 	mflo	$t_1
+diff --git a/crypto/bn/asm/mips3.s b/crypto/bn/asm/mips3.s
+deleted file mode 100644
+index dca4105..0000000
+--- a/crypto/bn/asm/mips3.s
++++ /dev/null
+@@ -1,2201 +0,0 @@
+-.rdata
+-.asciiz	"mips3.s, Version 1.1"
+-.asciiz	"MIPS III/IV ISA artwork by Andy Polyakov <appro at fy.chalmers.se>"
+-
+-/*
+- * ====================================================================
+- * Written by Andy Polyakov <appro at fy.chalmers.se> for the OpenSSL
+- * project.
+- *
+- * Rights for redistribution and usage in source and binary forms are
+- * granted according to the OpenSSL license. Warranty of any kind is
+- * disclaimed.
+- * ====================================================================
+- */
+-
+-/*
+- * This is my modest contributon to the OpenSSL project (see
+- * http://www.openssl.org/ for more information about it) and is
+- * a drop-in MIPS III/IV ISA replacement for crypto/bn/bn_asm.c
+- * module. For updates see http://fy.chalmers.se/~appro/hpe/.
+- *
+- * The module is designed to work with either of the "new" MIPS ABI(5),
+- * namely N32 or N64, offered by IRIX 6.x. It's not ment to work under
+- * IRIX 5.x not only because it doesn't support new ABIs but also
+- * because 5.x kernels put R4x00 CPU into 32-bit mode and all those
+- * 64-bit instructions (daddu, dmultu, etc.) found below gonna only
+- * cause illegal instruction exception:-(
+- *
+- * In addition the code depends on preprocessor flags set up by MIPSpro
+- * compiler driver (either as or cc) and therefore (probably?) can't be
+- * compiled by the GNU assembler. GNU C driver manages fine though...
+- * I mean as long as -mmips-as is specified or is the default option,
+- * because then it simply invokes /usr/bin/as which in turn takes
+- * perfect care of the preprocessor definitions. Another neat feature
+- * offered by the MIPSpro assembler is an optimization pass. This gave
+- * me the opportunity to have the code looking more regular as all those
+- * architecture dependent instruction rescheduling details were left to
+- * the assembler. Cool, huh?
+- *
+- * Performance improvement is astonishing! 'apps/openssl speed rsa dsa'
+- * goes way over 3 times faster!
+- *
+- *					<appro at fy.chalmers.se>
+- */
+-#include <asm.h>
+-#include <regdef.h>
+-
+-#if _MIPS_ISA>=4
+-#define	MOVNZ(cond,dst,src)	\
+-	movn	dst,src,cond
+-#else
+-#define	MOVNZ(cond,dst,src)	\
+-	.set	noreorder;	\
+-	bnezl	cond,.+8;	\
+-	move	dst,src;	\
+-	.set	reorder
+-#endif
+-
+-.text
+-
+-.set	noat
+-.set	reorder
+-
+-#define	MINUS4	v1
+-
+-.align	5
+-LEAF(bn_mul_add_words)
+-	.set	noreorder
+-	bgtzl	a2,.L_bn_mul_add_words_proceed
+-	ld	t0,0(a1)
+-	jr	ra
+-	move	v0,zero
+-	.set	reorder
+-
+-.L_bn_mul_add_words_proceed:
+-	li	MINUS4,-4
+-	and	ta0,a2,MINUS4
+-	move	v0,zero
+-	beqz	ta0,.L_bn_mul_add_words_tail
+-
+-.L_bn_mul_add_words_loop:
+-	dmultu	t0,a3
+-	ld	t1,0(a0)
+-	ld	t2,8(a1)
+-	ld	t3,8(a0)
+-	ld	ta0,16(a1)
+-	ld	ta1,16(a0)
+-	daddu	t1,v0
+-	sltu	v0,t1,v0	/* All manuals say it "compares 32-bit
+-				 * values", but it seems to work fine
+-				 * even on 64-bit registers. */
+-	mflo	AT
+-	mfhi	t0
+-	daddu	t1,AT
+-	daddu	v0,t0
+-	sltu	AT,t1,AT
+-	sd	t1,0(a0)
+-	daddu	v0,AT
+-
+-	dmultu	t2,a3
+-	ld	ta2,24(a1)
+-	ld	ta3,24(a0)
+-	daddu	t3,v0
+-	sltu	v0,t3,v0
+-	mflo	AT
+-	mfhi	t2
+-	daddu	t3,AT
+-	daddu	v0,t2
+-	sltu	AT,t3,AT
+-	sd	t3,8(a0)
+-	daddu	v0,AT
+-
+-	dmultu	ta0,a3
+-	subu	a2,4
+-	PTR_ADD	a0,32
+-	PTR_ADD	a1,32
+-	daddu	ta1,v0
+-	sltu	v0,ta1,v0
+-	mflo	AT
+-	mfhi	ta0
+-	daddu	ta1,AT
+-	daddu	v0,ta0
+-	sltu	AT,ta1,AT
+-	sd	ta1,-16(a0)
+-	daddu	v0,AT
+-
+-
+-	dmultu	ta2,a3
+-	and	ta0,a2,MINUS4
+-	daddu	ta3,v0
+-	sltu	v0,ta3,v0
+-	mflo	AT
+-	mfhi	ta2
+-	daddu	ta3,AT
+-	daddu	v0,ta2
+-	sltu	AT,ta3,AT
+-	sd	ta3,-8(a0)
+-	daddu	v0,AT
+-	.set	noreorder
+-	bgtzl	ta0,.L_bn_mul_add_words_loop
+-	ld	t0,0(a1)
+-
+-	bnezl	a2,.L_bn_mul_add_words_tail
+-	ld	t0,0(a1)
+-	.set	reorder
+-
+-.L_bn_mul_add_words_return:
+-	jr	ra
+-
+-.L_bn_mul_add_words_tail:
+-	dmultu	t0,a3
+-	ld	t1,0(a0)
+-	subu	a2,1
+-	daddu	t1,v0
+-	sltu	v0,t1,v0
+-	mflo	AT
+-	mfhi	t0
+-	daddu	t1,AT
+-	daddu	v0,t0
+-	sltu	AT,t1,AT
+-	sd	t1,0(a0)
+-	daddu	v0,AT
+-	beqz	a2,.L_bn_mul_add_words_return
+-
+-	ld	t0,8(a1)
+-	dmultu	t0,a3
+-	ld	t1,8(a0)
+-	subu	a2,1
+-	daddu	t1,v0
+-	sltu	v0,t1,v0
+-	mflo	AT
+-	mfhi	t0
+-	daddu	t1,AT
+-	daddu	v0,t0
+-	sltu	AT,t1,AT
+-	sd	t1,8(a0)
+-	daddu	v0,AT
+-	beqz	a2,.L_bn_mul_add_words_return
+-
+-	ld	t0,16(a1)
+-	dmultu	t0,a3
+-	ld	t1,16(a0)
+-	daddu	t1,v0
+-	sltu	v0,t1,v0
+-	mflo	AT
+-	mfhi	t0
+-	daddu	t1,AT
+-	daddu	v0,t0
+-	sltu	AT,t1,AT
+-	sd	t1,16(a0)
+-	daddu	v0,AT
+-	jr	ra
+-END(bn_mul_add_words)
+-
+-.align	5
+-LEAF(bn_mul_words)
+-	.set	noreorder
+-	bgtzl	a2,.L_bn_mul_words_proceed
+-	ld	t0,0(a1)
+-	jr	ra
+-	move	v0,zero
+-	.set	reorder
+-
+-.L_bn_mul_words_proceed:
+-	li	MINUS4,-4
+-	and	ta0,a2,MINUS4
+-	move	v0,zero
+-	beqz	ta0,.L_bn_mul_words_tail
+-
+-.L_bn_mul_words_loop:
+-	dmultu	t0,a3
+-	ld	t2,8(a1)
+-	ld	ta0,16(a1)
+-	ld	ta2,24(a1)
+-	mflo	AT
+-	mfhi	t0
+-	daddu	v0,AT
+-	sltu	t1,v0,AT
+-	sd	v0,0(a0)
+-	daddu	v0,t1,t0
+-
+-	dmultu	t2,a3
+-	subu	a2,4
+-	PTR_ADD	a0,32
+-	PTR_ADD	a1,32
+-	mflo	AT
+-	mfhi	t2
+-	daddu	v0,AT
+-	sltu	t3,v0,AT
+-	sd	v0,-24(a0)
+-	daddu	v0,t3,t2
+-
+-	dmultu	ta0,a3
+-	mflo	AT
+-	mfhi	ta0
+-	daddu	v0,AT
+-	sltu	ta1,v0,AT
+-	sd	v0,-16(a0)
+-	daddu	v0,ta1,ta0
+-
+-
+-	dmultu	ta2,a3
+-	and	ta0,a2,MINUS4
+-	mflo	AT
+-	mfhi	ta2
+-	daddu	v0,AT
+-	sltu	ta3,v0,AT
+-	sd	v0,-8(a0)
+-	daddu	v0,ta3,ta2
+-	.set	noreorder
+-	bgtzl	ta0,.L_bn_mul_words_loop
+-	ld	t0,0(a1)
+-
+-	bnezl	a2,.L_bn_mul_words_tail
+-	ld	t0,0(a1)
+-	.set	reorder
+-
+-.L_bn_mul_words_return:
+-	jr	ra
+-
+-.L_bn_mul_words_tail:
+-	dmultu	t0,a3
+-	subu	a2,1
+-	mflo	AT
+-	mfhi	t0
+-	daddu	v0,AT
+-	sltu	t1,v0,AT
+-	sd	v0,0(a0)
+-	daddu	v0,t1,t0
+-	beqz	a2,.L_bn_mul_words_return
+-
+-	ld	t0,8(a1)
+-	dmultu	t0,a3
+-	subu	a2,1
+-	mflo	AT
+-	mfhi	t0
+-	daddu	v0,AT
+-	sltu	t1,v0,AT
+-	sd	v0,8(a0)
+-	daddu	v0,t1,t0
+-	beqz	a2,.L_bn_mul_words_return
+-
+-	ld	t0,16(a1)
+-	dmultu	t0,a3
+-	mflo	AT
+-	mfhi	t0
+-	daddu	v0,AT
+-	sltu	t1,v0,AT
+-	sd	v0,16(a0)
+-	daddu	v0,t1,t0
+-	jr	ra
+-END(bn_mul_words)
+-
+-.align	5
+-LEAF(bn_sqr_words)
+-	.set	noreorder
+-	bgtzl	a2,.L_bn_sqr_words_proceed
+-	ld	t0,0(a1)
+-	jr	ra
+-	move	v0,zero
+-	.set	reorder
+-
+-.L_bn_sqr_words_proceed:
+-	li	MINUS4,-4
+-	and	ta0,a2,MINUS4
+-	move	v0,zero
+-	beqz	ta0,.L_bn_sqr_words_tail
+-
+-.L_bn_sqr_words_loop:
+-	dmultu	t0,t0
+-	ld	t2,8(a1)
+-	ld	ta0,16(a1)
+-	ld	ta2,24(a1)
+-	mflo	t1
+-	mfhi	t0
+-	sd	t1,0(a0)
+-	sd	t0,8(a0)
+-
+-	dmultu	t2,t2
+-	subu	a2,4
+-	PTR_ADD	a0,64
+-	PTR_ADD	a1,32
+-	mflo	t3
+-	mfhi	t2
+-	sd	t3,-48(a0)
+-	sd	t2,-40(a0)
+-
+-	dmultu	ta0,ta0
+-	mflo	ta1
+-	mfhi	ta0
+-	sd	ta1,-32(a0)
+-	sd	ta0,-24(a0)
+-
+-
+-	dmultu	ta2,ta2
+-	and	ta0,a2,MINUS4
+-	mflo	ta3
+-	mfhi	ta2
+-	sd	ta3,-16(a0)
+-	sd	ta2,-8(a0)
+-
+-	.set	noreorder
+-	bgtzl	ta0,.L_bn_sqr_words_loop
+-	ld	t0,0(a1)
+-
+-	bnezl	a2,.L_bn_sqr_words_tail
+-	ld	t0,0(a1)
+-	.set	reorder
+-
+-.L_bn_sqr_words_return:
+-	move	v0,zero
+-	jr	ra
+-
+-.L_bn_sqr_words_tail:
+-	dmultu	t0,t0
+-	subu	a2,1
+-	mflo	t1
+-	mfhi	t0
+-	sd	t1,0(a0)
+-	sd	t0,8(a0)
+-	beqz	a2,.L_bn_sqr_words_return
+-
+-	ld	t0,8(a1)
+-	dmultu	t0,t0
+-	subu	a2,1
+-	mflo	t1
+-	mfhi	t0
+-	sd	t1,16(a0)
+-	sd	t0,24(a0)
+-	beqz	a2,.L_bn_sqr_words_return
+-
+-	ld	t0,16(a1)
+-	dmultu	t0,t0
+-	mflo	t1
+-	mfhi	t0
+-	sd	t1,32(a0)
+-	sd	t0,40(a0)
+-	jr	ra
+-END(bn_sqr_words)
+-
+-.align	5
+-LEAF(bn_add_words)
+-	.set	noreorder
+-	bgtzl	a3,.L_bn_add_words_proceed
+-	ld	t0,0(a1)
+-	jr	ra
+-	move	v0,zero
+-	.set	reorder
+-
+-.L_bn_add_words_proceed:
+-	li	MINUS4,-4
+-	and	AT,a3,MINUS4
+-	move	v0,zero
+-	beqz	AT,.L_bn_add_words_tail
+-
+-.L_bn_add_words_loop:
+-	ld	ta0,0(a2)
+-	subu	a3,4
+-	ld	t1,8(a1)
+-	and	AT,a3,MINUS4
+-	ld	t2,16(a1)
+-	PTR_ADD	a2,32
+-	ld	t3,24(a1)
+-	PTR_ADD	a0,32
+-	ld	ta1,-24(a2)
+-	PTR_ADD	a1,32
+-	ld	ta2,-16(a2)
+-	ld	ta3,-8(a2)
+-	daddu	ta0,t0
+-	sltu	t8,ta0,t0
+-	daddu	t0,ta0,v0
+-	sltu	v0,t0,ta0
+-	sd	t0,-32(a0)
+-	daddu	v0,t8
+-
+-	daddu	ta1,t1
+-	sltu	t9,ta1,t1
+-	daddu	t1,ta1,v0
+-	sltu	v0,t1,ta1
+-	sd	t1,-24(a0)
+-	daddu	v0,t9
+-
+-	daddu	ta2,t2
+-	sltu	t8,ta2,t2
+-	daddu	t2,ta2,v0
+-	sltu	v0,t2,ta2
+-	sd	t2,-16(a0)
+-	daddu	v0,t8
+-	
+-	daddu	ta3,t3
+-	sltu	t9,ta3,t3
+-	daddu	t3,ta3,v0
+-	sltu	v0,t3,ta3
+-	sd	t3,-8(a0)
+-	daddu	v0,t9
+-	
+-	.set	noreorder
+-	bgtzl	AT,.L_bn_add_words_loop
+-	ld	t0,0(a1)
+-
+-	bnezl	a3,.L_bn_add_words_tail
+-	ld	t0,0(a1)
+-	.set	reorder
+-
+-.L_bn_add_words_return:
+-	jr	ra
+-
+-.L_bn_add_words_tail:
+-	ld	ta0,0(a2)
+-	daddu	ta0,t0
+-	subu	a3,1
+-	sltu	t8,ta0,t0
+-	daddu	t0,ta0,v0
+-	sltu	v0,t0,ta0
+-	sd	t0,0(a0)
+-	daddu	v0,t8
+-	beqz	a3,.L_bn_add_words_return
+-
+-	ld	t1,8(a1)
+-	ld	ta1,8(a2)
+-	daddu	ta1,t1
+-	subu	a3,1
+-	sltu	t9,ta1,t1
+-	daddu	t1,ta1,v0
+-	sltu	v0,t1,ta1
+-	sd	t1,8(a0)
+-	daddu	v0,t9
+-	beqz	a3,.L_bn_add_words_return
+-
+-	ld	t2,16(a1)
+-	ld	ta2,16(a2)
+-	daddu	ta2,t2
+-	sltu	t8,ta2,t2
+-	daddu	t2,ta2,v0
+-	sltu	v0,t2,ta2
+-	sd	t2,16(a0)
+-	daddu	v0,t8
+-	jr	ra
+-END(bn_add_words)
+-
+-.align	5
+-LEAF(bn_sub_words)
+-	.set	noreorder
+-	bgtzl	a3,.L_bn_sub_words_proceed
+-	ld	t0,0(a1)
+-	jr	ra
+-	move	v0,zero
+-	.set	reorder
+-
+-.L_bn_sub_words_proceed:
+-	li	MINUS4,-4
+-	and	AT,a3,MINUS4
+-	move	v0,zero
+-	beqz	AT,.L_bn_sub_words_tail
+-
+-.L_bn_sub_words_loop:
+-	ld	ta0,0(a2)
+-	subu	a3,4
+-	ld	t1,8(a1)
+-	and	AT,a3,MINUS4
+-	ld	t2,16(a1)
+-	PTR_ADD	a2,32
+-	ld	t3,24(a1)
+-	PTR_ADD	a0,32
+-	ld	ta1,-24(a2)
+-	PTR_ADD	a1,32
+-	ld	ta2,-16(a2)
+-	ld	ta3,-8(a2)
+-	sltu	t8,t0,ta0
+-	dsubu	t0,ta0
+-	dsubu	ta0,t0,v0
+-	sd	ta0,-32(a0)
+-	MOVNZ	(t0,v0,t8)
+-
+-	sltu	t9,t1,ta1
+-	dsubu	t1,ta1
+-	dsubu	ta1,t1,v0
+-	sd	ta1,-24(a0)
+-	MOVNZ	(t1,v0,t9)
+-
+-
+-	sltu	t8,t2,ta2
+-	dsubu	t2,ta2
+-	dsubu	ta2,t2,v0
+-	sd	ta2,-16(a0)
+-	MOVNZ	(t2,v0,t8)
+-
+-	sltu	t9,t3,ta3
+-	dsubu	t3,ta3
+-	dsubu	ta3,t3,v0
+-	sd	ta3,-8(a0)
+-	MOVNZ	(t3,v0,t9)
+-
+-	.set	noreorder
+-	bgtzl	AT,.L_bn_sub_words_loop
+-	ld	t0,0(a1)
+-
+-	bnezl	a3,.L_bn_sub_words_tail
+-	ld	t0,0(a1)
+-	.set	reorder
+-
+-.L_bn_sub_words_return:
+-	jr	ra
+-
+-.L_bn_sub_words_tail:
+-	ld	ta0,0(a2)
+-	subu	a3,1
+-	sltu	t8,t0,ta0
+-	dsubu	t0,ta0
+-	dsubu	ta0,t0,v0
+-	MOVNZ	(t0,v0,t8)
+-	sd	ta0,0(a0)
+-	beqz	a3,.L_bn_sub_words_return
+-
+-	ld	t1,8(a1)
+-	subu	a3,1
+-	ld	ta1,8(a2)
+-	sltu	t9,t1,ta1
+-	dsubu	t1,ta1
+-	dsubu	ta1,t1,v0
+-	MOVNZ	(t1,v0,t9)
+-	sd	ta1,8(a0)
+-	beqz	a3,.L_bn_sub_words_return
+-
+-	ld	t2,16(a1)
+-	ld	ta2,16(a2)
+-	sltu	t8,t2,ta2
+-	dsubu	t2,ta2
+-	dsubu	ta2,t2,v0
+-	MOVNZ	(t2,v0,t8)
+-	sd	ta2,16(a0)
+-	jr	ra
+-END(bn_sub_words)
+-
+-#undef	MINUS4
+-
+-.align 5
+-LEAF(bn_div_3_words)
+-	.set	reorder
+-	move	a3,a0		/* we know that bn_div_words doesn't
+-				 * touch a3, ta2, ta3 and preserves a2
+-				 * so that we can save two arguments
+-				 * and return address in registers
+-				 * instead of stack:-)
+-				 */
+-	ld	a0,(a3)
+-	move	ta2,a1
+-	ld	a1,-8(a3)
+-	bne	a0,a2,.L_bn_div_3_words_proceed
+-	li	v0,-1
+-	jr	ra
+-.L_bn_div_3_words_proceed:
+-	move	ta3,ra
+-	bal	bn_div_words
+-	move	ra,ta3
+-	dmultu	ta2,v0
+-	ld	t2,-16(a3)
+-	move	ta0,zero
+-	mfhi	t1
+-	mflo	t0
+-	sltu	t8,t1,v1
+-.L_bn_div_3_words_inner_loop:
+-	bnez	t8,.L_bn_div_3_words_inner_loop_done
+-	sgeu	AT,t2,t0
+-	seq	t9,t1,v1
+-	and	AT,t9
+-	sltu	t3,t0,ta2
+-	daddu	v1,a2
+-	dsubu	t1,t3
+-	dsubu	t0,ta2
+-	sltu	t8,t1,v1
+-	sltu	ta0,v1,a2
+-	or	t8,ta0
+-	.set	noreorder
+-	beqzl	AT,.L_bn_div_3_words_inner_loop
+-	dsubu	v0,1
+-	.set	reorder
+-.L_bn_div_3_words_inner_loop_done:
+-	jr	ra
+-END(bn_div_3_words)
+-
+-.align	5
+-LEAF(bn_div_words)
+-	.set	noreorder
+-	bnezl	a2,.L_bn_div_words_proceed
+-	move	v1,zero
+-	jr	ra
+-	li	v0,-1		/* I'd rather signal div-by-zero
+-				 * which can be done with 'break 7' */
+-
+-.L_bn_div_words_proceed:
+-	bltz	a2,.L_bn_div_words_body
+-	move	t9,v1
+-	dsll	a2,1
+-	bgtz	a2,.-4
+-	addu	t9,1
+-
+-	.set	reorder
+-	negu	t1,t9
+-	li	t2,-1
+-	dsll	t2,t1
+-	and	t2,a0
+-	dsrl	AT,a1,t1
+-	.set	noreorder
+-	bnezl	t2,.+8
+-	break	6		/* signal overflow */
+-	.set	reorder
+-	dsll	a0,t9
+-	dsll	a1,t9
+-	or	a0,AT
+-
+-#define	QT	ta0
+-#define	HH	ta1
+-#define	DH	v1
+-.L_bn_div_words_body:
+-	dsrl	DH,a2,32
+-	sgeu	AT,a0,a2
+-	.set	noreorder
+-	bnezl	AT,.+8
+-	dsubu	a0,a2
+-	.set	reorder
+-
+-	li	QT,-1
+-	dsrl	HH,a0,32
+-	dsrl	QT,32	/* q=0xffffffff */
+-	beq	DH,HH,.L_bn_div_words_skip_div1
+-	ddivu	zero,a0,DH
+-	mflo	QT
+-.L_bn_div_words_skip_div1:
+-	dmultu	a2,QT
+-	dsll	t3,a0,32
+-	dsrl	AT,a1,32
+-	or	t3,AT
+-	mflo	t0
+-	mfhi	t1
+-.L_bn_div_words_inner_loop1:
+-	sltu	t2,t3,t0
+-	seq	t8,HH,t1
+-	sltu	AT,HH,t1
+-	and	t2,t8
+-	sltu	v0,t0,a2
+-	or	AT,t2
+-	.set	noreorder
+-	beqz	AT,.L_bn_div_words_inner_loop1_done
+-	dsubu	t1,v0
+-	dsubu	t0,a2
+-	b	.L_bn_div_words_inner_loop1
+-	dsubu	QT,1
+-	.set	reorder
+-.L_bn_div_words_inner_loop1_done:
+-
+-	dsll	a1,32
+-	dsubu	a0,t3,t0
+-	dsll	v0,QT,32
+-
+-	li	QT,-1
+-	dsrl	HH,a0,32
+-	dsrl	QT,32	/* q=0xffffffff */
+-	beq	DH,HH,.L_bn_div_words_skip_div2
+-	ddivu	zero,a0,DH
+-	mflo	QT
+-.L_bn_div_words_skip_div2:
+-#undef	DH
+-	dmultu	a2,QT
+-	dsll	t3,a0,32
+-	dsrl	AT,a1,32
+-	or	t3,AT
+-	mflo	t0
+-	mfhi	t1
+-.L_bn_div_words_inner_loop2:
+-	sltu	t2,t3,t0
+-	seq	t8,HH,t1
+-	sltu	AT,HH,t1
+-	and	t2,t8
+-	sltu	v1,t0,a2
+-	or	AT,t2
+-	.set	noreorder
+-	beqz	AT,.L_bn_div_words_inner_loop2_done
+-	dsubu	t1,v1
+-	dsubu	t0,a2
+-	b	.L_bn_div_words_inner_loop2
+-	dsubu	QT,1
+-	.set	reorder
+-.L_bn_div_words_inner_loop2_done:	
+-#undef	HH
+-
+-	dsubu	a0,t3,t0
+-	or	v0,QT
+-	dsrl	v1,a0,t9	/* v1 contains remainder if anybody wants it */
+-	dsrl	a2,t9		/* restore a2 */
+-	jr	ra
+-#undef	QT
+-END(bn_div_words)
+-
+-#define	a_0	t0
+-#define	a_1	t1
+-#define	a_2	t2
+-#define	a_3	t3
+-#define	b_0	ta0
+-#define	b_1	ta1
+-#define	b_2	ta2
+-#define	b_3	ta3
+-
+-#define	a_4	s0
+-#define	a_5	s2
+-#define	a_6	s4
+-#define	a_7	a1	/* once we load a[7] we don't need a anymore */
+-#define	b_4	s1
+-#define	b_5	s3
+-#define	b_6	s5
+-#define	b_7	a2	/* once we load b[7] we don't need b anymore */
+-
+-#define	t_1	t8
+-#define	t_2	t9
+-
+-#define	c_1	v0
+-#define	c_2	v1
+-#define	c_3	a3
+-
+-#define	FRAME_SIZE	48
+-
+-.align	5
+-LEAF(bn_mul_comba8)
+-	.set	noreorder
+-	PTR_SUB	sp,FRAME_SIZE
+-	.frame	sp,64,ra
+-	.set	reorder
+-	ld	a_0,0(a1)	/* If compiled with -mips3 option on
+-				 * R5000 box assembler barks on this
+-				 * line with "shouldn't have mult/div
+-				 * as last instruction in bb (R10K
+-				 * bug)" warning. If anybody out there
+-				 * has a clue about how to circumvent
+-				 * this do send me a note.
+-				 *		<appro at fy.chalmers.se>
+-				 */
+-	ld	b_0,0(a2)
+-	ld	a_1,8(a1)
+-	ld	a_2,16(a1)
+-	ld	a_3,24(a1)
+-	ld	b_1,8(a2)
+-	ld	b_2,16(a2)
+-	ld	b_3,24(a2)
+-	dmultu	a_0,b_0		/* mul_add_c(a[0],b[0],c1,c2,c3); */
+-	sd	s0,0(sp)
+-	sd	s1,8(sp)
+-	sd	s2,16(sp)
+-	sd	s3,24(sp)
+-	sd	s4,32(sp)
+-	sd	s5,40(sp)
+-	mflo	c_1
+-	mfhi	c_2
+-
+-	dmultu	a_0,b_1		/* mul_add_c(a[0],b[1],c2,c3,c1); */
+-	ld	a_4,32(a1)
+-	ld	a_5,40(a1)
+-	ld	a_6,48(a1)
+-	ld	a_7,56(a1)
+-	ld	b_4,32(a2)
+-	ld	b_5,40(a2)
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_2,t_1
+-	sltu	AT,c_2,t_1
+-	daddu	c_3,t_2,AT
+-	dmultu	a_1,b_0		/* mul_add_c(a[1],b[0],c2,c3,c1); */
+-	ld	b_6,48(a2)
+-	ld	b_7,56(a2)
+-	sd	c_1,0(a0)	/* r[0]=c1; */
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_2,t_1
+-	sltu	AT,c_2,t_1
+-	daddu	t_2,AT
+-	daddu	c_3,t_2
+-	sltu	c_1,c_3,t_2
+-	sd	c_2,8(a0)	/* r[1]=c2; */
+-
+-	dmultu	a_2,b_0		/* mul_add_c(a[2],b[0],c3,c1,c2); */
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_3,t_1
+-	sltu	AT,c_3,t_1
+-	daddu	t_2,AT
+-	daddu	c_1,t_2
+-	dmultu	a_1,b_1		/* mul_add_c(a[1],b[1],c3,c1,c2); */
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_3,t_1
+-	sltu	AT,c_3,t_1
+-	daddu	t_2,AT
+-	daddu	c_1,t_2
+-	sltu	c_2,c_1,t_2
+-	dmultu	a_0,b_2		/* mul_add_c(a[0],b[2],c3,c1,c2); */
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_3,t_1
+-	sltu	AT,c_3,t_1
+-	daddu	t_2,AT
+-	daddu	c_1,t_2
+-	sltu	AT,c_1,t_2
+-	daddu	c_2,AT
+-	sd	c_3,16(a0)	/* r[2]=c3; */
+-
+-	dmultu	a_0,b_3		/* mul_add_c(a[0],b[3],c1,c2,c3); */
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_1,t_1
+-	sltu	AT,c_1,t_1
+-	daddu	t_2,AT
+-	daddu	c_2,t_2
+-	sltu	c_3,c_2,t_2
+-	dmultu	a_1,b_2		/* mul_add_c(a[1],b[2],c1,c2,c3); */
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_1,t_1
+-	sltu	AT,c_1,t_1
+-	daddu	t_2,AT
+-	daddu	c_2,t_2
+-	sltu	AT,c_2,t_2
+-	daddu	c_3,AT
+-	dmultu	a_2,b_1		/* mul_add_c(a[2],b[1],c1,c2,c3); */
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_1,t_1
+-	sltu	AT,c_1,t_1
+-	daddu	t_2,AT
+-	daddu	c_2,t_2
+-	sltu	AT,c_2,t_2
+-	daddu	c_3,AT
+-	dmultu	a_3,b_0		/* mul_add_c(a[3],b[0],c1,c2,c3); */
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_1,t_1
+-	sltu	AT,c_1,t_1
+-	daddu	t_2,AT
+-	daddu	c_2,t_2
+-	sltu	AT,c_2,t_2
+-	daddu	c_3,AT
+-	sd	c_1,24(a0)	/* r[3]=c1; */
+-
+-	dmultu	a_4,b_0		/* mul_add_c(a[4],b[0],c2,c3,c1); */
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_2,t_1
+-	sltu	AT,c_2,t_1
+-	daddu	t_2,AT
+-	daddu	c_3,t_2
+-	sltu	c_1,c_3,t_2
+-	dmultu	a_3,b_1		/* mul_add_c(a[3],b[1],c2,c3,c1); */
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_2,t_1
+-	sltu	AT,c_2,t_1
+-	daddu	t_2,AT
+-	daddu	c_3,t_2
+-	sltu	AT,c_3,t_2
+-	daddu	c_1,AT
+-	dmultu	a_2,b_2		/* mul_add_c(a[2],b[2],c2,c3,c1); */
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_2,t_1
+-	sltu	AT,c_2,t_1
+-	daddu	t_2,AT
+-	daddu	c_3,t_2
+-	sltu	AT,c_3,t_2
+-	daddu	c_1,AT
+-	dmultu	a_1,b_3		/* mul_add_c(a[1],b[3],c2,c3,c1); */
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_2,t_1
+-	sltu	AT,c_2,t_1
+-	daddu	t_2,AT
+-	daddu	c_3,t_2
+-	sltu	AT,c_3,t_2
+-	daddu	c_1,AT
+-	dmultu	a_0,b_4		/* mul_add_c(a[0],b[4],c2,c3,c1); */
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_2,t_1
+-	sltu	AT,c_2,t_1
+-	daddu	t_2,AT
+-	daddu	c_3,t_2
+-	sltu	AT,c_3,t_2
+-	daddu	c_1,AT
+-	sd	c_2,32(a0)	/* r[4]=c2; */
+-
+-	dmultu	a_0,b_5		/* mul_add_c(a[0],b[5],c3,c1,c2); */
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_3,t_1
+-	sltu	AT,c_3,t_1
+-	daddu	t_2,AT
+-	daddu	c_1,t_2
+-	sltu	c_2,c_1,t_2
+-	dmultu	a_1,b_4		/* mul_add_c(a[1],b[4],c3,c1,c2); */
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_3,t_1
+-	sltu	AT,c_3,t_1
+-	daddu	t_2,AT
+-	daddu	c_1,t_2
+-	sltu	AT,c_1,t_2
+-	daddu	c_2,AT
+-	dmultu	a_2,b_3		/* mul_add_c(a[2],b[3],c3,c1,c2); */
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_3,t_1
+-	sltu	AT,c_3,t_1
+-	daddu	t_2,AT
+-	daddu	c_1,t_2
+-	sltu	AT,c_1,t_2
+-	daddu	c_2,AT
+-	dmultu	a_3,b_2		/* mul_add_c(a[3],b[2],c3,c1,c2); */
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_3,t_1
+-	sltu	AT,c_3,t_1
+-	daddu	t_2,AT
+-	daddu	c_1,t_2
+-	sltu	AT,c_1,t_2
+-	daddu	c_2,AT
+-	dmultu	a_4,b_1		/* mul_add_c(a[4],b[1],c3,c1,c2); */
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_3,t_1
+-	sltu	AT,c_3,t_1
+-	daddu	t_2,AT
+-	daddu	c_1,t_2
+-	sltu	AT,c_1,t_2
+-	daddu	c_2,AT
+-	dmultu	a_5,b_0		/* mul_add_c(a[5],b[0],c3,c1,c2); */
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_3,t_1
+-	sltu	AT,c_3,t_1
+-	daddu	t_2,AT
+-	daddu	c_1,t_2
+-	sltu	AT,c_1,t_2
+-	daddu	c_2,AT
+-	sd	c_3,40(a0)	/* r[5]=c3; */
+-
+-	dmultu	a_6,b_0		/* mul_add_c(a[6],b[0],c1,c2,c3); */
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_1,t_1
+-	sltu	AT,c_1,t_1
+-	daddu	t_2,AT
+-	daddu	c_2,t_2
+-	sltu	c_3,c_2,t_2
+-	dmultu	a_5,b_1		/* mul_add_c(a[5],b[1],c1,c2,c3); */
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_1,t_1
+-	sltu	AT,c_1,t_1
+-	daddu	t_2,AT
+-	daddu	c_2,t_2
+-	sltu	AT,c_2,t_2
+-	daddu	c_3,AT
+-	dmultu	a_4,b_2		/* mul_add_c(a[4],b[2],c1,c2,c3); */
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_1,t_1
+-	sltu	AT,c_1,t_1
+-	daddu	t_2,AT
+-	daddu	c_2,t_2
+-	sltu	AT,c_2,t_2
+-	daddu	c_3,AT
+-	dmultu	a_3,b_3		/* mul_add_c(a[3],b[3],c1,c2,c3); */
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_1,t_1
+-	sltu	AT,c_1,t_1
+-	daddu	t_2,AT
+-	daddu	c_2,t_2
+-	sltu	AT,c_2,t_2
+-	daddu	c_3,AT
+-	dmultu	a_2,b_4		/* mul_add_c(a[2],b[4],c1,c2,c3); */
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_1,t_1
+-	sltu	AT,c_1,t_1
+-	daddu	t_2,AT
+-	daddu	c_2,t_2
+-	sltu	AT,c_2,t_2
+-	daddu	c_3,AT
+-	dmultu	a_1,b_5		/* mul_add_c(a[1],b[5],c1,c2,c3); */
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_1,t_1
+-	sltu	AT,c_1,t_1
+-	daddu	t_2,AT
+-	daddu	c_2,t_2
+-	sltu	AT,c_2,t_2
+-	daddu	c_3,AT
+-	dmultu	a_0,b_6		/* mul_add_c(a[0],b[6],c1,c2,c3); */
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_1,t_1
+-	sltu	AT,c_1,t_1
+-	daddu	t_2,AT
+-	daddu	c_2,t_2
+-	sltu	AT,c_2,t_2
+-	daddu	c_3,AT
+-	sd	c_1,48(a0)	/* r[6]=c1; */
+-
+-	dmultu	a_0,b_7		/* mul_add_c(a[0],b[7],c2,c3,c1); */
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_2,t_1
+-	sltu	AT,c_2,t_1
+-	daddu	t_2,AT
+-	daddu	c_3,t_2
+-	sltu	c_1,c_3,t_2
+-	dmultu	a_1,b_6		/* mul_add_c(a[1],b[6],c2,c3,c1); */
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_2,t_1
+-	sltu	AT,c_2,t_1
+-	daddu	t_2,AT
+-	daddu	c_3,t_2
+-	sltu	AT,c_3,t_2
+-	daddu	c_1,AT
+-	dmultu	a_2,b_5		/* mul_add_c(a[2],b[5],c2,c3,c1); */
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_2,t_1
+-	sltu	AT,c_2,t_1
+-	daddu	t_2,AT
+-	daddu	c_3,t_2
+-	sltu	AT,c_3,t_2
+-	daddu	c_1,AT
+-	dmultu	a_3,b_4		/* mul_add_c(a[3],b[4],c2,c3,c1); */
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_2,t_1
+-	sltu	AT,c_2,t_1
+-	daddu	t_2,AT
+-	daddu	c_3,t_2
+-	sltu	AT,c_3,t_2
+-	daddu	c_1,AT
+-	dmultu	a_4,b_3		/* mul_add_c(a[4],b[3],c2,c3,c1); */
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_2,t_1
+-	sltu	AT,c_2,t_1
+-	daddu	t_2,AT
+-	daddu	c_3,t_2
+-	sltu	AT,c_3,t_2
+-	daddu	c_1,AT
+-	dmultu	a_5,b_2		/* mul_add_c(a[5],b[2],c2,c3,c1); */
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_2,t_1
+-	sltu	AT,c_2,t_1
+-	daddu	t_2,AT
+-	daddu	c_3,t_2
+-	sltu	AT,c_3,t_2
+-	daddu	c_1,AT
+-	dmultu	a_6,b_1		/* mul_add_c(a[6],b[1],c2,c3,c1); */
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_2,t_1
+-	sltu	AT,c_2,t_1
+-	daddu	t_2,AT
+-	daddu	c_3,t_2
+-	sltu	AT,c_3,t_2
+-	daddu	c_1,AT
+-	dmultu	a_7,b_0		/* mul_add_c(a[7],b[0],c2,c3,c1); */
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_2,t_1
+-	sltu	AT,c_2,t_1
+-	daddu	t_2,AT
+-	daddu	c_3,t_2
+-	sltu	AT,c_3,t_2
+-	daddu	c_1,AT
+-	sd	c_2,56(a0)	/* r[7]=c2; */
+-
+-	dmultu	a_7,b_1		/* mul_add_c(a[7],b[1],c3,c1,c2); */
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_3,t_1
+-	sltu	AT,c_3,t_1
+-	daddu	t_2,AT
+-	daddu	c_1,t_2
+-	sltu	c_2,c_1,t_2
+-	dmultu	a_6,b_2		/* mul_add_c(a[6],b[2],c3,c1,c2); */
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_3,t_1
+-	sltu	AT,c_3,t_1
+-	daddu	t_2,AT
+-	daddu	c_1,t_2
+-	sltu	AT,c_1,t_2
+-	daddu	c_2,AT
+-	dmultu	a_5,b_3		/* mul_add_c(a[5],b[3],c3,c1,c2); */
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_3,t_1
+-	sltu	AT,c_3,t_1
+-	daddu	t_2,AT
+-	daddu	c_1,t_2
+-	sltu	AT,c_1,t_2
+-	daddu	c_2,AT
+-	dmultu	a_4,b_4		/* mul_add_c(a[4],b[4],c3,c1,c2); */
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_3,t_1
+-	sltu	AT,c_3,t_1
+-	daddu	t_2,AT
+-	daddu	c_1,t_2
+-	sltu	AT,c_1,t_2
+-	daddu	c_2,AT
+-	dmultu	a_3,b_5		/* mul_add_c(a[3],b[5],c3,c1,c2); */
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_3,t_1
+-	sltu	AT,c_3,t_1
+-	daddu	t_2,AT
+-	daddu	c_1,t_2
+-	sltu	AT,c_1,t_2
+-	daddu	c_2,AT
+-	dmultu	a_2,b_6		/* mul_add_c(a[2],b[6],c3,c1,c2); */
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_3,t_1
+-	sltu	AT,c_3,t_1
+-	daddu	t_2,AT
+-	daddu	c_1,t_2
+-	sltu	AT,c_1,t_2
+-	daddu	c_2,AT
+-	dmultu	a_1,b_7		/* mul_add_c(a[1],b[7],c3,c1,c2); */
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_3,t_1
+-	sltu	AT,c_3,t_1
+-	daddu	t_2,AT
+-	daddu	c_1,t_2
+-	sltu	AT,c_1,t_2
+-	daddu	c_2,AT
+-	sd	c_3,64(a0)	/* r[8]=c3; */
+-
+-	dmultu	a_2,b_7		/* mul_add_c(a[2],b[7],c1,c2,c3); */
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_1,t_1
+-	sltu	AT,c_1,t_1
+-	daddu	t_2,AT
+-	daddu	c_2,t_2
+-	sltu	c_3,c_2,t_2
+-	dmultu	a_3,b_6		/* mul_add_c(a[3],b[6],c1,c2,c3); */
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_1,t_1
+-	sltu	AT,c_1,t_1
+-	daddu	t_2,AT
+-	daddu	c_2,t_2
+-	sltu	AT,c_2,t_2
+-	daddu	c_3,AT
+-	dmultu	a_4,b_5		/* mul_add_c(a[4],b[5],c1,c2,c3); */
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_1,t_1
+-	sltu	AT,c_1,t_1
+-	daddu	t_2,AT
+-	daddu	c_2,t_2
+-	sltu	AT,c_2,t_2
+-	daddu	c_3,AT
+-	dmultu	a_5,b_4		/* mul_add_c(a[5],b[4],c1,c2,c3); */
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_1,t_1
+-	sltu	AT,c_1,t_1
+-	daddu	t_2,AT
+-	daddu	c_2,t_2
+-	sltu	AT,c_2,t_2
+-	daddu	c_3,AT
+-	dmultu	a_6,b_3		/* mul_add_c(a[6],b[3],c1,c2,c3); */
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_1,t_1
+-	sltu	AT,c_1,t_1
+-	daddu	t_2,AT
+-	daddu	c_2,t_2
+-	sltu	AT,c_2,t_2
+-	daddu	c_3,AT
+-	dmultu	a_7,b_2		/* mul_add_c(a[7],b[2],c1,c2,c3); */
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_1,t_1
+-	sltu	AT,c_1,t_1
+-	daddu	t_2,AT
+-	daddu	c_2,t_2
+-	sltu	AT,c_2,t_2
+-	daddu	c_3,AT
+-	sd	c_1,72(a0)	/* r[9]=c1; */
+-
+-	dmultu	a_7,b_3		/* mul_add_c(a[7],b[3],c2,c3,c1); */
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_2,t_1
+-	sltu	AT,c_2,t_1
+-	daddu	t_2,AT
+-	daddu	c_3,t_2
+-	sltu	c_1,c_3,t_2
+-	dmultu	a_6,b_4		/* mul_add_c(a[6],b[4],c2,c3,c1); */
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_2,t_1
+-	sltu	AT,c_2,t_1
+-	daddu	t_2,AT
+-	daddu	c_3,t_2
+-	sltu	AT,c_3,t_2
+-	daddu	c_1,AT
+-	dmultu	a_5,b_5		/* mul_add_c(a[5],b[5],c2,c3,c1); */
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_2,t_1
+-	sltu	AT,c_2,t_1
+-	daddu	t_2,AT
+-	daddu	c_3,t_2
+-	sltu	AT,c_3,t_2
+-	daddu	c_1,AT
+-	dmultu	a_4,b_6		/* mul_add_c(a[4],b[6],c2,c3,c1); */
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_2,t_1
+-	sltu	AT,c_2,t_1
+-	daddu	t_2,AT
+-	daddu	c_3,t_2
+-	sltu	AT,c_3,t_2
+-	daddu	c_1,AT
+-	dmultu	a_3,b_7		/* mul_add_c(a[3],b[7],c2,c3,c1); */
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_2,t_1
+-	sltu	AT,c_2,t_1
+-	daddu	t_2,AT
+-	daddu	c_3,t_2
+-	sltu	AT,c_3,t_2
+-	daddu	c_1,AT
+-	sd	c_2,80(a0)	/* r[10]=c2; */
+-
+-	dmultu	a_4,b_7		/* mul_add_c(a[4],b[7],c3,c1,c2); */
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_3,t_1
+-	sltu	AT,c_3,t_1
+-	daddu	t_2,AT
+-	daddu	c_1,t_2
+-	sltu	c_2,c_1,t_2
+-	dmultu	a_5,b_6		/* mul_add_c(a[5],b[6],c3,c1,c2); */
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_3,t_1
+-	sltu	AT,c_3,t_1
+-	daddu	t_2,AT
+-	daddu	c_1,t_2
+-	sltu	AT,c_1,t_2
+-	daddu	c_2,AT
+-	dmultu	a_6,b_5		/* mul_add_c(a[6],b[5],c3,c1,c2); */
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_3,t_1
+-	sltu	AT,c_3,t_1
+-	daddu	t_2,AT
+-	daddu	c_1,t_2
+-	sltu	AT,c_1,t_2
+-	daddu	c_2,AT
+-	dmultu	a_7,b_4		/* mul_add_c(a[7],b[4],c3,c1,c2); */
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_3,t_1
+-	sltu	AT,c_3,t_1
+-	daddu	t_2,AT
+-	daddu	c_1,t_2
+-	sltu	AT,c_1,t_2
+-	daddu	c_2,AT
+-	sd	c_3,88(a0)	/* r[11]=c3; */
+-
+-	dmultu	a_7,b_5		/* mul_add_c(a[7],b[5],c1,c2,c3); */
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_1,t_1
+-	sltu	AT,c_1,t_1
+-	daddu	t_2,AT
+-	daddu	c_2,t_2
+-	sltu	c_3,c_2,t_2
+-	dmultu	a_6,b_6		/* mul_add_c(a[6],b[6],c1,c2,c3); */
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_1,t_1
+-	sltu	AT,c_1,t_1
+-	daddu	t_2,AT
+-	daddu	c_2,t_2
+-	sltu	AT,c_2,t_2
+-	daddu	c_3,AT
+-	dmultu	a_5,b_7		/* mul_add_c(a[5],b[7],c1,c2,c3); */
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_1,t_1
+-	sltu	AT,c_1,t_1
+-	daddu	t_2,AT
+-	daddu	c_2,t_2
+-	sltu	AT,c_2,t_2
+-	daddu	c_3,AT
+-	sd	c_1,96(a0)	/* r[12]=c1; */
+-
+-	dmultu	a_6,b_7		/* mul_add_c(a[6],b[7],c2,c3,c1); */
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_2,t_1
+-	sltu	AT,c_2,t_1
+-	daddu	t_2,AT
+-	daddu	c_3,t_2
+-	sltu	c_1,c_3,t_2
+-	dmultu	a_7,b_6		/* mul_add_c(a[7],b[6],c2,c3,c1); */
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_2,t_1
+-	sltu	AT,c_2,t_1
+-	daddu	t_2,AT
+-	daddu	c_3,t_2
+-	sltu	AT,c_3,t_2
+-	daddu	c_1,AT
+-	sd	c_2,104(a0)	/* r[13]=c2; */
+-
+-	dmultu	a_7,b_7		/* mul_add_c(a[7],b[7],c3,c1,c2); */
+-	ld	s0,0(sp)
+-	ld	s1,8(sp)
+-	ld	s2,16(sp)
+-	ld	s3,24(sp)
+-	ld	s4,32(sp)
+-	ld	s5,40(sp)
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_3,t_1
+-	sltu	AT,c_3,t_1
+-	daddu	t_2,AT
+-	daddu	c_1,t_2
+-	sd	c_3,112(a0)	/* r[14]=c3; */
+-	sd	c_1,120(a0)	/* r[15]=c1; */
+-
+-	PTR_ADD	sp,FRAME_SIZE
+-
+-	jr	ra
+-END(bn_mul_comba8)
+-
+-.align	5
+-LEAF(bn_mul_comba4)
+-	.set	reorder
+-	ld	a_0,0(a1)
+-	ld	b_0,0(a2)
+-	ld	a_1,8(a1)
+-	ld	a_2,16(a1)
+-	dmultu	a_0,b_0		/* mul_add_c(a[0],b[0],c1,c2,c3); */
+-	ld	a_3,24(a1)
+-	ld	b_1,8(a2)
+-	ld	b_2,16(a2)
+-	ld	b_3,24(a2)
+-	mflo	c_1
+-	mfhi	c_2
+-	sd	c_1,0(a0)
+-
+-	dmultu	a_0,b_1		/* mul_add_c(a[0],b[1],c2,c3,c1); */
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_2,t_1
+-	sltu	AT,c_2,t_1
+-	daddu	c_3,t_2,AT
+-	dmultu	a_1,b_0		/* mul_add_c(a[1],b[0],c2,c3,c1); */
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_2,t_1
+-	sltu	AT,c_2,t_1
+-	daddu	t_2,AT
+-	daddu	c_3,t_2
+-	sltu	c_1,c_3,t_2
+-	sd	c_2,8(a0)
+-
+-	dmultu	a_2,b_0		/* mul_add_c(a[2],b[0],c3,c1,c2); */
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_3,t_1
+-	sltu	AT,c_3,t_1
+-	daddu	t_2,AT
+-	daddu	c_1,t_2
+-	dmultu	a_1,b_1		/* mul_add_c(a[1],b[1],c3,c1,c2); */
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_3,t_1
+-	sltu	AT,c_3,t_1
+-	daddu	t_2,AT
+-	daddu	c_1,t_2
+-	sltu	c_2,c_1,t_2
+-	dmultu	a_0,b_2		/* mul_add_c(a[0],b[2],c3,c1,c2); */
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_3,t_1
+-	sltu	AT,c_3,t_1
+-	daddu	t_2,AT
+-	daddu	c_1,t_2
+-	sltu	AT,c_1,t_2
+-	daddu	c_2,AT
+-	sd	c_3,16(a0)
+-
+-	dmultu	a_0,b_3		/* mul_add_c(a[0],b[3],c1,c2,c3); */
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_1,t_1
+-	sltu	AT,c_1,t_1
+-	daddu	t_2,AT
+-	daddu	c_2,t_2
+-	sltu	c_3,c_2,t_2
+-	dmultu	a_1,b_2		/* mul_add_c(a[1],b[2],c1,c2,c3); */
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_1,t_1
+-	sltu	AT,c_1,t_1
+-	daddu	t_2,AT
+-	daddu	c_2,t_2
+-	sltu	AT,c_2,t_2
+-	daddu	c_3,AT
+-	dmultu	a_2,b_1		/* mul_add_c(a[2],b[1],c1,c2,c3); */
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_1,t_1
+-	sltu	AT,c_1,t_1
+-	daddu	t_2,AT
+-	daddu	c_2,t_2
+-	sltu	AT,c_2,t_2
+-	daddu	c_3,AT
+-	dmultu	a_3,b_0		/* mul_add_c(a[3],b[0],c1,c2,c3); */
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_1,t_1
+-	sltu	AT,c_1,t_1
+-	daddu	t_2,AT
+-	daddu	c_2,t_2
+-	sltu	AT,c_2,t_2
+-	daddu	c_3,AT
+-	sd	c_1,24(a0)
+-
+-	dmultu	a_3,b_1		/* mul_add_c(a[3],b[1],c2,c3,c1); */
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_2,t_1
+-	sltu	AT,c_2,t_1
+-	daddu	t_2,AT
+-	daddu	c_3,t_2
+-	sltu	c_1,c_3,t_2
+-	dmultu	a_2,b_2		/* mul_add_c(a[2],b[2],c2,c3,c1); */
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_2,t_1
+-	sltu	AT,c_2,t_1
+-	daddu	t_2,AT
+-	daddu	c_3,t_2
+-	sltu	AT,c_3,t_2
+-	daddu	c_1,AT
+-	dmultu	a_1,b_3		/* mul_add_c(a[1],b[3],c2,c3,c1); */
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_2,t_1
+-	sltu	AT,c_2,t_1
+-	daddu	t_2,AT
+-	daddu	c_3,t_2
+-	sltu	AT,c_3,t_2
+-	daddu	c_1,AT
+-	sd	c_2,32(a0)
+-
+-	dmultu	a_2,b_3		/* mul_add_c(a[2],b[3],c3,c1,c2); */
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_3,t_1
+-	sltu	AT,c_3,t_1
+-	daddu	t_2,AT
+-	daddu	c_1,t_2
+-	sltu	c_2,c_1,t_2
+-	dmultu	a_3,b_2		/* mul_add_c(a[3],b[2],c3,c1,c2); */
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_3,t_1
+-	sltu	AT,c_3,t_1
+-	daddu	t_2,AT
+-	daddu	c_1,t_2
+-	sltu	AT,c_1,t_2
+-	daddu	c_2,AT
+-	sd	c_3,40(a0)
+-
+-	dmultu	a_3,b_3		/* mul_add_c(a[3],b[3],c1,c2,c3); */
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_1,t_1
+-	sltu	AT,c_1,t_1
+-	daddu	t_2,AT
+-	daddu	c_2,t_2
+-	sd	c_1,48(a0)
+-	sd	c_2,56(a0)
+-
+-	jr	ra
+-END(bn_mul_comba4)
+-
+-#undef	a_4
+-#undef	a_5
+-#undef	a_6
+-#undef	a_7
+-#define	a_4	b_0
+-#define	a_5	b_1
+-#define	a_6	b_2
+-#define	a_7	b_3
+-
+-.align	5
+-LEAF(bn_sqr_comba8)
+-	.set	reorder
+-	ld	a_0,0(a1)
+-	ld	a_1,8(a1)
+-	ld	a_2,16(a1)
+-	ld	a_3,24(a1)
+-
+-	dmultu	a_0,a_0		/* mul_add_c(a[0],b[0],c1,c2,c3); */
+-	ld	a_4,32(a1)
+-	ld	a_5,40(a1)
+-	ld	a_6,48(a1)
+-	ld	a_7,56(a1)
+-	mflo	c_1
+-	mfhi	c_2
+-	sd	c_1,0(a0)
+-
+-	dmultu	a_0,a_1		/* mul_add_c2(a[0],b[1],c2,c3,c1); */
+-	mflo	t_1
+-	mfhi	t_2
+-	slt	c_1,t_2,zero
+-	dsll	t_2,1
+-	slt	a2,t_1,zero
+-	daddu	t_2,a2
+-	dsll	t_1,1
+-	daddu	c_2,t_1
+-	sltu	AT,c_2,t_1
+-	daddu	c_3,t_2,AT
+-	sd	c_2,8(a0)
+-
+-	dmultu	a_2,a_0		/* mul_add_c2(a[2],b[0],c3,c1,c2); */
+-	mflo	t_1
+-	mfhi	t_2
+-	slt	c_2,t_2,zero
+-	dsll	t_2,1
+-	slt	a2,t_1,zero
+-	daddu	t_2,a2
+-	dsll	t_1,1
+-	daddu	c_3,t_1
+-	sltu	AT,c_3,t_1
+-	daddu	t_2,AT
+-	daddu	c_1,t_2
+-	sltu	AT,c_1,t_2
+-	daddu	c_2,AT
+-	dmultu	a_1,a_1		/* mul_add_c(a[1],b[1],c3,c1,c2); */
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_3,t_1
+-	sltu	AT,c_3,t_1
+-	daddu	t_2,AT
+-	daddu	c_1,t_2
+-	sltu	AT,c_1,t_2
+-	daddu	c_2,AT
+-	sd	c_3,16(a0)
+-
+-	dmultu	a_0,a_3		/* mul_add_c2(a[0],b[3],c1,c2,c3); */
+-	mflo	t_1
+-	mfhi	t_2
+-	slt	c_3,t_2,zero
+-	dsll	t_2,1
+-	slt	a2,t_1,zero
+-	daddu	t_2,a2
+-	dsll	t_1,1
+-	daddu	c_1,t_1
+-	sltu	AT,c_1,t_1
+-	daddu	t_2,AT
+-	daddu	c_2,t_2
+-	sltu	AT,c_2,t_2
+-	daddu	c_3,AT
+-	dmultu	a_1,a_2		/* mul_add_c2(a[1],b[2],c1,c2,c3); */
+-	mflo	t_1
+-	mfhi	t_2
+-	slt	AT,t_2,zero
+-	daddu	c_3,AT
+-	dsll	t_2,1
+-	slt	a2,t_1,zero
+-	daddu	t_2,a2
+-	dsll	t_1,1
+-	daddu	c_1,t_1
+-	sltu	AT,c_1,t_1
+-	daddu	t_2,AT
+-	daddu	c_2,t_2
+-	sltu	AT,c_2,t_2
+-	daddu	c_3,AT
+-	sd	c_1,24(a0)
+-
+-	dmultu	a_4,a_0		/* mul_add_c2(a[4],b[0],c2,c3,c1); */
+-	mflo	t_1
+-	mfhi	t_2
+-	slt	c_1,t_2,zero
+-	dsll	t_2,1
+-	slt	a2,t_1,zero
+-	daddu	t_2,a2
+-	dsll	t_1,1
+-	daddu	c_2,t_1
+-	sltu	AT,c_2,t_1
+-	daddu	t_2,AT
+-	daddu	c_3,t_2
+-	sltu	AT,c_3,t_2
+-	daddu	c_1,AT
+-	dmultu	a_3,a_1		/* mul_add_c2(a[3],b[1],c2,c3,c1); */
+-	mflo	t_1
+-	mfhi	t_2
+-	slt	AT,t_2,zero
+-	daddu	c_1,AT
+-	dsll	t_2,1
+-	slt	a2,t_1,zero
+-	daddu	t_2,a2
+-	dsll	t_1,1
+-	daddu	c_2,t_1
+-	sltu	AT,c_2,t_1
+-	daddu	t_2,AT
+-	daddu	c_3,t_2
+-	sltu	AT,c_3,t_2
+-	daddu	c_1,AT
+-	dmultu	a_2,a_2		/* mul_add_c(a[2],b[2],c2,c3,c1); */
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_2,t_1
+-	sltu	AT,c_2,t_1
+-	daddu	t_2,AT
+-	daddu	c_3,t_2
+-	sltu	AT,c_3,t_2
+-	daddu	c_1,AT
+-	sd	c_2,32(a0)
+-
+-	dmultu	a_0,a_5		/* mul_add_c2(a[0],b[5],c3,c1,c2); */
+-	mflo	t_1
+-	mfhi	t_2
+-	slt	c_2,t_2,zero
+-	dsll	t_2,1
+-	slt	a2,t_1,zero
+-	daddu	t_2,a2
+-	dsll	t_1,1
+-	daddu	c_3,t_1
+-	sltu	AT,c_3,t_1
+-	daddu	t_2,AT
+-	daddu	c_1,t_2
+-	sltu	AT,c_1,t_2
+-	daddu	c_2,AT
+-	dmultu	a_1,a_4		/* mul_add_c2(a[1],b[4],c3,c1,c2); */
+-	mflo	t_1
+-	mfhi	t_2
+-	slt	AT,t_2,zero
+-	daddu	c_2,AT
+-	dsll	t_2,1
+-	slt	a2,t_1,zero
+-	daddu	t_2,a2
+-	dsll	t_1,1
+-	daddu	c_3,t_1
+-	sltu	AT,c_3,t_1
+-	daddu	t_2,AT
+-	daddu	c_1,t_2
+-	sltu	AT,c_1,t_2
+-	daddu	c_2,AT
+-	dmultu	a_2,a_3		/* mul_add_c2(a[2],b[3],c3,c1,c2); */
+-	mflo	t_1
+-	mfhi	t_2
+-	slt	AT,t_2,zero
+-	daddu	c_2,AT
+-	dsll	t_2,1
+-	slt	a2,t_1,zero
+-	daddu	t_2,a2
+-	dsll	t_1,1
+-	daddu	c_3,t_1
+-	sltu	AT,c_3,t_1
+-	daddu	t_2,AT
+-	daddu	c_1,t_2
+-	sltu	AT,c_1,t_2
+-	daddu	c_2,AT
+-	sd	c_3,40(a0)
+-
+-	dmultu	a_6,a_0		/* mul_add_c2(a[6],b[0],c1,c2,c3); */
+-	mflo	t_1
+-	mfhi	t_2
+-	slt	c_3,t_2,zero
+-	dsll	t_2,1
+-	slt	a2,t_1,zero
+-	daddu	t_2,a2
+-	dsll	t_1,1
+-	daddu	c_1,t_1
+-	sltu	AT,c_1,t_1
+-	daddu	t_2,AT
+-	daddu	c_2,t_2
+-	sltu	AT,c_2,t_2
+-	daddu	c_3,AT
+-	dmultu	a_5,a_1		/* mul_add_c2(a[5],b[1],c1,c2,c3); */
+-	mflo	t_1
+-	mfhi	t_2
+-	slt	AT,t_2,zero
+-	daddu	c_3,AT
+-	dsll	t_2,1
+-	slt	a2,t_1,zero
+-	daddu	t_2,a2
+-	dsll	t_1,1
+-	daddu	c_1,t_1
+-	sltu	AT,c_1,t_1
+-	daddu	t_2,AT
+-	daddu	c_2,t_2
+-	sltu	AT,c_2,t_2
+-	daddu	c_3,AT
+-	dmultu	a_4,a_2		/* mul_add_c2(a[4],b[2],c1,c2,c3); */
+-	mflo	t_1
+-	mfhi	t_2
+-	slt	AT,t_2,zero
+-	daddu	c_3,AT
+-	dsll	t_2,1
+-	slt	a2,t_1,zero
+-	daddu	t_2,a2
+-	dsll	t_1,1
+-	daddu	c_1,t_1
+-	sltu	AT,c_1,t_1
+-	daddu	t_2,AT
+-	daddu	c_2,t_2
+-	sltu	AT,c_2,t_2
+-	daddu	c_3,AT
+-	dmultu	a_3,a_3		/* mul_add_c(a[3],b[3],c1,c2,c3); */
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_1,t_1
+-	sltu	AT,c_1,t_1
+-	daddu	t_2,AT
+-	daddu	c_2,t_2
+-	sltu	AT,c_2,t_2
+-	daddu	c_3,AT
+-	sd	c_1,48(a0)
+-
+-	dmultu	a_0,a_7		/* mul_add_c2(a[0],b[7],c2,c3,c1); */
+-	mflo	t_1
+-	mfhi	t_2
+-	slt	c_1,t_2,zero
+-	dsll	t_2,1
+-	slt	a2,t_1,zero
+-	daddu	t_2,a2
+-	dsll	t_1,1
+-	daddu	c_2,t_1
+-	sltu	AT,c_2,t_1
+-	daddu	t_2,AT
+-	daddu	c_3,t_2
+-	sltu	AT,c_3,t_2
+-	daddu	c_1,AT
+-	dmultu	a_1,a_6		/* mul_add_c2(a[1],b[6],c2,c3,c1); */
+-	mflo	t_1
+-	mfhi	t_2
+-	slt	AT,t_2,zero
+-	daddu	c_1,AT
+-	dsll	t_2,1
+-	slt	a2,t_1,zero
+-	daddu	t_2,a2
+-	dsll	t_1,1
+-	daddu	c_2,t_1
+-	sltu	AT,c_2,t_1
+-	daddu	t_2,AT
+-	daddu	c_3,t_2
+-	sltu	AT,c_3,t_2
+-	daddu	c_1,AT
+-	dmultu	a_2,a_5		/* mul_add_c2(a[2],b[5],c2,c3,c1); */
+-	mflo	t_1
+-	mfhi	t_2
+-	slt	AT,t_2,zero
+-	daddu	c_1,AT
+-	dsll	t_2,1
+-	slt	a2,t_1,zero
+-	daddu	t_2,a2
+-	dsll	t_1,1
+-	daddu	c_2,t_1
+-	sltu	AT,c_2,t_1
+-	daddu	t_2,AT
+-	daddu	c_3,t_2
+-	sltu	AT,c_3,t_2
+-	daddu	c_1,AT
+-	dmultu	a_3,a_4		/* mul_add_c2(a[3],b[4],c2,c3,c1); */
+-	mflo	t_1
+-	mfhi	t_2
+-	slt	AT,t_2,zero
+-	daddu	c_1,AT
+-	dsll	t_2,1
+-	slt	a2,t_1,zero
+-	daddu	t_2,a2
+-	dsll	t_1,1
+-	daddu	c_2,t_1
+-	sltu	AT,c_2,t_1
+-	daddu	t_2,AT
+-	daddu	c_3,t_2
+-	sltu	AT,c_3,t_2
+-	daddu	c_1,AT
+-	sd	c_2,56(a0)
+-
+-	dmultu	a_7,a_1		/* mul_add_c2(a[7],b[1],c3,c1,c2); */
+-	mflo	t_1
+-	mfhi	t_2
+-	slt	c_2,t_2,zero
+-	dsll	t_2,1
+-	slt	a2,t_1,zero
+-	daddu	t_2,a2
+-	dsll	t_1,1
+-	daddu	c_3,t_1
+-	sltu	AT,c_3,t_1
+-	daddu	t_2,AT
+-	daddu	c_1,t_2
+-	sltu	AT,c_1,t_2
+-	daddu	c_2,AT
+-	dmultu	a_6,a_2		/* mul_add_c2(a[6],b[2],c3,c1,c2); */
+-	mflo	t_1
+-	mfhi	t_2
+-	slt	AT,t_2,zero
+-	daddu	c_2,AT
+-	dsll	t_2,1
+-	slt	a2,t_1,zero
+-	daddu	t_2,a2
+-	dsll	t_1,1
+-	daddu	c_3,t_1
+-	sltu	AT,c_3,t_1
+-	daddu	t_2,AT
+-	daddu	c_1,t_2
+-	sltu	AT,c_1,t_2
+-	daddu	c_2,AT
+-	dmultu	a_5,a_3		/* mul_add_c2(a[5],b[3],c3,c1,c2); */
+-	mflo	t_1
+-	mfhi	t_2
+-	slt	AT,t_2,zero
+-	daddu	c_2,AT
+-	dsll	t_2,1
+-	slt	a2,t_1,zero
+-	daddu	t_2,a2
+-	dsll	t_1,1
+-	daddu	c_3,t_1
+-	sltu	AT,c_3,t_1
+-	daddu	t_2,AT
+-	daddu	c_1,t_2
+-	sltu	AT,c_1,t_2
+-	daddu	c_2,AT
+-	dmultu	a_4,a_4		/* mul_add_c(a[4],b[4],c3,c1,c2); */
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_3,t_1
+-	sltu	AT,c_3,t_1
+-	daddu	t_2,AT
+-	daddu	c_1,t_2
+-	sltu	AT,c_1,t_2
+-	daddu	c_2,AT
+-	sd	c_3,64(a0)
+-
+-	dmultu	a_2,a_7		/* mul_add_c2(a[2],b[7],c1,c2,c3); */
+-	mflo	t_1
+-	mfhi	t_2
+-	slt	c_3,t_2,zero
+-	dsll	t_2,1
+-	slt	a2,t_1,zero
+-	daddu	t_2,a2
+-	dsll	t_1,1
+-	daddu	c_1,t_1
+-	sltu	AT,c_1,t_1
+-	daddu	t_2,AT
+-	daddu	c_2,t_2
+-	sltu	AT,c_2,t_2
+-	daddu	c_3,AT
+-	dmultu	a_3,a_6		/* mul_add_c2(a[3],b[6],c1,c2,c3); */
+-	mflo	t_1
+-	mfhi	t_2
+-	slt	AT,t_2,zero
+-	daddu	c_3,AT
+-	dsll	t_2,1
+-	slt	a2,t_1,zero
+-	daddu	t_2,a2
+-	dsll	t_1,1
+-	daddu	c_1,t_1
+-	sltu	AT,c_1,t_1
+-	daddu	t_2,AT
+-	daddu	c_2,t_2
+-	sltu	AT,c_2,t_2
+-	daddu	c_3,AT
+-	dmultu	a_4,a_5		/* mul_add_c2(a[4],b[5],c1,c2,c3); */
+-	mflo	t_1
+-	mfhi	t_2
+-	slt	AT,t_2,zero
+-	daddu	c_3,AT
+-	dsll	t_2,1
+-	slt	a2,t_1,zero
+-	daddu	t_2,a2
+-	dsll	t_1,1
+-	daddu	c_1,t_1
+-	sltu	AT,c_1,t_1
+-	daddu	t_2,AT
+-	daddu	c_2,t_2
+-	sltu	AT,c_2,t_2
+-	daddu	c_3,AT
+-	sd	c_1,72(a0)
+-
+-	dmultu	a_7,a_3		/* mul_add_c2(a[7],b[3],c2,c3,c1); */
+-	mflo	t_1
+-	mfhi	t_2
+-	slt	c_1,t_2,zero
+-	dsll	t_2,1
+-	slt	a2,t_1,zero
+-	daddu	t_2,a2
+-	dsll	t_1,1
+-	daddu	c_2,t_1
+-	sltu	AT,c_2,t_1
+-	daddu	t_2,AT
+-	daddu	c_3,t_2
+-	sltu	AT,c_3,t_2
+-	daddu	c_1,AT
+-	dmultu	a_6,a_4		/* mul_add_c2(a[6],b[4],c2,c3,c1); */
+-	mflo	t_1
+-	mfhi	t_2
+-	slt	AT,t_2,zero
+-	daddu	c_1,AT
+-	dsll	t_2,1
+-	slt	a2,t_1,zero
+-	daddu	t_2,a2
+-	dsll	t_1,1
+-	daddu	c_2,t_1
+-	sltu	AT,c_2,t_1
+-	daddu	t_2,AT
+-	daddu	c_3,t_2
+-	sltu	AT,c_3,t_2
+-	daddu	c_1,AT
+-	dmultu	a_5,a_5		/* mul_add_c(a[5],b[5],c2,c3,c1); */
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_2,t_1
+-	sltu	AT,c_2,t_1
+-	daddu	t_2,AT
+-	daddu	c_3,t_2
+-	sltu	AT,c_3,t_2
+-	daddu	c_1,AT
+-	sd	c_2,80(a0)
+-
+-	dmultu	a_4,a_7		/* mul_add_c2(a[4],b[7],c3,c1,c2); */
+-	mflo	t_1
+-	mfhi	t_2
+-	slt	c_2,t_2,zero
+-	dsll	t_2,1
+-	slt	a2,t_1,zero
+-	daddu	t_2,a2
+-	dsll	t_1,1
+-	daddu	c_3,t_1
+-	sltu	AT,c_3,t_1
+-	daddu	t_2,AT
+-	daddu	c_1,t_2
+-	sltu	AT,c_1,t_2
+-	daddu	c_2,AT
+-	dmultu	a_5,a_6		/* mul_add_c2(a[5],b[6],c3,c1,c2); */
+-	mflo	t_1
+-	mfhi	t_2
+-	slt	AT,t_2,zero
+-	daddu	c_2,AT
+-	dsll	t_2,1
+-	slt	a2,t_1,zero
+-	daddu	t_2,a2
+-	dsll	t_1,1
+-	daddu	c_3,t_1
+-	sltu	AT,c_3,t_1
+-	daddu	t_2,AT
+-	daddu	c_1,t_2
+-	sltu	AT,c_1,t_2
+-	daddu	c_2,AT
+-	sd	c_3,88(a0)
+-
+-	dmultu	a_7,a_5		/* mul_add_c2(a[7],b[5],c1,c2,c3); */
+-	mflo	t_1
+-	mfhi	t_2
+-	slt	c_3,t_2,zero
+-	dsll	t_2,1
+-	slt	a2,t_1,zero
+-	daddu	t_2,a2
+-	dsll	t_1,1
+-	daddu	c_1,t_1
+-	sltu	AT,c_1,t_1
+-	daddu	t_2,AT
+-	daddu	c_2,t_2
+-	sltu	AT,c_2,t_2
+-	daddu	c_3,AT
+-	dmultu	a_6,a_6		/* mul_add_c(a[6],b[6],c1,c2,c3); */
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_1,t_1
+-	sltu	AT,c_1,t_1
+-	daddu	t_2,AT
+-	daddu	c_2,t_2
+-	sltu	AT,c_2,t_2
+-	daddu	c_3,AT
+-	sd	c_1,96(a0)
+-
+-	dmultu	a_6,a_7		/* mul_add_c2(a[6],b[7],c2,c3,c1); */
+-	mflo	t_1
+-	mfhi	t_2
+-	slt	c_1,t_2,zero
+-	dsll	t_2,1
+-	slt	a2,t_1,zero
+-	daddu	t_2,a2
+-	dsll	t_1,1
+-	daddu	c_2,t_1
+-	sltu	AT,c_2,t_1
+-	daddu	t_2,AT
+-	daddu	c_3,t_2
+-	sltu	AT,c_3,t_2
+-	daddu	c_1,AT
+-	sd	c_2,104(a0)
+-
+-	dmultu	a_7,a_7		/* mul_add_c(a[7],b[7],c3,c1,c2); */
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_3,t_1
+-	sltu	AT,c_3,t_1
+-	daddu	t_2,AT
+-	daddu	c_1,t_2
+-	sd	c_3,112(a0)
+-	sd	c_1,120(a0)
+-
+-	jr	ra
+-END(bn_sqr_comba8)
+-
+-.align	5
+-LEAF(bn_sqr_comba4)
+-	.set	reorder
+-	ld	a_0,0(a1)
+-	ld	a_1,8(a1)
+-	ld	a_2,16(a1)
+-	ld	a_3,24(a1)
+-	dmultu	a_0,a_0		/* mul_add_c(a[0],b[0],c1,c2,c3); */
+-	mflo	c_1
+-	mfhi	c_2
+-	sd	c_1,0(a0)
+-
+-	dmultu	a_0,a_1		/* mul_add_c2(a[0],b[1],c2,c3,c1); */
+-	mflo	t_1
+-	mfhi	t_2
+-	slt	c_1,t_2,zero
+-	dsll	t_2,1
+-	slt	a2,t_1,zero
+-	daddu	t_2,a2
+-	dsll	t_1,1
+-	daddu	c_2,t_1
+-	sltu	AT,c_2,t_1
+-	daddu	c_3,t_2,AT
+-	sd	c_2,8(a0)
+-
+-	dmultu	a_2,a_0		/* mul_add_c2(a[2],b[0],c3,c1,c2); */
+-	mflo	t_1
+-	mfhi	t_2
+-	slt	c_2,t_2,zero
+-	dsll	t_2,1
+-	slt	a2,t_1,zero
+-	daddu	t_2,a2
+-	dsll	t_1,1
+-	daddu	c_3,t_1
+-	sltu	AT,c_3,t_1
+-	daddu	t_2,AT
+-	daddu	c_1,t_2
+-	sltu	AT,c_1,t_2
+-	daddu	c_2,AT
+-	dmultu	a_1,a_1		/* mul_add_c(a[1],b[1],c3,c1,c2); */
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_3,t_1
+-	sltu	AT,c_3,t_1
+-	daddu	t_2,AT
+-	daddu	c_1,t_2
+-	sltu	AT,c_1,t_2
+-	daddu	c_2,AT
+-	sd	c_3,16(a0)
+-
+-	dmultu	a_0,a_3		/* mul_add_c2(a[0],b[3],c1,c2,c3); */
+-	mflo	t_1
+-	mfhi	t_2
+-	slt	c_3,t_2,zero
+-	dsll	t_2,1
+-	slt	a2,t_1,zero
+-	daddu	t_2,a2
+-	dsll	t_1,1
+-	daddu	c_1,t_1
+-	sltu	AT,c_1,t_1
+-	daddu	t_2,AT
+-	daddu	c_2,t_2
+-	sltu	AT,c_2,t_2
+-	daddu	c_3,AT
+-	dmultu	a_1,a_2		/* mul_add_c(a2[1],b[2],c1,c2,c3); */
+-	mflo	t_1
+-	mfhi	t_2
+-	slt	AT,t_2,zero
+-	daddu	c_3,AT
+-	dsll	t_2,1
+-	slt	a2,t_1,zero
+-	daddu	t_2,a2
+-	dsll	t_1,1
+-	daddu	c_1,t_1
+-	sltu	AT,c_1,t_1
+-	daddu	t_2,AT
+-	daddu	c_2,t_2
+-	sltu	AT,c_2,t_2
+-	daddu	c_3,AT
+-	sd	c_1,24(a0)
+-
+-	dmultu	a_3,a_1		/* mul_add_c2(a[3],b[1],c2,c3,c1); */
+-	mflo	t_1
+-	mfhi	t_2
+-	slt	c_1,t_2,zero
+-	dsll	t_2,1
+-	slt	a2,t_1,zero
+-	daddu	t_2,a2
+-	dsll	t_1,1
+-	daddu	c_2,t_1
+-	sltu	AT,c_2,t_1
+-	daddu	t_2,AT
+-	daddu	c_3,t_2
+-	sltu	AT,c_3,t_2
+-	daddu	c_1,AT
+-	dmultu	a_2,a_2		/* mul_add_c(a[2],b[2],c2,c3,c1); */
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_2,t_1
+-	sltu	AT,c_2,t_1
+-	daddu	t_2,AT
+-	daddu	c_3,t_2
+-	sltu	AT,c_3,t_2
+-	daddu	c_1,AT
+-	sd	c_2,32(a0)
+-
+-	dmultu	a_2,a_3		/* mul_add_c2(a[2],b[3],c3,c1,c2); */
+-	mflo	t_1
+-	mfhi	t_2
+-	slt	c_2,t_2,zero
+-	dsll	t_2,1
+-	slt	a2,t_1,zero
+-	daddu	t_2,a2
+-	dsll	t_1,1
+-	daddu	c_3,t_1
+-	sltu	AT,c_3,t_1
+-	daddu	t_2,AT
+-	daddu	c_1,t_2
+-	sltu	AT,c_1,t_2
+-	daddu	c_2,AT
+-	sd	c_3,40(a0)
+-
+-	dmultu	a_3,a_3		/* mul_add_c(a[3],b[3],c1,c2,c3); */
+-	mflo	t_1
+-	mfhi	t_2
+-	daddu	c_1,t_1
+-	sltu	AT,c_1,t_1
+-	daddu	t_2,AT
+-	daddu	c_2,t_2
+-	sd	c_1,48(a0)
+-	sd	c_2,56(a0)
+-
+-	jr	ra
+-END(bn_sqr_comba4)
+diff --git a/crypto/bn/asm/x86_64-gcc.c b/crypto/bn/asm/x86_64-gcc.c
+index 31476ab..2d39407 100644
+--- a/crypto/bn/asm/x86_64-gcc.c
++++ b/crypto/bn/asm/x86_64-gcc.c
+@@ -273,6 +273,10 @@ BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
+ /* sqr_add_c(a,i,c0,c1,c2)  -- c+=a[i]^2 for three word number c=(c2,c1,c0) */
+ /* sqr_add_c2(a,i,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */
+ 
++/*
++ * Keep in mind that carrying into high part of multiplication result
++ * can not overflow, because it cannot be all-ones.
++ */
+ #if 0
+ /* original macros are kept for reference purposes */
+ #define mul_add_c(a,b,c0,c1,c2) {	\
+@@ -287,10 +291,10 @@ BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
+ 	BN_ULONG ta=(a),tb=(b),t0;	\
+ 	t1 = BN_UMULT_HIGH(ta,tb);	\
+ 	t0 = ta * tb;			\
+-	t2 = t1+t1; c2 += (t2<t1)?1:0;	\
+-	t1 = t0+t0; t2 += (t1<t0)?1:0;	\
+-	c0 += t1; t2 += (c0<t1)?1:0;	\
++	c0 += t0; t2 = t1+((c0<t0)?1:0);\
+ 	c1 += t2; c2 += (c1<t2)?1:0;	\
++	c0 += t0; t1 += (c0<t0)?1:0;	\
++	c1 += t1; c2 += (c1<t1)?1:0;	\
+ 	}
+ #else
+ #define mul_add_c(a,b,c0,c1,c2)	do {	\
+@@ -328,22 +332,14 @@ BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
+ 		: "=a"(t1),"=d"(t2)	\
+ 		: "a"(a),"m"(b)		\
+ 		: "cc");		\
+-	asm ("addq %0,%0; adcq %2,%1"	\
+-		: "+d"(t2),"+r"(c2)	\
+-		: "g"(0)		\
+-		: "cc");		\
+-	asm ("addq %0,%0; adcq %2,%1"	\
+-		: "+a"(t1),"+d"(t2)	\
+-		: "g"(0)		\
+-		: "cc");		\
+-	asm ("addq %2,%0; adcq %3,%1"	\
+-		: "+r"(c0),"+d"(t2)	\
+-		: "a"(t1),"g"(0)	\
+-		: "cc");		\
+-	asm ("addq %2,%0; adcq %3,%1"	\
+-		: "+r"(c1),"+r"(c2)	\
+-		: "d"(t2),"g"(0)	\
+-		: "cc");		\
++	asm ("addq %3,%0; adcq %4,%1; adcq %5,%2"	\
++		: "+r"(c0),"+r"(c1),"+r"(c2)		\
++		: "r"(t1),"r"(t2),"g"(0)		\
++		: "cc");				\
++	asm ("addq %3,%0; adcq %4,%1; adcq %5,%2"	\
++		: "+r"(c0),"+r"(c1),"+r"(c2)		\
++		: "r"(t1),"r"(t2),"g"(0)		\
++		: "cc");				\
+ 	} while (0)
+ #endif
+ 
+diff --git a/crypto/bn/bn_asm.c b/crypto/bn/bn_asm.c
+index c43c91c..a33b634 100644
+--- a/crypto/bn/bn_asm.c
++++ b/crypto/bn/bn_asm.c
+@@ -438,6 +438,10 @@ BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n)
+ /* sqr_add_c(a,i,c0,c1,c2)  -- c+=a[i]^2 for three word number c=(c2,c1,c0) */
+ /* sqr_add_c2(a,i,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */
+ 
++/*
++ * Keep in mind that carrying into high part of multiplication result
++ * can not overflow, because it cannot be all-ones.
++ */
+ #ifdef BN_LLONG
+ #define mul_add_c(a,b,c0,c1,c2) \
+ 	t=(BN_ULLONG)a*b; \
+@@ -478,10 +482,10 @@ BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n)
+ #define mul_add_c2(a,b,c0,c1,c2) {	\
+ 	BN_ULONG ta=(a),tb=(b),t0;	\
+ 	BN_UMULT_LOHI(t0,t1,ta,tb);	\
+-	t2 = t1+t1; c2 += (t2<t1)?1:0;	\
+-	t1 = t0+t0; t2 += (t1<t0)?1:0;	\
+-	c0 += t1; t2 += (c0<t1)?1:0;	\
++	c0 += t0; t2 = t1+((c0<t0)?1:0);\
+ 	c1 += t2; c2 += (c1<t2)?1:0;	\
++	c0 += t0; t1 += (c0<t0)?1:0;	\
++	c1 += t1; c2 += (c1<t1)?1:0;	\
+ 	}
+ 
+ #define sqr_add_c(a,i,c0,c1,c2)	{	\
+@@ -508,10 +512,10 @@ BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n)
+ 	BN_ULONG ta=(a),tb=(b),t0;	\
+ 	t1 = BN_UMULT_HIGH(ta,tb);	\
+ 	t0 = ta * tb;			\
+-	t2 = t1+t1; c2 += (t2<t1)?1:0;	\
+-	t1 = t0+t0; t2 += (t1<t0)?1:0;	\
+-	c0 += t1; t2 += (c0<t1)?1:0;	\
++	c0 += t0; t2 = t1+((c0<t0)?1:0);\
+ 	c1 += t2; c2 += (c1<t2)?1:0;	\
++	c0 += t0; t1 += (c0<t0)?1:0;	\
++	c1 += t1; c2 += (c1<t1)?1:0;	\
+ 	}
+ 
+ #define sqr_add_c(a,i,c0,c1,c2)	{	\
+diff --git a/crypto/bn/bntest.c b/crypto/bn/bntest.c
+index 7771e92..48bc633 100644
+--- a/crypto/bn/bntest.c
++++ b/crypto/bn/bntest.c
+@@ -678,44 +678,98 @@ int test_mul(BIO *bp)
+ 
+ int test_sqr(BIO *bp, BN_CTX *ctx)
+ 	{
+-	BIGNUM a,c,d,e;
+-	int i;
++	BIGNUM *a,*c,*d,*e;
++	int i, ret = 0;
+ 
+-	BN_init(&a);
+-	BN_init(&c);
+-	BN_init(&d);
+-	BN_init(&e);
++	a = BN_new();
++	c = BN_new();
++	d = BN_new();
++	e = BN_new();
++	if (a == NULL || c == NULL || d == NULL || e == NULL)
++		{
++		goto err;
++		}
+ 
+ 	for (i=0; i<num0; i++)
+ 		{
+-		BN_bntest_rand(&a,40+i*10,0,0);
+-		a.neg=rand_neg();
+-		BN_sqr(&c,&a,ctx);
++		BN_bntest_rand(a,40+i*10,0,0);
++		a->neg=rand_neg();
++		BN_sqr(c,a,ctx);
+ 		if (bp != NULL)
+ 			{
+ 			if (!results)
+ 				{
+-				BN_print(bp,&a);
++				BN_print(bp,a);
+ 				BIO_puts(bp," * ");
+-				BN_print(bp,&a);
++				BN_print(bp,a);
+ 				BIO_puts(bp," - ");
+ 				}
+-			BN_print(bp,&c);
++			BN_print(bp,c);
+ 			BIO_puts(bp,"\n");
+ 			}
+-		BN_div(&d,&e,&c,&a,ctx);
+-		BN_sub(&d,&d,&a);
+-		if(!BN_is_zero(&d) || !BN_is_zero(&e))
+-		    {
+-		    fprintf(stderr,"Square test failed!\n");
+-		    return 0;
+-		    }
++		BN_div(d,e,c,a,ctx);
++		BN_sub(d,d,a);
++		if(!BN_is_zero(d) || !BN_is_zero(e))
++			{
++			fprintf(stderr,"Square test failed!\n");
++			goto err;
++			}
+ 		}
+-	BN_free(&a);
+-	BN_free(&c);
+-	BN_free(&d);
+-	BN_free(&e);
+-	return(1);
++
++	/* Regression test for a BN_sqr overflow bug. */
++	BN_hex2bn(&a,
++		"80000000000000008000000000000001FFFFFFFFFFFFFFFE0000000000000000");
++	BN_sqr(c, a, ctx);
++	if (bp != NULL)
++		{
++		if (!results)
++			{
++			BN_print(bp,a);
++			BIO_puts(bp," * ");
++			BN_print(bp,a);
++			BIO_puts(bp," - ");
++			}
++		BN_print(bp,c);
++		BIO_puts(bp,"\n");
++		}
++	BN_mul(d, a, a, ctx);
++	if (BN_cmp(c, d))
++		{
++		fprintf(stderr, "Square test failed: BN_sqr and BN_mul produce "
++			"different results!\n");
++		goto err;
++		}
++
++	/* Regression test for a BN_sqr overflow bug. */
++	BN_hex2bn(&a,
++		"80000000000000000000000080000001FFFFFFFE000000000000000000000000");
++	BN_sqr(c, a, ctx);
++	if (bp != NULL)
++		{
++		if (!results)
++			{
++			BN_print(bp,a);
++			BIO_puts(bp," * ");
++			BN_print(bp,a);
++			BIO_puts(bp," - ");
++			}
++		BN_print(bp,c);
++		BIO_puts(bp,"\n");
++		}
++	BN_mul(d, a, a, ctx);
++	if (BN_cmp(c, d))
++		{
++		fprintf(stderr, "Square test failed: BN_sqr and BN_mul produce "
++			"different results!\n");
++		goto err;
++		}
++	ret = 1;
++err:
++	if (a != NULL) BN_free(a);
++	if (c != NULL) BN_free(c);
++	if (d != NULL) BN_free(d);
++	if (e != NULL) BN_free(e);
++	return ret;
+ 	}
+ 
+ int test_mont(BIO *bp, BN_CTX *ctx)
+-- 
+1.8.3.1
+
diff --git a/openssl-1.0.1e-cve-2014-3571.patch b/openssl-1.0.1e-cve-2014-3571.patch
new file mode 100644
index 0000000..9c93b7f
--- /dev/null
+++ b/openssl-1.0.1e-cve-2014-3571.patch
@@ -0,0 +1,34 @@
+diff -up openssl-1.0.1e/ssl/d1_pkt.c.dtls1-reads openssl-1.0.1e/ssl/d1_pkt.c
+--- openssl-1.0.1e/ssl/d1_pkt.c.dtls1-reads	2015-01-12 17:31:41.647213706 +0100
++++ openssl-1.0.1e/ssl/d1_pkt.c	2015-01-12 17:38:21.708261411 +0100
+@@ -641,8 +641,6 @@ again:
+ 		/* now s->packet_length == DTLS1_RT_HEADER_LENGTH */
+ 		i=rr->length;
+ 		n=ssl3_read_n(s,i,i,1);
+-		if (n <= 0) return(n); /* error or non-blocking io */
+-
+ 		/* this packet contained a partial record, dump it */
+ 		if ( n != i)
+ 			{
+@@ -677,7 +675,8 @@ again:
+ 		 * would be dropped unnecessarily.
+ 		 */
+ 		if (!(s->d1->listen && rr->type == SSL3_RT_HANDSHAKE &&
+-		    *p == SSL3_MT_CLIENT_HELLO) &&
++		    s->packet_length > DTLS1_RT_HEADER_LENGTH &&
++		    s->packet[DTLS1_RT_HEADER_LENGTH] == SSL3_MT_CLIENT_HELLO) &&
+ 		    !dtls1_record_replay_check(s, bitmap))
+ 			{
+ 			rr->length = 0;
+diff -up openssl-1.0.1e/ssl/s3_pkt.c.dtls1-reads openssl-1.0.1e/ssl/s3_pkt.c
+--- openssl-1.0.1e/ssl/s3_pkt.c.dtls1-reads	2015-01-12 17:31:41.680214453 +0100
++++ openssl-1.0.1e/ssl/s3_pkt.c	2015-01-12 17:38:06.721922482 +0100
+@@ -182,6 +182,8 @@ int ssl3_read_n(SSL *s, int n, int max,
+ 	 * at once (as long as it fits into the buffer). */
+ 	if (SSL_version(s) == DTLS1_VERSION || SSL_version(s) == DTLS1_BAD_VER)
+ 		{
++		if (left == 0 && extend)
++			return 0;
+ 		if (left > 0 && n > left)
+ 			n = left;
+ 		}
diff --git a/openssl-1.0.1e-cve-2014-3572.patch b/openssl-1.0.1e-cve-2014-3572.patch
new file mode 100644
index 0000000..013876b
--- /dev/null
+++ b/openssl-1.0.1e-cve-2014-3572.patch
@@ -0,0 +1,51 @@
+diff -up openssl-1.0.1e/ssl/s3_clnt.c.ecdh-downgrade openssl-1.0.1e/ssl/s3_clnt.c
+--- openssl-1.0.1e/ssl/s3_clnt.c.ecdh-downgrade	2015-01-12 16:37:49.978126895 +0100
++++ openssl-1.0.1e/ssl/s3_clnt.c	2015-01-12 17:02:01.740959687 +0100
+@@ -1287,6 +1287,8 @@ int ssl3_get_key_exchange(SSL *s)
+ 	int encoded_pt_len = 0;
+ #endif
+ 
++	EVP_MD_CTX_init(&md_ctx);
++
+ 	/* use same message size as in ssl3_get_certificate_request()
+ 	 * as ServerKeyExchange message may be skipped */
+ 	n=s->method->ssl_get_message(s,
+@@ -1297,14 +1299,26 @@ int ssl3_get_key_exchange(SSL *s)
+ 		&ok);
+ 	if (!ok) return((int)n);
+ 
++	alg_k=s->s3->tmp.new_cipher->algorithm_mkey;
++
+ 	if (s->s3->tmp.message_type != SSL3_MT_SERVER_KEY_EXCHANGE)
+ 		{
++		/*
++		 * Can't skip server key exchange if this is an ephemeral
++		 * ciphersuite.
++		 */
++		if (alg_k & (SSL_kEDH|SSL_kEECDH))
++			{
++			SSLerr(SSL_F_SSL3_GET_KEY_EXCHANGE, SSL_R_UNEXPECTED_MESSAGE);
++			al = SSL_AD_UNEXPECTED_MESSAGE;
++			goto f_err;
++			}
+ #ifndef OPENSSL_NO_PSK
+ 		/* In plain PSK ciphersuite, ServerKeyExchange can be
+ 		   omitted if no identity hint is sent. Set
+ 		   session->sess_cert anyway to avoid problems
+ 		   later.*/
+-		if (s->s3->tmp.new_cipher->algorithm_mkey & SSL_kPSK)
++		if (alg_k & SSL_kPSK)
+ 			{
+ 			s->session->sess_cert=ssl_sess_cert_new();
+ 			if (s->ctx->psk_identity_hint)
+@@ -1347,9 +1361,8 @@ int ssl3_get_key_exchange(SSL *s)
+ 		}
+ 
+ 	param_len=0;
+-	alg_k=s->s3->tmp.new_cipher->algorithm_mkey;
++
+ 	alg_a=s->s3->tmp.new_cipher->algorithm_auth;
+-	EVP_MD_CTX_init(&md_ctx);
+ 
+ #ifndef OPENSSL_NO_PSK
+ 	if (alg_k & SSL_kPSK)
diff --git a/openssl-1.0.1e-cve-2015-0204.patch b/openssl-1.0.1e-cve-2015-0204.patch
new file mode 100644
index 0000000..527f7c5
--- /dev/null
+++ b/openssl-1.0.1e-cve-2015-0204.patch
@@ -0,0 +1,158 @@
+diff -up openssl-1.0.1e/doc/ssl/SSL_CTX_set_options.pod.rsa-ephemeral openssl-1.0.1e/doc/ssl/SSL_CTX_set_options.pod
+--- openssl-1.0.1e/doc/ssl/SSL_CTX_set_options.pod.rsa-ephemeral	2013-02-11 16:26:04.000000000 +0100
++++ openssl-1.0.1e/doc/ssl/SSL_CTX_set_options.pod	2015-01-13 11:15:25.096957795 +0100
+@@ -151,15 +151,7 @@ temporary/ephemeral DH parameters are us
+ 
+ =item SSL_OP_EPHEMERAL_RSA
+ 
+-Always use ephemeral (temporary) RSA key when doing RSA operations
+-(see L<SSL_CTX_set_tmp_rsa_callback(3)|SSL_CTX_set_tmp_rsa_callback(3)>).
+-According to the specifications this is only done, when a RSA key
+-can only be used for signature operations (namely under export ciphers
+-with restricted RSA keylength). By setting this option, ephemeral
+-RSA keys are always used. This option breaks compatibility with the
+-SSL/TLS specifications and may lead to interoperability problems with
+-clients and should therefore never be used. Ciphers with EDH (ephemeral
+-Diffie-Hellman) key exchange should be used instead.
+This option is no longer implemented and is treated as a no-op.
+ 
+ =item SSL_OP_CIPHER_SERVER_PREFERENCE
+ 
+diff -up openssl-1.0.1e/doc/ssl/SSL_CTX_set_tmp_rsa_callback.pod.rsa-ephemeral openssl-1.0.1e/doc/ssl/SSL_CTX_set_tmp_rsa_callback.pod
+--- openssl-1.0.1e/doc/ssl/SSL_CTX_set_tmp_rsa_callback.pod.rsa-ephemeral	2013-02-11 16:02:48.000000000 +0100
++++ openssl-1.0.1e/doc/ssl/SSL_CTX_set_tmp_rsa_callback.pod	2015-01-13 11:15:25.096957795 +0100
+@@ -74,21 +74,14 @@ exchange and use EDH (Ephemeral Diffie-H
+ in order to achieve forward secrecy (see
+ L<SSL_CTX_set_tmp_dh_callback(3)|SSL_CTX_set_tmp_dh_callback(3)>).
+ 
+-On OpenSSL servers ephemeral RSA key exchange is therefore disabled by default
+-and must be explicitly enabled  using the SSL_OP_EPHEMERAL_RSA option of
+-L<SSL_CTX_set_options(3)|SSL_CTX_set_options(3)>, violating the TLS/SSL
+-standard. When ephemeral RSA key exchange is required for export ciphers,
+-it will automatically be used without this option!
+-
+-An application may either directly specify the key or can supply the key via
+-a callback function. The callback approach has the advantage, that the
+-callback may generate the key only in case it is actually needed. As the
+-generation of a RSA key is however costly, it will lead to a significant
+-delay in the handshake procedure.  Another advantage of the callback function
+-is that it can supply keys of different size (e.g. for SSL_OP_EPHEMERAL_RSA
+-usage) while the explicit setting of the key is only useful for key size of
+-512 bits to satisfy the export restricted ciphers and does give away key length
+-if a longer key would be allowed.
++An application may either directly specify the key or can supply the key via a
++callback function. The callback approach has the advantage, that the callback
++may generate the key only in case it is actually needed. As the generation of a
++RSA key is however costly, it will lead to a significant delay in the handshake
++procedure.  Another advantage of the callback function is that it can supply
++keys of different size while the explicit setting of the key is only useful for
++key size of 512 bits to satisfy the export restricted ciphers and does give
++away key length if a longer key would be allowed.
+ 
+ The B<tmp_rsa_callback> is called with the B<keylength> needed and
+ the B<is_export> information. The B<is_export> flag is set, when the
+diff -up openssl-1.0.1e/CHANGES.rsa-ephemeral openssl-1.0.1e/CHANGES
+diff -up openssl-1.0.1e/ssl/d1_srvr.c.rsa-ephemeral openssl-1.0.1e/ssl/d1_srvr.c
+--- openssl-1.0.1e/ssl/d1_srvr.c.rsa-ephemeral	2015-01-12 17:49:04.912808002 +0100
++++ openssl-1.0.1e/ssl/d1_srvr.c	2015-01-13 11:15:25.096957795 +0100
+@@ -450,24 +450,15 @@ int dtls1_accept(SSL *s)
+ 		case SSL3_ST_SW_KEY_EXCH_B:
+ 			alg_k = s->s3->tmp.new_cipher->algorithm_mkey;
+ 
+-			/* clear this, it may get reset by
+-			 * send_server_key_exchange */
+-			if ((s->options & SSL_OP_EPHEMERAL_RSA)
+-#ifndef OPENSSL_NO_KRB5
+-				&& !(alg_k & SSL_kKRB5)
+-#endif /* OPENSSL_NO_KRB5 */
+-				)
+-				/* option SSL_OP_EPHEMERAL_RSA sends temporary RSA key
+-				 * even when forbidden by protocol specs
+-				 * (handshake may fail as clients are not required to
+-				 * be able to handle this) */
+-				s->s3->tmp.use_rsa_tmp=1;
+-			else
+-				s->s3->tmp.use_rsa_tmp=0;
++			/*
++			 * clear this, it may get reset by
++			 * send_server_key_exchange
++			 */
++			s->s3->tmp.use_rsa_tmp=0;
+ 
+ 			/* only send if a DH key exchange or
+ 			 * RSA but we have a sign only certificate */
+-			if (s->s3->tmp.use_rsa_tmp
++			if (0
+ 			/* PSK: send ServerKeyExchange if PSK identity
+ 			 * hint if provided */
+ #ifndef OPENSSL_NO_PSK
+diff -up openssl-1.0.1e/ssl/ssl.h.rsa-ephemeral openssl-1.0.1e/ssl/ssl.h
+--- openssl-1.0.1e/ssl/ssl.h.rsa-ephemeral	2015-01-12 17:49:04.936808545 +0100
++++ openssl-1.0.1e/ssl/ssl.h	2015-01-13 11:15:25.098957840 +0100
+@@ -587,9 +587,8 @@ struct ssl_session_st
+ #define SSL_OP_SINGLE_ECDH_USE				0x00080000L
+ /* If set, always create a new key when using tmp_dh parameters */
+ #define SSL_OP_SINGLE_DH_USE				0x00100000L
+-/* Set to always use the tmp_rsa key when doing RSA operations,
+- * even when this violates protocol specs */
+-#define SSL_OP_EPHEMERAL_RSA				0x00200000L
+/* Does nothing: retained for compatibility */
++#define SSL_OP_EPHEMERAL_RSA				0x0
+ /* Set on servers to choose the cipher according to the server's
+  * preferences */
+ #define SSL_OP_CIPHER_SERVER_PREFERENCE			0x00400000L
+diff -up openssl-1.0.1e/ssl/s3_clnt.c.rsa-ephemeral openssl-1.0.1e/ssl/s3_clnt.c
+--- openssl-1.0.1e/ssl/s3_clnt.c.rsa-ephemeral	2015-01-12 17:49:04.946808771 +0100
++++ openssl-1.0.1e/ssl/s3_clnt.c	2015-01-13 11:15:25.097957817 +0100
+@@ -1492,6 +1492,13 @@ int ssl3_get_key_exchange(SSL *s)
+ #ifndef OPENSSL_NO_RSA
+ 	if (alg_k & SSL_kRSA)
+ 		{
++		/* Temporary RSA keys only allowed in export ciphersuites */
++		if (!SSL_C_IS_EXPORT(s->s3->tmp.new_cipher))
++			{
++			al=SSL_AD_UNEXPECTED_MESSAGE;
++			SSLerr(SSL_F_SSL3_GET_KEY_EXCHANGE,SSL_R_UNEXPECTED_MESSAGE);
++			goto f_err;
++			}
+ 		if ((rsa=RSA_new()) == NULL)
+ 			{
+ 			SSLerr(SSL_F_SSL3_GET_KEY_EXCHANGE,ERR_R_MALLOC_FAILURE);
+diff -up openssl-1.0.1e/ssl/s3_srvr.c.rsa-ephemeral openssl-1.0.1e/ssl/s3_srvr.c
+--- openssl-1.0.1e/ssl/s3_srvr.c.rsa-ephemeral	2015-01-12 17:51:32.044135496 +0100
++++ openssl-1.0.1e/ssl/s3_srvr.c	2015-01-13 11:15:25.098957840 +0100
+@@ -441,20 +441,11 @@ int ssl3_accept(SSL *s)
+ 		case SSL3_ST_SW_KEY_EXCH_B:
+ 			alg_k = s->s3->tmp.new_cipher->algorithm_mkey;
+ 
+-			/* clear this, it may get reset by
+-			 * send_server_key_exchange */
+-			if ((s->options & SSL_OP_EPHEMERAL_RSA)
+-#ifndef OPENSSL_NO_KRB5
+-				&& !(alg_k & SSL_kKRB5)
+-#endif /* OPENSSL_NO_KRB5 */
+-				)
+-				/* option SSL_OP_EPHEMERAL_RSA sends temporary RSA key
+-				 * even when forbidden by protocol specs
+-				 * (handshake may fail as clients are not required to
+-				 * be able to handle this) */
+-				s->s3->tmp.use_rsa_tmp=1;
+-			else
+-				s->s3->tmp.use_rsa_tmp=0;
++			/*
++			 * clear this, it may get reset by
++			 * send_server_key_exchange
++			 */
++			s->s3->tmp.use_rsa_tmp=0;
+ 
+ 
+ 			/* only send if a DH key exchange, fortezza or
+@@ -468,7 +459,7 @@ int ssl3_accept(SSL *s)
+ 			 * server certificate contains the server's
+ 			 * public key for key exchange.
+ 			 */
+-			if (s->s3->tmp.use_rsa_tmp
++			if (0
+ 			/* PSK: send ServerKeyExchange if PSK identity
+ 			 * hint if provided */
+ #ifndef OPENSSL_NO_PSK
diff --git a/openssl-1.0.1e-cve-2015-0205.patch b/openssl-1.0.1e-cve-2015-0205.patch
new file mode 100644
index 0000000..ff378ae
--- /dev/null
+++ b/openssl-1.0.1e-cve-2015-0205.patch
@@ -0,0 +1,12 @@
+diff -up openssl-1.0.1e/ssl/s3_srvr.c.dh-unauthenticated openssl-1.0.1e/ssl/s3_srvr.c
+--- openssl-1.0.1e/ssl/s3_srvr.c.dh-unauthenticated	2015-01-12 17:49:04.930808409 +0100
++++ openssl-1.0.1e/ssl/s3_srvr.c	2015-01-13 11:15:25.098957840 +0100
+@@ -2951,7 +2951,7 @@ int ssl3_get_cert_verify(SSL *s)
+ 	if (s->s3->tmp.message_type != SSL3_MT_CERTIFICATE_VERIFY)
+ 		{
+ 		s->s3->tmp.reuse_message=1;
+-		if ((peer != NULL) && (type & EVP_PKT_SIGN))
++		if (peer != NULL)
+ 			{
+ 			al=SSL_AD_UNEXPECTED_MESSAGE;
+ 			SSLerr(SSL_F_SSL3_GET_CERT_VERIFY,SSL_R_MISSING_VERIFY_MESSAGE);
diff --git a/openssl-1.0.1e-cve-2015-0206.patch b/openssl-1.0.1e-cve-2015-0206.patch
new file mode 100644
index 0000000..e2d1718
--- /dev/null
+++ b/openssl-1.0.1e-cve-2015-0206.patch
@@ -0,0 +1,116 @@
+diff -up openssl-1.0.1e/ssl/d1_pkt.c.dtls-rec-leak openssl-1.0.1e/ssl/d1_pkt.c
+--- openssl-1.0.1e/ssl/d1_pkt.c.dtls-rec-leak	2015-01-13 11:44:12.410022377 +0100
++++ openssl-1.0.1e/ssl/d1_pkt.c	2015-01-13 11:50:40.062789458 +0100
+@@ -212,7 +212,7 @@ dtls1_buffer_record(SSL *s, record_pqueu
+ 	/* Limit the size of the queue to prevent DOS attacks */
+ 	if (pqueue_size(queue->q) >= 100)
+ 		return 0;
+-		
++
+ 	rdata = OPENSSL_malloc(sizeof(DTLS1_RECORD_DATA));
+ 	item = pitem_new(priority, rdata);
+ 	if (rdata == NULL || item == NULL)
+@@ -239,14 +239,6 @@ dtls1_buffer_record(SSL *s, record_pqueu
+ 	}
+ #endif
+ 
+-	/* insert should not fail, since duplicates are dropped */
+-	if (pqueue_insert(queue->q, item) == NULL)
+-		{
+-		OPENSSL_free(rdata);
+-		pitem_free(item);
+-		return(0);
+-		}
+-
+ 	s->packet = NULL;
+ 	s->packet_length = 0;
+ 	memset(&(s->s3->rbuf), 0, sizeof(SSL3_BUFFER));
+@@ -255,11 +247,24 @@ dtls1_buffer_record(SSL *s, record_pqueu
+ 	if (!ssl3_setup_buffers(s))
+ 		{
+ 		SSLerr(SSL_F_DTLS1_BUFFER_RECORD, ERR_R_INTERNAL_ERROR);
++		if (rdata->rbuf.buf != NULL)
++			OPENSSL_free(rdata->rbuf.buf);
+ 		OPENSSL_free(rdata);
+ 		pitem_free(item);
+-		return(0);
++		return(-1);
+ 		}
+-	
++
++	/* insert should not fail, since duplicates are dropped */
++	if (pqueue_insert(queue->q, item) == NULL)
++		{
++		SSLerr(SSL_F_DTLS1_BUFFER_RECORD, ERR_R_INTERNAL_ERROR);
++		if (rdata->rbuf.buf != NULL)
++			OPENSSL_free(rdata->rbuf.buf);
++		OPENSSL_free(rdata);
++		pitem_free(item);
++		return(-1);
++		}
++
+ 	return(1);
+ 	}
+ 
+@@ -313,8 +318,9 @@ dtls1_process_buffered_records(SSL *s)
+             dtls1_get_unprocessed_record(s);
+             if ( ! dtls1_process_record(s))
+                 return(0);
+-            dtls1_buffer_record(s, &(s->d1->processed_rcds), 
+-                s->s3->rrec.seq_num);
++            if(dtls1_buffer_record(s, &(s->d1->processed_rcds),
++                s->s3->rrec.seq_num)<0)
++                return -1;
+             }
+         }
+ 
+@@ -529,7 +535,6 @@ printf("\n");
+ 
+ 	/* we have pulled in a full packet so zero things */
+ 	s->packet_length=0;
+-	dtls1_record_bitmap_update(s, &(s->d1->bitmap));/* Mark receipt of record. */
+ 	return(1);
+ 
+ f_err:
+@@ -562,7 +567,8 @@ int dtls1_get_record(SSL *s)
+ 
+ 	/* The epoch may have changed.  If so, process all the
+ 	 * pending records.  This is a non-blocking operation. */
+-	dtls1_process_buffered_records(s);
++	if(dtls1_process_buffered_records(s)<0)
++		return -1;
+ 
+ 	/* if we're renegotiating, then there may be buffered records */
+ 	if (dtls1_get_processed_record(s))
+@@ -699,7 +705,9 @@ again:
+ 		{
+ 		if ((SSL_in_init(s) || s->in_handshake) && !s->d1->listen)
+ 			{
+-			dtls1_buffer_record(s, &(s->d1->unprocessed_rcds), rr->seq_num);
++			if(dtls1_buffer_record(s, &(s->d1->unprocessed_rcds), rr->seq_num)<0)
++				return -1;
++			dtls1_record_bitmap_update(s, bitmap);/* Mark receipt of record. */
+ 			}
+ 		rr->length = 0;
+ 		s->packet_length = 0;
+@@ -712,6 +720,7 @@ again:
+ 		s->packet_length = 0;  /* dump this record */
+ 		goto again;   /* get another record */
+ 		}
++	dtls1_record_bitmap_update(s, bitmap);/* Mark receipt of record. */
+ 
+ 	return(1);
+ 
+@@ -863,7 +872,11 @@ start:
+ 		 * buffer the application data for later processing rather
+ 		 * than dropping the connection.
+ 		 */
+-		dtls1_buffer_record(s, &(s->d1->buffered_app_data), rr->seq_num);
++		if(dtls1_buffer_record(s, &(s->d1->buffered_app_data), rr->seq_num)<0)
++			{
++			SSLerr(SSL_F_DTLS1_READ_BYTES, ERR_R_INTERNAL_ERROR);
++			return -1;
++			}
+ 		rr->length = 0;
+ 		goto start;
+ 		}
diff --git a/openssl-1.0.1e-dtls-ecc-ext.patch b/openssl-1.0.1e-dtls-ecc-ext.patch
new file mode 100644
index 0000000..2a002cc
--- /dev/null
+++ b/openssl-1.0.1e-dtls-ecc-ext.patch
@@ -0,0 +1,119 @@
+From 2054eb771ea29378f90d3a77c2f4015b17de702d Mon Sep 17 00:00:00 2001
+From: "Dr. Stephen Henson" <steve at openssl.org>
+Date: Tue, 15 Jul 2014 12:20:30 +0100
+Subject: [PATCH] Add ECC extensions with DTLS.
+
+PR#3449
+---
+ ssl/d1_clnt.c |  8 +++++++-
+ ssl/d1_srvr.c |  5 +++++
+ ssl/t1_lib.c  | 18 ++++++------------
+ 3 files changed, 18 insertions(+), 13 deletions(-)
+
+diff --git a/ssl/d1_clnt.c b/ssl/d1_clnt.c
+index 48e5e06..65dbb4a 100644
+--- a/ssl/d1_clnt.c
++++ b/ssl/d1_clnt.c
+@@ -876,12 +876,18 @@ int dtls1_client_hello(SSL *s)
+ 		*(p++)=0; /* Add the NULL method */
+ 
+ #ifndef OPENSSL_NO_TLSEXT
++		/* TLS extensions*/
++		if (ssl_prepare_clienthello_tlsext(s) <= 0)
++			{
++			SSLerr(SSL_F_DTLS1_CLIENT_HELLO,SSL_R_CLIENTHELLO_TLSEXT);
++			goto err;
++			}
+ 		if ((p = ssl_add_clienthello_tlsext(s, p, buf+SSL3_RT_MAX_PLAIN_LENGTH)) == NULL)
+ 			{
+ 			SSLerr(SSL_F_DTLS1_CLIENT_HELLO,ERR_R_INTERNAL_ERROR);
+ 			goto err;
+ 			}
+-#endif		
++#endif
+ 
+ 		l=(p-d);
+ 		d=buf;
+diff --git a/ssl/d1_srvr.c b/ssl/d1_srvr.c
+index 1384ab0..ef9c347 100644
+--- a/ssl/d1_srvr.c
++++ b/ssl/d1_srvr.c
+@@ -980,6 +980,11 @@ int dtls1_send_server_hello(SSL *s)
+ #endif
+ 
+ #ifndef OPENSSL_NO_TLSEXT
++		if (ssl_prepare_serverhello_tlsext(s) <= 0)
++			{
++			SSLerr(SSL_F_DTLS1_SEND_SERVER_HELLO,SSL_R_SERVERHELLO_TLSEXT);
++			return -1;
++			}
+ 		if ((p = ssl_add_serverhello_tlsext(s, p, buf+SSL3_RT_MAX_PLAIN_LENGTH)) == NULL)
+ 			{
+ 			SSLerr(SSL_F_DTLS1_SEND_SERVER_HELLO,ERR_R_INTERNAL_ERROR);
+diff --git a/ssl/t1_lib.c b/ssl/t1_lib.c
+index f6a480d..8167a51 100644
+--- a/ssl/t1_lib.c
++++ b/ssl/t1_lib.c
+@@ -453,8 +453,7 @@ unsigned char *ssl_add_clienthello_tlsext(SSL *s, unsigned char *buf, unsigned c
+ #endif
+ 
+ #ifndef OPENSSL_NO_EC
+-	if (s->tlsext_ecpointformatlist != NULL &&
+-	    s->version != DTLS1_VERSION)
++	if (s->tlsext_ecpointformatlist != NULL)
+ 		{
+ 		/* Add TLS extension ECPointFormats to the ClientHello message */
+ 		long lenmax; 
+@@ -473,8 +472,7 @@ unsigned char *ssl_add_clienthello_tlsext(SSL *s, unsigned char *buf, unsigned c
+ 		memcpy(ret, s->tlsext_ecpointformatlist, s->tlsext_ecpointformatlist_length);
+ 		ret+=s->tlsext_ecpointformatlist_length;
+ 		}
+-	if (s->tlsext_ellipticcurvelist != NULL &&
+-	    s->version != DTLS1_VERSION)
++	if (s->tlsext_ellipticcurvelist != NULL)
+ 		{
+ 		/* Add TLS extension EllipticCurves to the ClientHello message */
+ 		long lenmax; 
+@@ -750,8 +748,7 @@ unsigned char *ssl_add_serverhello_tlsext(SSL *s, unsigned char *buf, unsigned c
+         }
+ 
+ #ifndef OPENSSL_NO_EC
+-	if (s->tlsext_ecpointformatlist != NULL &&
+-	    s->version != DTLS1_VERSION)
++	if (s->tlsext_ecpointformatlist != NULL)
+ 		{
+ 		/* Add TLS extension ECPointFormats to the ServerHello message */
+ 		long lenmax; 
+@@ -1154,8 +1151,7 @@ int ssl_parse_clienthello_tlsext(SSL *s, unsigned char **p, unsigned char *d, in
+ #endif
+ 
+ #ifndef OPENSSL_NO_EC
+-		else if (type == TLSEXT_TYPE_ec_point_formats &&
+-	             s->version != DTLS1_VERSION)
++		else if (type == TLSEXT_TYPE_ec_point_formats)
+ 			{
+ 			unsigned char *sdata = data;
+ 			int ecpointformatlist_length = *(sdata++);
+@@ -1189,8 +1185,7 @@ int ssl_parse_clienthello_tlsext(SSL *s, unsigned char **p, unsigned char *d, in
+ 			fprintf(stderr,"\n");
+ #endif
+ 			}
+-		else if (type == TLSEXT_TYPE_elliptic_curves &&
+-	             s->version != DTLS1_VERSION)
++		else if (type == TLSEXT_TYPE_elliptic_curves)
+ 			{
+ 			unsigned char *sdata = data;
+ 			int ellipticcurvelist_length = (*(sdata++) << 8);
+@@ -1549,8 +1544,7 @@ int ssl_parse_serverhello_tlsext(SSL *s, unsigned char **p, unsigned char *d, in
+ 			}
+ 
+ #ifndef OPENSSL_NO_EC
+-		else if (type == TLSEXT_TYPE_ec_point_formats &&
+-	             s->version != DTLS1_VERSION)
++		else if (type == TLSEXT_TYPE_ec_point_formats)
+ 			{
+ 			unsigned char *sdata = data;
+ 			int ecpointformatlist_length = *(sdata++);
+-- 
+1.8.3.1
+
diff --git a/openssl-1.0.1e-ssl2-no-ec.patch b/openssl-1.0.1e-ssl2-no-ec.patch
new file mode 100644
index 0000000..81ad472
--- /dev/null
+++ b/openssl-1.0.1e-ssl2-no-ec.patch
@@ -0,0 +1,17 @@
+diff -up openssl-1.0.1e/ssl/s23_lib.c.ssl2noec openssl-1.0.1e/ssl/s23_lib.c
+--- openssl-1.0.1e/ssl/s23_lib.c.ssl2noec	2013-02-11 16:26:04.000000000 +0100
++++ openssl-1.0.1e/ssl/s23_lib.c	2014-05-06 15:51:54.053293674 +0200
+@@ -107,6 +107,13 @@ int ssl23_put_cipher_by_char(const SSL_C
+ 	long l;
+ 
+ 	/* We can write SSLv2 and SSLv3 ciphers */
++	/* but no ECC ciphers */
++	if (c->algorithm_mkey == SSL_kECDHr ||
++		c->algorithm_mkey == SSL_kECDHe ||
++		c->algorithm_mkey == SSL_kEECDH ||
++		c->algorithm_auth == SSL_aECDH ||
++		c->algorithm_auth == SSL_aECDSA)
++		return 0;
+ 	if (p != NULL)
+ 		{
+ 		l=c->id;
diff --git a/openssl.spec b/openssl.spec
index a56336d..19cab1a 100644
--- a/openssl.spec
+++ b/openssl.spec
@@ -21,7 +21,7 @@
 Summary: Utilities from the general purpose cryptography library with TLS implementation
 Name: openssl
 Version: 1.0.1e
-Release: 40%{?dist}
+Release: 41%{?dist}
 Epoch: 1
 # We have to remove certain patented algorithms from the openssl source
 # tarball with the hobble-openssl script which is included below.
@@ -78,6 +78,7 @@ Patch74: openssl-1.0.1e-no-md5-verify.patch
 Patch75: openssl-1.0.1e-compat-symbols.patch
 Patch76: openssl-1.0.1e-new-fips-reqs.patch
 Patch77: openssl-1.0.1e-weak-ciphers.patch
+Patch41: openssl-1.0.1e-ssl2-no-ec.patch
 # Backported fixes including security fixes
 Patch81: openssl-1.0.1-beta2-padlock64.patch
 Patch82: openssl-1.0.1e-backports.patch
@@ -95,6 +96,7 @@ Patch93: openssl-1.0.1e-cve-2014-0198.patch
 Patch94: openssl-1.0.1e-cve-2014-0221.patch
 Patch95: openssl-1.0.1e-cve-2014-0224.patch
 Patch96: openssl-1.0.1e-cve-2014-3470.patch
+Patch97: openssl-1.0.1e-dtls-ecc-ext.patch
 Patch100: openssl-1.0.1e-cve-2014-3505.patch
 Patch101: openssl-1.0.1e-cve-2014-3506.patch
 Patch102: openssl-1.0.1e-cve-2014-3507.patch
@@ -105,6 +107,14 @@ Patch106: openssl-1.0.1e-cve-2014-3511.patch
 Patch110: openssl-1.0.1e-cve-2014-3567.patch
 Patch111: openssl-1.0.1e-cve-2014-3513.patch
 Patch112: openssl-1.0.1e-fallback-scsv.patch
+Patch113: openssl-1.0.1e-copy-algo.patch
+Patch114: openssl-1.0.1e-cve-2014-3570.patch
+Patch115: openssl-1.0.1e-cve-2014-3571.patch
+Patch116: openssl-1.0.1e-cve-2014-3572.patch
+Patch117: openssl-1.0.1e-cve-2014-8275.patch
+Patch118: openssl-1.0.1e-cve-2015-0204.patch
+Patch119: openssl-1.0.1e-cve-2015-0205.patch
+Patch120: openssl-1.0.1e-cve-2015-0206.patch
 
 License: OpenSSL
 Group: System Environment/Libraries
@@ -217,6 +227,7 @@ cp %{SOURCE12} %{SOURCE13} crypto/ec/
 %patch75 -p1 -b .compat
 %patch76 -p1 -b .fips-reqs
 %patch77 -p1 -b .weak-ciphers
+%patch41 -p1 -b .ssl2-noec
 
 %patch81 -p1 -b .padlock64
 %patch82 -p1 -b .backports
@@ -235,6 +246,7 @@ cp %{SOURCE12} %{SOURCE13} crypto/ec/
 %patch94 -p1 -b .dtls1-dos
 %patch95 -p1 -b .keying-mitm
 %patch96 -p1 -b .anon-ecdh-dos
+%patch97 -p1 -b .dtls-ecc-ext
 %patch100 -p1 -b .dtls-doublefree
 %patch101 -p1 -b .dtls-sizechecks
 %patch102 -p1 -b .dtls-memleak
@@ -245,6 +257,14 @@ cp %{SOURCE12} %{SOURCE13} crypto/ec/
 %patch110 -p1 -b .ticket-leak
 %patch111 -p1 -b .srtp-leak
 %patch112 -p1 -b .fallback-scsv
+%patch113 -p1 -b .copy-algo
+%patch114 -p1 -b .bn-sqr
+%patch115 -p1 -b .dtls1-reads
+%patch116 -p1 -b .ecdh-downgrade
+%patch117 -p1 -b .cert-fingerprint
+%patch118 -p1 -b .rsa-ephemeral
+%patch119 -p1 -b .dh-unauthenticated
+%patch120 -p1 -b .dtls-rec-leak
 
 sed -i 's/SHLIB_VERSION_NUMBER "1.0.0"/SHLIB_VERSION_NUMBER "%{version}"/' crypto/opensslv.h
 
@@ -508,6 +528,19 @@ rm -rf $RPM_BUILD_ROOT/%{_libdir}/fipscanister.*
 %postun libs -p /sbin/ldconfig
 
 %changelog
+* Tue Jan 13 2015 Tomáš Mráz <tmraz at redhat.com> 1.0.1e-41
+- fix CVE-2014-3570 - incorrect computation in BN_sqr()
+- fix CVE-2014-3571 - possible crash in dtls1_get_record()
+- fix CVE-2014-3572 - possible downgrade of ECDH ciphersuite to non-PFS state
+- fix CVE-2014-8275 - various certificate fingerprint issues
+- fix CVE-2015-0204 - remove support for RSA ephemeral keys for non-export
+  ciphersuites and on server
+- fix CVE-2015-0205 - do not allow unauthenticated client DH certificate
+- fix CVE-2015-0206 - possible memory leak when buffering DTLS records
+- add ECC TLS extensions to DTLS (#1119803)
+- do not send ECC ciphersuites in SSLv2 client hello (#1090955)
+- copy digest algorithm when handling SNI context switch
+
 * Thu Oct 16 2014 Tomáš Mráz <tmraz at redhat.com> 1.0.1e-40
 - fix CVE-2014-3567 - memory leak when handling session tickets
 - fix CVE-2014-3513 - memory leak in srtp support


More information about the scm-commits mailing list