[kernel/f13/master] TCP networking fixes from 2.6.36.3, including one CVE

Wed Jan 26 14:59:51 UTC 2011

commit c5251bc7fbc0d307c8ab800603694160eec67297
Author: Chuck Ebbert <cebbert at redhat.com>
Date:   Wed Jan 26 09:58:08 2011 -0500

    TCP networking fixes from 2.6.36.3, including one CVE
    
      CVE-2010-4165: possible kernel oops from user MSS

 tcp-avoid-a-possible-divide-by-zero.patch          |   48 ++++++++++++++
 ...g-fix-in-initialization-of-receive-window.patch |   41 ++++++++++++
 ...hange-unlocked-socket-state-in-tcp_v4_err.patch |   56 ++++++++++++++++
 ...increase-tcp_maxseg-socket-option-minimum.patch |   39 +++++++++++
 tcp-make-tcp_maxseg-minimum-more-correct.patch     |   30 +++++++++
 tcp-protect-sysctl_tcp_cookie_size-reads.patch     |   68 ++++++++++++++++++++
 6 files changed, 282 insertions(+), 0 deletions(-)
---

diff --git a/tcp-avoid-a-possible-divide-by-zero.patch b/tcp-avoid-a-possible-divide-by-zero.patch
new file mode 100644
index 0000000..e8642c8
--- /dev/null
+++ b/tcp-avoid-a-possible-divide-by-zero.patch
@@ -0,0 +1,48 @@
+From b6bd33114e63d96f424c8e2baf46b3a58745077b Mon Sep 17 00:00:00 2001
+From: Eric Dumazet <eric.dumazet at gmail.com>
+Date: Tue, 7 Dec 2010 12:03:55 +0000
+Subject: tcp: avoid a possible divide by zero
+
+
+From: Eric Dumazet <eric.dumazet at gmail.com>
+
+[ Upstream commit ad9f4f50fe9288bbe65b7dfd76d8820afac6a24c ]
+
+sysctl_tcp_tso_win_divisor might be set to zero while one cpu runs in
+tcp_tso_should_defer(). Make sure we don't allow a divide by zero by
+reading sysctl_tcp_tso_win_divisor exactly once.
+
+Signed-off-by: Eric Dumazet <eric.dumazet at gmail.com>
+Signed-off-by: David S. Miller <davem at davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh at suse.de>
+---
+ net/ipv4/tcp_output.c |    6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -1518,6 +1518,7 @@ static int tcp_tso_should_defer(struct s
+ 	struct tcp_sock *tp = tcp_sk(sk);
+ 	const struct inet_connection_sock *icsk = inet_csk(sk);
+ 	u32 send_win, cong_win, limit, in_flight;
++	int win_divisor;
+ 
+ 	if (TCP_SKB_CB(skb)->flags & TCPHDR_FIN)
+ 		goto send_now;
+@@ -1549,13 +1550,14 @@ static int tcp_tso_should_defer(struct s
+ 	if ((skb != tcp_write_queue_tail(sk)) && (limit >= skb->len))
+ 		goto send_now;
+ 
+-	if (sysctl_tcp_tso_win_divisor) {
++	win_divisor = ACCESS_ONCE(sysctl_tcp_tso_win_divisor);
++	if (win_divisor) {
+ 		u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache);
+ 
+ 		/* If at least some fraction of a window is available,
+ 		 * just use it.
+ 		 */
+-		chunk /= sysctl_tcp_tso_win_divisor;
++		chunk /= win_divisor;
+ 		if (limit >= chunk)
+ 			goto send_now;
+ 	} else {
diff --git a/tcp-bug-fix-in-initialization-of-receive-window.patch b/tcp-bug-fix-in-initialization-of-receive-window.patch
new file mode 100644
index 0000000..241b1d7
--- /dev/null
+++ b/tcp-bug-fix-in-initialization-of-receive-window.patch
@@ -0,0 +1,41 @@
+From 18ab4520fd46404b67d415045ee5d9c4535eaacb Mon Sep 17 00:00:00 2001
+From: Nandita Dukkipati <nanditad at google.com>
+Date: Fri, 3 Dec 2010 13:33:44 +0000
+Subject: tcp: Bug fix in initialization of receive window.
+
+
+From: Nandita Dukkipati <nanditad at google.com>
+
+[ Upstream commit b1afde60f2b9ee8444fba4e012dc99a3b28d224d ]
+
+The bug has to do with boundary checks on the initial receive window.
+If the initial receive window falls between init_cwnd and the
+receive window specified by the user, the initial window is incorrectly
+brought down to init_cwnd. The correct behavior is to allow it to
+remain unchanged.
+
+Signed-off-by: Nandita Dukkipati <nanditad at google.com>
+Signed-off-by: David S. Miller <davem at davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh at suse.de>
+---
+ net/ipv4/tcp_output.c |    9 ++++-----
+ 1 file changed, 4 insertions(+), 5 deletions(-)
+
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -237,11 +237,10 @@ void tcp_select_initial_window(int __spa
+ 		/* when initializing use the value from init_rcv_wnd
+ 		 * rather than the default from above
+ 		 */
+-		if (init_rcv_wnd &&
+-		    (*rcv_wnd > init_rcv_wnd * mss))
+-			*rcv_wnd = init_rcv_wnd * mss;
+-		else if (*rcv_wnd > init_cwnd * mss)
+-			*rcv_wnd = init_cwnd * mss;
++		if (init_rcv_wnd)
++			*rcv_wnd = min(*rcv_wnd, init_rcv_wnd * mss);
++		else
++			*rcv_wnd = min(*rcv_wnd, init_cwnd * mss);
+ 	}
+ 
+ 	/* Set the clamp no higher than max representable value */
diff --git a/tcp-don-t-change-unlocked-socket-state-in-tcp_v4_err.patch b/tcp-don-t-change-unlocked-socket-state-in-tcp_v4_err.patch
new file mode 100644
index 0000000..10e02a9
--- /dev/null
+++ b/tcp-don-t-change-unlocked-socket-state-in-tcp_v4_err.patch
@@ -0,0 +1,56 @@
+From 34eef919139f6a7558b43576b12b40731f12f7d7 Mon Sep 17 00:00:00 2001
+From: David S. Miller <davem at davemloft.net>
+Date: Fri, 12 Nov 2010 13:35:00 -0800
+Subject: tcp: Don't change unlocked socket state in tcp_v4_err().
+
+
+From: David S. Miller <davem at davemloft.net>
+
+[ Upstream commit 8f49c2703b33519aaaccc63f571b465b9d2b3a2d ]
+
+Alexey Kuznetsov noticed a regression introduced by
+commit f1ecd5d9e7366609d640ff4040304ea197fbc618
+("Revert Backoff [v3]: Revert RTO on ICMP destination unreachable")
+
+The RTO and timer modification code added to tcp_v4_err()
+doesn't check sock_owned_by_user(), which if true means we
+don't have exclusive access to the socket and therefore cannot
+modify its critical state.
+
+Just skip this new code block if sock_owned_by_user() is true
+and eliminate the now superfluous sock_owned_by_user() code
+block contained within.
+
+Reported-by: Alexey Kuznetsov <kuznet at ms2.inr.ac.ru>
+Signed-off-by: David S. Miller <davem at davemloft.net>
+CC: Damian Lukowski <damian at tvk.rwth-aachen.de>
+Acked-by: Eric Dumazet <eric.dumazet at gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh at suse.de>
+---
+ net/ipv4/tcp_ipv4.c |    8 +++-----
+ 1 file changed, 3 insertions(+), 5 deletions(-)
+
+--- a/net/ipv4/tcp_ipv4.c
++++ b/net/ipv4/tcp_ipv4.c
+@@ -415,6 +415,9 @@ void tcp_v4_err(struct sk_buff *icmp_skb
+ 		    !icsk->icsk_backoff)
+ 			break;
+ 
++		if (sock_owned_by_user(sk))
++			break;
++
+ 		icsk->icsk_backoff--;
+ 		inet_csk(sk)->icsk_rto = __tcp_set_rto(tp) <<
+ 					 icsk->icsk_backoff;
+@@ -429,11 +432,6 @@ void tcp_v4_err(struct sk_buff *icmp_skb
+ 		if (remaining) {
+ 			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
+ 						  remaining, TCP_RTO_MAX);
+-		} else if (sock_owned_by_user(sk)) {
+-			/* RTO revert clocked out retransmission,
+-			 * but socket is locked. Will defer. */
+-			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
+-						  HZ/20, TCP_RTO_MAX);
+ 		} else {
+ 			/* RTO revert clocked out retransmission.
+ 			 * Will retransmit now */
diff --git a/tcp-increase-tcp_maxseg-socket-option-minimum.patch b/tcp-increase-tcp_maxseg-socket-option-minimum.patch
new file mode 100644
index 0000000..f42df41
--- /dev/null
+++ b/tcp-increase-tcp_maxseg-socket-option-minimum.patch
@@ -0,0 +1,39 @@
+From 47a8c78fffc3bde1f828c9fce0aae5ae5320cfb3 Mon Sep 17 00:00:00 2001
+From: David S. Miller <davem at davemloft.net>
+Date: Wed, 10 Nov 2010 21:35:37 -0800
+Subject: tcp: Increase TCP_MAXSEG socket option minimum.
+
+
+From: David S. Miller <davem at davemloft.net>
+
+[ Upstream commit 7a1abd08d52fdeddb3e9a5a33f2f15cc6a5674d2 ]
+
+As noted by Steve Chen, since commit
+f5fff5dc8a7a3f395b0525c02ba92c95d42b7390 ("tcp: advertise MSS
+requested by user") we can end up with a situation where
+tcp_select_initial_window() does a divide by a zero (or
+even negative) mss value.
+
+The problem is that sometimes we effectively subtract
+TCPOLEN_TSTAMP_ALIGNED and/or TCPOLEN_MD5SIG_ALIGNED from the mss.
+
+Fix this by increasing the minimum from 8 to 64.
+
+Reported-by: Steve Chen <schen at mvista.com>
+Signed-off-by: David S. Miller <davem at davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh at suse.de>
+---
+ net/ipv4/tcp.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -2246,7 +2246,7 @@ static int do_tcp_setsockopt(struct sock
+ 		/* Values greater than interface MTU won't take effect. However
+ 		 * at the point when this call is done we typically don't yet
+ 		 * know which interface is going to be used */
+-		if (val < 8 || val > MAX_TCP_WINDOW) {
++		if (val < 64 || val > MAX_TCP_WINDOW) {
+ 			err = -EINVAL;
+ 			break;
+ 		}
diff --git a/tcp-make-tcp_maxseg-minimum-more-correct.patch b/tcp-make-tcp_maxseg-minimum-more-correct.patch
new file mode 100644
index 0000000..2d04147
--- /dev/null
+++ b/tcp-make-tcp_maxseg-minimum-more-correct.patch
@@ -0,0 +1,30 @@
+From 9f3ec7da60ef8443addc35828214f129590495f2 Mon Sep 17 00:00:00 2001
+From: David S. Miller <davem at davemloft.net>
+Date: Wed, 24 Nov 2010 11:47:22 -0800
+Subject: tcp: Make TCP_MAXSEG minimum more correct.
+
+
+From: David S. Miller <davem at davemloft.net>
+
+[ Upstream commit c39508d6f118308355468314ff414644115a07f3 ]
+
+Use TCP_MIN_MSS instead of constant 64.
+
+Reported-by: Min Zhang <mzhang at mvista.com>
+Signed-off-by: David S. Miller <davem at davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh at suse.de>
+---
+ net/ipv4/tcp.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -2246,7 +2246,7 @@ static int do_tcp_setsockopt(struct sock
+ 		/* Values greater than interface MTU won't take effect. However
+ 		 * at the point when this call is done we typically don't yet
+ 		 * know which interface is going to be used */
+-		if (val < 64 || val > MAX_TCP_WINDOW) {
++		if (val < TCP_MIN_MSS || val > MAX_TCP_WINDOW) {
+ 			err = -EINVAL;
+ 			break;
+ 		}
diff --git a/tcp-protect-sysctl_tcp_cookie_size-reads.patch b/tcp-protect-sysctl_tcp_cookie_size-reads.patch
new file mode 100644
index 0000000..8f2e2e7
--- /dev/null
+++ b/tcp-protect-sysctl_tcp_cookie_size-reads.patch
@@ -0,0 +1,68 @@
+From e1e9ef4b173c9437d9966b2d953a2c624190d2c9 Mon Sep 17 00:00:00 2001
+From: Eric Dumazet <eric.dumazet at gmail.com>
+Date: Tue, 7 Dec 2010 12:20:47 +0000
+Subject: tcp: protect sysctl_tcp_cookie_size reads
+
+
+From: Eric Dumazet <eric.dumazet at gmail.com>
+
+[ Upstream commit f19872575ff7819a3723154657a497d9bca66b33 ]
+
+Make sure sysctl_tcp_cookie_size is read once in
+tcp_cookie_size_check(), or we might return an illegal value to caller
+if sysctl_tcp_cookie_size is changed by another cpu.
+
+Signed-off-by: Eric Dumazet <eric.dumazet at gmail.com>
+Cc: Ben Hutchings <bhutchings at solarflare.com>
+Cc: William Allen Simpson <william.allen.simpson at gmail.com>
+Signed-off-by: David S. Miller <davem at davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh at suse.de>
+---
+ net/ipv4/tcp_output.c |   27 +++++++++++++++------------
+ 1 file changed, 15 insertions(+), 12 deletions(-)
+
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -391,27 +391,30 @@ struct tcp_out_options {
+  */
+ static u8 tcp_cookie_size_check(u8 desired)
+ {
+-	if (desired > 0) {
++	int cookie_size;
++
++	if (desired > 0)
+ 		/* previously specified */
+ 		return desired;
+-	}
+-	if (sysctl_tcp_cookie_size <= 0) {
++
++	cookie_size = ACCESS_ONCE(sysctl_tcp_cookie_size);
++	if (cookie_size <= 0)
+ 		/* no default specified */
+ 		return 0;
+-	}
+-	if (sysctl_tcp_cookie_size <= TCP_COOKIE_MIN) {
++
++	if (cookie_size <= TCP_COOKIE_MIN)
+ 		/* value too small, specify minimum */
+ 		return TCP_COOKIE_MIN;
+-	}
+-	if (sysctl_tcp_cookie_size >= TCP_COOKIE_MAX) {
++
++	if (cookie_size >= TCP_COOKIE_MAX)
+ 		/* value too large, specify maximum */
+ 		return TCP_COOKIE_MAX;
+-	}
+-	if (0x1 & sysctl_tcp_cookie_size) {
++
++	if (cookie_size & 1)
+ 		/* 8-bit multiple, illegal, fix it */
+-		return (u8)(sysctl_tcp_cookie_size + 0x1);
+-	}
+-	return (u8)sysctl_tcp_cookie_size;
++		cookie_size++;
++
++	return (u8)cookie_size;
+ }
+ 
+ /* Write previously computed TCP options to the packet.