[kernel/f14] CVE-2011-1083: excessive in-kernel CPU consumption when creating large nested epoll structures (rhbz 748668)

Josh Boyer jwboyer at fedoraproject.org
Tue Oct 25 16:47:58 UTC 2011


commit 528c087e3658d0bad14120efe4403d7629a59deb
Author: Josh Boyer <jwboyer at redhat.com>
Date:   Tue Oct 25 11:57:55 2011 -0400

    CVE-2011-1083: excessive in-kernel CPU consumption when creating large nested
    epoll structures (rhbz 748668)

 epoll-fix-spurious-lockdep-warnings.patch |  116 +++++++
 epoll-limit-paths.patch                   |  465 +++++++++++++++++++++++++++++
 kernel.spec                               |   13 +-
 3 files changed, 593 insertions(+), 1 deletions(-)
---
diff --git a/epoll-fix-spurious-lockdep-warnings.patch b/epoll-fix-spurious-lockdep-warnings.patch
new file mode 100644
index 0000000..131796a
--- /dev/null
+++ b/epoll-fix-spurious-lockdep-warnings.patch
@@ -0,0 +1,116 @@
+epoll can acquire multiple ep->mutex on multiple "struct eventpoll"s
+at once in the case where one epoll fd is monitoring another epoll
+fd. This is perfectly OK, since we're careful about the lock ordering,
+but causes spurious lockdep warnings. Annotate the recursion using
+mutex_lock_nested, and add a comment explaining the nesting rules for
+good measure.
+
+Reported-by: Paul Bolle <pebolle at tiscali.nl>
+Signed-off-by: Nelson Elhage <nelhage at nelhage.com>
+---
+ I've tested this on a synthetic epoll test case that just adds e1 to
+ e2 and then does an epoll_wait(). I verified that it caused lockdep
+ problems on 3.0 and that this patch fixed it, but I haven't done more
+ extensive testing. Paul, are you able to test systemd against this?
+
+ fs/eventpoll.c |   25 ++++++++++++++++++-------
+ 1 files changed, 18 insertions(+), 7 deletions(-)
+
+diff --git a/fs/eventpoll.c b/fs/eventpoll.c
+index f9cfd16..0cb7bc6 100644
+--- a/fs/eventpoll.c
++++ b/fs/eventpoll.c
+@@ -76,6 +76,15 @@
+  * Events that require holding "epmutex" are very rare, while for
+  * normal operations the epoll private "ep->mtx" will guarantee
+  * a better scalability.
++ * It is possible to acquire multiple "ep->mtx"es at once in the case
++ * when one epoll fd is added to another. In this case, we always
++ * acquire the locks in the order of nesting (i.e. after epoll_ctl(e1,
++ * EPOLL_CTL_ADD, e2), e1->mtx will always be acquired before
++ * e2->mtx). Since we disallow cycles of epoll file descriptors, this
++ * ensures that the mutexes are well-ordered. In order to communicate
++ * this nesting to lockdep, when walking a tree of epoll file
++ * descriptors, we use the current recursion depth as the lockdep
++ * subkey.
+  */
+ 
+ /* Epoll private bits inside the event mask */
+@@ -464,13 +473,15 @@ static void ep_unregister_pollwait(struct eventpoll *ep, struct epitem *epi)
+  * @ep: Pointer to the epoll private data structure.
+  * @sproc: Pointer to the scan callback.
+  * @priv: Private opaque data passed to the @sproc callback.
++ * @depth: The current depth of recursive f_op->poll calls.
+  *
+  * Returns: The same integer error code returned by the @sproc callback.
+  */
+ static int ep_scan_ready_list(struct eventpoll *ep,
+ 			      int (*sproc)(struct eventpoll *,
+ 					   struct list_head *, void *),
+-			      void *priv)
++			      void *priv,
++			      int depth)
+ {
+ 	int error, pwake = 0;
+ 	unsigned long flags;
+@@ -481,7 +492,7 @@ static int ep_scan_ready_list(struct eventpoll *ep,
+ 	 * We need to lock this because we could be hit by
+ 	 * eventpoll_release_file() and epoll_ctl().
+ 	 */
+-	mutex_lock(&ep->mtx);
++	mutex_lock_nested(&ep->mtx, depth);
+ 
+ 	/*
+ 	 * Steal the ready list, and re-init the original one to the
+@@ -670,7 +681,7 @@ static int ep_read_events_proc(struct eventpoll *ep, struct list_head *head,
+ 
+ static int ep_poll_readyevents_proc(void *priv, void *cookie, int call_nests)
+ {
+-	return ep_scan_ready_list(priv, ep_read_events_proc, NULL);
++	return ep_scan_ready_list(priv, ep_read_events_proc, NULL, call_nests + 1);
+ }
+ 
+ static unsigned int ep_eventpoll_poll(struct file *file, poll_table *wait)
+@@ -737,7 +748,7 @@ void eventpoll_release_file(struct file *file)
+ 
+ 		ep = epi->ep;
+ 		list_del_init(&epi->fllink);
+-		mutex_lock(&ep->mtx);
++		mutex_lock_nested(&ep->mtx, 0);
+ 		ep_remove(ep, epi);
+ 		mutex_unlock(&ep->mtx);
+ 	}
+@@ -1134,7 +1145,7 @@ static int ep_send_events(struct eventpoll *ep,
+ 	esed.maxevents = maxevents;
+ 	esed.events = events;
+ 
+-	return ep_scan_ready_list(ep, ep_send_events_proc, &esed);
++	return ep_scan_ready_list(ep, ep_send_events_proc, &esed, 0);
+ }
+ 
+ static inline struct timespec ep_set_mstimeout(long ms)
+@@ -1267,7 +1278,7 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests)
+ 	struct rb_node *rbp;
+ 	struct epitem *epi;
+ 
+-	mutex_lock(&ep->mtx);
++	mutex_lock_nested(&ep->mtx, call_nests + 1);
+ 	for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) {
+ 		epi = rb_entry(rbp, struct epitem, rbn);
+ 		if (unlikely(is_file_epoll(epi->ffd.file))) {
+@@ -1409,7 +1420,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
+ 	}
+ 
+ 
+-	mutex_lock(&ep->mtx);
++	mutex_lock_nested(&ep->mtx, 0);
+ 
+ 	/*
+ 	 * Try to lookup the file inside our RB tree, Since we grabbed "mtx"
+-- 
+1.7.4.1
+
+--
+To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
+the body of a message to majordomo at vger.kernel.org
+More majordomo info at  http://vger.kernel.org/majordomo-info.html
\ No newline at end of file
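
The synthetic test case described in the patch notes above (adding one
epoll fd to another and then doing an epoll_wait()) corresponds roughly
to the userspace sketch below; it is illustrative only, not part of the
patch, and omits error handling:

#include <sys/epoll.h>
#include <unistd.h>

int main(void)
{
	struct epoll_event ev = { .events = EPOLLIN };
	int e1 = epoll_create1(0);
	int e2 = epoll_create1(0);

	/* Monitor one epoll fd with another; the EPOLL_CTL_ADD below polls
	 * e1 while e2->mtx is held, so the two ep->mtx locks nest. */
	ev.data.fd = e1;
	epoll_ctl(e2, EPOLL_CTL_ADD, e1, &ev);

	/* Waiting on the outer instance polls the inner one as well. */
	epoll_wait(e2, &ev, 1, 0);

	close(e1);
	close(e2);
	return 0;
}

On a lockdep-enabled kernel without the mutex_lock_nested() annotation,
this nesting is what produces the spurious possible-deadlock report.
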
diff --git a/epoll-limit-paths.patch b/epoll-limit-paths.patch
new file mode 100644
index 0000000..440db27
--- /dev/null
+++ b/epoll-limit-paths.patch
@@ -0,0 +1,465 @@
+From 6a4ca79652219cf22da800d990e5b46feaea1ad9 Mon Sep 17 00:00:00 2001
+From: Jason Baron <jbaron at redhat.com>
+Date: Mon, 24 Oct 2011 14:59:02 +1100
+Subject: [PATCH] epoll: limit paths
+
+epoll: limit paths
+
+The current epoll code can be tickled to run basically indefinitely in
+both the loop detection path check (on ep_insert()) and in the wakeup paths.
+The programs that tickle this behavior set up deeply linked networks of
+epoll file descriptors that cause the epoll algorithms to traverse them
+indefinitely.  A couple of these sample programs have been previously
+posted in this thread: https://lkml.org/lkml/2011/2/25/297.
+
+To fix the loop detection path check algorithms, I simply keep track of
+the epoll nodes that have already been visited.  Thus, the loop detection
+becomes proportional to the number of epoll file descriptors and links.
+This dramatically decreases the run-time of the loop check algorithm.  In
+one diabolical case I tried, it reduced the run-time from 15 minutes (all
+in kernel time) to 0.3 seconds.
+
+Fixing the wakeup paths could be done at wakeup time in a similar manner
+by keeping track of nodes that have already been visited, but it is
+harder, since there can be multiple wakeups on different
+CPUs.  Thus, I've opted to limit the number of possible wakeup paths when
+the paths are created.
+
+This is accomplished by noting that the end-point file descriptors that
+are found during the loop detection pass (from the newly added link) are
+actually the sources for wakeup events.  I keep a list of these file
+descriptors and limit the number and length of these paths that emanate
+from these 'source file descriptors'.  In the current implementation I
+allow 1000 paths of length 1, 500 of length 2, 100 of length 3, 50 of
+length 4 and 10 of length 5.  Note that it is sufficient to check the
+'source file descriptors' reachable from the newly added link, since no
+other 'source file descriptors' will have newly added links.  This allows
+us to check only the wakeup paths that may have gotten too long, and not
+re-check all possible wakeup paths on the system.
+
+In terms of the path limit selection, I think it's first worth noting that
+the most common case for epoll is probably the model where you have 1
+epoll file descriptor that is monitoring n 'source file
+descriptors'.  In this case, each 'source file descriptor' has 1 path of
+length 1.  Thus, I believe that the limits I'm proposing are quite
+reasonable and in fact may be too generous.  Thus, I'm hoping that the
+proposed limits will not cause any workloads that currently work to
+fail.
+
+In terms of locking, I have extended the use of the 'epmutex' to all
+epoll_ctl add and remove operations.  Currently it's only used in a subset
+of the add paths.  I need to hold the epmutex so that we can correctly
+traverse a coherent graph to check the number of paths.  I believe that
+this additional locking is probably OK, since it's in the setup/teardown
+paths and doesn't affect the running paths, but it certainly is going to
+add some extra overhead.  Also worth noting is that the epmutex was
+recently added to the epoll_ctl add operations in the initial path loop
+detection code, using the argument that it was not on a critical path.
+
+Another thing to note here is the length of epoll chains that is allowed.
+Currently, eventpoll.c defines:
+
+/* Maximum number of nesting allowed inside epoll sets */
+#define EP_MAX_NESTS 4
+
+This basically means that I am limited to a graph depth of 5 (EP_MAX_NESTS
++ 1).  However, this limit is currently only enforced during the loop
+check detection code, and only when the epoll file descriptors are added
+in a certain order.  Thus, this limit is currently easily bypassed.  The
+newly added check for wakeup paths strictly limits the wakeup paths to a
+length of 5, regardless of the order in which ep's are linked together.
+Thus, a side-effect of the new code is a more consistent enforcement of
+the graph depth.
+
+Thus far, I've tested this, using the sample programs previously
+mentioned, which now either return quickly or return -EINVAL.  I've also
+tested using the piptest.c epoll tester, which showed no difference in
+performance.  I've also created a number of different epoll networks and
+tested that they behave as expected.
+
+I believe this solves the original diabolical test cases, while still
+preserving the sane epoll nesting.
+
+Signed-off-by: Jason Baron <jbaron at redhat.com>
+Cc: Nelson Elhage <nelhage at ksplice.com>
+Cc: Davide Libenzi <davidel at xmailserver.org>
+Signed-off-by: Andrew Morton <akpm at linux-foundation.org>
+---
+ fs/eventpoll.c            |  226 ++++++++++++++++++++++++++++++++++++++++-----
+ include/linux/eventpoll.h |    1 +
+ include/linux/fs.h        |    1 +
+ 3 files changed, 203 insertions(+), 25 deletions(-)
+
+diff --git a/fs/eventpoll.c b/fs/eventpoll.c
+index 4a53743..414ac74 100644
+--- a/fs/eventpoll.c
++++ b/fs/eventpoll.c
+@@ -197,6 +197,12 @@ struct eventpoll {
+ 
+ 	/* The user that created the eventpoll descriptor */
+ 	struct user_struct *user;
++
++	struct file *file;
++
++	/* used to optimize loop detection check */
++	int visited;
++	struct list_head visitedllink;
+ };
+ 
+ /* Wait structure used by the poll hooks */
+@@ -255,6 +261,12 @@ static struct kmem_cache *epi_cache __read_mostly;
+ /* Slab cache used to allocate "struct eppoll_entry" */
+ static struct kmem_cache *pwq_cache __read_mostly;
+ 
++/* Visited nodes during ep_loop_check(), so we can unset them when we finish */
++LIST_HEAD(visited_list);
++
++/* Files with newly added links, which need a limit on emanating paths */
++LIST_HEAD(tfile_check_list);
++
+ #ifdef CONFIG_SYSCTL
+ 
+ #include <linux/sysctl.h>
+@@ -276,6 +288,12 @@ ctl_table epoll_table[] = {
+ };
+ #endif /* CONFIG_SYSCTL */
+ 
++static const struct file_operations eventpoll_fops;
++
++static inline int is_file_epoll(struct file *f)
++{
++	return f->f_op == &eventpoll_fops;
++}
+ 
+ /* Setup the structure that is used as key for the RB tree */
+ static inline void ep_set_ffd(struct epoll_filefd *ffd,
+@@ -711,12 +729,6 @@ static const struct file_operations eventpoll_fops = {
+ 	.llseek		= noop_llseek,
+ };
+ 
+-/* Fast test to see if the file is an evenpoll file */
+-static inline int is_file_epoll(struct file *f)
+-{
+-	return f->f_op == &eventpoll_fops;
+-}
+-
+ /*
+  * This is called from eventpoll_release() to unlink files from the eventpoll
+  * interface. We need to have this facility to cleanup correctly files that are
+@@ -926,6 +938,96 @@ static void ep_rbtree_insert(struct eventpoll *ep, struct epitem *epi)
+ 	rb_insert_color(&epi->rbn, &ep->rbr);
+ }
+ 
++
++
++#define PATH_ARR_SIZE 5
++/* These are the number of paths of length 1 to 5 that we are allowing to emanate
++ * from a single file of interest. For example, we allow 1000 paths of length
++ * 1, to emanate from each file of interest. This essentially represents the
++ * potential wakeup paths, which need to be limited in order to avoid massive
++ * uncontrolled wakeup storms. The common use case should be a single ep which
++ * is connected to n file sources. In this case each file source has 1 path
++ * of length 1. Thus, the numbers below should be more than sufficient.
++ */
++int path_limits[PATH_ARR_SIZE] = { 1000, 500, 100, 50, 10 };
++int path_count[PATH_ARR_SIZE];
++
++static int path_count_inc(int nests)
++{
++	if (++path_count[nests] > path_limits[nests])
++		return -1;
++	return 0;
++}
++
++static void path_count_init(void)
++{
++	int i;
++
++	for (i = 0; i < PATH_ARR_SIZE; i++)
++		path_count[i] = 0;
++}
++
++static int reverse_path_check_proc(void *priv, void *cookie, int call_nests)
++{
++	int error = 0;
++	struct file *file = priv;
++	struct file *child_file;
++	struct epitem *epi;
++
++	list_for_each_entry(epi, &file->f_ep_links, fllink) {
++		child_file = epi->ep->file;
++		if (is_file_epoll(child_file)) {
++			if (list_empty(&child_file->f_ep_links)) {
++				if (path_count_inc(call_nests)) {
++					error = -1;
++					break;
++				}
++			} else {
++				error = ep_call_nested(&poll_loop_ncalls,
++							EP_MAX_NESTS,
++							reverse_path_check_proc,
++							child_file, child_file,
++							current);
++			}
++			if (error != 0)
++				break;
++		} else {
++			printk(KERN_ERR "reverse_path_check_proc: "
++				"file is not an ep!\n");
++		}
++	}
++	return error;
++}
++
++/**
++ * reverse_path_check - The tfile_check_list is a list of file *, which have
++ *                      links that are proposed to be newly added. We need to
++ *                      make sure that those added links don't add too many
++ *                      paths such that we will spend all our time waking up
++ *                      eventpoll objects.
++ *
++ * Returns: Zero if the proposed links don't create too many paths,
++ *	    -1 otherwise.
++ */
++static int reverse_path_check(void)
++{
++	int length = 0;
++	int error = 0;
++	struct file *current_file;
++
++	/* let's call this for all tfiles */
++	list_for_each_entry(current_file, &tfile_check_list, f_tfile_llink) {
++		length++;
++		path_count_init();
++		error = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
++					reverse_path_check_proc, current_file,
++					current_file, current);
++		if (error)
++			break;
++	}
++	return error;
++}
++
+ /*
+  * Must be called with "mtx" held.
+  */
+@@ -987,6 +1089,11 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
+ 	 */
+ 	ep_rbtree_insert(ep, epi);
+ 
++	/* now check if we've created too many backpaths */
++	error = -EINVAL;
++	if (reverse_path_check())
++		goto error_remove_epi;
++
+ 	/* We have to drop the new item inside our item list to keep track of it */
+ 	spin_lock_irqsave(&ep->lock, flags);
+ 
+@@ -1011,6 +1118,14 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
+ 
+ 	return 0;
+ 
++error_remove_epi:
++	spin_lock(&tfile->f_lock);
++	if (ep_is_linked(&epi->fllink))
++		list_del_init(&epi->fllink);
++	spin_unlock(&tfile->f_lock);
++
++	rb_erase(&epi->rbn, &ep->rbr);
++
+ error_unregister:
+ 	ep_unregister_pollwait(ep, epi);
+ 
+@@ -1275,18 +1390,35 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests)
+ 	int error = 0;
+ 	struct file *file = priv;
+ 	struct eventpoll *ep = file->private_data;
++	struct eventpoll *ep_tovisit;
+ 	struct rb_node *rbp;
+ 	struct epitem *epi;
+ 
+ 	mutex_lock_nested(&ep->mtx, call_nests + 1);
++	ep->visited = 1;
++	list_add(&ep->visitedllink, &visited_list);
+ 	for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) {
+ 		epi = rb_entry(rbp, struct epitem, rbn);
+ 		if (unlikely(is_file_epoll(epi->ffd.file))) {
++			ep_tovisit = epi->ffd.file->private_data;
++			if (ep_tovisit->visited)
++				continue;
+ 			error = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
+-					       ep_loop_check_proc, epi->ffd.file,
+-					       epi->ffd.file->private_data, current);
++					ep_loop_check_proc, epi->ffd.file,
++					ep_tovisit, current);
+ 			if (error != 0)
+ 				break;
++		} else {
++			/* if we've reached a file that is not associated with
++			 * an ep, then we need to check if the newly added
++			 * links are going to add too many wakeup paths. We do
++			 * this by adding it to the tfile_check_list, if it's
++			 * not already there, and calling reverse_path_check()
++			 * during ep_insert()
++			 */
++			if (list_empty(&epi->ffd.file->f_tfile_llink))
++				list_add(&epi->ffd.file->f_tfile_llink,
++					 &tfile_check_list);
+ 		}
+ 	}
+ 	mutex_unlock(&ep->mtx);
+@@ -1307,8 +1439,30 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests)
+  */
+ static int ep_loop_check(struct eventpoll *ep, struct file *file)
+ {
+-	return ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
++	int ret;
++	struct eventpoll *ep_cur, *ep_next;
++
++	ret = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
+ 			      ep_loop_check_proc, file, ep, current);
++	/* clear visited list */
++	list_for_each_entry_safe(ep_cur, ep_next, &visited_list, visitedllink) {
++		ep_cur->visited = 0;
++		list_del(&ep_cur->visitedllink);
++	}
++	return ret;
++}
++
++static void clear_tfile_check_list(void)
++{
++	struct file *file;
++
++	/* first clear the tfile_check_list */
++	while (!list_empty(&tfile_check_list)) {
++		file = list_first_entry(&tfile_check_list, struct file,
++					f_tfile_llink);
++		list_del_init(&file->f_tfile_llink);
++	}
++	INIT_LIST_HEAD(&tfile_check_list);
+ }
+ 
+ /*
+@@ -1316,8 +1470,9 @@ static int ep_loop_check(struct eventpoll *ep, struct file *file)
+  */
+ SYSCALL_DEFINE1(epoll_create1, int, flags)
+ {
+-	int error;
++	int error, fd;
+ 	struct eventpoll *ep = NULL;
++	struct file *file;
+ 
+ 	/* Check the EPOLL_* constant for consistency.  */
+ 	BUILD_BUG_ON(EPOLL_CLOEXEC != O_CLOEXEC);
+@@ -1334,11 +1489,25 @@ SYSCALL_DEFINE1(epoll_create1, int, flags)
+ 	 * Creates all the items needed to setup an eventpoll file. That is,
+ 	 * a file structure and a free file descriptor.
+ 	 */
+-	error = anon_inode_getfd("[eventpoll]", &eventpoll_fops, ep,
++	fd = get_unused_fd_flags(O_RDWR | (flags & O_CLOEXEC));
++	if (fd < 0) {
++		error = fd;
++		goto out_free_ep;
++	}
++	file = anon_inode_getfile("[eventpoll]", &eventpoll_fops, ep,
+ 				 O_RDWR | (flags & O_CLOEXEC));
+-	if (error < 0)
+-		ep_free(ep);
+-
++	if (IS_ERR(file)) {
++		error = PTR_ERR(file);
++		goto out_free_fd;
++	}
++	fd_install(fd, file);
++	ep->file = file;
++	return fd;
++
++out_free_fd:
++	put_unused_fd(fd);
++out_free_ep:
++	ep_free(ep);
+ 	return error;
+ }
+ 
+@@ -1404,21 +1573,27 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
+ 	/*
+ 	 * When we insert an epoll file descriptor, inside another epoll file
+ 	 * descriptor, there is the change of creating closed loops, which are
+-	 * better be handled here, than in more critical paths.
++	 * better be handled here, than in more critical paths. While we are
++	 * checking for loops we also determine the list of files reachable
++	 * and hang them on the tfile_check_list, so we can check that we
++	 * haven't created too many possible wakeup paths.
+ 	 *
+-	 * We hold epmutex across the loop check and the insert in this case, in
+-	 * order to prevent two separate inserts from racing and each doing the
+-	 * insert "at the same time" such that ep_loop_check passes on both
+-	 * before either one does the insert, thereby creating a cycle.
++	 * We need to hold the epmutex across both ep_insert and ep_remove
++	 * b/c we want to make sure we are looking at a coherent view of
++	 * epoll network.
+ 	 */
+-	if (unlikely(is_file_epoll(tfile) && op == EPOLL_CTL_ADD)) {
++	if (op == EPOLL_CTL_ADD || op == EPOLL_CTL_DEL) {
+ 		mutex_lock(&epmutex);
+ 		did_lock_epmutex = 1;
+-		error = -ELOOP;
+-		if (ep_loop_check(ep, tfile) != 0)
+-			goto error_tgt_fput;
+ 	}
+-
++	if (op == EPOLL_CTL_ADD) {
++		if (is_file_epoll(tfile)) {
++			error = -ELOOP;
++			if (ep_loop_check(ep, tfile) != 0)
++				goto error_tgt_fput;
++		} else
++			list_add(&tfile->f_tfile_llink, &tfile_check_list);
++	}
+ 
+ 	mutex_lock_nested(&ep->mtx, 0);
+ 
+@@ -1437,6 +1612,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
+ 			error = ep_insert(ep, &epds, tfile, fd);
+ 		} else
+ 			error = -EEXIST;
++		clear_tfile_check_list();
+ 		break;
+ 	case EPOLL_CTL_DEL:
+ 		if (epi)
+@@ -1455,7 +1631,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
+ 	mutex_unlock(&ep->mtx);
+ 
+ error_tgt_fput:
+-	if (unlikely(did_lock_epmutex))
++	if (did_lock_epmutex)
+ 		mutex_unlock(&epmutex);
+ 
+ 	fput(tfile);
+diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h
+index f362733..657ab55 100644
+--- a/include/linux/eventpoll.h
++++ b/include/linux/eventpoll.h
+@@ -61,6 +61,7 @@ struct file;
+ static inline void eventpoll_init_file(struct file *file)
+ {
+ 	INIT_LIST_HEAD(&file->f_ep_links);
++	INIT_LIST_HEAD(&file->f_tfile_llink);
+ }
+ 
+ 
+diff --git a/include/linux/fs.h b/include/linux/fs.h
+index 277f497..93778e0 100644
+--- a/include/linux/fs.h
++++ b/include/linux/fs.h
+@@ -985,6 +985,7 @@ struct file {
+ #ifdef CONFIG_EPOLL
+ 	/* Used by fs/eventpoll.c to link all the hooks to this file */
+ 	struct list_head	f_ep_links;
++	struct list_head	f_tfile_llink;
+ #endif /* #ifdef CONFIG_EPOLL */
+ 	struct address_space	*f_mapping;
+ #ifdef CONFIG_DEBUG_WRITECOUNT
+-- 
+1.7.6.4
+
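
To make the new limits concrete: a rough userspace sketch (illustrative
only, not part of the patch) that fans a single pipe out to many epoll
instances looks like the following. Each successful add creates one
wakeup path of length 1 emanating from the pipe's read end, so with the
limits above the 1001st EPOLL_CTL_ADD is expected to fail with EINVAL
instead of letting the wakeup graph grow without bound:

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/epoll.h>
#include <unistd.h>

int main(void)
{
	int pipefd[2];
	struct epoll_event ev = { .events = EPOLLIN };
	int i;

	if (pipe(pipefd) < 0)
		return 1;
	ev.data.fd = pipefd[0];

	for (i = 0; i < 1100; i++) {
		int ep = epoll_create1(0);

		if (ep < 0) {
			perror("epoll_create1");
			break;
		}
		/* The reverse_path_check() added by the patch rejects the
		 * add once more than 1000 length-1 paths would exist. */
		if (epoll_ctl(ep, EPOLL_CTL_ADD, pipefd[0], &ev) < 0) {
			printf("add %d failed: %s\n", i, strerror(errno));
			break;
		}
	}
	return 0;
}
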
diff --git a/kernel.spec b/kernel.spec
index 98ca031..5b7f26b 100644
--- a/kernel.spec
+++ b/kernel.spec
@@ -48,7 +48,7 @@ Summary: The Linux kernel
 # reset this by hand to 1 (or to 0 and then use rpmdev-bumpspec).
 # scripts/rebase.sh should be made to do that for you, actually.
 #
-%global baserelease 100
+%global baserelease 101
 %global fedora_build %{baserelease}
 
 # base_sublevel is the kernel version we're starting with and patching
@@ -903,6 +903,10 @@ Patch14063: 0002-USB-EHCI-go-back-to-using-the-system-clock-for-QH-un.patch
 #rhbz 747948
 Patch14064: ext4-fix-BUG_ON-in-ext4_ext_insert_extent.patch
 
+#rhbz 748668
+Patch14065: epoll-fix-spurious-lockdep-warnings.patch
+Patch14066: epoll-limit-paths.patch
+
 %endif
 
 BuildRoot: %{_tmppath}/kernel-%{KVERREL}-root
@@ -1709,6 +1713,10 @@ ApplyPatch 0002-USB-EHCI-go-back-to-using-the-system-clock-for-QH-un.patch
 #rhbz 747948
 ApplyPatch ext4-fix-BUG_ON-in-ext4_ext_insert_extent.patch
 
+#rhbz 748668
+ApplyPatch epoll-fix-spurious-lockdep-warnings.patch
+ApplyPatch epoll-limit-paths.patch
+
 # END OF PATCH APPLICATIONS
 
 %endif
@@ -2295,6 +2303,9 @@ fi
 # and build.
 
 %changelog
+* Tue Oct 25 2011 Josh Boyer <jwboyer at redhat.com>
+- CVE-2011-1083: excessive in-kernel CPU consumption when creating large nested epoll structures (rhbz 748668)
+
 * Tue Oct 25 2011 Chuck Ebbert <cebbert at redhat.com>
 - Fix NULL dereference in udp6_ufo_fragment(), caused by
   fix for CVE-2011-2699.

