[kernel/f15] Add patch to fix epoll lockdep warnings
Josh Boyer
jwboyer at fedoraproject.org
Tue Aug 2 23:52:51 UTC 2011
commit 52aef13a2d375a7abc1d806db3bed90612d10ab4
Author: Josh Boyer <jwboyer at redhat.com>
Date: Tue Aug 2 14:33:26 2011 -0400
Add patch to fix epoll lockdep warnings
epoll-fix-spurious-lockdep-warnings.patch | 116 +++++++++++++++++++++++++++++
kernel.spec | 7 ++-
2 files changed, 122 insertions(+), 1 deletions(-)
---
diff --git a/epoll-fix-spurious-lockdep-warnings.patch b/epoll-fix-spurious-lockdep-warnings.patch
new file mode 100644
index 0000000..131796a
--- /dev/null
+++ b/epoll-fix-spurious-lockdep-warnings.patch
@@ -0,0 +1,116 @@
+epoll can acquire multiple ep->mtx on multiple "struct eventpoll"s
+at once in the case where one epoll fd is monitoring another epoll
+fd. This is perfectly OK, since we're careful about the lock ordering,
+but causes spurious lockdep warnings. Annotate the recursion using
+mutex_lock_nested, and add a comment explaining the nesting rules for
+good measure.
+
+Reported-by: Paul Bolle <pebolle at tiscali.nl>
+Signed-off-by: Nelson Elhage <nelhage at nelhage.com>
+---
+ I've tested this on a synthetic epoll test case that just adds e1 to
+ e2 and then does an epoll_wait(). I verified that it caused lockdep
+ problems on 3.0 and that this patch fixed it, but I haven't done more
+ extensive testing. Paul, are you able to test systemd against this?
+
+ fs/eventpoll.c | 25 ++++++++++++++++++-------
+ 1 files changed, 18 insertions(+), 7 deletions(-)
+
+diff --git a/fs/eventpoll.c b/fs/eventpoll.c
+index f9cfd16..0cb7bc6 100644
+--- a/fs/eventpoll.c
++++ b/fs/eventpoll.c
+@@ -76,6 +76,15 @@
+ * Events that require holding "epmutex" are very rare, while for
+ * normal operations the epoll private "ep->mtx" will guarantee
+ * a better scalability.
++ * It is possible to acquire multiple "ep->mtx"es at once in the case
++ * when one epoll fd is added to another. In this case, we always
++ * acquire the locks in the order of nesting (i.e. after epoll_ctl(e1,
++ * EPOLL_CTL_ADD, e2), e1->mtx will always be acquired before
++ * e2->mtx). Since we disallow cycles of epoll file descriptors, this
++ * ensures that the mutexes are well-ordered. In order to communicate
++ * this nesting to lockdep, when walking a tree of epoll file
++ * descriptors, we use the current recursion depth as the lockdep
++ * subkey.
+ */
+
+ /* Epoll private bits inside the event mask */
+@@ -464,13 +473,15 @@ static void ep_unregister_pollwait(struct eventpoll *ep, struct epitem *epi)
+ * @ep: Pointer to the epoll private data structure.
+ * @sproc: Pointer to the scan callback.
+ * @priv: Private opaque data passed to the @sproc callback.
++ * @depth: The current depth of recursive f_op->poll calls.
+ *
+ * Returns: The same integer error code returned by the @sproc callback.
+ */
+ static int ep_scan_ready_list(struct eventpoll *ep,
+ int (*sproc)(struct eventpoll *,
+ struct list_head *, void *),
+- void *priv)
++ void *priv,
++ int depth)
+ {
+ int error, pwake = 0;
+ unsigned long flags;
+@@ -481,7 +492,7 @@ static int ep_scan_ready_list(struct eventpoll *ep,
+ * We need to lock this because we could be hit by
+ * eventpoll_release_file() and epoll_ctl().
+ */
+- mutex_lock(&ep->mtx);
++ mutex_lock_nested(&ep->mtx, depth);
+
+ /*
+ * Steal the ready list, and re-init the original one to the
+@@ -670,7 +681,7 @@ static int ep_read_events_proc(struct eventpoll *ep, struct list_head *head,
+
+ static int ep_poll_readyevents_proc(void *priv, void *cookie, int call_nests)
+ {
+- return ep_scan_ready_list(priv, ep_read_events_proc, NULL);
++ return ep_scan_ready_list(priv, ep_read_events_proc, NULL, call_nests + 1);
+ }
+
+ static unsigned int ep_eventpoll_poll(struct file *file, poll_table *wait)
+@@ -737,7 +748,7 @@ void eventpoll_release_file(struct file *file)
+
+ ep = epi->ep;
+ list_del_init(&epi->fllink);
+- mutex_lock(&ep->mtx);
++ mutex_lock_nested(&ep->mtx, 0);
+ ep_remove(ep, epi);
+ mutex_unlock(&ep->mtx);
+ }
+@@ -1134,7 +1145,7 @@ static int ep_send_events(struct eventpoll *ep,
+ esed.maxevents = maxevents;
+ esed.events = events;
+
+- return ep_scan_ready_list(ep, ep_send_events_proc, &esed);
++ return ep_scan_ready_list(ep, ep_send_events_proc, &esed, 0);
+ }
+
+ static inline struct timespec ep_set_mstimeout(long ms)
+@@ -1267,7 +1278,7 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests)
+ struct rb_node *rbp;
+ struct epitem *epi;
+
+- mutex_lock(&ep->mtx);
++ mutex_lock_nested(&ep->mtx, call_nests + 1);
+ for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) {
+ epi = rb_entry(rbp, struct epitem, rbn);
+ if (unlikely(is_file_epoll(epi->ffd.file))) {
+@@ -1409,7 +1420,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
+ }
+
+
+- mutex_lock(&ep->mtx);
++ mutex_lock_nested(&ep->mtx, 0);
+
+ /*
+ * Try to lookup the file inside our RB tree, Since we grabbed "mtx"
+--
+1.7.4.1
+
+--
+To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
+the body of a message to majordomo at vger.kernel.org
+More majordomo info at http://vger.kernel.org/majordomo-info.html
\ No newline at end of file
diff --git a/kernel.spec b/kernel.spec
index 6fa0dcf..b8b21e2 100644
--- a/kernel.spec
+++ b/kernel.spec
@@ -682,7 +682,8 @@ Patch12204: linux-2.6-enable-more-pci-autosuspend.patch
Patch12303: dmar-disable-when-ricoh-multifunction.patch
Patch13000: fix-scsi_dispatch_cmd.patch
-Patch13001: hfsplus-ensure-bio-requests-are-not-smaller-than-the.patch
+Patch13001: epoll-fix-spurious-lockdep-warnings.patch
+Patch13002: hfsplus-ensure-bio-requests-are-not-smaller-than-the.patch
Patch20000: utrace.patch
@@ -1257,6 +1258,7 @@ ApplyPatch neuter_intel_microcode_load.patch
ApplyPatch dmar-disable-when-ricoh-multifunction.patch
ApplyPatch fix-scsi_dispatch_cmd.patch
+ApplyPatch epoll-fix-spurious-lockdep-warnings.patch
ApplyPatch hfsplus-ensure-bio-requests-are-not-smaller-than-the.patch
@@ -1875,6 +1877,9 @@ fi
%changelog
* Tue Aug 02 2011 Josh Boyer <jwboyer at redhat.com>
+- Fix epoll recursive lockdep warnings (rhbz 722472)
+
+* Tue Aug 02 2011 Josh Boyer <jwboyer at redhat.com>
- Add patch to fix HFSPlus filesystem mounting (rhbz 720771)
* Tue Aug 02 2011 Dave Jones <davej at redhat.com>
More information about the scm-commits
mailing list