 fs/eventpoll.c | 25 ++++++++++++++++++-------
 1 file changed, 18 insertions(+), 7 deletions(-)
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index f9cfd168fbe..2acaf60f4e4 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -70,6 +70,15 @@
  * simultaneous inserts (A into B and B into A) from racing and
  * constructing a cycle without either insert observing that it is
  * going to.
+ * It is necessary to acquire multiple "ep->mtx"es at once in the
+ * case when one epoll fd is added to another. In this case, we
+ * always acquire the locks in the order of nesting (i.e. after
+ * epoll_ctl(e1, EPOLL_CTL_ADD, e2), e1->mtx will always be acquired
+ * before e2->mtx). Since we disallow cycles of epoll file
+ * descriptors, this ensures that the mutexes are well-ordered. In
+ * order to communicate this nesting to lockdep, when walking a tree
+ * of epoll file descriptors, we use the current recursion depth as
+ * the lockdep subkey.
  * It is possible to drop the "ep->mtx" and to use the global
  * mutex "epmutex" (together with "ep->lock") to have it working,
  * but having "ep->mtx" will make the interface more scalable.
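
[Editorial note] The header comment above states the invariant the rest of this patch enforces: nested "ep->mtx" mutexes are always taken outermost-first, and the recursion depth is handed to lockdep as the subclass so it can tell the levels apart. As a minimal illustrative sketch (not part of this patch; struct ep_like and walk_nested are hypothetical names), this is how mutex_lock_nested() expresses that ordering:

#include <linux/mutex.h>

/* Hypothetical object with a mutex of a single lock class, standing
 * in for struct eventpoll and its "ep->mtx". */
struct ep_like {
	struct mutex mtx;
};

/*
 * Take an outer and an inner mutex of the same class, outermost
 * first.  With plain mutex_lock(), lockdep would flag the second
 * acquisition as recursive locking on one class; passing the
 * nesting depth as the subclass tells it the two acquisitions sit
 * at different, well-ordered levels.
 */
static void walk_nested(struct ep_like *outer, struct ep_like *inner)
{
	mutex_lock_nested(&outer->mtx, 0);	/* depth 0: outermost */
	mutex_lock_nested(&inner->mtx, 1);	/* depth 1: nested */
	/* ... operate on inner while both are held ... */
	mutex_unlock(&inner->mtx);
	mutex_unlock(&outer->mtx);
}

This mirrors the calls changed below: top-level paths pass depth 0, and recursive walks pass call_nests + 1.
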
@@ -464,13 +473,15 @@ static void ep_unregister_pollwait(struct eventpoll *ep, struct epitem *epi)
  * @ep: Pointer to the epoll private data structure.
  * @sproc: Pointer to the scan callback.
  * @priv: Private opaque data passed to the @sproc callback.
+ * @depth: The current depth of recursive f_op->poll calls.
  *
  * Returns: The same integer error code returned by the @sproc callback.
  */
 static int ep_scan_ready_list(struct eventpoll *ep,
 			      int (*sproc)(struct eventpoll *,
 					   struct list_head *, void *),
-			      void *priv)
+			      void *priv,
+			      int depth)
 {
 	int error, pwake = 0;
 	unsigned long flags;
@@ -481,7 +492,7 @@ static int ep_scan_ready_list(struct eventpoll *ep,
 	 * We need to lock this because we could be hit by
 	 * eventpoll_release_file() and epoll_ctl().
 	 */
-	mutex_lock(&ep->mtx);
+	mutex_lock_nested(&ep->mtx, depth);
 
 	/*
 	 * Steal the ready list, and re-init the original one to the
@@ -670,7 +681,7 @@ static int ep_read_events_proc(struct eventpoll *ep, struct list_head *head,
 
 static int ep_poll_readyevents_proc(void *priv, void *cookie, int call_nests)
 {
-	return ep_scan_ready_list(priv, ep_read_events_proc, NULL);
+	return ep_scan_ready_list(priv, ep_read_events_proc, NULL, call_nests + 1);
 }
 
 static unsigned int ep_eventpoll_poll(struct file *file, poll_table *wait)
@@ -737,7 +748,7 @@ void eventpoll_release_file(struct file *file)
 
 		ep = epi->ep;
 		list_del_init(&epi->fllink);
-		mutex_lock(&ep->mtx);
+		mutex_lock_nested(&ep->mtx, 0);
 		ep_remove(ep, epi);
 		mutex_unlock(&ep->mtx);
 	}
@@ -1134,7 +1145,7 @@ static int ep_send_events(struct eventpoll *ep,
 	esed.maxevents = maxevents;
 	esed.events = events;
 
-	return ep_scan_ready_list(ep, ep_send_events_proc, &esed);
+	return ep_scan_ready_list(ep, ep_send_events_proc, &esed, 0);
 }
 
 static inline struct timespec ep_set_mstimeout(long ms)
@@ -1267,7 +1278,7 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests)
 	struct rb_node *rbp;
 	struct epitem *epi;
 
-	mutex_lock(&ep->mtx);
+	mutex_lock_nested(&ep->mtx, call_nests + 1);
 	for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) {
 		epi = rb_entry(rbp, struct epitem, rbn);
 		if (unlikely(is_file_epoll(epi->ffd.file))) {
@@ -1409,7 +1420,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
 	}
 
 
-	mutex_lock(&ep->mtx);
+	mutex_lock_nested(&ep->mtx, 0);
 
 	/*
 	 * Try to lookup the file inside our RB tree, Since we grabbed "mtx"
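
[Editorial note] For reference, the nesting the depth argument models is created from userspace along these lines (a hypothetical minimal sketch, not from this patch): one epoll fd is registered inside another, after which polling e1 recurses into e2 and each "ep->mtx" is taken at increasing lockdep depth.

#include <stdio.h>
#include <sys/epoll.h>

int main(void)
{
	int e1 = epoll_create1(0);
	int e2 = epoll_create1(0);
	struct epoll_event ev = { .events = EPOLLIN, .data.fd = e2 };

	if (e1 < 0 || e2 < 0) {
		perror("epoll_create1");
		return 1;
	}
	/* The case named in the header comment: after this call,
	 * e1->mtx orders before e2->mtx on every tree walk. */
	if (epoll_ctl(e1, EPOLL_CTL_ADD, e2, &ev) < 0) {
		perror("epoll_ctl");
		return 1;
	}
	/* Readiness on e2's members now surfaces on e1 as EPOLLIN. */
	return 0;
}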