 fs/eventpoll.c | 25 ++++++++++++++++++-------
 1 file changed, 18 insertions(+), 7 deletions(-)
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index f9cfd168fbe..2acaf60f4e4 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -70,6 +70,15 @@
  * simultaneous inserts (A into B and B into A) from racing and
  * constructing a cycle without either insert observing that it is
  * going to.
+ * It is necessary to acquire multiple "ep->mtx"es at once in the
+ * case when one epoll fd is added to another. In this case, we
+ * always acquire the locks in the order of nesting (i.e. after
+ * epoll_ctl(e1, EPOLL_CTL_ADD, e2), e1->mtx will always be acquired
+ * before e2->mtx). Since we disallow cycles of epoll file
+ * descriptors, this ensures that the mutexes are well-ordered. In
+ * order to communicate this nesting to lockdep, when walking a tree
+ * of epoll file descriptors, we use the current recursion depth as
+ * the lockdep subkey.
  * It is possible to drop the "ep->mtx" and to use the global
  * mutex "epmutex" (together with "ep->lock") to have it working,
  * but having "ep->mtx" will make the interface more scalable.
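
The mechanism relied on above is lockdep's subclass annotation: every "ep->mtx" belongs to the same lock class, so holding two of them at once would normally be flagged as a potential self-deadlock. A minimal standalone sketch of the idea (struct node and lock_parent_child() are hypothetical, not part of this patch):

        #include <linux/mutex.h>

        struct node {
                struct mutex mtx;       /* all instances share one lock class */
                struct node *child;
        };

        static void lock_parent_child(struct node *parent)
        {
                /*
                 * Plain mutex_lock() on both would trip lockdep, because
                 * the two mutexes are in the same class. Distinct
                 * subclasses (here: the nesting depth) tell lockdep the
                 * ordering is intentional and well-defined.
                 */
                mutex_lock_nested(&parent->mtx, 0);
                mutex_lock_nested(&parent->child->mtx, 1);

                mutex_unlock(&parent->child->mtx);
                mutex_unlock(&parent->mtx);
        }
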
@@ -464,13 +473,15 @@ static void ep_unregister_pollwait(struct eventpoll *ep, struct epitem *epi)
  * @ep: Pointer to the epoll private data structure.
  * @sproc: Pointer to the scan callback.
  * @priv: Private opaque data passed to the @sproc callback.
+ * @depth: The current depth of recursive f_op->poll calls.
  *
  * Returns: The same integer error code returned by the @sproc callback.
  */
 static int ep_scan_ready_list(struct eventpoll *ep,
 			      int (*sproc)(struct eventpoll *,
 					   struct list_head *, void *),
-			      void *priv)
+			      void *priv,
+			      int depth)
 {
 	int error, pwake = 0;
 	unsigned long flags;
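
With the new parameter, each caller of ep_scan_ready_list() states how deeply nested its f_op->poll call is, and that depth is forwarded straight to mutex_lock_nested() in the next hunk. The two call sites updated later in this patch illustrate the convention:

        /* outermost call, on the epoll_wait() path */
        ep_scan_ready_list(ep, ep_send_events_proc, &esed, 0);

        /* nested poll of one epoll fd from inside another */
        ep_scan_ready_list(priv, ep_read_events_proc, NULL, call_nests + 1);
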
@@ -481,7 +492,7 @@ static int ep_scan_ready_list(struct eventpoll *ep,
 	 * We need to lock this because we could be hit by
 	 * eventpoll_release_file() and epoll_ctl().
 	 */
-	mutex_lock(&ep->mtx);
+	mutex_lock_nested(&ep->mtx, depth);
 
 	/*
 	 * Steal the ready list, and re-init the original one to the
@@ -670,7 +681,7 @@ static int ep_read_events_proc(struct eventpoll *ep, struct list_head *head,
 
 static int ep_poll_readyevents_proc(void *priv, void *cookie, int call_nests)
 {
-	return ep_scan_ready_list(priv, ep_read_events_proc, NULL);
+	return ep_scan_ready_list(priv, ep_read_events_proc, NULL, call_nests + 1);
 }
 
 static unsigned int ep_eventpoll_poll(struct file *file, poll_table *wait)
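
ep_poll_readyevents_proc() runs under ep_call_nested(), which passes the current recursion level in call_nests (0 for the outermost invocation). Using call_nests + 1 therefore locks the inner epoll's ep->mtx with a strictly larger subclass than any mutex already held further up the chain. Schematically, with e1, e2 and e3 standing in for the eventpoll structs of three nested epoll fds (a sketch of the resulting lock order, not kernel source):

        mutex_lock_nested(&e1->mtx, 0);         /* top level, depth 0        */
          mutex_lock_nested(&e2->mtx, 1);       /* call_nests == 0, so 0 + 1 */
            mutex_lock_nested(&e3->mtx, 2);     /* call_nests == 1, so 1 + 1 */
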
@@ -737,7 +748,7 @@ void eventpoll_release_file(struct file *file)
 
 	ep = epi->ep;
 	list_del_init(&epi->fllink);
-	mutex_lock(&ep->mtx);
+	mutex_lock_nested(&ep->mtx, 0);
 	ep_remove(ep, epi);
 	mutex_unlock(&ep->mtx);
 }
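
Subclass 0 is used here because eventpoll_release_file() takes "ep->mtx" as its outermost lock; mutex_lock_nested(lock, 0) records exactly what a plain mutex_lock() would, so the change only makes the nesting level explicit at this call site.
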
@@ -1134,7 +1145,7 @@ static int ep_send_events(struct eventpoll *ep,
 	esed.maxevents = maxevents;
 	esed.events = events;
 
-	return ep_scan_ready_list(ep, ep_send_events_proc, &esed);
+	return ep_scan_ready_list(ep, ep_send_events_proc, &esed, 0);
 }
 
 static inline struct timespec ep_set_mstimeout(long ms)
@@ -1267,7 +1278,7 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests)
 	struct rb_node *rbp;
 	struct epitem *epi;
 
-	mutex_lock(&ep->mtx);
+	mutex_lock_nested(&ep->mtx, call_nests + 1);
 	for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) {
 		epi = rb_entry(rbp, struct epitem, rbn);
 		if (unlikely(is_file_epoll(epi->ffd.file))) {
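
ep_loop_check_proc() is also driven by ep_call_nested(), this time while walking the tree during EPOLL_CTL_ADD to reject cycles. As in ep_poll_readyevents_proc() above, call_nests + 1 gives each deeper epoll fd in the walk a strictly increasing subclass (the first ep->mtx taken here uses subclass 1, since call_nests starts at 0), matching the in-order acquisition rule from the header comment.
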
@@ -1409,7 +1420,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
 	}
 
 
-	mutex_lock(&ep->mtx);
+	mutex_lock_nested(&ep->mtx, 0);
 
 	/*
 	 * Try to lookup the file inside our RB tree, Since we grabbed "mtx"
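
For reference, the nesting these annotations describe is easy to produce from userspace with two epoll instances (a minimal sketch of my own, error handling omitted):

        #include <sys/epoll.h>

        int main(void)
        {
                int e1 = epoll_create1(0);
                int e2 = epoll_create1(0);
                struct epoll_event ev = { .events = EPOLLIN };

                /* e1 watches e2, so e1->mtx orders before e2->mtx */
                epoll_ctl(e1, EPOLL_CTL_ADD, e2, &ev);

                /* polling e1 recursively polls e2, nesting both mutexes */
                epoll_wait(e1, &ev, 1, 0);
                return 0;
        }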