Diffstat (limited to 'fs/eventpoll.c')
-rw-r--r--  fs/eventpoll.c | 80
1 file changed, 73 insertions(+), 7 deletions(-)

diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index aabdfc38cf24..739b0985b398 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -34,7 +34,6 @@
 #include <linux/mutex.h>
 #include <linux/anon_inodes.h>
 #include <asm/uaccess.h>
-#include <asm/system.h>
 #include <asm/io.h>
 #include <asm/mman.h>
 #include <linux/atomic.h>
@@ -320,6 +319,11 @@ static inline int ep_is_linked(struct list_head *p)
 	return !list_empty(p);
 }
 
+static inline struct eppoll_entry *ep_pwq_from_wait(wait_queue_t *p)
+{
+	return container_of(p, struct eppoll_entry, wait);
+}
+
 /* Get the "struct epitem" from a wait queue pointer */
 static inline struct epitem *ep_item_from_wait(wait_queue_t *p)
 {
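The new ep_pwq_from_wait() helper mirrors ep_item_from_wait() just below it: given the wait_queue_t that a wakeup callback receives, container_of() walks back to the eppoll_entry that embeds it. A minimal, self-contained sketch of the same pattern (the struct and field names here are illustrative, not the ones from eventpoll.c):

	#include <stddef.h>

	#define container_of(ptr, type, member) \
		((type *)((char *)(ptr) - offsetof(type, member)))

	struct waiter {               /* stand-in for wait_queue_t */
		int flags;
	};

	struct entry {                /* stand-in for struct eppoll_entry */
		int cookie;
		struct waiter wait;   /* embedded member registered on a queue */
	};

	/* Recover the enclosing entry from a pointer to its embedded waiter. */
	static struct entry *entry_from_waiter(struct waiter *w)
	{
		return container_of(w, struct entry, wait);
	}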
@@ -422,6 +426,31 @@ out_unlock:
 	return error;
 }
 
+/*
+ * As described in commit 0ccf831cb lockdep: annotate epoll
+ * the use of wait queues used by epoll is done in a very controlled
+ * manner. Wake ups can nest inside each other, but are never done
+ * with the same locking. For example:
+ *
+ * dfd = socket(...);
+ * efd1 = epoll_create();
+ * efd2 = epoll_create();
+ * epoll_ctl(efd1, EPOLL_CTL_ADD, dfd, ...);
+ * epoll_ctl(efd2, EPOLL_CTL_ADD, efd1, ...);
+ *
+ * When a packet arrives to the device underneath "dfd", the net code will
+ * issue a wake_up() on its poll wake list. Epoll (efd1) has installed a
+ * callback wakeup entry on that queue, and the wake_up() performed by the
+ * "dfd" net code will end up in ep_poll_callback(). At this point epoll
+ * (efd1) notices that it may have some event ready, so it needs to wake up
+ * the waiters on its poll wait list (efd2). So it calls ep_poll_safewake()
+ * that ends up in another wake_up(), after having checked about the
+ * recursion constraints. That are, no more than EP_MAX_POLLWAKE_NESTS, to
+ * avoid stack blasting.
+ *
+ * When CONFIG_DEBUG_LOCK_ALLOC is enabled, make sure lockdep can handle
+ * this special case of epoll.
+ */
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 static inline void ep_wake_up_nested(wait_queue_head_t *wqueue,
 				     unsigned long events, int subclass)
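The scenario the comment above describes can be reproduced from userspace with a handful of calls; a rough sketch (error handling omitted, and the UDP socket is just one convenient way to get a pollable "dfd"):

	#include <sys/epoll.h>
	#include <sys/socket.h>

	int main(void)
	{
		int dfd  = socket(AF_INET, SOCK_DGRAM, 0); /* the watched socket */
		int efd1 = epoll_create(1);
		int efd2 = epoll_create(1);
		struct epoll_event ev = { .events = EPOLLIN };

		/* efd1 watches dfd, efd2 watches efd1: wake-ups nest one level. */
		ev.data.fd = dfd;
		epoll_ctl(efd1, EPOLL_CTL_ADD, dfd, &ev);
		ev.data.fd = efd1;
		epoll_ctl(efd2, EPOLL_CTL_ADD, efd1, &ev);

		/*
		 * A packet landing on dfd runs ep_poll_callback() for efd1,
		 * which calls ep_poll_safewake() on efd1's own wait queue and
		 * wakes anyone sleeping in epoll_wait(efd2, ...).
		 */
		epoll_wait(efd2, &ev, 1, -1);
		return 0;
	}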
@@ -467,6 +496,18 @@ static void ep_poll_safewake(wait_queue_head_t *wq)
 	put_cpu();
 }
 
+static void ep_remove_wait_queue(struct eppoll_entry *pwq)
+{
+	wait_queue_head_t *whead;
+
+	rcu_read_lock();
+	/* If it is cleared by POLLFREE, it should be rcu-safe */
+	whead = rcu_dereference(pwq->whead);
+	if (whead)
+		remove_wait_queue(whead, &pwq->wait);
+	rcu_read_unlock();
+}
+
 /*
  * This function unregisters poll callbacks from the associated file
  * descriptor. Must be called with "mtx" held (or "epmutex" if called from
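ep_remove_wait_queue() pairs with the POLLFREE handling added to ep_poll_callback() further down: the wait queue owner announces that the queue is about to disappear, epoll's callback clears pwq->whead under the queue lock, and the rcu_read_lock() here guarantees that a head observed as non-NULL is still valid memory. A sketch of what the owner side looks like, loosely modeled on signalfd-style teardown (struct owner and its fields are hypothetical):

	struct owner {
		wait_queue_head_t wqh;  /* queue that epoll entries link onto */
		struct rcu_head rcu;
		/* ... */
	};

	static void owner_teardown(struct owner *o)
	{
		/*
		 * Runs ep_poll_callback() with POLLFREE in the key for every
		 * epoll entry still on o->wqh; the callback clears pwq->whead
		 * and unlinks itself while wqh.lock is held.
		 */
		wake_up_poll(&o->wqh, POLLFREE);

		/*
		 * Defer the free past an RCU grace period, so a concurrent
		 * ep_remove_wait_queue() that fetched pwq->whead just before
		 * it was cleared still dereferences valid memory.
		 */
		kfree_rcu(o, rcu);
	}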
@@ -481,7 +522,7 @@ static void ep_unregister_pollwait(struct eventpoll *ep, struct epitem *epi)
 		pwq = list_first_entry(lsthead, struct eppoll_entry, llink);
 
 		list_del(&pwq->llink);
-		remove_wait_queue(pwq->whead, &pwq->wait);
+		ep_remove_wait_queue(pwq);
 		kmem_cache_free(pwq_cache, pwq);
 	}
 }
@@ -682,9 +723,12 @@ static int ep_read_events_proc(struct eventpoll *ep, struct list_head *head,
 			       void *priv)
 {
 	struct epitem *epi, *tmp;
+	poll_table pt;
 
+	init_poll_funcptr(&pt, NULL);
 	list_for_each_entry_safe(epi, tmp, head, rdllink) {
-		if (epi->ffd.file->f_op->poll(epi->ffd.file, NULL) &
+		pt._key = epi->event.events;
+		if (epi->ffd.file->f_op->poll(epi->ffd.file, &pt) &
 		    epi->event.events)
 			return POLLIN | POLLRDNORM;
 		else {
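The newly set pt._key carries the events the caller actually cares about down into f_op->poll(), so a poll method can skip work for bits nobody asked for. A hypothetical driver making use of the hint might look like this (mydev and its helpers are invented; a real driver would normally use a wrapper rather than read _key directly):

	static unsigned int mydev_poll(struct file *file, poll_table *wait)
	{
		struct mydev *dev = file->private_data;
		unsigned int mask = 0;

		poll_wait(file, &dev->waitq, wait);

		if (mydev_has_data(dev))
			mask |= POLLIN | POLLRDNORM;

		/* Only compute writability if the caller's key asked for it. */
		if (!wait || (wait->_key & (POLLOUT | POLLWRNORM)))
			if (mydev_has_space(dev))
				mask |= POLLOUT | POLLWRNORM;

		return mask;
	}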
@@ -842,6 +886,17 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k
 	struct epitem *epi = ep_item_from_wait(wait);
 	struct eventpoll *ep = epi->ep;
 
+	if ((unsigned long)key & POLLFREE) {
+		ep_pwq_from_wait(wait)->whead = NULL;
+		/*
+		 * whead = NULL above can race with ep_remove_wait_queue()
+		 * which can do another remove_wait_queue() after us, so we
+		 * can't use __remove_wait_queue(). whead->lock is held by
+		 * the caller.
+		 */
+		list_del_init(&wait->task_list);
+	}
+
 	spin_lock_irqsave(&ep->lock, flags);
 
 	/*
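For context on the new key check: ep_poll_callback() receives as "key" whatever mask the waker passed to one of the *_poll wake-up variants, and that is also how POLLFREE travels from the queue owner to epoll. A hypothetical producer path (mydev is made up):

	static void mydev_data_arrived(struct mydev *dev)
	{
		/*
		 * The mask passed here shows up as the "key" argument of
		 * ep_poll_callback() on every epoll entry linked to dev->waitq.
		 */
		wake_up_interruptible_poll(&dev->waitq, POLLIN | POLLRDNORM);
	}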
@@ -960,6 +1015,10 @@ static int path_count[PATH_ARR_SIZE];
 
 static int path_count_inc(int nests)
 {
+	/* Allow an arbitrary number of depth 1 paths */
+	if (nests == 0)
+		return 0;
+
 	if (++path_count[nests] > path_limits[nests])
 		return -1;
 	return 0;
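In reverse_path_check() terms, nests == 0 is a "depth 1" path: the target file is added directly to an epoll instance. The change exempts those from the path_limits[] budget, so only chains of nested epoll descriptors are still counted. A userspace sketch of the two cases (the eventfd target and the fd counts are arbitrary):

	#include <sys/epoll.h>
	#include <sys/eventfd.h>

	int main(void)
	{
		int tfd = eventfd(0, 0);          /* the watched target file */
		struct epoll_event ev = { .events = EPOLLIN };
		int i;

		/*
		 * Depth-1 paths: many epoll instances watching tfd directly.
		 * After this patch these are no longer charged to path_limits[].
		 */
		for (i = 0; i < 100; i++) {
			int efd = epoll_create(1);
			epoll_ctl(efd, EPOLL_CTL_ADD, tfd, &ev);
		}

		/*
		 * A depth-2 path - an epoll watching an epoll that watches tfd -
		 * is still subject to the limits enforced by path_count_inc().
		 */
		int inner = epoll_create(1);
		int outer = epoll_create(1);
		epoll_ctl(inner, EPOLL_CTL_ADD, tfd, &ev);
		epoll_ctl(outer, EPOLL_CTL_ADD, inner, &ev);
		return 0;
	}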
@@ -1017,13 +1076,11 @@ static int reverse_path_check_proc(void *priv, void *cookie, int call_nests)
  */
 static int reverse_path_check(void)
 {
-	int length = 0;
 	int error = 0;
 	struct file *current_file;
 
 	/* let's call this for all tfiles */
 	list_for_each_entry(current_file, &tfile_check_list, f_tfile_llink) {
-		length++;
 		path_count_init();
 		error = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
 					reverse_path_check_proc, current_file,
@@ -1065,6 +1122,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
 	/* Initialize the poll table using the queue callback */
 	epq.epi = epi;
 	init_poll_funcptr(&epq.pt, ep_ptable_queue_proc);
+	epq.pt._key = event->events;
 
 	/*
 	 * Attach the item to the poll hooks and get current event bits.
@@ -1159,6 +1217,9 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even
 {
 	int pwake = 0;
 	unsigned int revents;
+	poll_table pt;
+
+	init_poll_funcptr(&pt, NULL);
 
 	/*
 	 * Set the new event interest mask before calling f_op->poll();
@@ -1166,13 +1227,14 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even
 	 * f_op->poll() call and the new event set registering.
 	 */
 	epi->event.events = event->events;
+	pt._key = event->events;
 	epi->event.data = event->data; /* protected by mtx */
 
 	/*
 	 * Get current event bits. We can safely use the file* here because
 	 * its usage count has been increased by the caller of this function.
 	 */
-	revents = epi->ffd.file->f_op->poll(epi->ffd.file, NULL);
+	revents = epi->ffd.file->f_op->poll(epi->ffd.file, &pt);
 
 	/*
 	 * If the item is "hot" and it is not registered inside the ready
@@ -1207,6 +1269,9 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head,
 	unsigned int revents;
 	struct epitem *epi;
 	struct epoll_event __user *uevent;
+	poll_table pt;
+
+	init_poll_funcptr(&pt, NULL);
 
 	/*
 	 * We can loop without lock because we are passed a task private list.
@@ -1219,7 +1284,8 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head,
 
 		list_del_init(&epi->rdllink);
 
-		revents = epi->ffd.file->f_op->poll(epi->ffd.file, NULL) &
+		pt._key = epi->event.events;
+		revents = epi->ffd.file->f_op->poll(epi->ffd.file, &pt) &
 			epi->event.events;
 
 		/*