author | Jonathan Herman <hermanjl@cs.unc.edu> | 2013-01-17 16:15:55 -0500
committer | Jonathan Herman <hermanjl@cs.unc.edu> | 2013-01-17 16:15:55 -0500
commit | 8dea78da5cee153b8af9c07a2745f6c55057fe12 (patch)
tree | a8f4d49d63b1ecc92f2fddceba0655b2472c5bd9 /fs/eventpoll.c
parent | 406089d01562f1e2bf9f089fd7637009ebaad589 (diff)
Patched in Tegra support.
Diffstat (limited to 'fs/eventpoll.c')
-rw-r--r-- | fs/eventpoll.c | 479 |
1 file changed, 52 insertions, 427 deletions
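For orientation, the snippet below is a minimal user-space sketch of the nested-epoll scenario that the lockdep comment removed in the hunks below walks through (one epoll fd registered inside another), using the standard epoll(7) API. The fd names, the UDP socket, and the error handling are illustrative only and are not part of this patch:

```c
#include <stdio.h>
#include <stdlib.h>
#include <sys/epoll.h>
#include <sys/socket.h>

int main(void)
{
	/* A plain event source (any pollable fd works; a UDP socket here). */
	int dfd  = socket(AF_INET, SOCK_DGRAM, 0);
	int efd1 = epoll_create1(0);	/* inner epoll set */
	int efd2 = epoll_create1(0);	/* outer epoll set */
	struct epoll_event ev = { .events = EPOLLIN };

	if (dfd < 0 || efd1 < 0 || efd2 < 0) {
		perror("setup");
		return EXIT_FAILURE;
	}

	/* efd1 watches the socket ... */
	ev.data.fd = dfd;
	if (epoll_ctl(efd1, EPOLL_CTL_ADD, dfd, &ev) < 0)
		perror("EPOLL_CTL_ADD dfd");

	/*
	 * ... and efd2 watches efd1, so a wakeup on dfd has to propagate
	 * through two nested wait queues (ep_poll_callback -> ep_poll_safewake
	 * in the code below).  Registering efd2 back inside efd1 would close
	 * the loop that ep_loop_check() rejects with -ELOOP.
	 */
	ev.data.fd = efd1;
	if (epoll_ctl(efd2, EPOLL_CTL_ADD, efd1, &ev) < 0)
		perror("EPOLL_CTL_ADD efd1");

	return EXIT_SUCCESS;
}
```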
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 9fec1836057..2d1744ab5bc 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -33,13 +33,11 @@ | |||
33 | #include <linux/bitops.h> | 33 | #include <linux/bitops.h> |
34 | #include <linux/mutex.h> | 34 | #include <linux/mutex.h> |
35 | #include <linux/anon_inodes.h> | 35 | #include <linux/anon_inodes.h> |
36 | #include <linux/device.h> | ||
37 | #include <asm/uaccess.h> | 36 | #include <asm/uaccess.h> |
37 | #include <asm/system.h> | ||
38 | #include <asm/io.h> | 38 | #include <asm/io.h> |
39 | #include <asm/mman.h> | 39 | #include <asm/mman.h> |
40 | #include <linux/atomic.h> | 40 | #include <linux/atomic.h> |
41 | #include <linux/proc_fs.h> | ||
42 | #include <linux/seq_file.h> | ||
43 | 41 | ||
44 | /* | 42 | /* |
45 | * LOCKING: | 43 | * LOCKING: |
@@ -90,7 +88,7 @@ | |||
90 | */ | 88 | */ |
91 | 89 | ||
92 | /* Epoll private bits inside the event mask */ | 90 | /* Epoll private bits inside the event mask */ |
93 | #define EP_PRIVATE_BITS (EPOLLWAKEUP | EPOLLONESHOT | EPOLLET) | 91 | #define EP_PRIVATE_BITS (EPOLLONESHOT | EPOLLET) |
94 | 92 | ||
95 | /* Maximum number of nesting allowed inside epoll sets */ | 93 | /* Maximum number of nesting allowed inside epoll sets */ |
96 | #define EP_MAX_NESTS 4 | 94 | #define EP_MAX_NESTS 4 |
@@ -157,9 +155,6 @@ struct epitem { | |||
157 | /* List header used to link this item to the "struct file" items list */ | 155 | /* List header used to link this item to the "struct file" items list */ |
158 | struct list_head fllink; | 156 | struct list_head fllink; |
159 | 157 | ||
160 | /* wakeup_source used when EPOLLWAKEUP is set */ | ||
161 | struct wakeup_source *ws; | ||
162 | |||
163 | /* The structure that describe the interested events and the source fd */ | 158 | /* The structure that describe the interested events and the source fd */ |
164 | struct epoll_event event; | 159 | struct epoll_event event; |
165 | }; | 160 | }; |
@@ -200,17 +195,8 @@ struct eventpoll { | |||
200 | */ | 195 | */ |
201 | struct epitem *ovflist; | 196 | struct epitem *ovflist; |
202 | 197 | ||
203 | /* wakeup_source used when ep_scan_ready_list is running */ | ||
204 | struct wakeup_source *ws; | ||
205 | |||
206 | /* The user that created the eventpoll descriptor */ | 198 | /* The user that created the eventpoll descriptor */ |
207 | struct user_struct *user; | 199 | struct user_struct *user; |
208 | |||
209 | struct file *file; | ||
210 | |||
211 | /* used to optimize loop detection check */ | ||
212 | int visited; | ||
213 | struct list_head visited_list_link; | ||
214 | }; | 200 | }; |
215 | 201 | ||
216 | /* Wait structure used by the poll hooks */ | 202 | /* Wait structure used by the poll hooks */ |
@@ -269,15 +255,6 @@ static struct kmem_cache *epi_cache __read_mostly; | |||
269 | /* Slab cache used to allocate "struct eppoll_entry" */ | 255 | /* Slab cache used to allocate "struct eppoll_entry" */ |
270 | static struct kmem_cache *pwq_cache __read_mostly; | 256 | static struct kmem_cache *pwq_cache __read_mostly; |
271 | 257 | ||
272 | /* Visited nodes during ep_loop_check(), so we can unset them when we finish */ | ||
273 | static LIST_HEAD(visited_list); | ||
274 | |||
275 | /* | ||
276 | * List of files with newly added links, where we may need to limit the number | ||
277 | * of emanating paths. Protected by the epmutex. | ||
278 | */ | ||
279 | static LIST_HEAD(tfile_check_list); | ||
280 | |||
281 | #ifdef CONFIG_SYSCTL | 258 | #ifdef CONFIG_SYSCTL |
282 | 259 | ||
283 | #include <linux/sysctl.h> | 260 | #include <linux/sysctl.h> |
@@ -299,12 +276,6 @@ ctl_table epoll_table[] = { | |||
299 | }; | 276 | }; |
300 | #endif /* CONFIG_SYSCTL */ | 277 | #endif /* CONFIG_SYSCTL */ |
301 | 278 | ||
302 | static const struct file_operations eventpoll_fops; | ||
303 | |||
304 | static inline int is_file_epoll(struct file *f) | ||
305 | { | ||
306 | return f->f_op == &eventpoll_fops; | ||
307 | } | ||
308 | 279 | ||
309 | /* Setup the structure that is used as key for the RB tree */ | 280 | /* Setup the structure that is used as key for the RB tree */ |
310 | static inline void ep_set_ffd(struct epoll_filefd *ffd, | 281 | static inline void ep_set_ffd(struct epoll_filefd *ffd, |
@@ -328,11 +299,6 @@ static inline int ep_is_linked(struct list_head *p) | |||
328 | return !list_empty(p); | 299 | return !list_empty(p); |
329 | } | 300 | } |
330 | 301 | ||
331 | static inline struct eppoll_entry *ep_pwq_from_wait(wait_queue_t *p) | ||
332 | { | ||
333 | return container_of(p, struct eppoll_entry, wait); | ||
334 | } | ||
335 | |||
336 | /* Get the "struct epitem" from a wait queue pointer */ | 302 | /* Get the "struct epitem" from a wait queue pointer */ |
337 | static inline struct epitem *ep_item_from_wait(wait_queue_t *p) | 303 | static inline struct epitem *ep_item_from_wait(wait_queue_t *p) |
338 | { | 304 | { |
@@ -435,31 +401,6 @@ out_unlock: | |||
435 | return error; | 401 | return error; |
436 | } | 402 | } |
437 | 403 | ||
438 | /* | ||
439 | * As described in commit 0ccf831cb lockdep: annotate epoll | ||
440 | * the use of wait queues used by epoll is done in a very controlled | ||
441 | * manner. Wake ups can nest inside each other, but are never done | ||
442 | * with the same locking. For example: | ||
443 | * | ||
444 | * dfd = socket(...); | ||
445 | * efd1 = epoll_create(); | ||
446 | * efd2 = epoll_create(); | ||
447 | * epoll_ctl(efd1, EPOLL_CTL_ADD, dfd, ...); | ||
448 | * epoll_ctl(efd2, EPOLL_CTL_ADD, efd1, ...); | ||
449 | * | ||
450 | * When a packet arrives to the device underneath "dfd", the net code will | ||
451 | * issue a wake_up() on its poll wake list. Epoll (efd1) has installed a | ||
452 | * callback wakeup entry on that queue, and the wake_up() performed by the | ||
453 | * "dfd" net code will end up in ep_poll_callback(). At this point epoll | ||
454 | * (efd1) notices that it may have some event ready, so it needs to wake up | ||
455 | * the waiters on its poll wait list (efd2). So it calls ep_poll_safewake() | ||
456 | * that ends up in another wake_up(), after having checked about the | ||
457 | * recursion constraints. That are, no more than EP_MAX_POLLWAKE_NESTS, to | ||
458 | * avoid stack blasting. | ||
459 | * | ||
460 | * When CONFIG_DEBUG_LOCK_ALLOC is enabled, make sure lockdep can handle | ||
461 | * this special case of epoll. | ||
462 | */ | ||
463 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 404 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
464 | static inline void ep_wake_up_nested(wait_queue_head_t *wqueue, | 405 | static inline void ep_wake_up_nested(wait_queue_head_t *wqueue, |
465 | unsigned long events, int subclass) | 406 | unsigned long events, int subclass) |
@@ -505,18 +446,6 @@ static void ep_poll_safewake(wait_queue_head_t *wq) | |||
505 | put_cpu(); | 446 | put_cpu(); |
506 | } | 447 | } |
507 | 448 | ||
508 | static void ep_remove_wait_queue(struct eppoll_entry *pwq) | ||
509 | { | ||
510 | wait_queue_head_t *whead; | ||
511 | |||
512 | rcu_read_lock(); | ||
513 | /* If it is cleared by POLLFREE, it should be rcu-safe */ | ||
514 | whead = rcu_dereference(pwq->whead); | ||
515 | if (whead) | ||
516 | remove_wait_queue(whead, &pwq->wait); | ||
517 | rcu_read_unlock(); | ||
518 | } | ||
519 | |||
520 | /* | 449 | /* |
521 | * This function unregisters poll callbacks from the associated file | 450 | * This function unregisters poll callbacks from the associated file |
522 | * descriptor. Must be called with "mtx" held (or "epmutex" if called from | 451 | * descriptor. Must be called with "mtx" held (or "epmutex" if called from |
@@ -531,7 +460,7 @@ static void ep_unregister_pollwait(struct eventpoll *ep, struct epitem *epi) | |||
531 | pwq = list_first_entry(lsthead, struct eppoll_entry, llink); | 460 | pwq = list_first_entry(lsthead, struct eppoll_entry, llink); |
532 | 461 | ||
533 | list_del(&pwq->llink); | 462 | list_del(&pwq->llink); |
534 | ep_remove_wait_queue(pwq); | 463 | remove_wait_queue(pwq->whead, &pwq->wait); |
535 | kmem_cache_free(pwq_cache, pwq); | 464 | kmem_cache_free(pwq_cache, pwq); |
536 | } | 465 | } |
537 | } | 466 | } |
@@ -597,10 +526,8 @@ static int ep_scan_ready_list(struct eventpoll *ep, | |||
597 | * queued into ->ovflist but the "txlist" might already | 526 | * queued into ->ovflist but the "txlist" might already |
598 | * contain them, and the list_splice() below takes care of them. | 527 | * contain them, and the list_splice() below takes care of them. |
599 | */ | 528 | */ |
600 | if (!ep_is_linked(&epi->rdllink)) { | 529 | if (!ep_is_linked(&epi->rdllink)) |
601 | list_add_tail(&epi->rdllink, &ep->rdllist); | 530 | list_add_tail(&epi->rdllink, &ep->rdllist); |
602 | __pm_stay_awake(epi->ws); | ||
603 | } | ||
604 | } | 531 | } |
605 | /* | 532 | /* |
606 | * We need to set back ep->ovflist to EP_UNACTIVE_PTR, so that after | 533 | * We need to set back ep->ovflist to EP_UNACTIVE_PTR, so that after |
@@ -613,7 +540,6 @@ static int ep_scan_ready_list(struct eventpoll *ep, | |||
613 | * Quickly re-inject items left on "txlist". | 540 | * Quickly re-inject items left on "txlist". |
614 | */ | 541 | */ |
615 | list_splice(&txlist, &ep->rdllist); | 542 | list_splice(&txlist, &ep->rdllist); |
616 | __pm_relax(ep->ws); | ||
617 | 543 | ||
618 | if (!list_empty(&ep->rdllist)) { | 544 | if (!list_empty(&ep->rdllist)) { |
619 | /* | 545 | /* |
@@ -668,8 +594,6 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi) | |||
668 | list_del_init(&epi->rdllink); | 594 | list_del_init(&epi->rdllink); |
669 | spin_unlock_irqrestore(&ep->lock, flags); | 595 | spin_unlock_irqrestore(&ep->lock, flags); |
670 | 596 | ||
671 | wakeup_source_unregister(epi->ws); | ||
672 | |||
673 | /* At this point it is safe to free the eventpoll item */ | 597 | /* At this point it is safe to free the eventpoll item */ |
674 | kmem_cache_free(epi_cache, epi); | 598 | kmem_cache_free(epi_cache, epi); |
675 | 599 | ||
@@ -720,7 +644,6 @@ static void ep_free(struct eventpoll *ep) | |||
720 | mutex_unlock(&epmutex); | 644 | mutex_unlock(&epmutex); |
721 | mutex_destroy(&ep->mtx); | 645 | mutex_destroy(&ep->mtx); |
722 | free_uid(ep->user); | 646 | free_uid(ep->user); |
723 | wakeup_source_unregister(ep->ws); | ||
724 | kfree(ep); | 647 | kfree(ep); |
725 | } | 648 | } |
726 | 649 | ||
@@ -738,12 +661,9 @@ static int ep_read_events_proc(struct eventpoll *ep, struct list_head *head, | |||
738 | void *priv) | 661 | void *priv) |
739 | { | 662 | { |
740 | struct epitem *epi, *tmp; | 663 | struct epitem *epi, *tmp; |
741 | poll_table pt; | ||
742 | 664 | ||
743 | init_poll_funcptr(&pt, NULL); | ||
744 | list_for_each_entry_safe(epi, tmp, head, rdllink) { | 665 | list_for_each_entry_safe(epi, tmp, head, rdllink) { |
745 | pt._key = epi->event.events; | 666 | if (epi->ffd.file->f_op->poll(epi->ffd.file, NULL) & |
746 | if (epi->ffd.file->f_op->poll(epi->ffd.file, &pt) & | ||
747 | epi->event.events) | 667 | epi->event.events) |
748 | return POLLIN | POLLRDNORM; | 668 | return POLLIN | POLLRDNORM; |
749 | else { | 669 | else { |
@@ -752,7 +672,6 @@ static int ep_read_events_proc(struct eventpoll *ep, struct list_head *head, | |||
752 | * callback, but it's not actually ready, as far as | 672 | * callback, but it's not actually ready, as far as |
753 | * caller requested events goes. We can remove it here. | 673 | * caller requested events goes. We can remove it here. |
754 | */ | 674 | */ |
755 | __pm_relax(epi->ws); | ||
756 | list_del_init(&epi->rdllink); | 675 | list_del_init(&epi->rdllink); |
757 | } | 676 | } |
758 | } | 677 | } |
@@ -785,39 +704,19 @@ static unsigned int ep_eventpoll_poll(struct file *file, poll_table *wait) | |||
785 | return pollflags != -1 ? pollflags : 0; | 704 | return pollflags != -1 ? pollflags : 0; |
786 | } | 705 | } |
787 | 706 | ||
788 | #ifdef CONFIG_PROC_FS | ||
789 | static int ep_show_fdinfo(struct seq_file *m, struct file *f) | ||
790 | { | ||
791 | struct eventpoll *ep = f->private_data; | ||
792 | struct rb_node *rbp; | ||
793 | int ret = 0; | ||
794 | |||
795 | mutex_lock(&ep->mtx); | ||
796 | for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) { | ||
797 | struct epitem *epi = rb_entry(rbp, struct epitem, rbn); | ||
798 | |||
799 | ret = seq_printf(m, "tfd: %8d events: %8x data: %16llx\n", | ||
800 | epi->ffd.fd, epi->event.events, | ||
801 | (long long)epi->event.data); | ||
802 | if (ret) | ||
803 | break; | ||
804 | } | ||
805 | mutex_unlock(&ep->mtx); | ||
806 | |||
807 | return ret; | ||
808 | } | ||
809 | #endif | ||
810 | |||
811 | /* File callbacks that implement the eventpoll file behaviour */ | 707 | /* File callbacks that implement the eventpoll file behaviour */ |
812 | static const struct file_operations eventpoll_fops = { | 708 | static const struct file_operations eventpoll_fops = { |
813 | #ifdef CONFIG_PROC_FS | ||
814 | .show_fdinfo = ep_show_fdinfo, | ||
815 | #endif | ||
816 | .release = ep_eventpoll_release, | 709 | .release = ep_eventpoll_release, |
817 | .poll = ep_eventpoll_poll, | 710 | .poll = ep_eventpoll_poll, |
818 | .llseek = noop_llseek, | 711 | .llseek = noop_llseek, |
819 | }; | 712 | }; |
820 | 713 | ||
714 | /* Fast test to see if the file is an eventpoll file */ | ||
715 | static inline int is_file_epoll(struct file *f) | ||
716 | { | ||
717 | return f->f_op == &eventpoll_fops; | ||
718 | } | ||
719 | |||
821 | /* | 720 | /* |
822 | * This is called from eventpoll_release() to unlink files from the eventpoll | 721 | * This is called from eventpoll_release() to unlink files from the eventpoll |
823 | * interface. We need to have this facility to cleanup correctly files that are | 722 | * interface. We need to have this facility to cleanup correctly files that are |
@@ -928,17 +827,6 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k | |||
928 | struct epitem *epi = ep_item_from_wait(wait); | 827 | struct epitem *epi = ep_item_from_wait(wait); |
929 | struct eventpoll *ep = epi->ep; | 828 | struct eventpoll *ep = epi->ep; |
930 | 829 | ||
931 | if ((unsigned long)key & POLLFREE) { | ||
932 | ep_pwq_from_wait(wait)->whead = NULL; | ||
933 | /* | ||
934 | * whead = NULL above can race with ep_remove_wait_queue() | ||
935 | * which can do another remove_wait_queue() after us, so we | ||
936 | * can't use __remove_wait_queue(). whead->lock is held by | ||
937 | * the caller. | ||
938 | */ | ||
939 | list_del_init(&wait->task_list); | ||
940 | } | ||
941 | |||
942 | spin_lock_irqsave(&ep->lock, flags); | 830 | spin_lock_irqsave(&ep->lock, flags); |
943 | 831 | ||
944 | /* | 832 | /* |
@@ -969,23 +857,13 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k | |||
969 | if (epi->next == EP_UNACTIVE_PTR) { | 857 | if (epi->next == EP_UNACTIVE_PTR) { |
970 | epi->next = ep->ovflist; | 858 | epi->next = ep->ovflist; |
971 | ep->ovflist = epi; | 859 | ep->ovflist = epi; |
972 | if (epi->ws) { | ||
973 | /* | ||
974 | * Activate ep->ws since epi->ws may get | ||
975 | * deactivated at any time. | ||
976 | */ | ||
977 | __pm_stay_awake(ep->ws); | ||
978 | } | ||
979 | |||
980 | } | 860 | } |
981 | goto out_unlock; | 861 | goto out_unlock; |
982 | } | 862 | } |
983 | 863 | ||
984 | /* If this file is already in the ready list we exit soon */ | 864 | /* If this file is already in the ready list we exit soon */ |
985 | if (!ep_is_linked(&epi->rdllink)) { | 865 | if (!ep_is_linked(&epi->rdllink)) |
986 | list_add_tail(&epi->rdllink, &ep->rdllist); | 866 | list_add_tail(&epi->rdllink, &ep->rdllist); |
987 | __pm_stay_awake(epi->ws); | ||
988 | } | ||
989 | 867 | ||
990 | /* | 868 | /* |
991 | * Wake up ( if active ) both the eventpoll wait list and the ->poll() | 869 | * Wake up ( if active ) both the eventpoll wait list and the ->poll() |
@@ -1048,125 +926,6 @@ static void ep_rbtree_insert(struct eventpoll *ep, struct epitem *epi) | |||
1048 | rb_insert_color(&epi->rbn, &ep->rbr); | 926 | rb_insert_color(&epi->rbn, &ep->rbr); |
1049 | } | 927 | } |
1050 | 928 | ||
1051 | |||
1052 | |||
1053 | #define PATH_ARR_SIZE 5 | ||
1054 | /* | ||
1055 | * These are the number paths of length 1 to 5, that we are allowing to emanate | ||
1056 | * from a single file of interest. For example, we allow 1000 paths of length | ||
1057 | * 1, to emanate from each file of interest. This essentially represents the | ||
1058 | * potential wakeup paths, which need to be limited in order to avoid massive | ||
1059 | * uncontrolled wakeup storms. The common use case should be a single ep which | ||
1060 | * is connected to n file sources. In this case each file source has 1 path | ||
1061 | * of length 1. Thus, the numbers below should be more than sufficient. These | ||
1062 | * path limits are enforced during an EPOLL_CTL_ADD operation, since a modify | ||
1063 | * and delete can't add additional paths. Protected by the epmutex. | ||
1064 | */ | ||
1065 | static const int path_limits[PATH_ARR_SIZE] = { 1000, 500, 100, 50, 10 }; | ||
1066 | static int path_count[PATH_ARR_SIZE]; | ||
1067 | |||
1068 | static int path_count_inc(int nests) | ||
1069 | { | ||
1070 | /* Allow an arbitrary number of depth 1 paths */ | ||
1071 | if (nests == 0) | ||
1072 | return 0; | ||
1073 | |||
1074 | if (++path_count[nests] > path_limits[nests]) | ||
1075 | return -1; | ||
1076 | return 0; | ||
1077 | } | ||
1078 | |||
1079 | static void path_count_init(void) | ||
1080 | { | ||
1081 | int i; | ||
1082 | |||
1083 | for (i = 0; i < PATH_ARR_SIZE; i++) | ||
1084 | path_count[i] = 0; | ||
1085 | } | ||
1086 | |||
1087 | static int reverse_path_check_proc(void *priv, void *cookie, int call_nests) | ||
1088 | { | ||
1089 | int error = 0; | ||
1090 | struct file *file = priv; | ||
1091 | struct file *child_file; | ||
1092 | struct epitem *epi; | ||
1093 | |||
1094 | list_for_each_entry(epi, &file->f_ep_links, fllink) { | ||
1095 | child_file = epi->ep->file; | ||
1096 | if (is_file_epoll(child_file)) { | ||
1097 | if (list_empty(&child_file->f_ep_links)) { | ||
1098 | if (path_count_inc(call_nests)) { | ||
1099 | error = -1; | ||
1100 | break; | ||
1101 | } | ||
1102 | } else { | ||
1103 | error = ep_call_nested(&poll_loop_ncalls, | ||
1104 | EP_MAX_NESTS, | ||
1105 | reverse_path_check_proc, | ||
1106 | child_file, child_file, | ||
1107 | current); | ||
1108 | } | ||
1109 | if (error != 0) | ||
1110 | break; | ||
1111 | } else { | ||
1112 | printk(KERN_ERR "reverse_path_check_proc: " | ||
1113 | "file is not an ep!\n"); | ||
1114 | } | ||
1115 | } | ||
1116 | return error; | ||
1117 | } | ||
1118 | |||
1119 | /** | ||
1120 | * reverse_path_check - The tfile_check_list is list of file *, which have | ||
1121 | * links that are proposed to be newly added. We need to | ||
1122 | * make sure that those added links don't add too many | ||
1123 | * paths such that we will spend all our time waking up | ||
1124 | * eventpoll objects. | ||
1125 | * | ||
1126 | * Returns: Returns zero if the proposed links don't create too many paths, | ||
1127 | * -1 otherwise. | ||
1128 | */ | ||
1129 | static int reverse_path_check(void) | ||
1130 | { | ||
1131 | int error = 0; | ||
1132 | struct file *current_file; | ||
1133 | |||
1134 | /* let's call this for all tfiles */ | ||
1135 | list_for_each_entry(current_file, &tfile_check_list, f_tfile_llink) { | ||
1136 | path_count_init(); | ||
1137 | error = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS, | ||
1138 | reverse_path_check_proc, current_file, | ||
1139 | current_file, current); | ||
1140 | if (error) | ||
1141 | break; | ||
1142 | } | ||
1143 | return error; | ||
1144 | } | ||
1145 | |||
1146 | static int ep_create_wakeup_source(struct epitem *epi) | ||
1147 | { | ||
1148 | const char *name; | ||
1149 | |||
1150 | if (!epi->ep->ws) { | ||
1151 | epi->ep->ws = wakeup_source_register("eventpoll"); | ||
1152 | if (!epi->ep->ws) | ||
1153 | return -ENOMEM; | ||
1154 | } | ||
1155 | |||
1156 | name = epi->ffd.file->f_path.dentry->d_name.name; | ||
1157 | epi->ws = wakeup_source_register(name); | ||
1158 | if (!epi->ws) | ||
1159 | return -ENOMEM; | ||
1160 | |||
1161 | return 0; | ||
1162 | } | ||
1163 | |||
1164 | static void ep_destroy_wakeup_source(struct epitem *epi) | ||
1165 | { | ||
1166 | wakeup_source_unregister(epi->ws); | ||
1167 | epi->ws = NULL; | ||
1168 | } | ||
1169 | |||
1170 | /* | 929 | /* |
1171 | * Must be called with "mtx" held. | 930 | * Must be called with "mtx" held. |
1172 | */ | 931 | */ |
@@ -1194,18 +953,10 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, | |||
1194 | epi->event = *event; | 953 | epi->event = *event; |
1195 | epi->nwait = 0; | 954 | epi->nwait = 0; |
1196 | epi->next = EP_UNACTIVE_PTR; | 955 | epi->next = EP_UNACTIVE_PTR; |
1197 | if (epi->event.events & EPOLLWAKEUP) { | ||
1198 | error = ep_create_wakeup_source(epi); | ||
1199 | if (error) | ||
1200 | goto error_create_wakeup_source; | ||
1201 | } else { | ||
1202 | epi->ws = NULL; | ||
1203 | } | ||
1204 | 956 | ||
1205 | /* Initialize the poll table using the queue callback */ | 957 | /* Initialize the poll table using the queue callback */ |
1206 | epq.epi = epi; | 958 | epq.epi = epi; |
1207 | init_poll_funcptr(&epq.pt, ep_ptable_queue_proc); | 959 | init_poll_funcptr(&epq.pt, ep_ptable_queue_proc); |
1208 | epq.pt._key = event->events; | ||
1209 | 960 | ||
1210 | /* | 961 | /* |
1211 | * Attach the item to the poll hooks and get current event bits. | 962 | * Attach the item to the poll hooks and get current event bits. |
@@ -1236,18 +987,12 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, | |||
1236 | */ | 987 | */ |
1237 | ep_rbtree_insert(ep, epi); | 988 | ep_rbtree_insert(ep, epi); |
1238 | 989 | ||
1239 | /* now check if we've created too many backpaths */ | ||
1240 | error = -EINVAL; | ||
1241 | if (reverse_path_check()) | ||
1242 | goto error_remove_epi; | ||
1243 | |||
1244 | /* We have to drop the new item inside our item list to keep track of it */ | 990 | /* We have to drop the new item inside our item list to keep track of it */ |
1245 | spin_lock_irqsave(&ep->lock, flags); | 991 | spin_lock_irqsave(&ep->lock, flags); |
1246 | 992 | ||
1247 | /* If the file is already "ready" we drop it inside the ready list */ | 993 | /* If the file is already "ready" we drop it inside the ready list */ |
1248 | if ((revents & event->events) && !ep_is_linked(&epi->rdllink)) { | 994 | if ((revents & event->events) && !ep_is_linked(&epi->rdllink)) { |
1249 | list_add_tail(&epi->rdllink, &ep->rdllist); | 995 | list_add_tail(&epi->rdllink, &ep->rdllist); |
1250 | __pm_stay_awake(epi->ws); | ||
1251 | 996 | ||
1252 | /* Notify waiting tasks that events are available */ | 997 | /* Notify waiting tasks that events are available */ |
1253 | if (waitqueue_active(&ep->wq)) | 998 | if (waitqueue_active(&ep->wq)) |
@@ -1266,14 +1011,6 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, | |||
1266 | 1011 | ||
1267 | return 0; | 1012 | return 0; |
1268 | 1013 | ||
1269 | error_remove_epi: | ||
1270 | spin_lock(&tfile->f_lock); | ||
1271 | if (ep_is_linked(&epi->fllink)) | ||
1272 | list_del_init(&epi->fllink); | ||
1273 | spin_unlock(&tfile->f_lock); | ||
1274 | |||
1275 | rb_erase(&epi->rbn, &ep->rbr); | ||
1276 | |||
1277 | error_unregister: | 1014 | error_unregister: |
1278 | ep_unregister_pollwait(ep, epi); | 1015 | ep_unregister_pollwait(ep, epi); |
1279 | 1016 | ||
@@ -1288,9 +1025,6 @@ error_unregister: | |||
1288 | list_del_init(&epi->rdllink); | 1025 | list_del_init(&epi->rdllink); |
1289 | spin_unlock_irqrestore(&ep->lock, flags); | 1026 | spin_unlock_irqrestore(&ep->lock, flags); |
1290 | 1027 | ||
1291 | wakeup_source_unregister(epi->ws); | ||
1292 | |||
1293 | error_create_wakeup_source: | ||
1294 | kmem_cache_free(epi_cache, epi); | 1028 | kmem_cache_free(epi_cache, epi); |
1295 | 1029 | ||
1296 | return error; | 1030 | return error; |
@@ -1304,50 +1038,20 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even | |||
1304 | { | 1038 | { |
1305 | int pwake = 0; | 1039 | int pwake = 0; |
1306 | unsigned int revents; | 1040 | unsigned int revents; |
1307 | poll_table pt; | ||
1308 | |||
1309 | init_poll_funcptr(&pt, NULL); | ||
1310 | 1041 | ||
1311 | /* | 1042 | /* |
1312 | * Set the new event interest mask before calling f_op->poll(); | 1043 | * Set the new event interest mask before calling f_op->poll(); |
1313 | * otherwise we might miss an event that happens between the | 1044 | * otherwise we might miss an event that happens between the |
1314 | * f_op->poll() call and the new event set registering. | 1045 | * f_op->poll() call and the new event set registering. |
1315 | */ | 1046 | */ |
1316 | epi->event.events = event->events; /* need barrier below */ | 1047 | epi->event.events = event->events; |
1317 | pt._key = event->events; | ||
1318 | epi->event.data = event->data; /* protected by mtx */ | 1048 | epi->event.data = event->data; /* protected by mtx */ |
1319 | if (epi->event.events & EPOLLWAKEUP) { | ||
1320 | if (!epi->ws) | ||
1321 | ep_create_wakeup_source(epi); | ||
1322 | } else if (epi->ws) { | ||
1323 | ep_destroy_wakeup_source(epi); | ||
1324 | } | ||
1325 | |||
1326 | /* | ||
1327 | * The following barrier has two effects: | ||
1328 | * | ||
1329 | * 1) Flush epi changes above to other CPUs. This ensures | ||
1330 | * we do not miss events from ep_poll_callback if an | ||
1331 | * event occurs immediately after we call f_op->poll(). | ||
1332 | * We need this because we did not take ep->lock while | ||
1333 | * changing epi above (but ep_poll_callback does take | ||
1334 | * ep->lock). | ||
1335 | * | ||
1336 | * 2) We also need to ensure we do not miss _past_ events | ||
1337 | * when calling f_op->poll(). This barrier also | ||
1338 | * pairs with the barrier in wq_has_sleeper (see | ||
1339 | * comments for wq_has_sleeper). | ||
1340 | * | ||
1341 | * This barrier will now guarantee ep_poll_callback or f_op->poll | ||
1342 | * (or both) will notice the readiness of an item. | ||
1343 | */ | ||
1344 | smp_mb(); | ||
1345 | 1049 | ||
1346 | /* | 1050 | /* |
1347 | * Get current event bits. We can safely use the file* here because | 1051 | * Get current event bits. We can safely use the file* here because |
1348 | * its usage count has been increased by the caller of this function. | 1052 | * its usage count has been increased by the caller of this function. |
1349 | */ | 1053 | */ |
1350 | revents = epi->ffd.file->f_op->poll(epi->ffd.file, &pt); | 1054 | revents = epi->ffd.file->f_op->poll(epi->ffd.file, NULL); |
1351 | 1055 | ||
1352 | /* | 1056 | /* |
1353 | * If the item is "hot" and it is not registered inside the ready | 1057 | * If the item is "hot" and it is not registered inside the ready |
@@ -1357,7 +1061,6 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even | |||
1357 | spin_lock_irq(&ep->lock); | 1061 | spin_lock_irq(&ep->lock); |
1358 | if (!ep_is_linked(&epi->rdllink)) { | 1062 | if (!ep_is_linked(&epi->rdllink)) { |
1359 | list_add_tail(&epi->rdllink, &ep->rdllist); | 1063 | list_add_tail(&epi->rdllink, &ep->rdllist); |
1360 | __pm_stay_awake(epi->ws); | ||
1361 | 1064 | ||
1362 | /* Notify waiting tasks that events are available */ | 1065 | /* Notify waiting tasks that events are available */ |
1363 | if (waitqueue_active(&ep->wq)) | 1066 | if (waitqueue_active(&ep->wq)) |
@@ -1383,9 +1086,6 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head, | |||
1383 | unsigned int revents; | 1086 | unsigned int revents; |
1384 | struct epitem *epi; | 1087 | struct epitem *epi; |
1385 | struct epoll_event __user *uevent; | 1088 | struct epoll_event __user *uevent; |
1386 | poll_table pt; | ||
1387 | |||
1388 | init_poll_funcptr(&pt, NULL); | ||
1389 | 1089 | ||
1390 | /* | 1090 | /* |
1391 | * We can loop without lock because we are passed a task private list. | 1091 | * We can loop without lock because we are passed a task private list. |
@@ -1396,22 +1096,9 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head, | |||
1396 | !list_empty(head) && eventcnt < esed->maxevents;) { | 1096 | !list_empty(head) && eventcnt < esed->maxevents;) { |
1397 | epi = list_first_entry(head, struct epitem, rdllink); | 1097 | epi = list_first_entry(head, struct epitem, rdllink); |
1398 | 1098 | ||
1399 | /* | ||
1400 | * Activate ep->ws before deactivating epi->ws to prevent | ||
1401 | * triggering auto-suspend here (in case we reactive epi->ws | ||
1402 | * below). | ||
1403 | * | ||
1404 | * This could be rearranged to delay the deactivation of epi->ws | ||
1405 | * instead, but then epi->ws would temporarily be out of sync | ||
1406 | * with ep_is_linked(). | ||
1407 | */ | ||
1408 | if (epi->ws && epi->ws->active) | ||
1409 | __pm_stay_awake(ep->ws); | ||
1410 | __pm_relax(epi->ws); | ||
1411 | list_del_init(&epi->rdllink); | 1099 | list_del_init(&epi->rdllink); |
1412 | 1100 | ||
1413 | pt._key = epi->event.events; | 1101 | revents = epi->ffd.file->f_op->poll(epi->ffd.file, NULL) & |
1414 | revents = epi->ffd.file->f_op->poll(epi->ffd.file, &pt) & | ||
1415 | epi->event.events; | 1102 | epi->event.events; |
1416 | 1103 | ||
1417 | /* | 1104 | /* |
@@ -1424,7 +1111,6 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head, | |||
1424 | if (__put_user(revents, &uevent->events) || | 1111 | if (__put_user(revents, &uevent->events) || |
1425 | __put_user(epi->event.data, &uevent->data)) { | 1112 | __put_user(epi->event.data, &uevent->data)) { |
1426 | list_add(&epi->rdllink, head); | 1113 | list_add(&epi->rdllink, head); |
1427 | __pm_stay_awake(epi->ws); | ||
1428 | return eventcnt ? eventcnt : -EFAULT; | 1114 | return eventcnt ? eventcnt : -EFAULT; |
1429 | } | 1115 | } |
1430 | eventcnt++; | 1116 | eventcnt++; |
@@ -1444,7 +1130,6 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head, | |||
1444 | * poll callback will queue them in ep->ovflist. | 1130 | * poll callback will queue them in ep->ovflist. |
1445 | */ | 1131 | */ |
1446 | list_add_tail(&epi->rdllink, &ep->rdllist); | 1132 | list_add_tail(&epi->rdllink, &ep->rdllist); |
1447 | __pm_stay_awake(epi->ws); | ||
1448 | } | 1133 | } |
1449 | } | 1134 | } |
1450 | } | 1135 | } |
@@ -1590,36 +1275,18 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests) | |||
1590 | int error = 0; | 1275 | int error = 0; |
1591 | struct file *file = priv; | 1276 | struct file *file = priv; |
1592 | struct eventpoll *ep = file->private_data; | 1277 | struct eventpoll *ep = file->private_data; |
1593 | struct eventpoll *ep_tovisit; | ||
1594 | struct rb_node *rbp; | 1278 | struct rb_node *rbp; |
1595 | struct epitem *epi; | 1279 | struct epitem *epi; |
1596 | 1280 | ||
1597 | mutex_lock_nested(&ep->mtx, call_nests + 1); | 1281 | mutex_lock_nested(&ep->mtx, call_nests + 1); |
1598 | ep->visited = 1; | ||
1599 | list_add(&ep->visited_list_link, &visited_list); | ||
1600 | for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) { | 1282 | for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) { |
1601 | epi = rb_entry(rbp, struct epitem, rbn); | 1283 | epi = rb_entry(rbp, struct epitem, rbn); |
1602 | if (unlikely(is_file_epoll(epi->ffd.file))) { | 1284 | if (unlikely(is_file_epoll(epi->ffd.file))) { |
1603 | ep_tovisit = epi->ffd.file->private_data; | ||
1604 | if (ep_tovisit->visited) | ||
1605 | continue; | ||
1606 | error = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS, | 1285 | error = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS, |
1607 | ep_loop_check_proc, epi->ffd.file, | 1286 | ep_loop_check_proc, epi->ffd.file, |
1608 | ep_tovisit, current); | 1287 | epi->ffd.file->private_data, current); |
1609 | if (error != 0) | 1288 | if (error != 0) |
1610 | break; | 1289 | break; |
1611 | } else { | ||
1612 | /* | ||
1613 | * If we've reached a file that is not associated with | ||
1614 | * an ep, then we need to check if the newly added | ||
1615 | * links are going to add too many wakeup paths. We do | ||
1616 | * this by adding it to the tfile_check_list, if it's | ||
1617 | * not already there, and calling reverse_path_check() | ||
1618 | * during ep_insert(). | ||
1619 | */ | ||
1620 | if (list_empty(&epi->ffd.file->f_tfile_llink)) | ||
1621 | list_add(&epi->ffd.file->f_tfile_llink, | ||
1622 | &tfile_check_list); | ||
1623 | } | 1290 | } |
1624 | } | 1291 | } |
1625 | mutex_unlock(&ep->mtx); | 1292 | mutex_unlock(&ep->mtx); |
@@ -1640,31 +1307,8 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests) | |||
1640 | */ | 1307 | */ |
1641 | static int ep_loop_check(struct eventpoll *ep, struct file *file) | 1308 | static int ep_loop_check(struct eventpoll *ep, struct file *file) |
1642 | { | 1309 | { |
1643 | int ret; | 1310 | return ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS, |
1644 | struct eventpoll *ep_cur, *ep_next; | ||
1645 | |||
1646 | ret = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS, | ||
1647 | ep_loop_check_proc, file, ep, current); | 1311 | ep_loop_check_proc, file, ep, current); |
1648 | /* clear visited list */ | ||
1649 | list_for_each_entry_safe(ep_cur, ep_next, &visited_list, | ||
1650 | visited_list_link) { | ||
1651 | ep_cur->visited = 0; | ||
1652 | list_del(&ep_cur->visited_list_link); | ||
1653 | } | ||
1654 | return ret; | ||
1655 | } | ||
1656 | |||
1657 | static void clear_tfile_check_list(void) | ||
1658 | { | ||
1659 | struct file *file; | ||
1660 | |||
1661 | /* first clear the tfile_check_list */ | ||
1662 | while (!list_empty(&tfile_check_list)) { | ||
1663 | file = list_first_entry(&tfile_check_list, struct file, | ||
1664 | f_tfile_llink); | ||
1665 | list_del_init(&file->f_tfile_llink); | ||
1666 | } | ||
1667 | INIT_LIST_HEAD(&tfile_check_list); | ||
1668 | } | 1312 | } |
1669 | 1313 | ||
1670 | /* | 1314 | /* |
@@ -1672,9 +1316,8 @@ static void clear_tfile_check_list(void) | |||
1672 | */ | 1316 | */ |
1673 | SYSCALL_DEFINE1(epoll_create1, int, flags) | 1317 | SYSCALL_DEFINE1(epoll_create1, int, flags) |
1674 | { | 1318 | { |
1675 | int error, fd; | 1319 | int error; |
1676 | struct eventpoll *ep = NULL; | 1320 | struct eventpoll *ep = NULL; |
1677 | struct file *file; | ||
1678 | 1321 | ||
1679 | /* Check the EPOLL_* constant for consistency. */ | 1322 | /* Check the EPOLL_* constant for consistency. */ |
1680 | BUILD_BUG_ON(EPOLL_CLOEXEC != O_CLOEXEC); | 1323 | BUILD_BUG_ON(EPOLL_CLOEXEC != O_CLOEXEC); |
@@ -1691,25 +1334,11 @@ SYSCALL_DEFINE1(epoll_create1, int, flags) | |||
1691 | * Creates all the items needed to setup an eventpoll file. That is, | 1334 | * Creates all the items needed to setup an eventpoll file. That is, |
1692 | * a file structure and a free file descriptor. | 1335 | * a file structure and a free file descriptor. |
1693 | */ | 1336 | */ |
1694 | fd = get_unused_fd_flags(O_RDWR | (flags & O_CLOEXEC)); | 1337 | error = anon_inode_getfd("[eventpoll]", &eventpoll_fops, ep, |
1695 | if (fd < 0) { | ||
1696 | error = fd; | ||
1697 | goto out_free_ep; | ||
1698 | } | ||
1699 | file = anon_inode_getfile("[eventpoll]", &eventpoll_fops, ep, | ||
1700 | O_RDWR | (flags & O_CLOEXEC)); | 1338 | O_RDWR | (flags & O_CLOEXEC)); |
1701 | if (IS_ERR(file)) { | 1339 | if (error < 0) |
1702 | error = PTR_ERR(file); | 1340 | ep_free(ep); |
1703 | goto out_free_fd; | 1341 | |
1704 | } | ||
1705 | ep->file = file; | ||
1706 | fd_install(fd, file); | ||
1707 | return fd; | ||
1708 | |||
1709 | out_free_fd: | ||
1710 | put_unused_fd(fd); | ||
1711 | out_free_ep: | ||
1712 | ep_free(ep); | ||
1713 | return error; | 1342 | return error; |
1714 | } | 1343 | } |
1715 | 1344 | ||
@@ -1757,10 +1386,6 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd, | |||
1757 | if (!tfile->f_op || !tfile->f_op->poll) | 1386 | if (!tfile->f_op || !tfile->f_op->poll) |
1758 | goto error_tgt_fput; | 1387 | goto error_tgt_fput; |
1759 | 1388 | ||
1760 | /* Check if EPOLLWAKEUP is allowed */ | ||
1761 | if ((epds.events & EPOLLWAKEUP) && !capable(CAP_BLOCK_SUSPEND)) | ||
1762 | epds.events &= ~EPOLLWAKEUP; | ||
1763 | |||
1764 | /* | 1389 | /* |
1765 | * We have to check that the file structure underneath the file descriptor | 1390 | * We have to check that the file structure underneath the file descriptor |
1766 | * the user passed to us _is_ an eventpoll file. And also we do not permit | 1391 | * the user passed to us _is_ an eventpoll file. And also we do not permit |
@@ -1779,29 +1404,21 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd, | |||
1779 | /* | 1404 | /* |
1780 | * When we insert an epoll file descriptor, inside another epoll file | 1405 | * When we insert an epoll file descriptor, inside another epoll file |
1781 | * descriptor, there is the chance of creating closed loops, which are | 1406 | * descriptor, there is the chance of creating closed loops, which are |
1782 | * better handled here than in more critical paths. While we are | 1407 | * better handled here than in more critical paths. |
1783 | * checking for loops we also determine the list of files reachable | ||
1784 | * and hang them on the tfile_check_list, so we can check that we | ||
1785 | * haven't created too many possible wakeup paths. | ||
1786 | * | 1408 | * |
1787 | * We need to hold the epmutex across both ep_insert and ep_remove | 1409 | * We hold epmutex across the loop check and the insert in this case, in |
1788 | * b/c we want to make sure we are looking at a coherent view of | 1410 | * order to prevent two separate inserts from racing and each doing the |
1789 | * epoll network. | 1411 | * insert "at the same time" such that ep_loop_check passes on both |
1412 | * before either one does the insert, thereby creating a cycle. | ||
1790 | */ | 1413 | */ |
1791 | if (op == EPOLL_CTL_ADD || op == EPOLL_CTL_DEL) { | 1414 | if (unlikely(is_file_epoll(tfile) && op == EPOLL_CTL_ADD)) { |
1792 | mutex_lock(&epmutex); | 1415 | mutex_lock(&epmutex); |
1793 | did_lock_epmutex = 1; | 1416 | did_lock_epmutex = 1; |
1417 | error = -ELOOP; | ||
1418 | if (ep_loop_check(ep, tfile) != 0) | ||
1419 | goto error_tgt_fput; | ||
1794 | } | 1420 | } |
1795 | if (op == EPOLL_CTL_ADD) { | 1421 | |
1796 | if (is_file_epoll(tfile)) { | ||
1797 | error = -ELOOP; | ||
1798 | if (ep_loop_check(ep, tfile) != 0) { | ||
1799 | clear_tfile_check_list(); | ||
1800 | goto error_tgt_fput; | ||
1801 | } | ||
1802 | } else | ||
1803 | list_add(&tfile->f_tfile_llink, &tfile_check_list); | ||
1804 | } | ||
1805 | 1422 | ||
1806 | mutex_lock_nested(&ep->mtx, 0); | 1423 | mutex_lock_nested(&ep->mtx, 0); |
1807 | 1424 | ||
@@ -1820,7 +1437,6 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd, | |||
1820 | error = ep_insert(ep, &epds, tfile, fd); | 1437 | error = ep_insert(ep, &epds, tfile, fd); |
1821 | } else | 1438 | } else |
1822 | error = -EEXIST; | 1439 | error = -EEXIST; |
1823 | clear_tfile_check_list(); | ||
1824 | break; | 1440 | break; |
1825 | case EPOLL_CTL_DEL: | 1441 | case EPOLL_CTL_DEL: |
1826 | if (epi) | 1442 | if (epi) |
@@ -1839,7 +1455,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd, | |||
1839 | mutex_unlock(&ep->mtx); | 1455 | mutex_unlock(&ep->mtx); |
1840 | 1456 | ||
1841 | error_tgt_fput: | 1457 | error_tgt_fput: |
1842 | if (did_lock_epmutex) | 1458 | if (unlikely(did_lock_epmutex)) |
1843 | mutex_unlock(&epmutex); | 1459 | mutex_unlock(&epmutex); |
1844 | 1460 | ||
1845 | fput(tfile); | 1461 | fput(tfile); |
@@ -1858,7 +1474,7 @@ SYSCALL_DEFINE4(epoll_wait, int, epfd, struct epoll_event __user *, events, | |||
1858 | int, maxevents, int, timeout) | 1474 | int, maxevents, int, timeout) |
1859 | { | 1475 | { |
1860 | int error; | 1476 | int error; |
1861 | struct fd f; | 1477 | struct file *file; |
1862 | struct eventpoll *ep; | 1478 | struct eventpoll *ep; |
1863 | 1479 | ||
1864 | /* The maximum number of event must be greater than zero */ | 1480 | /* The maximum number of event must be greater than zero */ |
@@ -1866,36 +1482,43 @@ SYSCALL_DEFINE4(epoll_wait, int, epfd, struct epoll_event __user *, events, | |||
1866 | return -EINVAL; | 1482 | return -EINVAL; |
1867 | 1483 | ||
1868 | /* Verify that the area passed by the user is writeable */ | 1484 | /* Verify that the area passed by the user is writeable */ |
1869 | if (!access_ok(VERIFY_WRITE, events, maxevents * sizeof(struct epoll_event))) | 1485 | if (!access_ok(VERIFY_WRITE, events, maxevents * sizeof(struct epoll_event))) { |
1870 | return -EFAULT; | 1486 | error = -EFAULT; |
1487 | goto error_return; | ||
1488 | } | ||
1871 | 1489 | ||
1872 | /* Get the "struct file *" for the eventpoll file */ | 1490 | /* Get the "struct file *" for the eventpoll file */ |
1873 | f = fdget(epfd); | 1491 | error = -EBADF; |
1874 | if (!f.file) | 1492 | file = fget(epfd); |
1875 | return -EBADF; | 1493 | if (!file) |
1494 | goto error_return; | ||
1876 | 1495 | ||
1877 | /* | 1496 | /* |
1878 | * We have to check that the file structure underneath the fd | 1497 | * We have to check that the file structure underneath the fd |
1879 | * the user passed to us _is_ an eventpoll file. | 1498 | * the user passed to us _is_ an eventpoll file. |
1880 | */ | 1499 | */ |
1881 | error = -EINVAL; | 1500 | error = -EINVAL; |
1882 | if (!is_file_epoll(f.file)) | 1501 | if (!is_file_epoll(file)) |
1883 | goto error_fput; | 1502 | goto error_fput; |
1884 | 1503 | ||
1885 | /* | 1504 | /* |
1886 | * At this point it is safe to assume that the "private_data" contains | 1505 | * At this point it is safe to assume that the "private_data" contains |
1887 | * our own data structure. | 1506 | * our own data structure. |
1888 | */ | 1507 | */ |
1889 | ep = f.file->private_data; | 1508 | ep = file->private_data; |
1890 | 1509 | ||
1891 | /* Time to fish for events ... */ | 1510 | /* Time to fish for events ... */ |
1892 | error = ep_poll(ep, events, maxevents, timeout); | 1511 | error = ep_poll(ep, events, maxevents, timeout); |
1893 | 1512 | ||
1894 | error_fput: | 1513 | error_fput: |
1895 | fdput(f); | 1514 | fput(file); |
1515 | error_return: | ||
1516 | |||
1896 | return error; | 1517 | return error; |
1897 | } | 1518 | } |
1898 | 1519 | ||
1520 | #ifdef HAVE_SET_RESTORE_SIGMASK | ||
1521 | |||
1899 | /* | 1522 | /* |
1900 | * Implement the event wait interface for the eventpoll file. It is the kernel | 1523 | * Implement the event wait interface for the eventpoll file. It is the kernel |
1901 | * part of the user space epoll_pwait(2). | 1524 | * part of the user space epoll_pwait(2). |
@@ -1940,6 +1563,8 @@ SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events, | |||
1940 | return error; | 1563 | return error; |
1941 | } | 1564 | } |
1942 | 1565 | ||
1566 | #endif /* HAVE_SET_RESTORE_SIGMASK */ | ||
1567 | |||
1943 | static int __init eventpoll_init(void) | 1568 | static int __init eventpoll_init(void) |
1944 | { | 1569 | { |
1945 | struct sysinfo si; | 1570 | struct sysinfo si; |