diff options
author | Davide Libenzi <davidel@xmailserver.org> | 2009-03-31 18:24:11 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-04-01 11:59:19 -0400 |
commit | 296e236e96dddef351a1809c0d414bcddfcf3800 (patch) | |
tree | 279d6eb3f0114a49897cd4bbc0eaf43fffd46c6e /fs/eventpoll.c | |
parent | 5071f97ec6d74f006072de0ce89b67c8792fe5a1 (diff) |
epoll: fix epoll's own poll (update)
Signed-off-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Pavel Pisa <pisa@cmp.felk.cvut.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs/eventpoll.c')
-rw-r--r-- | fs/eventpoll.c | 110 |
1 files changed, 57 insertions, 53 deletions
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 8a23a91e1377..e24d137c93b6 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -454,9 +454,7 @@ static int ep_scan_ready_list(struct eventpoll *ep, | |||
454 | int error, pwake = 0; | 454 | int error, pwake = 0; |
455 | unsigned long flags; | 455 | unsigned long flags; |
456 | struct epitem *epi, *nepi; | 456 | struct epitem *epi, *nepi; |
457 | struct list_head txlist; | 457 | LIST_HEAD(txlist); |
458 | |||
459 | INIT_LIST_HEAD(&txlist); | ||
460 | 458 | ||
461 | /* | 459 | /* |
462 | * We need to lock this because we could be hit by | 460 | * We need to lock this because we could be hit by |
@@ -473,8 +471,7 @@ static int ep_scan_ready_list(struct eventpoll *ep, | |||
473 | * in a lockless way. | 471 | * in a lockless way. |
474 | */ | 472 | */ |
475 | spin_lock_irqsave(&ep->lock, flags); | 473 | spin_lock_irqsave(&ep->lock, flags); |
476 | list_splice(&ep->rdllist, &txlist); | 474 | list_splice_init(&ep->rdllist, &txlist); |
477 | INIT_LIST_HEAD(&ep->rdllist); | ||
478 | ep->ovflist = NULL; | 475 | ep->ovflist = NULL; |
479 | spin_unlock_irqrestore(&ep->lock, flags); | 476 | spin_unlock_irqrestore(&ep->lock, flags); |
480 | 477 | ||
@@ -514,8 +511,8 @@ static int ep_scan_ready_list(struct eventpoll *ep, | |||
514 | 511 | ||
515 | if (!list_empty(&ep->rdllist)) { | 512 | if (!list_empty(&ep->rdllist)) { |
516 | /* | 513 | /* |
517 | * Wake up (if active) both the eventpoll wait list and the ->poll() | 514 | * Wake up (if active) both the eventpoll wait list and |
518 | * wait list (delayed after we release the lock). | 515 | * the ->poll() wait list (delayed after we release the lock). |
519 | */ | 516 | */ |
520 | if (waitqueue_active(&ep->wq)) | 517 | if (waitqueue_active(&ep->wq)) |
521 | wake_up_locked(&ep->wq); | 518 | wake_up_locked(&ep->wq); |
@@ -632,7 +629,8 @@ static int ep_eventpoll_release(struct inode *inode, struct file *file) | |||
632 | return 0; | 629 | return 0; |
633 | } | 630 | } |
634 | 631 | ||
635 | static int ep_read_events_proc(struct eventpoll *ep, struct list_head *head, void *priv) | 632 | static int ep_read_events_proc(struct eventpoll *ep, struct list_head *head, |
633 | void *priv) | ||
636 | { | 634 | { |
637 | struct epitem *epi, *tmp; | 635 | struct epitem *epi, *tmp; |
638 | 636 | ||
@@ -640,13 +638,14 @@ static int ep_read_events_proc(struct eventpoll *ep, struct list_head *head, voi | |||
640 | if (epi->ffd.file->f_op->poll(epi->ffd.file, NULL) & | 638 | if (epi->ffd.file->f_op->poll(epi->ffd.file, NULL) & |
641 | epi->event.events) | 639 | epi->event.events) |
642 | return POLLIN | POLLRDNORM; | 640 | return POLLIN | POLLRDNORM; |
643 | else | 641 | else { |
644 | /* | 642 | /* |
645 | * Item has been dropped into the ready list by the poll | 643 | * Item has been dropped into the ready list by the poll |
646 | * callback, but it's not actually ready, as far as | 644 | * callback, but it's not actually ready, as far as |
647 | * caller requested events goes. We can remove it here. | 645 | * caller requested events goes. We can remove it here. |
648 | */ | 646 | */ |
649 | list_del_init(&epi->rdllink); | 647 | list_del_init(&epi->rdllink); |
648 | } | ||
650 | } | 649 | } |
651 | 650 | ||
652 | return 0; | 651 | return 0; |
@@ -674,7 +673,7 @@ static unsigned int ep_eventpoll_poll(struct file *file, poll_table *wait) | |||
674 | pollflags = ep_call_nested(&poll_readywalk_ncalls, EP_MAX_NESTS, | 673 | pollflags = ep_call_nested(&poll_readywalk_ncalls, EP_MAX_NESTS, |
675 | ep_poll_readyevents_proc, ep, ep); | 674 | ep_poll_readyevents_proc, ep, ep); |
676 | 675 | ||
677 | return pollflags != -1 ? pollflags: 0; | 676 | return pollflags != -1 ? pollflags : 0; |
678 | } | 677 | } |
679 | 678 | ||
680 | /* File callbacks that implement the eventpoll file behaviour */ | 679 | /* File callbacks that implement the eventpoll file behaviour */ |
@@ -872,9 +871,10 @@ static void ep_ptable_queue_proc(struct file *file, wait_queue_head_t *whead, | |||
872 | add_wait_queue(whead, &pwq->wait); | 871 | add_wait_queue(whead, &pwq->wait); |
873 | list_add_tail(&pwq->llink, &epi->pwqlist); | 872 | list_add_tail(&pwq->llink, &epi->pwqlist); |
874 | epi->nwait++; | 873 | epi->nwait++; |
875 | } else | 874 | } else { |
876 | /* We have to signal that an error occurred */ | 875 | /* We have to signal that an error occurred */ |
877 | epi->nwait = -1; | 876 | epi->nwait = -1; |
877 | } | ||
878 | } | 878 | } |
879 | 879 | ||
880 | static void ep_rbtree_insert(struct eventpoll *ep, struct epitem *epi) | 880 | static void ep_rbtree_insert(struct eventpoll *ep, struct epitem *epi) |
@@ -1055,62 +1055,65 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even | |||
1055 | return 0; | 1055 | return 0; |
1056 | } | 1056 | } |
1057 | 1057 | ||
1058 | static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head, void *priv) | 1058 | static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head, |
1059 | void *priv) | ||
1059 | { | 1060 | { |
1060 | struct ep_send_events_data *esed = priv; | 1061 | struct ep_send_events_data *esed = priv; |
1061 | int eventcnt; | 1062 | int eventcnt; |
1062 | unsigned int revents; | 1063 | unsigned int revents; |
1063 | struct epitem *epi; | 1064 | struct epitem *epi; |
1064 | struct epoll_event __user *uevent; | 1065 | struct epoll_event __user *uevent; |
1065 | 1066 | ||
1066 | /* | 1067 | /* |
1067 | * We can loop without lock because we are passed a task private list. | 1068 | * We can loop without lock because we are passed a task private list. |
1068 | * Items cannot vanish during the loop because ep_scan_ready_list() is | 1069 | * Items cannot vanish during the loop because ep_scan_ready_list() is |
1069 | * holding "mtx" during this call. | 1070 | * holding "mtx" during this call. |
1070 | */ | 1071 | */ |
1071 | for (eventcnt = 0, uevent = esed->events; | 1072 | for (eventcnt = 0, uevent = esed->events; |
1072 | !list_empty(head) && eventcnt < esed->maxevents;) { | 1073 | !list_empty(head) && eventcnt < esed->maxevents;) { |
1073 | epi = list_first_entry(head, struct epitem, rdllink); | 1074 | epi = list_first_entry(head, struct epitem, rdllink); |
1074 | 1075 | ||
1075 | list_del_init(&epi->rdllink); | 1076 | list_del_init(&epi->rdllink); |
1076 | 1077 | ||
1077 | revents = epi->ffd.file->f_op->poll(epi->ffd.file, NULL) & | 1078 | revents = epi->ffd.file->f_op->poll(epi->ffd.file, NULL) & |
1078 | epi->event.events; | 1079 | epi->event.events; |
1079 | 1080 | ||
1080 | /* | 1081 | /* |
1081 | * If the event mask intersect the caller-requested one, | 1082 | * If the event mask intersect the caller-requested one, |
1082 | * deliver the event to userspace. Again, ep_scan_ready_list() | 1083 | * deliver the event to userspace. Again, ep_scan_ready_list() |
1083 | * is holding "mtx", so no operations coming from userspace | 1084 | * is holding "mtx", so no operations coming from userspace |
1084 | * can change the item. | 1085 | * can change the item. |
1085 | */ | 1086 | */ |
1086 | if (revents) { | 1087 | if (revents) { |
1087 | if (__put_user(revents, &uevent->events) || | 1088 | if (__put_user(revents, &uevent->events) || |
1088 | __put_user(epi->event.data, &uevent->data)) | 1089 | __put_user(epi->event.data, &uevent->data)) |
1089 | return eventcnt ? eventcnt: -EFAULT; | 1090 | return eventcnt ? eventcnt : -EFAULT; |
1090 | eventcnt++; | 1091 | eventcnt++; |
1091 | uevent++; | 1092 | uevent++; |
1092 | if (epi->event.events & EPOLLONESHOT) | 1093 | if (epi->event.events & EPOLLONESHOT) |
1093 | epi->event.events &= EP_PRIVATE_BITS; | 1094 | epi->event.events &= EP_PRIVATE_BITS; |
1094 | else if (!(epi->event.events & EPOLLET)) | 1095 | else if (!(epi->event.events & EPOLLET)) { |
1095 | /* | 1096 | /* |
1096 | * If this file has been added with Level Trigger | 1097 | * If this file has been added with Level |
1097 | * mode, we need to insert back inside the ready | 1098 | * Trigger mode, we need to insert back inside |
1098 | * list, so that the next call to epoll_wait() | 1099 | * the ready list, so that the next call to |
1099 | * will check again the events availability. | 1100 | * epoll_wait() will check again the events |
1100 | * At this point, noone can insert into ep->rdllist | 1101 | * availability. At this point, noone can insert |
1101 | * besides us. The epoll_ctl() callers are locked | 1102 | * into ep->rdllist besides us. The epoll_ctl() |
1102 | * out by ep_scan_ready_list() holding "mtx" and | 1103 | * callers are locked out by |
1103 | * the poll callback will queue them in ep->ovflist. | 1104 | * ep_scan_ready_list() holding "mtx" and the |
1104 | */ | 1105 | * poll callback will queue them in ep->ovflist. |
1105 | list_add_tail(&epi->rdllink, &ep->rdllist); | 1106 | */ |
1106 | } | 1107 | list_add_tail(&epi->rdllink, &ep->rdllist); |
1107 | } | 1108 | } |
1109 | } | ||
1110 | } | ||
1108 | 1111 | ||
1109 | return eventcnt; | 1112 | return eventcnt; |
1110 | } | 1113 | } |
1111 | 1114 | ||
1112 | static int ep_send_events(struct eventpoll *ep, struct epoll_event __user *events, | 1115 | static int ep_send_events(struct eventpoll *ep, |
1113 | int maxevents) | 1116 | struct epoll_event __user *events, int maxevents) |
1114 | { | 1117 | { |
1115 | struct ep_send_events_data esed; | 1118 | struct ep_send_events_data esed; |
1116 | 1119 | ||
@@ -1194,40 +1197,41 @@ retry: | |||
1194 | */ | 1197 | */ |
1195 | SYSCALL_DEFINE1(epoll_create1, int, flags) | 1198 | SYSCALL_DEFINE1(epoll_create1, int, flags) |
1196 | { | 1199 | { |
1197 | int error; | 1200 | int error, fd = -1; |
1198 | struct eventpoll *ep = NULL; | 1201 | struct eventpoll *ep; |
1199 | 1202 | ||
1200 | /* Check the EPOLL_* constant for consistency. */ | 1203 | /* Check the EPOLL_* constant for consistency. */ |
1201 | BUILD_BUG_ON(EPOLL_CLOEXEC != O_CLOEXEC); | 1204 | BUILD_BUG_ON(EPOLL_CLOEXEC != O_CLOEXEC); |
1202 | 1205 | ||
1206 | if (flags & ~EPOLL_CLOEXEC) | ||
1207 | return -EINVAL; | ||
1208 | |||
1203 | DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d)\n", | 1209 | DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d)\n", |
1204 | current, flags)); | 1210 | current, flags)); |
1205 | 1211 | ||
1206 | error = -EINVAL; | ||
1207 | if (flags & ~EPOLL_CLOEXEC) | ||
1208 | goto error_return; | ||
1209 | |||
1210 | /* | 1212 | /* |
1211 | * Create the internal data structure ("struct eventpoll"). | 1213 | * Create the internal data structure ( "struct eventpoll" ). |
1212 | */ | 1214 | */ |
1213 | error = ep_alloc(&ep); | 1215 | error = ep_alloc(&ep); |
1214 | if (error < 0) | 1216 | if (error < 0) { |
1217 | fd = error; | ||
1215 | goto error_return; | 1218 | goto error_return; |
1219 | } | ||
1216 | 1220 | ||
1217 | /* | 1221 | /* |
1218 | * Creates all the items needed to setup an eventpoll file. That is, | 1222 | * Creates all the items needed to setup an eventpoll file. That is, |
1219 | * a file structure and a free file descriptor. | 1223 | * a file structure and a free file descriptor. |
1220 | */ | 1224 | */ |
1221 | error = anon_inode_getfd("[eventpoll]", &eventpoll_fops, ep, | 1225 | fd = anon_inode_getfd("[eventpoll]", &eventpoll_fops, ep, |
1222 | flags & O_CLOEXEC); | 1226 | flags & O_CLOEXEC); |
1223 | if (error < 0) | 1227 | if (fd < 0) |
1224 | ep_free(ep); | 1228 | ep_free(ep); |
1225 | 1229 | ||
1226 | error_return: | 1230 | error_return: |
1227 | DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n", | 1231 | DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n", |
1228 | current, flags, error)); | 1232 | current, flags, fd)); |
1229 | 1233 | ||
1230 | return error; | 1234 | return fd; |
1231 | } | 1235 | } |
1232 | 1236 | ||
1233 | SYSCALL_DEFINE1(epoll_create, int, size) | 1237 | SYSCALL_DEFINE1(epoll_create, int, size) |