about summary refs log tree commit diff stats
path: root/fs/eventpoll.c
diff options
context:
space:
mode:
authorDavide Libenzi <davidel@xmailserver.org>2009-03-31 18:24:11 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2009-04-01 11:59:19 -0400
commit296e236e96dddef351a1809c0d414bcddfcf3800 (patch)
tree279d6eb3f0114a49897cd4bbc0eaf43fffd46c6e /fs/eventpoll.c
parent5071f97ec6d74f006072de0ce89b67c8792fe5a1 (diff)
epoll: fix epoll's own poll (update)
Signed-off-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Pavel Pisa <pisa@cmp.felk.cvut.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs/eventpoll.c')
-rw-r--r--fs/eventpoll.c110
1 file changed, 57 insertions, 53 deletions
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 8a23a91e1377..e24d137c93b6 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -454,9 +454,7 @@ static int ep_scan_ready_list(struct eventpoll *ep,
454 int error, pwake = 0; 454 int error, pwake = 0;
455 unsigned long flags; 455 unsigned long flags;
456 struct epitem *epi, *nepi; 456 struct epitem *epi, *nepi;
457 struct list_head txlist; 457 LIST_HEAD(txlist);
458
459 INIT_LIST_HEAD(&txlist);
460 458
461 /* 459 /*
462 * We need to lock this because we could be hit by 460 * We need to lock this because we could be hit by
@@ -473,8 +471,7 @@ static int ep_scan_ready_list(struct eventpoll *ep,
473 * in a lockless way. 471 * in a lockless way.
474 */ 472 */
475 spin_lock_irqsave(&ep->lock, flags); 473 spin_lock_irqsave(&ep->lock, flags);
476 list_splice(&ep->rdllist, &txlist); 474 list_splice_init(&ep->rdllist, &txlist);
477 INIT_LIST_HEAD(&ep->rdllist);
478 ep->ovflist = NULL; 475 ep->ovflist = NULL;
479 spin_unlock_irqrestore(&ep->lock, flags); 476 spin_unlock_irqrestore(&ep->lock, flags);
480 477
@@ -514,8 +511,8 @@ static int ep_scan_ready_list(struct eventpoll *ep,
514 511
515 if (!list_empty(&ep->rdllist)) { 512 if (!list_empty(&ep->rdllist)) {
516 /* 513 /*
517 * Wake up (if active) both the eventpoll wait list and the ->poll() 514 * Wake up (if active) both the eventpoll wait list and
518 * wait list (delayed after we release the lock). 515 * the ->poll() wait list (delayed after we release the lock).
519 */ 516 */
520 if (waitqueue_active(&ep->wq)) 517 if (waitqueue_active(&ep->wq))
521 wake_up_locked(&ep->wq); 518 wake_up_locked(&ep->wq);
@@ -632,7 +629,8 @@ static int ep_eventpoll_release(struct inode *inode, struct file *file)
632 return 0; 629 return 0;
633} 630}
634 631
635static int ep_read_events_proc(struct eventpoll *ep, struct list_head *head, void *priv) 632static int ep_read_events_proc(struct eventpoll *ep, struct list_head *head,
633 void *priv)
636{ 634{
637 struct epitem *epi, *tmp; 635 struct epitem *epi, *tmp;
638 636
@@ -640,13 +638,14 @@ static int ep_read_events_proc(struct eventpoll *ep, struct list_head *head, voi
640 if (epi->ffd.file->f_op->poll(epi->ffd.file, NULL) & 638 if (epi->ffd.file->f_op->poll(epi->ffd.file, NULL) &
641 epi->event.events) 639 epi->event.events)
642 return POLLIN | POLLRDNORM; 640 return POLLIN | POLLRDNORM;
643 else 641 else {
644 /* 642 /*
645 * Item has been dropped into the ready list by the poll 643 * Item has been dropped into the ready list by the poll
646 * callback, but it's not actually ready, as far as 644 * callback, but it's not actually ready, as far as
647 * caller requested events goes. We can remove it here. 645 * caller requested events goes. We can remove it here.
648 */ 646 */
649 list_del_init(&epi->rdllink); 647 list_del_init(&epi->rdllink);
648 }
650 } 649 }
651 650
652 return 0; 651 return 0;
@@ -674,7 +673,7 @@ static unsigned int ep_eventpoll_poll(struct file *file, poll_table *wait)
674 pollflags = ep_call_nested(&poll_readywalk_ncalls, EP_MAX_NESTS, 673 pollflags = ep_call_nested(&poll_readywalk_ncalls, EP_MAX_NESTS,
675 ep_poll_readyevents_proc, ep, ep); 674 ep_poll_readyevents_proc, ep, ep);
676 675
677 return pollflags != -1 ? pollflags: 0; 676 return pollflags != -1 ? pollflags : 0;
678} 677}
679 678
680/* File callbacks that implement the eventpoll file behaviour */ 679/* File callbacks that implement the eventpoll file behaviour */
@@ -872,9 +871,10 @@ static void ep_ptable_queue_proc(struct file *file, wait_queue_head_t *whead,
872 add_wait_queue(whead, &pwq->wait); 871 add_wait_queue(whead, &pwq->wait);
873 list_add_tail(&pwq->llink, &epi->pwqlist); 872 list_add_tail(&pwq->llink, &epi->pwqlist);
874 epi->nwait++; 873 epi->nwait++;
875 } else 874 } else {
876 /* We have to signal that an error occurred */ 875 /* We have to signal that an error occurred */
877 epi->nwait = -1; 876 epi->nwait = -1;
877 }
878} 878}
879 879
880static void ep_rbtree_insert(struct eventpoll *ep, struct epitem *epi) 880static void ep_rbtree_insert(struct eventpoll *ep, struct epitem *epi)
@@ -1055,62 +1055,65 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even
1055 return 0; 1055 return 0;
1056} 1056}
1057 1057
1058static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head, void *priv) 1058static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head,
1059 void *priv)
1059{ 1060{
1060 struct ep_send_events_data *esed = priv; 1061 struct ep_send_events_data *esed = priv;
1061 int eventcnt; 1062 int eventcnt;
1062 unsigned int revents; 1063 unsigned int revents;
1063 struct epitem *epi; 1064 struct epitem *epi;
1064 struct epoll_event __user *uevent; 1065 struct epoll_event __user *uevent;
1065 1066
1066 /* 1067 /*
1067 * We can loop without lock because we are passed a task private list. 1068 * We can loop without lock because we are passed a task private list.
1068 * Items cannot vanish during the loop because ep_scan_ready_list() is 1069 * Items cannot vanish during the loop because ep_scan_ready_list() is
1069 * holding "mtx" during this call. 1070 * holding "mtx" during this call.
1070 */ 1071 */
1071 for (eventcnt = 0, uevent = esed->events; 1072 for (eventcnt = 0, uevent = esed->events;
1072 !list_empty(head) && eventcnt < esed->maxevents;) { 1073 !list_empty(head) && eventcnt < esed->maxevents;) {
1073 epi = list_first_entry(head, struct epitem, rdllink); 1074 epi = list_first_entry(head, struct epitem, rdllink);
1074 1075
1075 list_del_init(&epi->rdllink); 1076 list_del_init(&epi->rdllink);
1076 1077
1077 revents = epi->ffd.file->f_op->poll(epi->ffd.file, NULL) & 1078 revents = epi->ffd.file->f_op->poll(epi->ffd.file, NULL) &
1078 epi->event.events; 1079 epi->event.events;
1079 1080
1080 /* 1081 /*
1081 * If the event mask intersect the caller-requested one, 1082 * If the event mask intersect the caller-requested one,
1082 * deliver the event to userspace. Again, ep_scan_ready_list() 1083 * deliver the event to userspace. Again, ep_scan_ready_list()
1083 * is holding "mtx", so no operations coming from userspace 1084 * is holding "mtx", so no operations coming from userspace
1084 * can change the item. 1085 * can change the item.
1085 */ 1086 */
1086 if (revents) { 1087 if (revents) {
1087 if (__put_user(revents, &uevent->events) || 1088 if (__put_user(revents, &uevent->events) ||
1088 __put_user(epi->event.data, &uevent->data)) 1089 __put_user(epi->event.data, &uevent->data))
1089 return eventcnt ? eventcnt: -EFAULT; 1090 return eventcnt ? eventcnt : -EFAULT;
1090 eventcnt++; 1091 eventcnt++;
1091 uevent++; 1092 uevent++;
1092 if (epi->event.events & EPOLLONESHOT) 1093 if (epi->event.events & EPOLLONESHOT)
1093 epi->event.events &= EP_PRIVATE_BITS; 1094 epi->event.events &= EP_PRIVATE_BITS;
1094 else if (!(epi->event.events & EPOLLET)) 1095 else if (!(epi->event.events & EPOLLET)) {
1095 /* 1096 /*
1096 * If this file has been added with Level Trigger 1097 * If this file has been added with Level
1097 * mode, we need to insert back inside the ready 1098 * Trigger mode, we need to insert back inside
1098 * list, so that the next call to epoll_wait() 1099 * the ready list, so that the next call to
1099 * will check again the events availability. 1100 * epoll_wait() will check again the events
1100 * At this point, noone can insert into ep->rdllist 1101 * availability. At this point, noone can insert
1101 * besides us. The epoll_ctl() callers are locked 1102 * into ep->rdllist besides us. The epoll_ctl()
1102 * out by ep_scan_ready_list() holding "mtx" and 1103 * callers are locked out by
1103 * the poll callback will queue them in ep->ovflist. 1104 * ep_scan_ready_list() holding "mtx" and the
1104 */ 1105 * poll callback will queue them in ep->ovflist.
1105 list_add_tail(&epi->rdllink, &ep->rdllist); 1106 */
1106 } 1107 list_add_tail(&epi->rdllink, &ep->rdllist);
1107 } 1108 }
1109 }
1110 }
1108 1111
1109 return eventcnt; 1112 return eventcnt;
1110} 1113}
1111 1114
1112static int ep_send_events(struct eventpoll *ep, struct epoll_event __user *events, 1115static int ep_send_events(struct eventpoll *ep,
1113 int maxevents) 1116 struct epoll_event __user *events, int maxevents)
1114{ 1117{
1115 struct ep_send_events_data esed; 1118 struct ep_send_events_data esed;
1116 1119
@@ -1194,40 +1197,41 @@ retry:
1194 */ 1197 */
1195SYSCALL_DEFINE1(epoll_create1, int, flags) 1198SYSCALL_DEFINE1(epoll_create1, int, flags)
1196{ 1199{
1197 int error; 1200 int error, fd = -1;
1198 struct eventpoll *ep = NULL; 1201 struct eventpoll *ep;
1199 1202
1200 /* Check the EPOLL_* constant for consistency. */ 1203 /* Check the EPOLL_* constant for consistency. */
1201 BUILD_BUG_ON(EPOLL_CLOEXEC != O_CLOEXEC); 1204 BUILD_BUG_ON(EPOLL_CLOEXEC != O_CLOEXEC);
1202 1205
1206 if (flags & ~EPOLL_CLOEXEC)
1207 return -EINVAL;
1208
1203 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d)\n", 1209 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d)\n",
1204 current, flags)); 1210 current, flags));
1205 1211
1206 error = -EINVAL;
1207 if (flags & ~EPOLL_CLOEXEC)
1208 goto error_return;
1209
1210 /* 1212 /*
1211 * Create the internal data structure ("struct eventpoll"). 1213 * Create the internal data structure ( "struct eventpoll" ).
1212 */ 1214 */
1213 error = ep_alloc(&ep); 1215 error = ep_alloc(&ep);
1214 if (error < 0) 1216 if (error < 0) {
1217 fd = error;
1215 goto error_return; 1218 goto error_return;
1219 }
1216 1220
1217 /* 1221 /*
1218 * Creates all the items needed to setup an eventpoll file. That is, 1222 * Creates all the items needed to setup an eventpoll file. That is,
1219 * a file structure and a free file descriptor. 1223 * a file structure and a free file descriptor.
1220 */ 1224 */
1221 error = anon_inode_getfd("[eventpoll]", &eventpoll_fops, ep, 1225 fd = anon_inode_getfd("[eventpoll]", &eventpoll_fops, ep,
1222 flags & O_CLOEXEC); 1226 flags & O_CLOEXEC);
1223 if (error < 0) 1227 if (fd < 0)
1224 ep_free(ep); 1228 ep_free(ep);
1225 1229
1226error_return: 1230error_return:
1227 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n", 1231 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n",
1228 current, flags, error)); 1232 current, flags, fd));
1229 1233
1230 return error; 1234 return fd;
1231} 1235}
1232 1236
1233SYSCALL_DEFINE1(epoll_create, int, size) 1237SYSCALL_DEFINE1(epoll_create, int, size)