aboutsummaryrefslogtreecommitdiffstats
path: root/fs/eventpoll.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/eventpoll.c')
-rw-r--r--fs/eventpoll.c195
1 files changed, 110 insertions, 85 deletions
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 9900e333655a..6ab1dd0ca904 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -101,57 +101,6 @@
101/* Maximum number of poll wake up nests we are allowing */ 101/* Maximum number of poll wake up nests we are allowing */
102#define EP_MAX_POLLWAKE_NESTS 4 102#define EP_MAX_POLLWAKE_NESTS 4
103 103
104/* Macro to allocate a "struct epitem" from the slab cache */
105#define EPI_MEM_ALLOC() (struct epitem *) kmem_cache_alloc(epi_cache, SLAB_KERNEL)
106
107/* Macro to free a "struct epitem" to the slab cache */
108#define EPI_MEM_FREE(p) kmem_cache_free(epi_cache, p)
109
110/* Macro to allocate a "struct eppoll_entry" from the slab cache */
111#define PWQ_MEM_ALLOC() (struct eppoll_entry *) kmem_cache_alloc(pwq_cache, SLAB_KERNEL)
112
113/* Macro to free a "struct eppoll_entry" to the slab cache */
114#define PWQ_MEM_FREE(p) kmem_cache_free(pwq_cache, p)
115
116/* Fast test to see if the file is an evenpoll file */
117#define IS_FILE_EPOLL(f) ((f)->f_op == &eventpoll_fops)
118
119/* Setup the structure that is used as key for the rb-tree */
120#define EP_SET_FFD(p, f, d) do { (p)->file = (f); (p)->fd = (d); } while (0)
121
122/* Compare rb-tree keys */
123#define EP_CMP_FFD(p1, p2) ((p1)->file > (p2)->file ? +1: \
124 ((p1)->file < (p2)->file ? -1: (p1)->fd - (p2)->fd))
125
126/* Special initialization for the rb-tree node to detect linkage */
127#define EP_RB_INITNODE(n) (n)->rb_parent = (n)
128
129/* Removes a node from the rb-tree and marks it for a fast is-linked check */
130#define EP_RB_ERASE(n, r) do { rb_erase(n, r); (n)->rb_parent = (n); } while (0)
131
132/* Fast check to verify that the item is linked to the main rb-tree */
133#define EP_RB_LINKED(n) ((n)->rb_parent != (n))
134
135/*
136 * Remove the item from the list and perform its initialization.
137 * This is useful for us because we can test if the item is linked
138 * using "EP_IS_LINKED(p)".
139 */
140#define EP_LIST_DEL(p) do { list_del(p); INIT_LIST_HEAD(p); } while (0)
141
142/* Tells us if the item is currently linked */
143#define EP_IS_LINKED(p) (!list_empty(p))
144
145/* Get the "struct epitem" from a wait queue pointer */
146#define EP_ITEM_FROM_WAIT(p) ((struct epitem *) container_of(p, struct eppoll_entry, wait)->base)
147
148/* Get the "struct epitem" from an epoll queue wrapper */
149#define EP_ITEM_FROM_EPQUEUE(p) (container_of(p, struct ep_pqueue, pt)->epi)
150
151/* Tells if the epoll_ctl(2) operation needs an event copy from userspace */
152#define EP_OP_HASH_EVENT(op) ((op) != EPOLL_CTL_DEL)
153
154
155struct epoll_filefd { 104struct epoll_filefd {
156 struct file *file; 105 struct file *file;
157 int fd; 106 int fd;
@@ -357,6 +306,82 @@ static struct dentry_operations eventpollfs_dentry_operations = {
357 306
358 307
359 308
309/* Fast test to see if the file is an evenpoll file */
310static inline int is_file_epoll(struct file *f)
311{
312 return f->f_op == &eventpoll_fops;
313}
314
315/* Setup the structure that is used as key for the rb-tree */
316static inline void ep_set_ffd(struct epoll_filefd *ffd,
317 struct file *file, int fd)
318{
319 ffd->file = file;
320 ffd->fd = fd;
321}
322
323/* Compare rb-tree keys */
324static inline int ep_cmp_ffd(struct epoll_filefd *p1,
325 struct epoll_filefd *p2)
326{
327 return (p1->file > p2->file ? +1:
328 (p1->file < p2->file ? -1 : p1->fd - p2->fd));
329}
330
331/* Special initialization for the rb-tree node to detect linkage */
332static inline void ep_rb_initnode(struct rb_node *n)
333{
334 n->rb_parent = n;
335}
336
337/* Removes a node from the rb-tree and marks it for a fast is-linked check */
338static inline void ep_rb_erase(struct rb_node *n, struct rb_root *r)
339{
340 rb_erase(n, r);
341 n->rb_parent = n;
342}
343
344/* Fast check to verify that the item is linked to the main rb-tree */
345static inline int ep_rb_linked(struct rb_node *n)
346{
347 return n->rb_parent != n;
348}
349
350/*
351 * Remove the item from the list and perform its initialization.
352 * This is useful for us because we can test if the item is linked
353 * using "ep_is_linked(p)".
354 */
355static inline void ep_list_del(struct list_head *p)
356{
357 list_del(p);
358 INIT_LIST_HEAD(p);
359}
360
361/* Tells us if the item is currently linked */
362static inline int ep_is_linked(struct list_head *p)
363{
364 return !list_empty(p);
365}
366
367/* Get the "struct epitem" from a wait queue pointer */
368static inline struct epitem * ep_item_from_wait(wait_queue_t *p)
369{
370 return container_of(p, struct eppoll_entry, wait)->base;
371}
372
373/* Get the "struct epitem" from an epoll queue wrapper */
374static inline struct epitem * ep_item_from_epqueue(poll_table *p)
375{
376 return container_of(p, struct ep_pqueue, pt)->epi;
377}
378
379/* Tells if the epoll_ctl(2) operation needs an event copy from userspace */
380static inline int ep_op_hash_event(int op)
381{
382 return op != EPOLL_CTL_DEL;
383}
384
360/* Initialize the poll safe wake up structure */ 385/* Initialize the poll safe wake up structure */
361static void ep_poll_safewake_init(struct poll_safewake *psw) 386static void ep_poll_safewake_init(struct poll_safewake *psw)
362{ 387{
@@ -456,7 +481,7 @@ void eventpoll_release_file(struct file *file)
456 epi = list_entry(lsthead->next, struct epitem, fllink); 481 epi = list_entry(lsthead->next, struct epitem, fllink);
457 482
458 ep = epi->ep; 483 ep = epi->ep;
459 EP_LIST_DEL(&epi->fllink); 484 ep_list_del(&epi->fllink);
460 down_write(&ep->sem); 485 down_write(&ep->sem);
461 ep_remove(ep, epi); 486 ep_remove(ep, epi);
462 up_write(&ep->sem); 487 up_write(&ep->sem);
@@ -534,7 +559,7 @@ sys_epoll_ctl(int epfd, int op, int fd, struct epoll_event __user *event)
534 current, epfd, op, fd, event)); 559 current, epfd, op, fd, event));
535 560
536 error = -EFAULT; 561 error = -EFAULT;
537 if (EP_OP_HASH_EVENT(op) && 562 if (ep_op_hash_event(op) &&
538 copy_from_user(&epds, event, sizeof(struct epoll_event))) 563 copy_from_user(&epds, event, sizeof(struct epoll_event)))
539 goto eexit_1; 564 goto eexit_1;
540 565
@@ -560,7 +585,7 @@ sys_epoll_ctl(int epfd, int op, int fd, struct epoll_event __user *event)
560 * adding an epoll file descriptor inside itself. 585 * adding an epoll file descriptor inside itself.
561 */ 586 */
562 error = -EINVAL; 587 error = -EINVAL;
563 if (file == tfile || !IS_FILE_EPOLL(file)) 588 if (file == tfile || !is_file_epoll(file))
564 goto eexit_3; 589 goto eexit_3;
565 590
566 /* 591 /*
@@ -656,7 +681,7 @@ asmlinkage long sys_epoll_wait(int epfd, struct epoll_event __user *events,
656 * the user passed to us _is_ an eventpoll file. 681 * the user passed to us _is_ an eventpoll file.
657 */ 682 */
658 error = -EINVAL; 683 error = -EINVAL;
659 if (!IS_FILE_EPOLL(file)) 684 if (!is_file_epoll(file))
660 goto eexit_2; 685 goto eexit_2;
661 686
662 /* 687 /*
@@ -831,11 +856,11 @@ static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd)
831 struct epitem *epi, *epir = NULL; 856 struct epitem *epi, *epir = NULL;
832 struct epoll_filefd ffd; 857 struct epoll_filefd ffd;
833 858
834 EP_SET_FFD(&ffd, file, fd); 859 ep_set_ffd(&ffd, file, fd);
835 read_lock_irqsave(&ep->lock, flags); 860 read_lock_irqsave(&ep->lock, flags);
836 for (rbp = ep->rbr.rb_node; rbp; ) { 861 for (rbp = ep->rbr.rb_node; rbp; ) {
837 epi = rb_entry(rbp, struct epitem, rbn); 862 epi = rb_entry(rbp, struct epitem, rbn);
838 kcmp = EP_CMP_FFD(&ffd, &epi->ffd); 863 kcmp = ep_cmp_ffd(&ffd, &epi->ffd);
839 if (kcmp > 0) 864 if (kcmp > 0)
840 rbp = rbp->rb_right; 865 rbp = rbp->rb_right;
841 else if (kcmp < 0) 866 else if (kcmp < 0)
@@ -875,7 +900,7 @@ static void ep_release_epitem(struct epitem *epi)
875{ 900{
876 901
877 if (atomic_dec_and_test(&epi->usecnt)) 902 if (atomic_dec_and_test(&epi->usecnt))
878 EPI_MEM_FREE(epi); 903 kmem_cache_free(epi_cache, epi);
879} 904}
880 905
881 906
@@ -886,10 +911,10 @@ static void ep_release_epitem(struct epitem *epi)
886static void ep_ptable_queue_proc(struct file *file, wait_queue_head_t *whead, 911static void ep_ptable_queue_proc(struct file *file, wait_queue_head_t *whead,
887 poll_table *pt) 912 poll_table *pt)
888{ 913{
889 struct epitem *epi = EP_ITEM_FROM_EPQUEUE(pt); 914 struct epitem *epi = ep_item_from_epqueue(pt);
890 struct eppoll_entry *pwq; 915 struct eppoll_entry *pwq;
891 916
892 if (epi->nwait >= 0 && (pwq = PWQ_MEM_ALLOC())) { 917 if (epi->nwait >= 0 && (pwq = kmem_cache_alloc(pwq_cache, SLAB_KERNEL))) {
893 init_waitqueue_func_entry(&pwq->wait, ep_poll_callback); 918 init_waitqueue_func_entry(&pwq->wait, ep_poll_callback);
894 pwq->whead = whead; 919 pwq->whead = whead;
895 pwq->base = epi; 920 pwq->base = epi;
@@ -912,7 +937,7 @@ static void ep_rbtree_insert(struct eventpoll *ep, struct epitem *epi)
912 while (*p) { 937 while (*p) {
913 parent = *p; 938 parent = *p;
914 epic = rb_entry(parent, struct epitem, rbn); 939 epic = rb_entry(parent, struct epitem, rbn);
915 kcmp = EP_CMP_FFD(&epi->ffd, &epic->ffd); 940 kcmp = ep_cmp_ffd(&epi->ffd, &epic->ffd);
916 if (kcmp > 0) 941 if (kcmp > 0)
917 p = &parent->rb_right; 942 p = &parent->rb_right;
918 else 943 else
@@ -932,17 +957,17 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
932 struct ep_pqueue epq; 957 struct ep_pqueue epq;
933 958
934 error = -ENOMEM; 959 error = -ENOMEM;
935 if (!(epi = EPI_MEM_ALLOC())) 960 if (!(epi = kmem_cache_alloc(epi_cache, SLAB_KERNEL)))
936 goto eexit_1; 961 goto eexit_1;
937 962
938 /* Item initialization follow here ... */ 963 /* Item initialization follow here ... */
939 EP_RB_INITNODE(&epi->rbn); 964 ep_rb_initnode(&epi->rbn);
940 INIT_LIST_HEAD(&epi->rdllink); 965 INIT_LIST_HEAD(&epi->rdllink);
941 INIT_LIST_HEAD(&epi->fllink); 966 INIT_LIST_HEAD(&epi->fllink);
942 INIT_LIST_HEAD(&epi->txlink); 967 INIT_LIST_HEAD(&epi->txlink);
943 INIT_LIST_HEAD(&epi->pwqlist); 968 INIT_LIST_HEAD(&epi->pwqlist);
944 epi->ep = ep; 969 epi->ep = ep;
945 EP_SET_FFD(&epi->ffd, tfile, fd); 970 ep_set_ffd(&epi->ffd, tfile, fd);
946 epi->event = *event; 971 epi->event = *event;
947 atomic_set(&epi->usecnt, 1); 972 atomic_set(&epi->usecnt, 1);
948 epi->nwait = 0; 973 epi->nwait = 0;
@@ -978,7 +1003,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
978 ep_rbtree_insert(ep, epi); 1003 ep_rbtree_insert(ep, epi);
979 1004
980 /* If the file is already "ready" we drop it inside the ready list */ 1005 /* If the file is already "ready" we drop it inside the ready list */
981 if ((revents & event->events) && !EP_IS_LINKED(&epi->rdllink)) { 1006 if ((revents & event->events) && !ep_is_linked(&epi->rdllink)) {
982 list_add_tail(&epi->rdllink, &ep->rdllist); 1007 list_add_tail(&epi->rdllink, &ep->rdllist);
983 1008
984 /* Notify waiting tasks that events are available */ 1009 /* Notify waiting tasks that events are available */
@@ -1007,11 +1032,11 @@ eexit_2:
1007 * allocated wait queue. 1032 * allocated wait queue.
1008 */ 1033 */
1009 write_lock_irqsave(&ep->lock, flags); 1034 write_lock_irqsave(&ep->lock, flags);
1010 if (EP_IS_LINKED(&epi->rdllink)) 1035 if (ep_is_linked(&epi->rdllink))
1011 EP_LIST_DEL(&epi->rdllink); 1036 ep_list_del(&epi->rdllink);
1012 write_unlock_irqrestore(&ep->lock, flags); 1037 write_unlock_irqrestore(&ep->lock, flags);
1013 1038
1014 EPI_MEM_FREE(epi); 1039 kmem_cache_free(epi_cache, epi);
1015eexit_1: 1040eexit_1:
1016 return error; 1041 return error;
1017} 1042}
@@ -1050,14 +1075,14 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even
1050 * If the item is not linked to the hash it means that it's on its 1075 * If the item is not linked to the hash it means that it's on its
1051 * way toward the removal. Do nothing in this case. 1076 * way toward the removal. Do nothing in this case.
1052 */ 1077 */
1053 if (EP_RB_LINKED(&epi->rbn)) { 1078 if (ep_rb_linked(&epi->rbn)) {
1054 /* 1079 /*
1055 * If the item is "hot" and it is not registered inside the ready 1080 * If the item is "hot" and it is not registered inside the ready
1056 * list, push it inside. If the item is not "hot" and it is currently 1081 * list, push it inside. If the item is not "hot" and it is currently
1057 * registered inside the ready list, unlink it. 1082 * registered inside the ready list, unlink it.
1058 */ 1083 */
1059 if (revents & event->events) { 1084 if (revents & event->events) {
1060 if (!EP_IS_LINKED(&epi->rdllink)) { 1085 if (!ep_is_linked(&epi->rdllink)) {
1061 list_add_tail(&epi->rdllink, &ep->rdllist); 1086 list_add_tail(&epi->rdllink, &ep->rdllist);
1062 1087
1063 /* Notify waiting tasks that events are available */ 1088 /* Notify waiting tasks that events are available */
@@ -1097,9 +1122,9 @@ static void ep_unregister_pollwait(struct eventpoll *ep, struct epitem *epi)
1097 while (!list_empty(lsthead)) { 1122 while (!list_empty(lsthead)) {
1098 pwq = list_entry(lsthead->next, struct eppoll_entry, llink); 1123 pwq = list_entry(lsthead->next, struct eppoll_entry, llink);
1099 1124
1100 EP_LIST_DEL(&pwq->llink); 1125 ep_list_del(&pwq->llink);
1101 remove_wait_queue(pwq->whead, &pwq->wait); 1126 remove_wait_queue(pwq->whead, &pwq->wait);
1102 PWQ_MEM_FREE(pwq); 1127 kmem_cache_free(pwq_cache, pwq);
1103 } 1128 }
1104 } 1129 }
1105} 1130}
@@ -1118,7 +1143,7 @@ static int ep_unlink(struct eventpoll *ep, struct epitem *epi)
1118 * The check protect us from doing a double unlink ( crash ). 1143 * The check protect us from doing a double unlink ( crash ).
1119 */ 1144 */
1120 error = -ENOENT; 1145 error = -ENOENT;
1121 if (!EP_RB_LINKED(&epi->rbn)) 1146 if (!ep_rb_linked(&epi->rbn))
1122 goto eexit_1; 1147 goto eexit_1;
1123 1148
1124 /* 1149 /*
@@ -1133,14 +1158,14 @@ static int ep_unlink(struct eventpoll *ep, struct epitem *epi)
1133 * This operation togheter with the above check closes the door to 1158 * This operation togheter with the above check closes the door to
1134 * double unlinks. 1159 * double unlinks.
1135 */ 1160 */
1136 EP_RB_ERASE(&epi->rbn, &ep->rbr); 1161 ep_rb_erase(&epi->rbn, &ep->rbr);
1137 1162
1138 /* 1163 /*
1139 * If the item we are going to remove is inside the ready file descriptors 1164 * If the item we are going to remove is inside the ready file descriptors
1140 * we want to remove it from this list to avoid stale events. 1165 * we want to remove it from this list to avoid stale events.
1141 */ 1166 */
1142 if (EP_IS_LINKED(&epi->rdllink)) 1167 if (ep_is_linked(&epi->rdllink))
1143 EP_LIST_DEL(&epi->rdllink); 1168 ep_list_del(&epi->rdllink);
1144 1169
1145 error = 0; 1170 error = 0;
1146eexit_1: 1171eexit_1:
@@ -1174,8 +1199,8 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi)
1174 1199
1175 /* Remove the current item from the list of epoll hooks */ 1200 /* Remove the current item from the list of epoll hooks */
1176 spin_lock(&file->f_ep_lock); 1201 spin_lock(&file->f_ep_lock);
1177 if (EP_IS_LINKED(&epi->fllink)) 1202 if (ep_is_linked(&epi->fllink))
1178 EP_LIST_DEL(&epi->fllink); 1203 ep_list_del(&epi->fllink);
1179 spin_unlock(&file->f_ep_lock); 1204 spin_unlock(&file->f_ep_lock);
1180 1205
1181 /* We need to acquire the write IRQ lock before calling ep_unlink() */ 1206 /* We need to acquire the write IRQ lock before calling ep_unlink() */
@@ -1210,7 +1235,7 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k
1210{ 1235{
1211 int pwake = 0; 1236 int pwake = 0;
1212 unsigned long flags; 1237 unsigned long flags;
1213 struct epitem *epi = EP_ITEM_FROM_WAIT(wait); 1238 struct epitem *epi = ep_item_from_wait(wait);
1214 struct eventpoll *ep = epi->ep; 1239 struct eventpoll *ep = epi->ep;
1215 1240
1216 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: poll_callback(%p) epi=%p ep=%p\n", 1241 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: poll_callback(%p) epi=%p ep=%p\n",
@@ -1228,7 +1253,7 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k
1228 goto is_disabled; 1253 goto is_disabled;
1229 1254
1230 /* If this file is already in the ready list we exit soon */ 1255 /* If this file is already in the ready list we exit soon */
1231 if (EP_IS_LINKED(&epi->rdllink)) 1256 if (ep_is_linked(&epi->rdllink))
1232 goto is_linked; 1257 goto is_linked;
1233 1258
1234 list_add_tail(&epi->rdllink, &ep->rdllist); 1259 list_add_tail(&epi->rdllink, &ep->rdllist);
@@ -1307,7 +1332,7 @@ static int ep_collect_ready_items(struct eventpoll *ep, struct list_head *txlist
1307 lnk = lnk->next; 1332 lnk = lnk->next;
1308 1333
1309 /* If this file is already in the ready list we exit soon */ 1334 /* If this file is already in the ready list we exit soon */
1310 if (!EP_IS_LINKED(&epi->txlink)) { 1335 if (!ep_is_linked(&epi->txlink)) {
1311 /* 1336 /*
1312 * This is initialized in this way so that the default 1337 * This is initialized in this way so that the default
1313 * behaviour of the reinjecting code will be to push back 1338 * behaviour of the reinjecting code will be to push back
@@ -1322,7 +1347,7 @@ static int ep_collect_ready_items(struct eventpoll *ep, struct list_head *txlist
1322 /* 1347 /*
1323 * Unlink the item from the ready list. 1348 * Unlink the item from the ready list.
1324 */ 1349 */
1325 EP_LIST_DEL(&epi->rdllink); 1350 ep_list_del(&epi->rdllink);
1326 } 1351 }
1327 } 1352 }
1328 1353
@@ -1401,7 +1426,7 @@ static void ep_reinject_items(struct eventpoll *ep, struct list_head *txlist)
1401 epi = list_entry(txlist->next, struct epitem, txlink); 1426 epi = list_entry(txlist->next, struct epitem, txlink);
1402 1427
1403 /* Unlink the current item from the transfer list */ 1428 /* Unlink the current item from the transfer list */
1404 EP_LIST_DEL(&epi->txlink); 1429 ep_list_del(&epi->txlink);
1405 1430
1406 /* 1431 /*
1407 * If the item is no more linked to the interest set, we don't 1432 * If the item is no more linked to the interest set, we don't
@@ -1410,8 +1435,8 @@ static void ep_reinject_items(struct eventpoll *ep, struct list_head *txlist)
1410 * item is set to have an Edge Triggered behaviour, we don't have 1435 * item is set to have an Edge Triggered behaviour, we don't have
1411 * to push it back either. 1436 * to push it back either.
1412 */ 1437 */
1413 if (EP_RB_LINKED(&epi->rbn) && !(epi->event.events & EPOLLET) && 1438 if (ep_rb_linked(&epi->rbn) && !(epi->event.events & EPOLLET) &&
1414 (epi->revents & epi->event.events) && !EP_IS_LINKED(&epi->rdllink)) { 1439 (epi->revents & epi->event.events) && !ep_is_linked(&epi->rdllink)) {
1415 list_add_tail(&epi->rdllink, &ep->rdllist); 1440 list_add_tail(&epi->rdllink, &ep->rdllist);
1416 ricnt++; 1441 ricnt++;
1417 } 1442 }