Diffstat (limited to 'fs/eventpoll.c')
-rw-r--r--	fs/eventpoll.c	233
1 file changed, 86 insertions(+), 147 deletions(-)
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 3ae644e7e860..997711c5a732 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -185,7 +185,7 @@ struct eppoll_entry {
 
 /*
  * Each file descriptor added to the eventpoll interface will
- * have an entry of this type linked to the hash.
+ * have an entry of this type linked to the "rbr" RB tree.
  */
 struct epitem {
 	/* RB-Tree node used to link this structure to the eventpoll rb-tree */
@@ -217,15 +217,6 @@ struct epitem {
 
 	/* List header used to link this item to the "struct file" items list */
 	struct list_head fllink;
-
-	/* List header used to link the item to the transfer list */
-	struct list_head txlink;
-
-	/*
-	 * This is used during the collection/transfer of events to userspace
-	 * to pin items empty events set.
-	 */
-	unsigned int revents;
 };
 
 /* Wrapper struct used by poll queueing */
@@ -258,11 +249,8 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi);
 static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *key);
 static int ep_eventpoll_close(struct inode *inode, struct file *file);
 static unsigned int ep_eventpoll_poll(struct file *file, poll_table *wait);
-static int ep_collect_ready_items(struct eventpoll *ep,
-				  struct list_head *txlist, int maxevents);
-static int ep_send_events(struct eventpoll *ep, struct list_head *txlist,
-			  struct epoll_event __user *events);
-static void ep_reinject_items(struct eventpoll *ep, struct list_head *txlist);
+static int ep_send_events(struct eventpoll *ep, struct list_head *txlist,
+			  struct epoll_event __user *events, int maxevents);
 static int ep_events_transfer(struct eventpoll *ep,
 			      struct epoll_event __user *events,
 			      int maxevents);
@@ -355,17 +343,6 @@ static inline int ep_rb_linked(struct rb_node *n)
 	return rb_parent(n) != n;
 }
 
-/*
- * Remove the item from the list and perform its initialization.
- * This is useful for us because we can test if the item is linked
- * using "ep_is_linked(p)".
- */
-static inline void ep_list_del(struct list_head *p)
-{
-	list_del(p);
-	INIT_LIST_HEAD(p);
-}
-
 /* Tells us if the item is currently linked */
 static inline int ep_is_linked(struct list_head *p)
 {
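
The helper removed above is an open-coded duplicate of the kernel's generic list_del_init(), so the patch switches every call site to the stock primitive. A minimal sketch of the equivalence, using only the standard <linux/list.h> API (the "old"/"new" function names are illustrative, not from the patch):

#include <linux/list.h>

/* What ep_list_del() did by hand ... */
static inline void ep_list_del_old(struct list_head *p)
{
	list_del(p);		/* unlink the entry from its list */
	INIT_LIST_HEAD(p);	/* self-link it, marking it detached */
}

/* ... is exactly what the generic helper already provides: */
static inline void ep_list_del_new(struct list_head *p)
{
	list_del_init(p);	/* unlink + re-initialize in one call */
}

Because a detached entry points at itself after either variant, a later ep_is_linked() test can still tell queued entries from detached ones.
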
@@ -385,7 +362,7 @@ static inline struct epitem * ep_item_from_epqueue(poll_table *p)
 }
 
 /* Tells if the epoll_ctl(2) operation needs an event copy from userspace */
-static inline int ep_op_hash_event(int op)
+static inline int ep_op_has_event(int op)
 {
 	return op != EPOLL_CTL_DEL;
 }
@@ -480,7 +457,7 @@ void eventpoll_release_file(struct file *file)
 		epi = list_entry(lsthead->next, struct epitem, fllink);
 
 		ep = epi->ep;
-		ep_list_del(&epi->fllink);
+		list_del_init(&epi->fllink);
 		down_write(&ep->sem);
 		ep_remove(ep, epi);
 		up_write(&ep->sem);
@@ -557,7 +534,7 @@ sys_epoll_ctl(int epfd, int op, int fd, struct epoll_event __user *event)
 		     current, epfd, op, fd, event));
 
 	error = -EFAULT;
-	if (ep_op_hash_event(op) &&
+	if (ep_op_has_event(op) &&
 	    copy_from_user(&epds, event, sizeof(struct epoll_event)))
 		goto eexit_1;
 
@@ -594,7 +571,7 @@ sys_epoll_ctl(int epfd, int op, int fd, struct epoll_event __user *event)
 
 	down_write(&ep->sem);
 
-	/* Try to lookup the file inside our hash table */
+	/* Try to lookup the file inside our RB tree */
 	epi = ep_find(ep, tfile, fd);
 
 	error = -EINVAL;
@@ -876,7 +853,7 @@ static void ep_free(struct eventpoll *ep)
 	}
 
 	/*
-	 * Walks through the whole hash by freeing each "struct epitem". At this
+	 * Walks through the whole tree by freeing each "struct epitem". At this
 	 * point we are sure no poll callbacks will be lingering around, and also by
 	 * write-holding "sem" we can be sure that no file cleanup code will hit
 	 * us during this operation. So we can avoid the lock on "ep->lock".
@@ -891,7 +868,7 @@ static void ep_free(struct eventpoll *ep)
 
 
 /*
- * Search the file inside the eventpoll hash. It add usage count to
+ * Search the file inside the eventpoll tree. It add usage count to
  * the returned item, so the caller must call ep_release_epitem()
  * after finished using the "struct epitem".
 */
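
The comment above describes a get/put discipline around lookups: ep_find() raises the item's usage count, and the caller is responsible for dropping it. A hedged sketch of the implied caller pattern (locking elided; only the get/release pairing is the point):

/* Implied usage: the reference taken by ep_find() pins the item. */
struct epitem *epi = ep_find(ep, tfile, fd);

if (epi) {
	/* ... safely dereference "epi" here ... */
	ep_release_epitem(epi);	/* drop the usage reference when done */
}
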
@@ -1011,7 +988,6 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
 	ep_rb_initnode(&epi->rbn);
 	INIT_LIST_HEAD(&epi->rdllink);
 	INIT_LIST_HEAD(&epi->fllink);
-	INIT_LIST_HEAD(&epi->txlink);
 	INIT_LIST_HEAD(&epi->pwqlist);
 	epi->ep = ep;
 	ep_set_ffd(&epi->ffd, tfile, fd);
@@ -1080,7 +1056,7 @@ eexit_2:
 	 */
 	write_lock_irqsave(&ep->lock, flags);
 	if (ep_is_linked(&epi->rdllink))
-		ep_list_del(&epi->rdllink);
+		list_del_init(&epi->rdllink);
 	write_unlock_irqrestore(&ep->lock, flags);
 
 	kmem_cache_free(epi_cache, epi);
@@ -1119,7 +1095,7 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even
 	epi->event.data = event->data;
 
 	/*
-	 * If the item is not linked to the hash it means that it's on its
+	 * If the item is not linked to the RB tree it means that it's on its
 	 * way toward the removal. Do nothing in this case.
 	 */
 	if (ep_rb_linked(&epi->rbn)) {
@@ -1170,7 +1146,7 @@ static void ep_unregister_pollwait(struct eventpoll *ep, struct epitem *epi)
 	while (!list_empty(lsthead)) {
 		pwq = list_entry(lsthead->next, struct eppoll_entry, llink);
 
-		ep_list_del(&pwq->llink);
+		list_del_init(&pwq->llink);
 		remove_wait_queue(pwq->whead, &pwq->wait);
 		kmem_cache_free(pwq_cache, pwq);
 	}
@@ -1213,7 +1189,7 @@ static int ep_unlink(struct eventpoll *ep, struct epitem *epi)
 	 * we want to remove it from this list to avoid stale events.
 	 */
 	if (ep_is_linked(&epi->rdllink))
-		ep_list_del(&epi->rdllink);
+		list_del_init(&epi->rdllink);
 
 	error = 0;
 eexit_1:
@@ -1226,7 +1202,7 @@ eexit_1:
 
 
 /*
- * Removes a "struct epitem" from the eventpoll hash and deallocates
+ * Removes a "struct epitem" from the eventpoll RB tree and deallocates
  * all the associated resources.
  */
 static int ep_remove(struct eventpoll *ep, struct epitem *epi)
@@ -1248,13 +1224,13 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi)
 	/* Remove the current item from the list of epoll hooks */
 	spin_lock(&file->f_ep_lock);
 	if (ep_is_linked(&epi->fllink))
-		ep_list_del(&epi->fllink);
+		list_del_init(&epi->fllink);
 	spin_unlock(&file->f_ep_lock);
 
 	/* We need to acquire the write IRQ lock before calling ep_unlink() */
 	write_lock_irqsave(&ep->lock, flags);
 
-	/* Really unlink the item from the hash */
+	/* Really unlink the item from the RB tree */
 	error = ep_unlink(ep, epi);
 
 	write_unlock_irqrestore(&ep->lock, flags);
@@ -1362,71 +1338,30 @@ static unsigned int ep_eventpoll_poll(struct file *file, poll_table *wait)
 
 
 /*
- * Since we have to release the lock during the __copy_to_user() operation and
- * during the f_op->poll() call, we try to collect the maximum number of items
- * by reducing the irqlock/irqunlock switching rate.
- */
-static int ep_collect_ready_items(struct eventpoll *ep, struct list_head *txlist, int maxevents)
-{
-	int nepi;
-	unsigned long flags;
-	struct list_head *lsthead = &ep->rdllist, *lnk;
-	struct epitem *epi;
-
-	write_lock_irqsave(&ep->lock, flags);
-
-	for (nepi = 0, lnk = lsthead->next; lnk != lsthead && nepi < maxevents;) {
-		epi = list_entry(lnk, struct epitem, rdllink);
-
-		lnk = lnk->next;
-
-		/* If this file is already in the ready list we exit soon */
-		if (!ep_is_linked(&epi->txlink)) {
-			/*
-			 * This is initialized in this way so that the default
-			 * behaviour of the reinjecting code will be to push back
-			 * the item inside the ready list.
-			 */
-			epi->revents = epi->event.events;
-
-			/* Link the ready item into the transfer list */
-			list_add(&epi->txlink, txlist);
-			nepi++;
-
-			/*
-			 * Unlink the item from the ready list.
-			 */
-			ep_list_del(&epi->rdllink);
-		}
-	}
-
-	write_unlock_irqrestore(&ep->lock, flags);
-
-	return nepi;
-}
-
-
-/*
  * This function is called without holding the "ep->lock" since the call to
  * __copy_to_user() might sleep, and also f_op->poll() might reenable the IRQ
  * because of the way poll() is traditionally implemented in Linux.
  */
 static int ep_send_events(struct eventpoll *ep, struct list_head *txlist,
-			  struct epoll_event __user *events)
+			  struct epoll_event __user *events, int maxevents)
 {
-	int eventcnt = 0;
+	int eventcnt, error = -EFAULT, pwake = 0;
 	unsigned int revents;
-	struct list_head *lnk;
+	unsigned long flags;
 	struct epitem *epi;
+	struct list_head injlist;
+
+	INIT_LIST_HEAD(&injlist);
 
 	/*
 	 * We can loop without lock because this is a task private list.
-	 * The test done during the collection loop will guarantee us that
-	 * another task will not try to collect this file. Also, items
-	 * cannot vanish during the loop because we are holding "sem".
+	 * We just splice'd out the ep->rdllist in ep_collect_ready_items().
+	 * Items cannot vanish during the loop because we are holding "sem" in
+	 * read.
 	 */
-	list_for_each(lnk, txlist) {
-		epi = list_entry(lnk, struct epitem, txlink);
+	for (eventcnt = 0; !list_empty(txlist) && eventcnt < maxevents;) {
+		epi = list_entry(txlist->next, struct epitem, rdllink);
+		prefetch(epi->rdllink.next);
 
 		/*
 		 * Get the ready file event set. We can safely use the file
@@ -1434,64 +1369,65 @@ static int ep_send_events(struct eventpoll *ep, struct list_head *txlist,
 		 * guarantee that both the file and the item will not vanish.
 		 */
 		revents = epi->ffd.file->f_op->poll(epi->ffd.file, NULL);
+		revents &= epi->event.events;
 
 		/*
-		 * Set the return event set for the current file descriptor.
-		 * Note that only the task task was successfully able to link
-		 * the item to its "txlist" will write this field.
+		 * Is the event mask intersect the caller-requested one,
+		 * deliver the event to userspace. Again, we are holding
+		 * "sem" in read, so no operations coming from userspace
+		 * can change the item.
 		 */
-		epi->revents = revents & epi->event.events;
-
-		if (epi->revents) {
-			if (__put_user(epi->revents,
+		if (revents) {
+			if (__put_user(revents,
 				       &events[eventcnt].events) ||
 			    __put_user(epi->event.data,
 				       &events[eventcnt].data))
-				return -EFAULT;
+				goto errxit;
 			if (epi->event.events & EPOLLONESHOT)
 				epi->event.events &= EP_PRIVATE_BITS;
 			eventcnt++;
 		}
-	}
-	return eventcnt;
-}
-
-
-/*
- * Walk through the transfer list we collected with ep_collect_ready_items()
- * and, if 1) the item is still "alive" 2) its event set is not empty 3) it's
- * not already linked, links it to the ready list. Same as above, we are holding
- * "sem" so items cannot vanish underneath our nose.
- */
-static void ep_reinject_items(struct eventpoll *ep, struct list_head *txlist)
-{
-	int ricnt = 0, pwake = 0;
-	unsigned long flags;
-	struct epitem *epi;
-
-	write_lock_irqsave(&ep->lock, flags);
-
-	while (!list_empty(txlist)) {
-		epi = list_entry(txlist->next, struct epitem, txlink);
-
-		/* Unlink the current item from the transfer list */
-		ep_list_del(&epi->txlink);
 
 		/*
-		 * If the item is no more linked to the interest set, we don't
-		 * have to push it inside the ready list because the following
-		 * ep_release_epitem() is going to drop it. Also, if the current
-		 * item is set to have an Edge Triggered behaviour, we don't have
-		 * to push it back either.
+		 * This is tricky. We are holding the "sem" in read, and this
+		 * means that the operations that can change the "linked" status
+		 * of the epoll item (epi->rbn and epi->rdllink), cannot touch
+		 * them. Also, since we are "linked" from a epi->rdllink POV
+		 * (the item is linked to our transmission list we just
+		 * spliced), the ep_poll_callback() cannot touch us either,
+		 * because of the check present in there. Another parallel
+		 * epoll_wait() will not get the same result set, since we
+		 * spliced the ready list before. Note that list_del() still
+		 * shows the item as linked to the test in ep_poll_callback().
 		 */
-		if (ep_rb_linked(&epi->rbn) && !(epi->event.events & EPOLLET) &&
-		    (epi->revents & epi->event.events) && !ep_is_linked(&epi->rdllink)) {
-			list_add_tail(&epi->rdllink, &ep->rdllist);
-			ricnt++;
+		list_del(&epi->rdllink);
+		if (!(epi->event.events & EPOLLET) &&
+		    (revents & epi->event.events))
+			list_add_tail(&epi->rdllink, &injlist);
+		else {
+			/*
+			 * Be sure the item is totally detached before re-init
+			 * the list_head. After INIT_LIST_HEAD() is committed,
+			 * the ep_poll_callback() can requeue the item again,
+			 * but we don't care since we are already past it.
+			 */
+			smp_mb();
+			INIT_LIST_HEAD(&epi->rdllink);
 		}
 	}
+	error = 0;
 
-	if (ricnt) {
+	errxit:
+
+	/*
+	 * If the re-injection list or the txlist are not empty, re-splice
+	 * them to the ready list and do proper wakeups.
+	 */
+	if (!list_empty(&injlist) || !list_empty(txlist)) {
+		write_lock_irqsave(&ep->lock, flags);
+
+		list_splice(txlist, &ep->rdllist);
+		list_splice(&injlist, &ep->rdllist);
 		/*
 		 * Wake up ( if active ) both the eventpoll wait list and the ->poll()
 		 * wait list.
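
The branch above is where the level-triggered/edge-triggered split now lives: a still-ready, non-EPOLLET item goes onto the local "injlist" for re-injection into ep->rdllist, while an EPOLLET item is fully detached and only reappears on the next wakeup. Seen from userspace, this is the usual edge-triggered contract that consumers must drain the descriptor until EAGAIN; a small illustrative sketch (hypothetical helper, not part of this patch):

#include <errno.h>
#include <unistd.h>

/* Edge-triggered consumption: with EPOLLET a descriptor is reported
 * once per readiness transition, so drain it completely before the
 * next epoll_wait(). */
static void drain_fd(int fd)
{
	char buf[4096];
	ssize_t n;

	for (;;) {
		n = read(fd, buf, sizeof(buf));
		if (n > 0)
			continue;	/* keep consuming pending data */
		if (n < 0 && errno == EAGAIN)
			break;		/* drained; wait for the next edge */
		break;			/* EOF or a real error */
	}
}
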
@@ -1501,13 +1437,15 @@ static void ep_reinject_items(struct eventpoll *ep, struct list_head *txlist)
 					 TASK_INTERRUPTIBLE);
 		if (waitqueue_active(&ep->poll_wait))
 			pwake++;
-	}
 
-	write_unlock_irqrestore(&ep->lock, flags);
+		write_unlock_irqrestore(&ep->lock, flags);
+	}
 
 	/* We have to call this outside the lock */
 	if (pwake)
 		ep_poll_safewake(&psw, &ep->poll_wait);
+
+	return eventcnt == 0 ? error: eventcnt;
 }
 
 
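
Two delivery details are worth noting in the rewritten function: an EPOLLONESHOT item is disarmed inline ("epi->event.events &= EP_PRIVATE_BITS;") the moment its event is copied out, and -EFAULT is reported only when nothing was delivered at all ("eventcnt == 0 ? error : eventcnt"). The disarm half is what userspace observes as the one-shot re-arm dance; an illustrative sketch using the standard epoll API (the helper name is hypothetical):

#include <sys/epoll.h>

/* After a one-shot delivery the item stays registered but disarmed;
 * EPOLL_CTL_MOD re-enables it for the next event. */
static void rearm_oneshot(int epfd, int fd)
{
	struct epoll_event ev = {
		.events = EPOLLIN | EPOLLONESHOT,
		.data.fd = fd,
	};

	epoll_ctl(epfd, EPOLL_CTL_MOD, fd, &ev);
}
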
@@ -1517,7 +1455,8 @@ static void ep_reinject_items(struct eventpoll *ep, struct list_head *txlist)
 static int ep_events_transfer(struct eventpoll *ep,
 			      struct epoll_event __user *events, int maxevents)
 {
-	int eventcnt = 0;
+	int eventcnt;
+	unsigned long flags;
 	struct list_head txlist;
 
 	INIT_LIST_HEAD(&txlist);
@@ -1528,14 +1467,17 @@ static int ep_events_transfer(struct eventpoll *ep,
 	 */
 	down_read(&ep->sem);
 
-	/* Collect/extract ready items */
-	if (ep_collect_ready_items(ep, &txlist, maxevents) > 0) {
-		/* Build result set in userspace */
-		eventcnt = ep_send_events(ep, &txlist, events);
+	/*
+	 * Steal the ready list, and re-init the original one to the
+	 * empty list.
+	 */
+	write_lock_irqsave(&ep->lock, flags);
+	list_splice(&ep->rdllist, &txlist);
+	INIT_LIST_HEAD(&ep->rdllist);
+	write_unlock_irqrestore(&ep->lock, flags);
 
-		/* Reinject ready items into the ready list */
-		ep_reinject_items(ep, &txlist);
-	}
+	/* Build result set in userspace */
+	eventcnt = ep_send_events(ep, &txlist, events, maxevents);
 
 	up_read(&ep->sem);
 
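
This hunk carries the core of the simplification: instead of collecting ready items one by one under the IRQ lock (the deleted ep_collect_ready_items()), the whole ready list is stolen with a single O(1) splice, leaving "txlist" private to the calling task for the duration of ep_send_events(). A condensed sketch of the idiom under the same assumptions as the patch (the wrapper name is hypothetical):

#include <linux/list.h>

/* One splice under the lock moves every queued item to a private
 * list and leaves the source empty but valid; the private list can
 * then be walked without ep->lock, which matters here because
 * __put_user() may sleep. */
static void steal_ready_list(struct eventpoll *ep, struct list_head *txlist)
{
	unsigned long flags;

	write_lock_irqsave(&ep->lock, flags);
	list_splice(&ep->rdllist, txlist);
	INIT_LIST_HEAD(&ep->rdllist);
	write_unlock_irqrestore(&ep->lock, flags);
}
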
@@ -1612,14 +1554,12 @@ retry:
 	return res;
 }
 
-
 static int eventpollfs_delete_dentry(struct dentry *dentry)
 {
 
 	return 1;
 }
 
-
 static struct inode *ep_eventpoll_inode(void)
 {
 	int error = -ENOMEM;
@@ -1647,7 +1587,6 @@ eexit_1:
 	return ERR_PTR(error);
 }
 
-
 static int
 eventpollfs_get_sb(struct file_system_type *fs_type, int flags,
 		   const char *dev_name, void *data, struct vfsmount *mnt)