Diffstat (limited to 'fs/eventpoll.c')
-rw-r--r-- | fs/eventpoll.c | 614 |
1 files changed, 339 insertions, 275 deletions
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index c5c424f23fd5..a89f370fadb5 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1,6 +1,6 @@ | |||
1 | /* | 1 | /* |
2 | * fs/eventpoll.c (Efficent event polling implementation) | 2 | * fs/eventpoll.c (Efficient event retrieval implementation) |
3 | * Copyright (C) 2001,...,2007 Davide Libenzi | 3 | * Copyright (C) 2001,...,2009 Davide Libenzi |
4 | * | 4 | * |
5 | * This program is free software; you can redistribute it and/or modify | 5 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License as published by | 6 | * it under the terms of the GNU General Public License as published by |
@@ -71,29 +71,11 @@ | |||
71 | * a better scalability. | 71 | * a better scalability. |
72 | */ | 72 | */ |
73 | 73 | ||
74 | #define DEBUG_EPOLL 0 | ||
75 | |||
76 | #if DEBUG_EPOLL > 0 | ||
77 | #define DPRINTK(x) printk x | ||
78 | #define DNPRINTK(n, x) do { if ((n) <= DEBUG_EPOLL) printk x; } while (0) | ||
79 | #else /* #if DEBUG_EPOLL > 0 */ | ||
80 | #define DPRINTK(x) (void) 0 | ||
81 | #define DNPRINTK(n, x) (void) 0 | ||
82 | #endif /* #if DEBUG_EPOLL > 0 */ | ||
83 | |||
84 | #define DEBUG_EPI 0 | ||
85 | |||
86 | #if DEBUG_EPI != 0 | ||
87 | #define EPI_SLAB_DEBUG (SLAB_DEBUG_FREE | SLAB_RED_ZONE /* | SLAB_POISON */) | ||
88 | #else /* #if DEBUG_EPI != 0 */ | ||
89 | #define EPI_SLAB_DEBUG 0 | ||
90 | #endif /* #if DEBUG_EPI != 0 */ | ||
91 | |||
92 | /* Epoll private bits inside the event mask */ | 74 | /* Epoll private bits inside the event mask */ |
93 | #define EP_PRIVATE_BITS (EPOLLONESHOT | EPOLLET) | 75 | #define EP_PRIVATE_BITS (EPOLLONESHOT | EPOLLET) |
94 | 76 | ||
95 | /* Maximum number of poll wake up nests we are allowing */ | 77 | /* Maximum number of nesting allowed inside epoll sets */ |
96 | #define EP_MAX_POLLWAKE_NESTS 4 | 78 | #define EP_MAX_NESTS 4 |
97 | 79 | ||
98 | /* Maximum msec timeout value storeable in a long int */ | 80 | /* Maximum msec timeout value storeable in a long int */ |
99 | #define EP_MAX_MSTIMEO min(1000ULL * MAX_SCHEDULE_TIMEOUT / HZ, (LONG_MAX - 999ULL) / HZ) | 81 | #define EP_MAX_MSTIMEO min(1000ULL * MAX_SCHEDULE_TIMEOUT / HZ, (LONG_MAX - 999ULL) / HZ) |
@@ -110,24 +92,21 @@ struct epoll_filefd { | |||
110 | }; | 92 | }; |
111 | 93 | ||
112 | /* | 94 | /* |
113 | * Node that is linked into the "wake_task_list" member of the "struct poll_safewake". | 95 | * Structure used to track possible nested calls, for too deep recursions |
114 | * It is used to keep track on all tasks that are currently inside the wake_up() code | 96 | * and loop cycles. |
115 | * to 1) short-circuit the one coming from the same task and same wait queue head | ||
116 | * (loop) 2) allow a maximum number of epoll descriptors inclusion nesting | ||
117 | * 3) let go the ones coming from other tasks. | ||
118 | */ | 97 | */ |
119 | struct wake_task_node { | 98 | struct nested_call_node { |
120 | struct list_head llink; | 99 | struct list_head llink; |
121 | struct task_struct *task; | 100 | void *cookie; |
122 | wait_queue_head_t *wq; | 101 | int cpu; |
123 | }; | 102 | }; |
124 | 103 | ||
125 | /* | 104 | /* |
126 | * This is used to implement the safe poll wake up avoiding to reenter | 105 | * This structure is used as collector for nested calls, to check for |
127 | * the poll callback from inside wake_up(). | 106 | * maximum recursion depth and loop cycles. |
128 | */ | 107 | */ |
129 | struct poll_safewake { | 108 | struct nested_calls { |
130 | struct list_head wake_task_list; | 109 | struct list_head tasks_call_list; |
131 | spinlock_t lock; | 110 | spinlock_t lock; |
132 | }; | 111 | }; |
133 | 112 | ||
@@ -213,7 +192,7 @@ struct eppoll_entry { | |||
213 | struct list_head llink; | 192 | struct list_head llink; |
214 | 193 | ||
215 | /* The "base" pointer is set to the container "struct epitem" */ | 194 | /* The "base" pointer is set to the container "struct epitem" */ |
216 | void *base; | 195 | struct epitem *base; |
217 | 196 | ||
218 | /* | 197 | /* |
219 | * Wait queue item that will be linked to the target file wait | 198 | * Wait queue item that will be linked to the target file wait |
@@ -231,6 +210,12 @@ struct ep_pqueue { | |||
231 | struct epitem *epi; | 210 | struct epitem *epi; |
232 | }; | 211 | }; |
233 | 212 | ||
213 | /* Used by the ep_send_events() function as callback private data */ | ||
214 | struct ep_send_events_data { | ||
215 | int maxevents; | ||
216 | struct epoll_event __user *events; | ||
217 | }; | ||
218 | |||
234 | /* | 219 | /* |
235 | * Configuration options available inside /proc/sys/fs/epoll/ | 220 | * Configuration options available inside /proc/sys/fs/epoll/ |
236 | */ | 221 | */ |
@@ -242,8 +227,11 @@ static int max_user_watches __read_mostly; | |||
242 | */ | 227 | */ |
243 | static DEFINE_MUTEX(epmutex); | 228 | static DEFINE_MUTEX(epmutex); |
244 | 229 | ||
245 | /* Safe wake up implementation */ | 230 | /* Used for safe wake up implementation */ |
246 | static struct poll_safewake psw; | 231 | static struct nested_calls poll_safewake_ncalls; |
232 | |||
233 | /* Used to call file's f_op->poll() under the nested calls boundaries */ | ||
234 | static struct nested_calls poll_readywalk_ncalls; | ||
247 | 235 | ||
248 | /* Slab cache used to allocate "struct epitem" */ | 236 | /* Slab cache used to allocate "struct epitem" */ |
249 | static struct kmem_cache *epi_cache __read_mostly; | 237 | static struct kmem_cache *epi_cache __read_mostly; |
@@ -312,89 +300,230 @@ static inline int ep_op_has_event(int op) | |||
312 | } | 300 | } |
313 | 301 | ||
314 | /* Initialize the poll safe wake up structure */ | 302 | /* Initialize the poll safe wake up structure */ |
315 | static void ep_poll_safewake_init(struct poll_safewake *psw) | 303 | static void ep_nested_calls_init(struct nested_calls *ncalls) |
316 | { | 304 | { |
317 | 305 | INIT_LIST_HEAD(&ncalls->tasks_call_list); | |
318 | INIT_LIST_HEAD(&psw->wake_task_list); | 306 | spin_lock_init(&ncalls->lock); |
319 | spin_lock_init(&psw->lock); | ||
320 | } | 307 | } |
321 | 308 | ||
322 | /* | 309 | /** |
323 | * Perform a safe wake up of the poll wait list. The problem is that | 310 | * ep_call_nested - Perform a bound (possibly) nested call, by checking |
324 | * with the new callback'd wake up system, it is possible that the | 311 | * that the recursion limit is not exceeded, and that |
325 | * poll callback is reentered from inside the call to wake_up() done | 312 | * the same nested call (by the meaning of same cookie) is |
326 | * on the poll wait queue head. The rule is that we cannot reenter the | 313 | * no re-entered. |
327 | * wake up code from the same task more than EP_MAX_POLLWAKE_NESTS times, | 314 | * |
328 | * and we cannot reenter the same wait queue head at all. This will | 315 | * @ncalls: Pointer to the nested_calls structure to be used for this call. |
329 | * enable to have a hierarchy of epoll file descriptor of no more than | 316 | * @max_nests: Maximum number of allowed nesting calls. |
330 | * EP_MAX_POLLWAKE_NESTS deep. We need the irq version of the spin lock | 317 | * @nproc: Nested call core function pointer. |
331 | * because this one gets called by the poll callback, that in turn is called | 318 | * @priv: Opaque data to be passed to the @nproc callback. |
332 | * from inside a wake_up(), that might be called from irq context. | 319 | * @cookie: Cookie to be used to identify this nested call. |
320 | * | ||
321 | * Returns: Returns the code returned by the @nproc callback, or -1 if | ||
322 | * the maximum recursion limit has been exceeded. | ||
333 | */ | 323 | */ |
334 | static void ep_poll_safewake(struct poll_safewake *psw, wait_queue_head_t *wq) | 324 | static int ep_call_nested(struct nested_calls *ncalls, int max_nests, |
325 | int (*nproc)(void *, void *, int), void *priv, | ||
326 | void *cookie) | ||
335 | { | 327 | { |
336 | int wake_nests = 0; | 328 | int error, call_nests = 0; |
337 | unsigned long flags; | 329 | unsigned long flags; |
338 | struct task_struct *this_task = current; | 330 | int this_cpu = get_cpu(); |
339 | struct list_head *lsthead = &psw->wake_task_list; | 331 | struct list_head *lsthead = &ncalls->tasks_call_list; |
340 | struct wake_task_node *tncur; | 332 | struct nested_call_node *tncur; |
341 | struct wake_task_node tnode; | 333 | struct nested_call_node tnode; |
342 | 334 | ||
343 | spin_lock_irqsave(&psw->lock, flags); | 335 | spin_lock_irqsave(&ncalls->lock, flags); |
344 | 336 | ||
345 | /* Try to see if the current task is already inside this wakeup call */ | 337 | /* |
338 | * Try to see if the current task is already inside this wakeup call. | ||
339 | * We use a list here, since the population inside this set is always | ||
340 | * very much limited. | ||
341 | */ | ||
346 | list_for_each_entry(tncur, lsthead, llink) { | 342 | list_for_each_entry(tncur, lsthead, llink) { |
347 | 343 | if (tncur->cpu == this_cpu && | |
348 | if (tncur->wq == wq || | 344 | (tncur->cookie == cookie || ++call_nests > max_nests)) { |
349 | (tncur->task == this_task && ++wake_nests > EP_MAX_POLLWAKE_NESTS)) { | ||
350 | /* | 345 | /* |
351 | * Ops ... loop detected or maximum nest level reached. | 346 | * Ops ... loop detected or maximum nest level reached. |
352 | * We abort this wake by breaking the cycle itself. | 347 | * We abort this wake by breaking the cycle itself. |
353 | */ | 348 | */ |
354 | spin_unlock_irqrestore(&psw->lock, flags); | 349 | error = -1; |
355 | return; | 350 | goto out_unlock; |
356 | } | 351 | } |
357 | } | 352 | } |
358 | 353 | ||
359 | /* Add the current task to the list */ | 354 | /* Add the current task and cookie to the list */ |
360 | tnode.task = this_task; | 355 | tnode.cpu = this_cpu; |
361 | tnode.wq = wq; | 356 | tnode.cookie = cookie; |
362 | list_add(&tnode.llink, lsthead); | 357 | list_add(&tnode.llink, lsthead); |
363 | 358 | ||
364 | spin_unlock_irqrestore(&psw->lock, flags); | 359 | spin_unlock_irqrestore(&ncalls->lock, flags); |
365 | 360 | ||
366 | /* Do really wake up now */ | 361 | /* Call the nested function */ |
367 | wake_up_nested(wq, 1 + wake_nests); | 362 | error = (*nproc)(priv, cookie, call_nests); |
368 | 363 | ||
369 | /* Remove the current task from the list */ | 364 | /* Remove the current task from the list */ |
370 | spin_lock_irqsave(&psw->lock, flags); | 365 | spin_lock_irqsave(&ncalls->lock, flags); |
371 | list_del(&tnode.llink); | 366 | list_del(&tnode.llink); |
372 | spin_unlock_irqrestore(&psw->lock, flags); | 367 | out_unlock: |
368 | spin_unlock_irqrestore(&ncalls->lock, flags); | ||
369 | |||
370 | put_cpu(); | ||
371 | return error; | ||
372 | } | ||
373 | |||
374 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | ||
375 | static inline void ep_wake_up_nested(wait_queue_head_t *wqueue, | ||
376 | unsigned long events, int subclass) | ||
377 | { | ||
378 | unsigned long flags; | ||
379 | |||
380 | spin_lock_irqsave_nested(&wqueue->lock, flags, subclass); | ||
381 | wake_up_locked_poll(wqueue, events); | ||
382 | spin_unlock_irqrestore(&wqueue->lock, flags); | ||
383 | } | ||
384 | #else | ||
385 | static inline void ep_wake_up_nested(wait_queue_head_t *wqueue, | ||
386 | unsigned long events, int subclass) | ||
387 | { | ||
388 | wake_up_poll(wqueue, events); | ||
389 | } | ||
390 | #endif | ||
391 | |||
392 | static int ep_poll_wakeup_proc(void *priv, void *cookie, int call_nests) | ||
393 | { | ||
394 | ep_wake_up_nested((wait_queue_head_t *) cookie, POLLIN, | ||
395 | 1 + call_nests); | ||
396 | return 0; | ||
397 | } | ||
398 | |||
399 | /* | ||
400 | * Perform a safe wake up of the poll wait list. The problem is that | ||
401 | * with the new callback'd wake up system, it is possible that the | ||
402 | * poll callback is reentered from inside the call to wake_up() done | ||
403 | * on the poll wait queue head. The rule is that we cannot reenter the | ||
404 | * wake up code from the same task more than EP_MAX_NESTS times, | ||
405 | * and we cannot reenter the same wait queue head at all. This will | ||
406 | * enable to have a hierarchy of epoll file descriptor of no more than | ||
407 | * EP_MAX_NESTS deep. | ||
408 | */ | ||
409 | static void ep_poll_safewake(wait_queue_head_t *wq) | ||
410 | { | ||
411 | ep_call_nested(&poll_safewake_ncalls, EP_MAX_NESTS, | ||
412 | ep_poll_wakeup_proc, NULL, wq); | ||
373 | } | 413 | } |
374 | 414 | ||
375 | /* | 415 | /* |
376 | * This function unregister poll callbacks from the associated file descriptor. | 416 | * This function unregisters poll callbacks from the associated file |
377 | * Since this must be called without holding "ep->lock" the atomic exchange trick | 417 | * descriptor. Must be called with "mtx" held (or "epmutex" if called from |
378 | * will protect us from multiple unregister. | 418 | * ep_free). |
379 | */ | 419 | */ |
380 | static void ep_unregister_pollwait(struct eventpoll *ep, struct epitem *epi) | 420 | static void ep_unregister_pollwait(struct eventpoll *ep, struct epitem *epi) |
381 | { | 421 | { |
382 | int nwait; | ||
383 | struct list_head *lsthead = &epi->pwqlist; | 422 | struct list_head *lsthead = &epi->pwqlist; |
384 | struct eppoll_entry *pwq; | 423 | struct eppoll_entry *pwq; |
385 | 424 | ||
386 | /* This is called without locks, so we need the atomic exchange */ | 425 | while (!list_empty(lsthead)) { |
387 | nwait = xchg(&epi->nwait, 0); | 426 | pwq = list_first_entry(lsthead, struct eppoll_entry, llink); |
388 | 427 | ||
389 | if (nwait) { | 428 | list_del(&pwq->llink); |
390 | while (!list_empty(lsthead)) { | 429 | remove_wait_queue(pwq->whead, &pwq->wait); |
391 | pwq = list_first_entry(lsthead, struct eppoll_entry, llink); | 430 | kmem_cache_free(pwq_cache, pwq); |
431 | } | ||
432 | } | ||
392 | 433 | ||
393 | list_del_init(&pwq->llink); | 434 | /** |
394 | remove_wait_queue(pwq->whead, &pwq->wait); | 435 | * ep_scan_ready_list - Scans the ready list in a way that makes it possible |
395 | kmem_cache_free(pwq_cache, pwq); | 436 | * for the scan code to call f_op->poll(). Also allows for |
396 | } | 437 | * O(NumReady) performance. |
438 | * | ||
439 | * @ep: Pointer to the epoll private data structure. | ||
440 | * @sproc: Pointer to the scan callback. | ||
441 | * @priv: Private opaque data passed to the @sproc callback. | ||
442 | * | ||
443 | * Returns: The same integer error code returned by the @sproc callback. | ||
444 | */ | ||
445 | static int ep_scan_ready_list(struct eventpoll *ep, | ||
446 | int (*sproc)(struct eventpoll *, | ||
447 | struct list_head *, void *), | ||
448 | void *priv) | ||
449 | { | ||
450 | int error, pwake = 0; | ||
451 | unsigned long flags; | ||
452 | struct epitem *epi, *nepi; | ||
453 | LIST_HEAD(txlist); | ||
454 | |||
455 | /* | ||
456 | * We need to lock this because we could be hit by | ||
457 | * eventpoll_release_file() and epoll_ctl(). | ||
458 | */ | ||
459 | mutex_lock(&ep->mtx); | ||
460 | |||
461 | /* | ||
462 | * Steal the ready list, and re-init the original one to the | ||
463 | * empty list. Also, set ep->ovflist to NULL so that events | ||
464 | * happening while looping w/out locks, are not lost. We cannot | ||
465 | * have the poll callback to queue directly on ep->rdllist, | ||
466 | * because we want the "sproc" callback to be able to do it | ||
467 | * in a lockless way. | ||
468 | */ | ||
469 | spin_lock_irqsave(&ep->lock, flags); | ||
470 | list_splice_init(&ep->rdllist, &txlist); | ||
471 | ep->ovflist = NULL; | ||
472 | spin_unlock_irqrestore(&ep->lock, flags); | ||
473 | |||
474 | /* | ||
475 | * Now call the callback function. | ||
476 | */ | ||
477 | error = (*sproc)(ep, &txlist, priv); | ||
478 | |||
479 | spin_lock_irqsave(&ep->lock, flags); | ||
480 | /* | ||
481 | * During the time we spent inside the "sproc" callback, some | ||
482 | * other events might have been queued by the poll callback. | ||
483 | * We re-insert them inside the main ready-list here. | ||
484 | */ | ||
485 | for (nepi = ep->ovflist; (epi = nepi) != NULL; | ||
486 | nepi = epi->next, epi->next = EP_UNACTIVE_PTR) { | ||
487 | /* | ||
488 | * We need to check if the item is already in the list. | ||
489 | * During the "sproc" callback execution time, items are | ||
490 | * queued into ->ovflist but the "txlist" might already | ||
491 | * contain them, and the list_splice() below takes care of them. | ||
492 | */ | ||
493 | if (!ep_is_linked(&epi->rdllink)) | ||
494 | list_add_tail(&epi->rdllink, &ep->rdllist); | ||
495 | } | ||
496 | /* | ||
497 | * We need to set back ep->ovflist to EP_UNACTIVE_PTR, so that after | ||
498 | * releasing the lock, events will be queued in the normal way inside | ||
499 | * ep->rdllist. | ||
500 | */ | ||
501 | ep->ovflist = EP_UNACTIVE_PTR; | ||
502 | |||
503 | /* | ||
504 | * Quickly re-inject items left on "txlist". | ||
505 | */ | ||
506 | list_splice(&txlist, &ep->rdllist); | ||
507 | |||
508 | if (!list_empty(&ep->rdllist)) { | ||
509 | /* | ||
510 | * Wake up (if active) both the eventpoll wait list and | ||
511 | * the ->poll() wait list (delayed after we release the lock). | ||
512 | */ | ||
513 | if (waitqueue_active(&ep->wq)) | ||
514 | wake_up_locked(&ep->wq); | ||
515 | if (waitqueue_active(&ep->poll_wait)) | ||
516 | pwake++; | ||
397 | } | 517 | } |
518 | spin_unlock_irqrestore(&ep->lock, flags); | ||
519 | |||
520 | mutex_unlock(&ep->mtx); | ||
521 | |||
522 | /* We have to call this outside the lock */ | ||
523 | if (pwake) | ||
524 | ep_poll_safewake(&ep->poll_wait); | ||
525 | |||
526 | return error; | ||
398 | } | 527 | } |
399 | 528 | ||
400 | /* | 529 | /* |
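The ep_call_nested() helper added above bounds the nesting depth and detects cycles by keeping, for the duration of each call, a (cpu, cookie) record on a shared list. A minimal single-threaded sketch of the same bookkeeping, with illustrative names and without the kernel's spinlock and per-CPU handling (not part of the patch):

/*
 * Simplified analogue of ep_call_nested(): a stack of cookies for the
 * calls currently in progress.  A repeated cookie means a loop; more
 * than max_nests live entries means the nesting bound was exceeded.
 * Single-threaded demo only; the kernel version adds a spinlock and a
 * per-CPU check, and passes the cookie and nesting level to the callback.
 */
struct call_node {
	struct call_node *next;
	void *cookie;
};

static struct call_node *calls_in_progress;

static int call_nested(int max_nests, int (*proc)(void *), void *priv,
		       void *cookie)
{
	struct call_node node, *p;
	int depth = 0, ret;

	for (p = calls_in_progress; p; p = p->next)
		if (p->cookie == cookie || ++depth > max_nests)
			return -1;		/* loop or too deep */

	node.cookie = cookie;			/* push ourselves */
	node.next = calls_in_progress;
	calls_in_progress = &node;

	ret = (*proc)(priv);			/* the real nested work */

	calls_in_progress = node.next;		/* pop */
	return ret;
}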
@@ -434,9 +563,6 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi) | |||
434 | 563 | ||
435 | atomic_dec(&ep->user->epoll_watches); | 564 | atomic_dec(&ep->user->epoll_watches); |
436 | 565 | ||
437 | DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_remove(%p, %p)\n", | ||
438 | current, ep, file)); | ||
439 | |||
440 | return 0; | 566 | return 0; |
441 | } | 567 | } |
442 | 568 | ||
@@ -447,7 +573,7 @@ static void ep_free(struct eventpoll *ep) | |||
447 | 573 | ||
448 | /* We need to release all tasks waiting for these file */ | 574 | /* We need to release all tasks waiting for these file */ |
449 | if (waitqueue_active(&ep->poll_wait)) | 575 | if (waitqueue_active(&ep->poll_wait)) |
450 | ep_poll_safewake(&psw, &ep->poll_wait); | 576 | ep_poll_safewake(&ep->poll_wait); |
451 | 577 | ||
452 | /* | 578 | /* |
453 | * We need to lock this because we could be hit by | 579 | * We need to lock this because we could be hit by |
@@ -492,26 +618,54 @@ static int ep_eventpoll_release(struct inode *inode, struct file *file) | |||
492 | if (ep) | 618 | if (ep) |
493 | ep_free(ep); | 619 | ep_free(ep); |
494 | 620 | ||
495 | DNPRINTK(3, (KERN_INFO "[%p] eventpoll: close() ep=%p\n", current, ep)); | ||
496 | return 0; | 621 | return 0; |
497 | } | 622 | } |
498 | 623 | ||
624 | static int ep_read_events_proc(struct eventpoll *ep, struct list_head *head, | ||
625 | void *priv) | ||
626 | { | ||
627 | struct epitem *epi, *tmp; | ||
628 | |||
629 | list_for_each_entry_safe(epi, tmp, head, rdllink) { | ||
630 | if (epi->ffd.file->f_op->poll(epi->ffd.file, NULL) & | ||
631 | epi->event.events) | ||
632 | return POLLIN | POLLRDNORM; | ||
633 | else { | ||
634 | /* | ||
635 | * Item has been dropped into the ready list by the poll | ||
636 | * callback, but it's not actually ready, as far as | ||
637 | * caller requested events goes. We can remove it here. | ||
638 | */ | ||
639 | list_del_init(&epi->rdllink); | ||
640 | } | ||
641 | } | ||
642 | |||
643 | return 0; | ||
644 | } | ||
645 | |||
646 | static int ep_poll_readyevents_proc(void *priv, void *cookie, int call_nests) | ||
647 | { | ||
648 | return ep_scan_ready_list(priv, ep_read_events_proc, NULL); | ||
649 | } | ||
650 | |||
499 | static unsigned int ep_eventpoll_poll(struct file *file, poll_table *wait) | 651 | static unsigned int ep_eventpoll_poll(struct file *file, poll_table *wait) |
500 | { | 652 | { |
501 | unsigned int pollflags = 0; | 653 | int pollflags; |
502 | unsigned long flags; | ||
503 | struct eventpoll *ep = file->private_data; | 654 | struct eventpoll *ep = file->private_data; |
504 | 655 | ||
505 | /* Insert inside our poll wait queue */ | 656 | /* Insert inside our poll wait queue */ |
506 | poll_wait(file, &ep->poll_wait, wait); | 657 | poll_wait(file, &ep->poll_wait, wait); |
507 | 658 | ||
508 | /* Check our condition */ | 659 | /* |
509 | spin_lock_irqsave(&ep->lock, flags); | 660 | * Proceed to find out if wanted events are really available inside |
510 | if (!list_empty(&ep->rdllist)) | 661 | * the ready list. This need to be done under ep_call_nested() |
511 | pollflags = POLLIN | POLLRDNORM; | 662 | * supervision, since the call to f_op->poll() done on listed files |
512 | spin_unlock_irqrestore(&ep->lock, flags); | 663 | * could re-enter here. |
664 | */ | ||
665 | pollflags = ep_call_nested(&poll_readywalk_ncalls, EP_MAX_NESTS, | ||
666 | ep_poll_readyevents_proc, ep, ep); | ||
513 | 667 | ||
514 | return pollflags; | 668 | return pollflags != -1 ? pollflags : 0; |
515 | } | 669 | } |
516 | 670 | ||
517 | /* File callbacks that implement the eventpoll file behaviour */ | 671 | /* File callbacks that implement the eventpoll file behaviour */ |
@@ -541,7 +695,7 @@ void eventpoll_release_file(struct file *file) | |||
541 | * We don't want to get "file->f_lock" because it is not | 695 | * We don't want to get "file->f_lock" because it is not |
542 | * necessary. It is not necessary because we're in the "struct file" | 696 | * necessary. It is not necessary because we're in the "struct file" |
543 | * cleanup path, and this means that noone is using this file anymore. | 697 | * cleanup path, and this means that noone is using this file anymore. |
544 | * So, for example, epoll_ctl() cannot hit here sicne if we reach this | 698 | * So, for example, epoll_ctl() cannot hit here since if we reach this |
545 | * point, the file counter already went to zero and fget() would fail. | 699 | * point, the file counter already went to zero and fget() would fail. |
546 | * The only hit might come from ep_free() but by holding the mutex | 700 | * The only hit might come from ep_free() but by holding the mutex |
547 | * will correctly serialize the operation. We do need to acquire | 701 | * will correctly serialize the operation. We do need to acquire |
@@ -588,8 +742,6 @@ static int ep_alloc(struct eventpoll **pep) | |||
588 | 742 | ||
589 | *pep = ep; | 743 | *pep = ep; |
590 | 744 | ||
591 | DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_alloc() ep=%p\n", | ||
592 | current, ep)); | ||
593 | return 0; | 745 | return 0; |
594 | 746 | ||
595 | free_uid: | 747 | free_uid: |
@@ -623,9 +775,6 @@ static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd) | |||
623 | } | 775 | } |
624 | } | 776 | } |
625 | 777 | ||
626 | DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_find(%p) -> %p\n", | ||
627 | current, file, epir)); | ||
628 | |||
629 | return epir; | 778 | return epir; |
630 | } | 779 | } |
631 | 780 | ||
@@ -641,9 +790,6 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k | |||
641 | struct epitem *epi = ep_item_from_wait(wait); | 790 | struct epitem *epi = ep_item_from_wait(wait); |
642 | struct eventpoll *ep = epi->ep; | 791 | struct eventpoll *ep = epi->ep; |
643 | 792 | ||
644 | DNPRINTK(3, (KERN_INFO "[%p] eventpoll: poll_callback(%p) epi=%p ep=%p\n", | ||
645 | current, epi->ffd.file, epi, ep)); | ||
646 | |||
647 | spin_lock_irqsave(&ep->lock, flags); | 793 | spin_lock_irqsave(&ep->lock, flags); |
648 | 794 | ||
649 | /* | 795 | /* |
@@ -656,6 +802,15 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k | |||
656 | goto out_unlock; | 802 | goto out_unlock; |
657 | 803 | ||
658 | /* | 804 | /* |
805 | * Check the events coming with the callback. At this stage, not | ||
806 | * every device reports the events in the "key" parameter of the | ||
807 | * callback. We need to be able to handle both cases here, hence the | ||
808 | * test for "key" != NULL before the event match test. | ||
809 | */ | ||
810 | if (key && !((unsigned long) key & epi->event.events)) | ||
811 | goto out_unlock; | ||
812 | |||
813 | /* | ||
659 | * If we are transferring events to userspace, we can hold no locks | 814 |
660 | * (because we're accessing user memory, and because of linux f_op->poll() | 815 |
661 | * semantics). All the events that happen during that period of time are | 816 |
@@ -670,12 +825,9 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k | |||
670 | } | 825 | } |
671 | 826 | ||
672 | /* If this file is already in the ready list we exit soon */ | 827 | /* If this file is already in the ready list we exit soon */ |
673 | if (ep_is_linked(&epi->rdllink)) | 828 | if (!ep_is_linked(&epi->rdllink)) |
674 | goto is_linked; | 829 | list_add_tail(&epi->rdllink, &ep->rdllist); |
675 | |||
676 | list_add_tail(&epi->rdllink, &ep->rdllist); | ||
677 | 830 | ||
678 | is_linked: | ||
679 | /* | 831 | /* |
680 | * Wake up ( if active ) both the eventpoll wait list and the ->poll() | 832 | * Wake up ( if active ) both the eventpoll wait list and the ->poll() |
681 | * wait list. | 833 | * wait list. |
@@ -690,7 +842,7 @@ out_unlock: | |||
690 | 842 | ||
691 | /* We have to call this outside the lock */ | 843 | /* We have to call this outside the lock */ |
692 | if (pwake) | 844 | if (pwake) |
693 | ep_poll_safewake(&psw, &ep->poll_wait); | 845 | ep_poll_safewake(&ep->poll_wait); |
694 | 846 | ||
695 | return 1; | 847 | return 1; |
696 | } | 848 | } |
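The check added to ep_poll_callback() above only drops a wakeup when the waker supplies a key and that key has no bit in common with the watched events; a NULL key still queues the item because the ready set is unknown. The same test, restated as a standalone helper with illustrative names (not part of the patch):

/*
 * Mirror of the test added above: 'key' is the event mask a waker may
 * pass to the wait-queue callback (0 when unknown), 'interest' is
 * epi->event.events.  Returns nonzero when the item should be queued
 * on the ready list.
 */
static int wake_key_matches(unsigned long key, unsigned long interest)
{
	if (key && !(key & interest))
		return 0;	/* waker reported only events we don't watch */
	return 1;		/* match, or no key information: queue it */
}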
@@ -817,10 +969,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, | |||
817 | 969 | ||
818 | /* We have to call this outside the lock */ | 970 | /* We have to call this outside the lock */ |
819 | if (pwake) | 971 | if (pwake) |
820 | ep_poll_safewake(&psw, &ep->poll_wait); | 972 | ep_poll_safewake(&ep->poll_wait); |
821 | |||
822 | DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_insert(%p, %p, %d)\n", | ||
823 | current, ep, tfile, fd)); | ||
824 | 973 | ||
825 | return 0; | 974 | return 0; |
826 | 975 | ||
@@ -851,15 +1000,14 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even | |||
851 | { | 1000 | { |
852 | int pwake = 0; | 1001 | int pwake = 0; |
853 | unsigned int revents; | 1002 | unsigned int revents; |
854 | unsigned long flags; | ||
855 | 1003 | ||
856 | /* | 1004 | /* |
857 | * Set the new event interest mask before calling f_op->poll(), otherwise | 1005 | * Set the new event interest mask before calling f_op->poll(); |
858 | * a potential race might occur. In fact if we do this operation inside | 1006 | * otherwise we might miss an event that happens between the |
859 | * the lock, an event might happen between the f_op->poll() call and the | 1007 | * f_op->poll() call and the new event set registering. |
860 | * new event set registering. | ||
861 | */ | 1008 | */ |
862 | epi->event.events = event->events; | 1009 | epi->event.events = event->events; |
1010 | epi->event.data = event->data; /* protected by mtx */ | ||
863 | 1011 | ||
864 | /* | 1012 | /* |
865 | * Get current event bits. We can safely use the file* here because | 1013 | * Get current event bits. We can safely use the file* here because |
@@ -867,16 +1015,12 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even | |||
867 | */ | 1015 | */ |
868 | revents = epi->ffd.file->f_op->poll(epi->ffd.file, NULL); | 1016 | revents = epi->ffd.file->f_op->poll(epi->ffd.file, NULL); |
869 | 1017 | ||
870 | spin_lock_irqsave(&ep->lock, flags); | ||
871 | |||
872 | /* Copy the data member from inside the lock */ | ||
873 | epi->event.data = event->data; | ||
874 | |||
875 | /* | 1018 | /* |
876 | * If the item is "hot" and it is not registered inside the ready | 1019 | * If the item is "hot" and it is not registered inside the ready |
877 | * list, push it inside. | 1020 | * list, push it inside. |
878 | */ | 1021 | */ |
879 | if (revents & event->events) { | 1022 | if (revents & event->events) { |
1023 | spin_lock_irq(&ep->lock); | ||
880 | if (!ep_is_linked(&epi->rdllink)) { | 1024 | if (!ep_is_linked(&epi->rdllink)) { |
881 | list_add_tail(&epi->rdllink, &ep->rdllist); | 1025 | list_add_tail(&epi->rdllink, &ep->rdllist); |
882 | 1026 | ||
@@ -886,142 +1030,84 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even | |||
886 | if (waitqueue_active(&ep->poll_wait)) | 1030 | if (waitqueue_active(&ep->poll_wait)) |
887 | pwake++; | 1031 | pwake++; |
888 | } | 1032 | } |
1033 | spin_unlock_irq(&ep->lock); | ||
889 | } | 1034 | } |
890 | spin_unlock_irqrestore(&ep->lock, flags); | ||
891 | 1035 | ||
892 | /* We have to call this outside the lock */ | 1036 | /* We have to call this outside the lock */ |
893 | if (pwake) | 1037 | if (pwake) |
894 | ep_poll_safewake(&psw, &ep->poll_wait); | 1038 | ep_poll_safewake(&ep->poll_wait); |
895 | 1039 | ||
896 | return 0; | 1040 | return 0; |
897 | } | 1041 | } |
898 | 1042 | ||
899 | static int ep_send_events(struct eventpoll *ep, struct epoll_event __user *events, | 1043 | static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head, |
900 | int maxevents) | 1044 | void *priv) |
901 | { | 1045 | { |
902 | int eventcnt, error = -EFAULT, pwake = 0; | 1046 | struct ep_send_events_data *esed = priv; |
1047 | int eventcnt; | ||
903 | unsigned int revents; | 1048 | unsigned int revents; |
904 | unsigned long flags; | 1049 | struct epitem *epi; |
905 | struct epitem *epi, *nepi; | 1050 | struct epoll_event __user *uevent; |
906 | struct list_head txlist; | ||
907 | |||
908 | INIT_LIST_HEAD(&txlist); | ||
909 | |||
910 | /* | ||
911 | * We need to lock this because we could be hit by | ||
912 | * eventpoll_release_file() and epoll_ctl(EPOLL_CTL_DEL). | ||
913 | */ | ||
914 | mutex_lock(&ep->mtx); | ||
915 | |||
916 | /* | ||
917 | * Steal the ready list, and re-init the original one to the | ||
918 | * empty list. Also, set ep->ovflist to NULL so that events | ||
919 | * happening while looping w/out locks, are not lost. We cannot | ||
920 | * have the poll callback to queue directly on ep->rdllist, | ||
921 | * because we are doing it in the loop below, in a lockless way. | ||
922 | */ | ||
923 | spin_lock_irqsave(&ep->lock, flags); | ||
924 | list_splice(&ep->rdllist, &txlist); | ||
925 | INIT_LIST_HEAD(&ep->rdllist); | ||
926 | ep->ovflist = NULL; | ||
927 | spin_unlock_irqrestore(&ep->lock, flags); | ||
928 | 1051 | ||
929 | /* | 1052 | /* |
930 | * We can loop without lock because this is a task private list. | 1053 | * We can loop without lock because we are passed a task private list. |
931 | * We just splice'd out the ep->rdllist in ep_collect_ready_items(). | 1054 | * Items cannot vanish during the loop because ep_scan_ready_list() is |
932 | * Items cannot vanish during the loop because we are holding "mtx". | 1055 | * holding "mtx" during this call. |
933 | */ | 1056 | */ |
934 | for (eventcnt = 0; !list_empty(&txlist) && eventcnt < maxevents;) { | 1057 | for (eventcnt = 0, uevent = esed->events; |
935 | epi = list_first_entry(&txlist, struct epitem, rdllink); | 1058 | !list_empty(head) && eventcnt < esed->maxevents;) { |
1059 | epi = list_first_entry(head, struct epitem, rdllink); | ||
936 | 1060 | ||
937 | list_del_init(&epi->rdllink); | 1061 | list_del_init(&epi->rdllink); |
938 | 1062 | ||
939 | /* | 1063 | revents = epi->ffd.file->f_op->poll(epi->ffd.file, NULL) & |
940 | * Get the ready file event set. We can safely use the file | 1064 | epi->event.events; |
941 | * because we are holding the "mtx" and this will guarantee | ||
942 | * that both the file and the item will not vanish. | ||
943 | */ | ||
944 | revents = epi->ffd.file->f_op->poll(epi->ffd.file, NULL); | ||
945 | revents &= epi->event.events; | ||
946 | 1065 | ||
947 | /* | 1066 | /* |
948 | * Is the event mask intersect the caller-requested one, | 1067 | * If the event mask intersects the caller-requested one, |
949 | * deliver the event to userspace. Again, we are holding | 1068 | * deliver the event to userspace. Again, ep_scan_ready_list() |
950 | * "mtx", so no operations coming from userspace can change | 1069 | * is holding "mtx", so no operations coming from userspace |
951 | * the item. | 1070 | * can change the item. |
952 | */ | 1071 | */ |
953 | if (revents) { | 1072 | if (revents) { |
954 | if (__put_user(revents, | 1073 | if (__put_user(revents, &uevent->events) || |
955 | &events[eventcnt].events) || | 1074 | __put_user(epi->event.data, &uevent->data)) { |
956 | __put_user(epi->event.data, | 1075 | list_add(&epi->rdllink, head); |
957 | &events[eventcnt].data)) | 1076 | return eventcnt ? eventcnt : -EFAULT; |
958 | goto errxit; | 1077 | } |
1078 | eventcnt++; | ||
1079 | uevent++; | ||
959 | if (epi->event.events & EPOLLONESHOT) | 1080 | if (epi->event.events & EPOLLONESHOT) |
960 | epi->event.events &= EP_PRIVATE_BITS; | 1081 | epi->event.events &= EP_PRIVATE_BITS; |
961 | eventcnt++; | 1082 | else if (!(epi->event.events & EPOLLET)) { |
1083 | /* | ||
1084 | * If this file has been added with Level | ||
1085 | * Trigger mode, we need to insert back inside | ||
1086 | * the ready list, so that the next call to | ||
1087 | * epoll_wait() will check again the events | ||
1088 | * availability. At this point, noone can insert | ||
1089 | * into ep->rdllist besides us. The epoll_ctl() | ||
1090 | * callers are locked out by | ||
1091 | * ep_scan_ready_list() holding "mtx" and the | ||
1092 | * poll callback will queue them in ep->ovflist. | ||
1093 | */ | ||
1094 | list_add_tail(&epi->rdllink, &ep->rdllist); | ||
1095 | } | ||
962 | } | 1096 | } |
963 | /* | ||
964 | * At this point, noone can insert into ep->rdllist besides | ||
965 | * us. The epoll_ctl() callers are locked out by us holding | ||
966 | * "mtx" and the poll callback will queue them in ep->ovflist. | ||
967 | */ | ||
968 | if (!(epi->event.events & EPOLLET) && | ||
969 | (revents & epi->event.events)) | ||
970 | list_add_tail(&epi->rdllink, &ep->rdllist); | ||
971 | } | ||
972 | error = 0; | ||
973 | |||
974 | errxit: | ||
975 | |||
976 | spin_lock_irqsave(&ep->lock, flags); | ||
977 | /* | ||
978 | * During the time we spent in the loop above, some other events | ||
979 | * might have been queued by the poll callback. We re-insert them | ||
980 | * inside the main ready-list here. | ||
981 | */ | ||
982 | for (nepi = ep->ovflist; (epi = nepi) != NULL; | ||
983 | nepi = epi->next, epi->next = EP_UNACTIVE_PTR) { | ||
984 | /* | ||
985 | * If the above loop quit with errors, the epoll item might still | ||
986 | * be linked to "txlist", and the list_splice() done below will | ||
987 | * take care of those cases. | ||
988 | */ | ||
989 | if (!ep_is_linked(&epi->rdllink)) | ||
990 | list_add_tail(&epi->rdllink, &ep->rdllist); | ||
991 | } | 1097 | } |
992 | /* | ||
993 | * We need to set back ep->ovflist to EP_UNACTIVE_PTR, so that after | ||
994 | * releasing the lock, events will be queued in the normal way inside | ||
995 | * ep->rdllist. | ||
996 | */ | ||
997 | ep->ovflist = EP_UNACTIVE_PTR; | ||
998 | 1098 | ||
999 | /* | 1099 | return eventcnt; |
1000 | * In case of error in the event-send loop, or in case the number of | 1100 | } |
1001 | * ready events exceeds the userspace limit, we need to splice the | ||
1002 | * "txlist" back inside ep->rdllist. | ||
1003 | */ | ||
1004 | list_splice(&txlist, &ep->rdllist); | ||
1005 | |||
1006 | if (!list_empty(&ep->rdllist)) { | ||
1007 | /* | ||
1008 | * Wake up (if active) both the eventpoll wait list and the ->poll() | ||
1009 | * wait list (delayed after we release the lock). | ||
1010 | */ | ||
1011 | if (waitqueue_active(&ep->wq)) | ||
1012 | wake_up_locked(&ep->wq); | ||
1013 | if (waitqueue_active(&ep->poll_wait)) | ||
1014 | pwake++; | ||
1015 | } | ||
1016 | spin_unlock_irqrestore(&ep->lock, flags); | ||
1017 | 1101 | ||
1018 | mutex_unlock(&ep->mtx); | 1102 | static int ep_send_events(struct eventpoll *ep, |
1103 | struct epoll_event __user *events, int maxevents) | ||
1104 | { | ||
1105 | struct ep_send_events_data esed; | ||
1019 | 1106 | ||
1020 | /* We have to call this outside the lock */ | 1107 | esed.maxevents = maxevents; |
1021 | if (pwake) | 1108 | esed.events = events; |
1022 | ep_poll_safewake(&psw, &ep->poll_wait); | ||
1023 | 1109 | ||
1024 | return eventcnt == 0 ? error: eventcnt; | 1110 | return ep_scan_ready_list(ep, ep_send_events_proc, &esed); |
1025 | } | 1111 | } |
1026 | 1112 | ||
1027 | static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, | 1113 | static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, |
@@ -1033,7 +1119,7 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, | |||
1033 | wait_queue_t wait; | 1119 | wait_queue_t wait; |
1034 | 1120 | ||
1035 | /* | 1121 | /* |
1036 | * Calculate the timeout by checking for the "infinite" value ( -1 ) | 1122 | * Calculate the timeout by checking for the "infinite" value (-1) |
1037 | * and the overflow condition. The passed timeout is in milliseconds, | 1123 | * and the overflow condition. The passed timeout is in milliseconds, |
1038 | * that why (t * HZ) / 1000. | 1124 | * that why (t * HZ) / 1000. |
1039 | */ | 1125 | */ |
@@ -1076,9 +1162,8 @@ retry: | |||
1076 | 1162 | ||
1077 | set_current_state(TASK_RUNNING); | 1163 | set_current_state(TASK_RUNNING); |
1078 | } | 1164 | } |
1079 | |||
1080 | /* Is it worth to try to dig for events ? */ | 1165 | /* Is it worth to try to dig for events ? */ |
1081 | eavail = !list_empty(&ep->rdllist); | 1166 | eavail = !list_empty(&ep->rdllist) || ep->ovflist != EP_UNACTIVE_PTR; |
1082 | 1167 | ||
1083 | spin_unlock_irqrestore(&ep->lock, flags); | 1168 | spin_unlock_irqrestore(&ep->lock, flags); |
1084 | 1169 | ||
@@ -1099,41 +1184,30 @@ retry: | |||
1099 | */ | 1184 | */ |
1100 | SYSCALL_DEFINE1(epoll_create1, int, flags) | 1185 | SYSCALL_DEFINE1(epoll_create1, int, flags) |
1101 | { | 1186 | { |
1102 | int error, fd = -1; | 1187 | int error; |
1103 | struct eventpoll *ep; | 1188 | struct eventpoll *ep = NULL; |
1104 | 1189 | ||
1105 | /* Check the EPOLL_* constant for consistency. */ | 1190 | /* Check the EPOLL_* constant for consistency. */ |
1106 | BUILD_BUG_ON(EPOLL_CLOEXEC != O_CLOEXEC); | 1191 | BUILD_BUG_ON(EPOLL_CLOEXEC != O_CLOEXEC); |
1107 | 1192 | ||
1108 | if (flags & ~EPOLL_CLOEXEC) | 1193 | if (flags & ~EPOLL_CLOEXEC) |
1109 | return -EINVAL; | 1194 | return -EINVAL; |
1110 | |||
1111 | DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d)\n", | ||
1112 | current, flags)); | ||
1113 | |||
1114 | /* | 1195 | /* |
1115 | * Create the internal data structure ( "struct eventpoll" ). | 1196 | * Create the internal data structure ("struct eventpoll"). |
1116 | */ | 1197 | */ |
1117 | error = ep_alloc(&ep); | 1198 | error = ep_alloc(&ep); |
1118 | if (error < 0) { | 1199 | if (error < 0) |
1119 | fd = error; | 1200 | return error; |
1120 | goto error_return; | ||
1121 | } | ||
1122 | |||
1123 | /* | 1201 | /* |
1124 | * Creates all the items needed to setup an eventpoll file. That is, | 1202 | * Creates all the items needed to setup an eventpoll file. That is, |
1125 | * a file structure and a free file descriptor. | 1203 | * a file structure and a free file descriptor. |
1126 | */ | 1204 | */ |
1127 | fd = anon_inode_getfd("[eventpoll]", &eventpoll_fops, ep, | 1205 | error = anon_inode_getfd("[eventpoll]", &eventpoll_fops, ep, |
1128 | flags & O_CLOEXEC); | 1206 | flags & O_CLOEXEC); |
1129 | if (fd < 0) | 1207 | if (error < 0) |
1130 | ep_free(ep); | 1208 | ep_free(ep); |
1131 | 1209 | ||
1132 | error_return: | 1210 | return error; |
1133 | DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n", | ||
1134 | current, flags, fd)); | ||
1135 | |||
1136 | return fd; | ||
1137 | } | 1211 | } |
1138 | 1212 | ||
1139 | SYSCALL_DEFINE1(epoll_create, int, size) | 1213 | SYSCALL_DEFINE1(epoll_create, int, size) |
@@ -1158,9 +1232,6 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd, | |||
1158 | struct epitem *epi; | 1232 | struct epitem *epi; |
1159 | struct epoll_event epds; | 1233 | struct epoll_event epds; |
1160 | 1234 | ||
1161 | DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_ctl(%d, %d, %d, %p)\n", | ||
1162 | current, epfd, op, fd, event)); | ||
1163 | |||
1164 | error = -EFAULT; | 1235 | error = -EFAULT; |
1165 | if (ep_op_has_event(op) && | 1236 | if (ep_op_has_event(op) && |
1166 | copy_from_user(&epds, event, sizeof(struct epoll_event))) | 1237 | copy_from_user(&epds, event, sizeof(struct epoll_event))) |
@@ -1211,7 +1282,6 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd, | |||
1211 | case EPOLL_CTL_ADD: | 1282 | case EPOLL_CTL_ADD: |
1212 | if (!epi) { | 1283 | if (!epi) { |
1213 | epds.events |= POLLERR | POLLHUP; | 1284 | epds.events |= POLLERR | POLLHUP; |
1214 | |||
1215 | error = ep_insert(ep, &epds, tfile, fd); | 1285 | error = ep_insert(ep, &epds, tfile, fd); |
1216 | } else | 1286 | } else |
1217 | error = -EEXIST; | 1287 | error = -EEXIST; |
@@ -1237,8 +1307,6 @@ error_tgt_fput: | |||
1237 | error_fput: | 1307 | error_fput: |
1238 | fput(file); | 1308 | fput(file); |
1239 | error_return: | 1309 | error_return: |
1240 | DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_ctl(%d, %d, %d, %p) = %d\n", | ||
1241 | current, epfd, op, fd, event, error)); | ||
1242 | 1310 | ||
1243 | return error; | 1311 | return error; |
1244 | } | 1312 | } |
@@ -1254,9 +1322,6 @@ SYSCALL_DEFINE4(epoll_wait, int, epfd, struct epoll_event __user *, events, | |||
1254 | struct file *file; | 1322 | struct file *file; |
1255 | struct eventpoll *ep; | 1323 | struct eventpoll *ep; |
1256 | 1324 | ||
1257 | DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_wait(%d, %p, %d, %d)\n", | ||
1258 | current, epfd, events, maxevents, timeout)); | ||
1259 | |||
1260 | /* The maximum number of event must be greater than zero */ | 1325 | /* The maximum number of event must be greater than zero */ |
1261 | if (maxevents <= 0 || maxevents > EP_MAX_EVENTS) | 1326 | if (maxevents <= 0 || maxevents > EP_MAX_EVENTS) |
1262 | return -EINVAL; | 1327 | return -EINVAL; |
@@ -1293,8 +1358,6 @@ SYSCALL_DEFINE4(epoll_wait, int, epfd, struct epoll_event __user *, events, | |||
1293 | error_fput: | 1358 | error_fput: |
1294 | fput(file); | 1359 | fput(file); |
1295 | error_return: | 1360 | error_return: |
1296 | DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_wait(%d, %p, %d, %d) = %d\n", | ||
1297 | current, epfd, events, maxevents, timeout, error)); | ||
1298 | 1361 | ||
1299 | return error; | 1362 | return error; |
1300 | } | 1363 | } |
@@ -1359,17 +1422,18 @@ static int __init eventpoll_init(void) | |||
1359 | EP_ITEM_COST; | 1422 | EP_ITEM_COST; |
1360 | 1423 | ||
1361 | /* Initialize the structure used to perform safe poll wait head wake ups */ | 1424 | /* Initialize the structure used to perform safe poll wait head wake ups */ |
1362 | ep_poll_safewake_init(&psw); | 1425 | ep_nested_calls_init(&poll_safewake_ncalls); |
1426 | |||
1427 | /* Initialize the structure used to perform file's f_op->poll() calls */ | ||
1428 | ep_nested_calls_init(&poll_readywalk_ncalls); | ||
1363 | 1429 | ||
1364 | /* Allocates slab cache used to allocate "struct epitem" items */ | 1430 | /* Allocates slab cache used to allocate "struct epitem" items */ |
1365 | epi_cache = kmem_cache_create("eventpoll_epi", sizeof(struct epitem), | 1431 | epi_cache = kmem_cache_create("eventpoll_epi", sizeof(struct epitem), |
1366 | 0, SLAB_HWCACHE_ALIGN|EPI_SLAB_DEBUG|SLAB_PANIC, | 1432 | 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); |
1367 | NULL); | ||
1368 | 1433 | ||
1369 | /* Allocates slab cache used to allocate "struct eppoll_entry" */ | 1434 | /* Allocates slab cache used to allocate "struct eppoll_entry" */ |
1370 | pwq_cache = kmem_cache_create("eventpoll_pwq", | 1435 | pwq_cache = kmem_cache_create("eventpoll_pwq", |
1371 | sizeof(struct eppoll_entry), 0, | 1436 | sizeof(struct eppoll_entry), 0, SLAB_PANIC, NULL); |
1372 | EPI_SLAB_DEBUG|SLAB_PANIC, NULL); | ||
1373 | 1437 | ||
1374 | return 0; | 1438 | return 0; |
1375 | } | 1439 | } |
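For reference, a minimal user-space sketch (not part of the patch) exercising the three syscalls implemented in this file: epoll_create1(), epoll_ctl() and epoll_wait(). With a level-triggered entry such as this one, a descriptor that is still ready is reported again by the next epoll_wait() call, which is exactly the re-insertion that ep_send_events_proc() performs above.

#include <stdio.h>
#include <unistd.h>
#include <sys/epoll.h>

int main(void)
{
	struct epoll_event ev = { .events = EPOLLIN };	/* level-triggered */
	struct epoll_event ready[8];
	int i, n, epfd = epoll_create1(0);

	ev.data.fd = STDIN_FILENO;
	if (epfd < 0 || epoll_ctl(epfd, EPOLL_CTL_ADD, STDIN_FILENO, &ev) < 0) {
		perror("epoll setup");
		return 1;
	}

	n = epoll_wait(epfd, ready, 8, 1000);	/* wait up to 1000 ms */
	for (i = 0; i < n; i++)
		printf("fd %d ready, events 0x%x\n",
		       ready[i].data.fd, ready[i].events);

	close(epfd);
	return 0;
}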