diff options
Diffstat (limited to 'fs/select.c')
-rw-r--r-- | fs/select.c | 348 |
1 files changed, 291 insertions, 57 deletions
diff --git a/fs/select.c b/fs/select.c index f10a10317d54..c0f02d36c60e 100644 --- a/fs/select.c +++ b/fs/select.c | |||
@@ -179,12 +179,11 @@ get_max: | |||
179 | #define POLLOUT_SET (POLLWRBAND | POLLWRNORM | POLLOUT | POLLERR) | 179 | #define POLLOUT_SET (POLLWRBAND | POLLWRNORM | POLLOUT | POLLERR) |
180 | #define POLLEX_SET (POLLPRI) | 180 | #define POLLEX_SET (POLLPRI) |
181 | 181 | ||
182 | int do_select(int n, fd_set_bits *fds, long *timeout) | 182 | int do_select(int n, fd_set_bits *fds, s64 *timeout) |
183 | { | 183 | { |
184 | struct poll_wqueues table; | 184 | struct poll_wqueues table; |
185 | poll_table *wait; | 185 | poll_table *wait; |
186 | int retval, i; | 186 | int retval, i; |
187 | long __timeout = *timeout; | ||
188 | 187 | ||
189 | rcu_read_lock(); | 188 | rcu_read_lock(); |
190 | retval = max_select_fd(n, fds); | 189 | retval = max_select_fd(n, fds); |
@@ -196,11 +195,12 @@ int do_select(int n, fd_set_bits *fds, long *timeout) | |||
196 | 195 | ||
197 | poll_initwait(&table); | 196 | poll_initwait(&table); |
198 | wait = &table.pt; | 197 | wait = &table.pt; |
199 | if (!__timeout) | 198 | if (!*timeout) |
200 | wait = NULL; | 199 | wait = NULL; |
201 | retval = 0; | 200 | retval = 0; |
202 | for (;;) { | 201 | for (;;) { |
203 | unsigned long *rinp, *routp, *rexp, *inp, *outp, *exp; | 202 | unsigned long *rinp, *routp, *rexp, *inp, *outp, *exp; |
203 | long __timeout; | ||
204 | 204 | ||
205 | set_current_state(TASK_INTERRUPTIBLE); | 205 | set_current_state(TASK_INTERRUPTIBLE); |
206 | 206 | ||
@@ -255,22 +255,32 @@ int do_select(int n, fd_set_bits *fds, long *timeout) | |||
255 | *rexp = res_ex; | 255 | *rexp = res_ex; |
256 | } | 256 | } |
257 | wait = NULL; | 257 | wait = NULL; |
258 | if (retval || !__timeout || signal_pending(current)) | 258 | if (retval || !*timeout || signal_pending(current)) |
259 | break; | 259 | break; |
260 | if(table.error) { | 260 | if(table.error) { |
261 | retval = table.error; | 261 | retval = table.error; |
262 | break; | 262 | break; |
263 | } | 263 | } |
264 | |||
265 | if (*timeout < 0) { | ||
266 | /* Wait indefinitely */ | ||
267 | __timeout = MAX_SCHEDULE_TIMEOUT; | ||
268 | } else if (unlikely(*timeout >= (s64)MAX_SCHEDULE_TIMEOUT - 1)) { | ||
269 | /* Wait for longer than MAX_SCHEDULE_TIMEOUT. Do it in a loop */ | ||
270 | __timeout = MAX_SCHEDULE_TIMEOUT - 1; | ||
271 | *timeout -= __timeout; | ||
272 | } else { | ||
273 | __timeout = *timeout; | ||
274 | *timeout = 0; | ||
275 | } | ||
264 | __timeout = schedule_timeout(__timeout); | 276 | __timeout = schedule_timeout(__timeout); |
277 | if (*timeout >= 0) | ||
278 | *timeout += __timeout; | ||
265 | } | 279 | } |
266 | __set_current_state(TASK_RUNNING); | 280 | __set_current_state(TASK_RUNNING); |
267 | 281 | ||
268 | poll_freewait(&table); | 282 | poll_freewait(&table); |
269 | 283 | ||
270 | /* | ||
271 | * Up-to-date the caller timeout. | ||
272 | */ | ||
273 | *timeout = __timeout; | ||
274 | return retval; | 284 | return retval; |
275 | } | 285 | } |
276 | 286 | ||
@@ -295,36 +305,14 @@ static void select_bits_free(void *bits, int size) | |||
295 | #define MAX_SELECT_SECONDS \ | 305 | #define MAX_SELECT_SECONDS \ |
296 | ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1) | 306 | ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1) |
297 | 307 | ||
298 | asmlinkage long | 308 | static int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp, |
299 | sys_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, struct timeval __user *tvp) | 309 | fd_set __user *exp, s64 *timeout) |
300 | { | 310 | { |
301 | fd_set_bits fds; | 311 | fd_set_bits fds; |
302 | char *bits; | 312 | char *bits; |
303 | long timeout; | ||
304 | int ret, size, max_fdset; | 313 | int ret, size, max_fdset; |
305 | struct fdtable *fdt; | 314 | struct fdtable *fdt; |
306 | 315 | ||
307 | timeout = MAX_SCHEDULE_TIMEOUT; | ||
308 | if (tvp) { | ||
309 | time_t sec, usec; | ||
310 | |||
311 | if (!access_ok(VERIFY_READ, tvp, sizeof(*tvp)) | ||
312 | || __get_user(sec, &tvp->tv_sec) | ||
313 | || __get_user(usec, &tvp->tv_usec)) { | ||
314 | ret = -EFAULT; | ||
315 | goto out_nofds; | ||
316 | } | ||
317 | |||
318 | ret = -EINVAL; | ||
319 | if (sec < 0 || usec < 0) | ||
320 | goto out_nofds; | ||
321 | |||
322 | if ((unsigned long) sec < MAX_SELECT_SECONDS) { | ||
323 | timeout = ROUND_UP(usec, 1000000/HZ); | ||
324 | timeout += sec * (unsigned long) HZ; | ||
325 | } | ||
326 | } | ||
327 | |||
328 | ret = -EINVAL; | 316 | ret = -EINVAL; |
329 | if (n < 0) | 317 | if (n < 0) |
330 | goto out_nofds; | 318 | goto out_nofds; |
@@ -362,18 +350,7 @@ sys_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, s | |||
362 | zero_fd_set(n, fds.res_out); | 350 | zero_fd_set(n, fds.res_out); |
363 | zero_fd_set(n, fds.res_ex); | 351 | zero_fd_set(n, fds.res_ex); |
364 | 352 | ||
365 | ret = do_select(n, &fds, &timeout); | 353 | ret = do_select(n, &fds, timeout); |
366 | |||
367 | if (tvp && !(current->personality & STICKY_TIMEOUTS)) { | ||
368 | time_t sec = 0, usec = 0; | ||
369 | if (timeout) { | ||
370 | sec = timeout / HZ; | ||
371 | usec = timeout % HZ; | ||
372 | usec *= (1000000/HZ); | ||
373 | } | ||
374 | put_user(sec, &tvp->tv_sec); | ||
375 | put_user(usec, &tvp->tv_usec); | ||
376 | } | ||
377 | 354 | ||
378 | if (ret < 0) | 355 | if (ret < 0) |
379 | goto out; | 356 | goto out; |
@@ -395,6 +372,154 @@ out_nofds: | |||
395 | return ret; | 372 | return ret; |
396 | } | 373 | } |
397 | 374 | ||
375 | asmlinkage long sys_select(int n, fd_set __user *inp, fd_set __user *outp, | ||
376 | fd_set __user *exp, struct timeval __user *tvp) | ||
377 | { | ||
378 | s64 timeout = -1; | ||
379 | struct timeval tv; | ||
380 | int ret; | ||
381 | |||
382 | if (tvp) { | ||
383 | if (copy_from_user(&tv, tvp, sizeof(tv))) | ||
384 | return -EFAULT; | ||
385 | |||
386 | if (tv.tv_sec < 0 || tv.tv_usec < 0) | ||
387 | return -EINVAL; | ||
388 | |||
389 | /* Cast to u64 to make GCC stop complaining */ | ||
390 | if ((u64)tv.tv_sec >= (u64)MAX_INT64_SECONDS) | ||
391 | timeout = -1; /* infinite */ | ||
392 | else { | ||
393 | timeout = ROUND_UP(tv.tv_usec, USEC_PER_SEC/HZ); | ||
394 | timeout += tv.tv_sec * HZ; | ||
395 | } | ||
396 | } | ||
397 | |||
398 | ret = core_sys_select(n, inp, outp, exp, &timeout); | ||
399 | |||
400 | if (tvp) { | ||
401 | if (current->personality & STICKY_TIMEOUTS) | ||
402 | goto sticky; | ||
403 | tv.tv_usec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)); | ||
404 | tv.tv_sec = timeout; | ||
405 | if (copy_to_user(tvp, &tv, sizeof(tv))) { | ||
406 | sticky: | ||
407 | /* | ||
408 | * If an application puts its timeval in read-only | ||
409 | * memory, we don't want the Linux-specific update to | ||
410 | * the timeval to cause a fault after the select has | ||
411 | * completed successfully. However, because we're not | ||
412 | * updating the timeval, we can't restart the system | ||
413 | * call. | ||
414 | */ | ||
415 | if (ret == -ERESTARTNOHAND) | ||
416 | ret = -EINTR; | ||
417 | } | ||
418 | } | ||
419 | |||
420 | return ret; | ||
421 | } | ||
422 | |||
423 | #ifdef TIF_RESTORE_SIGMASK | ||
424 | asmlinkage long sys_pselect7(int n, fd_set __user *inp, fd_set __user *outp, | ||
425 | fd_set __user *exp, struct timespec __user *tsp, | ||
426 | const sigset_t __user *sigmask, size_t sigsetsize) | ||
427 | { | ||
428 | s64 timeout = MAX_SCHEDULE_TIMEOUT; | ||
429 | sigset_t ksigmask, sigsaved; | ||
430 | struct timespec ts; | ||
431 | int ret; | ||
432 | |||
433 | if (tsp) { | ||
434 | if (copy_from_user(&ts, tsp, sizeof(ts))) | ||
435 | return -EFAULT; | ||
436 | |||
437 | if (ts.tv_sec < 0 || ts.tv_nsec < 0) | ||
438 | return -EINVAL; | ||
439 | |||
440 | /* Cast to u64 to make GCC stop complaining */ | ||
441 | if ((u64)ts.tv_sec >= (u64)MAX_INT64_SECONDS) | ||
442 | timeout = -1; /* infinite */ | ||
443 | else { | ||
444 | timeout = ROUND_UP(ts.tv_nsec, NSEC_PER_SEC/HZ); | ||
445 | timeout += ts.tv_sec * HZ; | ||
446 | } | ||
447 | } | ||
448 | |||
449 | if (sigmask) { | ||
450 | /* XXX: Don't preclude handling different sized sigset_t's. */ | ||
451 | if (sigsetsize != sizeof(sigset_t)) | ||
452 | return -EINVAL; | ||
453 | if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask))) | ||
454 | return -EFAULT; | ||
455 | |||
456 | sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP)); | ||
457 | sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); | ||
458 | } | ||
459 | |||
460 | ret = core_sys_select(n, inp, outp, exp, &timeout); | ||
461 | |||
462 | if (tsp) { | ||
463 | if (current->personality & STICKY_TIMEOUTS) | ||
464 | goto sticky; | ||
465 | ts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) * 1000; | ||
466 | ts.tv_sec = timeout; | ||
467 | if (copy_to_user(tsp, &ts, sizeof(ts))) { | ||
468 | sticky: | ||
469 | /* | ||
470 | * If an application puts its timeval in read-only | ||
471 | * memory, we don't want the Linux-specific update to | ||
472 | * the timeval to cause a fault after the select has | ||
473 | * completed successfully. However, because we're not | ||
474 | * updating the timeval, we can't restart the system | ||
475 | * call. | ||
476 | */ | ||
477 | if (ret == -ERESTARTNOHAND) | ||
478 | ret = -EINTR; | ||
479 | } | ||
480 | } | ||
481 | |||
482 | if (ret == -ERESTARTNOHAND) { | ||
483 | /* | ||
484 | * Don't restore the signal mask yet. Let do_signal() deliver | ||
485 | * the signal on the way back to userspace, before the signal | ||
486 | * mask is restored. | ||
487 | */ | ||
488 | if (sigmask) { | ||
489 | memcpy(&current->saved_sigmask, &sigsaved, | ||
490 | sizeof(sigsaved)); | ||
491 | set_thread_flag(TIF_RESTORE_SIGMASK); | ||
492 | } | ||
493 | } else if (sigmask) | ||
494 | sigprocmask(SIG_SETMASK, &sigsaved, NULL); | ||
495 | |||
496 | return ret; | ||
497 | } | ||
498 | |||
499 | /* | ||
500 | * Most architectures can't handle 7-argument syscalls. So we provide a | ||
501 | * 6-argument version where the sixth argument is a pointer to a structure | ||
502 | * which has a pointer to the sigset_t itself followed by a size_t containing | ||
503 | * the sigset size. | ||
504 | */ | ||
505 | asmlinkage long sys_pselect6(int n, fd_set __user *inp, fd_set __user *outp, | ||
506 | fd_set __user *exp, struct timespec __user *tsp, void __user *sig) | ||
507 | { | ||
508 | size_t sigsetsize = 0; | ||
509 | sigset_t __user *up = NULL; | ||
510 | |||
511 | if (sig) { | ||
512 | if (!access_ok(VERIFY_READ, sig, sizeof(void *)+sizeof(size_t)) | ||
513 | || __get_user(up, (sigset_t * __user *)sig) | ||
514 | || __get_user(sigsetsize, | ||
515 | (size_t * __user)(sig+sizeof(void *)))) | ||
516 | return -EFAULT; | ||
517 | } | ||
518 | |||
519 | return sys_pselect7(n, inp, outp, exp, tsp, up, sigsetsize); | ||
520 | } | ||
521 | #endif /* TIF_RESTORE_SIGMASK */ | ||
522 | |||
398 | struct poll_list { | 523 | struct poll_list { |
399 | struct poll_list *next; | 524 | struct poll_list *next; |
400 | int len; | 525 | int len; |
@@ -436,16 +561,19 @@ static void do_pollfd(unsigned int num, struct pollfd * fdpage, | |||
436 | } | 561 | } |
437 | 562 | ||
438 | static int do_poll(unsigned int nfds, struct poll_list *list, | 563 | static int do_poll(unsigned int nfds, struct poll_list *list, |
439 | struct poll_wqueues *wait, long timeout) | 564 | struct poll_wqueues *wait, s64 *timeout) |
440 | { | 565 | { |
441 | int count = 0; | 566 | int count = 0; |
442 | poll_table* pt = &wait->pt; | 567 | poll_table* pt = &wait->pt; |
443 | 568 | ||
444 | if (!timeout) | 569 | /* Optimise the no-wait case */ |
570 | if (!(*timeout)) | ||
445 | pt = NULL; | 571 | pt = NULL; |
446 | 572 | ||
447 | for (;;) { | 573 | for (;;) { |
448 | struct poll_list *walk; | 574 | struct poll_list *walk; |
575 | long __timeout; | ||
576 | |||
449 | set_current_state(TASK_INTERRUPTIBLE); | 577 | set_current_state(TASK_INTERRUPTIBLE); |
450 | walk = list; | 578 | walk = list; |
451 | while(walk != NULL) { | 579 | while(walk != NULL) { |
@@ -453,18 +581,36 @@ static int do_poll(unsigned int nfds, struct poll_list *list, | |||
453 | walk = walk->next; | 581 | walk = walk->next; |
454 | } | 582 | } |
455 | pt = NULL; | 583 | pt = NULL; |
456 | if (count || !timeout || signal_pending(current)) | 584 | if (count || !*timeout || signal_pending(current)) |
457 | break; | 585 | break; |
458 | count = wait->error; | 586 | count = wait->error; |
459 | if (count) | 587 | if (count) |
460 | break; | 588 | break; |
461 | timeout = schedule_timeout(timeout); | 589 | |
590 | if (*timeout < 0) { | ||
591 | /* Wait indefinitely */ | ||
592 | __timeout = MAX_SCHEDULE_TIMEOUT; | ||
593 | } else if (unlikely(*timeout >= (s64)MAX_SCHEDULE_TIMEOUT-1)) { | ||
594 | /* | ||
595 | * Wait for longer than MAX_SCHEDULE_TIMEOUT. Do it in | ||
596 | * a loop | ||
597 | */ | ||
598 | __timeout = MAX_SCHEDULE_TIMEOUT - 1; | ||
599 | *timeout -= __timeout; | ||
600 | } else { | ||
601 | __timeout = *timeout; | ||
602 | *timeout = 0; | ||
603 | } | ||
604 | |||
605 | __timeout = schedule_timeout(__timeout); | ||
606 | if (*timeout >= 0) | ||
607 | *timeout += __timeout; | ||
462 | } | 608 | } |
463 | __set_current_state(TASK_RUNNING); | 609 | __set_current_state(TASK_RUNNING); |
464 | return count; | 610 | return count; |
465 | } | 611 | } |
466 | 612 | ||
467 | asmlinkage long sys_poll(struct pollfd __user * ufds, unsigned int nfds, long timeout) | 613 | int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, s64 *timeout) |
468 | { | 614 | { |
469 | struct poll_wqueues table; | 615 | struct poll_wqueues table; |
470 | int fdcount, err; | 616 | int fdcount, err; |
@@ -482,14 +628,6 @@ asmlinkage long sys_poll(struct pollfd __user * ufds, unsigned int nfds, long ti | |||
482 | if (nfds > max_fdset && nfds > OPEN_MAX) | 628 | if (nfds > max_fdset && nfds > OPEN_MAX) |
483 | return -EINVAL; | 629 | return -EINVAL; |
484 | 630 | ||
485 | if (timeout) { | ||
486 | /* Careful about overflow in the intermediate values */ | ||
487 | if ((unsigned long) timeout < MAX_SCHEDULE_TIMEOUT / HZ) | ||
488 | timeout = (unsigned long)(timeout*HZ+999)/1000+1; | ||
489 | else /* Negative or overflow */ | ||
490 | timeout = MAX_SCHEDULE_TIMEOUT; | ||
491 | } | ||
492 | |||
493 | poll_initwait(&table); | 631 | poll_initwait(&table); |
494 | 632 | ||
495 | head = NULL; | 633 | head = NULL; |
@@ -519,6 +657,7 @@ asmlinkage long sys_poll(struct pollfd __user * ufds, unsigned int nfds, long ti | |||
519 | } | 657 | } |
520 | i -= pp->len; | 658 | i -= pp->len; |
521 | } | 659 | } |
660 | |||
522 | fdcount = do_poll(nfds, head, &table, timeout); | 661 | fdcount = do_poll(nfds, head, &table, timeout); |
523 | 662 | ||
524 | /* OK, now copy the revents fields back to user space. */ | 663 | /* OK, now copy the revents fields back to user space. */ |
@@ -547,3 +686,98 @@ out_fds: | |||
547 | poll_freewait(&table); | 686 | poll_freewait(&table); |
548 | return err; | 687 | return err; |
549 | } | 688 | } |
689 | |||
690 | asmlinkage long sys_poll(struct pollfd __user *ufds, unsigned int nfds, | ||
691 | long timeout_msecs) | ||
692 | { | ||
693 | s64 timeout_jiffies = 0; | ||
694 | |||
695 | if (timeout_msecs) { | ||
696 | #if HZ > 1000 | ||
697 | /* We can only overflow if HZ > 1000 */ | ||
698 | if (timeout_msecs / 1000 > (s64)0x7fffffffffffffffULL / (s64)HZ) | ||
699 | timeout_jiffies = -1; | ||
700 | else | ||
701 | #endif | ||
702 | timeout_jiffies = msecs_to_jiffies(timeout_msecs); | ||
703 | } | ||
704 | |||
705 | return do_sys_poll(ufds, nfds, &timeout_jiffies); | ||
706 | } | ||
707 | |||
708 | #ifdef TIF_RESTORE_SIGMASK | ||
709 | asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds, | ||
710 | struct timespec __user *tsp, const sigset_t __user *sigmask, | ||
711 | size_t sigsetsize) | ||
712 | { | ||
713 | sigset_t ksigmask, sigsaved; | ||
714 | struct timespec ts; | ||
715 | s64 timeout = -1; | ||
716 | int ret; | ||
717 | |||
718 | if (tsp) { | ||
719 | if (copy_from_user(&ts, tsp, sizeof(ts))) | ||
720 | return -EFAULT; | ||
721 | |||
722 | /* Cast to u64 to make GCC stop complaining */ | ||
723 | if ((u64)ts.tv_sec >= (u64)MAX_INT64_SECONDS) | ||
724 | timeout = -1; /* infinite */ | ||
725 | else { | ||
726 | timeout = ROUND_UP(ts.tv_nsec, NSEC_PER_SEC/HZ); | ||
727 | timeout += ts.tv_sec * HZ; | ||
728 | } | ||
729 | } | ||
730 | |||
731 | if (sigmask) { | ||
732 | /* XXX: Don't preclude handling different sized sigset_t's. */ | ||
733 | if (sigsetsize != sizeof(sigset_t)) | ||
734 | return -EINVAL; | ||
735 | if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask))) | ||
736 | return -EFAULT; | ||
737 | |||
738 | sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP)); | ||
739 | sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); | ||
740 | } | ||
741 | |||
742 | ret = do_sys_poll(ufds, nfds, &timeout); | ||
743 | |||
744 | /* We can restart this syscall, usually */ | ||
745 | if (ret == -EINTR) { | ||
746 | /* | ||
747 | * Don't restore the signal mask yet. Let do_signal() deliver | ||
748 | * the signal on the way back to userspace, before the signal | ||
749 | * mask is restored. | ||
750 | */ | ||
751 | if (sigmask) { | ||
752 | memcpy(&current->saved_sigmask, &sigsaved, | ||
753 | sizeof(sigsaved)); | ||
754 | set_thread_flag(TIF_RESTORE_SIGMASK); | ||
755 | } | ||
756 | ret = -ERESTARTNOHAND; | ||
757 | } else if (sigmask) | ||
758 | sigprocmask(SIG_SETMASK, &sigsaved, NULL); | ||
759 | |||
760 | if (tsp && timeout >= 0) { | ||
761 | if (current->personality & STICKY_TIMEOUTS) | ||
762 | goto sticky; | ||
763 | /* Yes, we know it's actually an s64, but it's also positive. */ | ||
764 | ts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) * 1000; | ||
765 | ts.tv_sec = timeout; | ||
766 | if (copy_to_user(tsp, &ts, sizeof(ts))) { | ||
767 | sticky: | ||
768 | /* | ||
769 | * If an application puts its timeval in read-only | ||
770 | * memory, we don't want the Linux-specific update to | ||
771 | * the timeval to cause a fault after the select has | ||
772 | * completed successfully. However, because we're not | ||
773 | * updating the timeval, we can't restart the system | ||
774 | * call. | ||
775 | */ | ||
776 | if (ret == -ERESTARTNOHAND && timeout >= 0) | ||
777 | ret = -EINTR; | ||
778 | } | ||
779 | } | ||
780 | |||
781 | return ret; | ||
782 | } | ||
783 | #endif /* TIF_RESTORE_SIGMASK */ | ||