diff options
| author | Andrea Arcangeli <aarcange@redhat.com> | 2015-09-04 18:47:23 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-09-04 19:54:41 -0400 |
| commit | 2c5b7e1be74ff0175dedbbd325abe9f0dbbb09ae (patch) | |
| tree | e93f6aa423faeb6fbcd46103694d629b0a7bb82c | |
| parent | 230c92a8797e0e717c6732de0fffdd5726c0f48f (diff) | |
userfaultfd: avoid missing wakeups during refile in userfaultfd_read
During the refile in userfaultfd_read, both waitqueues could look empty to
the lockless wake_userfault(). Use a seqcount to prevent this false
negative, which could leave a userfault blocked.
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Cc: Pavel Emelyanov <xemul@parallels.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
| -rw-r--r-- | fs/userfaultfd.c | 26 |
1 file changed, 24 insertions(+), 2 deletions(-)
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index a14d63e945f4..634e676072cb 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c | |||
| @@ -45,6 +45,8 @@ struct userfaultfd_ctx { | |||
| 45 | wait_queue_head_t fault_wqh; | 45 | wait_queue_head_t fault_wqh; |
| 46 | /* waitqueue head for the pseudo fd to wakeup poll/read */ | 46 | /* waitqueue head for the pseudo fd to wakeup poll/read */ |
| 47 | wait_queue_head_t fd_wqh; | 47 | wait_queue_head_t fd_wqh; |
| 48 | /* a refile sequence protected by fault_pending_wqh lock */ | ||
| 49 | struct seqcount refile_seq; | ||
| 48 | /* pseudo fd refcounting */ | 50 | /* pseudo fd refcounting */ |
| 49 | atomic_t refcount; | 51 | atomic_t refcount; |
| 50 | /* userfaultfd syscall flags */ | 52 | /* userfaultfd syscall flags */ |
| @@ -547,6 +549,15 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait, | |||
| 547 | uwq = find_userfault(ctx); | 549 | uwq = find_userfault(ctx); |
| 548 | if (uwq) { | 550 | if (uwq) { |
| 549 | /* | 551 | /* |
| 552 | * Use a seqcount to repeat the lockless check | ||
| 553 | * in wake_userfault() to avoid missing | ||
| 554 | * wakeups because during the refile both | ||
| 555 | * waitqueue could become empty if this is the | ||
| 556 | * only userfault. | ||
| 557 | */ | ||
| 558 | write_seqcount_begin(&ctx->refile_seq); | ||
| 559 | |||
| 560 | /* | ||
| 550 | * The fault_pending_wqh.lock prevents the uwq | 561 | * The fault_pending_wqh.lock prevents the uwq |
| 551 | * to disappear from under us. | 562 | * to disappear from under us. |
| 552 | * | 563 | * |
| @@ -570,6 +581,8 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait, | |||
| 570 | list_del(&uwq->wq.task_list); | 581 | list_del(&uwq->wq.task_list); |
| 571 | __add_wait_queue(&ctx->fault_wqh, &uwq->wq); | 582 | __add_wait_queue(&ctx->fault_wqh, &uwq->wq); |
| 572 | 583 | ||
| 584 | write_seqcount_end(&ctx->refile_seq); | ||
| 585 | |||
| 573 | /* careful to always initialize msg if ret == 0 */ | 586 | /* careful to always initialize msg if ret == 0 */ |
| 574 | *msg = uwq->msg; | 587 | *msg = uwq->msg; |
| 575 | spin_unlock(&ctx->fault_pending_wqh.lock); | 588 | spin_unlock(&ctx->fault_pending_wqh.lock); |
| @@ -647,6 +660,9 @@ static void __wake_userfault(struct userfaultfd_ctx *ctx, | |||
| 647 | static __always_inline void wake_userfault(struct userfaultfd_ctx *ctx, | 660 | static __always_inline void wake_userfault(struct userfaultfd_ctx *ctx, |
| 648 | struct userfaultfd_wake_range *range) | 661 | struct userfaultfd_wake_range *range) |
| 649 | { | 662 | { |
| 663 | unsigned seq; | ||
| 664 | bool need_wakeup; | ||
| 665 | |||
| 650 | /* | 666 | /* |
| 651 | * To be sure waitqueue_active() is not reordered by the CPU | 667 | * To be sure waitqueue_active() is not reordered by the CPU |
| 652 | * before the pagetable update, use an explicit SMP memory | 668 | * before the pagetable update, use an explicit SMP memory |
| @@ -662,8 +678,13 @@ static __always_inline void wake_userfault(struct userfaultfd_ctx *ctx, | |||
| 662 | * userfaults yet. So we take the spinlock only when we're | 678 | * userfaults yet. So we take the spinlock only when we're |
| 663 | * sure we've userfaults to wake. | 679 | * sure we've userfaults to wake. |
| 664 | */ | 680 | */ |
| 665 | if (waitqueue_active(&ctx->fault_pending_wqh) || | 681 | do { |
| 666 | waitqueue_active(&ctx->fault_wqh)) | 682 | seq = read_seqcount_begin(&ctx->refile_seq); |
| 683 | need_wakeup = waitqueue_active(&ctx->fault_pending_wqh) || | ||
| 684 | waitqueue_active(&ctx->fault_wqh); | ||
| 685 | cond_resched(); | ||
| 686 | } while (read_seqcount_retry(&ctx->refile_seq, seq)); | ||
| 687 | if (need_wakeup) | ||
| 667 | __wake_userfault(ctx, range); | 688 | __wake_userfault(ctx, range); |
| 668 | } | 689 | } |
| 669 | 690 | ||
| @@ -1219,6 +1240,7 @@ static void init_once_userfaultfd_ctx(void *mem) | |||
| 1219 | init_waitqueue_head(&ctx->fault_pending_wqh); | 1240 | init_waitqueue_head(&ctx->fault_pending_wqh); |
| 1220 | init_waitqueue_head(&ctx->fault_wqh); | 1241 | init_waitqueue_head(&ctx->fault_wqh); |
| 1221 | init_waitqueue_head(&ctx->fd_wqh); | 1242 | init_waitqueue_head(&ctx->fd_wqh); |
| 1243 | seqcount_init(&ctx->refile_seq); | ||
| 1222 | } | 1244 | } |
| 1223 | 1245 | ||
| 1224 | /** | 1246 | /** |
