aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAndrea Arcangeli <aarcange@redhat.com>2015-09-04 18:47:23 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2015-09-04 19:54:41 -0400
commit2c5b7e1be74ff0175dedbbd325abe9f0dbbb09ae (patch)
treee93f6aa423faeb6fbcd46103694d629b0a7bb82c
parent230c92a8797e0e717c6732de0fffdd5726c0f48f (diff)
userfaultfd: avoid missing wakeups during refile in userfaultfd_read
During the refile in userfaultfd_read both waitqueues could look empty to the lockless wake_userfault(). Use a seqcount to prevent this false negative that could leave a userfault blocked. Signed-off-by: Andrea Arcangeli <aarcange@redhat.com> Cc: Pavel Emelyanov <xemul@parallels.com> Cc: Dave Hansen <dave.hansen@intel.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--fs/userfaultfd.c26
1 file changed, 24 insertions(+), 2 deletions(-)
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index a14d63e945f4..634e676072cb 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -45,6 +45,8 @@ struct userfaultfd_ctx {
45 wait_queue_head_t fault_wqh; 45 wait_queue_head_t fault_wqh;
46 /* waitqueue head for the pseudo fd to wakeup poll/read */ 46 /* waitqueue head for the pseudo fd to wakeup poll/read */
47 wait_queue_head_t fd_wqh; 47 wait_queue_head_t fd_wqh;
48 /* a refile sequence protected by fault_pending_wqh lock */
49 struct seqcount refile_seq;
48 /* pseudo fd refcounting */ 50 /* pseudo fd refcounting */
49 atomic_t refcount; 51 atomic_t refcount;
50 /* userfaultfd syscall flags */ 52 /* userfaultfd syscall flags */
@@ -547,6 +549,15 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait,
547 uwq = find_userfault(ctx); 549 uwq = find_userfault(ctx);
548 if (uwq) { 550 if (uwq) {
549 /* 551 /*
552 * Use a seqcount to repeat the lockless check
553 * in wake_userfault() to avoid missing
554 * wakeups because during the refile both
555 * waitqueue could become empty if this is the
556 * only userfault.
557 */
558 write_seqcount_begin(&ctx->refile_seq);
559
560 /*
550 * The fault_pending_wqh.lock prevents the uwq 561 * The fault_pending_wqh.lock prevents the uwq
551 * to disappear from under us. 562 * to disappear from under us.
552 * 563 *
@@ -570,6 +581,8 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait,
570 list_del(&uwq->wq.task_list); 581 list_del(&uwq->wq.task_list);
571 __add_wait_queue(&ctx->fault_wqh, &uwq->wq); 582 __add_wait_queue(&ctx->fault_wqh, &uwq->wq);
572 583
584 write_seqcount_end(&ctx->refile_seq);
585
573 /* careful to always initialize msg if ret == 0 */ 586 /* careful to always initialize msg if ret == 0 */
574 *msg = uwq->msg; 587 *msg = uwq->msg;
575 spin_unlock(&ctx->fault_pending_wqh.lock); 588 spin_unlock(&ctx->fault_pending_wqh.lock);
@@ -647,6 +660,9 @@ static void __wake_userfault(struct userfaultfd_ctx *ctx,
647static __always_inline void wake_userfault(struct userfaultfd_ctx *ctx, 660static __always_inline void wake_userfault(struct userfaultfd_ctx *ctx,
648 struct userfaultfd_wake_range *range) 661 struct userfaultfd_wake_range *range)
649{ 662{
663 unsigned seq;
664 bool need_wakeup;
665
650 /* 666 /*
651 * To be sure waitqueue_active() is not reordered by the CPU 667 * To be sure waitqueue_active() is not reordered by the CPU
652 * before the pagetable update, use an explicit SMP memory 668 * before the pagetable update, use an explicit SMP memory
@@ -662,8 +678,13 @@ static __always_inline void wake_userfault(struct userfaultfd_ctx *ctx,
662 * userfaults yet. So we take the spinlock only when we're 678 * userfaults yet. So we take the spinlock only when we're
663 * sure we've userfaults to wake. 679 * sure we've userfaults to wake.
664 */ 680 */
665 if (waitqueue_active(&ctx->fault_pending_wqh) || 681 do {
666 waitqueue_active(&ctx->fault_wqh)) 682 seq = read_seqcount_begin(&ctx->refile_seq);
683 need_wakeup = waitqueue_active(&ctx->fault_pending_wqh) ||
684 waitqueue_active(&ctx->fault_wqh);
685 cond_resched();
686 } while (read_seqcount_retry(&ctx->refile_seq, seq));
687 if (need_wakeup)
667 __wake_userfault(ctx, range); 688 __wake_userfault(ctx, range);
668} 689}
669 690
@@ -1219,6 +1240,7 @@ static void init_once_userfaultfd_ctx(void *mem)
1219 init_waitqueue_head(&ctx->fault_pending_wqh); 1240 init_waitqueue_head(&ctx->fault_pending_wqh);
1220 init_waitqueue_head(&ctx->fault_wqh); 1241 init_waitqueue_head(&ctx->fault_wqh);
1221 init_waitqueue_head(&ctx->fd_wqh); 1242 init_waitqueue_head(&ctx->fd_wqh);
1243 seqcount_init(&ctx->refile_seq);
1222} 1244}
1223 1245
1224/** 1246/**