Diffstat (limited to 'fs/userfaultfd.c')

 fs/userfaultfd.c | 37 +++++++++++++++++++++++++++++++++++--
 1 file changed, 35 insertions(+), 2 deletions(-)
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index d96e2f30084b..43953e03c356 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -63,6 +63,7 @@ struct userfaultfd_wait_queue {
 	struct uffd_msg msg;
 	wait_queue_t wq;
 	struct userfaultfd_ctx *ctx;
+	bool waken;
 };
 
 struct userfaultfd_wake_range {
@@ -86,6 +87,12 @@ static int userfaultfd_wake_function(wait_queue_t *wq, unsigned mode,
 	if (len && (start > uwq->msg.arg.pagefault.address ||
 		    start + len <= uwq->msg.arg.pagefault.address))
 		goto out;
+	WRITE_ONCE(uwq->waken, true);
+	/*
+	 * The implicit smp_mb__before_spinlock in try_to_wake_up()
+	 * renders uwq->waken visible to other CPUs before the task is
+	 * woken.
+	 */
 	ret = wake_up_state(wq->private, mode);
 	if (ret)
 		/*
@@ -264,6 +271,7 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason)
 	struct userfaultfd_wait_queue uwq;
 	int ret;
 	bool must_wait, return_to_userland;
+	long blocking_state;
 
 	BUG_ON(!rwsem_is_locked(&mm->mmap_sem));
 
@@ -334,10 +342,13 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason)
 	uwq.wq.private = current;
 	uwq.msg = userfault_msg(vmf->address, vmf->flags, reason);
 	uwq.ctx = ctx;
+	uwq.waken = false;
 
 	return_to_userland =
 		(vmf->flags & (FAULT_FLAG_USER|FAULT_FLAG_KILLABLE)) ==
 		(FAULT_FLAG_USER|FAULT_FLAG_KILLABLE);
+	blocking_state = return_to_userland ? TASK_INTERRUPTIBLE :
+			 TASK_KILLABLE;
 
 	spin_lock(&ctx->fault_pending_wqh.lock);
 	/*
@@ -350,8 +361,7 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason)
 	 * following the spin_unlock to happen before the list_add in
 	 * __add_wait_queue.
 	 */
-	set_current_state(return_to_userland ? TASK_INTERRUPTIBLE :
-			  TASK_KILLABLE);
+	set_current_state(blocking_state);
 	spin_unlock(&ctx->fault_pending_wqh.lock);
 
 	must_wait = userfaultfd_must_wait(ctx, vmf->address, vmf->flags,
@@ -364,6 +374,29 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason)
 		wake_up_poll(&ctx->fd_wqh, POLLIN);
 		schedule();
 		ret |= VM_FAULT_MAJOR;
+
+		/*
+		 * False wakeups can originate even from rwsem before
+		 * up_read(); however, userfaults will wait either for a
+		 * targeted wakeup on the specific uwq waitqueue from
+		 * wake_userfault() or for signals or for uffd
+		 * release.
+		 */
+		while (!READ_ONCE(uwq.waken)) {
+			/*
+			 * This needs the full smp_store_mb()
+			 * guarantee as the state write must be
+			 * visible to other CPUs before reading
+			 * uwq.waken from other CPUs.
+			 */
+			set_current_state(blocking_state);
+			if (READ_ONCE(uwq.waken) ||
+			    READ_ONCE(ctx->released) ||
+			    (return_to_userland ? signal_pending(current) :
+			     fatal_signal_pending(current)))
+				break;
+			schedule();
+		}
 	}
 
 	__set_current_state(TASK_RUNNING);
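
The patch boils down to a classic lockless wait/wake protocol: the waker publishes a flag before issuing the wakeup, and the sleeper re-checks the flag in a loop after setting its task state, because the wakeup it just got may have been spurious (here, an rwsem wakeup before up_read()). For readers who want to experiment with the same pattern outside the kernel, below is a minimal user-space sketch. It is an analogy only, not kernel code: a futex stands in for schedule()/wake_up_state(), a C11 seq_cst atomic stands in for WRITE_ONCE()/READ_ONCE() plus the implied barriers, and the names (the futex() wrapper, waken, sleeper) are chosen purely for illustration.

/* Build with: gcc -pthread waken.c */
#define _GNU_SOURCE
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>
#include <linux/futex.h>
#include <sys/syscall.h>

static atomic_int waken;	/* plays the role of uwq.waken */

static long futex(atomic_int *uaddr, int op, int val)
{
	return syscall(SYS_futex, uaddr, op, val, NULL, NULL, 0);
}

static void *sleeper(void *arg)
{
	/*
	 * One wait is not enough: FUTEX_WAIT, like schedule(), can
	 * return without the flag having been set, so loop until the
	 * targeted wakeup really happened.
	 */
	while (!atomic_load(&waken)) {
		/*
		 * FUTEX_WAIT atomically re-checks *uaddr against the
		 * expected value before blocking, mirroring the
		 * patch's set_current_state() followed by
		 * READ_ONCE(uwq.waken) before schedule().
		 */
		futex(&waken, FUTEX_WAIT, 0);
	}
	puts("sleeper: targeted wakeup observed");
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, sleeper, NULL);
	sleep(1);
	/*
	 * Waker side, as in userfaultfd_wake_function(): publish the
	 * flag first (the seq_cst store stands in for the barrier
	 * implied by try_to_wake_up()), then issue the wakeup.
	 */
	atomic_store(&waken, 1);
	futex(&waken, FUTEX_WAKE, 1);
	pthread_join(t, NULL);
	return 0;
}

The ordering requirement is the same in both settings: if the sleeper blocked without re-checking the flag after announcing its intent to sleep, a wakeup delivered in that window would be lost. In the sketch the FUTEX_WAIT value check closes that window; in the patch it is closed by the full smp_store_mb() guarantee of set_current_state() followed by the READ_ONCE(uwq.waken) re-check.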