Diffstat (limited to 'fs/userfaultfd.c')
-rw-r--r--	fs/userfaultfd.c	37
1 file changed, 35 insertions(+), 2 deletions(-)
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index d96e2f30084b..43953e03c356 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -63,6 +63,7 @@ struct userfaultfd_wait_queue {
 	struct uffd_msg msg;
 	wait_queue_t wq;
 	struct userfaultfd_ctx *ctx;
+	bool waken;
 };
 
 struct userfaultfd_wake_range {
@@ -86,6 +87,12 @@ static int userfaultfd_wake_function(wait_queue_t *wq, unsigned mode,
 	if (len && (start > uwq->msg.arg.pagefault.address ||
 		    start + len <= uwq->msg.arg.pagefault.address))
 		goto out;
+	WRITE_ONCE(uwq->waken, true);
+	/*
+	 * The implicit smp_mb__before_spinlock in try_to_wake_up()
+	 * renders uwq->waken visible to other CPUs before the task is
+	 * woken.
+	 */
 	ret = wake_up_state(wq->private, mode);
 	if (ret)
 		/*
@@ -264,6 +271,7 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason)
 	struct userfaultfd_wait_queue uwq;
 	int ret;
 	bool must_wait, return_to_userland;
+	long blocking_state;
 
 	BUG_ON(!rwsem_is_locked(&mm->mmap_sem));
 
@@ -334,10 +342,13 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason)
 	uwq.wq.private = current;
 	uwq.msg = userfault_msg(vmf->address, vmf->flags, reason);
 	uwq.ctx = ctx;
+	uwq.waken = false;
 
 	return_to_userland =
 		(vmf->flags & (FAULT_FLAG_USER|FAULT_FLAG_KILLABLE)) ==
 		(FAULT_FLAG_USER|FAULT_FLAG_KILLABLE);
+	blocking_state = return_to_userland ? TASK_INTERRUPTIBLE :
+			 TASK_KILLABLE;
 
 	spin_lock(&ctx->fault_pending_wqh.lock);
 	/*
@@ -350,8 +361,7 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason)
 	 * following the spin_unlock to happen before the list_add in
 	 * __add_wait_queue.
 	 */
-	set_current_state(return_to_userland ? TASK_INTERRUPTIBLE :
-			  TASK_KILLABLE);
+	set_current_state(blocking_state);
 	spin_unlock(&ctx->fault_pending_wqh.lock);
 
 	must_wait = userfaultfd_must_wait(ctx, vmf->address, vmf->flags,
@@ -364,6 +374,29 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason)
 		wake_up_poll(&ctx->fd_wqh, POLLIN);
 		schedule();
 		ret |= VM_FAULT_MAJOR;
+
+		/*
+		 * False wakeups can originate even from rwsem before
+		 * up_read(); however, userfaults will wait either for a
+		 * targeted wakeup on the specific uwq waitqueue from
+		 * wake_userfault() or for signals or for uffd
+		 * release.
+		 */
+		while (!READ_ONCE(uwq.waken)) {
+			/*
+			 * This needs the full smp_store_mb()
+			 * guarantee as the state write must be
+			 * visible to other CPUs before reading
+			 * uwq.waken from other CPUs.
+			 */
+			set_current_state(blocking_state);
+			if (READ_ONCE(uwq.waken) ||
+			    READ_ONCE(ctx->released) ||
+			    (return_to_userland ? signal_pending(current) :
+			     fatal_signal_pending(current)))
+				break;
+			schedule();
+		}
 	}
 
 	__set_current_state(TASK_RUNNING);
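
The synchronization pattern this patch adds can be shown in isolation. Below is a minimal, self-contained userspace sketch in C11, not kernel code: all names here (waken, waiter, waker) are illustrative assumptions, and the pthread mutex stands in for the memory-ordering guarantees that set_current_state() and try_to_wake_up() provide in the kernel. The waker publishes the flag before issuing the wakeup, mirroring WRITE_ONCE(uwq->waken, true) before wake_up_state(); the waiter re-checks the flag after every wakeup, because wakeups can be spurious, much like the false rwsem wakeups the patch guards against.

/*
 * Userspace analogue of the patch's wait loop (illustrative only).
 * Build with: cc -pthread sketch.c
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static atomic_bool waken;

/* Waker: publish the flag before the wakeup, as the patch does with
 * WRITE_ONCE(uwq->waken, true) ahead of wake_up_state(). */
static void *waker(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&lock);
	atomic_store(&waken, true);	/* flag visible before the wakeup */
	pthread_cond_signal(&cond);
	pthread_mutex_unlock(&lock);
	return NULL;
}

/* Waiter: re-check the flag after every wakeup. pthread_cond_wait()
 * may wake spuriously; only the flag proves the wakeup was the
 * targeted one, so a false wakeup just loops back to sleep. */
static void *waiter(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&lock);
	while (!atomic_load(&waken))	/* tolerate false wakeups */
		pthread_cond_wait(&cond, &lock);
	pthread_mutex_unlock(&lock);
	printf("targeted wakeup received\n");
	return NULL;
}

int main(void)
{
	pthread_t t1, t2;

	pthread_create(&t1, NULL, waiter, NULL);
	pthread_create(&t2, NULL, waker, NULL);
	pthread_join(t1, NULL);
	pthread_join(t2, NULL);
	return 0;
}

The kernel cannot hold a mutex across the check-and-sleep, which is why the patch leans on the full smp_store_mb() semantics of set_current_state(): the TASK_* state write must be visible to other CPUs before uwq.waken is re-read, or a wakeup could slip in between the check and the sleep and the task would miss it.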
