about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- fs/userfaultfd.c | 35 +++++++++++++++++++++++++++++++---
1 file changed, 32 insertions(+), 3 deletions(-)
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index af88ef6fffff..a14d63e945f4 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -262,7 +262,7 @@ int handle_userfault(struct vm_area_struct *vma, unsigned long address,
 	struct userfaultfd_ctx *ctx;
 	struct userfaultfd_wait_queue uwq;
 	int ret;
-	bool must_wait;
+	bool must_wait, return_to_userland;
 
 	BUG_ON(!rwsem_is_locked(&mm->mmap_sem));
 
@@ -327,6 +327,9 @@ int handle_userfault(struct vm_area_struct *vma, unsigned long address,
 	uwq.msg = userfault_msg(address, flags, reason);
 	uwq.ctx = ctx;
 
+	return_to_userland = (flags & (FAULT_FLAG_USER|FAULT_FLAG_KILLABLE)) ==
+		(FAULT_FLAG_USER|FAULT_FLAG_KILLABLE);
+
 	spin_lock(&ctx->fault_pending_wqh.lock);
 	/*
 	 * After the __add_wait_queue the uwq is visible to userland
@@ -338,14 +341,16 @@ int handle_userfault(struct vm_area_struct *vma, unsigned long address,
 	 * following the spin_unlock to happen before the list_add in
 	 * __add_wait_queue.
 	 */
-	set_current_state(TASK_KILLABLE);
+	set_current_state(return_to_userland ? TASK_INTERRUPTIBLE :
+			  TASK_KILLABLE);
 	spin_unlock(&ctx->fault_pending_wqh.lock);
 
 	must_wait = userfaultfd_must_wait(ctx, address, flags, reason);
 	up_read(&mm->mmap_sem);
 
 	if (likely(must_wait && !ACCESS_ONCE(ctx->released) &&
-		   !fatal_signal_pending(current))) {
+		   (return_to_userland ? !signal_pending(current) :
+		    !fatal_signal_pending(current)))) {
 		wake_up_poll(&ctx->fd_wqh, POLLIN);
 		schedule();
 		ret |= VM_FAULT_MAJOR;
@@ -353,6 +358,30 @@ int handle_userfault(struct vm_area_struct *vma, unsigned long address,
 
 	__set_current_state(TASK_RUNNING);
 
+	if (return_to_userland) {
+		if (signal_pending(current) &&
+		    !fatal_signal_pending(current)) {
+			/*
+			 * If we got a SIGSTOP or SIGCONT and this is
+			 * a normal userland page fault, just let
+			 * userland return so the signal will be
+			 * handled and gdb debugging works. The page
+			 * fault code immediately after we return from
+			 * this function is going to release the
+			 * mmap_sem and it's not depending on it
+			 * (unlike gup would if we were not to return
+			 * VM_FAULT_RETRY).
+			 *
+			 * If a fatal signal is pending we still take
+			 * the streamlined VM_FAULT_RETRY failure path
+			 * and there's no need to retake the mmap_sem
+			 * in such case.
+			 */
+			down_read(&mm->mmap_sem);
+			ret = 0;
+		}
+	}
+
 	/*
 	 * Here we race with the list_del; list_add in
 	 * userfaultfd_ctx_read(), however because we don't ever run