about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- fs/userfaultfd.c | 35 +++++++++++++++++++++++++++++++---
1 file changed, 32 insertions(+), 3 deletions(-)
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index af88ef6fffff..a14d63e945f4 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -262,7 +262,7 @@ int handle_userfault(struct vm_area_struct *vma, unsigned long address,
 	struct userfaultfd_ctx *ctx;
 	struct userfaultfd_wait_queue uwq;
 	int ret;
-	bool must_wait;
+	bool must_wait, return_to_userland;
 
 	BUG_ON(!rwsem_is_locked(&mm->mmap_sem));
 
@@ -327,6 +327,9 @@ int handle_userfault(struct vm_area_struct *vma, unsigned long address,
 	uwq.msg = userfault_msg(address, flags, reason);
 	uwq.ctx = ctx;
 
+	return_to_userland = (flags & (FAULT_FLAG_USER|FAULT_FLAG_KILLABLE)) ==
+		(FAULT_FLAG_USER|FAULT_FLAG_KILLABLE);
+
 	spin_lock(&ctx->fault_pending_wqh.lock);
 	/*
 	 * After the __add_wait_queue the uwq is visible to userland
@@ -338,14 +341,16 @@ int handle_userfault(struct vm_area_struct *vma, unsigned long address,
 	 * following the spin_unlock to happen before the list_add in
 	 * __add_wait_queue.
 	 */
-	set_current_state(TASK_KILLABLE);
+	set_current_state(return_to_userland ? TASK_INTERRUPTIBLE :
+			  TASK_KILLABLE);
 	spin_unlock(&ctx->fault_pending_wqh.lock);
 
 	must_wait = userfaultfd_must_wait(ctx, address, flags, reason);
 	up_read(&mm->mmap_sem);
 
 	if (likely(must_wait && !ACCESS_ONCE(ctx->released) &&
-		   !fatal_signal_pending(current))) {
+		   (return_to_userland ? !signal_pending(current) :
+		    !fatal_signal_pending(current)))) {
 		wake_up_poll(&ctx->fd_wqh, POLLIN);
 		schedule();
 		ret |= VM_FAULT_MAJOR;
@@ -353,6 +358,30 @@ int handle_userfault(struct vm_area_struct *vma, unsigned long address,
 
 	__set_current_state(TASK_RUNNING);
 
+	if (return_to_userland) {
+		if (signal_pending(current) &&
+		    !fatal_signal_pending(current)) {
+			/*
+			 * If we got a SIGSTOP or SIGCONT and this is
+			 * a normal userland page fault, just let
+			 * userland return so the signal will be
+			 * handled and gdb debugging works. The page
+			 * fault code immediately after we return from
+			 * this function is going to release the
+			 * mmap_sem and it's not depending on it
+			 * (unlike gup would if we were not to return
+			 * VM_FAULT_RETRY).
+			 *
+			 * If a fatal signal is pending we still take
+			 * the streamlined VM_FAULT_RETRY failure path
+			 * and there's no need to retake the mmap_sem
+			 * in such case.
+			 */
+			down_read(&mm->mmap_sem);
+			ret = 0;
+		}
+	}
+
 	/*
 	 * Here we race with the list_del; list_add in
 	 * userfaultfd_ctx_read(), however because we don't ever run