Diffstat (limited to 'fs/userfaultfd.c')
 fs/userfaultfd.c | 75 ++++++++++++++++++++++++++-------------------------------------------------
 1 file changed, 26 insertions(+), 49 deletions(-)
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 973607df579d..1d227b0fcf49 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -138,8 +138,6 @@ out:
  * userfaultfd_ctx_get - Acquires a reference to the internal userfaultfd
  * context.
  * @ctx: [in] Pointer to the userfaultfd context.
- *
- * Returns: In case of success, returns not zero.
  */
 static void userfaultfd_ctx_get(struct userfaultfd_ctx *ctx)
 {
@@ -267,6 +265,7 @@ static inline bool userfaultfd_must_wait(struct userfaultfd_ctx *ctx,
 {
 	struct mm_struct *mm = ctx->mm;
 	pgd_t *pgd;
+	p4d_t *p4d;
 	pud_t *pud;
 	pmd_t *pmd, _pmd;
 	pte_t *pte;
@@ -277,7 +276,10 @@ static inline bool userfaultfd_must_wait(struct userfaultfd_ctx *ctx,
 	pgd = pgd_offset(mm, address);
 	if (!pgd_present(*pgd))
 		goto out;
-	pud = pud_offset(pgd, address);
+	p4d = p4d_offset(pgd, address);
+	if (!p4d_present(*p4d))
+		goto out;
+	pud = pud_offset(p4d, address);
 	if (!pud_present(*pud))
 		goto out;
 	pmd = pmd_offset(pud, address);
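
The hunk above adds the p4d step that the 5-level page-table series introduced between pgd and pud. On architectures still built with four or fewer levels the new step folds away at compile time; the following is a minimal sketch of that folding, modeled loosely on the generic no-p4d helpers (simplified and hedged, not the verbatim kernel header):

	/* Sketch: with a folded p4d, the p4d entry simply aliases the
	 * pgd entry, so the extra walk step costs nothing at runtime. */
	typedef struct { pgd_t pgd; } p4d_t;

	static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
	{
		return (p4d_t *)pgd;		/* same entry, new type */
	}

	static inline int p4d_present(p4d_t p4d)
	{
		return pgd_present(p4d.pgd);	/* check passes through */
	}
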
@@ -490,7 +492,7 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason)
 			 * in such case.
 			 */
 			down_read(&mm->mmap_sem);
-			ret = 0;
+			ret = VM_FAULT_NOPAGE;
 		}
 	}
 
@@ -527,10 +529,11 @@ out:
 	return ret;
 }
 
-static int userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
-					     struct userfaultfd_wait_queue *ewq)
+static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
+					      struct userfaultfd_wait_queue *ewq)
 {
-	int ret = 0;
+	if (WARN_ON_ONCE(current->flags & PF_EXITING))
+		goto out;
 
 	ewq->ctx = ctx;
 	init_waitqueue_entry(&ewq->wq, current);
@@ -547,8 +550,16 @@ static int userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
 			break;
 		if (ACCESS_ONCE(ctx->released) ||
 		    fatal_signal_pending(current)) {
-			ret = -1;
 			__remove_wait_queue(&ctx->event_wqh, &ewq->wq);
+			if (ewq->msg.event == UFFD_EVENT_FORK) {
+				struct userfaultfd_ctx *new;
+
+				new = (struct userfaultfd_ctx *)
+					(unsigned long)
+					ewq->msg.arg.reserved.reserved1;
+
+				userfaultfd_ctx_put(new);
+			}
 			break;
 		}
 
@@ -566,9 +577,8 @@ static int userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
 	 * ctx may go away after this if the userfault pseudo fd is
 	 * already released.
 	 */
-
+out:
 	userfaultfd_ctx_put(ctx);
-	return ret;
 }
 
 static void userfaultfd_event_complete(struct userfaultfd_ctx *ctx,
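
Two things happen in the hunks above: userfaultfd_event_wait_completion() loses its return value (waiting is now unconditional, guarded only by the PF_EXITING warning), and the abort path learns to drop the reference that dup_fctx() stashed for the child's context in msg.arg.reserved.reserved1, which would otherwise leak when a fork event can no longer be delivered. For orientation, once the event is delivered a non-cooperative monitor sees it as UFFD_EVENT_FORK carrying a fresh userfault fd for the child. Below is a minimal, hedged userspace sketch of that consumer (it assumes a userfaultfd already set up with UFFD_FEATURE_EVENT_FORK negotiated; error handling is trimmed):

	#include <linux/userfaultfd.h>
	#include <unistd.h>

	/* Sketch: drain one event from an already-initialized userfaultfd. */
	static void handle_one_event(int uffd)
	{
		struct uffd_msg msg;

		if (read(uffd, &msg, sizeof(msg)) != sizeof(msg))
			return;			/* nothing (complete) to read */

		if (msg.event == UFFD_EVENT_FORK) {
			/* The kernel swaps its internal context pointer for a
			 * real file descriptor before the message reaches
			 * userspace. */
			int child_uffd = (int)msg.arg.fork.ufd;

			/* Unused in this sketch, so release it. */
			close(child_uffd);
		}
	}
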
@@ -626,7 +636,7 @@ int dup_userfaultfd(struct vm_area_struct *vma, struct list_head *fcs)
 	return 0;
 }
 
-static int dup_fctx(struct userfaultfd_fork_ctx *fctx)
+static void dup_fctx(struct userfaultfd_fork_ctx *fctx)
 {
 	struct userfaultfd_ctx *ctx = fctx->orig;
 	struct userfaultfd_wait_queue ewq;
@@ -636,17 +646,15 @@ static int dup_fctx(struct userfaultfd_fork_ctx *fctx)
 	ewq.msg.event = UFFD_EVENT_FORK;
 	ewq.msg.arg.reserved.reserved1 = (unsigned long)fctx->new;
 
-	return userfaultfd_event_wait_completion(ctx, &ewq);
+	userfaultfd_event_wait_completion(ctx, &ewq);
 }
 
 void dup_userfaultfd_complete(struct list_head *fcs)
 {
-	int ret = 0;
 	struct userfaultfd_fork_ctx *fctx, *n;
 
 	list_for_each_entry_safe(fctx, n, fcs, list) {
-		if (!ret)
-			ret = dup_fctx(fctx);
+		dup_fctx(fctx);
 		list_del(&fctx->list);
 		kfree(fctx);
 	}
@@ -689,8 +697,7 @@ void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *vm_ctx,
 	userfaultfd_event_wait_completion(ctx, &ewq);
 }
 
-void userfaultfd_remove(struct vm_area_struct *vma,
-			struct vm_area_struct **prev,
+bool userfaultfd_remove(struct vm_area_struct *vma,
 			unsigned long start, unsigned long end)
 {
 	struct mm_struct *mm = vma->vm_mm;
@@ -699,13 +706,11 @@ void userfaultfd_remove(struct vm_area_struct *vma,
 
 	ctx = vma->vm_userfaultfd_ctx.ctx;
 	if (!ctx || !(ctx->features & UFFD_FEATURE_EVENT_REMOVE))
-		return;
+		return true;
 
 	userfaultfd_ctx_get(ctx);
 	up_read(&mm->mmap_sem);
 
-	*prev = NULL; /* We wait for ACK w/o the mmap semaphore */
-
 	msg_init(&ewq.msg);
 
 	ewq.msg.event = UFFD_EVENT_REMOVE;
@@ -714,7 +719,7 @@ void userfaultfd_remove(struct vm_area_struct *vma,
 
 	userfaultfd_event_wait_completion(ctx, &ewq);
 
-	down_read(&mm->mmap_sem);
+	return false;
 }
 
 static bool has_unmap_ctx(struct userfaultfd_ctx *ctx, struct list_head *unmaps,
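
userfaultfd_remove() now reports through its bool result whether mmap_sem is still held on return: true means no UFFD_EVENT_REMOVE was sent and the lock was never dropped; false means the function released the lock to wait for the monitor's ACK and did not re-take it (the old *prev = NULL back-channel is gone). A hedged sketch of the caller pattern this implies, assuming a madvise()-style user; names and details are illustrative, not verbatim:

	/* Sketch: if the lock was dropped, re-take it and revalidate,
	 * since vma (and any cached prev pointer) may be stale. */
	if (!userfaultfd_remove(vma, start, end)) {
		down_read(&mm->mmap_sem);
		vma = find_vma(mm, start);
		if (!vma || start < vma->vm_start)
			return -ENOMEM;	/* range vanished while unlocked */
	}
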
@@ -775,34 +780,6 @@ void userfaultfd_unmap_complete(struct mm_struct *mm, struct list_head *uf)
 	}
 }
 
-void userfaultfd_exit(struct mm_struct *mm)
-{
-	struct vm_area_struct *vma = mm->mmap;
-
-	/*
-	 * We can do the vma walk without locking because the caller
-	 * (exit_mm) knows it now has exclusive access
-	 */
-	while (vma) {
-		struct userfaultfd_ctx *ctx = vma->vm_userfaultfd_ctx.ctx;
-
-		if (ctx && (ctx->features & UFFD_FEATURE_EVENT_EXIT)) {
-			struct userfaultfd_wait_queue ewq;
-
-			userfaultfd_ctx_get(ctx);
-
-			msg_init(&ewq.msg);
-			ewq.msg.event = UFFD_EVENT_EXIT;
-
-			userfaultfd_event_wait_completion(ctx, &ewq);
-
-			ctx->features &= ~UFFD_FEATURE_EVENT_EXIT;
-		}
-
-		vma = vma->vm_next;
-	}
-}
-
 static int userfaultfd_release(struct inode *inode, struct file *file)
 {
 	struct userfaultfd_ctx *ctx = file->private_data;