aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Pavel Emelyanov <xemul@parallels.com> 2017-02-22 18:42:34 -0500
committer: Linus Torvalds <torvalds@linux-foundation.org> 2017-02-22 19:41:28 -0500
commit: 72f87654c69690ff4721bd9b4a39983f971de9a5 (patch)
tree: 0e607734e047d2dc7379b186c5c8c57e9276fd8a
parent: d3aadc8ed4cb447981ecf34f9af71cddc6cf907d (diff)
userfaultfd: non-cooperative: add mremap() event
The event denotes that an area [start:end] moves to different location.
Length change isn't reported as "new" addresses, if they appear on the
uffd reader side they will not contain any data and the latter can just
zeromap them.

Waiting for the event ACK is also done outside of mmap sem, as for fork
event.

Link: http://lkml.kernel.org/r/20161216144821.5183-12-aarcange@redhat.com
Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Cc: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: Michael Rapoport <RAPOPORT@il.ibm.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- fs/userfaultfd.c 37
-rw-r--r-- include/linux/userfaultfd_k.h 17
-rw-r--r-- include/uapi/linux/userfaultfd.h 11
-rw-r--r-- mm/mremap.c 17
4 files changed, 76 insertions, 6 deletions
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 27978f249016..68f978beefac 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -596,6 +596,43 @@ void dup_userfaultfd_complete(struct list_head *fcs)
596 } 596 }
597} 597}
598 598
/*
 * mremap_userfaultfd_prep - remember the userfaultfd context of a VMA that
 * is being moved so that a remap event can be sent for it later.
 *
 * @vma:    VMA whose vm_userfaultfd_ctx is inspected.
 * @vm_ctx: output; its ->ctx is set only when @vma has a userfaultfd context
 *          with UFFD_FEATURE_EVENT_REMAP enabled. Otherwise it is left
 *          untouched, so the caller must pre-initialise it (the mremap()
 *          hunk in this commit uses NULL_VM_UFFD_CTX).
 *
 * When the context is recorded, userfaultfd_ctx_get() is called on it.
 * NOTE(review): presumably this takes a reference that the matching
 * mremap_userfaultfd_complete() call is responsible for releasing - confirm
 * against userfaultfd_ctx_get()/userfaultfd_ctx_put().
 */
599void mremap_userfaultfd_prep(struct vm_area_struct *vma,
600 struct vm_userfaultfd_ctx *vm_ctx)
601{
602 struct userfaultfd_ctx *ctx;
603
604 ctx = vma->vm_userfaultfd_ctx.ctx;
/* Only contexts that opted in to remap events are handed to the caller. */
605 if (ctx && (ctx->features & UFFD_FEATURE_EVENT_REMAP)) {
606 vm_ctx->ctx = ctx;
607 userfaultfd_ctx_get(ctx);
608 }
609}
610
/*
 * mremap_userfaultfd_complete - deliver a UFFD_EVENT_REMAP message for a
 * context previously recorded by mremap_userfaultfd_prep().
 *
 * @vm_ctx: context holder filled by mremap_userfaultfd_prep(); a NULL ->ctx
 *          means no event was requested and the function returns at once.
 * @from:   reported in msg.arg.remap.from.
 * @to:     reported in msg.arg.remap.to.
 * @len:    reported in msg.arg.remap.len.
 *
 * In this commit the only caller is the mremap() syscall, which invokes it
 * after up_write(&current->mm->mmap_sem), passing the original address, the
 * new address and the old length.
 */
611void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx vm_ctx,
612 unsigned long from, unsigned long to,
613 unsigned long len)
614{
615 struct userfaultfd_ctx *ctx = vm_ctx.ctx;
616 struct userfaultfd_wait_queue ewq;
617
618 if (!ctx)
619 return;
620
/*
 * A destination that is not page aligned produces no event; the context
 * is released with userfaultfd_ctx_put() instead.
 * NOTE(review): presumably an unaligned 'to' means the remap failed
 * (mremap() itself uses offset_in_page(ret) to detect errors) - confirm.
 */
621 if (to & ~PAGE_MASK) {
622 userfaultfd_ctx_put(ctx);
623 return;
624 }
625
626 msg_init(&ewq.msg);
627
628 ewq.msg.event = UFFD_EVENT_REMAP;
629 ewq.msg.arg.remap.from = from;
630 ewq.msg.arg.remap.to = to;
631 ewq.msg.arg.remap.len = len;
632
/* Hand the filled-in message to the event helper, which waits for completion. */
633 userfaultfd_event_wait_completion(ctx, &ewq);
634}
635
599static int userfaultfd_release(struct inode *inode, struct file *file) 636static int userfaultfd_release(struct inode *inode, struct file *file)
600{ 637{
601 struct userfaultfd_ctx *ctx = file->private_data; 638 struct userfaultfd_ctx *ctx = file->private_data;
diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
index 79002bca1f43..7f318a46044b 100644
--- a/include/linux/userfaultfd_k.h
+++ b/include/linux/userfaultfd_k.h
@@ -55,6 +55,12 @@ static inline bool userfaultfd_armed(struct vm_area_struct *vma)
55extern int dup_userfaultfd(struct vm_area_struct *, struct list_head *); 55extern int dup_userfaultfd(struct vm_area_struct *, struct list_head *);
56extern void dup_userfaultfd_complete(struct list_head *); 56extern void dup_userfaultfd_complete(struct list_head *);
57 57
58extern void mremap_userfaultfd_prep(struct vm_area_struct *,
59 struct vm_userfaultfd_ctx *);
60extern void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx,
61 unsigned long from, unsigned long to,
62 unsigned long len);
63
58#else /* CONFIG_USERFAULTFD */ 64#else /* CONFIG_USERFAULTFD */
59 65
60/* mm helpers */ 66/* mm helpers */
@@ -89,6 +95,17 @@ static inline void dup_userfaultfd_complete(struct list_head *l)
89{ 95{
90} 96}
91 97
/*
 * Empty stand-in for mremap_userfaultfd_prep() in the !CONFIG_USERFAULTFD
 * branch of this header: does nothing and leaves *ctx untouched.
 */
98static inline void mremap_userfaultfd_prep(struct vm_area_struct *vma,
99 struct vm_userfaultfd_ctx *ctx)
100{
101}
102
/*
 * Empty stand-in for mremap_userfaultfd_complete() in the
 * !CONFIG_USERFAULTFD branch of this header: no event is generated.
 */
103static inline void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx ctx,
104 unsigned long from,
105 unsigned long to,
106 unsigned long len)
107{
108}
92#endif /* CONFIG_USERFAULTFD */ 109#endif /* CONFIG_USERFAULTFD */
93 110
94#endif /* _LINUX_USERFAULTFD_K_H */ 111#endif /* _LINUX_USERFAULTFD_K_H */
diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h
index c8953c84fdcc..79a85e5bd388 100644
--- a/include/uapi/linux/userfaultfd.h
+++ b/include/uapi/linux/userfaultfd.h
@@ -18,7 +18,8 @@
18 * means the userland is reading). 18 * means the userland is reading).
19 */ 19 */
20#define UFFD_API ((__u64)0xAA) 20#define UFFD_API ((__u64)0xAA)
21#define UFFD_API_FEATURES (UFFD_FEATURE_EVENT_FORK) 21#define UFFD_API_FEATURES (UFFD_FEATURE_EVENT_FORK | \
22 UFFD_FEATURE_EVENT_REMAP)
22#define UFFD_API_IOCTLS \ 23#define UFFD_API_IOCTLS \
23 ((__u64)1 << _UFFDIO_REGISTER | \ 24 ((__u64)1 << _UFFDIO_REGISTER | \
24 (__u64)1 << _UFFDIO_UNREGISTER | \ 25 (__u64)1 << _UFFDIO_UNREGISTER | \
@@ -77,6 +78,12 @@ struct uffd_msg {
77 } fork; 78 } fork;
78 79
79 struct { 80 struct {
81 __u64 from;
82 __u64 to;
83 __u64 len;
84 } remap;
85
86 struct {
80 /* unused reserved fields */ 87 /* unused reserved fields */
81 __u64 reserved1; 88 __u64 reserved1;
82 __u64 reserved2; 89 __u64 reserved2;
@@ -90,6 +97,7 @@ struct uffd_msg {
90 */ 97 */
91#define UFFD_EVENT_PAGEFAULT 0x12 98#define UFFD_EVENT_PAGEFAULT 0x12
92#define UFFD_EVENT_FORK 0x13 99#define UFFD_EVENT_FORK 0x13
100#define UFFD_EVENT_REMAP 0x14
93 101
94/* flags for UFFD_EVENT_PAGEFAULT */ 102/* flags for UFFD_EVENT_PAGEFAULT */
95#define UFFD_PAGEFAULT_FLAG_WRITE (1<<0) /* If this was a write fault */ 103#define UFFD_PAGEFAULT_FLAG_WRITE (1<<0) /* If this was a write fault */
@@ -110,6 +118,7 @@ struct uffdio_api {
110 */ 118 */
111#define UFFD_FEATURE_PAGEFAULT_FLAG_WP (1<<0) 119#define UFFD_FEATURE_PAGEFAULT_FLAG_WP (1<<0)
112#define UFFD_FEATURE_EVENT_FORK (1<<1) 120#define UFFD_FEATURE_EVENT_FORK (1<<1)
121#define UFFD_FEATURE_EVENT_REMAP (1<<2)
113 __u64 features; 122 __u64 features;
114 123
115 __u64 ioctls; 124 __u64 ioctls;
diff --git a/mm/mremap.c b/mm/mremap.c
index 30d7d2482eea..504b560c013c 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -22,6 +22,7 @@
22#include <linux/mmu_notifier.h> 22#include <linux/mmu_notifier.h>
23#include <linux/uaccess.h> 23#include <linux/uaccess.h>
24#include <linux/mm-arch-hooks.h> 24#include <linux/mm-arch-hooks.h>
25#include <linux/userfaultfd_k.h>
25 26
26#include <asm/cacheflush.h> 27#include <asm/cacheflush.h>
27#include <asm/tlbflush.h> 28#include <asm/tlbflush.h>
@@ -250,7 +251,8 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
250 251
251static unsigned long move_vma(struct vm_area_struct *vma, 252static unsigned long move_vma(struct vm_area_struct *vma,
252 unsigned long old_addr, unsigned long old_len, 253 unsigned long old_addr, unsigned long old_len,
253 unsigned long new_len, unsigned long new_addr, bool *locked) 254 unsigned long new_len, unsigned long new_addr,
255 bool *locked, struct vm_userfaultfd_ctx *uf)
254{ 256{
255 struct mm_struct *mm = vma->vm_mm; 257 struct mm_struct *mm = vma->vm_mm;
256 struct vm_area_struct *new_vma; 258 struct vm_area_struct *new_vma;
@@ -309,6 +311,7 @@ static unsigned long move_vma(struct vm_area_struct *vma,
309 old_addr = new_addr; 311 old_addr = new_addr;
310 new_addr = err; 312 new_addr = err;
311 } else { 313 } else {
314 mremap_userfaultfd_prep(new_vma, uf);
312 arch_remap(mm, old_addr, old_addr + old_len, 315 arch_remap(mm, old_addr, old_addr + old_len,
313 new_addr, new_addr + new_len); 316 new_addr, new_addr + new_len);
314 } 317 }
@@ -413,7 +416,8 @@ static struct vm_area_struct *vma_to_resize(unsigned long addr,
413} 416}
414 417
415static unsigned long mremap_to(unsigned long addr, unsigned long old_len, 418static unsigned long mremap_to(unsigned long addr, unsigned long old_len,
416 unsigned long new_addr, unsigned long new_len, bool *locked) 419 unsigned long new_addr, unsigned long new_len, bool *locked,
420 struct vm_userfaultfd_ctx *uf)
417{ 421{
418 struct mm_struct *mm = current->mm; 422 struct mm_struct *mm = current->mm;
419 struct vm_area_struct *vma; 423 struct vm_area_struct *vma;
@@ -458,7 +462,7 @@ static unsigned long mremap_to(unsigned long addr, unsigned long old_len,
458 if (offset_in_page(ret)) 462 if (offset_in_page(ret))
459 goto out1; 463 goto out1;
460 464
461 ret = move_vma(vma, addr, old_len, new_len, new_addr, locked); 465 ret = move_vma(vma, addr, old_len, new_len, new_addr, locked, uf);
462 if (!(offset_in_page(ret))) 466 if (!(offset_in_page(ret)))
463 goto out; 467 goto out;
464out1: 468out1:
@@ -497,6 +501,7 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
497 unsigned long ret = -EINVAL; 501 unsigned long ret = -EINVAL;
498 unsigned long charged = 0; 502 unsigned long charged = 0;
499 bool locked = false; 503 bool locked = false;
504 struct vm_userfaultfd_ctx uf = NULL_VM_UFFD_CTX;
500 505
501 if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE)) 506 if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE))
502 return ret; 507 return ret;
@@ -523,7 +528,7 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
523 528
524 if (flags & MREMAP_FIXED) { 529 if (flags & MREMAP_FIXED) {
525 ret = mremap_to(addr, old_len, new_addr, new_len, 530 ret = mremap_to(addr, old_len, new_addr, new_len,
526 &locked); 531 &locked, &uf);
527 goto out; 532 goto out;
528 } 533 }
529 534
@@ -592,7 +597,8 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
592 goto out; 597 goto out;
593 } 598 }
594 599
595 ret = move_vma(vma, addr, old_len, new_len, new_addr, &locked); 600 ret = move_vma(vma, addr, old_len, new_len, new_addr,
601 &locked, &uf);
596 } 602 }
597out: 603out:
598 if (offset_in_page(ret)) { 604 if (offset_in_page(ret)) {
@@ -602,5 +608,6 @@ out:
602 up_write(&current->mm->mmap_sem); 608 up_write(&current->mm->mmap_sem);
603 if (locked && new_len > old_len) 609 if (locked && new_len > old_len)
604 mm_populate(new_addr + old_len, new_len - old_len); 610 mm_populate(new_addr + old_len, new_len - old_len);
611 mremap_userfaultfd_complete(uf, addr, new_addr, old_len);
605 return ret; 612 return ret;
606} 613}