author:    Mike Rapoport <rppt@linux.vnet.ibm.com>        2017-02-24 17:58:22 -0500
committer: Linus Torvalds <torvalds@linux-foundation.org> 2017-02-24 20:46:55 -0500
commit:    897ab3e0c49e24b62e2d54d165c7afec6bbca65b
tree:      5fa7e09864d6c959cef33849f6cb10ed04e459e4
parent:    846b1a0f1db065a8479159dd8fecddb1ebf30547
userfaultfd: non-cooperative: add event for memory unmaps

When a non-cooperative userfaultfd monitor copies pages in the
background, it may encounter regions that were already unmapped.
Addition of UFFD_EVENT_UNMAP allows the uffd monitor to precisely track
changes in the virtual memory layout.

Since there might be different uffd contexts for the affected VMAs, we
should first create a temporary representation of the unmap event for
each uffd context, and then deliver the events one by one to the
appropriate userfault file descriptors.  The notification occurs after
the mmap_sem has been released.

[arnd@arndb.de: fix nommu build]
  Link: http://lkml.kernel.org/r/20170203165141.3665284-1-arnd@arndb.de
[mhocko@suse.com: fix nommu build]
  Link: http://lkml.kernel.org/r/20170202091503.GA22823@dhcp22.suse.cz
Link: http://lkml.kernel.org/r/1485542673-24387-3-git-send-email-rppt@linux.vnet.ibm.com
Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
Signed-off-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Pavel Emelyanov <xemul@virtuozzo.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
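For orientation, here is a minimal sketch of the userspace side: a
hypothetical non-cooperative monitor loop consuming the new event.  It
is not part of this patch; it assumes a userfaultfd file descriptor was
already created and the unmap-event feature was negotiated via
UFFDIO_API, and error handling is abbreviated.

	#include <linux/userfaultfd.h>
	#include <poll.h>
	#include <stdio.h>
	#include <unistd.h>

	/* Hypothetical monitor loop: drain events from an already-set-up
	 * uffd and drop unmapped ranges from the background-copy work. */
	static void monitor_loop(int uffd)
	{
		struct uffd_msg msg;
		struct pollfd pollfd = { .fd = uffd, .events = POLLIN };

		while (poll(&pollfd, 1, -1) > 0) {
			if (read(uffd, &msg, sizeof(msg)) != sizeof(msg))
				continue;
			if (msg.event == UFFD_EVENT_UNMAP)
				/* [start, end) is gone from the target
				 * address space; stop copying into it. */
				printf("unmap: %llx-%llx\n",
				       (unsigned long long)msg.arg.remove.start,
				       (unsigned long long)msg.arg.remove.end);
		}
	}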
Diffstat (limited to 'mm')
 mm/mmap.c   | 46
 mm/mremap.c | 23
 mm/nommu.c  |  7
 mm/util.c   |  5
 4 files changed, 53 insertions(+), 28 deletions(-)
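Every call site in the hunks below follows the same shape: collect
pending unmap events into an on-stack list while mmap_sem is held, then
notify the monitors only after the lock is dropped.  Distilled as a
sketch (names taken from the diff, surrounding code elided):

	LIST_HEAD(uf);	/* unmap events queued by do_munmap() */

	if (down_write_killable(&mm->mmap_sem))
		return -EINTR;
	ret = do_munmap(mm, start, len, &uf);	/* userfaultfd_unmap_prep() fills &uf */
	up_write(&mm->mmap_sem);
	userfaultfd_unmap_complete(mm, &uf);	/* deliver without mmap_sem held */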
diff --git a/mm/mmap.c b/mm/mmap.c
index 13d16a2b7623..1cec28d20583 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -176,7 +176,7 @@ static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
 	return next;
 }
 
-static int do_brk(unsigned long addr, unsigned long len);
+static int do_brk(unsigned long addr, unsigned long len, struct list_head *uf);
 
 SYSCALL_DEFINE1(brk, unsigned long, brk)
 {
@@ -185,6 +185,7 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
 	struct mm_struct *mm = current->mm;
 	unsigned long min_brk;
 	bool populate;
+	LIST_HEAD(uf);
 
 	if (down_write_killable(&mm->mmap_sem))
 		return -EINTR;
@@ -222,7 +223,7 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
 
 	/* Always allow shrinking brk. */
 	if (brk <= mm->brk) {
-		if (!do_munmap(mm, newbrk, oldbrk-newbrk))
+		if (!do_munmap(mm, newbrk, oldbrk-newbrk, &uf))
 			goto set_brk;
 		goto out;
 	}
@@ -232,13 +233,14 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
 		goto out;
 
 	/* Ok, looks good - let it rip. */
-	if (do_brk(oldbrk, newbrk-oldbrk) < 0)
+	if (do_brk(oldbrk, newbrk-oldbrk, &uf) < 0)
 		goto out;
 
 set_brk:
 	mm->brk = brk;
 	populate = newbrk > oldbrk && (mm->def_flags & VM_LOCKED) != 0;
 	up_write(&mm->mmap_sem);
+	userfaultfd_unmap_complete(mm, &uf);
 	if (populate)
 		mm_populate(oldbrk, newbrk - oldbrk);
 	return brk;
@@ -1304,7 +1306,8 @@ static inline int mlock_future_check(struct mm_struct *mm,
 unsigned long do_mmap(struct file *file, unsigned long addr,
 			unsigned long len, unsigned long prot,
 			unsigned long flags, vm_flags_t vm_flags,
-			unsigned long pgoff, unsigned long *populate)
+			unsigned long pgoff, unsigned long *populate,
+			struct list_head *uf)
 {
 	struct mm_struct *mm = current->mm;
 	int pkey = 0;
@@ -1447,7 +1450,7 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
 		vm_flags |= VM_NORESERVE;
 	}
 
-	addr = mmap_region(file, addr, len, vm_flags, pgoff);
+	addr = mmap_region(file, addr, len, vm_flags, pgoff, uf);
 	if (!IS_ERR_VALUE(addr) &&
 	    ((vm_flags & VM_LOCKED) ||
 	     (flags & (MAP_POPULATE | MAP_NONBLOCK)) == MAP_POPULATE))
@@ -1583,7 +1586,8 @@ static inline int accountable_mapping(struct file *file, vm_flags_t vm_flags)
 }
 
 unsigned long mmap_region(struct file *file, unsigned long addr,
-		unsigned long len, vm_flags_t vm_flags, unsigned long pgoff)
+		unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
+		struct list_head *uf)
 {
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma, *prev;
@@ -1609,7 +1613,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 	/* Clear old maps */
 	while (find_vma_links(mm, addr, addr + len, &prev, &rb_link,
 			      &rb_parent)) {
-		if (do_munmap(mm, addr, len))
+		if (do_munmap(mm, addr, len, uf))
 			return -ENOMEM;
 	}
 
@@ -2579,7 +2583,8 @@ int split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
  * work.  This now handles partial unmappings.
  * Jeremy Fitzhardinge <jeremy@goop.org>
  */
-int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
+int do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
+	      struct list_head *uf)
 {
 	unsigned long end;
 	struct vm_area_struct *vma, *prev, *last;
@@ -2603,6 +2608,13 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
 	if (vma->vm_start >= end)
 		return 0;
 
+	if (uf) {
+		int error = userfaultfd_unmap_prep(vma, start, end, uf);
+
+		if (error)
+			return error;
+	}
+
 	/*
 	 * If we need to split any vma, do it now to save pain later.
 	 *
@@ -2668,12 +2680,14 @@ int vm_munmap(unsigned long start, size_t len)
 {
 	int ret;
 	struct mm_struct *mm = current->mm;
+	LIST_HEAD(uf);
 
 	if (down_write_killable(&mm->mmap_sem))
 		return -EINTR;
 
-	ret = do_munmap(mm, start, len);
+	ret = do_munmap(mm, start, len, &uf);
 	up_write(&mm->mmap_sem);
+	userfaultfd_unmap_complete(mm, &uf);
 	return ret;
 }
 EXPORT_SYMBOL(vm_munmap);
@@ -2773,7 +2787,7 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
 
 	file = get_file(vma->vm_file);
 	ret = do_mmap_pgoff(vma->vm_file, start, size,
-			prot, flags, pgoff, &populate);
+			prot, flags, pgoff, &populate, NULL);
 	fput(file);
 out:
 	up_write(&mm->mmap_sem);
@@ -2799,7 +2813,7 @@ static inline void verify_mm_writelocked(struct mm_struct *mm)
  * anonymous maps.  eventually we may be able to do some
  * brk-specific accounting here.
  */
-static int do_brk_flags(unsigned long addr, unsigned long request, unsigned long flags)
+static int do_brk_flags(unsigned long addr, unsigned long request, unsigned long flags, struct list_head *uf)
 {
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma, *prev;
@@ -2838,7 +2852,7 @@ static int do_brk_flags(unsigned long addr, unsigned long request, unsigned long
 	 */
 	while (find_vma_links(mm, addr, addr + len, &prev, &rb_link,
 			      &rb_parent)) {
-		if (do_munmap(mm, addr, len))
+		if (do_munmap(mm, addr, len, uf))
 			return -ENOMEM;
 	}
 
@@ -2885,9 +2899,9 @@ out:
 	return 0;
 }
 
-static int do_brk(unsigned long addr, unsigned long len)
+static int do_brk(unsigned long addr, unsigned long len, struct list_head *uf)
 {
-	return do_brk_flags(addr, len, 0);
+	return do_brk_flags(addr, len, 0, uf);
 }
 
 int vm_brk_flags(unsigned long addr, unsigned long len, unsigned long flags)
@@ -2895,13 +2909,15 @@ int vm_brk_flags(unsigned long addr, unsigned long len, unsigned long flags)
 	struct mm_struct *mm = current->mm;
 	int ret;
 	bool populate;
+	LIST_HEAD(uf);
 
 	if (down_write_killable(&mm->mmap_sem))
 		return -EINTR;
 
-	ret = do_brk_flags(addr, len, flags);
+	ret = do_brk_flags(addr, len, flags, &uf);
 	populate = ((mm->def_flags & VM_LOCKED) != 0);
 	up_write(&mm->mmap_sem);
+	userfaultfd_unmap_complete(mm, &uf);
 	if (populate && !ret)
 		mm_populate(addr, len);
 	return ret;
diff --git a/mm/mremap.c b/mm/mremap.c
index 8779928d6a70..8233b0105c82 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -252,7 +252,8 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
 static unsigned long move_vma(struct vm_area_struct *vma,
 		unsigned long old_addr, unsigned long old_len,
 		unsigned long new_len, unsigned long new_addr,
-		bool *locked, struct vm_userfaultfd_ctx *uf)
+		bool *locked, struct vm_userfaultfd_ctx *uf,
+		struct list_head *uf_unmap)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	struct vm_area_struct *new_vma;
@@ -341,7 +342,7 @@ static unsigned long move_vma(struct vm_area_struct *vma,
 	if (unlikely(vma->vm_flags & VM_PFNMAP))
 		untrack_pfn_moved(vma);
 
-	if (do_munmap(mm, old_addr, old_len) < 0) {
+	if (do_munmap(mm, old_addr, old_len, uf_unmap) < 0) {
 		/* OOM: unable to split vma, just get accounts right */
 		vm_unacct_memory(excess >> PAGE_SHIFT);
 		excess = 0;
@@ -417,7 +418,8 @@ static struct vm_area_struct *vma_to_resize(unsigned long addr,
 
 static unsigned long mremap_to(unsigned long addr, unsigned long old_len,
 		unsigned long new_addr, unsigned long new_len, bool *locked,
-		struct vm_userfaultfd_ctx *uf)
+		struct vm_userfaultfd_ctx *uf,
+		struct list_head *uf_unmap)
 {
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma;
@@ -435,12 +437,12 @@ static unsigned long mremap_to(unsigned long addr, unsigned long old_len,
 	if (addr + old_len > new_addr && new_addr + new_len > addr)
 		goto out;
 
-	ret = do_munmap(mm, new_addr, new_len);
+	ret = do_munmap(mm, new_addr, new_len, NULL);
 	if (ret)
 		goto out;
 
 	if (old_len >= new_len) {
-		ret = do_munmap(mm, addr+new_len, old_len - new_len);
+		ret = do_munmap(mm, addr+new_len, old_len - new_len, uf_unmap);
 		if (ret && old_len != new_len)
 			goto out;
 		old_len = new_len;
@@ -462,7 +464,8 @@ static unsigned long mremap_to(unsigned long addr, unsigned long old_len,
 	if (offset_in_page(ret))
 		goto out1;
 
-	ret = move_vma(vma, addr, old_len, new_len, new_addr, locked, uf);
+	ret = move_vma(vma, addr, old_len, new_len, new_addr, locked, uf,
+		       uf_unmap);
 	if (!(offset_in_page(ret)))
 		goto out;
 out1:
@@ -502,6 +505,7 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
 	unsigned long charged = 0;
 	bool locked = false;
 	struct vm_userfaultfd_ctx uf = NULL_VM_UFFD_CTX;
+	LIST_HEAD(uf_unmap);
 
 	if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE))
 		return ret;
@@ -528,7 +532,7 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
 
 	if (flags & MREMAP_FIXED) {
 		ret = mremap_to(addr, old_len, new_addr, new_len,
-				&locked, &uf);
+				&locked, &uf, &uf_unmap);
 		goto out;
 	}
 
@@ -538,7 +542,7 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
 	 * do_munmap does all the needed commit accounting
 	 */
 	if (old_len >= new_len) {
-		ret = do_munmap(mm, addr+new_len, old_len - new_len);
+		ret = do_munmap(mm, addr+new_len, old_len - new_len, &uf_unmap);
 		if (ret && old_len != new_len)
 			goto out;
 		ret = addr;
@@ -598,7 +602,7 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
 		}
 
 		ret = move_vma(vma, addr, old_len, new_len, new_addr,
-			       &locked, &uf);
+			       &locked, &uf, &uf_unmap);
 	}
 out:
 	if (offset_in_page(ret)) {
@@ -609,5 +613,6 @@ out:
 	if (locked && new_len > old_len)
 		mm_populate(new_addr + old_len, new_len - old_len);
 	mremap_userfaultfd_complete(&uf, addr, new_addr, old_len);
+	userfaultfd_unmap_complete(mm, &uf_unmap);
 	return ret;
 }
diff --git a/mm/nommu.c b/mm/nommu.c
index 215c62296028..fe9f4fa4a7a7 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1205,7 +1205,8 @@ unsigned long do_mmap(struct file *file,
 			unsigned long flags,
 			vm_flags_t vm_flags,
 			unsigned long pgoff,
-			unsigned long *populate)
+			unsigned long *populate,
+			struct list_head *uf)
 {
 	struct vm_area_struct *vma;
 	struct vm_region *region;
@@ -1577,7 +1578,7 @@ static int shrink_vma(struct mm_struct *mm,
  * - under NOMMU conditions the chunk to be unmapped must be backed by a single
  *   VMA, though it need not cover the whole VMA
  */
-int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
+int do_munmap(struct mm_struct *mm, unsigned long start, size_t len, struct list_head *uf)
 {
 	struct vm_area_struct *vma;
 	unsigned long end;
@@ -1643,7 +1644,7 @@ int vm_munmap(unsigned long addr, size_t len)
 	int ret;
 
 	down_write(&mm->mmap_sem);
-	ret = do_munmap(mm, addr, len);
+	ret = do_munmap(mm, addr, len, NULL);
 	up_write(&mm->mmap_sem);
 	return ret;
 }
diff --git a/mm/util.c b/mm/util.c
index 3cb2164f4099..b8f538863b5a 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -11,6 +11,7 @@
 #include <linux/mman.h>
 #include <linux/hugetlb.h>
 #include <linux/vmalloc.h>
+#include <linux/userfaultfd_k.h>
 
 #include <asm/sections.h>
 #include <linux/uaccess.h>
@@ -297,14 +298,16 @@ unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr,
 	unsigned long ret;
 	struct mm_struct *mm = current->mm;
 	unsigned long populate;
+	LIST_HEAD(uf);
 
 	ret = security_mmap_file(file, prot, flag);
 	if (!ret) {
 		if (down_write_killable(&mm->mmap_sem))
 			return -EINTR;
 		ret = do_mmap_pgoff(file, addr, len, prot, flag, pgoff,
-				    &populate);
+				    &populate, &uf);
 		up_write(&mm->mmap_sem);
+		userfaultfd_unmap_complete(mm, &uf);
 		if (populate)
 			mm_populate(ret, populate);
 	}