author     Michel Lespinasse <walken@google.com>            2013-02-22 19:32:37 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>   2013-02-23 20:50:10 -0500
commit     bebeb3d68b24bb4132d452c5707fe321208bcbcd
tree       6e609cb7323fb1b4b7026fa0e35867145a181094 /mm
parent     940e7da5163029978c2f6b5bbe213607add59062
mm: introduce mm_populate() for populating new vmas
When creating new mappings using the MAP_POPULATE / MAP_LOCKED flags (or
with MCL_FUTURE in effect), we want to populate the pages within the
newly created vmas. This may take a while as we may have to read pages
from disk, so ideally we want to do this outside of the write-locked
mmap_sem region.
This change introduces mm_populate(), which is used to defer populating
such mappings until after the mmap_sem write lock has been released.
This is implemented as a generalization of the former do_mlock_pages(),
which accomplished the same task but was only used during mlock() /
mlockall().
Signed-off-by: Michel Lespinasse <walken@google.com>
Reported-by: Andy Lutomirski <luto@amacapital.net>
Acked-by: Rik van Riel <riel@redhat.com>
Tested-by: Andy Lutomirski <luto@amacapital.net>
Cc: Greg Ungerer <gregungerer@westnet.com.au>
Cc: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
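
For reference, the user-visible behavior covered by the deferred population is ordinary mmap()/mlockall() usage: mappings created with MAP_POPULATE or MAP_LOCKED, or created while MCL_FUTURE is in effect. The snippet below is an illustrative userspace sketch, not part of this patch, showing two such call paths; with this change the kernel-side population runs via mm_populate() after the mmap_sem write lock has been released.

/* Illustrative userspace sketch (not part of this patch): both paths below
 * end up in the deferred mm_populate() call added by this change. */
#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

int main(void)
{
	size_t len = 16 * 4096;

	/* MCL_FUTURE: newly created mappings become VM_LOCKED, so they are
	 * populated right after the mmap_sem write lock is dropped. */
	if (mlockall(MCL_CURRENT | MCL_FUTURE))
		perror("mlockall");	/* may fail without CAP_IPC_LOCK */

	/* MAP_POPULATE: ask for the pages to be faulted in at mmap() time;
	 * with this patch that work no longer holds the mmap_sem write lock. */
	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS | MAP_POPULATE, -1, 0);
	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	memset(p, 0, len);	/* touch the mapping; population was requested up front */
	munmap(p, len);
	return 0;
}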
Diffstat (limited to 'mm')
 mm/mlock.c | 17
 mm/mmap.c  | 20
 mm/nommu.c |  5
 mm/util.c  |  6
4 files changed, 35 insertions, 13 deletions
diff --git a/mm/mlock.c b/mm/mlock.c
index c9bd528b01d2..a296a49865df 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -416,7 +416,14 @@ static int do_mlock(unsigned long start, size_t len, int on)
 	return error;
 }
 
-static int do_mlock_pages(unsigned long start, size_t len, int ignore_errors)
+/*
+ * __mm_populate - populate and/or mlock pages within a range of address space.
+ *
+ * This is used to implement mlock() and the MAP_POPULATE / MAP_LOCKED mmap
+ * flags. VMAs must be already marked with the desired vm_flags, and
+ * mmap_sem must not be held.
+ */
+int __mm_populate(unsigned long start, unsigned long len, int ignore_errors)
 {
 	struct mm_struct *mm = current->mm;
 	unsigned long end, nstart, nend;
@@ -498,7 +505,7 @@ SYSCALL_DEFINE2(mlock, unsigned long, start, size_t, len)
 	error = do_mlock(start, len, 1);
 	up_write(&current->mm->mmap_sem);
 	if (!error)
-		error = do_mlock_pages(start, len, 0);
+		error = __mm_populate(start, len, 0);
 	return error;
 }
 
@@ -564,10 +571,8 @@ SYSCALL_DEFINE1(mlockall, int, flags)
 	    capable(CAP_IPC_LOCK))
 		ret = do_mlockall(flags);
 	up_write(&current->mm->mmap_sem);
-	if (!ret && (flags & MCL_CURRENT)) {
-		/* Ignore errors */
-		do_mlock_pages(0, TASK_SIZE, 1);
-	}
+	if (!ret && (flags & MCL_CURRENT))
+		mm_populate(0, TASK_SIZE);
 out:
 	return ret;
 }
diff --git a/mm/mmap.c b/mm/mmap.c
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1154,12 +1154,15 @@ static inline unsigned long round_hint_to_min(unsigned long hint)
 
 unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
 			unsigned long len, unsigned long prot,
-			unsigned long flags, unsigned long pgoff)
+			unsigned long flags, unsigned long pgoff,
+			bool *populate)
 {
 	struct mm_struct * mm = current->mm;
 	struct inode *inode;
 	vm_flags_t vm_flags;
 
+	*populate = false;
+
 	/*
 	 * Does the application expect PROT_READ to imply PROT_EXEC?
 	 *
@@ -1280,7 +1283,12 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
 		}
 	}
 
-	return mmap_region(file, addr, len, flags, vm_flags, pgoff);
+	addr = mmap_region(file, addr, len, flags, vm_flags, pgoff);
+	if (!IS_ERR_VALUE(addr) &&
+	    ((vm_flags & VM_LOCKED) ||
+	     (flags & (MAP_POPULATE | MAP_NONBLOCK)) == MAP_POPULATE))
+		*populate = true;
+	return addr;
 }
 
 SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len,
@@ -1531,10 +1539,12 @@ out:
 
 	vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT);
 	if (vm_flags & VM_LOCKED) {
-		if (!mlock_vma_pages_range(vma, addr, addr + len))
+		if (!((vm_flags & VM_SPECIAL) || is_vm_hugetlb_page(vma) ||
+					vma == get_gate_vma(current->mm)))
 			mm->locked_vm += (len >> PAGE_SHIFT);
-	} else if ((flags & MAP_POPULATE) && !(flags & MAP_NONBLOCK))
-		make_pages_present(addr, addr + len);
+		else
+			vma->vm_flags &= ~VM_LOCKED;
+	}
 
 	if (file)
 		uprobe_mmap(vma);
diff --git a/mm/nommu.c b/mm/nommu.c
index b20db4e22263..7296a5a280e7 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1250,7 +1250,8 @@ unsigned long do_mmap_pgoff(struct file *file,
 			unsigned long len,
 			unsigned long prot,
 			unsigned long flags,
-			unsigned long pgoff)
+			unsigned long pgoff,
+			bool *populate)
 {
 	struct vm_area_struct *vma;
 	struct vm_region *region;
@@ -1260,6 +1261,8 @@ unsigned long do_mmap_pgoff(struct file *file,
 
 	kenter(",%lx,%lx,%lx,%lx,%lx", addr, len, prot, flags, pgoff);
 
+	*populate = false;
+
 	/* decide whether we should attempt the mapping, and if so what sort of
 	 * mapping */
 	ret = validate_mmap_request(file, addr, len, prot, flags, pgoff,
diff --git a/mm/util.c b/mm/util.c
--- a/mm/util.c
+++ b/mm/util.c
@@ -355,12 +355,16 @@ unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr,
 {
 	unsigned long ret;
 	struct mm_struct *mm = current->mm;
+	bool populate;
 
 	ret = security_mmap_file(file, prot, flag);
 	if (!ret) {
 		down_write(&mm->mmap_sem);
-		ret = do_mmap_pgoff(file, addr, len, prot, flag, pgoff);
+		ret = do_mmap_pgoff(file, addr, len, prot, flag, pgoff,
+				    &populate);
 		up_write(&mm->mmap_sem);
+		if (!IS_ERR_VALUE(ret) && populate)
+			mm_populate(ret, len);
 	}
 	return ret;
 }