aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMichel Lespinasse <walken@google.com>2013-02-22 19:32:37 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2013-02-23 20:50:10 -0500
commitbebeb3d68b24bb4132d452c5707fe321208bcbcd (patch)
tree6e609cb7323fb1b4b7026fa0e35867145a181094
parent940e7da5163029978c2f6b5bbe213607add59062 (diff)
mm: introduce mm_populate() for populating new vmas
When creating new mappings using the MAP_POPULATE / MAP_LOCKED flags (or with MCL_FUTURE in effect), we want to populate the pages within the newly created vmas. This may take a while as we may have to read pages from disk, so ideally we want to do this outside of the write-locked mmap_sem region. This change introduces mm_populate(), which is used to defer populating such mappings until after the mmap_sem write lock has been released. This is implemented as a generalization of the former do_mlock_pages(), which accomplished the same task but was used during mlock() / mlockall(). Signed-off-by: Michel Lespinasse <walken@google.com> Reported-by: Andy Lutomirski <luto@amacapital.net> Acked-by: Rik van Riel <riel@redhat.com> Tested-by: Andy Lutomirski <luto@amacapital.net> Cc: Greg Ungerer <gregungerer@westnet.com.au> Cc: David Howells <dhowells@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--fs/aio.c6
-rw-r--r--include/linux/mm.h18
-rw-r--r--ipc/shm.c12
-rw-r--r--mm/mlock.c17
-rw-r--r--mm/mmap.c20
-rw-r--r--mm/nommu.c5
-rw-r--r--mm/util.c6
7 files changed, 62 insertions, 22 deletions
diff --git a/fs/aio.c b/fs/aio.c
index 71f613cf4a85..82eec7c7b4bb 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -103,6 +103,7 @@ static int aio_setup_ring(struct kioctx *ctx)
103 unsigned nr_events = ctx->max_reqs; 103 unsigned nr_events = ctx->max_reqs;
104 unsigned long size; 104 unsigned long size;
105 int nr_pages; 105 int nr_pages;
106 bool populate;
106 107
107 /* Compensate for the ring buffer's head/tail overlap entry */ 108 /* Compensate for the ring buffer's head/tail overlap entry */
108 nr_events += 2; /* 1 is required, 2 for good luck */ 109 nr_events += 2; /* 1 is required, 2 for good luck */
@@ -129,7 +130,8 @@ static int aio_setup_ring(struct kioctx *ctx)
129 down_write(&ctx->mm->mmap_sem); 130 down_write(&ctx->mm->mmap_sem);
130 info->mmap_base = do_mmap_pgoff(NULL, 0, info->mmap_size, 131 info->mmap_base = do_mmap_pgoff(NULL, 0, info->mmap_size,
131 PROT_READ|PROT_WRITE, 132 PROT_READ|PROT_WRITE,
132 MAP_ANONYMOUS|MAP_PRIVATE, 0); 133 MAP_ANONYMOUS|MAP_PRIVATE, 0,
134 &populate);
133 if (IS_ERR((void *)info->mmap_base)) { 135 if (IS_ERR((void *)info->mmap_base)) {
134 up_write(&ctx->mm->mmap_sem); 136 up_write(&ctx->mm->mmap_sem);
135 info->mmap_size = 0; 137 info->mmap_size = 0;
@@ -147,6 +149,8 @@ static int aio_setup_ring(struct kioctx *ctx)
147 aio_free_ring(ctx); 149 aio_free_ring(ctx);
148 return -EAGAIN; 150 return -EAGAIN;
149 } 151 }
152 if (populate)
153 mm_populate(info->mmap_base, info->mmap_size);
150 154
151 ctx->user_id = info->mmap_base; 155 ctx->user_id = info->mmap_base;
152 156
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 9d9dcc35d6a1..da0a0fe970c2 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1474,11 +1474,23 @@ extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned lo
1474extern unsigned long mmap_region(struct file *file, unsigned long addr, 1474extern unsigned long mmap_region(struct file *file, unsigned long addr,
1475 unsigned long len, unsigned long flags, 1475 unsigned long len, unsigned long flags,
1476 vm_flags_t vm_flags, unsigned long pgoff); 1476 vm_flags_t vm_flags, unsigned long pgoff);
1477extern unsigned long do_mmap_pgoff(struct file *, unsigned long, 1477extern unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
1478 unsigned long, unsigned long, 1478 unsigned long len, unsigned long prot, unsigned long flags,
1479 unsigned long, unsigned long); 1479 unsigned long pgoff, bool *populate);
1480extern int do_munmap(struct mm_struct *, unsigned long, size_t); 1480extern int do_munmap(struct mm_struct *, unsigned long, size_t);
1481 1481
1482#ifdef CONFIG_MMU
1483extern int __mm_populate(unsigned long addr, unsigned long len,
1484 int ignore_errors);
1485static inline void mm_populate(unsigned long addr, unsigned long len)
1486{
1487 /* Ignore errors */
1488 (void) __mm_populate(addr, len, 1);
1489}
1490#else
1491static inline void mm_populate(unsigned long addr, unsigned long len) {}
1492#endif
1493
1482/* These take the mm semaphore themselves */ 1494/* These take the mm semaphore themselves */
1483extern unsigned long vm_brk(unsigned long, unsigned long); 1495extern unsigned long vm_brk(unsigned long, unsigned long);
1484extern int vm_munmap(unsigned long, size_t); 1496extern int vm_munmap(unsigned long, size_t);
diff --git a/ipc/shm.c b/ipc/shm.c
index 4fa6d8fee730..9f047ba69e62 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -967,11 +967,11 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr,
967 unsigned long flags; 967 unsigned long flags;
968 unsigned long prot; 968 unsigned long prot;
969 int acc_mode; 969 int acc_mode;
970 unsigned long user_addr;
971 struct ipc_namespace *ns; 970 struct ipc_namespace *ns;
972 struct shm_file_data *sfd; 971 struct shm_file_data *sfd;
973 struct path path; 972 struct path path;
974 fmode_t f_mode; 973 fmode_t f_mode;
974 bool populate = false;
975 975
976 err = -EINVAL; 976 err = -EINVAL;
977 if (shmid < 0) 977 if (shmid < 0)
@@ -1070,13 +1070,15 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr,
1070 goto invalid; 1070 goto invalid;
1071 } 1071 }
1072 1072
1073 user_addr = do_mmap_pgoff(file, addr, size, prot, flags, 0); 1073 addr = do_mmap_pgoff(file, addr, size, prot, flags, 0, &populate);
1074 *raddr = user_addr; 1074 *raddr = addr;
1075 err = 0; 1075 err = 0;
1076 if (IS_ERR_VALUE(user_addr)) 1076 if (IS_ERR_VALUE(addr))
1077 err = (long)user_addr; 1077 err = (long)addr;
1078invalid: 1078invalid:
1079 up_write(&current->mm->mmap_sem); 1079 up_write(&current->mm->mmap_sem);
1080 if (populate)
1081 mm_populate(addr, size);
1080 1082
1081out_fput: 1083out_fput:
1082 fput(file); 1084 fput(file);
diff --git a/mm/mlock.c b/mm/mlock.c
index c9bd528b01d2..a296a49865df 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -416,7 +416,14 @@ static int do_mlock(unsigned long start, size_t len, int on)
416 return error; 416 return error;
417} 417}
418 418
419static int do_mlock_pages(unsigned long start, size_t len, int ignore_errors) 419/*
420 * __mm_populate - populate and/or mlock pages within a range of address space.
421 *
422 * This is used to implement mlock() and the MAP_POPULATE / MAP_LOCKED mmap
423 * flags. VMAs must be already marked with the desired vm_flags, and
424 * mmap_sem must not be held.
425 */
426int __mm_populate(unsigned long start, unsigned long len, int ignore_errors)
420{ 427{
421 struct mm_struct *mm = current->mm; 428 struct mm_struct *mm = current->mm;
422 unsigned long end, nstart, nend; 429 unsigned long end, nstart, nend;
@@ -498,7 +505,7 @@ SYSCALL_DEFINE2(mlock, unsigned long, start, size_t, len)
498 error = do_mlock(start, len, 1); 505 error = do_mlock(start, len, 1);
499 up_write(&current->mm->mmap_sem); 506 up_write(&current->mm->mmap_sem);
500 if (!error) 507 if (!error)
501 error = do_mlock_pages(start, len, 0); 508 error = __mm_populate(start, len, 0);
502 return error; 509 return error;
503} 510}
504 511
@@ -564,10 +571,8 @@ SYSCALL_DEFINE1(mlockall, int, flags)
564 capable(CAP_IPC_LOCK)) 571 capable(CAP_IPC_LOCK))
565 ret = do_mlockall(flags); 572 ret = do_mlockall(flags);
566 up_write(&current->mm->mmap_sem); 573 up_write(&current->mm->mmap_sem);
567 if (!ret && (flags & MCL_CURRENT)) { 574 if (!ret && (flags & MCL_CURRENT))
568 /* Ignore errors */ 575 mm_populate(0, TASK_SIZE);
569 do_mlock_pages(0, TASK_SIZE, 1);
570 }
571out: 576out:
572 return ret; 577 return ret;
573} 578}
diff --git a/mm/mmap.c b/mm/mmap.c
index 09da0b264982..9b12e3047a86 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1154,12 +1154,15 @@ static inline unsigned long round_hint_to_min(unsigned long hint)
1154 1154
1155unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, 1155unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
1156 unsigned long len, unsigned long prot, 1156 unsigned long len, unsigned long prot,
1157 unsigned long flags, unsigned long pgoff) 1157 unsigned long flags, unsigned long pgoff,
1158 bool *populate)
1158{ 1159{
1159 struct mm_struct * mm = current->mm; 1160 struct mm_struct * mm = current->mm;
1160 struct inode *inode; 1161 struct inode *inode;
1161 vm_flags_t vm_flags; 1162 vm_flags_t vm_flags;
1162 1163
1164 *populate = false;
1165
1163 /* 1166 /*
1164 * Does the application expect PROT_READ to imply PROT_EXEC? 1167 * Does the application expect PROT_READ to imply PROT_EXEC?
1165 * 1168 *
@@ -1280,7 +1283,12 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
1280 } 1283 }
1281 } 1284 }
1282 1285
1283 return mmap_region(file, addr, len, flags, vm_flags, pgoff); 1286 addr = mmap_region(file, addr, len, flags, vm_flags, pgoff);
1287 if (!IS_ERR_VALUE(addr) &&
1288 ((vm_flags & VM_LOCKED) ||
1289 (flags & (MAP_POPULATE | MAP_NONBLOCK)) == MAP_POPULATE))
1290 *populate = true;
1291 return addr;
1284} 1292}
1285 1293
1286SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len, 1294SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len,
@@ -1531,10 +1539,12 @@ out:
1531 1539
1532 vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT); 1540 vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT);
1533 if (vm_flags & VM_LOCKED) { 1541 if (vm_flags & VM_LOCKED) {
1534 if (!mlock_vma_pages_range(vma, addr, addr + len)) 1542 if (!((vm_flags & VM_SPECIAL) || is_vm_hugetlb_page(vma) ||
1543 vma == get_gate_vma(current->mm)))
1535 mm->locked_vm += (len >> PAGE_SHIFT); 1544 mm->locked_vm += (len >> PAGE_SHIFT);
1536 } else if ((flags & MAP_POPULATE) && !(flags & MAP_NONBLOCK)) 1545 else
1537 make_pages_present(addr, addr + len); 1546 vma->vm_flags &= ~VM_LOCKED;
1547 }
1538 1548
1539 if (file) 1549 if (file)
1540 uprobe_mmap(vma); 1550 uprobe_mmap(vma);
diff --git a/mm/nommu.c b/mm/nommu.c
index b20db4e22263..7296a5a280e7 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1250,7 +1250,8 @@ unsigned long do_mmap_pgoff(struct file *file,
1250 unsigned long len, 1250 unsigned long len,
1251 unsigned long prot, 1251 unsigned long prot,
1252 unsigned long flags, 1252 unsigned long flags,
1253 unsigned long pgoff) 1253 unsigned long pgoff,
1254 bool *populate)
1254{ 1255{
1255 struct vm_area_struct *vma; 1256 struct vm_area_struct *vma;
1256 struct vm_region *region; 1257 struct vm_region *region;
@@ -1260,6 +1261,8 @@ unsigned long do_mmap_pgoff(struct file *file,
1260 1261
1261 kenter(",%lx,%lx,%lx,%lx,%lx", addr, len, prot, flags, pgoff); 1262 kenter(",%lx,%lx,%lx,%lx,%lx", addr, len, prot, flags, pgoff);
1262 1263
1264 *populate = false;
1265
1263 /* decide whether we should attempt the mapping, and if so what sort of 1266 /* decide whether we should attempt the mapping, and if so what sort of
1264 * mapping */ 1267 * mapping */
1265 ret = validate_mmap_request(file, addr, len, prot, flags, pgoff, 1268 ret = validate_mmap_request(file, addr, len, prot, flags, pgoff,
diff --git a/mm/util.c b/mm/util.c
index c55e26b17d93..13467e043e9e 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -355,12 +355,16 @@ unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr,
355{ 355{
356 unsigned long ret; 356 unsigned long ret;
357 struct mm_struct *mm = current->mm; 357 struct mm_struct *mm = current->mm;
358 bool populate;
358 359
359 ret = security_mmap_file(file, prot, flag); 360 ret = security_mmap_file(file, prot, flag);
360 if (!ret) { 361 if (!ret) {
361 down_write(&mm->mmap_sem); 362 down_write(&mm->mmap_sem);
362 ret = do_mmap_pgoff(file, addr, len, prot, flag, pgoff); 363 ret = do_mmap_pgoff(file, addr, len, prot, flag, pgoff,
364 &populate);
363 up_write(&mm->mmap_sem); 365 up_write(&mm->mmap_sem);
366 if (!IS_ERR_VALUE(ret) && populate)
367 mm_populate(ret, len);
364 } 368 }
365 return ret; 369 return ret;
366} 370}