about | summary | refs | log | tree | commit | diff | stats
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r-- mm/fremap.c 2
-rw-r--r-- mm/hugetlb.c 28
-rw-r--r-- mm/memory.c 2
-rw-r--r-- mm/migrate.c 2
-rw-r--r-- mm/mlock.c 48
-rw-r--r-- mm/mmap.c 48
-rw-r--r-- mm/mprotect.c 5
-rw-r--r-- mm/page-writeback.c 20
-rw-r--r-- mm/page_cgroup.c 3
-rw-r--r-- mm/rmap.c 3
-rw-r--r-- mm/slub.c 2
11 files changed, 85 insertions, 78 deletions
diff --git a/mm/fremap.c b/mm/fremap.c
index 736ba7f3306a..b6ec85abbb39 100644
--- a/mm/fremap.c
+++ b/mm/fremap.c
@@ -198,7 +198,7 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
198 flags &= MAP_NONBLOCK; 198 flags &= MAP_NONBLOCK;
199 get_file(file); 199 get_file(file);
200 addr = mmap_region(file, start, size, 200 addr = mmap_region(file, start, size,
201 flags, vma->vm_flags, pgoff, 1); 201 flags, vma->vm_flags, pgoff);
202 fput(file); 202 fput(file);
203 if (IS_ERR_VALUE(addr)) { 203 if (IS_ERR_VALUE(addr)) {
204 err = addr; 204 err = addr;
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 618e98304080..107da3d809a8 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2269,12 +2269,18 @@ void hugetlb_change_protection(struct vm_area_struct *vma,
2269 2269
2270int hugetlb_reserve_pages(struct inode *inode, 2270int hugetlb_reserve_pages(struct inode *inode,
2271 long from, long to, 2271 long from, long to,
2272 struct vm_area_struct *vma) 2272 struct vm_area_struct *vma,
2273 int acctflag)
2273{ 2274{
2274 long ret, chg; 2275 long ret, chg;
2275 struct hstate *h = hstate_inode(inode); 2276 struct hstate *h = hstate_inode(inode);
2276 2277
2277 if (vma && vma->vm_flags & VM_NORESERVE) 2278 /*
2279 * Only apply hugepage reservation if asked. At fault time, an
2280 * attempt will be made for VM_NORESERVE to allocate a page
2281 * and filesystem quota without using reserves
2282 */
2283 if (acctflag & VM_NORESERVE)
2278 return 0; 2284 return 0;
2279 2285
2280 /* 2286 /*
@@ -2299,13 +2305,31 @@ int hugetlb_reserve_pages(struct inode *inode,
2299 if (chg < 0) 2305 if (chg < 0)
2300 return chg; 2306 return chg;
2301 2307
2308 /* There must be enough filesystem quota for the mapping */
2302 if (hugetlb_get_quota(inode->i_mapping, chg)) 2309 if (hugetlb_get_quota(inode->i_mapping, chg))
2303 return -ENOSPC; 2310 return -ENOSPC;
2311
2312 /*
2313 * Check enough hugepages are available for the reservation.
2314 * Hand back the quota if there are not
2315 */
2304 ret = hugetlb_acct_memory(h, chg); 2316 ret = hugetlb_acct_memory(h, chg);
2305 if (ret < 0) { 2317 if (ret < 0) {
2306 hugetlb_put_quota(inode->i_mapping, chg); 2318 hugetlb_put_quota(inode->i_mapping, chg);
2307 return ret; 2319 return ret;
2308 } 2320 }
2321
2322 /*
2323 * Account for the reservations made. Shared mappings record regions
2324 * that have reservations as they are shared by multiple VMAs.
2325 * When the last VMA disappears, the region map says how much
2326 * the reservation was and the page cache tells how much of
2327 * the reservation was consumed. Private mappings are per-VMA and
2328 * only the consumed reservations are tracked. When the VMA
2329 * disappears, the original reservation is the VMA size and the
2330 * consumed reservations are stored in the map. Hence, nothing
2331 * else has to be done for private mappings here
2332 */
2309 if (!vma || vma->vm_flags & VM_SHARED) 2333 if (!vma || vma->vm_flags & VM_SHARED)
2310 region_add(&inode->i_mapping->private_list, from, to); 2334 region_add(&inode->i_mapping->private_list, from, to);
2311 return 0; 2335 return 0;
diff --git a/mm/memory.c b/mm/memory.c
index 22bfa7a47a0b..baa999e87cd2 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1999,7 +1999,7 @@ gotten:
1999 * Don't let another task, with possibly unlocked vma, 1999 * Don't let another task, with possibly unlocked vma,
2000 * keep the mlocked page. 2000 * keep the mlocked page.
2001 */ 2001 */
2002 if (vma->vm_flags & VM_LOCKED) { 2002 if ((vma->vm_flags & VM_LOCKED) && old_page) {
2003 lock_page(old_page); /* for LRU manipulation */ 2003 lock_page(old_page); /* for LRU manipulation */
2004 clear_page_mlock(old_page); 2004 clear_page_mlock(old_page);
2005 unlock_page(old_page); 2005 unlock_page(old_page);
diff --git a/mm/migrate.c b/mm/migrate.c
index 2bb4e1d63520..a9eff3f092f6 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1129,7 +1129,7 @@ int migrate_vmas(struct mm_struct *mm, const nodemask_t *to,
1129 struct vm_area_struct *vma; 1129 struct vm_area_struct *vma;
1130 int err = 0; 1130 int err = 0;
1131 1131
1132 for(vma = mm->mmap; vma->vm_next && !err; vma = vma->vm_next) { 1132 for (vma = mm->mmap; vma && !err; vma = vma->vm_next) {
1133 if (vma->vm_ops && vma->vm_ops->migrate) { 1133 if (vma->vm_ops && vma->vm_ops->migrate) {
1134 err = vma->vm_ops->migrate(vma, to, from, flags); 1134 err = vma->vm_ops->migrate(vma, to, from, flags);
1135 if (err) 1135 if (err)
diff --git a/mm/mlock.c b/mm/mlock.c
index 2904a347e476..037161d61b4e 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -294,14 +294,10 @@ static inline int __mlock_posix_error_return(long retval)
294 * 294 *
295 * return number of pages [> 0] to be removed from locked_vm on success 295 * return number of pages [> 0] to be removed from locked_vm on success
296 * of "special" vmas. 296 * of "special" vmas.
297 *
298 * return negative error if vma spanning @start-@range disappears while
299 * mmap semaphore is dropped. Unlikely?
300 */ 297 */
301long mlock_vma_pages_range(struct vm_area_struct *vma, 298long mlock_vma_pages_range(struct vm_area_struct *vma,
302 unsigned long start, unsigned long end) 299 unsigned long start, unsigned long end)
303{ 300{
304 struct mm_struct *mm = vma->vm_mm;
305 int nr_pages = (end - start) / PAGE_SIZE; 301 int nr_pages = (end - start) / PAGE_SIZE;
306 BUG_ON(!(vma->vm_flags & VM_LOCKED)); 302 BUG_ON(!(vma->vm_flags & VM_LOCKED));
307 303
@@ -314,20 +310,11 @@ long mlock_vma_pages_range(struct vm_area_struct *vma,
314 if (!((vma->vm_flags & (VM_DONTEXPAND | VM_RESERVED)) || 310 if (!((vma->vm_flags & (VM_DONTEXPAND | VM_RESERVED)) ||
315 is_vm_hugetlb_page(vma) || 311 is_vm_hugetlb_page(vma) ||
316 vma == get_gate_vma(current))) { 312 vma == get_gate_vma(current))) {
317 long error;
318 downgrade_write(&mm->mmap_sem);
319
320 error = __mlock_vma_pages_range(vma, start, end, 1);
321 313
322 up_read(&mm->mmap_sem); 314 __mlock_vma_pages_range(vma, start, end, 1);
323 /* vma can change or disappear */
324 down_write(&mm->mmap_sem);
325 vma = find_vma(mm, start);
326 /* non-NULL vma must contain @start, but need to check @end */
327 if (!vma || end > vma->vm_end)
328 return -ENOMEM;
329 315
330 return 0; /* hide other errors from mmap(), et al */ 316 /* Hide errors from mmap() and other callers */
317 return 0;
331 } 318 }
332 319
333 /* 320 /*
@@ -438,41 +425,14 @@ success:
438 vma->vm_flags = newflags; 425 vma->vm_flags = newflags;
439 426
440 if (lock) { 427 if (lock) {
441 /*
442 * mmap_sem is currently held for write. Downgrade the write
443 * lock to a read lock so that other faults, mmap scans, ...
444 * while we fault in all pages.
445 */
446 downgrade_write(&mm->mmap_sem);
447
448 ret = __mlock_vma_pages_range(vma, start, end, 1); 428 ret = __mlock_vma_pages_range(vma, start, end, 1);
449 429
450 /* 430 if (ret > 0) {
451 * Need to reacquire mmap sem in write mode, as our callers
452 * expect this. We have no support for atomically upgrading
453 * a sem to write, so we need to check for ranges while sem
454 * is unlocked.
455 */
456 up_read(&mm->mmap_sem);
457 /* vma can change or disappear */
458 down_write(&mm->mmap_sem);
459 *prev = find_vma(mm, start);
460 /* non-NULL *prev must contain @start, but need to check @end */
461 if (!(*prev) || end > (*prev)->vm_end)
462 ret = -ENOMEM;
463 else if (ret > 0) {
464 mm->locked_vm -= ret; 431 mm->locked_vm -= ret;
465 ret = 0; 432 ret = 0;
466 } else 433 } else
467 ret = __mlock_posix_error_return(ret); /* translate if needed */ 434 ret = __mlock_posix_error_return(ret); /* translate if needed */
468 } else { 435 } else {
469 /*
470 * TODO: for unlocking, pages will already be resident, so
471 * we don't need to wait for allocations/reclaim/pagein, ...
472 * However, unlocking a very large region can still take a
473 * while. Should we downgrade the semaphore for both lock
474 * AND unlock ?
475 */
476 __mlock_vma_pages_range(vma, start, end, 0); 436 __mlock_vma_pages_range(vma, start, end, 0);
477 } 437 }
478 438
diff --git a/mm/mmap.c b/mm/mmap.c
index 214b6a258eeb..00ced3ee49a8 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -918,7 +918,6 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
918 struct inode *inode; 918 struct inode *inode;
919 unsigned int vm_flags; 919 unsigned int vm_flags;
920 int error; 920 int error;
921 int accountable = 1;
922 unsigned long reqprot = prot; 921 unsigned long reqprot = prot;
923 922
924 /* 923 /*
@@ -1019,8 +1018,6 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
1019 return -EPERM; 1018 return -EPERM;
1020 vm_flags &= ~VM_MAYEXEC; 1019 vm_flags &= ~VM_MAYEXEC;
1021 } 1020 }
1022 if (is_file_hugepages(file))
1023 accountable = 0;
1024 1021
1025 if (!file->f_op || !file->f_op->mmap) 1022 if (!file->f_op || !file->f_op->mmap)
1026 return -ENODEV; 1023 return -ENODEV;
@@ -1053,8 +1050,7 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
1053 if (error) 1050 if (error)
1054 return error; 1051 return error;
1055 1052
1056 return mmap_region(file, addr, len, flags, vm_flags, pgoff, 1053 return mmap_region(file, addr, len, flags, vm_flags, pgoff);
1057 accountable);
1058} 1054}
1059EXPORT_SYMBOL(do_mmap_pgoff); 1055EXPORT_SYMBOL(do_mmap_pgoff);
1060 1056
@@ -1092,17 +1088,23 @@ int vma_wants_writenotify(struct vm_area_struct *vma)
1092 1088
1093/* 1089/*
1094 * We account for memory if it's a private writeable mapping, 1090 * We account for memory if it's a private writeable mapping,
1095 * and VM_NORESERVE wasn't set. 1091 * not hugepages and VM_NORESERVE wasn't set.
1096 */ 1092 */
1097static inline int accountable_mapping(unsigned int vm_flags) 1093static inline int accountable_mapping(struct file *file, unsigned int vm_flags)
1098{ 1094{
1095 /*
1096 * hugetlb has its own accounting separate from the core VM
1097 * VM_HUGETLB may not be set yet so we cannot check for that flag.
1098 */
1099 if (file && is_file_hugepages(file))
1100 return 0;
1101
1099 return (vm_flags & (VM_NORESERVE | VM_SHARED | VM_WRITE)) == VM_WRITE; 1102 return (vm_flags & (VM_NORESERVE | VM_SHARED | VM_WRITE)) == VM_WRITE;
1100} 1103}
1101 1104
1102unsigned long mmap_region(struct file *file, unsigned long addr, 1105unsigned long mmap_region(struct file *file, unsigned long addr,
1103 unsigned long len, unsigned long flags, 1106 unsigned long len, unsigned long flags,
1104 unsigned int vm_flags, unsigned long pgoff, 1107 unsigned int vm_flags, unsigned long pgoff)
1105 int accountable)
1106{ 1108{
1107 struct mm_struct *mm = current->mm; 1109 struct mm_struct *mm = current->mm;
1108 struct vm_area_struct *vma, *prev; 1110 struct vm_area_struct *vma, *prev;
@@ -1128,18 +1130,22 @@ munmap_back:
1128 1130
1129 /* 1131 /*
1130 * Set 'VM_NORESERVE' if we should not account for the 1132 * Set 'VM_NORESERVE' if we should not account for the
1131 * memory use of this mapping. We only honor MAP_NORESERVE 1133 * memory use of this mapping.
1132 * if we're allowed to overcommit memory.
1133 */ 1134 */
1134 if ((flags & MAP_NORESERVE) && sysctl_overcommit_memory != OVERCOMMIT_NEVER) 1135 if ((flags & MAP_NORESERVE)) {
1135 vm_flags |= VM_NORESERVE; 1136 /* We honor MAP_NORESERVE if allowed to overcommit */
1136 if (!accountable) 1137 if (sysctl_overcommit_memory != OVERCOMMIT_NEVER)
1137 vm_flags |= VM_NORESERVE; 1138 vm_flags |= VM_NORESERVE;
1139
1140 /* hugetlb applies strict overcommit unless MAP_NORESERVE */
1141 if (file && is_file_hugepages(file))
1142 vm_flags |= VM_NORESERVE;
1143 }
1138 1144
1139 /* 1145 /*
1140 * Private writable mapping: check memory availability 1146 * Private writable mapping: check memory availability
1141 */ 1147 */
1142 if (accountable_mapping(vm_flags)) { 1148 if (accountable_mapping(file, vm_flags)) {
1143 charged = len >> PAGE_SHIFT; 1149 charged = len >> PAGE_SHIFT;
1144 if (security_vm_enough_memory(charged)) 1150 if (security_vm_enough_memory(charged))
1145 return -ENOMEM; 1151 return -ENOMEM;
@@ -2078,12 +2084,8 @@ void exit_mmap(struct mm_struct *mm)
2078 unsigned long end; 2084 unsigned long end;
2079 2085
2080 /* mm's last user has gone, and its about to be pulled down */ 2086 /* mm's last user has gone, and its about to be pulled down */
2081 arch_exit_mmap(mm);
2082 mmu_notifier_release(mm); 2087 mmu_notifier_release(mm);
2083 2088
2084 if (!mm->mmap) /* Can happen if dup_mmap() received an OOM */
2085 return;
2086
2087 if (mm->locked_vm) { 2089 if (mm->locked_vm) {
2088 vma = mm->mmap; 2090 vma = mm->mmap;
2089 while (vma) { 2091 while (vma) {
@@ -2092,7 +2094,13 @@ void exit_mmap(struct mm_struct *mm)
2092 vma = vma->vm_next; 2094 vma = vma->vm_next;
2093 } 2095 }
2094 } 2096 }
2097
2098 arch_exit_mmap(mm);
2099
2095 vma = mm->mmap; 2100 vma = mm->mmap;
2101 if (!vma) /* Can happen if dup_mmap() received an OOM */
2102 return;
2103
2096 lru_add_drain(); 2104 lru_add_drain();
2097 flush_cache_mm(mm); 2105 flush_cache_mm(mm);
2098 tlb = tlb_gather_mmu(mm, 1); 2106 tlb = tlb_gather_mmu(mm, 1);
diff --git a/mm/mprotect.c b/mm/mprotect.c
index abe2694e13f4..258197b76fb4 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -151,10 +151,11 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
151 /* 151 /*
152 * If we make a private mapping writable we increase our commit; 152 * If we make a private mapping writable we increase our commit;
153 * but (without finer accounting) cannot reduce our commit if we 153 * but (without finer accounting) cannot reduce our commit if we
154 * make it unwritable again. 154 * make it unwritable again. hugetlb mapping were accounted for
155 * even if read-only so there is no need to account for them here
155 */ 156 */
156 if (newflags & VM_WRITE) { 157 if (newflags & VM_WRITE) {
157 if (!(oldflags & (VM_ACCOUNT|VM_WRITE| 158 if (!(oldflags & (VM_ACCOUNT|VM_WRITE|VM_HUGETLB|
158 VM_SHARED|VM_NORESERVE))) { 159 VM_SHARED|VM_NORESERVE))) {
159 charged = nrpages; 160 charged = nrpages;
160 if (security_vm_enough_memory(charged)) 161 if (security_vm_enough_memory(charged))
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index b493db7841dc..6106a5c7ed44 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -209,7 +209,7 @@ int dirty_bytes_handler(struct ctl_table *table, int write,
209 struct file *filp, void __user *buffer, size_t *lenp, 209 struct file *filp, void __user *buffer, size_t *lenp,
210 loff_t *ppos) 210 loff_t *ppos)
211{ 211{
212 int old_bytes = vm_dirty_bytes; 212 unsigned long old_bytes = vm_dirty_bytes;
213 int ret; 213 int ret;
214 214
215 ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos); 215 ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos);
@@ -1051,13 +1051,25 @@ continue_unlock:
1051 } 1051 }
1052 } 1052 }
1053 1053
1054 if (wbc->sync_mode == WB_SYNC_NONE) { 1054 if (nr_to_write > 0) {
1055 wbc->nr_to_write--; 1055 nr_to_write--;
1056 if (wbc->nr_to_write <= 0) { 1056 if (nr_to_write == 0 &&
1057 wbc->sync_mode == WB_SYNC_NONE) {
1058 /*
1059 * We stop writing back only if we are
1060 * not doing integrity sync. In case of
1061 * integrity sync we have to keep going
1062 * because someone may be concurrently
1063 * dirtying pages, and we might have
1064 * synced a lot of newly appeared dirty
1065 * pages, but have not synced all of the
1066 * old dirty pages.
1067 */
1057 done = 1; 1068 done = 1;
1058 break; 1069 break;
1059 } 1070 }
1060 } 1071 }
1072
1061 if (wbc->nonblocking && bdi_write_congested(bdi)) { 1073 if (wbc->nonblocking && bdi_write_congested(bdi)) {
1062 wbc->encountered_congestion = 1; 1074 wbc->encountered_congestion = 1;
1063 done = 1; 1075 done = 1;
diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c
index 7006a11350c8..ceecfbb143fa 100644
--- a/mm/page_cgroup.c
+++ b/mm/page_cgroup.c
@@ -114,7 +114,8 @@ static int __init_refok init_section_page_cgroup(unsigned long pfn)
114 nid = page_to_nid(pfn_to_page(pfn)); 114 nid = page_to_nid(pfn_to_page(pfn));
115 table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION; 115 table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION;
116 if (slab_is_available()) { 116 if (slab_is_available()) {
117 base = kmalloc_node(table_size, GFP_KERNEL, nid); 117 base = kmalloc_node(table_size,
118 GFP_KERNEL | __GFP_NOWARN, nid);
118 if (!base) 119 if (!base)
119 base = vmalloc_node(table_size, nid); 120 base = vmalloc_node(table_size, nid);
120 } else { 121 } else {
diff --git a/mm/rmap.c b/mm/rmap.c
index ac4af8cffbf9..16521664010d 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1072,7 +1072,8 @@ static int try_to_unmap_file(struct page *page, int unlock, int migration)
1072 spin_lock(&mapping->i_mmap_lock); 1072 spin_lock(&mapping->i_mmap_lock);
1073 vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) { 1073 vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
1074 if (MLOCK_PAGES && unlikely(unlock)) { 1074 if (MLOCK_PAGES && unlikely(unlock)) {
1075 if (!(vma->vm_flags & VM_LOCKED)) 1075 if (!((vma->vm_flags & VM_LOCKED) &&
1076 page_mapped_in_vma(page, vma)))
1076 continue; /* must visit all vmas */ 1077 continue; /* must visit all vmas */
1077 ret = SWAP_MLOCK; 1078 ret = SWAP_MLOCK;
1078 } else { 1079 } else {
diff --git a/mm/slub.c b/mm/slub.c
index 6392ae5cc6b1..bdc9abb08a23 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1996,7 +1996,7 @@ static struct kmem_cache_cpu *alloc_kmem_cache_cpu(struct kmem_cache *s,
1996static void free_kmem_cache_cpu(struct kmem_cache_cpu *c, int cpu) 1996static void free_kmem_cache_cpu(struct kmem_cache_cpu *c, int cpu)
1997{ 1997{
1998 if (c < per_cpu(kmem_cache_cpu, cpu) || 1998 if (c < per_cpu(kmem_cache_cpu, cpu) ||
1999 c > per_cpu(kmem_cache_cpu, cpu) + NR_KMEM_CACHE_CPU) { 1999 c >= per_cpu(kmem_cache_cpu, cpu) + NR_KMEM_CACHE_CPU) {
2000 kfree(c); 2000 kfree(c);
2001 return; 2001 return;
2002 } 2002 }