Diffstat (limited to 'mm')
 mm/memcontrol.c | 25
 mm/mlock.c      | 47
 mm/mmap.c       | 79
 mm/shmem.c      |  2
 mm/swapfile.c   |  5
 5 files changed, 65 insertions(+), 93 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 4d0ea3ceba6d..8e4be9cb2a6a 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -202,6 +202,7 @@ pcg_default_flags[NR_CHARGE_TYPE] = {
 
 static void mem_cgroup_get(struct mem_cgroup *mem);
 static void mem_cgroup_put(struct mem_cgroup *mem);
+static struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *mem);
 
 static void mem_cgroup_charge_statistics(struct mem_cgroup *mem,
 					struct page_cgroup *pc,
@@ -1684,7 +1685,7 @@ move_account:
 	/* This is for making all *used* pages to be on LRU. */
 	lru_add_drain_all();
 	ret = 0;
-	for_each_node_state(node, N_POSSIBLE) {
+	for_each_node_state(node, N_HIGH_MEMORY) {
 		for (zid = 0; !ret && zid < MAX_NR_ZONES; zid++) {
 			enum lru_list l;
 			for_each_lru(l) {
@@ -2193,10 +2194,23 @@ static void mem_cgroup_get(struct mem_cgroup *mem)
 
 static void mem_cgroup_put(struct mem_cgroup *mem)
 {
-	if (atomic_dec_and_test(&mem->refcnt))
+	if (atomic_dec_and_test(&mem->refcnt)) {
+		struct mem_cgroup *parent = parent_mem_cgroup(mem);
 		__mem_cgroup_free(mem);
+		if (parent)
+			mem_cgroup_put(parent);
+	}
 }
 
+/*
+ * Returns the parent mem_cgroup in memcgroup hierarchy with hierarchy enabled.
+ */
+static struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *mem)
+{
+	if (!mem->res.parent)
+		return NULL;
+	return mem_cgroup_from_res_counter(mem->res.parent, res);
+}
 
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
 static void __init enable_swap_cgroup(void)
@@ -2235,6 +2249,13 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
 	if (parent && parent->use_hierarchy) {
 		res_counter_init(&mem->res, &parent->res);
 		res_counter_init(&mem->memsw, &parent->memsw);
+		/*
+		 * We increment refcnt of the parent to ensure that we can
+		 * safely access it on res_counter_charge/uncharge.
+		 * This refcnt will be decremented when freeing this
+		 * mem_cgroup(see mem_cgroup_put).
+		 */
+		mem_cgroup_get(parent);
 	} else {
 		res_counter_init(&mem->res, NULL);
 		res_counter_init(&mem->memsw, NULL);
diff --git a/mm/mlock.c b/mm/mlock.c
index 2904a347e476..028ec482fdd4 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -294,14 +294,10 @@ static inline int __mlock_posix_error_return(long retval)
  *
  * return number of pages [> 0] to be removed from locked_vm on success
  * of "special" vmas.
- *
- * return negative error if vma spanning @start-@range disappears while
- * mmap semaphore is dropped. Unlikely?
  */
 long mlock_vma_pages_range(struct vm_area_struct *vma,
 			unsigned long start, unsigned long end)
 {
-	struct mm_struct *mm = vma->vm_mm;
 	int nr_pages = (end - start) / PAGE_SIZE;
 	BUG_ON(!(vma->vm_flags & VM_LOCKED));
 
@@ -314,20 +310,8 @@ long mlock_vma_pages_range(struct vm_area_struct *vma,
 	if (!((vma->vm_flags & (VM_DONTEXPAND | VM_RESERVED)) ||
 			is_vm_hugetlb_page(vma) ||
 			vma == get_gate_vma(current))) {
-		long error;
-		downgrade_write(&mm->mmap_sem);
-
-		error = __mlock_vma_pages_range(vma, start, end, 1);
 
-		up_read(&mm->mmap_sem);
-		/* vma can change or disappear */
-		down_write(&mm->mmap_sem);
-		vma = find_vma(mm, start);
-		/* non-NULL vma must contain @start, but need to check @end */
-		if (!vma || end > vma->vm_end)
-			return -ENOMEM;
-
-		return 0;	/* hide other errors from mmap(), et al */
+		return __mlock_vma_pages_range(vma, start, end, 1);
 	}
 
 	/*
@@ -438,41 +422,14 @@ success:
 	vma->vm_flags = newflags;
 
 	if (lock) {
-		/*
-		 * mmap_sem is currently held for write. Downgrade the write
-		 * lock to a read lock so that other faults, mmap scans, ...
-		 * while we fault in all pages.
-		 */
-		downgrade_write(&mm->mmap_sem);
-
 		ret = __mlock_vma_pages_range(vma, start, end, 1);
 
-		/*
-		 * Need to reacquire mmap sem in write mode, as our callers
-		 * expect this. We have no support for atomically upgrading
-		 * a sem to write, so we need to check for ranges while sem
-		 * is unlocked.
-		 */
-		up_read(&mm->mmap_sem);
-		/* vma can change or disappear */
-		down_write(&mm->mmap_sem);
-		*prev = find_vma(mm, start);
-		/* non-NULL *prev must contain @start, but need to check @end */
-		if (!(*prev) || end > (*prev)->vm_end)
-			ret = -ENOMEM;
-		else if (ret > 0) {
+		if (ret > 0) {
 			mm->locked_vm -= ret;
 			ret = 0;
 		} else
 			ret = __mlock_posix_error_return(ret);	/* translate if needed */
 	} else {
-		/*
-		 * TODO: for unlocking, pages will already be resident, so
-		 * we don't need to wait for allocations/reclaim/pagein, ...
-		 * However, unlocking a very large region can still take a
-		 * while. Should we downgrade the semaphore for both lock
-		 * AND unlock ?
-		 */
 		__mlock_vma_pages_range(vma, start, end, 0);
 	}
 
diff --git a/mm/mmap.c b/mm/mmap.c
index 8d95902e9a38..214b6a258eeb 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -658,6 +658,9 @@ again: remove_next = 1 + (end > next->vm_end);
 	validate_mm(mm);
 }
 
+/* Flags that can be inherited from an existing mapping when merging */
+#define VM_MERGEABLE_FLAGS (VM_CAN_NONLINEAR)
+
 /*
  * If the vma has a ->close operation then the driver probably needs to release
  * per-vma resources, so we don't attempt to merge those.
@@ -665,7 +668,7 @@ again: remove_next = 1 + (end > next->vm_end);
 static inline int is_mergeable_vma(struct vm_area_struct *vma,
 			struct file *file, unsigned long vm_flags)
 {
-	if (vma->vm_flags != vm_flags)
+	if ((vma->vm_flags ^ vm_flags) & ~VM_MERGEABLE_FLAGS)
 		return 0;
 	if (vma->vm_file != file)
 		return 0;
@@ -1087,6 +1090,15 @@ int vma_wants_writenotify(struct vm_area_struct *vma)
 		mapping_cap_account_dirty(vma->vm_file->f_mapping);
 }
 
+/*
+ * We account for memory if it's a private writeable mapping,
+ * and VM_NORESERVE wasn't set.
+ */
+static inline int accountable_mapping(unsigned int vm_flags)
+{
+	return (vm_flags & (VM_NORESERVE | VM_SHARED | VM_WRITE)) == VM_WRITE;
+}
+
 unsigned long mmap_region(struct file *file, unsigned long addr,
 			unsigned long len, unsigned long flags,
 			unsigned int vm_flags, unsigned long pgoff,
@@ -1114,36 +1126,32 @@ munmap_back:
 	if (!may_expand_vm(mm, len >> PAGE_SHIFT))
 		return -ENOMEM;
 
-	if (flags & MAP_NORESERVE)
+	/*
+	 * Set 'VM_NORESERVE' if we should not account for the
+	 * memory use of this mapping. We only honor MAP_NORESERVE
+	 * if we're allowed to overcommit memory.
+	 */
+	if ((flags & MAP_NORESERVE) && sysctl_overcommit_memory != OVERCOMMIT_NEVER)
+		vm_flags |= VM_NORESERVE;
+	if (!accountable)
 		vm_flags |= VM_NORESERVE;
 
-	if (accountable && (!(flags & MAP_NORESERVE) ||
-			sysctl_overcommit_memory == OVERCOMMIT_NEVER)) {
-		if (vm_flags & VM_SHARED) {
-			/* Check memory availability in shmem_file_setup? */
-			vm_flags |= VM_ACCOUNT;
-		} else if (vm_flags & VM_WRITE) {
-			/*
-			 * Private writable mapping: check memory availability
-			 */
-			charged = len >> PAGE_SHIFT;
-			if (security_vm_enough_memory(charged))
-				return -ENOMEM;
-			vm_flags |= VM_ACCOUNT;
-		}
+	/*
+	 * Private writable mapping: check memory availability
+	 */
+	if (accountable_mapping(vm_flags)) {
+		charged = len >> PAGE_SHIFT;
+		if (security_vm_enough_memory(charged))
+			return -ENOMEM;
+		vm_flags |= VM_ACCOUNT;
 	}
 
 	/*
-	 * Can we just expand an old private anonymous mapping?
-	 * The VM_SHARED test is necessary because shmem_zero_setup
-	 * will create the file object for a shared anonymous map below.
+	 * Can we just expand an old mapping?
 	 */
-	if (!file && !(vm_flags & VM_SHARED)) {
-		vma = vma_merge(mm, prev, addr, addr + len, vm_flags,
-				NULL, NULL, pgoff, NULL);
-		if (vma)
-			goto out;
-	}
+	vma = vma_merge(mm, prev, addr, addr + len, vm_flags, NULL, file, pgoff, NULL);
+	if (vma)
+		goto out;
 
 	/*
 	 * Determine the object being mapped and call the appropriate
@@ -1186,14 +1194,6 @@ munmap_back:
 		goto free_vma;
 	}
 
-	/* We set VM_ACCOUNT in a shared mapping's vm_flags, to inform
-	 * shmem_zero_setup (perhaps called through /dev/zero's ->mmap)
-	 * that memory reservation must be checked; but that reservation
-	 * belongs to shared memory object, not to vma: so now clear it.
-	 */
-	if ((vm_flags & (VM_SHARED|VM_ACCOUNT)) == (VM_SHARED|VM_ACCOUNT))
-		vma->vm_flags &= ~VM_ACCOUNT;
-
 	/* Can addr have changed??
 	 *
 	 * Answer: Yes, several device drivers can do it in their
@@ -1206,17 +1206,8 @@ munmap_back:
 	if (vma_wants_writenotify(vma))
 		vma->vm_page_prot = vm_get_page_prot(vm_flags & ~VM_SHARED);
 
-	if (file && vma_merge(mm, prev, addr, vma->vm_end,
-			vma->vm_flags, NULL, file, pgoff, vma_policy(vma))) {
-		mpol_put(vma_policy(vma));
-		kmem_cache_free(vm_area_cachep, vma);
-		fput(file);
-		if (vm_flags & VM_EXECUTABLE)
-			removed_exe_file_vma(mm);
-	} else {
-		vma_link(mm, vma, prev, rb_link, rb_parent);
-		file = vma->vm_file;
-	}
+	vma_link(mm, vma, prev, rb_link, rb_parent);
+	file = vma->vm_file;
 
 	/* Once vma denies write, undo our temporary denial count */
 	if (correct_wcount)
diff --git a/mm/shmem.c b/mm/shmem.c
index 5d0de96c9789..19d566ccdeea 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2628,7 +2628,7 @@ struct file *shmem_file_setup(char *name, loff_t size, unsigned long flags)
 		goto close_file;
 
 #ifdef CONFIG_SHMEM
-	SHMEM_I(inode)->flags = flags & VM_ACCOUNT;
+	SHMEM_I(inode)->flags = (flags & VM_NORESERVE) ? 0 : VM_ACCOUNT;
 #endif
 	d_instantiate(dentry, inode);
 	inode->i_size = size;
diff --git a/mm/swapfile.c b/mm/swapfile.c
index f48b831e5e5c..7e6304dfafab 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -698,8 +698,10 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
 	pte_t *pte;
 	int ret = 1;
 
-	if (mem_cgroup_try_charge_swapin(vma->vm_mm, page, GFP_KERNEL, &ptr))
+	if (mem_cgroup_try_charge_swapin(vma->vm_mm, page, GFP_KERNEL, &ptr)) {
 		ret = -ENOMEM;
+		goto out_nolock;
+	}
 
 	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
 	if (unlikely(!pte_same(*pte, swp_entry_to_pte(entry)))) {
@@ -723,6 +725,7 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
 	activate_page(page);
 out:
 	pte_unmap_unlock(pte, ptl);
+out_nolock:
 	return ret;
 }
 