path: root/mm
author		James Morris <jmorris@namei.org>	2009-03-23 19:52:46 -0400
committer	James Morris <jmorris@namei.org>	2009-03-23 19:52:46 -0400
commit		703a3cd72817e99201cef84a8a7aecc60b2b3581 (patch)
tree		3e943755178ff410694722bb031f523136fbc432 /mm
parent		df7f54c012b92ec93d56b68547351dcdf8a163d3 (diff)
parent		8e0ee43bc2c3e19db56a4adaa9a9b04ce885cd84 (diff)
Merge branch 'master' into next
Diffstat (limited to 'mm')
-rw-r--r--	mm/fremap.c          |  2
-rw-r--r--	mm/hugetlb.c         | 28
-rw-r--r--	mm/migrate.c         |  2
-rw-r--r--	mm/mlock.c           | 12
-rw-r--r--	mm/mmap.c            | 48
-rw-r--r--	mm/mprotect.c        |  5
-rw-r--r--	mm/page-writeback.c  | 46
-rw-r--r--	mm/page_alloc.c      | 27
-rw-r--r--	mm/page_cgroup.c     |  3
-rw-r--r--	mm/page_io.c         |  2
-rw-r--r--	mm/rmap.c            |  3
-rw-r--r--	mm/shmem.c           | 43
-rw-r--r--	mm/slab.c            |  1
-rw-r--r--	mm/slob.c            |  1
-rw-r--r--	mm/slub.c            |  1
-rw-r--r--	mm/swapfile.c        |  4
-rw-r--r--	mm/util.c            | 20
-rw-r--r--	mm/vmalloc.c         | 20
-rw-r--r--	mm/vmscan.c          | 32
19 files changed, 199 insertions(+), 101 deletions(-)
diff --git a/mm/fremap.c b/mm/fremap.c
index 736ba7f3306a..b6ec85abbb39 100644
--- a/mm/fremap.c
+++ b/mm/fremap.c
@@ -198,7 +198,7 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
 		flags &= MAP_NONBLOCK;
 		get_file(file);
 		addr = mmap_region(file, start, size,
-			flags, vma->vm_flags, pgoff, 1);
+			flags, vma->vm_flags, pgoff);
 		fput(file);
 		if (IS_ERR_VALUE(addr)) {
 			err = addr;
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 618e98304080..107da3d809a8 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2269,12 +2269,18 @@ void hugetlb_change_protection(struct vm_area_struct *vma,
 
 int hugetlb_reserve_pages(struct inode *inode,
 					long from, long to,
-					struct vm_area_struct *vma)
+					struct vm_area_struct *vma,
+					int acctflag)
 {
 	long ret, chg;
 	struct hstate *h = hstate_inode(inode);
 
-	if (vma && vma->vm_flags & VM_NORESERVE)
+	/*
+	 * Only apply hugepage reservation if asked. At fault time, an
+	 * attempt will be made for VM_NORESERVE to allocate a page
+	 * and filesystem quota without using reserves
+	 */
+	if (acctflag & VM_NORESERVE)
 		return 0;
 
 	/*
@@ -2299,13 +2305,31 @@ int hugetlb_reserve_pages(struct inode *inode,
 	if (chg < 0)
 		return chg;
 
+	/* There must be enough filesystem quota for the mapping */
 	if (hugetlb_get_quota(inode->i_mapping, chg))
 		return -ENOSPC;
+
+	/*
+	 * Check enough hugepages are available for the reservation.
+	 * Hand back the quota if there are not
+	 */
 	ret = hugetlb_acct_memory(h, chg);
 	if (ret < 0) {
 		hugetlb_put_quota(inode->i_mapping, chg);
 		return ret;
 	}
+
+	/*
+	 * Account for the reservations made. Shared mappings record regions
+	 * that have reservations as they are shared by multiple VMAs.
+	 * When the last VMA disappears, the region map says how much
+	 * the reservation was and the page cache tells how much of
+	 * the reservation was consumed. Private mappings are per-VMA and
+	 * only the consumed reservations are tracked. When the VMA
+	 * disappears, the original reservation is the VMA size and the
+	 * consumed reservations are stored in the map. Hence, nothing
+	 * else has to be done for private mappings here
+	 */
 	if (!vma || vma->vm_flags & VM_SHARED)
 		region_add(&inode->i_mapping->private_list, from, to);
 	return 0;
diff --git a/mm/migrate.c b/mm/migrate.c
index 2bb4e1d63520..a9eff3f092f6 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1129,7 +1129,7 @@ int migrate_vmas(struct mm_struct *mm, const nodemask_t *to,
 	struct vm_area_struct *vma;
 	int err = 0;
 
-	for(vma = mm->mmap; vma->vm_next && !err; vma = vma->vm_next) {
+	for (vma = mm->mmap; vma && !err; vma = vma->vm_next) {
 		if (vma->vm_ops && vma->vm_ops->migrate) {
 			err = vma->vm_ops->migrate(vma, to, from, flags);
 			if (err)
diff --git a/mm/mlock.c b/mm/mlock.c
index 028ec482fdd4..cbe9e0581b75 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -311,7 +311,10 @@ long mlock_vma_pages_range(struct vm_area_struct *vma,
 			is_vm_hugetlb_page(vma) ||
 			vma == get_gate_vma(current))) {
 
-		return __mlock_vma_pages_range(vma, start, end, 1);
+		__mlock_vma_pages_range(vma, start, end, 1);
+
+		/* Hide errors from mmap() and other callers */
+		return 0;
 	}
 
 	/*
@@ -657,7 +660,7 @@ void *alloc_locked_buffer(size_t size)
 	return buffer;
 }
 
-void free_locked_buffer(void *buffer, size_t size)
+void release_locked_buffer(void *buffer, size_t size)
 {
 	unsigned long pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT;
 
@@ -667,6 +670,11 @@ void free_locked_buffer(void *buffer, size_t size)
 	current->mm->locked_vm -= pgsz;
 
 	up_write(&current->mm->mmap_sem);
+}
+
+void free_locked_buffer(void *buffer, size_t size)
+{
+	release_locked_buffer(buffer, size);
 
 	kfree(buffer);
 }
diff --git a/mm/mmap.c b/mm/mmap.c
index 3b3ed0bb9fdb..1abb9185a686 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -919,7 +919,6 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
 	struct inode *inode;
 	unsigned int vm_flags;
 	int error;
-	int accountable = 1;
 	unsigned long reqprot = prot;
 
 	/*
@@ -1020,8 +1019,6 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
 					return -EPERM;
 				vm_flags &= ~VM_MAYEXEC;
 			}
-			if (is_file_hugepages(file))
-				accountable = 0;
 
 			if (!file->f_op || !file->f_op->mmap)
 				return -ENODEV;
@@ -1057,8 +1054,7 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
 	if (error)
 		return error;
 
-	return mmap_region(file, addr, len, flags, vm_flags, pgoff,
-			   accountable);
+	return mmap_region(file, addr, len, flags, vm_flags, pgoff);
 }
 EXPORT_SYMBOL(do_mmap_pgoff);
 
@@ -1096,17 +1092,23 @@ int vma_wants_writenotify(struct vm_area_struct *vma)
 
 /*
  * We account for memory if it's a private writeable mapping,
- * and VM_NORESERVE wasn't set.
+ * not hugepages and VM_NORESERVE wasn't set.
  */
-static inline int accountable_mapping(unsigned int vm_flags)
+static inline int accountable_mapping(struct file *file, unsigned int vm_flags)
 {
+	/*
+	 * hugetlb has its own accounting separate from the core VM
+	 * VM_HUGETLB may not be set yet so we cannot check for that flag.
+	 */
+	if (file && is_file_hugepages(file))
+		return 0;
+
 	return (vm_flags & (VM_NORESERVE | VM_SHARED | VM_WRITE)) == VM_WRITE;
 }
 
 unsigned long mmap_region(struct file *file, unsigned long addr,
 			  unsigned long len, unsigned long flags,
-			  unsigned int vm_flags, unsigned long pgoff,
-			  int accountable)
+			  unsigned int vm_flags, unsigned long pgoff)
 {
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma, *prev;
@@ -1132,18 +1134,22 @@ munmap_back:
 
 	/*
 	 * Set 'VM_NORESERVE' if we should not account for the
-	 * memory use of this mapping. We only honor MAP_NORESERVE
-	 * if we're allowed to overcommit memory.
+	 * memory use of this mapping.
 	 */
-	if ((flags & MAP_NORESERVE) && sysctl_overcommit_memory != OVERCOMMIT_NEVER)
-		vm_flags |= VM_NORESERVE;
-	if (!accountable)
-		vm_flags |= VM_NORESERVE;
+	if ((flags & MAP_NORESERVE)) {
+		/* We honor MAP_NORESERVE if allowed to overcommit */
+		if (sysctl_overcommit_memory != OVERCOMMIT_NEVER)
+			vm_flags |= VM_NORESERVE;
+
+		/* hugetlb applies strict overcommit unless MAP_NORESERVE */
+		if (file && is_file_hugepages(file))
+			vm_flags |= VM_NORESERVE;
+	}
 
 	/*
 	 * Private writable mapping: check memory availability
 	 */
-	if (accountable_mapping(vm_flags)) {
+	if (accountable_mapping(file, vm_flags)) {
 		charged = len >> PAGE_SHIFT;
 		if (security_vm_enough_memory(charged))
 			return -ENOMEM;
@@ -2082,12 +2088,8 @@ void exit_mmap(struct mm_struct *mm)
 	unsigned long end;
 
 	/* mm's last user has gone, and its about to be pulled down */
-	arch_exit_mmap(mm);
 	mmu_notifier_release(mm);
 
-	if (!mm->mmap) /* Can happen if dup_mmap() received an OOM */
-		return;
-
 	if (mm->locked_vm) {
 		vma = mm->mmap;
 		while (vma) {
@@ -2096,7 +2098,13 @@ void exit_mmap(struct mm_struct *mm)
 			vma = vma->vm_next;
 		}
 	}
+
+	arch_exit_mmap(mm);
+
 	vma = mm->mmap;
+	if (!vma) /* Can happen if dup_mmap() received an OOM */
+		return;
+
 	lru_add_drain();
 	flush_cache_mm(mm);
 	tlb = tlb_gather_mmu(mm, 1);
diff --git a/mm/mprotect.c b/mm/mprotect.c
index abe2694e13f4..258197b76fb4 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -151,10 +151,11 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
 	/*
 	 * If we make a private mapping writable we increase our commit;
 	 * but (without finer accounting) cannot reduce our commit if we
-	 * make it unwritable again.
+	 * make it unwritable again. hugetlb mapping were accounted for
+	 * even if read-only so there is no need to account for them here
 	 */
 	if (newflags & VM_WRITE) {
-		if (!(oldflags & (VM_ACCOUNT|VM_WRITE|
+		if (!(oldflags & (VM_ACCOUNT|VM_WRITE|VM_HUGETLB|
 						VM_SHARED|VM_NORESERVE))) {
 			charged = nrpages;
 			if (security_vm_enough_memory(charged))
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index dc32dae01e5f..74dc57c74349 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -209,7 +209,7 @@ int dirty_bytes_handler(struct ctl_table *table, int write,
 			struct file *filp, void __user *buffer, size_t *lenp,
 			loff_t *ppos)
 {
-	int old_bytes = vm_dirty_bytes;
+	unsigned long old_bytes = vm_dirty_bytes;
 	int ret;
 
 	ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos);
@@ -240,7 +240,7 @@ void bdi_writeout_inc(struct backing_dev_info *bdi)
 }
 EXPORT_SYMBOL_GPL(bdi_writeout_inc);
 
-static inline void task_dirty_inc(struct task_struct *tsk)
+void task_dirty_inc(struct task_struct *tsk)
 {
 	prop_inc_single(&vm_dirties, &tsk->dirties);
 }
@@ -1051,20 +1051,23 @@ continue_unlock:
 				}
 			}
 
-			if (nr_to_write > 0)
-				nr_to_write--;
-			else if (wbc->sync_mode == WB_SYNC_NONE) {
-				/*
-				 * We stop writing back only if we are not
-				 * doing integrity sync. In case of integrity
-				 * sync we have to keep going because someone
-				 * may be concurrently dirtying pages, and we
-				 * might have synced a lot of newly appeared
-				 * dirty pages, but have not synced all of the
-				 * old dirty pages.
-				 */
-				done = 1;
-				break;
+			if (nr_to_write > 0) {
+				nr_to_write--;
+				if (nr_to_write == 0 &&
+				    wbc->sync_mode == WB_SYNC_NONE) {
+					/*
+					 * We stop writing back only if we are
+					 * not doing integrity sync. In case of
+					 * integrity sync we have to keep going
+					 * because someone may be concurrently
+					 * dirtying pages, and we might have
+					 * synced a lot of newly appeared dirty
+					 * pages, but have not synced all of the
+					 * old dirty pages.
+					 */
+					done = 1;
+					break;
+				}
 			}
 
 			if (wbc->nonblocking && bdi_write_congested(bdi)) {
@@ -1076,7 +1079,7 @@ continue_unlock:
 		pagevec_release(&pvec);
 		cond_resched();
 	}
-	if (!cycled) {
+	if (!cycled && !done) {
 		/*
 		 * range_cyclic:
 		 * We hit the last page and there is more work to be done: wrap
@@ -1227,6 +1230,7 @@ int __set_page_dirty_nobuffers(struct page *page)
 			__inc_zone_page_state(page, NR_FILE_DIRTY);
 			__inc_bdi_stat(mapping->backing_dev_info,
 					BDI_RECLAIMABLE);
+			task_dirty_inc(current);
 			task_io_account_write(PAGE_CACHE_SIZE);
 		}
 		radix_tree_tag_set(&mapping->page_tree,
@@ -1259,7 +1263,7 @@ EXPORT_SYMBOL(redirty_page_for_writepage);
  * If the mapping doesn't provide a set_page_dirty a_op, then
  * just fall through and assume that it wants buffer_heads.
  */
-static int __set_page_dirty(struct page *page)
+int set_page_dirty(struct page *page)
 {
 	struct address_space *mapping = page_mapping(page);
 
@@ -1277,14 +1281,6 @@ static int __set_page_dirty(struct page *page)
 	}
 	return 0;
 }
-
-int set_page_dirty(struct page *page)
-{
-	int ret = __set_page_dirty(page);
-	if (ret)
-		task_dirty_inc(current);
-	return ret;
-}
 EXPORT_SYMBOL(set_page_dirty);
 
 /*
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 5675b3073854..5c44ed49ca93 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2989,7 +2989,7 @@ static int __meminit next_active_region_index_in_nid(int index, int nid)
  * was used and there are no special requirements, this is a convenient
  * alternative
  */
-int __meminit early_pfn_to_nid(unsigned long pfn)
+int __meminit __early_pfn_to_nid(unsigned long pfn)
 {
 	int i;
 
@@ -3000,10 +3000,33 @@ int __meminit early_pfn_to_nid(unsigned long pfn)
 		if (start_pfn <= pfn && pfn < end_pfn)
 			return early_node_map[i].nid;
 	}
+	/* This is a memory hole */
+	return -1;
+}
+#endif /* CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID */
+
+int __meminit early_pfn_to_nid(unsigned long pfn)
+{
+	int nid;
 
+	nid = __early_pfn_to_nid(pfn);
+	if (nid >= 0)
+		return nid;
+	/* just returns 0 */
 	return 0;
 }
-#endif /* CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID */
+
+#ifdef CONFIG_NODES_SPAN_OTHER_NODES
+bool __meminit early_pfn_in_nid(unsigned long pfn, int node)
+{
+	int nid;
+
+	nid = __early_pfn_to_nid(pfn);
+	if (nid >= 0 && nid != node)
+		return false;
+	return true;
+}
+#endif
 
 /* Basic iterator support to walk early_node_map[] */
 #define for_each_active_range_index_in_nid(i, nid) \
diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c
index 7006a11350c8..ceecfbb143fa 100644
--- a/mm/page_cgroup.c
+++ b/mm/page_cgroup.c
@@ -114,7 +114,8 @@ static int __init_refok init_section_page_cgroup(unsigned long pfn)
 	nid = page_to_nid(pfn_to_page(pfn));
 	table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION;
 	if (slab_is_available()) {
-		base = kmalloc_node(table_size, GFP_KERNEL, nid);
+		base = kmalloc_node(table_size,
+				GFP_KERNEL | __GFP_NOWARN, nid);
 		if (!base)
 			base = vmalloc_node(table_size, nid);
 	} else {
diff --git a/mm/page_io.c b/mm/page_io.c
index dc6ce0afbded..3023c475e041 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -111,7 +111,7 @@ int swap_writepage(struct page *page, struct writeback_control *wbc)
 		goto out;
 	}
 	if (wbc->sync_mode == WB_SYNC_ALL)
-		rw |= (1 << BIO_RW_SYNC);
+		rw |= (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG);
 	count_vm_event(PSWPOUT);
 	set_page_writeback(page);
 	unlock_page(page);
diff --git a/mm/rmap.c b/mm/rmap.c
index ac4af8cffbf9..16521664010d 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1072,7 +1072,8 @@ static int try_to_unmap_file(struct page *page, int unlock, int migration)
 	spin_lock(&mapping->i_mmap_lock);
 	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
 		if (MLOCK_PAGES && unlikely(unlock)) {
-			if (!(vma->vm_flags & VM_LOCKED))
+			if (!((vma->vm_flags & VM_LOCKED) &&
+						page_mapped_in_vma(page, vma)))
 				continue; /* must visit all vmas */
 			ret = SWAP_MLOCK;
 		} else {
diff --git a/mm/shmem.c b/mm/shmem.c
index 8135fac294ee..7ec78e24a30d 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -170,13 +170,13 @@ static inline struct shmem_sb_info *SHMEM_SB(struct super_block *sb)
  */
 static inline int shmem_acct_size(unsigned long flags, loff_t size)
 {
-	return (flags & VM_ACCOUNT) ?
-		security_vm_enough_memory_kern(VM_ACCT(size)) : 0;
+	return (flags & VM_NORESERVE) ?
+		0 : security_vm_enough_memory_kern(VM_ACCT(size));
 }
 
 static inline void shmem_unacct_size(unsigned long flags, loff_t size)
 {
-	if (flags & VM_ACCOUNT)
+	if (!(flags & VM_NORESERVE))
 		vm_unacct_memory(VM_ACCT(size));
 }
 
@@ -188,13 +188,13 @@ static inline void shmem_unacct_size(unsigned long flags, loff_t size)
  */
 static inline int shmem_acct_block(unsigned long flags)
 {
-	return (flags & VM_ACCOUNT) ?
-		0 : security_vm_enough_memory_kern(VM_ACCT(PAGE_CACHE_SIZE));
+	return (flags & VM_NORESERVE) ?
+		security_vm_enough_memory_kern(VM_ACCT(PAGE_CACHE_SIZE)) : 0;
 }
 
 static inline void shmem_unacct_blocks(unsigned long flags, long pages)
 {
-	if (!(flags & VM_ACCOUNT))
+	if (flags & VM_NORESERVE)
 		vm_unacct_memory(pages * VM_ACCT(PAGE_CACHE_SIZE));
 }
 
@@ -1516,8 +1516,8 @@ static int shmem_mmap(struct file *file, struct vm_area_struct *vma)
 	return 0;
 }
 
-static struct inode *
-shmem_get_inode(struct super_block *sb, int mode, dev_t dev)
+static struct inode *shmem_get_inode(struct super_block *sb, int mode,
+					dev_t dev, unsigned long flags)
 {
 	struct inode *inode;
 	struct shmem_inode_info *info;
@@ -1538,6 +1538,7 @@ shmem_get_inode(struct super_block *sb, int mode, dev_t dev)
 		info = SHMEM_I(inode);
 		memset(info, 0, (char *)inode - (char *)info);
 		spin_lock_init(&info->lock);
+		info->flags = flags & VM_NORESERVE;
 		INIT_LIST_HEAD(&info->swaplist);
 
 		switch (mode & S_IFMT) {
@@ -1780,9 +1781,10 @@ static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf)
 static int
 shmem_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
 {
-	struct inode *inode = shmem_get_inode(dir->i_sb, mode, dev);
+	struct inode *inode;
 	int error = -ENOSPC;
 
+	inode = shmem_get_inode(dir->i_sb, mode, dev, VM_NORESERVE);
 	if (inode) {
 		error = security_inode_init_security(inode, dir, NULL, NULL,
 						     NULL);
@@ -1921,7 +1923,7 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s
 	if (len > PAGE_CACHE_SIZE)
 		return -ENAMETOOLONG;
 
-	inode = shmem_get_inode(dir->i_sb, S_IFLNK|S_IRWXUGO, 0);
+	inode = shmem_get_inode(dir->i_sb, S_IFLNK|S_IRWXUGO, 0, VM_NORESERVE);
 	if (!inode)
 		return -ENOSPC;
 
@@ -2333,7 +2335,7 @@ static int shmem_fill_super(struct super_block *sb,
 	sb->s_flags |= MS_POSIXACL;
 #endif
 
-	inode = shmem_get_inode(sb, S_IFDIR | sbinfo->mode, 0);
+	inode = shmem_get_inode(sb, S_IFDIR | sbinfo->mode, 0, VM_NORESERVE);
 	if (!inode)
 		goto failed;
 	inode->i_uid = sbinfo->uid;
@@ -2575,12 +2577,12 @@ int shmem_unuse(swp_entry_t entry, struct page *page)
 	return 0;
 }
 
-#define shmem_file_operations ramfs_file_operations
-#define shmem_vm_ops generic_file_vm_ops
-#define shmem_get_inode ramfs_get_inode
-#define shmem_acct_size(a, b) 0
-#define shmem_unacct_size(a, b) do {} while (0)
+#define shmem_vm_ops generic_file_vm_ops
+#define shmem_file_operations ramfs_file_operations
+#define shmem_get_inode(sb, mode, dev, flags) ramfs_get_inode(sb, mode, dev)
+#define shmem_acct_size(flags, size) 0
+#define shmem_unacct_size(flags, size) do {} while (0)
 #define SHMEM_MAX_BYTES LLONG_MAX
 
 #endif /* CONFIG_SHMEM */
 
@@ -2590,7 +2592,7 @@ int shmem_unuse(swp_entry_t entry, struct page *page)
  * shmem_file_setup - get an unlinked file living in tmpfs
  * @name: name for dentry (to be seen in /proc/<pid>/maps
 * @size: size to be set for the file
- * @flags: vm_flags
+ * @flags: VM_NORESERVE suppresses pre-accounting of the entire object size
 */
 struct file *shmem_file_setup(char *name, loff_t size, unsigned long flags)
 {
@@ -2624,13 +2626,10 @@ struct file *shmem_file_setup(char *name, loff_t size, unsigned long flags)
 		goto put_dentry;
 
 	error = -ENOSPC;
-	inode = shmem_get_inode(root->d_sb, S_IFREG | S_IRWXUGO, 0);
+	inode = shmem_get_inode(root->d_sb, S_IFREG | S_IRWXUGO, 0, flags);
 	if (!inode)
 		goto close_file;
 
-#ifdef CONFIG_SHMEM
-	SHMEM_I(inode)->flags = (flags & VM_NORESERVE) ? 0 : VM_ACCOUNT;
-#endif
 	d_instantiate(dentry, inode);
 	inode->i_size = size;
 	inode->i_nlink = 0; /* It is unlinked */
diff --git a/mm/slab.c b/mm/slab.c
index ddc41f337d58..4d00855629c4 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -4457,3 +4457,4 @@ size_t ksize(const void *objp)
 
 	return obj_size(virt_to_cache(objp));
 }
+EXPORT_SYMBOL(ksize);
diff --git a/mm/slob.c b/mm/slob.c
index bf7e8fc3aed8..52bc8a2bd9ef 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -521,6 +521,7 @@ size_t ksize(const void *block)
 	} else
 		return sp->page.private;
 }
+EXPORT_SYMBOL(ksize);
 
 struct kmem_cache {
 	unsigned int size, align;
diff --git a/mm/slub.c b/mm/slub.c
index bdc9abb08a23..0280eee6cf37 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2736,6 +2736,7 @@ size_t ksize(const void *object)
 	 */
 	return s->size;
 }
+EXPORT_SYMBOL(ksize);
 
 void kfree(const void *x)
 {
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 7e6304dfafab..312fafe0ab6e 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -635,7 +635,7 @@ int swap_type_of(dev_t device, sector_t offset, struct block_device **bdev_p)
 
 		if (!bdev) {
 			if (bdev_p)
-				*bdev_p = sis->bdev;
+				*bdev_p = bdget(sis->bdev->bd_dev);
 
 			spin_unlock(&swap_lock);
 			return i;
@@ -647,7 +647,7 @@ int swap_type_of(dev_t device, sector_t offset, struct block_device **bdev_p)
 					struct swap_extent, list);
 			if (se->start_block == offset) {
 				if (bdev_p)
-					*bdev_p = sis->bdev;
+					*bdev_p = bdget(sis->bdev->bd_dev);
 
 				spin_unlock(&swap_lock);
 				bdput(bdev);
diff --git a/mm/util.c b/mm/util.c
index cb00b748ce47..37eaccdf3054 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -129,6 +129,26 @@ void *krealloc(const void *p, size_t new_size, gfp_t flags)
 }
 EXPORT_SYMBOL(krealloc);
 
+/**
+ * kzfree - like kfree but zero memory
+ * @p: object to free memory of
+ *
+ * The memory of the object @p points to is zeroed before freed.
+ * If @p is %NULL, kzfree() does nothing.
+ */
+void kzfree(const void *p)
+{
+	size_t ks;
+	void *mem = (void *)p;
+
+	if (unlikely(ZERO_OR_NULL_PTR(mem)))
+		return;
+	ks = ksize(mem);
+	memset(mem, 0, ks);
+	kfree(mem);
+}
+EXPORT_SYMBOL(kzfree);
+
 /*
  * strndup_user - duplicate an existing string from user space
  * @s: The string to duplicate
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 75f49d312e8c..520a75980269 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -323,6 +323,7 @@ static struct vmap_area *alloc_vmap_area(unsigned long size,
 	unsigned long addr;
 	int purged = 0;
 
+	BUG_ON(!size);
 	BUG_ON(size & ~PAGE_MASK);
 
 	va = kmalloc_node(sizeof(struct vmap_area),
@@ -334,6 +335,9 @@ retry:
 	addr = ALIGN(vstart, align);
 
 	spin_lock(&vmap_area_lock);
+	if (addr + size - 1 < addr)
+		goto overflow;
+
 	/* XXX: could have a last_hole cache */
 	n = vmap_area_root.rb_node;
 	if (n) {
@@ -365,6 +369,8 @@ retry:
 
 		while (addr + size > first->va_start && addr + size <= vend) {
 			addr = ALIGN(first->va_end + PAGE_SIZE, align);
+			if (addr + size - 1 < addr)
+				goto overflow;
 
 			n = rb_next(&first->rb_node);
 			if (n)
@@ -375,6 +381,7 @@ retry:
 	}
 found:
 	if (addr + size > vend) {
+overflow:
 		spin_unlock(&vmap_area_lock);
 		if (!purged) {
 			purge_vmap_area_lazy();
@@ -498,6 +505,7 @@ static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end,
 	static DEFINE_SPINLOCK(purge_lock);
 	LIST_HEAD(valist);
 	struct vmap_area *va;
+	struct vmap_area *n_va;
 	int nr = 0;
 
 	/*
@@ -537,7 +545,7 @@ static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end,
 
 	if (nr) {
 		spin_lock(&vmap_area_lock);
-		list_for_each_entry(va, &valist, purge_list)
+		list_for_each_entry_safe(va, n_va, &valist, purge_list)
 			__free_vmap_area(va);
 		spin_unlock(&vmap_area_lock);
 	}
@@ -1012,6 +1020,8 @@ void __init vmalloc_init(void)
 void unmap_kernel_range(unsigned long addr, unsigned long size)
 {
 	unsigned long end = addr + size;
+
+	flush_cache_vunmap(addr, end);
 	vunmap_page_range(addr, end);
 	flush_tlb_kernel_range(addr, end);
 }
@@ -1106,6 +1116,14 @@ struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags,
 }
 EXPORT_SYMBOL_GPL(__get_vm_area);
 
+struct vm_struct *__get_vm_area_caller(unsigned long size, unsigned long flags,
+				       unsigned long start, unsigned long end,
+				       void *caller)
+{
+	return __get_vm_area_node(size, flags, start, end, -1, GFP_KERNEL,
+				  caller);
+}
+
 /**
  * get_vm_area - reserve a contiguous kernel virtual area
  * @size: size of the area
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 9a27c44aa327..56ddf41149eb 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1262,7 +1262,6 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 	 * Move the pages to the [file or anon] inactive list.
 	 */
 	pagevec_init(&pvec, 1);
-	pgmoved = 0;
 	lru = LRU_BASE + file * LRU_FILE;
 
 	spin_lock_irq(&zone->lru_lock);
@@ -1274,6 +1273,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 	 */
 	reclaim_stat->recent_rotated[!!file] += pgmoved;
 
+	pgmoved = 0;
 	while (!list_empty(&l_inactive)) {
 		page = lru_to_page(&l_inactive);
 		prefetchw_prev_lru_page(page, &l_inactive, flags);
@@ -1469,7 +1469,7 @@ static void shrink_zone(int priority, struct zone *zone,
 		int file = is_file_lru(l);
 		int scan;
 
-		scan = zone_page_state(zone, NR_LRU_BASE + l);
+		scan = zone_nr_pages(zone, sc, l);
 		if (priority) {
 			scan >>= priority;
 			scan = (scan * percent[file]) / 100;
@@ -2057,31 +2057,31 @@ static unsigned long shrink_all_zones(unsigned long nr_pages, int prio,
 				      int pass, struct scan_control *sc)
 {
 	struct zone *zone;
-	unsigned long nr_to_scan, ret = 0;
-	enum lru_list l;
+	unsigned long ret = 0;
 
 	for_each_zone(zone) {
+		enum lru_list l;
 
 		if (!populated_zone(zone))
 			continue;
-
 		if (zone_is_all_unreclaimable(zone) && prio != DEF_PRIORITY)
 			continue;
 
 		for_each_evictable_lru(l) {
+			enum zone_stat_item ls = NR_LRU_BASE + l;
+			unsigned long lru_pages = zone_page_state(zone, ls);
+
 			/* For pass = 0, we don't shrink the active list */
-			if (pass == 0 &&
-			    (l == LRU_ACTIVE || l == LRU_ACTIVE_FILE))
+			if (pass == 0 && (l == LRU_ACTIVE_ANON ||
+						l == LRU_ACTIVE_FILE))
 				continue;
 
-			zone->lru[l].nr_scan +=
-				(zone_page_state(zone, NR_LRU_BASE + l)
-								>> prio) + 1;
+			zone->lru[l].nr_scan += (lru_pages >> prio) + 1;
 			if (zone->lru[l].nr_scan >= nr_pages || pass > 3) {
+				unsigned long nr_to_scan;
+
 				zone->lru[l].nr_scan = 0;
-				nr_to_scan = min(nr_pages,
-					zone_page_state(zone,
-							NR_LRU_BASE + l));
+				nr_to_scan = min(nr_pages, lru_pages);
 				ret += shrink_list(l, nr_to_scan, zone,
 								sc, prio);
 				if (ret >= nr_pages)
@@ -2089,7 +2089,6 @@ static unsigned long shrink_all_zones(unsigned long nr_pages, int prio,
 			}
 		}
 	}
-
 	return ret;
 }
 
@@ -2112,7 +2111,6 @@ unsigned long shrink_all_memory(unsigned long nr_pages)
 		.may_swap = 0,
 		.swap_cluster_max = nr_pages,
 		.may_writepage = 1,
-		.swappiness = vm_swappiness,
 		.isolate_pages = isolate_pages_global,
 	};
 
@@ -2146,10 +2144,8 @@ unsigned long shrink_all_memory(unsigned long nr_pages)
 		int prio;
 
 		/* Force reclaiming mapped pages in the passes #3 and #4 */
-		if (pass > 2) {
+		if (pass > 2)
 			sc.may_swap = 1;
-			sc.swappiness = 100;
-		}
 
 		for (prio = DEF_PRIORITY; prio >= 0; prio--) {
 			unsigned long nr_to_scan = nr_pages - ret;