aboutsummaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2009-03-05 05:45:22 -0500
committerIngo Molnar <mingo@elte.hu>2009-03-05 05:45:22 -0500
commita140feab42d1cfd811930ab76104559c19dfc4b0 (patch)
tree41fd871990e888dd5616a6bf1891a1ff307221df /mm
parent1075414b06109a99b0e87601e84c74a95bd45681 (diff)
parentfec6c6fec3e20637bee5d276fb61dd8b49a3f9cc (diff)
Merge commit 'v2.6.29-rc7' into core/locking
Diffstat (limited to 'mm')
-rw-r--r--mm/fremap.c2
-rw-r--r--mm/hugetlb.c28
-rw-r--r--mm/migrate.c2
-rw-r--r--mm/mlock.c12
-rw-r--r--mm/mmap.c48
-rw-r--r--mm/mprotect.c5
-rw-r--r--mm/page-writeback.c46
-rw-r--r--mm/page_alloc.c27
-rw-r--r--mm/page_cgroup.c3
-rw-r--r--mm/page_io.c2
-rw-r--r--mm/rmap.c3
-rw-r--r--mm/shmem.c43
-rw-r--r--mm/slab.c1
-rw-r--r--mm/slob.c1
-rw-r--r--mm/slub.c1
-rw-r--r--mm/swapfile.c4
-rw-r--r--mm/util.c20
-rw-r--r--mm/vmalloc.c20
-rw-r--r--mm/vmscan.c28
19 files changed, 197 insertions, 99 deletions
diff --git a/mm/fremap.c b/mm/fremap.c
index 736ba7f3306a..b6ec85abbb39 100644
--- a/mm/fremap.c
+++ b/mm/fremap.c
@@ -198,7 +198,7 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
198 flags &= MAP_NONBLOCK; 198 flags &= MAP_NONBLOCK;
199 get_file(file); 199 get_file(file);
200 addr = mmap_region(file, start, size, 200 addr = mmap_region(file, start, size,
201 flags, vma->vm_flags, pgoff, 1); 201 flags, vma->vm_flags, pgoff);
202 fput(file); 202 fput(file);
203 if (IS_ERR_VALUE(addr)) { 203 if (IS_ERR_VALUE(addr)) {
204 err = addr; 204 err = addr;
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 618e98304080..107da3d809a8 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2269,12 +2269,18 @@ void hugetlb_change_protection(struct vm_area_struct *vma,
2269 2269
2270int hugetlb_reserve_pages(struct inode *inode, 2270int hugetlb_reserve_pages(struct inode *inode,
2271 long from, long to, 2271 long from, long to,
2272 struct vm_area_struct *vma) 2272 struct vm_area_struct *vma,
2273 int acctflag)
2273{ 2274{
2274 long ret, chg; 2275 long ret, chg;
2275 struct hstate *h = hstate_inode(inode); 2276 struct hstate *h = hstate_inode(inode);
2276 2277
2277 if (vma && vma->vm_flags & VM_NORESERVE) 2278 /*
2279 * Only apply hugepage reservation if asked. At fault time, an
2280 * attempt will be made for VM_NORESERVE to allocate a page
2281 * and filesystem quota without using reserves
2282 */
2283 if (acctflag & VM_NORESERVE)
2278 return 0; 2284 return 0;
2279 2285
2280 /* 2286 /*
@@ -2299,13 +2305,31 @@ int hugetlb_reserve_pages(struct inode *inode,
2299 if (chg < 0) 2305 if (chg < 0)
2300 return chg; 2306 return chg;
2301 2307
2308 /* There must be enough filesystem quota for the mapping */
2302 if (hugetlb_get_quota(inode->i_mapping, chg)) 2309 if (hugetlb_get_quota(inode->i_mapping, chg))
2303 return -ENOSPC; 2310 return -ENOSPC;
2311
2312 /*
2313 * Check enough hugepages are available for the reservation.
2314 * Hand back the quota if there are not
2315 */
2304 ret = hugetlb_acct_memory(h, chg); 2316 ret = hugetlb_acct_memory(h, chg);
2305 if (ret < 0) { 2317 if (ret < 0) {
2306 hugetlb_put_quota(inode->i_mapping, chg); 2318 hugetlb_put_quota(inode->i_mapping, chg);
2307 return ret; 2319 return ret;
2308 } 2320 }
2321
2322 /*
2323 * Account for the reservations made. Shared mappings record regions
2324 * that have reservations as they are shared by multiple VMAs.
2325 * When the last VMA disappears, the region map says how much
2326 * the reservation was and the page cache tells how much of
2327 * the reservation was consumed. Private mappings are per-VMA and
2328 * only the consumed reservations are tracked. When the VMA
2329 * disappears, the original reservation is the VMA size and the
2330 * consumed reservations are stored in the map. Hence, nothing
2331 * else has to be done for private mappings here
2332 */
2309 if (!vma || vma->vm_flags & VM_SHARED) 2333 if (!vma || vma->vm_flags & VM_SHARED)
2310 region_add(&inode->i_mapping->private_list, from, to); 2334 region_add(&inode->i_mapping->private_list, from, to);
2311 return 0; 2335 return 0;
diff --git a/mm/migrate.c b/mm/migrate.c
index 2bb4e1d63520..a9eff3f092f6 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1129,7 +1129,7 @@ int migrate_vmas(struct mm_struct *mm, const nodemask_t *to,
1129 struct vm_area_struct *vma; 1129 struct vm_area_struct *vma;
1130 int err = 0; 1130 int err = 0;
1131 1131
1132 for(vma = mm->mmap; vma->vm_next && !err; vma = vma->vm_next) { 1132 for (vma = mm->mmap; vma && !err; vma = vma->vm_next) {
1133 if (vma->vm_ops && vma->vm_ops->migrate) { 1133 if (vma->vm_ops && vma->vm_ops->migrate) {
1134 err = vma->vm_ops->migrate(vma, to, from, flags); 1134 err = vma->vm_ops->migrate(vma, to, from, flags);
1135 if (err) 1135 if (err)
diff --git a/mm/mlock.c b/mm/mlock.c
index 028ec482fdd4..cbe9e0581b75 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -311,7 +311,10 @@ long mlock_vma_pages_range(struct vm_area_struct *vma,
311 is_vm_hugetlb_page(vma) || 311 is_vm_hugetlb_page(vma) ||
312 vma == get_gate_vma(current))) { 312 vma == get_gate_vma(current))) {
313 313
314 return __mlock_vma_pages_range(vma, start, end, 1); 314 __mlock_vma_pages_range(vma, start, end, 1);
315
316 /* Hide errors from mmap() and other callers */
317 return 0;
315 } 318 }
316 319
317 /* 320 /*
@@ -657,7 +660,7 @@ void *alloc_locked_buffer(size_t size)
657 return buffer; 660 return buffer;
658} 661}
659 662
660void free_locked_buffer(void *buffer, size_t size) 663void release_locked_buffer(void *buffer, size_t size)
661{ 664{
662 unsigned long pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT; 665 unsigned long pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT;
663 666
@@ -667,6 +670,11 @@ void free_locked_buffer(void *buffer, size_t size)
667 current->mm->locked_vm -= pgsz; 670 current->mm->locked_vm -= pgsz;
668 671
669 up_write(&current->mm->mmap_sem); 672 up_write(&current->mm->mmap_sem);
673}
674
675void free_locked_buffer(void *buffer, size_t size)
676{
677 release_locked_buffer(buffer, size);
670 678
671 kfree(buffer); 679 kfree(buffer);
672} 680}
diff --git a/mm/mmap.c b/mm/mmap.c
index 214b6a258eeb..00ced3ee49a8 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -918,7 +918,6 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
918 struct inode *inode; 918 struct inode *inode;
919 unsigned int vm_flags; 919 unsigned int vm_flags;
920 int error; 920 int error;
921 int accountable = 1;
922 unsigned long reqprot = prot; 921 unsigned long reqprot = prot;
923 922
924 /* 923 /*
@@ -1019,8 +1018,6 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
1019 return -EPERM; 1018 return -EPERM;
1020 vm_flags &= ~VM_MAYEXEC; 1019 vm_flags &= ~VM_MAYEXEC;
1021 } 1020 }
1022 if (is_file_hugepages(file))
1023 accountable = 0;
1024 1021
1025 if (!file->f_op || !file->f_op->mmap) 1022 if (!file->f_op || !file->f_op->mmap)
1026 return -ENODEV; 1023 return -ENODEV;
@@ -1053,8 +1050,7 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
1053 if (error) 1050 if (error)
1054 return error; 1051 return error;
1055 1052
1056 return mmap_region(file, addr, len, flags, vm_flags, pgoff, 1053 return mmap_region(file, addr, len, flags, vm_flags, pgoff);
1057 accountable);
1058} 1054}
1059EXPORT_SYMBOL(do_mmap_pgoff); 1055EXPORT_SYMBOL(do_mmap_pgoff);
1060 1056
@@ -1092,17 +1088,23 @@ int vma_wants_writenotify(struct vm_area_struct *vma)
1092 1088
1093/* 1089/*
1094 * We account for memory if it's a private writeable mapping, 1090 * We account for memory if it's a private writeable mapping,
1095 * and VM_NORESERVE wasn't set. 1091 * not hugepages and VM_NORESERVE wasn't set.
1096 */ 1092 */
1097static inline int accountable_mapping(unsigned int vm_flags) 1093static inline int accountable_mapping(struct file *file, unsigned int vm_flags)
1098{ 1094{
1095 /*
1096 * hugetlb has its own accounting separate from the core VM
1097 * VM_HUGETLB may not be set yet so we cannot check for that flag.
1098 */
1099 if (file && is_file_hugepages(file))
1100 return 0;
1101
1099 return (vm_flags & (VM_NORESERVE | VM_SHARED | VM_WRITE)) == VM_WRITE; 1102 return (vm_flags & (VM_NORESERVE | VM_SHARED | VM_WRITE)) == VM_WRITE;
1100} 1103}
1101 1104
1102unsigned long mmap_region(struct file *file, unsigned long addr, 1105unsigned long mmap_region(struct file *file, unsigned long addr,
1103 unsigned long len, unsigned long flags, 1106 unsigned long len, unsigned long flags,
1104 unsigned int vm_flags, unsigned long pgoff, 1107 unsigned int vm_flags, unsigned long pgoff)
1105 int accountable)
1106{ 1108{
1107 struct mm_struct *mm = current->mm; 1109 struct mm_struct *mm = current->mm;
1108 struct vm_area_struct *vma, *prev; 1110 struct vm_area_struct *vma, *prev;
@@ -1128,18 +1130,22 @@ munmap_back:
1128 1130
1129 /* 1131 /*
1130 * Set 'VM_NORESERVE' if we should not account for the 1132 * Set 'VM_NORESERVE' if we should not account for the
1131 * memory use of this mapping. We only honor MAP_NORESERVE 1133 * memory use of this mapping.
1132 * if we're allowed to overcommit memory.
1133 */ 1134 */
1134 if ((flags & MAP_NORESERVE) && sysctl_overcommit_memory != OVERCOMMIT_NEVER) 1135 if ((flags & MAP_NORESERVE)) {
1135 vm_flags |= VM_NORESERVE; 1136 /* We honor MAP_NORESERVE if allowed to overcommit */
1136 if (!accountable) 1137 if (sysctl_overcommit_memory != OVERCOMMIT_NEVER)
1137 vm_flags |= VM_NORESERVE; 1138 vm_flags |= VM_NORESERVE;
1139
1140 /* hugetlb applies strict overcommit unless MAP_NORESERVE */
1141 if (file && is_file_hugepages(file))
1142 vm_flags |= VM_NORESERVE;
1143 }
1138 1144
1139 /* 1145 /*
1140 * Private writable mapping: check memory availability 1146 * Private writable mapping: check memory availability
1141 */ 1147 */
1142 if (accountable_mapping(vm_flags)) { 1148 if (accountable_mapping(file, vm_flags)) {
1143 charged = len >> PAGE_SHIFT; 1149 charged = len >> PAGE_SHIFT;
1144 if (security_vm_enough_memory(charged)) 1150 if (security_vm_enough_memory(charged))
1145 return -ENOMEM; 1151 return -ENOMEM;
@@ -2078,12 +2084,8 @@ void exit_mmap(struct mm_struct *mm)
2078 unsigned long end; 2084 unsigned long end;
2079 2085
2080 /* mm's last user has gone, and its about to be pulled down */ 2086 /* mm's last user has gone, and its about to be pulled down */
2081 arch_exit_mmap(mm);
2082 mmu_notifier_release(mm); 2087 mmu_notifier_release(mm);
2083 2088
2084 if (!mm->mmap) /* Can happen if dup_mmap() received an OOM */
2085 return;
2086
2087 if (mm->locked_vm) { 2089 if (mm->locked_vm) {
2088 vma = mm->mmap; 2090 vma = mm->mmap;
2089 while (vma) { 2091 while (vma) {
@@ -2092,7 +2094,13 @@ void exit_mmap(struct mm_struct *mm)
2092 vma = vma->vm_next; 2094 vma = vma->vm_next;
2093 } 2095 }
2094 } 2096 }
2097
2098 arch_exit_mmap(mm);
2099
2095 vma = mm->mmap; 2100 vma = mm->mmap;
2101 if (!vma) /* Can happen if dup_mmap() received an OOM */
2102 return;
2103
2096 lru_add_drain(); 2104 lru_add_drain();
2097 flush_cache_mm(mm); 2105 flush_cache_mm(mm);
2098 tlb = tlb_gather_mmu(mm, 1); 2106 tlb = tlb_gather_mmu(mm, 1);
diff --git a/mm/mprotect.c b/mm/mprotect.c
index abe2694e13f4..258197b76fb4 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -151,10 +151,11 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
151 /* 151 /*
152 * If we make a private mapping writable we increase our commit; 152 * If we make a private mapping writable we increase our commit;
153 * but (without finer accounting) cannot reduce our commit if we 153 * but (without finer accounting) cannot reduce our commit if we
154 * make it unwritable again. 154 * make it unwritable again. hugetlb mapping were accounted for
155 * even if read-only so there is no need to account for them here
155 */ 156 */
156 if (newflags & VM_WRITE) { 157 if (newflags & VM_WRITE) {
157 if (!(oldflags & (VM_ACCOUNT|VM_WRITE| 158 if (!(oldflags & (VM_ACCOUNT|VM_WRITE|VM_HUGETLB|
158 VM_SHARED|VM_NORESERVE))) { 159 VM_SHARED|VM_NORESERVE))) {
159 charged = nrpages; 160 charged = nrpages;
160 if (security_vm_enough_memory(charged)) 161 if (security_vm_enough_memory(charged))
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index dc32dae01e5f..74dc57c74349 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -209,7 +209,7 @@ int dirty_bytes_handler(struct ctl_table *table, int write,
209 struct file *filp, void __user *buffer, size_t *lenp, 209 struct file *filp, void __user *buffer, size_t *lenp,
210 loff_t *ppos) 210 loff_t *ppos)
211{ 211{
212 int old_bytes = vm_dirty_bytes; 212 unsigned long old_bytes = vm_dirty_bytes;
213 int ret; 213 int ret;
214 214
215 ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos); 215 ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos);
@@ -240,7 +240,7 @@ void bdi_writeout_inc(struct backing_dev_info *bdi)
240} 240}
241EXPORT_SYMBOL_GPL(bdi_writeout_inc); 241EXPORT_SYMBOL_GPL(bdi_writeout_inc);
242 242
243static inline void task_dirty_inc(struct task_struct *tsk) 243void task_dirty_inc(struct task_struct *tsk)
244{ 244{
245 prop_inc_single(&vm_dirties, &tsk->dirties); 245 prop_inc_single(&vm_dirties, &tsk->dirties);
246} 246}
@@ -1051,20 +1051,23 @@ continue_unlock:
1051 } 1051 }
1052 } 1052 }
1053 1053
1054 if (nr_to_write > 0) 1054 if (nr_to_write > 0) {
1055 nr_to_write--; 1055 nr_to_write--;
1056 else if (wbc->sync_mode == WB_SYNC_NONE) { 1056 if (nr_to_write == 0 &&
1057 /* 1057 wbc->sync_mode == WB_SYNC_NONE) {
1058 * We stop writing back only if we are not 1058 /*
1059 * doing integrity sync. In case of integrity 1059 * We stop writing back only if we are
1060 * sync we have to keep going because someone 1060 * not doing integrity sync. In case of
1061 * may be concurrently dirtying pages, and we 1061 * integrity sync we have to keep going
1062 * might have synced a lot of newly appeared 1062 * because someone may be concurrently
1063 * dirty pages, but have not synced all of the 1063 * dirtying pages, and we might have
1064 * old dirty pages. 1064 * synced a lot of newly appeared dirty
1065 */ 1065 * pages, but have not synced all of the
1066 done = 1; 1066 * old dirty pages.
1067 break; 1067 */
1068 done = 1;
1069 break;
1070 }
1068 } 1071 }
1069 1072
1070 if (wbc->nonblocking && bdi_write_congested(bdi)) { 1073 if (wbc->nonblocking && bdi_write_congested(bdi)) {
@@ -1076,7 +1079,7 @@ continue_unlock:
1076 pagevec_release(&pvec); 1079 pagevec_release(&pvec);
1077 cond_resched(); 1080 cond_resched();
1078 } 1081 }
1079 if (!cycled) { 1082 if (!cycled && !done) {
1080 /* 1083 /*
1081 * range_cyclic: 1084 * range_cyclic:
1082 * We hit the last page and there is more work to be done: wrap 1085 * We hit the last page and there is more work to be done: wrap
@@ -1227,6 +1230,7 @@ int __set_page_dirty_nobuffers(struct page *page)
1227 __inc_zone_page_state(page, NR_FILE_DIRTY); 1230 __inc_zone_page_state(page, NR_FILE_DIRTY);
1228 __inc_bdi_stat(mapping->backing_dev_info, 1231 __inc_bdi_stat(mapping->backing_dev_info,
1229 BDI_RECLAIMABLE); 1232 BDI_RECLAIMABLE);
1233 task_dirty_inc(current);
1230 task_io_account_write(PAGE_CACHE_SIZE); 1234 task_io_account_write(PAGE_CACHE_SIZE);
1231 } 1235 }
1232 radix_tree_tag_set(&mapping->page_tree, 1236 radix_tree_tag_set(&mapping->page_tree,
@@ -1259,7 +1263,7 @@ EXPORT_SYMBOL(redirty_page_for_writepage);
1259 * If the mapping doesn't provide a set_page_dirty a_op, then 1263 * If the mapping doesn't provide a set_page_dirty a_op, then
1260 * just fall through and assume that it wants buffer_heads. 1264 * just fall through and assume that it wants buffer_heads.
1261 */ 1265 */
1262static int __set_page_dirty(struct page *page) 1266int set_page_dirty(struct page *page)
1263{ 1267{
1264 struct address_space *mapping = page_mapping(page); 1268 struct address_space *mapping = page_mapping(page);
1265 1269
@@ -1277,14 +1281,6 @@ static int __set_page_dirty(struct page *page)
1277 } 1281 }
1278 return 0; 1282 return 0;
1279} 1283}
1280
1281int set_page_dirty(struct page *page)
1282{
1283 int ret = __set_page_dirty(page);
1284 if (ret)
1285 task_dirty_inc(current);
1286 return ret;
1287}
1288EXPORT_SYMBOL(set_page_dirty); 1284EXPORT_SYMBOL(set_page_dirty);
1289 1285
1290/* 1286/*
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 22b15a4cde8a..a3803ea8c27d 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2994,7 +2994,7 @@ static int __meminit next_active_region_index_in_nid(int index, int nid)
2994 * was used and there are no special requirements, this is a convenient 2994 * was used and there are no special requirements, this is a convenient
2995 * alternative 2995 * alternative
2996 */ 2996 */
2997int __meminit early_pfn_to_nid(unsigned long pfn) 2997int __meminit __early_pfn_to_nid(unsigned long pfn)
2998{ 2998{
2999 int i; 2999 int i;
3000 3000
@@ -3005,10 +3005,33 @@ int __meminit early_pfn_to_nid(unsigned long pfn)
3005 if (start_pfn <= pfn && pfn < end_pfn) 3005 if (start_pfn <= pfn && pfn < end_pfn)
3006 return early_node_map[i].nid; 3006 return early_node_map[i].nid;
3007 } 3007 }
3008 /* This is a memory hole */
3009 return -1;
3010}
3011#endif /* CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID */
3012
3013int __meminit early_pfn_to_nid(unsigned long pfn)
3014{
3015 int nid;
3008 3016
3017 nid = __early_pfn_to_nid(pfn);
3018 if (nid >= 0)
3019 return nid;
3020 /* just returns 0 */
3009 return 0; 3021 return 0;
3010} 3022}
3011#endif /* CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID */ 3023
3024#ifdef CONFIG_NODES_SPAN_OTHER_NODES
3025bool __meminit early_pfn_in_nid(unsigned long pfn, int node)
3026{
3027 int nid;
3028
3029 nid = __early_pfn_to_nid(pfn);
3030 if (nid >= 0 && nid != node)
3031 return false;
3032 return true;
3033}
3034#endif
3012 3035
3013/* Basic iterator support to walk early_node_map[] */ 3036/* Basic iterator support to walk early_node_map[] */
3014#define for_each_active_range_index_in_nid(i, nid) \ 3037#define for_each_active_range_index_in_nid(i, nid) \
diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c
index 7006a11350c8..ceecfbb143fa 100644
--- a/mm/page_cgroup.c
+++ b/mm/page_cgroup.c
@@ -114,7 +114,8 @@ static int __init_refok init_section_page_cgroup(unsigned long pfn)
114 nid = page_to_nid(pfn_to_page(pfn)); 114 nid = page_to_nid(pfn_to_page(pfn));
115 table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION; 115 table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION;
116 if (slab_is_available()) { 116 if (slab_is_available()) {
117 base = kmalloc_node(table_size, GFP_KERNEL, nid); 117 base = kmalloc_node(table_size,
118 GFP_KERNEL | __GFP_NOWARN, nid);
118 if (!base) 119 if (!base)
119 base = vmalloc_node(table_size, nid); 120 base = vmalloc_node(table_size, nid);
120 } else { 121 } else {
diff --git a/mm/page_io.c b/mm/page_io.c
index dc6ce0afbded..3023c475e041 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -111,7 +111,7 @@ int swap_writepage(struct page *page, struct writeback_control *wbc)
111 goto out; 111 goto out;
112 } 112 }
113 if (wbc->sync_mode == WB_SYNC_ALL) 113 if (wbc->sync_mode == WB_SYNC_ALL)
114 rw |= (1 << BIO_RW_SYNC); 114 rw |= (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG);
115 count_vm_event(PSWPOUT); 115 count_vm_event(PSWPOUT);
116 set_page_writeback(page); 116 set_page_writeback(page);
117 unlock_page(page); 117 unlock_page(page);
diff --git a/mm/rmap.c b/mm/rmap.c
index ac4af8cffbf9..16521664010d 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1072,7 +1072,8 @@ static int try_to_unmap_file(struct page *page, int unlock, int migration)
1072 spin_lock(&mapping->i_mmap_lock); 1072 spin_lock(&mapping->i_mmap_lock);
1073 vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) { 1073 vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
1074 if (MLOCK_PAGES && unlikely(unlock)) { 1074 if (MLOCK_PAGES && unlikely(unlock)) {
1075 if (!(vma->vm_flags & VM_LOCKED)) 1075 if (!((vma->vm_flags & VM_LOCKED) &&
1076 page_mapped_in_vma(page, vma)))
1076 continue; /* must visit all vmas */ 1077 continue; /* must visit all vmas */
1077 ret = SWAP_MLOCK; 1078 ret = SWAP_MLOCK;
1078 } else { 1079 } else {
diff --git a/mm/shmem.c b/mm/shmem.c
index 19d566ccdeea..4103a239ce84 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -169,13 +169,13 @@ static inline struct shmem_sb_info *SHMEM_SB(struct super_block *sb)
169 */ 169 */
170static inline int shmem_acct_size(unsigned long flags, loff_t size) 170static inline int shmem_acct_size(unsigned long flags, loff_t size)
171{ 171{
172 return (flags & VM_ACCOUNT) ? 172 return (flags & VM_NORESERVE) ?
173 security_vm_enough_memory_kern(VM_ACCT(size)) : 0; 173 0 : security_vm_enough_memory_kern(VM_ACCT(size));
174} 174}
175 175
176static inline void shmem_unacct_size(unsigned long flags, loff_t size) 176static inline void shmem_unacct_size(unsigned long flags, loff_t size)
177{ 177{
178 if (flags & VM_ACCOUNT) 178 if (!(flags & VM_NORESERVE))
179 vm_unacct_memory(VM_ACCT(size)); 179 vm_unacct_memory(VM_ACCT(size));
180} 180}
181 181
@@ -187,13 +187,13 @@ static inline void shmem_unacct_size(unsigned long flags, loff_t size)
187 */ 187 */
188static inline int shmem_acct_block(unsigned long flags) 188static inline int shmem_acct_block(unsigned long flags)
189{ 189{
190 return (flags & VM_ACCOUNT) ? 190 return (flags & VM_NORESERVE) ?
191 0 : security_vm_enough_memory_kern(VM_ACCT(PAGE_CACHE_SIZE)); 191 security_vm_enough_memory_kern(VM_ACCT(PAGE_CACHE_SIZE)) : 0;
192} 192}
193 193
194static inline void shmem_unacct_blocks(unsigned long flags, long pages) 194static inline void shmem_unacct_blocks(unsigned long flags, long pages)
195{ 195{
196 if (!(flags & VM_ACCOUNT)) 196 if (flags & VM_NORESERVE)
197 vm_unacct_memory(pages * VM_ACCT(PAGE_CACHE_SIZE)); 197 vm_unacct_memory(pages * VM_ACCT(PAGE_CACHE_SIZE));
198} 198}
199 199
@@ -1515,8 +1515,8 @@ static int shmem_mmap(struct file *file, struct vm_area_struct *vma)
1515 return 0; 1515 return 0;
1516} 1516}
1517 1517
1518static struct inode * 1518static struct inode *shmem_get_inode(struct super_block *sb, int mode,
1519shmem_get_inode(struct super_block *sb, int mode, dev_t dev) 1519 dev_t dev, unsigned long flags)
1520{ 1520{
1521 struct inode *inode; 1521 struct inode *inode;
1522 struct shmem_inode_info *info; 1522 struct shmem_inode_info *info;
@@ -1537,6 +1537,7 @@ shmem_get_inode(struct super_block *sb, int mode, dev_t dev)
1537 info = SHMEM_I(inode); 1537 info = SHMEM_I(inode);
1538 memset(info, 0, (char *)inode - (char *)info); 1538 memset(info, 0, (char *)inode - (char *)info);
1539 spin_lock_init(&info->lock); 1539 spin_lock_init(&info->lock);
1540 info->flags = flags & VM_NORESERVE;
1540 INIT_LIST_HEAD(&info->swaplist); 1541 INIT_LIST_HEAD(&info->swaplist);
1541 1542
1542 switch (mode & S_IFMT) { 1543 switch (mode & S_IFMT) {
@@ -1779,9 +1780,10 @@ static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf)
1779static int 1780static int
1780shmem_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) 1781shmem_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
1781{ 1782{
1782 struct inode *inode = shmem_get_inode(dir->i_sb, mode, dev); 1783 struct inode *inode;
1783 int error = -ENOSPC; 1784 int error = -ENOSPC;
1784 1785
1786 inode = shmem_get_inode(dir->i_sb, mode, dev, VM_NORESERVE);
1785 if (inode) { 1787 if (inode) {
1786 error = security_inode_init_security(inode, dir, NULL, NULL, 1788 error = security_inode_init_security(inode, dir, NULL, NULL,
1787 NULL); 1789 NULL);
@@ -1920,7 +1922,7 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s
1920 if (len > PAGE_CACHE_SIZE) 1922 if (len > PAGE_CACHE_SIZE)
1921 return -ENAMETOOLONG; 1923 return -ENAMETOOLONG;
1922 1924
1923 inode = shmem_get_inode(dir->i_sb, S_IFLNK|S_IRWXUGO, 0); 1925 inode = shmem_get_inode(dir->i_sb, S_IFLNK|S_IRWXUGO, 0, VM_NORESERVE);
1924 if (!inode) 1926 if (!inode)
1925 return -ENOSPC; 1927 return -ENOSPC;
1926 1928
@@ -2332,7 +2334,7 @@ static int shmem_fill_super(struct super_block *sb,
2332 sb->s_flags |= MS_POSIXACL; 2334 sb->s_flags |= MS_POSIXACL;
2333#endif 2335#endif
2334 2336
2335 inode = shmem_get_inode(sb, S_IFDIR | sbinfo->mode, 0); 2337 inode = shmem_get_inode(sb, S_IFDIR | sbinfo->mode, 0, VM_NORESERVE);
2336 if (!inode) 2338 if (!inode)
2337 goto failed; 2339 goto failed;
2338 inode->i_uid = sbinfo->uid; 2340 inode->i_uid = sbinfo->uid;
@@ -2574,12 +2576,12 @@ int shmem_unuse(swp_entry_t entry, struct page *page)
2574 return 0; 2576 return 0;
2575} 2577}
2576 2578
2577#define shmem_file_operations ramfs_file_operations 2579#define shmem_vm_ops generic_file_vm_ops
2578#define shmem_vm_ops generic_file_vm_ops 2580#define shmem_file_operations ramfs_file_operations
2579#define shmem_get_inode ramfs_get_inode 2581#define shmem_get_inode(sb, mode, dev, flags) ramfs_get_inode(sb, mode, dev)
2580#define shmem_acct_size(a, b) 0 2582#define shmem_acct_size(flags, size) 0
2581#define shmem_unacct_size(a, b) do {} while (0) 2583#define shmem_unacct_size(flags, size) do {} while (0)
2582#define SHMEM_MAX_BYTES LLONG_MAX 2584#define SHMEM_MAX_BYTES LLONG_MAX
2583 2585
2584#endif /* CONFIG_SHMEM */ 2586#endif /* CONFIG_SHMEM */
2585 2587
@@ -2589,7 +2591,7 @@ int shmem_unuse(swp_entry_t entry, struct page *page)
2589 * shmem_file_setup - get an unlinked file living in tmpfs 2591 * shmem_file_setup - get an unlinked file living in tmpfs
2590 * @name: name for dentry (to be seen in /proc/<pid>/maps 2592 * @name: name for dentry (to be seen in /proc/<pid>/maps
2591 * @size: size to be set for the file 2593 * @size: size to be set for the file
2592 * @flags: vm_flags 2594 * @flags: VM_NORESERVE suppresses pre-accounting of the entire object size
2593 */ 2595 */
2594struct file *shmem_file_setup(char *name, loff_t size, unsigned long flags) 2596struct file *shmem_file_setup(char *name, loff_t size, unsigned long flags)
2595{ 2597{
@@ -2623,13 +2625,10 @@ struct file *shmem_file_setup(char *name, loff_t size, unsigned long flags)
2623 goto put_dentry; 2625 goto put_dentry;
2624 2626
2625 error = -ENOSPC; 2627 error = -ENOSPC;
2626 inode = shmem_get_inode(root->d_sb, S_IFREG | S_IRWXUGO, 0); 2628 inode = shmem_get_inode(root->d_sb, S_IFREG | S_IRWXUGO, 0, flags);
2627 if (!inode) 2629 if (!inode)
2628 goto close_file; 2630 goto close_file;
2629 2631
2630#ifdef CONFIG_SHMEM
2631 SHMEM_I(inode)->flags = (flags & VM_NORESERVE) ? 0 : VM_ACCOUNT;
2632#endif
2633 d_instantiate(dentry, inode); 2632 d_instantiate(dentry, inode);
2634 inode->i_size = size; 2633 inode->i_size = size;
2635 inode->i_nlink = 0; /* It is unlinked */ 2634 inode->i_nlink = 0; /* It is unlinked */
diff --git a/mm/slab.c b/mm/slab.c
index 6b61de8543ec..825c606f691d 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -4461,3 +4461,4 @@ size_t ksize(const void *objp)
4461 4461
4462 return obj_size(virt_to_cache(objp)); 4462 return obj_size(virt_to_cache(objp));
4463} 4463}
4464EXPORT_SYMBOL(ksize);
diff --git a/mm/slob.c b/mm/slob.c
index 1264799df5d1..26aa464877b7 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -523,6 +523,7 @@ size_t ksize(const void *block)
523 } else 523 } else
524 return sp->page.private; 524 return sp->page.private;
525} 525}
526EXPORT_SYMBOL(ksize);
526 527
527struct kmem_cache { 528struct kmem_cache {
528 unsigned int size, align; 529 unsigned int size, align;
diff --git a/mm/slub.c b/mm/slub.c
index 214eb207c513..604da4ba59c4 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2737,6 +2737,7 @@ size_t ksize(const void *object)
2737 */ 2737 */
2738 return s->size; 2738 return s->size;
2739} 2739}
2740EXPORT_SYMBOL(ksize);
2740 2741
2741void kfree(const void *x) 2742void kfree(const void *x)
2742{ 2743{
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 7e6304dfafab..312fafe0ab6e 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -635,7 +635,7 @@ int swap_type_of(dev_t device, sector_t offset, struct block_device **bdev_p)
635 635
636 if (!bdev) { 636 if (!bdev) {
637 if (bdev_p) 637 if (bdev_p)
638 *bdev_p = sis->bdev; 638 *bdev_p = bdget(sis->bdev->bd_dev);
639 639
640 spin_unlock(&swap_lock); 640 spin_unlock(&swap_lock);
641 return i; 641 return i;
@@ -647,7 +647,7 @@ int swap_type_of(dev_t device, sector_t offset, struct block_device **bdev_p)
647 struct swap_extent, list); 647 struct swap_extent, list);
648 if (se->start_block == offset) { 648 if (se->start_block == offset) {
649 if (bdev_p) 649 if (bdev_p)
650 *bdev_p = sis->bdev; 650 *bdev_p = bdget(sis->bdev->bd_dev);
651 651
652 spin_unlock(&swap_lock); 652 spin_unlock(&swap_lock);
653 bdput(bdev); 653 bdput(bdev);
diff --git a/mm/util.c b/mm/util.c
index cb00b748ce47..37eaccdf3054 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -129,6 +129,26 @@ void *krealloc(const void *p, size_t new_size, gfp_t flags)
129} 129}
130EXPORT_SYMBOL(krealloc); 130EXPORT_SYMBOL(krealloc);
131 131
132/**
133 * kzfree - like kfree but zero memory
134 * @p: object to free memory of
135 *
136 * The memory of the object @p points to is zeroed before freed.
137 * If @p is %NULL, kzfree() does nothing.
138 */
139void kzfree(const void *p)
140{
141 size_t ks;
142 void *mem = (void *)p;
143
144 if (unlikely(ZERO_OR_NULL_PTR(mem)))
145 return;
146 ks = ksize(mem);
147 memset(mem, 0, ks);
148 kfree(mem);
149}
150EXPORT_SYMBOL(kzfree);
151
132/* 152/*
133 * strndup_user - duplicate an existing string from user space 153 * strndup_user - duplicate an existing string from user space
134 * @s: The string to duplicate 154 * @s: The string to duplicate
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 75f49d312e8c..520a75980269 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -323,6 +323,7 @@ static struct vmap_area *alloc_vmap_area(unsigned long size,
323 unsigned long addr; 323 unsigned long addr;
324 int purged = 0; 324 int purged = 0;
325 325
326 BUG_ON(!size);
326 BUG_ON(size & ~PAGE_MASK); 327 BUG_ON(size & ~PAGE_MASK);
327 328
328 va = kmalloc_node(sizeof(struct vmap_area), 329 va = kmalloc_node(sizeof(struct vmap_area),
@@ -334,6 +335,9 @@ retry:
334 addr = ALIGN(vstart, align); 335 addr = ALIGN(vstart, align);
335 336
336 spin_lock(&vmap_area_lock); 337 spin_lock(&vmap_area_lock);
338 if (addr + size - 1 < addr)
339 goto overflow;
340
337 /* XXX: could have a last_hole cache */ 341 /* XXX: could have a last_hole cache */
338 n = vmap_area_root.rb_node; 342 n = vmap_area_root.rb_node;
339 if (n) { 343 if (n) {
@@ -365,6 +369,8 @@ retry:
365 369
366 while (addr + size > first->va_start && addr + size <= vend) { 370 while (addr + size > first->va_start && addr + size <= vend) {
367 addr = ALIGN(first->va_end + PAGE_SIZE, align); 371 addr = ALIGN(first->va_end + PAGE_SIZE, align);
372 if (addr + size - 1 < addr)
373 goto overflow;
368 374
369 n = rb_next(&first->rb_node); 375 n = rb_next(&first->rb_node);
370 if (n) 376 if (n)
@@ -375,6 +381,7 @@ retry:
375 } 381 }
376found: 382found:
377 if (addr + size > vend) { 383 if (addr + size > vend) {
384overflow:
378 spin_unlock(&vmap_area_lock); 385 spin_unlock(&vmap_area_lock);
379 if (!purged) { 386 if (!purged) {
380 purge_vmap_area_lazy(); 387 purge_vmap_area_lazy();
@@ -498,6 +505,7 @@ static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end,
498 static DEFINE_SPINLOCK(purge_lock); 505 static DEFINE_SPINLOCK(purge_lock);
499 LIST_HEAD(valist); 506 LIST_HEAD(valist);
500 struct vmap_area *va; 507 struct vmap_area *va;
508 struct vmap_area *n_va;
501 int nr = 0; 509 int nr = 0;
502 510
503 /* 511 /*
@@ -537,7 +545,7 @@ static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end,
537 545
538 if (nr) { 546 if (nr) {
539 spin_lock(&vmap_area_lock); 547 spin_lock(&vmap_area_lock);
540 list_for_each_entry(va, &valist, purge_list) 548 list_for_each_entry_safe(va, n_va, &valist, purge_list)
541 __free_vmap_area(va); 549 __free_vmap_area(va);
542 spin_unlock(&vmap_area_lock); 550 spin_unlock(&vmap_area_lock);
543 } 551 }
@@ -1012,6 +1020,8 @@ void __init vmalloc_init(void)
1012void unmap_kernel_range(unsigned long addr, unsigned long size) 1020void unmap_kernel_range(unsigned long addr, unsigned long size)
1013{ 1021{
1014 unsigned long end = addr + size; 1022 unsigned long end = addr + size;
1023
1024 flush_cache_vunmap(addr, end);
1015 vunmap_page_range(addr, end); 1025 vunmap_page_range(addr, end);
1016 flush_tlb_kernel_range(addr, end); 1026 flush_tlb_kernel_range(addr, end);
1017} 1027}
@@ -1106,6 +1116,14 @@ struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags,
1106} 1116}
1107EXPORT_SYMBOL_GPL(__get_vm_area); 1117EXPORT_SYMBOL_GPL(__get_vm_area);
1108 1118
1119struct vm_struct *__get_vm_area_caller(unsigned long size, unsigned long flags,
1120 unsigned long start, unsigned long end,
1121 void *caller)
1122{
1123 return __get_vm_area_node(size, flags, start, end, -1, GFP_KERNEL,
1124 caller);
1125}
1126
1109/** 1127/**
1110 * get_vm_area - reserve a contiguous kernel virtual area 1128 * get_vm_area - reserve a contiguous kernel virtual area
1111 * @size: size of the area 1129 * @size: size of the area
diff --git a/mm/vmscan.c b/mm/vmscan.c
index cf8441345277..ae6f4c174a12 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2059,31 +2059,31 @@ static unsigned long shrink_all_zones(unsigned long nr_pages, int prio,
2059 int pass, struct scan_control *sc) 2059 int pass, struct scan_control *sc)
2060{ 2060{
2061 struct zone *zone; 2061 struct zone *zone;
2062 unsigned long nr_to_scan, ret = 0; 2062 unsigned long ret = 0;
2063 enum lru_list l;
2064 2063
2065 for_each_zone(zone) { 2064 for_each_zone(zone) {
2065 enum lru_list l;
2066 2066
2067 if (!populated_zone(zone)) 2067 if (!populated_zone(zone))
2068 continue; 2068 continue;
2069
2070 if (zone_is_all_unreclaimable(zone) && prio != DEF_PRIORITY) 2069 if (zone_is_all_unreclaimable(zone) && prio != DEF_PRIORITY)
2071 continue; 2070 continue;
2072 2071
2073 for_each_evictable_lru(l) { 2072 for_each_evictable_lru(l) {
2073 enum zone_stat_item ls = NR_LRU_BASE + l;
2074 unsigned long lru_pages = zone_page_state(zone, ls);
2075
2074 /* For pass = 0, we don't shrink the active list */ 2076 /* For pass = 0, we don't shrink the active list */
2075 if (pass == 0 && 2077 if (pass == 0 && (l == LRU_ACTIVE_ANON ||
2076 (l == LRU_ACTIVE || l == LRU_ACTIVE_FILE)) 2078 l == LRU_ACTIVE_FILE))
2077 continue; 2079 continue;
2078 2080
2079 zone->lru[l].nr_scan += 2081 zone->lru[l].nr_scan += (lru_pages >> prio) + 1;
2080 (zone_page_state(zone, NR_LRU_BASE + l)
2081 >> prio) + 1;
2082 if (zone->lru[l].nr_scan >= nr_pages || pass > 3) { 2082 if (zone->lru[l].nr_scan >= nr_pages || pass > 3) {
2083 unsigned long nr_to_scan;
2084
2083 zone->lru[l].nr_scan = 0; 2085 zone->lru[l].nr_scan = 0;
2084 nr_to_scan = min(nr_pages, 2086 nr_to_scan = min(nr_pages, lru_pages);
2085 zone_page_state(zone,
2086 NR_LRU_BASE + l));
2087 ret += shrink_list(l, nr_to_scan, zone, 2087 ret += shrink_list(l, nr_to_scan, zone,
2088 sc, prio); 2088 sc, prio);
2089 if (ret >= nr_pages) 2089 if (ret >= nr_pages)
@@ -2091,7 +2091,6 @@ static unsigned long shrink_all_zones(unsigned long nr_pages, int prio,
2091 } 2091 }
2092 } 2092 }
2093 } 2093 }
2094
2095 return ret; 2094 return ret;
2096} 2095}
2097 2096
@@ -2114,7 +2113,6 @@ unsigned long shrink_all_memory(unsigned long nr_pages)
2114 .may_swap = 0, 2113 .may_swap = 0,
2115 .swap_cluster_max = nr_pages, 2114 .swap_cluster_max = nr_pages,
2116 .may_writepage = 1, 2115 .may_writepage = 1,
2117 .swappiness = vm_swappiness,
2118 .isolate_pages = isolate_pages_global, 2116 .isolate_pages = isolate_pages_global,
2119 }; 2117 };
2120 2118
@@ -2148,10 +2146,8 @@ unsigned long shrink_all_memory(unsigned long nr_pages)
2148 int prio; 2146 int prio;
2149 2147
2150 /* Force reclaiming mapped pages in the passes #3 and #4 */ 2148 /* Force reclaiming mapped pages in the passes #3 and #4 */
2151 if (pass > 2) { 2149 if (pass > 2)
2152 sc.may_swap = 1; 2150 sc.may_swap = 1;
2153 sc.swappiness = 100;
2154 }
2155 2151
2156 for (prio = DEF_PRIORITY; prio >= 0; prio--) { 2152 for (prio = DEF_PRIORITY; prio >= 0; prio--) {
2157 unsigned long nr_to_scan = nr_pages - ret; 2153 unsigned long nr_to_scan = nr_pages - ret;