Diffstat (limited to 'mm')
-rw-r--r--  mm/hugetlb.c    | 11
-rw-r--r--  mm/madvise.c    | 13
-rw-r--r--  mm/memory.c     | 73
-rw-r--r--  mm/mempolicy.c  |  4
-rw-r--r--  mm/mmap.c       |  6
-rw-r--r--  mm/mremap.c     |  2
-rw-r--r--  mm/nommu.c      |  6
-rw-r--r--  mm/page_alloc.c | 25
8 files changed, 85 insertions(+), 55 deletions(-)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index fbd1111ea119..6bf720bc662c 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -301,6 +301,7 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
 {
 	struct mm_struct *mm = vma->vm_mm;
 	unsigned long address;
+	pte_t *ptep;
 	pte_t pte;
 	struct page *page;
 
@@ -309,9 +310,17 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
 	BUG_ON(end & ~HPAGE_MASK);
 
 	for (address = start; address < end; address += HPAGE_SIZE) {
-		pte = huge_ptep_get_and_clear(mm, address, huge_pte_offset(mm, address));
+		ptep = huge_pte_offset(mm, address);
+		if (!ptep)
+			/* This can happen on truncate, or if an
+			 * mmap() is aborted due to an error before
+			 * the prefault */
+			continue;
+
+		pte = huge_ptep_get_and_clear(mm, address, ptep);
 		if (pte_none(pte))
 			continue;
+
 		page = pte_page(pte);
 		put_page(page);
 	}
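
The hugetlb.c hunk makes unmap_hugepage_range() tolerate holes: huge_pte_offset() may return NULL for an address whose page tables were never instantiated (truncate, or an mmap() aborted before the prefault), so the pointer must be checked before huge_ptep_get_and_clear() dereferences it. A minimal sketch of that lookup-then-check loop, using invented toy_* names rather than the kernel API:

    #include <stdio.h>

    typedef unsigned long pte_val_t;

    /* Hypothetical page table: a sparse array where NULL means "never populated". */
    static pte_val_t *toy_table[8];

    static pte_val_t *toy_pte_offset(unsigned long idx)
    {
        return toy_table[idx];          /* may be NULL, like huge_pte_offset() */
    }

    static pte_val_t toy_get_and_clear(pte_val_t *ptep)
    {
        pte_val_t old = *ptep;
        *ptep = 0;
        return old;
    }

    static void toy_unmap_range(unsigned long start, unsigned long end)
    {
        unsigned long idx;

        for (idx = start; idx < end; idx++) {
            pte_val_t *ptep = toy_pte_offset(idx);

            if (!ptep)
                continue;               /* hole: nothing was ever mapped here */
            if (toy_get_and_clear(ptep))
                printf("cleared entry %lu\n", idx);
        }
    }

    int main(void)
    {
        static pte_val_t entry = 42;

        toy_table[3] = &entry;          /* only index 3 is populated */
        toy_unmap_range(0, 8);          /* must not crash on the NULL slots */
        return 0;
    }
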
diff --git a/mm/madvise.c b/mm/madvise.c
index 73180a22877e..c8c01a12fea4 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -83,9 +83,6 @@ static long madvise_willneed(struct vm_area_struct * vma,
 {
 	struct file *file = vma->vm_file;
 
-	if (!file)
-		return -EBADF;
-
 	if (file->f_mapping->a_ops->get_xip_page) {
 		/* no bad return value, but ignore advice */
 		return 0;
@@ -140,11 +137,16 @@ static long madvise_dontneed(struct vm_area_struct * vma,
 	return 0;
 }
 
-static long madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev,
-		unsigned long start, unsigned long end, int behavior)
+static long
+madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev,
+	    unsigned long start, unsigned long end, int behavior)
 {
+	struct file *filp = vma->vm_file;
 	long error = -EBADF;
 
+	if (!filp)
+		goto out;
+
 	switch (behavior) {
 	case MADV_NORMAL:
 	case MADV_SEQUENTIAL:
@@ -165,6 +167,7 @@ static long madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev
 		break;
 	}
 
+out:
 	return error;
 }
 
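
The madvise.c hunks hoist the vma->vm_file test out of madvise_willneed() into the madvise_vma() dispatcher, with a single goto-out exit path so every behavior shares the check. A rough sketch of that dispatcher shape, with made-up names and error codes standing in for -EBADF/-EINVAL:

    #include <stdio.h>

    enum toy_behavior { TOY_NORMAL, TOY_WILLNEED, TOY_DONTNEED };

    struct toy_vma { const char *file; };

    /* Dispatcher in the goto-out style: one shared precondition, one exit. */
    static long toy_madvise_vma(struct toy_vma *vma, enum toy_behavior behavior)
    {
        long error = -9;                /* stand-in for -EBADF */

        if (!vma->file)
            goto out;                   /* common check, done once for all cases */

        switch (behavior) {
        case TOY_NORMAL:
        case TOY_WILLNEED:
        case TOY_DONTNEED:
            error = 0;
            break;
        default:
            error = -22;                /* stand-in for -EINVAL */
            break;
        }
    out:
        return error;
    }

    int main(void)
    {
        struct toy_vma anon = { NULL }, backed = { "file" };

        printf("%ld %ld\n", toy_madvise_vma(&anon, TOY_WILLNEED),
               toy_madvise_vma(&backed, TOY_WILLNEED));
        return 0;
    }
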
diff --git a/mm/memory.c b/mm/memory.c
index beabdefa6254..e046b7e4b530 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -776,8 +776,8 @@ unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address,
  * Do a quick page-table lookup for a single page.
  * mm->page_table_lock must be held.
  */
-static struct page *
-__follow_page(struct mm_struct *mm, unsigned long address, int read, int write)
+static struct page *__follow_page(struct mm_struct *mm, unsigned long address,
+			int read, int write, int accessed)
 {
 	pgd_t *pgd;
 	pud_t *pud;
@@ -818,9 +818,11 @@ __follow_page(struct mm_struct *mm, unsigned long address, int read, int write)
 		pfn = pte_pfn(pte);
 		if (pfn_valid(pfn)) {
 			page = pfn_to_page(pfn);
-			if (write && !pte_dirty(pte) && !PageDirty(page))
-				set_page_dirty(page);
-			mark_page_accessed(page);
+			if (accessed) {
+				if (write && !pte_dirty(pte) && !PageDirty(page))
+					set_page_dirty(page);
+				mark_page_accessed(page);
+			}
 			return page;
 		}
 	}
@@ -829,16 +831,19 @@ out:
 	return NULL;
 }
 
-struct page *
+inline struct page *
 follow_page(struct mm_struct *mm, unsigned long address, int write)
 {
-	return __follow_page(mm, address, /*read*/0, write);
+	return __follow_page(mm, address, 0, write, 1);
 }
 
-int
-check_user_page_readable(struct mm_struct *mm, unsigned long address)
+/*
+ * check_user_page_readable() can be called from interrupt context by oprofile,
+ * so we need to avoid taking any non-irq-safe locks
+ */
+int check_user_page_readable(struct mm_struct *mm, unsigned long address)
 {
-	return __follow_page(mm, address, /*read*/1, /*write*/0) != NULL;
+	return __follow_page(mm, address, 1, 0, 0) != NULL;
 }
 EXPORT_SYMBOL(check_user_page_readable);
 
@@ -908,9 +913,13 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 			pud = pud_offset(pgd, pg);
 			BUG_ON(pud_none(*pud));
 			pmd = pmd_offset(pud, pg);
-			BUG_ON(pmd_none(*pmd));
+			if (pmd_none(*pmd))
+				return i ? : -EFAULT;
 			pte = pte_offset_map(pmd, pg);
-			BUG_ON(pte_none(*pte));
+			if (pte_none(*pte)) {
+				pte_unmap(pte);
+				return i ? : -EFAULT;
+			}
 			if (pages) {
 				pages[i] = pte_page(*pte);
 				get_page(pages[i]);
@@ -935,11 +944,13 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 		}
 		spin_lock(&mm->page_table_lock);
 		do {
+			int write_access = write;
 			struct page *page;
-			int lookup_write = write;
 
 			cond_resched_lock(&mm->page_table_lock);
-			while (!(page = follow_page(mm, start, lookup_write))) {
+			while (!(page = follow_page(mm, start, write_access))) {
+				int ret;
+
 				/*
 				 * Shortcut for anonymous pages. We don't want
 				 * to force the creation of page tables for
@@ -947,13 +958,23 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 				 * nobody touched so far. This is important
 				 * for doing a core dump for these mappings.
 				 */
-				if (!lookup_write &&
-				    untouched_anonymous_page(mm,vma,start)) {
+				if (!write && untouched_anonymous_page(mm,vma,start)) {
 					page = ZERO_PAGE(start);
 					break;
 				}
 				spin_unlock(&mm->page_table_lock);
-				switch (handle_mm_fault(mm,vma,start,write)) {
+				ret = __handle_mm_fault(mm, vma, start, write_access);
+
+				/*
+				 * The VM_FAULT_WRITE bit tells us that do_wp_page has
+				 * broken COW when necessary, even if maybe_mkwrite
+				 * decided not to set pte_write. We can thus safely do
+				 * subsequent page lookups as if they were reads.
+				 */
+				if (ret & VM_FAULT_WRITE)
+					write_access = 0;
+
+				switch (ret & ~VM_FAULT_WRITE) {
 				case VM_FAULT_MINOR:
 					tsk->min_flt++;
 					break;
@@ -967,14 +988,6 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 			default:
 				BUG();
 			}
-			/*
-			 * Now that we have performed a write fault
-			 * and surely no longer have a shared page we
-			 * shouldn't write, we shouldn't ignore an
-			 * unwritable page in the page table if
-			 * we are forcing write access.
-			 */
-			lookup_write = write && !force;
 			spin_lock(&mm->page_table_lock);
 		}
 		if (pages) {
@@ -1224,6 +1237,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
 	struct page *old_page, *new_page;
 	unsigned long pfn = pte_pfn(pte);
 	pte_t entry;
+	int ret;
 
 	if (unlikely(!pfn_valid(pfn))) {
 		/*
@@ -1251,7 +1265,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
 			lazy_mmu_prot_update(entry);
 			pte_unmap(page_table);
 			spin_unlock(&mm->page_table_lock);
-			return VM_FAULT_MINOR;
+			return VM_FAULT_MINOR|VM_FAULT_WRITE;
 		}
 	}
 	pte_unmap(page_table);
@@ -1278,6 +1292,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
 	/*
 	 * Re-check the pte - we dropped the lock
 	 */
+	ret = VM_FAULT_MINOR;
 	spin_lock(&mm->page_table_lock);
 	page_table = pte_offset_map(pmd, address);
 	if (likely(pte_same(*page_table, pte))) {
@@ -1294,12 +1309,13 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
 
 		/* Free the old page.. */
 		new_page = old_page;
+		ret |= VM_FAULT_WRITE;
 	}
 	pte_unmap(page_table);
 	page_cache_release(new_page);
 	page_cache_release(old_page);
 	spin_unlock(&mm->page_table_lock);
-	return VM_FAULT_MINOR;
+	return ret;
 
 no_new_page:
 	page_cache_release(old_page);
@@ -1991,7 +2007,6 @@ static inline int handle_pte_fault(struct mm_struct *mm,
 	if (write_access) {
 		if (!pte_write(entry))
 			return do_wp_page(mm, vma, address, pte, pmd, entry);
-
 		entry = pte_mkdirty(entry);
 	}
 	entry = pte_mkyoung(entry);
@@ -2006,7 +2021,7 @@ static inline int handle_pte_fault(struct mm_struct *mm,
 /*
  * By the time we get here, we already hold the mm semaphore
  */
-int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma,
+int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma,
 		unsigned long address, int write_access)
 {
 	pgd_t *pgd;
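
The memory.c hunks thread a status bit through the fault path: do_wp_page() ORs VM_FAULT_WRITE into its return once COW has been broken, and get_user_pages() strips the bit before switching on the base code, clearing write_access so later follow_page() lookups behave as reads. A compilable sketch of the flag-in-return-value idiom, with invented TOY_* constants:

    #include <stdio.h>

    #define TOY_MINOR      1
    #define TOY_MAJOR      2
    #define TOY_WRITE_BIT  0x10    /* OR'ed in alongside the base code */

    /* Pretend fault handler: reports a minor fault and that COW was broken. */
    static int toy_handle_fault(int write)
    {
        return write ? (TOY_MINOR | TOY_WRITE_BIT) : TOY_MINOR;
    }

    int main(void)
    {
        int write_access = 1;
        int ret = toy_handle_fault(write_access);

        /* The flag rides along with the result; strip it before dispatch. */
        if (ret & TOY_WRITE_BIT)
            write_access = 0;       /* COW broken: future lookups as reads */

        switch (ret & ~TOY_WRITE_BIT) {
        case TOY_MINOR:
            printf("minor fault, write_access now %d\n", write_access);
            break;
        case TOY_MAJOR:
            printf("major fault\n");
            break;
        }
        return 0;
    }
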
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index cb41c31e7c87..b4eababc8198 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -443,7 +443,7 @@ asmlinkage long sys_set_mempolicy(int mode, unsigned long __user *nmask,
 	struct mempolicy *new;
 	DECLARE_BITMAP(nodes, MAX_NUMNODES);
 
-	if (mode > MPOL_MAX)
+	if (mode < 0 || mode > MPOL_MAX)
 		return -EINVAL;
 	err = get_nodes(nodes, nmask, maxnode, mode);
 	if (err)
@@ -1138,11 +1138,11 @@ void mpol_free_shared_policy(struct shared_policy *p)
 	while (next) {
 		n = rb_entry(next, struct sp_node, nd);
 		next = rb_next(&n->nd);
+		rb_erase(&n->nd, &p->root);
 		mpol_free(n->policy);
 		kmem_cache_free(sn_cache, n);
 	}
 	spin_unlock(&p->lock);
-	p->root = RB_ROOT;
 }
 
 /* assumes fs == KERNEL_DS */
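
The mpol_free_shared_policy() fix erases each node from the rbtree as it walks (saving the successor before unlinking), instead of freeing the nodes and resetting the root afterwards, so the tree stays consistent at every step. The same save-next-then-release discipline, sketched on a plain singly linked list with hypothetical toy_* names:

    #include <stdio.h>
    #include <stdlib.h>

    struct toy_node {
        int value;
        struct toy_node *next;
    };

    /* Free every node: grab the successor before unlinking/freeing current. */
    static void toy_free_all(struct toy_node **head)
    {
        struct toy_node *n = *head;

        while (n) {
            struct toy_node *next = n->next;    /* save first */

            free(n);                            /* then release */
            n = next;
        }
        *head = NULL;   /* container is empty and still consistent */
    }

    int main(void)
    {
        struct toy_node *head = NULL;
        int i;

        for (i = 0; i < 3; i++) {
            struct toy_node *n = malloc(sizeof(*n));

            n->value = i;
            n->next = head;
            head = n;
        }
        toy_free_all(&head);
        printf("list freed, head=%p\n", (void *)head);
        return 0;
    }
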
diff --git a/mm/mmap.c b/mm/mmap.c
index da3fa90a0aae..404319477e71 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -143,7 +143,11 @@ int __vm_enough_memory(long pages, int cap_sys_admin)
 	   leave 3% of the size of this process for other processes */
 	allowed -= current->mm->total_vm / 32;
 
-	if (atomic_read(&vm_committed_space) < allowed)
+	/*
+	 * cast `allowed' as a signed long because vm_committed_space
+	 * sometimes has a negative value
+	 */
+	if (atomic_read(&vm_committed_space) < (long)allowed)
 		return 0;
 
 	vm_unacct_memory(pages);
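
The cast fixes a classic C conversion trap: atomic_read() returns a signed value, and comparing it against the unsigned long allowed promotes a transiently negative vm_committed_space to a huge unsigned number, inverting the test. The identical fix is applied in mm/nommu.c below. A standalone demonstration of the trap:

    #include <stdio.h>

    int main(void)
    {
        int committed = -5;             /* transiently negative counter */
        unsigned long allowed = 100;

        /* Unsigned comparison: -5 converts to a huge value, test fails. */
        if ((unsigned long)committed < allowed)
            printf("unsigned compare: would admit\n");
        else
            printf("unsigned compare: wrongly refuses\n");

        /* Signed comparison, as in the fix: -5 < 100 holds. */
        if (committed < (long)allowed)
            printf("signed compare: correctly admits\n");
        return 0;
    }
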
diff --git a/mm/mremap.c b/mm/mremap.c
index ec7238a78f36..fc45dc9a617b 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -229,6 +229,7 @@ static unsigned long move_vma(struct vm_area_struct *vma,
 	 * since do_munmap() will decrement it by old_len == new_len
 	 */
 	mm->total_vm += new_len >> PAGE_SHIFT;
+	__vm_stat_account(mm, vma->vm_flags, vma->vm_file, new_len>>PAGE_SHIFT);
 
 	if (do_munmap(mm, old_addr, old_len) < 0) {
 		/* OOM: unable to split vma, just get accounts right */
@@ -243,7 +244,6 @@ static unsigned long move_vma(struct vm_area_struct *vma,
 		vma->vm_next->vm_flags |= VM_ACCOUNT;
 	}
 
-	__vm_stat_account(mm, vma->vm_flags, vma->vm_file, new_len>>PAGE_SHIFT);
 	if (vm_flags & VM_LOCKED) {
 		mm->locked_vm += new_len >> PAGE_SHIFT;
 		if (new_len > old_len)
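
Moving __vm_stat_account() above do_munmap() matters because do_munmap() may free or merge the vma, leaving vma->vm_flags and vma->vm_file stale; the fields must be consumed while the object is still valid. A sketch of that ordering rule, with hypothetical toy_* helpers:

    #include <stdio.h>
    #include <stdlib.h>

    struct toy_vma { int flags; };

    static void toy_account(int flags)
    {
        printf("accounted flags=%d\n", flags);
    }

    /* May release the object, like do_munmap() can free or merge the vma. */
    static void toy_munmap(struct toy_vma *vma)
    {
        free(vma);
    }

    int main(void)
    {
        struct toy_vma *vma = malloc(sizeof(*vma));

        vma->flags = 7;

        /* Right order: read the fields while the object is still valid... */
        toy_account(vma->flags);
        /* ...then hand it to the call that may destroy it. */
        toy_munmap(vma);

        /* Reading vma->flags here would be a use-after-free. */
        return 0;
    }
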
diff --git a/mm/nommu.c b/mm/nommu.c
index ce74452c02d9..fd4e8df0f02d 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1167,7 +1167,11 @@ int __vm_enough_memory(long pages, int cap_sys_admin)
 	   leave 3% of the size of this process for other processes */
 	allowed -= current->mm->total_vm / 32;
 
-	if (atomic_read(&vm_committed_space) < allowed)
+	/*
+	 * cast `allowed' as a signed long because vm_committed_space
+	 * sometimes has a negative value
+	 */
+	if (atomic_read(&vm_committed_space) < (long)allowed)
 		return 0;
 
 	vm_unacct_memory(pages);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 1d6ba6a4b594..8d088371196a 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1061,20 +1061,19 @@ unsigned int nr_free_pages_pgdat(pg_data_t *pgdat)
 
 static unsigned int nr_free_zone_pages(int offset)
 {
-	pg_data_t *pgdat;
+	/* Just pick one node, since fallback list is circular */
+	pg_data_t *pgdat = NODE_DATA(numa_node_id());
 	unsigned int sum = 0;
 
-	for_each_pgdat(pgdat) {
-		struct zonelist *zonelist = pgdat->node_zonelists + offset;
-		struct zone **zonep = zonelist->zones;
-		struct zone *zone;
+	struct zonelist *zonelist = pgdat->node_zonelists + offset;
+	struct zone **zonep = zonelist->zones;
+	struct zone *zone;
 
 	for (zone = *zonep++; zone; zone = *zonep++) {
 		unsigned long size = zone->present_pages;
 		unsigned long high = zone->pages_high;
 		if (size > high)
 			sum += size - high;
-		}
 	}
 
 	return sum;
@@ -1861,7 +1860,6 @@ static void __init free_area_init_core(struct pglist_data *pgdat,
 		unsigned long *zones_size, unsigned long *zholes_size)
 {
 	unsigned long i, j;
-	const unsigned long zone_required_alignment = 1UL << (MAX_ORDER-1);
 	int cpu, nid = pgdat->node_id;
 	unsigned long zone_start_pfn = pgdat->node_start_pfn;
 
@@ -1934,9 +1932,6 @@ static void __init free_area_init_core(struct pglist_data *pgdat,
 		zone->zone_mem_map = pfn_to_page(zone_start_pfn);
 		zone->zone_start_pfn = zone_start_pfn;
 
-		if ((zone_start_pfn) & (zone_required_alignment-1))
-			printk(KERN_CRIT "BUG: wrong zone alignment, it will crash\n");
-
 		memmap_init(size, nid, j, zone_start_pfn);
 
 		zonetable_add(zone, nid, j, zone_start_pfn, size);
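
In the nr_free_zone_pages() hunk, scanning a single node's zonelist suffices because the fallback list is circular and already reaches every zone in the system, so the for_each_pgdat() loop was redundant. The zones array is a NULL-terminated pointer list walked with a post-increment cursor; a self-contained sketch of that traversal, with toy_* stand-ins for struct zone and zonelist->zones:

    #include <stdio.h>

    struct toy_zone {
        unsigned long present;
        unsigned long high;
    };

    int main(void)
    {
        struct toy_zone dma = { 100, 10 }, normal = { 1000, 50 };
        /* NULL-terminated pointer array, like zonelist->zones. */
        struct toy_zone *zones[] = { &dma, &normal, NULL };

        struct toy_zone **zonep = zones;
        struct toy_zone *zone;
        unsigned long sum = 0;

        /* Post-increment cursor: fetch, test for the NULL sentinel, advance. */
        for (zone = *zonep++; zone; zone = *zonep++) {
            if (zone->present > zone->high)
                sum += zone->present - zone->high;
        }
        printf("free pages above high watermark: %lu\n", sum);
        return 0;
    }
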