about | summary | refs | log | tree | commit | diff | stats
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r--  mm/hugetlb.c     11
-rw-r--r--  mm/memory.c      48
-rw-r--r--  mm/mempolicy.c    2
-rw-r--r--  mm/mmap.c         6
-rw-r--r--  mm/mremap.c       2
-rw-r--r--  mm/nommu.c        6
-rw-r--r--  mm/page_alloc.c  21
7 files changed, 61 insertions, 35 deletions
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index fbd1111ea119..6bf720bc662c 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -301,6 +301,7 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
301{ 301{
302 struct mm_struct *mm = vma->vm_mm; 302 struct mm_struct *mm = vma->vm_mm;
303 unsigned long address; 303 unsigned long address;
304 pte_t *ptep;
304 pte_t pte; 305 pte_t pte;
305 struct page *page; 306 struct page *page;
306 307
@@ -309,9 +310,17 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
309 BUG_ON(end & ~HPAGE_MASK); 310 BUG_ON(end & ~HPAGE_MASK);
310 311
311 for (address = start; address < end; address += HPAGE_SIZE) { 312 for (address = start; address < end; address += HPAGE_SIZE) {
312 pte = huge_ptep_get_and_clear(mm, address, huge_pte_offset(mm, address)); 313 ptep = huge_pte_offset(mm, address);
314 if (! ptep)
315 /* This can happen on truncate, or if an
316 * mmap() is aborted due to an error before
317 * the prefault */
318 continue;
319
320 pte = huge_ptep_get_and_clear(mm, address, ptep);
313 if (pte_none(pte)) 321 if (pte_none(pte))
314 continue; 322 continue;
323
315 page = pte_page(pte); 324 page = pte_page(pte);
316 put_page(page); 325 put_page(page);
317 } 326 }
diff --git a/mm/memory.c b/mm/memory.c
index 6fe77acbc1cd..e046b7e4b530 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -913,9 +913,13 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
913 pud = pud_offset(pgd, pg); 913 pud = pud_offset(pgd, pg);
914 BUG_ON(pud_none(*pud)); 914 BUG_ON(pud_none(*pud));
915 pmd = pmd_offset(pud, pg); 915 pmd = pmd_offset(pud, pg);
916 BUG_ON(pmd_none(*pmd)); 916 if (pmd_none(*pmd))
917 return i ? : -EFAULT;
917 pte = pte_offset_map(pmd, pg); 918 pte = pte_offset_map(pmd, pg);
918 BUG_ON(pte_none(*pte)); 919 if (pte_none(*pte)) {
920 pte_unmap(pte);
921 return i ? : -EFAULT;
922 }
919 if (pages) { 923 if (pages) {
920 pages[i] = pte_page(*pte); 924 pages[i] = pte_page(*pte);
921 get_page(pages[i]); 925 get_page(pages[i]);
@@ -940,11 +944,13 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
940 } 944 }
941 spin_lock(&mm->page_table_lock); 945 spin_lock(&mm->page_table_lock);
942 do { 946 do {
947 int write_access = write;
943 struct page *page; 948 struct page *page;
944 int lookup_write = write;
945 949
946 cond_resched_lock(&mm->page_table_lock); 950 cond_resched_lock(&mm->page_table_lock);
947 while (!(page = follow_page(mm, start, lookup_write))) { 951 while (!(page = follow_page(mm, start, write_access))) {
952 int ret;
953
948 /* 954 /*
949 * Shortcut for anonymous pages. We don't want 955 * Shortcut for anonymous pages. We don't want
950 * to force the creation of pages tables for 956 * to force the creation of pages tables for
@@ -952,13 +958,23 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
952 * nobody touched so far. This is important 958 * nobody touched so far. This is important
953 * for doing a core dump for these mappings. 959 * for doing a core dump for these mappings.
954 */ 960 */
955 if (!lookup_write && 961 if (!write && untouched_anonymous_page(mm,vma,start)) {
956 untouched_anonymous_page(mm,vma,start)) {
957 page = ZERO_PAGE(start); 962 page = ZERO_PAGE(start);
958 break; 963 break;
959 } 964 }
960 spin_unlock(&mm->page_table_lock); 965 spin_unlock(&mm->page_table_lock);
961 switch (handle_mm_fault(mm,vma,start,write)) { 966 ret = __handle_mm_fault(mm, vma, start, write_access);
967
968 /*
969 * The VM_FAULT_WRITE bit tells us that do_wp_page has
970 * broken COW when necessary, even if maybe_mkwrite
971 * decided not to set pte_write. We can thus safely do
972 * subsequent page lookups as if they were reads.
973 */
974 if (ret & VM_FAULT_WRITE)
975 write_access = 0;
976
977 switch (ret & ~VM_FAULT_WRITE) {
962 case VM_FAULT_MINOR: 978 case VM_FAULT_MINOR:
963 tsk->min_flt++; 979 tsk->min_flt++;
964 break; 980 break;
@@ -972,14 +988,6 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
972 default: 988 default:
973 BUG(); 989 BUG();
974 } 990 }
975 /*
976 * Now that we have performed a write fault
977 * and surely no longer have a shared page we
978 * shouldn't write, we shouldn't ignore an
979 * unwritable page in the page table if
980 * we are forcing write access.
981 */
982 lookup_write = write && !force;
983 spin_lock(&mm->page_table_lock); 991 spin_lock(&mm->page_table_lock);
984 } 992 }
985 if (pages) { 993 if (pages) {
@@ -1229,6 +1237,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
1229 struct page *old_page, *new_page; 1237 struct page *old_page, *new_page;
1230 unsigned long pfn = pte_pfn(pte); 1238 unsigned long pfn = pte_pfn(pte);
1231 pte_t entry; 1239 pte_t entry;
1240 int ret;
1232 1241
1233 if (unlikely(!pfn_valid(pfn))) { 1242 if (unlikely(!pfn_valid(pfn))) {
1234 /* 1243 /*
@@ -1256,7 +1265,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
1256 lazy_mmu_prot_update(entry); 1265 lazy_mmu_prot_update(entry);
1257 pte_unmap(page_table); 1266 pte_unmap(page_table);
1258 spin_unlock(&mm->page_table_lock); 1267 spin_unlock(&mm->page_table_lock);
1259 return VM_FAULT_MINOR; 1268 return VM_FAULT_MINOR|VM_FAULT_WRITE;
1260 } 1269 }
1261 } 1270 }
1262 pte_unmap(page_table); 1271 pte_unmap(page_table);
@@ -1283,6 +1292,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
1283 /* 1292 /*
1284 * Re-check the pte - we dropped the lock 1293 * Re-check the pte - we dropped the lock
1285 */ 1294 */
1295 ret = VM_FAULT_MINOR;
1286 spin_lock(&mm->page_table_lock); 1296 spin_lock(&mm->page_table_lock);
1287 page_table = pte_offset_map(pmd, address); 1297 page_table = pte_offset_map(pmd, address);
1288 if (likely(pte_same(*page_table, pte))) { 1298 if (likely(pte_same(*page_table, pte))) {
@@ -1299,12 +1309,13 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
1299 1309
1300 /* Free the old page.. */ 1310 /* Free the old page.. */
1301 new_page = old_page; 1311 new_page = old_page;
1312 ret |= VM_FAULT_WRITE;
1302 } 1313 }
1303 pte_unmap(page_table); 1314 pte_unmap(page_table);
1304 page_cache_release(new_page); 1315 page_cache_release(new_page);
1305 page_cache_release(old_page); 1316 page_cache_release(old_page);
1306 spin_unlock(&mm->page_table_lock); 1317 spin_unlock(&mm->page_table_lock);
1307 return VM_FAULT_MINOR; 1318 return ret;
1308 1319
1309no_new_page: 1320no_new_page:
1310 page_cache_release(old_page); 1321 page_cache_release(old_page);
@@ -1996,7 +2007,6 @@ static inline int handle_pte_fault(struct mm_struct *mm,
1996 if (write_access) { 2007 if (write_access) {
1997 if (!pte_write(entry)) 2008 if (!pte_write(entry))
1998 return do_wp_page(mm, vma, address, pte, pmd, entry); 2009 return do_wp_page(mm, vma, address, pte, pmd, entry);
1999
2000 entry = pte_mkdirty(entry); 2010 entry = pte_mkdirty(entry);
2001 } 2011 }
2002 entry = pte_mkyoung(entry); 2012 entry = pte_mkyoung(entry);
@@ -2011,7 +2021,7 @@ static inline int handle_pte_fault(struct mm_struct *mm,
2011/* 2021/*
2012 * By the time we get here, we already hold the mm semaphore 2022 * By the time we get here, we already hold the mm semaphore
2013 */ 2023 */
2014int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma, 2024int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma,
2015 unsigned long address, int write_access) 2025 unsigned long address, int write_access)
2016{ 2026{
2017 pgd_t *pgd; 2027 pgd_t *pgd;
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 1694845526be..b4eababc8198 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -443,7 +443,7 @@ asmlinkage long sys_set_mempolicy(int mode, unsigned long __user *nmask,
443 struct mempolicy *new; 443 struct mempolicy *new;
444 DECLARE_BITMAP(nodes, MAX_NUMNODES); 444 DECLARE_BITMAP(nodes, MAX_NUMNODES);
445 445
446 if (mode > MPOL_MAX) 446 if (mode < 0 || mode > MPOL_MAX)
447 return -EINVAL; 447 return -EINVAL;
448 err = get_nodes(nodes, nmask, maxnode, mode); 448 err = get_nodes(nodes, nmask, maxnode, mode);
449 if (err) 449 if (err)
diff --git a/mm/mmap.c b/mm/mmap.c
index da3fa90a0aae..404319477e71 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -143,7 +143,11 @@ int __vm_enough_memory(long pages, int cap_sys_admin)
143 leave 3% of the size of this process for other processes */ 143 leave 3% of the size of this process for other processes */
144 allowed -= current->mm->total_vm / 32; 144 allowed -= current->mm->total_vm / 32;
145 145
146 if (atomic_read(&vm_committed_space) < allowed) 146 /*
147 * cast `allowed' as a signed long because vm_committed_space
148 * sometimes has a negative value
149 */
150 if (atomic_read(&vm_committed_space) < (long)allowed)
147 return 0; 151 return 0;
148 152
149 vm_unacct_memory(pages); 153 vm_unacct_memory(pages);
diff --git a/mm/mremap.c b/mm/mremap.c
index ec7238a78f36..fc45dc9a617b 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -229,6 +229,7 @@ static unsigned long move_vma(struct vm_area_struct *vma,
229 * since do_munmap() will decrement it by old_len == new_len 229 * since do_munmap() will decrement it by old_len == new_len
230 */ 230 */
231 mm->total_vm += new_len >> PAGE_SHIFT; 231 mm->total_vm += new_len >> PAGE_SHIFT;
232 __vm_stat_account(mm, vma->vm_flags, vma->vm_file, new_len>>PAGE_SHIFT);
232 233
233 if (do_munmap(mm, old_addr, old_len) < 0) { 234 if (do_munmap(mm, old_addr, old_len) < 0) {
234 /* OOM: unable to split vma, just get accounts right */ 235 /* OOM: unable to split vma, just get accounts right */
@@ -243,7 +244,6 @@ static unsigned long move_vma(struct vm_area_struct *vma,
243 vma->vm_next->vm_flags |= VM_ACCOUNT; 244 vma->vm_next->vm_flags |= VM_ACCOUNT;
244 } 245 }
245 246
246 __vm_stat_account(mm, vma->vm_flags, vma->vm_file, new_len>>PAGE_SHIFT);
247 if (vm_flags & VM_LOCKED) { 247 if (vm_flags & VM_LOCKED) {
248 mm->locked_vm += new_len >> PAGE_SHIFT; 248 mm->locked_vm += new_len >> PAGE_SHIFT;
249 if (new_len > old_len) 249 if (new_len > old_len)
diff --git a/mm/nommu.c b/mm/nommu.c
index ce74452c02d9..fd4e8df0f02d 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1167,7 +1167,11 @@ int __vm_enough_memory(long pages, int cap_sys_admin)
1167 leave 3% of the size of this process for other processes */ 1167 leave 3% of the size of this process for other processes */
1168 allowed -= current->mm->total_vm / 32; 1168 allowed -= current->mm->total_vm / 32;
1169 1169
1170 if (atomic_read(&vm_committed_space) < allowed) 1170 /*
1171 * cast `allowed' as a signed long because vm_committed_space
1172 * sometimes has a negative value
1173 */
1174 if (atomic_read(&vm_committed_space) < (long)allowed)
1171 return 0; 1175 return 0;
1172 1176
1173 vm_unacct_memory(pages); 1177 vm_unacct_memory(pages);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 42bccfb8464d..8d088371196a 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1061,20 +1061,19 @@ unsigned int nr_free_pages_pgdat(pg_data_t *pgdat)
1061 1061
1062static unsigned int nr_free_zone_pages(int offset) 1062static unsigned int nr_free_zone_pages(int offset)
1063{ 1063{
1064 pg_data_t *pgdat; 1064 /* Just pick one node, since fallback list is circular */
1065 pg_data_t *pgdat = NODE_DATA(numa_node_id());
1065 unsigned int sum = 0; 1066 unsigned int sum = 0;
1066 1067
1067 for_each_pgdat(pgdat) { 1068 struct zonelist *zonelist = pgdat->node_zonelists + offset;
1068 struct zonelist *zonelist = pgdat->node_zonelists + offset; 1069 struct zone **zonep = zonelist->zones;
1069 struct zone **zonep = zonelist->zones; 1070 struct zone *zone;
1070 struct zone *zone;
1071 1071
1072 for (zone = *zonep++; zone; zone = *zonep++) { 1072 for (zone = *zonep++; zone; zone = *zonep++) {
1073 unsigned long size = zone->present_pages; 1073 unsigned long size = zone->present_pages;
1074 unsigned long high = zone->pages_high; 1074 unsigned long high = zone->pages_high;
1075 if (size > high) 1075 if (size > high)
1076 sum += size - high; 1076 sum += size - high;
1077 }
1078 } 1077 }
1079 1078
1080 return sum; 1079 return sum;