aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHugh Dickins <hugh.dickins@tiscali.co.uk>2009-09-21 20:03:30 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2009-09-22 10:17:40 -0400
commita13ea5b759645a0779edc6dbfec9abfd83220844 (patch)
tree864dd495718195bd065d9f26edac2504e6de5af0
parent1ac0cb5d0e22d5e483f56b2bc12172dec1cf7536 (diff)
mm: reinstate ZERO_PAGE
KAMEZAWA Hiroyuki has observed customers of earlier kernels taking advantage of the ZERO_PAGE: which we stopped do_anonymous_page() from using in 2.6.24. And there were a couple of regression reports on LKML. Following suggestions from Linus, reinstate do_anonymous_page() use of the ZERO_PAGE; but this time avoid dirtying its struct page cacheline with (map)count updates - let vm_normal_page() regard it as abnormal. Use it only on arches which __HAVE_ARCH_PTE_SPECIAL (x86, s390, sh32, most powerpc): that's not essential, but minimizes additional branches (keeping them in the unlikely pte_special case); and incidentally excludes mips (some models of which needed eight colours of ZERO_PAGE to avoid costly exceptions). Don't be fanatical about avoiding ZERO_PAGE updates: get_user_pages() callers won't want to make exceptions for it, so increment its count there. Changes to mlock and migration? happily seem not needed. In most places it's quicker to check pfn than struct page address: prepare a __read_mostly zero_pfn for that. Does get_dump_page() still need its ZERO_PAGE check? probably not, but keep it anyway. Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk> Acked-by: Rik van Riel <riel@redhat.com> Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> Cc: Nick Piggin <npiggin@suse.de> Cc: Mel Gorman <mel@csn.ul.ie> Cc: Minchan Kim <minchan.kim@gmail.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--mm/memory.c53
1 file changed, 44 insertions, 9 deletions
diff --git a/mm/memory.c b/mm/memory.c
index fc38d4ed9ad9..c8b5b9435a92 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -108,6 +108,17 @@ static int __init disable_randmaps(char *s)
108} 108}
109__setup("norandmaps", disable_randmaps); 109__setup("norandmaps", disable_randmaps);
110 110
111static unsigned long zero_pfn __read_mostly;
112
113/*
114 * CONFIG_MMU architectures set up ZERO_PAGE in their paging_init()
115 */
116static int __init init_zero_pfn(void)
117{
118 zero_pfn = page_to_pfn(ZERO_PAGE(0));
119 return 0;
120}
121core_initcall(init_zero_pfn);
111 122
112/* 123/*
113 * If a p?d_bad entry is found while walking page tables, report 124 * If a p?d_bad entry is found while walking page tables, report
@@ -499,7 +510,9 @@ struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
499 if (HAVE_PTE_SPECIAL) { 510 if (HAVE_PTE_SPECIAL) {
500 if (likely(!pte_special(pte))) 511 if (likely(!pte_special(pte)))
501 goto check_pfn; 512 goto check_pfn;
502 if (!(vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP))) 513 if (vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP))
514 return NULL;
515 if (pfn != zero_pfn)
503 print_bad_pte(vma, addr, pte, NULL); 516 print_bad_pte(vma, addr, pte, NULL);
504 return NULL; 517 return NULL;
505 } 518 }
@@ -1144,9 +1157,14 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
1144 goto no_page; 1157 goto no_page;
1145 if ((flags & FOLL_WRITE) && !pte_write(pte)) 1158 if ((flags & FOLL_WRITE) && !pte_write(pte))
1146 goto unlock; 1159 goto unlock;
1160
1147 page = vm_normal_page(vma, address, pte); 1161 page = vm_normal_page(vma, address, pte);
1148 if (unlikely(!page)) 1162 if (unlikely(!page)) {
1149 goto bad_page; 1163 if ((flags & FOLL_DUMP) ||
1164 pte_pfn(pte) != zero_pfn)
1165 goto bad_page;
1166 page = pte_page(pte);
1167 }
1150 1168
1151 if (flags & FOLL_GET) 1169 if (flags & FOLL_GET)
1152 get_page(page); 1170 get_page(page);
@@ -2084,10 +2102,19 @@ gotten:
2084 2102
2085 if (unlikely(anon_vma_prepare(vma))) 2103 if (unlikely(anon_vma_prepare(vma)))
2086 goto oom; 2104 goto oom;
2087 VM_BUG_ON(old_page == ZERO_PAGE(0)); 2105
2088 new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address); 2106 if (pte_pfn(orig_pte) == zero_pfn) {
2089 if (!new_page) 2107 new_page = alloc_zeroed_user_highpage_movable(vma, address);
2090 goto oom; 2108 if (!new_page)
2109 goto oom;
2110 } else {
2111 new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
2112 if (!new_page)
2113 goto oom;
2114 cow_user_page(new_page, old_page, address, vma);
2115 }
2116 __SetPageUptodate(new_page);
2117
2091 /* 2118 /*
2092 * Don't let another task, with possibly unlocked vma, 2119 * Don't let another task, with possibly unlocked vma,
2093 * keep the mlocked page. 2120 * keep the mlocked page.
@@ -2097,8 +2124,6 @@ gotten:
2097 clear_page_mlock(old_page); 2124 clear_page_mlock(old_page);
2098 unlock_page(old_page); 2125 unlock_page(old_page);
2099 } 2126 }
2100 cow_user_page(new_page, old_page, address, vma);
2101 __SetPageUptodate(new_page);
2102 2127
2103 if (mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL)) 2128 if (mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))
2104 goto oom_free_new; 2129 goto oom_free_new;
@@ -2639,6 +2664,15 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
2639 spinlock_t *ptl; 2664 spinlock_t *ptl;
2640 pte_t entry; 2665 pte_t entry;
2641 2666
2667 if (HAVE_PTE_SPECIAL && !(flags & FAULT_FLAG_WRITE)) {
2668 entry = pte_mkspecial(pfn_pte(zero_pfn, vma->vm_page_prot));
2669 ptl = pte_lockptr(mm, pmd);
2670 spin_lock(ptl);
2671 if (!pte_none(*page_table))
2672 goto unlock;
2673 goto setpte;
2674 }
2675
2642 /* Allocate our own private page. */ 2676 /* Allocate our own private page. */
2643 pte_unmap(page_table); 2677 pte_unmap(page_table);
2644 2678
@@ -2662,6 +2696,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
2662 2696
2663 inc_mm_counter(mm, anon_rss); 2697 inc_mm_counter(mm, anon_rss);
2664 page_add_new_anon_rmap(page, vma, address); 2698 page_add_new_anon_rmap(page, vma, address);
2699setpte:
2665 set_pte_at(mm, address, page_table, entry); 2700 set_pte_at(mm, address, page_table, entry);
2666 2701
2667 /* No need to invalidate - it was non-present before */ 2702 /* No need to invalidate - it was non-present before */