aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> 2013-04-29 18:08:44 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2013-04-29 18:54:39 -0400
commit: 5918d10a4bb1081920a04e2c17197a02ff06e651 (patch)
tree: c3e0e45e9eba854bf58d11d43ff9ab4d8ff66893
parent: fd0ccaf2bd04e54d2a6979fbfdcad856694e3877 (diff)
thp: fix huge zero page logic for page with pfn == 0
The current implementation of the huge zero page uses pfn value 0 to indicate that the page hasn't been allocated yet. It assumes that the buddy page allocator can't return a page with pfn == 0. Let's rework the code to store the 'struct page *' of the huge zero page, not its pfn. This way we can avoid the weak assumption. [akpm@linux-foundation.org: fix sparse warning] Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Reported-by: Minchan Kim <minchan@kernel.org> Acked-by: Minchan Kim <minchan@kernel.org> Reviewed-by: Andrea Arcangeli <aarcange@redhat.com> Acked-by: Johannes Weiner <hannes@cmpxchg.org> Cc: Wu Fengguang <fengguang.wu@intel.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- mm/huge_memory.c 45
1 files changed, 22 insertions, 23 deletions
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 2ed1a160a85b..03a89a2f464b 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -163,35 +163,34 @@ static int start_khugepaged(void)
163} 163}
164 164
165static atomic_t huge_zero_refcount; 165static atomic_t huge_zero_refcount;
166static unsigned long huge_zero_pfn __read_mostly; 166static struct page *huge_zero_page __read_mostly;
167 167
168static inline bool is_huge_zero_pfn(unsigned long pfn) 168static inline bool is_huge_zero_page(struct page *page)
169{ 169{
170 unsigned long zero_pfn = ACCESS_ONCE(huge_zero_pfn); 170 return ACCESS_ONCE(huge_zero_page) == page;
171 return zero_pfn && pfn == zero_pfn;
172} 171}
173 172
174static inline bool is_huge_zero_pmd(pmd_t pmd) 173static inline bool is_huge_zero_pmd(pmd_t pmd)
175{ 174{
176 return is_huge_zero_pfn(pmd_pfn(pmd)); 175 return is_huge_zero_page(pmd_page(pmd));
177} 176}
178 177
179static unsigned long get_huge_zero_page(void) 178static struct page *get_huge_zero_page(void)
180{ 179{
181 struct page *zero_page; 180 struct page *zero_page;
182retry: 181retry:
183 if (likely(atomic_inc_not_zero(&huge_zero_refcount))) 182 if (likely(atomic_inc_not_zero(&huge_zero_refcount)))
184 return ACCESS_ONCE(huge_zero_pfn); 183 return ACCESS_ONCE(huge_zero_page);
185 184
186 zero_page = alloc_pages((GFP_TRANSHUGE | __GFP_ZERO) & ~__GFP_MOVABLE, 185 zero_page = alloc_pages((GFP_TRANSHUGE | __GFP_ZERO) & ~__GFP_MOVABLE,
187 HPAGE_PMD_ORDER); 186 HPAGE_PMD_ORDER);
188 if (!zero_page) { 187 if (!zero_page) {
189 count_vm_event(THP_ZERO_PAGE_ALLOC_FAILED); 188 count_vm_event(THP_ZERO_PAGE_ALLOC_FAILED);
190 return 0; 189 return NULL;
191 } 190 }
192 count_vm_event(THP_ZERO_PAGE_ALLOC); 191 count_vm_event(THP_ZERO_PAGE_ALLOC);
193 preempt_disable(); 192 preempt_disable();
194 if (cmpxchg(&huge_zero_pfn, 0, page_to_pfn(zero_page))) { 193 if (cmpxchg(&huge_zero_page, NULL, zero_page)) {
195 preempt_enable(); 194 preempt_enable();
196 __free_page(zero_page); 195 __free_page(zero_page);
197 goto retry; 196 goto retry;
@@ -200,7 +199,7 @@ retry:
200 /* We take additional reference here. It will be put back by shrinker */ 199 /* We take additional reference here. It will be put back by shrinker */
201 atomic_set(&huge_zero_refcount, 2); 200 atomic_set(&huge_zero_refcount, 2);
202 preempt_enable(); 201 preempt_enable();
203 return ACCESS_ONCE(huge_zero_pfn); 202 return ACCESS_ONCE(huge_zero_page);
204} 203}
205 204
206static void put_huge_zero_page(void) 205static void put_huge_zero_page(void)
@@ -220,9 +219,9 @@ static int shrink_huge_zero_page(struct shrinker *shrink,
220 return atomic_read(&huge_zero_refcount) == 1 ? HPAGE_PMD_NR : 0; 219 return atomic_read(&huge_zero_refcount) == 1 ? HPAGE_PMD_NR : 0;
221 220
222 if (atomic_cmpxchg(&huge_zero_refcount, 1, 0) == 1) { 221 if (atomic_cmpxchg(&huge_zero_refcount, 1, 0) == 1) {
223 unsigned long zero_pfn = xchg(&huge_zero_pfn, 0); 222 struct page *zero_page = xchg(&huge_zero_page, NULL);
224 BUG_ON(zero_pfn == 0); 223 BUG_ON(zero_page == NULL);
225 __free_page(__pfn_to_page(zero_pfn)); 224 __free_page(zero_page);
226 } 225 }
227 226
228 return 0; 227 return 0;
@@ -764,12 +763,12 @@ static inline struct page *alloc_hugepage(int defrag)
764 763
765static bool set_huge_zero_page(pgtable_t pgtable, struct mm_struct *mm, 764static bool set_huge_zero_page(pgtable_t pgtable, struct mm_struct *mm,
766 struct vm_area_struct *vma, unsigned long haddr, pmd_t *pmd, 765 struct vm_area_struct *vma, unsigned long haddr, pmd_t *pmd,
767 unsigned long zero_pfn) 766 struct page *zero_page)
768{ 767{
769 pmd_t entry; 768 pmd_t entry;
770 if (!pmd_none(*pmd)) 769 if (!pmd_none(*pmd))
771 return false; 770 return false;
772 entry = pfn_pmd(zero_pfn, vma->vm_page_prot); 771 entry = mk_pmd(zero_page, vma->vm_page_prot);
773 entry = pmd_wrprotect(entry); 772 entry = pmd_wrprotect(entry);
774 entry = pmd_mkhuge(entry); 773 entry = pmd_mkhuge(entry);
775 set_pmd_at(mm, haddr, pmd, entry); 774 set_pmd_at(mm, haddr, pmd, entry);
@@ -794,20 +793,20 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
794 if (!(flags & FAULT_FLAG_WRITE) && 793 if (!(flags & FAULT_FLAG_WRITE) &&
795 transparent_hugepage_use_zero_page()) { 794 transparent_hugepage_use_zero_page()) {
796 pgtable_t pgtable; 795 pgtable_t pgtable;
797 unsigned long zero_pfn; 796 struct page *zero_page;
798 bool set; 797 bool set;
799 pgtable = pte_alloc_one(mm, haddr); 798 pgtable = pte_alloc_one(mm, haddr);
800 if (unlikely(!pgtable)) 799 if (unlikely(!pgtable))
801 return VM_FAULT_OOM; 800 return VM_FAULT_OOM;
802 zero_pfn = get_huge_zero_page(); 801 zero_page = get_huge_zero_page();
803 if (unlikely(!zero_pfn)) { 802 if (unlikely(!zero_page)) {
804 pte_free(mm, pgtable); 803 pte_free(mm, pgtable);
805 count_vm_event(THP_FAULT_FALLBACK); 804 count_vm_event(THP_FAULT_FALLBACK);
806 goto out; 805 goto out;
807 } 806 }
808 spin_lock(&mm->page_table_lock); 807 spin_lock(&mm->page_table_lock);
809 set = set_huge_zero_page(pgtable, mm, vma, haddr, pmd, 808 set = set_huge_zero_page(pgtable, mm, vma, haddr, pmd,
810 zero_pfn); 809 zero_page);
811 spin_unlock(&mm->page_table_lock); 810 spin_unlock(&mm->page_table_lock);
812 if (!set) { 811 if (!set) {
813 pte_free(mm, pgtable); 812 pte_free(mm, pgtable);
@@ -886,16 +885,16 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
886 * a page table. 885 * a page table.
887 */ 886 */
888 if (is_huge_zero_pmd(pmd)) { 887 if (is_huge_zero_pmd(pmd)) {
889 unsigned long zero_pfn; 888 struct page *zero_page;
890 bool set; 889 bool set;
891 /* 890 /*
892 * get_huge_zero_page() will never allocate a new page here, 891 * get_huge_zero_page() will never allocate a new page here,
893 * since we already have a zero page to copy. It just takes a 892 * since we already have a zero page to copy. It just takes a
894 * reference. 893 * reference.
895 */ 894 */
896 zero_pfn = get_huge_zero_page(); 895 zero_page = get_huge_zero_page();
897 set = set_huge_zero_page(pgtable, dst_mm, vma, addr, dst_pmd, 896 set = set_huge_zero_page(pgtable, dst_mm, vma, addr, dst_pmd,
898 zero_pfn); 897 zero_page);
899 BUG_ON(!set); /* unexpected !pmd_none(dst_pmd) */ 898 BUG_ON(!set); /* unexpected !pmd_none(dst_pmd) */
900 ret = 0; 899 ret = 0;
901 goto out_unlock; 900 goto out_unlock;
@@ -1812,7 +1811,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
1812 struct anon_vma *anon_vma; 1811 struct anon_vma *anon_vma;
1813 int ret = 1; 1812 int ret = 1;
1814 1813
1815 BUG_ON(is_huge_zero_pfn(page_to_pfn(page))); 1814 BUG_ON(is_huge_zero_page(page));
1816 BUG_ON(!PageAnon(page)); 1815 BUG_ON(!PageAnon(page));
1817 1816
1818 /* 1817 /*