author     Kirill A. Shutemov <kirill.shutemov@linux.intel.com>   2013-04-29 18:08:44 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>         2013-04-29 18:54:39 -0400
commit     5918d10a4bb1081920a04e2c17197a02ff06e651
tree       c3e0e45e9eba854bf58d11d43ff9ab4d8ff66893  /mm/huge_memory.c
parent     fd0ccaf2bd04e54d2a6979fbfdcad856694e3877
thp: fix huge zero page logic for page with pfn == 0
The current implementation of the huge zero page uses pfn value 0 to
indicate that the page hasn't been allocated yet. It assumes that the
buddy page allocator can't return a page with pfn == 0.

Let's rework the code to store the 'struct page *' of the huge zero
page, not its pfn. This way we can avoid the weak assumption.
[akpm@linux-foundation.org: fix sparse warning]
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Reported-by: Minchan Kim <minchan@kernel.org>
Acked-by: Minchan Kim <minchan@kernel.org>
Reviewed-by: Andrea Arcangeli <aarcange@redhat.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
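
For illustration (not part of the commit): the fix replaces an integer sentinel that can collide with a legal value (pfn 0) with a pointer sentinel that cannot (NULL), and publishes the lazily allocated page by compare-and-swapping against that sentinel. A minimal user-space C analogue of this pattern, with hypothetical names rather than kernel APIs, might look like the following.

/* Sketch only: keep a pointer whose NULL value means "not allocated
 * yet", and publish the lazily allocated object with a compare-and-swap,
 * analogous to cmpxchg(&huge_zero_page, NULL, zero_page) in the patch.
 * All names here are hypothetical.
 */
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct zero_buf { char data[4096]; };

static _Atomic(struct zero_buf *) shared_zero;  /* NULL == not allocated yet */

static struct zero_buf *get_shared_zero(void)
{
        struct zero_buf *expected = NULL;
        struct zero_buf *buf;

        /* Fast path: someone already published the shared buffer. */
        buf = atomic_load(&shared_zero);
        if (buf)
                return buf;

        buf = calloc(1, sizeof(*buf));
        if (!buf)
                return NULL;            /* allocation failure */

        /* Publish our buffer only if the slot still holds the NULL sentinel. */
        if (!atomic_compare_exchange_strong(&shared_zero, &expected, buf)) {
                free(buf);              /* lost the race; use the winner's */
                return expected;        /* updated to the published pointer */
        }
        return buf;
}

int main(void)
{
        struct zero_buf *a = get_shared_zero();
        struct zero_buf *b = get_shared_zero();

        printf("same shared buffer: %s\n", (a && a == b) ? "yes" : "no");
        return 0;
}

The design point mirrored here is that NULL is reserved by definition, so nothing has to be assumed about which values the allocator can hand back.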
Diffstat (limited to 'mm/huge_memory.c')
-rw-r--r--  mm/huge_memory.c | 45 ++++++++++++++++++++++-----------------------
1 file changed, 22 insertions(+), 23 deletions(-)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 2ed1a160a85b..03a89a2f464b 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -163,35 +163,34 @@ static int start_khugepaged(void)
 }

 static atomic_t huge_zero_refcount;
-static unsigned long huge_zero_pfn __read_mostly;
+static struct page *huge_zero_page __read_mostly;

-static inline bool is_huge_zero_pfn(unsigned long pfn)
+static inline bool is_huge_zero_page(struct page *page)
 {
-        unsigned long zero_pfn = ACCESS_ONCE(huge_zero_pfn);
-        return zero_pfn && pfn == zero_pfn;
+        return ACCESS_ONCE(huge_zero_page) == page;
 }

 static inline bool is_huge_zero_pmd(pmd_t pmd)
 {
-        return is_huge_zero_pfn(pmd_pfn(pmd));
+        return is_huge_zero_page(pmd_page(pmd));
 }

-static unsigned long get_huge_zero_page(void)
+static struct page *get_huge_zero_page(void)
 {
         struct page *zero_page;
 retry:
         if (likely(atomic_inc_not_zero(&huge_zero_refcount)))
-                return ACCESS_ONCE(huge_zero_pfn);
+                return ACCESS_ONCE(huge_zero_page);

         zero_page = alloc_pages((GFP_TRANSHUGE | __GFP_ZERO) & ~__GFP_MOVABLE,
                         HPAGE_PMD_ORDER);
         if (!zero_page) {
                 count_vm_event(THP_ZERO_PAGE_ALLOC_FAILED);
-                return 0;
+                return NULL;
         }
         count_vm_event(THP_ZERO_PAGE_ALLOC);
         preempt_disable();
-        if (cmpxchg(&huge_zero_pfn, 0, page_to_pfn(zero_page))) {
+        if (cmpxchg(&huge_zero_page, NULL, zero_page)) {
                 preempt_enable();
                 __free_page(zero_page);
                 goto retry;
@@ -200,7 +199,7 @@ retry:
         /* We take additional reference here. It will be put back by shrinker */
         atomic_set(&huge_zero_refcount, 2);
         preempt_enable();
-        return ACCESS_ONCE(huge_zero_pfn);
+        return ACCESS_ONCE(huge_zero_page);
 }

 static void put_huge_zero_page(void)
@@ -220,9 +219,9 @@ static int shrink_huge_zero_page(struct shrinker *shrink,
                 return atomic_read(&huge_zero_refcount) == 1 ? HPAGE_PMD_NR : 0;

         if (atomic_cmpxchg(&huge_zero_refcount, 1, 0) == 1) {
-                unsigned long zero_pfn = xchg(&huge_zero_pfn, 0);
-                BUG_ON(zero_pfn == 0);
-                __free_page(__pfn_to_page(zero_pfn));
+                struct page *zero_page = xchg(&huge_zero_page, NULL);
+                BUG_ON(zero_page == NULL);
+                __free_page(zero_page);
         }

         return 0;
@@ -764,12 +763,12 @@ static inline struct page *alloc_hugepage(int defrag)

 static bool set_huge_zero_page(pgtable_t pgtable, struct mm_struct *mm,
                 struct vm_area_struct *vma, unsigned long haddr, pmd_t *pmd,
-                unsigned long zero_pfn)
+                struct page *zero_page)
 {
         pmd_t entry;
         if (!pmd_none(*pmd))
                 return false;
-        entry = pfn_pmd(zero_pfn, vma->vm_page_prot);
+        entry = mk_pmd(zero_page, vma->vm_page_prot);
         entry = pmd_wrprotect(entry);
         entry = pmd_mkhuge(entry);
         set_pmd_at(mm, haddr, pmd, entry);
@@ -794,20 +793,20 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
         if (!(flags & FAULT_FLAG_WRITE) &&
                         transparent_hugepage_use_zero_page()) {
                 pgtable_t pgtable;
-                unsigned long zero_pfn;
+                struct page *zero_page;
                 bool set;
                 pgtable = pte_alloc_one(mm, haddr);
                 if (unlikely(!pgtable))
                         return VM_FAULT_OOM;
-                zero_pfn = get_huge_zero_page();
-                if (unlikely(!zero_pfn)) {
+                zero_page = get_huge_zero_page();
+                if (unlikely(!zero_page)) {
                         pte_free(mm, pgtable);
                         count_vm_event(THP_FAULT_FALLBACK);
                         goto out;
                 }
                 spin_lock(&mm->page_table_lock);
                 set = set_huge_zero_page(pgtable, mm, vma, haddr, pmd,
                                 zero_pfn);
-                                zero_pfn);
+                                zero_page);
                 spin_unlock(&mm->page_table_lock);
                 if (!set) {
                         pte_free(mm, pgtable);
@@ -886,16 +885,16 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
          * a page table.
          */
         if (is_huge_zero_pmd(pmd)) {
-                unsigned long zero_pfn;
+                struct page *zero_page;
                 bool set;
                 /*
                  * get_huge_zero_page() will never allocate a new page here,
                  * since we already have a zero page to copy. It just takes a
                  * reference.
                  */
-                zero_pfn = get_huge_zero_page();
+                zero_page = get_huge_zero_page();
                 set = set_huge_zero_page(pgtable, dst_mm, vma, addr, dst_pmd,
-                                zero_pfn);
+                                zero_page);
                 BUG_ON(!set); /* unexpected !pmd_none(dst_pmd) */
                 ret = 0;
                 goto out_unlock;
@@ -1812,7 +1811,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
         struct anon_vma *anon_vma;
         int ret = 1;

-        BUG_ON(is_huge_zero_pfn(page_to_pfn(page)));
+        BUG_ON(is_huge_zero_page(page));
         BUG_ON(!PageAnon(page));

         /*
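
For illustration (again not part of the commit): the shrinker path shown above frees the cached page only when the reference held on behalf of the cache is the last one left, and after the patch it retracts the cached pointer back to the NULL sentinel with xchg(&huge_zero_page, NULL) instead of xchg(&huge_zero_pfn, 0). A user-space C sketch of that step, with hypothetical names rather than kernel APIs, could look like this:

/* Sketch only: a shrinker-style callback that frees a cached object
 * iff the cache holds the last reference (refcount 1 -> 0), then
 * resets the shared pointer to the NULL "not allocated yet" sentinel.
 * All names here are hypothetical.
 */
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

static _Atomic(char *) cached_obj;   /* NULL means "not allocated yet" */
static atomic_int refcount;          /* 1 means only the cache holds it */

static void shrink_cached_obj(void)
{
        int expected = 1;

        /* Analogous to atomic_cmpxchg(&huge_zero_refcount, 1, 0) followed
         * by xchg(&huge_zero_page, NULL) and __free_page() in the patch. */
        if (atomic_compare_exchange_strong(&refcount, &expected, 0))
                free(atomic_exchange(&cached_obj, NULL));
}

int main(void)
{
        /* Pretend an earlier get() published the object with refcount 2
         * (one reference for the caller, one held by the cache). */
        atomic_store(&cached_obj, calloc(1, 4096));
        atomic_store(&refcount, 2);

        atomic_fetch_sub(&refcount, 1);  /* the user drops its reference */
        shrink_cached_obj();             /* cache ref is the last one -> freed */

        printf("cached_obj is now %p\n", (void *)atomic_load(&cached_obj));
        return 0;
}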