author		Ebru Akagunduz <ebru.akagunduz@gmail.com>	2015-04-14 18:45:24 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2015-04-14 19:49:01 -0400
commit		ca0984caa8235762dc4e22c1c47ae6719dcc4064 (patch)
tree		6c59dae501f781e970d81d60580cf52542356a09
parent		2149cdaef6c0eb59a9edf3b152027392cd66b41f (diff)
mm: incorporate zero pages into transparent huge pages
This patch improves THP collapse rates by allowing zero pages.
Currently khugepaged can collapse 4kB pages into a THP when there are up
to khugepaged_max_ptes_none pte_none ptes in a 2MB range. This patch
counts pte_none ptes and mapped zero pages with the same variable,
none_or_zero.
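The khugepaged_max_ptes_none limit is exposed through sysfs, so the
trade-off between collapse rate and memory overhead can be inspected and
tuned at runtime. A minimal C sketch for reading the current value (an
illustration, not part of this patch; assumes a kernel with THP enabled):

#include <stdio.h>

#define MAX_PTES_NONE \
        "/sys/kernel/mm/transparent_hugepage/khugepaged/max_ptes_none"

int main(void)
{
        FILE *f = fopen(MAX_PTES_NONE, "r");
        unsigned int val;

        if (!f)
                return 1;       /* no THP support or no permission */
        if (fscanf(f, "%u", &val) == 1)
                /* typically HPAGE_PMD_NR - 1 = 511 on x86-64 */
                printf("max_ptes_none = %u\n", val);
        fclose(f);
        return 0;
}

Writing a new value (as root) uses the same file.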
The patch was tested with a program that allocates 800MB of memory and
performs interleaved reads and writes in a pattern that causes some 2MB
areas to see read accesses first, resulting in the zero pfn being
mapped there.
To simulate memory fragmentation at allocation time, I modified
do_huge_pmd_anonymous_page to return VM_FAULT_FALLBACK for read faults.
Without the patch, only 50% of the program's memory was collapsed into
THPs and the percentage did not increase over time.
With this patch, after 10 minutes of waiting, khugepaged had collapsed
99% of the program's memory.
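The test program itself is not included in the commit; a rough userspace
reconstruction of the access pattern described above could look like the
sketch below (the 4kB page size, the read stride, and the half-area
write are assumptions):

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/mman.h>

#define SIZE    (800UL << 20)   /* 800MB, as in the test above */
#define CHUNK   (2UL << 20)     /* one THP-sized 2MB area */
#define PAGE    4096UL

int main(void)
{
        char *buf = mmap(NULL, SIZE, PROT_READ | PROT_WRITE,
                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        unsigned long off, sum = 0;

        if (buf == MAP_FAILED)
                return 1;

        /* Read-only first touch: each pte maps the shared zero pfn. */
        for (off = 0; off < SIZE; off += PAGE)
                sum += buf[off];

        /*
         * Write only part of each 2MB area; the remaining ptes keep
         * pointing at the zero page, which khugepaged could not
         * collapse before this patch.
         */
        for (off = 0; off < SIZE; off += CHUNK)
                memset(buf + off, 1, CHUNK / 2);

        printf("sum=%lu, waiting for khugepaged...\n", sum);
        pause();        /* watch AnonHugePages: in /proc/<pid>/smaps */
        return 0;
}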
[aarcange@redhat.com: fix bogus BUG()]
Signed-off-by: Ebru Akagunduz <ebru.akagunduz@gmail.com>
Reviewed-by: Rik van Riel <riel@redhat.com>
Reviewed-by: Andrea Arcangeli <aarcange@redhat.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Hugh Dickins <hughd@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Sasha Levin <sasha.levin@oracle.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--	mm/huge_memory.c	28 ++++++++++++++++++++--------
1 file changed, 20 insertions(+), 8 deletions(-)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 10a4b6cea0d1..6352c1dfa898 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2109,7 +2109,7 @@ static void release_pte_pages(pte_t *pte, pte_t *_pte)
 {
         while (--_pte >= pte) {
                 pte_t pteval = *_pte;
-                if (!pte_none(pteval))
+                if (!pte_none(pteval) && !is_zero_pfn(pte_pfn(pteval)))
                         release_pte_page(pte_page(pteval));
         }
 }
@@ -2120,13 +2120,13 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
 {
         struct page *page;
         pte_t *_pte;
-        int none = 0;
+        int none_or_zero = 0;
         bool referenced = false, writable = false;
         for (_pte = pte; _pte < pte+HPAGE_PMD_NR;
              _pte++, address += PAGE_SIZE) {
                 pte_t pteval = *_pte;
-                if (pte_none(pteval)) {
-                        if (++none <= khugepaged_max_ptes_none)
+                if (pte_none(pteval) || is_zero_pfn(pte_pfn(pteval))) {
+                        if (++none_or_zero <= khugepaged_max_ptes_none)
                                 continue;
                         else
                                 goto out;
@@ -2207,9 +2207,21 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page,
                 pte_t pteval = *_pte;
                 struct page *src_page;

-                if (pte_none(pteval)) {
+                if (pte_none(pteval) || is_zero_pfn(pte_pfn(pteval))) {
                         clear_user_highpage(page, address);
                         add_mm_counter(vma->vm_mm, MM_ANONPAGES, 1);
+                        if (is_zero_pfn(pte_pfn(pteval))) {
+                                /*
+                                 * ptl mostly unnecessary.
+                                 */
+                                spin_lock(ptl);
+                                /*
+                                 * paravirt calls inside pte_clear here are
+                                 * superfluous.
+                                 */
+                                pte_clear(vma->vm_mm, address, _pte);
+                                spin_unlock(ptl);
+                        }
                 } else {
                         src_page = pte_page(pteval);
                         copy_user_highpage(page, src_page, address, vma);
@@ -2543,7 +2555,7 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
 {
         pmd_t *pmd;
         pte_t *pte, *_pte;
-        int ret = 0, none = 0;
+        int ret = 0, none_or_zero = 0;
         struct page *page;
         unsigned long _address;
         spinlock_t *ptl;
@@ -2561,8 +2573,8 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
         for (_address = address, _pte = pte; _pte < pte+HPAGE_PMD_NR;
              _pte++, _address += PAGE_SIZE) {
                 pte_t pteval = *_pte;
-                if (pte_none(pteval)) {
-                        if (++none <= khugepaged_max_ptes_none)
+                if (pte_none(pteval) || is_zero_pfn(pte_pfn(pteval))) {
+                        if (++none_or_zero <= khugepaged_max_ptes_none)
                                 continue;
                         else
                                 goto out_unmap;
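Collapse progress for a test like the one above can be observed via the
AnonHugePages: fields in /proc/<pid>/smaps. A small helper for polling
from inside the test process (again an illustration, not part of this
patch):

#include <stdio.h>

/* Sum all AnonHugePages: fields of /proc/self/smaps, in kB. */
static unsigned long anon_huge_kb(void)
{
        FILE *f = fopen("/proc/self/smaps", "r");
        char line[256];
        unsigned long kb, total = 0;

        if (!f)
                return 0;
        while (fgets(line, sizeof(line), f))
                if (sscanf(line, "AnonHugePages: %lu kB", &kb) == 1)
                        total += kb;
        fclose(f);
        return total;
}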