aboutsummaryrefslogtreecommitdiffstats
path: root/mm/huge_memory.c
diff options
context:
space:
mode:
author Ebru Akagunduz <ebru.akagunduz@gmail.com> 2015-02-11 18:28:28 -0500
committer Linus Torvalds <torvalds@linux-foundation.org> 2015-02-11 20:06:07 -0500
commit 10359213d05acf804558bda7cc9b8422a828d1cd (patch)
tree d8453420e172744530fd663388761f074d1a75b4 /mm/huge_memory.c
parent ba4877b9ca51f80b5d30f304a46762f0509e1635 (diff)
mm: incorporate read-only pages into transparent huge pages
This patch aims to improve THP collapse rates, by allowing THP collapse in the presence of read-only ptes, like those left in place by do_swap_page after a read fault. Currently THP can collapse 4kB pages into a THP when there are up to khugepaged_max_ptes_none pte_none ptes in a 2MB range. This patch applies the same limit for read-only ptes. The patch was tested with a test program that allocates 800MB of memory, writes to it, and then sleeps. I force the system to swap out all but 190MB of the program by touching other memory. Afterwards, the test program does a mix of reads and writes to its memory, and the memory gets swapped back in. Without the patch, only the memory that did not get swapped out remained in THPs, which corresponds to 24% of the memory of the program. The percentage did not increase over time. With this patch, after 5 minutes of waiting khugepaged had collapsed 50% of the program's memory back into THPs. Test results: With the patch: After swapped out: cat /proc/pid/smaps: Anonymous: 100464 kB AnonHugePages: 100352 kB Swap: 699540 kB Fraction: 99,88 cat /proc/meminfo: AnonPages: 1754448 kB AnonHugePages: 1716224 kB Fraction: 97,82 After swapped in: In a few seconds: cat /proc/pid/smaps: Anonymous: 800004 kB AnonHugePages: 145408 kB Swap: 0 kB Fraction: 18,17 cat /proc/meminfo: AnonPages: 2455016 kB AnonHugePages: 1761280 kB Fraction: 71,74 In 5 minutes: cat /proc/pid/smaps Anonymous: 800004 kB AnonHugePages: 407552 kB Swap: 0 kB Fraction: 50,94 cat /proc/meminfo: AnonPages: 2456872 kB AnonHugePages: 2023424 kB Fraction: 82,35 Without the patch: After swapped out: cat /proc/pid/smaps: Anonymous: 190660 kB AnonHugePages: 190464 kB Swap: 609344 kB Fraction: 99,89 cat /proc/meminfo: AnonPages: 1740456 kB AnonHugePages: 1667072 kB Fraction: 95,78 After swapped in: cat /proc/pid/smaps: Anonymous: 800004 kB AnonHugePages: 190464 kB Swap: 0 kB Fraction: 23,80 cat /proc/meminfo: AnonPages: 2350032 kB AnonHugePages: 1667072 kB Fraction: 70,93 I waited 
10 minutes, and the fractions did not change without the patch. Signed-off-by: Ebru Akagunduz <ebru.akagunduz@gmail.com> Reviewed-by: Rik van Riel <riel@redhat.com> Acked-by: Vlastimil Babka <vbabka@suse.cz> Acked-by: Zhang Yanfei <zhangyanfei@cn.fujitsu.com> Acked-by: Andrea Arcangeli <aarcange@redhat.com> Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Cc: Michal Hocko <mhocko@suse.cz> Cc: Mel Gorman <mgorman@suse.de> Cc: David Rientjes <rientjes@google.com> Cc: Sasha Levin <sasha.levin@oracle.com> Cc: Hugh Dickins <hughd@google.com> Cc: Johannes Weiner <hannes@cmpxchg.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/huge_memory.c')
-rw-r--r-- mm/huge_memory.c 55
1 files changed, 42 insertions, 13 deletions
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 29bc6e471df4..cb7be110cad3 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2117,7 +2117,8 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
2117{ 2117{
2118 struct page *page; 2118 struct page *page;
2119 pte_t *_pte; 2119 pte_t *_pte;
2120 int referenced = 0, none = 0; 2120 int none = 0;
2121 bool referenced = false, writable = false;
2121 for (_pte = pte; _pte < pte+HPAGE_PMD_NR; 2122 for (_pte = pte; _pte < pte+HPAGE_PMD_NR;
2122 _pte++, address += PAGE_SIZE) { 2123 _pte++, address += PAGE_SIZE) {
2123 pte_t pteval = *_pte; 2124 pte_t pteval = *_pte;
@@ -2127,7 +2128,7 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
2127 else 2128 else
2128 goto out; 2129 goto out;
2129 } 2130 }
2130 if (!pte_present(pteval) || !pte_write(pteval)) 2131 if (!pte_present(pteval))
2131 goto out; 2132 goto out;
2132 page = vm_normal_page(vma, address, pteval); 2133 page = vm_normal_page(vma, address, pteval);
2133 if (unlikely(!page)) 2134 if (unlikely(!page))
@@ -2137,9 +2138,6 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
2137 VM_BUG_ON_PAGE(!PageAnon(page), page); 2138 VM_BUG_ON_PAGE(!PageAnon(page), page);
2138 VM_BUG_ON_PAGE(!PageSwapBacked(page), page); 2139 VM_BUG_ON_PAGE(!PageSwapBacked(page), page);
2139 2140
2140 /* cannot use mapcount: can't collapse if there's a gup pin */
2141 if (page_count(page) != 1)
2142 goto out;
2143 /* 2141 /*
2144 * We can do it before isolate_lru_page because the 2142 * We can do it before isolate_lru_page because the
2145 * page can't be freed from under us. NOTE: PG_lock 2143 * page can't be freed from under us. NOTE: PG_lock
@@ -2148,6 +2146,29 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
2148 */ 2146 */
2149 if (!trylock_page(page)) 2147 if (!trylock_page(page))
2150 goto out; 2148 goto out;
2149
2150 /*
2151 * cannot use mapcount: can't collapse if there's a gup pin.
2152 * The page must only be referenced by the scanned process
2153 * and page swap cache.
2154 */
2155 if (page_count(page) != 1 + !!PageSwapCache(page)) {
2156 unlock_page(page);
2157 goto out;
2158 }
2159 if (pte_write(pteval)) {
2160 writable = true;
2161 } else {
2162 if (PageSwapCache(page) && !reuse_swap_page(page)) {
2163 unlock_page(page);
2164 goto out;
2165 }
2166 /*
2167 * Page is not in the swap cache. It can be collapsed
2168 * into a THP.
2169 */
2170 }
2171
2151 /* 2172 /*
2152 * Isolate the page to avoid collapsing an hugepage 2173 * Isolate the page to avoid collapsing an hugepage
2153 * currently in use by the VM. 2174 * currently in use by the VM.
@@ -2164,9 +2185,9 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
2164 /* If there is no mapped pte young don't collapse the page */ 2185 /* If there is no mapped pte young don't collapse the page */
2165 if (pte_young(pteval) || PageReferenced(page) || 2186 if (pte_young(pteval) || PageReferenced(page) ||
2166 mmu_notifier_test_young(vma->vm_mm, address)) 2187 mmu_notifier_test_young(vma->vm_mm, address))
2167 referenced = 1; 2188 referenced = true;
2168 } 2189 }
2169 if (likely(referenced)) 2190 if (likely(referenced && writable))
2170 return 1; 2191 return 1;
2171out: 2192out:
2172 release_pte_pages(pte, _pte); 2193 release_pte_pages(pte, _pte);
@@ -2519,11 +2540,12 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
2519{ 2540{
2520 pmd_t *pmd; 2541 pmd_t *pmd;
2521 pte_t *pte, *_pte; 2542 pte_t *pte, *_pte;
2522 int ret = 0, referenced = 0, none = 0; 2543 int ret = 0, none = 0;
2523 struct page *page; 2544 struct page *page;
2524 unsigned long _address; 2545 unsigned long _address;
2525 spinlock_t *ptl; 2546 spinlock_t *ptl;
2526 int node = NUMA_NO_NODE; 2547 int node = NUMA_NO_NODE;
2548 bool writable = false, referenced = false;
2527 2549
2528 VM_BUG_ON(address & ~HPAGE_PMD_MASK); 2550 VM_BUG_ON(address & ~HPAGE_PMD_MASK);
2529 2551
@@ -2542,8 +2564,11 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
2542 else 2564 else
2543 goto out_unmap; 2565 goto out_unmap;
2544 } 2566 }
2545 if (!pte_present(pteval) || !pte_write(pteval)) 2567 if (!pte_present(pteval))
2546 goto out_unmap; 2568 goto out_unmap;
2569 if (pte_write(pteval))
2570 writable = true;
2571
2547 page = vm_normal_page(vma, _address, pteval); 2572 page = vm_normal_page(vma, _address, pteval);
2548 if (unlikely(!page)) 2573 if (unlikely(!page))
2549 goto out_unmap; 2574 goto out_unmap;
@@ -2560,14 +2585,18 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
2560 VM_BUG_ON_PAGE(PageCompound(page), page); 2585 VM_BUG_ON_PAGE(PageCompound(page), page);
2561 if (!PageLRU(page) || PageLocked(page) || !PageAnon(page)) 2586 if (!PageLRU(page) || PageLocked(page) || !PageAnon(page))
2562 goto out_unmap; 2587 goto out_unmap;
2563 /* cannot use mapcount: can't collapse if there's a gup pin */ 2588 /*
2564 if (page_count(page) != 1) 2589 * cannot use mapcount: can't collapse if there's a gup pin.
2590 * The page must only be referenced by the scanned process
2591 * and page swap cache.
2592 */
2593 if (page_count(page) != 1 + !!PageSwapCache(page))
2565 goto out_unmap; 2594 goto out_unmap;
2566 if (pte_young(pteval) || PageReferenced(page) || 2595 if (pte_young(pteval) || PageReferenced(page) ||
2567 mmu_notifier_test_young(vma->vm_mm, address)) 2596 mmu_notifier_test_young(vma->vm_mm, address))
2568 referenced = 1; 2597 referenced = true;
2569 } 2598 }
2570 if (referenced) 2599 if (referenced && writable)
2571 ret = 1; 2600 ret = 1;
2572out_unmap: 2601out_unmap:
2573 pte_unmap_unlock(pte, ptl); 2602 pte_unmap_unlock(pte, ptl);