author		Ebru Akagunduz <ebru.akagunduz@gmail.com>	2016-01-14 18:22:19 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2016-01-14 19:00:49 -0500
commit		7d2eba0557c18f7522b98befed98799990dd4fdb (patch)
tree		948c38a965c11dde91d648ab7a60d8a4da7c3512
parent		cb5490a5eea415106d7438df440da5fb1e17318d (diff)
mm: add tracepoint for scanning pages
This patch series makes swapin readahead up to a certain number of pages
to gain more THP performance, and adds tracepoints for
khugepaged_scan_pmd, collapse_huge_page and __collapse_huge_page_isolate.

This patch series was written to deal with programs that access most,
but not all, of their memory after they get swapped out.  Currently
these programs do not get their memory collapsed into THPs after the
system has swapped their memory out, while they would get THPs before
swapping happened.

This patch series was tested with a test program that allocates 400MB
of memory, writes to it, and then sleeps.  I force the system to swap
all of it out.  Afterwards, the test program touches most of the area
by writing to it, but leaves a piece of it untouched.  This shows how
much swapin readahead the patch performs.

Test results:

                        After swapped out
-------------------------------------------------------------------
              | Anonymous | AnonHugePages |   Swap    | Fraction |
-------------------------------------------------------------------
With patch    |  90076 kB |      88064 kB | 309928 kB |    99%   |
-------------------------------------------------------------------
Without patch | 194068 kB |     192512 kB | 205936 kB |    99%   |
-------------------------------------------------------------------

                        After swapped in
-------------------------------------------------------------------
              | Anonymous | AnonHugePages |   Swap    | Fraction |
-------------------------------------------------------------------
With patch    | 201408 kB |     198656 kB | 198596 kB |    98%   |
-------------------------------------------------------------------
Without patch | 292624 kB |     192512 kB | 107380 kB |    65%   |
-------------------------------------------------------------------

This patch (of 3):

Using static tracepoints, data from these functions can be recorded.
This makes it possible to automate debugging without making a lot of
changes in the source code.  This patch adds tracepoints for
khugepaged_scan_pmd, collapse_huge_page and
__collapse_huge_page_isolate.

[dan.carpenter@oracle.com: add a missing tab]
Signed-off-by: Ebru Akagunduz <ebru.akagunduz@gmail.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Xie XiuQi <xiexiuqi@huawei.com>
Cc: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: David Rientjes <rientjes@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.cz>
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
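For reference, a minimal sketch of such a test program (the program
itself is not part of this series; the 400MB size matches the
description above, while the untouched tail size, sleep duration and
sampling method are illustrative assumptions):

	#include <stdlib.h>
	#include <string.h>
	#include <unistd.h>

	#define SIZE	(400UL << 20)	/* 400MB, as in the test above */

	int main(void)
	{
		char *buf = malloc(SIZE);

		if (!buf)
			return 1;
		memset(buf, 1, SIZE);	/* write to all of it */
		sleep(600);		/* force it all out to swap meanwhile */
		memset(buf, 2, SIZE - (4UL << 20));	/* touch most, but not all */
		pause();	/* sample /proc/<pid>/smaps (AnonHugePages, Swap) */
		return 0;
	}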
-rw-r--r--	include/trace/events/huge_memory.h	136
-rw-r--r--	mm/huge_memory.c			166
2 files changed, 270 insertions(+), 32 deletions(-)
diff --git a/include/trace/events/huge_memory.h b/include/trace/events/huge_memory.h
new file mode 100644
index 000000000000..97d635cabac8
--- /dev/null
+++ b/include/trace/events/huge_memory.h
@@ -0,0 +1,136 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM huge_memory
+
+#if !defined(__HUGE_MEMORY_H) || defined(TRACE_HEADER_MULTI_READ)
+#define __HUGE_MEMORY_H
+
+#include <linux/tracepoint.h>
+
+#include <trace/events/gfpflags.h>
+
+#define SCAN_STATUS							\
+	EM( SCAN_FAIL,			"failed")			\
+	EM( SCAN_SUCCEED,		"succeeded")			\
+	EM( SCAN_PMD_NULL,		"pmd_null")			\
+	EM( SCAN_EXCEED_NONE_PTE,	"exceed_none_pte")		\
+	EM( SCAN_PTE_NON_PRESENT,	"pte_non_present")		\
+	EM( SCAN_PAGE_RO,		"no_writable_page")		\
+	EM( SCAN_NO_REFERENCED_PAGE,	"no_referenced_page")		\
+	EM( SCAN_PAGE_NULL,		"page_null")			\
+	EM( SCAN_SCAN_ABORT,		"scan_aborted")			\
+	EM( SCAN_PAGE_COUNT,		"not_suitable_page_count")	\
+	EM( SCAN_PAGE_LRU,		"page_not_in_lru")		\
+	EM( SCAN_PAGE_LOCK,		"page_locked")			\
+	EM( SCAN_PAGE_ANON,		"page_not_anon")		\
+	EM( SCAN_ANY_PROCESS,		"no_process_for_page")		\
+	EM( SCAN_VMA_NULL,		"vma_null")			\
+	EM( SCAN_VMA_CHECK,		"vma_check_failed")		\
+	EM( SCAN_ADDRESS_RANGE,		"not_suitable_address_range")	\
+	EM( SCAN_SWAP_CACHE_PAGE,	"page_swap_cache")		\
+	EM( SCAN_DEL_PAGE_LRU,		"could_not_delete_page_from_lru")\
+	EM( SCAN_ALLOC_HUGE_PAGE_FAIL,	"alloc_huge_page_failed")	\
+	EMe( SCAN_CGROUP_CHARGE_FAIL,	"ccgroup_charge_failed")
+
+#undef EM
+#undef EMe
+#define EM(a, b)	TRACE_DEFINE_ENUM(a);
+#define EMe(a, b)	TRACE_DEFINE_ENUM(a);
+
+SCAN_STATUS
+
+#undef EM
+#undef EMe
+#define EM(a, b)	{a, b},
+#define EMe(a, b)	{a, b}
+
+TRACE_EVENT(mm_khugepaged_scan_pmd,
+
+	TP_PROTO(struct mm_struct *mm, unsigned long pfn, bool writable,
+		 bool referenced, int none_or_zero, int status),
+
+	TP_ARGS(mm, pfn, writable, referenced, none_or_zero, status),
+
+	TP_STRUCT__entry(
+		__field(struct mm_struct *, mm)
+		__field(unsigned long, pfn)
+		__field(bool, writable)
+		__field(bool, referenced)
+		__field(int, none_or_zero)
+		__field(int, status)
+	),
+
+	TP_fast_assign(
+		__entry->mm = mm;
+		__entry->pfn = pfn;
+		__entry->writable = writable;
+		__entry->referenced = referenced;
+		__entry->none_or_zero = none_or_zero;
+		__entry->status = status;
+	),
+
+	TP_printk("mm=%p, scan_pfn=0x%lx, writable=%d, referenced=%d, none_or_zero=%d, status=%s",
+		__entry->mm,
+		__entry->pfn,
+		__entry->writable,
+		__entry->referenced,
+		__entry->none_or_zero,
+		__print_symbolic(__entry->status, SCAN_STATUS))
+);
+
+TRACE_EVENT(mm_collapse_huge_page,
+
+	TP_PROTO(struct mm_struct *mm, int isolated, int status),
+
+	TP_ARGS(mm, isolated, status),
+
+	TP_STRUCT__entry(
+		__field(struct mm_struct *, mm)
+		__field(int, isolated)
+		__field(int, status)
+	),
+
+	TP_fast_assign(
+		__entry->mm = mm;
+		__entry->isolated = isolated;
+		__entry->status = status;
+	),
+
+	TP_printk("mm=%p, isolated=%d, status=%s",
+		__entry->mm,
+		__entry->isolated,
+		__print_symbolic(__entry->status, SCAN_STATUS))
+);
+
+TRACE_EVENT(mm_collapse_huge_page_isolate,
+
+	TP_PROTO(unsigned long pfn, int none_or_zero,
+		 bool referenced, bool writable, int status),
+
+	TP_ARGS(pfn, none_or_zero, referenced, writable, status),
+
+	TP_STRUCT__entry(
+		__field(unsigned long, pfn)
+		__field(int, none_or_zero)
+		__field(bool, referenced)
+		__field(bool, writable)
+		__field(int, status)
+	),
+
+	TP_fast_assign(
+		__entry->pfn = pfn;
+		__entry->none_or_zero = none_or_zero;
+		__entry->referenced = referenced;
+		__entry->writable = writable;
+		__entry->status = status;
+	),
+
+	TP_printk("scan_pfn=0x%lx, none_or_zero=%d, referenced=%d, writable=%d, status=%s",
+		__entry->pfn,
+		__entry->none_or_zero,
+		__entry->referenced,
+		__entry->writable,
+		__print_symbolic(__entry->status, SCAN_STATUS))
+);
+
+#endif /* __HUGE_MEMORY_H */
+#include <trace/define_trace.h>
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 62fe06bb7d04..f952f055fdcf 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -31,6 +31,33 @@
 #include <asm/pgalloc.h>
 #include "internal.h"
 
+enum scan_result {
+	SCAN_FAIL,
+	SCAN_SUCCEED,
+	SCAN_PMD_NULL,
+	SCAN_EXCEED_NONE_PTE,
+	SCAN_PTE_NON_PRESENT,
+	SCAN_PAGE_RO,
+	SCAN_NO_REFERENCED_PAGE,
+	SCAN_PAGE_NULL,
+	SCAN_SCAN_ABORT,
+	SCAN_PAGE_COUNT,
+	SCAN_PAGE_LRU,
+	SCAN_PAGE_LOCK,
+	SCAN_PAGE_ANON,
+	SCAN_ANY_PROCESS,
+	SCAN_VMA_NULL,
+	SCAN_VMA_CHECK,
+	SCAN_ADDRESS_RANGE,
+	SCAN_SWAP_CACHE_PAGE,
+	SCAN_DEL_PAGE_LRU,
+	SCAN_ALLOC_HUGE_PAGE_FAIL,
+	SCAN_CGROUP_CHARGE_FAIL
+};
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/huge_memory.h>
+
 /*
  * By default transparent hugepage support is disabled in order that avoid
  * to risk increase the memory footprint of applications without a guaranteed
@@ -2198,26 +2225,33 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
 					unsigned long address,
 					pte_t *pte)
 {
-	struct page *page;
+	struct page *page = NULL;
 	pte_t *_pte;
-	int none_or_zero = 0;
+	int none_or_zero = 0, result = 0;
 	bool referenced = false, writable = false;
+
 	for (_pte = pte; _pte < pte+HPAGE_PMD_NR;
 	     _pte++, address += PAGE_SIZE) {
 		pte_t pteval = *_pte;
 		if (pte_none(pteval) || (pte_present(pteval) &&
 				is_zero_pfn(pte_pfn(pteval)))) {
 			if (!userfaultfd_armed(vma) &&
-			    ++none_or_zero <= khugepaged_max_ptes_none)
+			    ++none_or_zero <= khugepaged_max_ptes_none) {
 				continue;
-			else
+			} else {
+				result = SCAN_EXCEED_NONE_PTE;
 				goto out;
+			}
 		}
-		if (!pte_present(pteval))
+		if (!pte_present(pteval)) {
+			result = SCAN_PTE_NON_PRESENT;
 			goto out;
+		}
 		page = vm_normal_page(vma, address, pteval);
-		if (unlikely(!page))
+		if (unlikely(!page)) {
+			result = SCAN_PAGE_NULL;
 			goto out;
+		}
 
 		VM_BUG_ON_PAGE(PageCompound(page), page);
 		VM_BUG_ON_PAGE(!PageAnon(page), page);
@@ -2229,8 +2263,10 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
 		 * is needed to serialize against split_huge_page
 		 * when invoked from the VM.
 		 */
-		if (!trylock_page(page))
+		if (!trylock_page(page)) {
+			result = SCAN_PAGE_LOCK;
 			goto out;
+		}
 
 		/*
 		 * cannot use mapcount: can't collapse if there's a gup pin.
@@ -2239,6 +2275,7 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
 		 */
 		if (page_count(page) != 1 + !!PageSwapCache(page)) {
 			unlock_page(page);
+			result = SCAN_PAGE_COUNT;
 			goto out;
 		}
 		if (pte_write(pteval)) {
@@ -2246,6 +2283,7 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
 		} else {
 			if (PageSwapCache(page) && !reuse_swap_page(page)) {
 				unlock_page(page);
+				result = SCAN_SWAP_CACHE_PAGE;
 				goto out;
 			}
 			/*
@@ -2260,6 +2298,7 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
 		 */
 		if (isolate_lru_page(page)) {
 			unlock_page(page);
+			result = SCAN_DEL_PAGE_LRU;
 			goto out;
 		}
 		/* 0 stands for page_is_file_cache(page) == false */
@@ -2273,10 +2312,21 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
 		    mmu_notifier_test_young(vma->vm_mm, address))
 			referenced = true;
 	}
-	if (likely(referenced && writable))
-		return 1;
+	if (likely(writable)) {
+		if (likely(referenced)) {
+			result = SCAN_SUCCEED;
+			trace_mm_collapse_huge_page_isolate(page_to_pfn(page), none_or_zero,
+							    referenced, writable, result);
+			return 1;
+		}
+	} else {
+		result = SCAN_PAGE_RO;
+	}
+
 out:
 	release_pte_pages(pte, _pte);
+	trace_mm_collapse_huge_page_isolate(page_to_pfn(page), none_or_zero,
+					    referenced, writable, result);
 	return 0;
 }
 
@@ -2513,7 +2563,7 @@ static void collapse_huge_page(struct mm_struct *mm,
 	pgtable_t pgtable;
 	struct page *new_page;
 	spinlock_t *pmd_ptl, *pte_ptl;
-	int isolated;
+	int isolated, result = 0;
 	unsigned long hstart, hend;
 	struct mem_cgroup *memcg;
 	unsigned long mmun_start;	/* For mmu_notifiers */
@@ -2528,12 +2578,15 @@ static void collapse_huge_page(struct mm_struct *mm,
 
 	/* release the mmap_sem read lock. */
 	new_page = khugepaged_alloc_page(hpage, gfp, mm, address, node);
-	if (!new_page)
-		return;
+	if (!new_page) {
+		result = SCAN_ALLOC_HUGE_PAGE_FAIL;
+		goto out_nolock;
+	}
 
-	if (unlikely(mem_cgroup_try_charge(new_page, mm,
-					   gfp, &memcg)))
-		return;
+	if (unlikely(mem_cgroup_try_charge(new_page, mm, gfp, &memcg))) {
+		result = SCAN_CGROUP_CHARGE_FAIL;
+		goto out_nolock;
+	}
 
 	/*
 	 * Prevent all access to pagetables with the exception of
@@ -2541,21 +2594,31 @@ static void collapse_huge_page(struct mm_struct *mm,
 	 * handled by the anon_vma lock + PG_lock.
 	 */
 	down_write(&mm->mmap_sem);
-	if (unlikely(khugepaged_test_exit(mm)))
+	if (unlikely(khugepaged_test_exit(mm))) {
+		result = SCAN_ANY_PROCESS;
 		goto out;
+	}
 
 	vma = find_vma(mm, address);
-	if (!vma)
+	if (!vma) {
+		result = SCAN_VMA_NULL;
 		goto out;
+	}
 	hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK;
 	hend = vma->vm_end & HPAGE_PMD_MASK;
-	if (address < hstart || address + HPAGE_PMD_SIZE > hend)
+	if (address < hstart || address + HPAGE_PMD_SIZE > hend) {
+		result = SCAN_ADDRESS_RANGE;
 		goto out;
-	if (!hugepage_vma_check(vma))
+	}
+	if (!hugepage_vma_check(vma)) {
+		result = SCAN_VMA_CHECK;
 		goto out;
+	}
 	pmd = mm_find_pmd(mm, address);
-	if (!pmd)
+	if (!pmd) {
+		result = SCAN_PMD_NULL;
 		goto out;
+	}
 
 	anon_vma_lock_write(vma->anon_vma);
 
@@ -2592,6 +2655,7 @@ static void collapse_huge_page(struct mm_struct *mm,
 		pmd_populate(mm, pmd, pmd_pgtable(_pmd));
 		spin_unlock(pmd_ptl);
 		anon_vma_unlock_write(vma->anon_vma);
+		result = SCAN_FAIL;
 		goto out;
 	}
 
@@ -2629,10 +2693,15 @@ static void collapse_huge_page(struct mm_struct *mm,
 	*hpage = NULL;
 
 	khugepaged_pages_collapsed++;
+	result = SCAN_SUCCEED;
 out_up_write:
 	up_write(&mm->mmap_sem);
+	trace_mm_collapse_huge_page(mm, isolated, result);
 	return;
 
+out_nolock:
+	trace_mm_collapse_huge_page(mm, isolated, result);
+	return;
 out:
 	mem_cgroup_cancel_charge(new_page, memcg);
 	goto out_up_write;
@@ -2645,8 +2714,8 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
 {
 	pmd_t *pmd;
 	pte_t *pte, *_pte;
-	int ret = 0, none_or_zero = 0;
-	struct page *page;
+	int ret = 0, none_or_zero = 0, result = 0;
+	struct page *page = NULL;
 	unsigned long _address;
 	spinlock_t *ptl;
 	int node = NUMA_NO_NODE;
@@ -2655,8 +2724,10 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
 	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
 
 	pmd = mm_find_pmd(mm, address);
-	if (!pmd)
+	if (!pmd) {
+		result = SCAN_PMD_NULL;
 		goto out;
+	}
 
 	memset(khugepaged_node_load, 0, sizeof(khugepaged_node_load));
 	pte = pte_offset_map_lock(mm, pmd, address, &ptl);
@@ -2665,19 +2736,25 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
 		pte_t pteval = *_pte;
 		if (pte_none(pteval) || is_zero_pfn(pte_pfn(pteval))) {
 			if (!userfaultfd_armed(vma) &&
-			    ++none_or_zero <= khugepaged_max_ptes_none)
+			    ++none_or_zero <= khugepaged_max_ptes_none) {
 				continue;
-			else
+			} else {
+				result = SCAN_EXCEED_NONE_PTE;
 				goto out_unmap;
+			}
 		}
-		if (!pte_present(pteval))
+		if (!pte_present(pteval)) {
+			result = SCAN_PTE_NON_PRESENT;
 			goto out_unmap;
+		}
 		if (pte_write(pteval))
 			writable = true;
 
 		page = vm_normal_page(vma, _address, pteval);
-		if (unlikely(!page))
+		if (unlikely(!page)) {
+			result = SCAN_PAGE_NULL;
 			goto out_unmap;
+		}
 		/*
 		 * Record which node the original page is from and save this
 		 * information to khugepaged_node_load[].
@@ -2685,26 +2762,49 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
 		 * hit record.
 		 */
 		node = page_to_nid(page);
-		if (khugepaged_scan_abort(node))
+		if (khugepaged_scan_abort(node)) {
+			result = SCAN_SCAN_ABORT;
 			goto out_unmap;
+		}
 		khugepaged_node_load[node]++;
 		VM_BUG_ON_PAGE(PageCompound(page), page);
-		if (!PageLRU(page) || PageLocked(page) || !PageAnon(page))
+		if (!PageLRU(page)) {
+			result = SCAN_SCAN_ABORT;
+			goto out_unmap;
+		}
+		if (PageLocked(page)) {
+			result = SCAN_PAGE_LOCK;
+			goto out_unmap;
+		}
+		if (!PageAnon(page)) {
+			result = SCAN_PAGE_ANON;
 			goto out_unmap;
+		}
+
 		/*
 		 * cannot use mapcount: can't collapse if there's a gup pin.
 		 * The page must only be referenced by the scanned process
 		 * and page swap cache.
 		 */
-		if (page_count(page) != 1 + !!PageSwapCache(page))
+		if (page_count(page) != 1 + !!PageSwapCache(page)) {
+			result = SCAN_PAGE_COUNT;
 			goto out_unmap;
+		}
 		if (pte_young(pteval) ||
 		    page_is_young(page) || PageReferenced(page) ||
 		    mmu_notifier_test_young(vma->vm_mm, address))
 			referenced = true;
 	}
-	if (referenced && writable)
-		ret = 1;
+	if (writable) {
+		if (referenced) {
+			result = SCAN_SUCCEED;
+			ret = 1;
+		} else {
+			result = SCAN_NO_REFERENCED_PAGE;
+		}
+	} else {
+		result = SCAN_PAGE_RO;
+	}
 out_unmap:
 	pte_unmap_unlock(pte, ptl);
 	if (ret) {
@@ -2713,6 +2813,8 @@ out_unmap:
 		collapse_huge_page(mm, address, hpage, vma, node);
 	}
 out:
+	trace_mm_khugepaged_scan_pmd(mm, page_to_pfn(page), writable, referenced,
+				     none_or_zero, result);
 	return ret;
 }
 
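With the patch applied, the new events appear under the huge_memory
trace system in tracefs, and __print_symbolic() renders the status
field using the SCAN_STATUS strings above, so no decoding is needed.
A minimal sketch of a userspace reader (assuming tracefs is mounted at
/sys/kernel/tracing, as is conventional, and root privileges):

	#include <stdio.h>
	#include <stdlib.h>

	int main(void)
	{
		char line[4096];
		FILE *f = fopen("/sys/kernel/tracing/events/huge_memory/enable", "w");

		if (!f) {
			perror("enable huge_memory events");
			return EXIT_FAILURE;
		}
		fputs("1\n", f);
		fclose(f);

		/* trace_pipe blocks until records arrive, e.g. the next time
		 * khugepaged scans a pmd range */
		f = fopen("/sys/kernel/tracing/trace_pipe", "r");
		if (!f) {
			perror("open trace_pipe");
			return EXIT_FAILURE;
		}
		while (fgets(line, sizeof(line), f))
			fputs(line, stdout);
		fclose(f);
		return 0;
	}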