aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEbru Akagunduz <ebru.akagunduz@gmail.com>2016-07-26 18:26:46 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2016-07-26 19:19:19 -0400
commit0db501f7a34c11d3b964205e5b6d00692a648035 (patch)
tree1e0abb724957a37157af899319770086e10e1dc5
parent47f863ea220067f5c87096893876f44500fcc8c9 (diff)
mm, thp: convert from optimistic swapin collapsing to conservative
To detect whether khugepaged swapin is worthwhile, this patch checks the number of young pages. There should be at least half of HPAGE_PMD_NR young pages to swap in. Link: http://lkml.kernel.org/r/1468109451-1615-1-git-send-email-ebru.akagunduz@gmail.com Signed-off-by: Ebru Akagunduz <ebru.akagunduz@gmail.com> Suggested-by: Minchan Kim <minchan@kernel.org> Acked-by: Rik van Riel <riel@redhat.com> Cc: Hugh Dickins <hughd@google.com> Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com> Cc: Andrea Arcangeli <aarcange@redhat.com> Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com> Cc: Cyrill Gorcunov <gorcunov@openvz.org> Cc: Mel Gorman <mgorman@suse.de> Cc: David Rientjes <rientjes@google.com> Cc: Vlastimil Babka <vbabka@suse.cz> Cc: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: Michal Hocko <mhocko@suse.cz> Cc: Boaz Harrosh <boaz@plexistor.com> Cc: Hillf Danton <hillf.zj@alibaba-inc.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--include/trace/events/huge_memory.h19
-rw-r--r--mm/khugepaged.c38
2 files changed, 34 insertions(+), 23 deletions(-)
diff --git a/include/trace/events/huge_memory.h b/include/trace/events/huge_memory.h
index 830d47d5ca41..04f58acda8e8 100644
--- a/include/trace/events/huge_memory.h
+++ b/include/trace/events/huge_memory.h
@@ -13,7 +13,7 @@
13 EM( SCAN_EXCEED_NONE_PTE, "exceed_none_pte") \ 13 EM( SCAN_EXCEED_NONE_PTE, "exceed_none_pte") \
14 EM( SCAN_PTE_NON_PRESENT, "pte_non_present") \ 14 EM( SCAN_PTE_NON_PRESENT, "pte_non_present") \
15 EM( SCAN_PAGE_RO, "no_writable_page") \ 15 EM( SCAN_PAGE_RO, "no_writable_page") \
16 EM( SCAN_NO_REFERENCED_PAGE, "no_referenced_page") \ 16 EM( SCAN_LACK_REFERENCED_PAGE, "lack_referenced_page") \
17 EM( SCAN_PAGE_NULL, "page_null") \ 17 EM( SCAN_PAGE_NULL, "page_null") \
18 EM( SCAN_SCAN_ABORT, "scan_aborted") \ 18 EM( SCAN_SCAN_ABORT, "scan_aborted") \
19 EM( SCAN_PAGE_COUNT, "not_suitable_page_count") \ 19 EM( SCAN_PAGE_COUNT, "not_suitable_page_count") \
@@ -47,7 +47,7 @@ SCAN_STATUS
47TRACE_EVENT(mm_khugepaged_scan_pmd, 47TRACE_EVENT(mm_khugepaged_scan_pmd,
48 48
49 TP_PROTO(struct mm_struct *mm, struct page *page, bool writable, 49 TP_PROTO(struct mm_struct *mm, struct page *page, bool writable,
50 bool referenced, int none_or_zero, int status, int unmapped), 50 int referenced, int none_or_zero, int status, int unmapped),
51 51
52 TP_ARGS(mm, page, writable, referenced, none_or_zero, status, unmapped), 52 TP_ARGS(mm, page, writable, referenced, none_or_zero, status, unmapped),
53 53
@@ -55,7 +55,7 @@ TRACE_EVENT(mm_khugepaged_scan_pmd,
55 __field(struct mm_struct *, mm) 55 __field(struct mm_struct *, mm)
56 __field(unsigned long, pfn) 56 __field(unsigned long, pfn)
57 __field(bool, writable) 57 __field(bool, writable)
58 __field(bool, referenced) 58 __field(int, referenced)
59 __field(int, none_or_zero) 59 __field(int, none_or_zero)
60 __field(int, status) 60 __field(int, status)
61 __field(int, unmapped) 61 __field(int, unmapped)
@@ -108,14 +108,14 @@ TRACE_EVENT(mm_collapse_huge_page,
108TRACE_EVENT(mm_collapse_huge_page_isolate, 108TRACE_EVENT(mm_collapse_huge_page_isolate,
109 109
110 TP_PROTO(struct page *page, int none_or_zero, 110 TP_PROTO(struct page *page, int none_or_zero,
111 bool referenced, bool writable, int status), 111 int referenced, bool writable, int status),
112 112
113 TP_ARGS(page, none_or_zero, referenced, writable, status), 113 TP_ARGS(page, none_or_zero, referenced, writable, status),
114 114
115 TP_STRUCT__entry( 115 TP_STRUCT__entry(
116 __field(unsigned long, pfn) 116 __field(unsigned long, pfn)
117 __field(int, none_or_zero) 117 __field(int, none_or_zero)
118 __field(bool, referenced) 118 __field(int, referenced)
119 __field(bool, writable) 119 __field(bool, writable)
120 __field(int, status) 120 __field(int, status)
121 ), 121 ),
@@ -138,25 +138,28 @@ TRACE_EVENT(mm_collapse_huge_page_isolate,
138 138
139TRACE_EVENT(mm_collapse_huge_page_swapin, 139TRACE_EVENT(mm_collapse_huge_page_swapin,
140 140
141 TP_PROTO(struct mm_struct *mm, int swapped_in, int ret), 141 TP_PROTO(struct mm_struct *mm, int swapped_in, int referenced, int ret),
142 142
143 TP_ARGS(mm, swapped_in, ret), 143 TP_ARGS(mm, swapped_in, referenced, ret),
144 144
145 TP_STRUCT__entry( 145 TP_STRUCT__entry(
146 __field(struct mm_struct *, mm) 146 __field(struct mm_struct *, mm)
147 __field(int, swapped_in) 147 __field(int, swapped_in)
148 __field(int, referenced)
148 __field(int, ret) 149 __field(int, ret)
149 ), 150 ),
150 151
151 TP_fast_assign( 152 TP_fast_assign(
152 __entry->mm = mm; 153 __entry->mm = mm;
153 __entry->swapped_in = swapped_in; 154 __entry->swapped_in = swapped_in;
155 __entry->referenced = referenced;
154 __entry->ret = ret; 156 __entry->ret = ret;
155 ), 157 ),
156 158
157 TP_printk("mm=%p, swapped_in=%d, ret=%d", 159 TP_printk("mm=%p, swapped_in=%d, referenced=%d, ret=%d",
158 __entry->mm, 160 __entry->mm,
159 __entry->swapped_in, 161 __entry->swapped_in,
162 __entry->referenced,
160 __entry->ret) 163 __entry->ret)
161); 164);
162 165
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 566148489e33..7dbee698d6aa 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -27,7 +27,7 @@ enum scan_result {
27 SCAN_EXCEED_NONE_PTE, 27 SCAN_EXCEED_NONE_PTE,
28 SCAN_PTE_NON_PRESENT, 28 SCAN_PTE_NON_PRESENT,
29 SCAN_PAGE_RO, 29 SCAN_PAGE_RO,
30 SCAN_NO_REFERENCED_PAGE, 30 SCAN_LACK_REFERENCED_PAGE,
31 SCAN_PAGE_NULL, 31 SCAN_PAGE_NULL,
32 SCAN_SCAN_ABORT, 32 SCAN_SCAN_ABORT,
33 SCAN_PAGE_COUNT, 33 SCAN_PAGE_COUNT,
@@ -500,8 +500,8 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
500{ 500{
501 struct page *page = NULL; 501 struct page *page = NULL;
502 pte_t *_pte; 502 pte_t *_pte;
503 int none_or_zero = 0, result = 0; 503 int none_or_zero = 0, result = 0, referenced = 0;
504 bool referenced = false, writable = false; 504 bool writable = false;
505 505
506 for (_pte = pte; _pte < pte+HPAGE_PMD_NR; 506 for (_pte = pte; _pte < pte+HPAGE_PMD_NR;
507 _pte++, address += PAGE_SIZE) { 507 _pte++, address += PAGE_SIZE) {
@@ -580,11 +580,11 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
580 VM_BUG_ON_PAGE(!PageLocked(page), page); 580 VM_BUG_ON_PAGE(!PageLocked(page), page);
581 VM_BUG_ON_PAGE(PageLRU(page), page); 581 VM_BUG_ON_PAGE(PageLRU(page), page);
582 582
583 /* If there is no mapped pte young don't collapse the page */ 583 /* There should be enough young pte to collapse the page */
584 if (pte_young(pteval) || 584 if (pte_young(pteval) ||
585 page_is_young(page) || PageReferenced(page) || 585 page_is_young(page) || PageReferenced(page) ||
586 mmu_notifier_test_young(vma->vm_mm, address)) 586 mmu_notifier_test_young(vma->vm_mm, address))
587 referenced = true; 587 referenced++;
588 } 588 }
589 if (likely(writable)) { 589 if (likely(writable)) {
590 if (likely(referenced)) { 590 if (likely(referenced)) {
@@ -869,7 +869,8 @@ static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address)
869 869
870static bool __collapse_huge_page_swapin(struct mm_struct *mm, 870static bool __collapse_huge_page_swapin(struct mm_struct *mm,
871 struct vm_area_struct *vma, 871 struct vm_area_struct *vma,
872 unsigned long address, pmd_t *pmd) 872 unsigned long address, pmd_t *pmd,
873 int referenced)
873{ 874{
874 pte_t pteval; 875 pte_t pteval;
875 int swapped_in = 0, ret = 0; 876 int swapped_in = 0, ret = 0;
@@ -887,12 +888,19 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm,
887 if (!is_swap_pte(pteval)) 888 if (!is_swap_pte(pteval))
888 continue; 889 continue;
889 swapped_in++; 890 swapped_in++;
891 /* we only decide to swapin, if there is enough young ptes */
892 if (referenced < HPAGE_PMD_NR/2) {
893 trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0);
894 return false;
895 }
890 ret = do_swap_page(&fe, pteval); 896 ret = do_swap_page(&fe, pteval);
897
891 /* do_swap_page returns VM_FAULT_RETRY with released mmap_sem */ 898 /* do_swap_page returns VM_FAULT_RETRY with released mmap_sem */
892 if (ret & VM_FAULT_RETRY) { 899 if (ret & VM_FAULT_RETRY) {
893 down_read(&mm->mmap_sem); 900 down_read(&mm->mmap_sem);
894 if (hugepage_vma_revalidate(mm, address)) { 901 if (hugepage_vma_revalidate(mm, address)) {
895 /* vma is no longer available, don't continue to swapin */ 902 /* vma is no longer available, don't continue to swapin */
903 trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0);
896 return false; 904 return false;
897 } 905 }
898 /* check if the pmd is still valid */ 906 /* check if the pmd is still valid */
@@ -900,7 +908,7 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm,
900 return false; 908 return false;
901 } 909 }
902 if (ret & VM_FAULT_ERROR) { 910 if (ret & VM_FAULT_ERROR) {
903 trace_mm_collapse_huge_page_swapin(mm, swapped_in, 0); 911 trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0);
904 return false; 912 return false;
905 } 913 }
906 /* pte is unmapped now, we need to map it */ 914 /* pte is unmapped now, we need to map it */
@@ -908,7 +916,7 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm,
908 } 916 }
909 fe.pte--; 917 fe.pte--;
910 pte_unmap(fe.pte); 918 pte_unmap(fe.pte);
911 trace_mm_collapse_huge_page_swapin(mm, swapped_in, 1); 919 trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 1);
912 return true; 920 return true;
913} 921}
914 922
@@ -916,7 +924,7 @@ static void collapse_huge_page(struct mm_struct *mm,
916 unsigned long address, 924 unsigned long address,
917 struct page **hpage, 925 struct page **hpage,
918 struct vm_area_struct *vma, 926 struct vm_area_struct *vma,
919 int node) 927 int node, int referenced)
920{ 928{
921 pmd_t *pmd, _pmd; 929 pmd_t *pmd, _pmd;
922 pte_t *pte; 930 pte_t *pte;
@@ -973,7 +981,7 @@ static void collapse_huge_page(struct mm_struct *mm,
973 * If it fails, we release mmap_sem and jump out_nolock. 981 * If it fails, we release mmap_sem and jump out_nolock.
974 * Continuing to collapse causes inconsistency. 982 * Continuing to collapse causes inconsistency.
975 */ 983 */
976 if (!__collapse_huge_page_swapin(mm, vma, address, pmd)) { 984 if (!__collapse_huge_page_swapin(mm, vma, address, pmd, referenced)) {
977 mem_cgroup_cancel_charge(new_page, memcg, true); 985 mem_cgroup_cancel_charge(new_page, memcg, true);
978 up_read(&mm->mmap_sem); 986 up_read(&mm->mmap_sem);
979 goto out_nolock; 987 goto out_nolock;
@@ -1084,12 +1092,12 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
1084{ 1092{
1085 pmd_t *pmd; 1093 pmd_t *pmd;
1086 pte_t *pte, *_pte; 1094 pte_t *pte, *_pte;
1087 int ret = 0, none_or_zero = 0, result = 0; 1095 int ret = 0, none_or_zero = 0, result = 0, referenced = 0;
1088 struct page *page = NULL; 1096 struct page *page = NULL;
1089 unsigned long _address; 1097 unsigned long _address;
1090 spinlock_t *ptl; 1098 spinlock_t *ptl;
1091 int node = NUMA_NO_NODE, unmapped = 0; 1099 int node = NUMA_NO_NODE, unmapped = 0;
1092 bool writable = false, referenced = false; 1100 bool writable = false;
1093 1101
1094 VM_BUG_ON(address & ~HPAGE_PMD_MASK); 1102 VM_BUG_ON(address & ~HPAGE_PMD_MASK);
1095 1103
@@ -1177,14 +1185,14 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
1177 if (pte_young(pteval) || 1185 if (pte_young(pteval) ||
1178 page_is_young(page) || PageReferenced(page) || 1186 page_is_young(page) || PageReferenced(page) ||
1179 mmu_notifier_test_young(vma->vm_mm, address)) 1187 mmu_notifier_test_young(vma->vm_mm, address))
1180 referenced = true; 1188 referenced++;
1181 } 1189 }
1182 if (writable) { 1190 if (writable) {
1183 if (referenced) { 1191 if (referenced) {
1184 result = SCAN_SUCCEED; 1192 result = SCAN_SUCCEED;
1185 ret = 1; 1193 ret = 1;
1186 } else { 1194 } else {
1187 result = SCAN_NO_REFERENCED_PAGE; 1195 result = SCAN_LACK_REFERENCED_PAGE;
1188 } 1196 }
1189 } else { 1197 } else {
1190 result = SCAN_PAGE_RO; 1198 result = SCAN_PAGE_RO;
@@ -1194,7 +1202,7 @@ out_unmap:
1194 if (ret) { 1202 if (ret) {
1195 node = khugepaged_find_target_node(); 1203 node = khugepaged_find_target_node();
1196 /* collapse_huge_page will return with the mmap_sem released */ 1204 /* collapse_huge_page will return with the mmap_sem released */
1197 collapse_huge_page(mm, address, hpage, vma, node); 1205 collapse_huge_page(mm, address, hpage, vma, node, referenced);
1198 } 1206 }
1199out: 1207out:
1200 trace_mm_khugepaged_scan_pmd(mm, page, writable, referenced, 1208 trace_mm_khugepaged_scan_pmd(mm, page, writable, referenced,