path: root/mm/vmscan.c
author    Huang Ying <ying.huang@intel.com>  2017-09-06 19:22:49 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2017-09-06 20:27:28 -0400
commit    bd4c82c22c367e068acb1ec9ec02be2fac3e09e2
tree      c52482b09d020dc9873376221fb4cb22332dda6b /mm/vmscan.c
parent    d6810d730022016d9c0f389452b86b035dba1492
mm, THP, swap: delay splitting THP after swapped out
In this patch, splitting a transparent huge page (THP) during swap-out is delayed from after adding the THP to the swap cache until after swap-out finishes. With this change, more of the operations involved in reclaiming an anonymous THP, such as writing the THP to the swap device and removing the THP from the swap cache, can be batched, which improves the performance of anonymous THP swap-out.

This is the second step of THP swap support. The plan is to delay splitting the THP step by step and eventually avoid splitting it altogether.

With the patchset, swap-out throughput improves by 42% (from about 5.81 GB/s to about 8.25 GB/s) in the vm-scalability swap-w-seq test case with 16 processes. At the same time, the IPI count (which reflects TLB flushing) is reduced by about 78.9%. The test is done on a Xeon E5 v3 system. The swap device used is a RAM-simulated PMEM (persistent memory) device. To test sequential swap-out, the test case creates 8 processes, which sequentially allocate and write to anonymous pages until the RAM and part of the swap device are used up.

Link: http://lkml.kernel.org/r/20170724051840.2309-12-ying.huang@intel.com
Signed-off-by: "Huang, Ying" <ying.huang@intel.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Shaohua Li <shli@kernel.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: "Kirill A . Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Ross Zwisler <ross.zwisler@intel.com> [for brd.c, zram_drv.c, pmem.c]
Cc: Vishal L Verma <vishal.l.verma@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
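Because the THP now stays whole in the swap cache, the diff below has to adjust the page reference accounting: an unsplit THP occupies HPAGE_PMD_NR swap-cache (radix tree) slots rather than one. As a rough illustration of that arithmetic only, here is a minimal userspace C model; the struct, the helper name, and the hard-coded HPAGE_PMD_NR of 512 (x86_64 with 2 MB THP) are illustrative assumptions, not kernel code.

#include <stdbool.h>
#include <stdio.h>

#define HPAGE_PMD_NR 512	/* assumption: x86_64, 2MB THP = 512 base pages */

struct page_model {
	bool trans_huge;	/* compound (huge) page */
	bool swap_cache;	/* present in the swap cache */
	bool has_private;	/* buffer heads at page->private */
};

/*
 * Rough model of the patched is_page_cache_freeable() arithmetic: a THP
 * kept whole in the swap cache pins HPAGE_PMD_NR radix-tree slots, so it
 * holds that many references; an ordinary page holds just one, and the
 * isolating caller always holds one more.
 */
static int freeable_page_count(const struct page_model *p)
{
	int radix_pins = (p->trans_huge && p->swap_cache) ? HPAGE_PMD_NR : 1;

	return 1 + (p->has_private ? 1 : 0) + radix_pins;
}

int main(void)
{
	struct page_model base = { .swap_cache = true };
	struct page_model thp  = { .trans_huge = true, .swap_cache = true };

	printf("base page is freeable at page_count == %d\n",
	       freeable_page_count(&base));	/* 2 */
	printf("unsplit THP is freeable at page_count == %d\n",
	       freeable_page_count(&thp));	/* 1 + 512 */
	return 0;
}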
Diffstat (limited to 'mm/vmscan.c')
-rw-r--r--  mm/vmscan.c | 95
1 file changed, 52 insertions(+), 43 deletions(-)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 1638814c7848..6fbf707c0ce2 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -536,7 +536,9 @@ static inline int is_page_cache_freeable(struct page *page)
 	 * that isolated the page, the page cache radix tree and
 	 * optional buffer heads at page->private.
 	 */
-	return page_count(page) - page_has_private(page) == 2;
+	int radix_pins = PageTransHuge(page) && PageSwapCache(page) ?
+		HPAGE_PMD_NR : 1;
+	return page_count(page) - page_has_private(page) == 1 + radix_pins;
 }
 
 static int may_write_to_inode(struct inode *inode, struct scan_control *sc)
@@ -666,6 +668,7 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
 			    bool reclaimed)
 {
 	unsigned long flags;
+	int refcount;
 
 	BUG_ON(!PageLocked(page));
 	BUG_ON(mapping != page_mapping(page));
@@ -696,11 +699,15 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
 	 * Note that if SetPageDirty is always performed via set_page_dirty,
 	 * and thus under tree_lock, then this ordering is not required.
 	 */
-	if (!page_ref_freeze(page, 2))
+	if (unlikely(PageTransHuge(page)) && PageSwapCache(page))
+		refcount = 1 + HPAGE_PMD_NR;
+	else
+		refcount = 2;
+	if (!page_ref_freeze(page, refcount))
 		goto cannot_free;
 	/* note: atomic_cmpxchg in page_freeze_refs provides the smp_rmb */
 	if (unlikely(PageDirty(page))) {
-		page_ref_unfreeze(page, 2);
+		page_ref_unfreeze(page, refcount);
 		goto cannot_free;
 	}
 
@@ -1122,58 +1129,56 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 		 * Try to allocate it some swap space here.
 		 * Lazyfree page could be freed directly
 		 */
-		if (PageAnon(page) && PageSwapBacked(page) &&
-		    !PageSwapCache(page)) {
-			if (!(sc->gfp_mask & __GFP_IO))
-				goto keep_locked;
-			if (PageTransHuge(page)) {
-				/* cannot split THP, skip it */
-				if (!can_split_huge_page(page, NULL))
-					goto activate_locked;
-				/*
-				 * Split pages without a PMD map right
-				 * away. Chances are some or all of the
-				 * tail pages can be freed without IO.
-				 */
-				if (!compound_mapcount(page) &&
-				    split_huge_page_to_list(page, page_list))
-					goto activate_locked;
-			}
-			if (!add_to_swap(page)) {
-				if (!PageTransHuge(page))
-					goto activate_locked;
-				/* Split THP and swap individual base pages */
-				if (split_huge_page_to_list(page, page_list))
-					goto activate_locked;
-				if (!add_to_swap(page))
-					goto activate_locked;
-			}
-
-			/* XXX: We don't support THP writes */
-			if (PageTransHuge(page) &&
-			    split_huge_page_to_list(page, page_list)) {
-				delete_from_swap_cache(page);
-				goto activate_locked;
-			}
-
-			may_enter_fs = 1;
-
-			/* Adding to swap updated mapping */
-			mapping = page_mapping(page);
+		if (PageAnon(page) && PageSwapBacked(page)) {
+			if (!PageSwapCache(page)) {
+				if (!(sc->gfp_mask & __GFP_IO))
+					goto keep_locked;
+				if (PageTransHuge(page)) {
+					/* cannot split THP, skip it */
+					if (!can_split_huge_page(page, NULL))
+						goto activate_locked;
+					/*
+					 * Split pages without a PMD map right
+					 * away. Chances are some or all of the
+					 * tail pages can be freed without IO.
+					 */
+					if (!compound_mapcount(page) &&
+					    split_huge_page_to_list(page,
+								    page_list))
+						goto activate_locked;
+				}
+				if (!add_to_swap(page)) {
+					if (!PageTransHuge(page))
+						goto activate_locked;
+					/* Fallback to swap normal pages */
+					if (split_huge_page_to_list(page,
+								    page_list))
+						goto activate_locked;
+					if (!add_to_swap(page))
+						goto activate_locked;
+				}
+
+				may_enter_fs = 1;
+
+				/* Adding to swap updated mapping */
+				mapping = page_mapping(page);
+			}
 		} else if (unlikely(PageTransHuge(page))) {
 			/* Split file THP */
 			if (split_huge_page_to_list(page, page_list))
 				goto keep_locked;
 		}
 
-		VM_BUG_ON_PAGE(PageTransHuge(page), page);
-
 		/*
 		 * The page is mapped into the page tables of one or more
 		 * processes. Try to unmap it here.
 		 */
 		if (page_mapped(page)) {
-			if (!try_to_unmap(page, ttu_flags | TTU_BATCH_FLUSH)) {
+			enum ttu_flags flags = ttu_flags | TTU_BATCH_FLUSH;
+
+			if (unlikely(PageTransHuge(page)))
+				flags |= TTU_SPLIT_HUGE_PMD;
+			if (!try_to_unmap(page, flags)) {
 				nr_unmap_fail++;
 				goto activate_locked;
 			}
@@ -1313,7 +1318,11 @@ free_it:
 		 * Is there need to periodically free_page_list? It would
 		 * appear not as the counts should be low
 		 */
-		list_add(&page->lru, &free_pages);
+		if (unlikely(PageTransHuge(page))) {
+			mem_cgroup_uncharge(page);
+			(*get_compound_page_dtor(page))(page);
+		} else
+			list_add(&page->lru, &free_pages);
 		continue;
 
 activate_locked:
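The last hunk changes how a page that was reclaimed without being split is finally released: a still-compound THP cannot be queued on the order-0 free_pages list, so it is uncharged and handed to its compound destructor on the spot. The sketch below is a minimal userspace model of that dispatch only; page_model, release_page, and compound_dtor are illustrative stand-ins, not kernel APIs.

#include <stdbool.h>
#include <stdio.h>

struct page_model {
	bool trans_huge;
	const char *name;
};

/* plays the role of (*get_compound_page_dtor(page))(page) */
static void compound_dtor(struct page_model *p)
{
	printf("%s: released via its compound destructor\n", p->name);
}

/* models the free_it: dispatch added by the last hunk */
static void release_page(struct page_model *p, int *batched)
{
	if (p->trans_huge) {
		/* a still-compound THP cannot sit on the order-0 free list */
		compound_dtor(p);
	} else {
		/* ordinary pages are batched on free_pages and freed later */
		(*batched)++;
		printf("%s: queued on free_pages\n", p->name);
	}
}

int main(void)
{
	struct page_model thp  = { true,  "THP" };
	struct page_model base = { false, "base page" };
	int batched = 0;

	release_page(&thp, &batched);
	release_page(&base, &batched);
	printf("pages batched for list free: %d\n", batched);
	return 0;
}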