author     Huang Ying <ying.huang@intel.com>                  2017-09-06 19:22:16 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>     2017-09-06 20:27:27 -0400
commit     e07098294adfd03d582af7626752255e3d170393 (patch)
tree       d8c515d06da5bf1b6dec47b66d686416e4619ca5 /mm/swapfile.c
parent     a3aea839e42ef8d76bb58091ab7f5a45a85ea299 (diff)
mm, THP, swap: support to reclaim swap space for THP swapped out
Normally a swap slot can be reclaimed once its swap count reaches SWAP_HAS_CACHE. But for swap slots backing a THP, all slots backing that THP must be reclaimed together, because any of them may be used again when the THP is swapped out later. So the swap slots backing one THP can only be reclaimed together, once the swap count of every one of them has reached SWAP_HAS_CACHE. This patch implements the functions that check whether the swap count of all swap slots backing one THP has reached SWAP_HAS_CACHE, and uses them when deciding whether a swap slot can be reclaimed.

To make it easier to determine whether a swap slot is backing a THP, a new swap cluster flag named CLUSTER_FLAG_HUGE is added to mark a swap cluster that is backing a THP (Transparent Huge Page). Because swapping in a THP as a whole isn't supported yet, the CLUSTER_FLAG_HUGE flag is cleared after the THP has been deleted from the swap cache (for example, once swapping out has finished), so that the normal pages inside the THP can be swapped in individually.

[ying.huang@intel.com: fix swap_page_trans_huge_swapped on HDD]
Link: http://lkml.kernel.org/r/874ltsm0bi.fsf@yhuang-dev.intel.com
Link: http://lkml.kernel.org/r/20170724051840.2309-3-ying.huang@intel.com
Signed-off-by: "Huang, Ying" <ying.huang@intel.com>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Shaohua Li <shli@kernel.org>
Cc: "Kirill A . Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Ross Zwisler <ross.zwisler@intel.com> [for brd.c, zram_drv.c, pmem.c]
Cc: Vishal L Verma <vishal.l.verma@intel.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
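As a rough illustration of the reclaim condition described above (a minimal sketch only, not part of the patch; thp_swap_slots_reclaimable() is a hypothetical name, and the real check is the inverse of swap_page_trans_huge_swapped() added in the diff below):

/*
 * Sketch: a THP-backed swap cluster may be reclaimed only when every
 * slot in it has dropped to SWAP_HAS_CACHE, i.e. the swap cache holds
 * the only remaining reference to each slot.  The caller is assumed to
 * hold the cluster lock; "map" is si->swap_map and "offset" is the
 * first slot of the cluster.
 */
static bool thp_swap_slots_reclaimable(unsigned char *map, unsigned long offset)
{
	int i;

	for (i = 0; i < SWAPFILE_CLUSTER; i++) {
		if (map[offset + i] != SWAP_HAS_CACHE)
			return false;	/* still referenced by some process */
	}
	return true;
}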
Diffstat (limited to 'mm/swapfile.c')
-rw-r--r--	mm/swapfile.c	78
1 file changed, 71 insertions(+), 7 deletions(-)
diff --git a/mm/swapfile.c b/mm/swapfile.c
index c32e9b23d642..164d9624d7d2 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -265,6 +265,16 @@ static inline void cluster_set_null(struct swap_cluster_info *info)
 	info->data = 0;
 }
 
+static inline bool cluster_is_huge(struct swap_cluster_info *info)
+{
+	return info->flags & CLUSTER_FLAG_HUGE;
+}
+
+static inline void cluster_clear_huge(struct swap_cluster_info *info)
+{
+	info->flags &= ~CLUSTER_FLAG_HUGE;
+}
+
 static inline struct swap_cluster_info *lock_cluster(struct swap_info_struct *si,
 						     unsigned long offset)
 {
@@ -846,7 +856,7 @@ static int swap_alloc_cluster(struct swap_info_struct *si, swp_entry_t *slot)
 	offset = idx * SWAPFILE_CLUSTER;
 	ci = lock_cluster(si, offset);
 	alloc_cluster(si, idx);
-	cluster_set_count_flag(ci, SWAPFILE_CLUSTER, 0);
+	cluster_set_count_flag(ci, SWAPFILE_CLUSTER, CLUSTER_FLAG_HUGE);
 
 	map = si->swap_map + offset;
 	for (i = 0; i < SWAPFILE_CLUSTER; i++)
@@ -1176,6 +1186,7 @@ static void swapcache_free_cluster(swp_entry_t entry)
 		return;
 
 	ci = lock_cluster(si, offset);
+	VM_BUG_ON(!cluster_is_huge(ci));
 	map = si->swap_map + offset;
 	for (i = 0; i < SWAPFILE_CLUSTER; i++) {
 		val = map[i];
@@ -1187,6 +1198,7 @@ static void swapcache_free_cluster(swp_entry_t entry)
 		for (i = 0; i < SWAPFILE_CLUSTER; i++)
 			map[i] &= ~SWAP_HAS_CACHE;
 	}
+	cluster_clear_huge(ci);
 	unlock_cluster(ci);
 	if (free_entries == SWAPFILE_CLUSTER) {
 		spin_lock(&si->lock);
@@ -1350,6 +1362,54 @@ out:
 	return count;
 }
 
+#ifdef CONFIG_THP_SWAP
+static bool swap_page_trans_huge_swapped(struct swap_info_struct *si,
+					 swp_entry_t entry)
+{
+	struct swap_cluster_info *ci;
+	unsigned char *map = si->swap_map;
+	unsigned long roffset = swp_offset(entry);
+	unsigned long offset = round_down(roffset, SWAPFILE_CLUSTER);
+	int i;
+	bool ret = false;
+
+	ci = lock_cluster_or_swap_info(si, offset);
+	if (!ci || !cluster_is_huge(ci)) {
+		if (map[roffset] != SWAP_HAS_CACHE)
+			ret = true;
+		goto unlock_out;
+	}
+	for (i = 0; i < SWAPFILE_CLUSTER; i++) {
+		if (map[offset + i] != SWAP_HAS_CACHE) {
+			ret = true;
+			break;
+		}
+	}
+unlock_out:
+	unlock_cluster_or_swap_info(si, ci);
+	return ret;
+}
+
+static bool page_swapped(struct page *page)
+{
+	swp_entry_t entry;
+	struct swap_info_struct *si;
+
+	if (likely(!PageTransCompound(page)))
+		return page_swapcount(page) != 0;
+
+	page = compound_head(page);
+	entry.val = page_private(page);
+	si = _swap_info_get(entry);
+	if (si)
+		return swap_page_trans_huge_swapped(si, entry);
+	return false;
+}
+#else
+#define swap_page_trans_huge_swapped(si, entry)	swap_swapcount(si, entry)
+#define page_swapped(page)			(page_swapcount(page) != 0)
+#endif
+
 /*
  * We can write to an anon page without COW if there are no other references
  * to it.  And as a side-effect, free up its swap: because the old content
@@ -1404,7 +1464,7 @@ int try_to_free_swap(struct page *page)
 		return 0;
 	if (PageWriteback(page))
 		return 0;
-	if (page_swapcount(page))
+	if (page_swapped(page))
 		return 0;
 
 	/*
@@ -1425,6 +1485,7 @@ int try_to_free_swap(struct page *page)
 	if (pm_suspended_storage())
 		return 0;
 
+	page = compound_head(page);
 	delete_from_swap_cache(page);
 	SetPageDirty(page);
 	return 1;
@@ -1446,7 +1507,8 @@ int free_swap_and_cache(swp_entry_t entry)
 	p = _swap_info_get(entry);
 	if (p) {
 		count = __swap_entry_free(p, entry, 1);
-		if (count == SWAP_HAS_CACHE) {
+		if (count == SWAP_HAS_CACHE &&
+		    !swap_page_trans_huge_swapped(p, entry)) {
 			page = find_get_page(swap_address_space(entry),
 					     swp_offset(entry));
 			if (page && !trylock_page(page)) {
@@ -1463,7 +1525,8 @@ int free_swap_and_cache(swp_entry_t entry)
 			 */
 			if (PageSwapCache(page) && !PageWriteback(page) &&
 			    (!page_mapped(page) || mem_cgroup_swap_full(page)) &&
-			    !swap_swapcount(p, entry)) {
+			    !swap_page_trans_huge_swapped(p, entry)) {
+				page = compound_head(page);
 				delete_from_swap_cache(page);
 				SetPageDirty(page);
 			}
@@ -2017,7 +2080,7 @@ int try_to_unuse(unsigned int type, bool frontswap,
 				.sync_mode = WB_SYNC_NONE,
 			};
 
-			swap_writepage(page, &wbc);
+			swap_writepage(compound_head(page), &wbc);
 			lock_page(page);
 			wait_on_page_writeback(page);
 		}
@@ -2030,8 +2093,9 @@ int try_to_unuse(unsigned int type, bool frontswap,
 		 * delete, since it may not have been written out to swap yet.
 		 */
 		if (PageSwapCache(page) &&
-		    likely(page_private(page) == entry.val))
-			delete_from_swap_cache(page);
+		    likely(page_private(page) == entry.val) &&
+		    !page_swapped(page))
+			delete_from_swap_cache(compound_head(page));
 
 		/*
 		 * So we could skip searching mms once swap count went