author		Hugh Dickins <hughd@google.com>		2018-11-30 17:10:43 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2018-11-30 17:56:15 -0500
commit		87c460a0bded56195b5eb497d44709777ef7b415 (patch)
tree		697ee6c87214a76766981ba70e23f9e434d74c9f
parent		042a30824871fa3149b0127009074b75cc25863c (diff)
mm/khugepaged: collapse_shmem() without freezing new_page
khugepaged's collapse_shmem() does almost all of its work, to assemble the huge new_page from 512 scattered old pages, with the new_page's refcount frozen to 0 (and refcounts of all old pages so far also frozen to 0). Including shmem_getpage() to read in any which were out on swap, memory reclaim if necessary to allocate their intermediate pages, and copying over all the data from old to new.

Imagine the frozen refcount as a spinlock held, but without any lock debugging to highlight the abuse: it's not good, and under serious load heads into lockups - speculative getters of the page are not expecting to spin while khugepaged is rescheduled.

One can get a little further under load by hacking around elsewhere; but fortunately, freezing the new_page turns out to have been entirely unnecessary, with no hacks needed elsewhere.

The huge new_page lock is already held throughout, and guards all its subpages as they are brought one by one into the page cache tree; and anything reading the data in that page, without the lock, before it has been marked PageUptodate, would already be in the wrong. So simply eliminate the freezing of the new_page.

Each of the old pages remains frozen with refcount 0 after it has been replaced by a new_page subpage in the page cache tree, until they are all unfrozen on success or failure: just as before. They could be unfrozen sooner, but cause no problem once no longer visible to find_get_entry(), filemap_map_pages() and other speculative lookups.

Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1811261527570.2275@eggly.anvils
Fixes: f3f0e1d2150b2 ("khugepaged: add support of collapse for tmpfs/shmem pages")
Signed-off-by: Hugh Dickins <hughd@google.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Jerome Glisse <jglisse@redhat.com>
Cc: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: <stable@vger.kernel.org>	[4.8+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
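As an aside for readers unfamiliar with the pattern being removed, the following is a minimal userspace sketch in plain C11 atomics, not kernel code: ref_freeze(), ref_unfreeze() and try_get_unless_zero() are illustrative stand-ins for page_ref_freeze(), page_ref_unfreeze() and a speculative getter such as get_page_unless_zero(). It shows why a refcount held at 0 behaves like a spinlock held against speculative getters, which must retry until the count becomes non-zero again.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_int refcount = 1;		/* stands in for page->_refcount */

/* stand-in for page_ref_freeze(page, count): count -> 0 only if it was 'expected' */
static bool ref_freeze(int expected)
{
	return atomic_compare_exchange_strong(&refcount, &expected, 0);
}

/* stand-in for page_ref_unfreeze(page, count) */
static void ref_unfreeze(int count)
{
	atomic_store(&refcount, count);
}

/* stand-in for a speculative getter: only takes a reference if count != 0 */
static bool try_get_unless_zero(void)
{
	int old = atomic_load(&refcount);

	while (old != 0) {
		if (atomic_compare_exchange_weak(&refcount, &old, old + 1))
			return true;	/* took a reference */
	}
	return false;	/* count is frozen at 0: caller has to retry later */
}

int main(void)
{
	if (ref_freeze(1))	/* like khugepaged freezing new_page */
		printf("frozen: try_get_unless_zero() = %d\n", try_get_unless_zero());
	ref_unfreeze(512);	/* like unfreezing to HPAGE_PMD_NR */
	printf("unfrozen: try_get_unless_zero() = %d\n", try_get_unless_zero());
	return 0;
}

While a real speculative getter in the kernel loops back and retries the whole lookup on failure, the sketch is enough to show that whoever holds the count at 0 for a long stretch, as collapse_shmem() did across swap-in, reclaim and copying, makes every such getter spin.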
-rw-r--r--	mm/khugepaged.c	| 19
1 file changed, 7 insertions(+), 12 deletions(-)
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 9d4e9ff1af95..55930cbed3fd 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1287,7 +1287,7 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
  * collapse_shmem - collapse small tmpfs/shmem pages into huge one.
  *
  * Basic scheme is simple, details are more complex:
- *  - allocate and freeze a new huge page;
+ *  - allocate and lock a new huge page;
  *  - scan page cache replacing old pages with the new one
  *    + swap in pages if necessary;
  *    + fill in gaps;
@@ -1295,11 +1295,11 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
  *  - if replacing succeeds:
  *    + copy data over;
  *    + free old pages;
- *    + unfreeze huge page;
+ *    + unlock huge page;
  *  - if replacing failed;
  *    + put all pages back and unfreeze them;
  *    + restore gaps in the page cache;
- *    + free huge page;
+ *    + unlock and free huge page;
  */
 static void collapse_shmem(struct mm_struct *mm,
 		struct address_space *mapping, pgoff_t start,
@@ -1333,13 +1333,11 @@ static void collapse_shmem(struct mm_struct *mm,
 	__SetPageSwapBacked(new_page);
 	new_page->index = start;
 	new_page->mapping = mapping;
-	BUG_ON(!page_ref_freeze(new_page, 1));
 
 	/*
-	 * At this point the new_page is 'frozen' (page_count() is zero),
-	 * locked and not up-to-date. It's safe to insert it into the page
-	 * cache, because nobody would be able to map it or use it in other
-	 * way until we unfreeze it.
+	 * At this point the new_page is locked and not up-to-date.
+	 * It's safe to insert it into the page cache, because nobody would
+	 * be able to map it or use it in another way until we unlock it.
 	 */
 
 	/* This will be less messy when we use multi-index entries */
@@ -1491,9 +1489,8 @@ xa_unlocked:
 			index++;
 		}
 
-		/* Everything is ready, let's unfreeze the new_page */
 		SetPageUptodate(new_page);
-		page_ref_unfreeze(new_page, HPAGE_PMD_NR);
+		page_ref_add(new_page, HPAGE_PMD_NR - 1);
 		set_page_dirty(new_page);
 		mem_cgroup_commit_charge(new_page, memcg, false, true);
 		lru_cache_add_anon(new_page);
@@ -1541,8 +1538,6 @@ xa_unlocked:
 		VM_BUG_ON(nr_none);
 		xas_unlock_irq(&xas);
 
-		/* Unfreeze new_page, caller would take care about freeing it */
-		page_ref_unfreeze(new_page, 1);
 		mem_cgroup_cancel_charge(new_page, memcg, true);
 		new_page->mapping = NULL;
 	}
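Note that the success-path hunk changes how the final reference count on new_page is reached, not what it ends up being. A trivial check of the arithmetic, assuming HPAGE_PMD_NR == 512 (4K base pages, 2MB huge page):

#include <assert.h>
#include <stdio.h>

#define HPAGE_PMD_NR 512	/* assumption: 4K base pages, 2MB huge page */

int main(void)
{
	/*
	 * Old flow: allocated at 1, page_ref_freeze(new_page, 1) holds it at 0
	 * throughout the collapse, page_ref_unfreeze(new_page, HPAGE_PMD_NR)
	 * sets the final count on success.
	 */
	int old_final = HPAGE_PMD_NR;

	/*
	 * New flow: the count simply stays at 1 (the page lock keeps others
	 * away), then page_ref_add(new_page, HPAGE_PMD_NR - 1) on success.
	 */
	int new_final = 1 + (HPAGE_PMD_NR - 1);

	assert(old_final == new_final);
	printf("final new_page refcount either way: %d\n", new_final);
	return 0;
}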