path: root/mm/khugepaged.c
Diffstat (limited to 'mm/khugepaged.c')
-rw-r--r--  mm/khugepaged.c  366
1 file changed, 308 insertions(+), 58 deletions(-)
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index ccede2425c3f..0a1b4b484ac5 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -48,6 +48,7 @@ enum scan_result {
         SCAN_CGROUP_CHARGE_FAIL,
         SCAN_EXCEED_SWAP_PTE,
         SCAN_TRUNCATED,
+        SCAN_PAGE_HAS_PRIVATE,
 };
 
 #define CREATE_TRACE_POINTS
@@ -76,6 +77,8 @@ static __read_mostly DEFINE_HASHTABLE(mm_slots_hash, MM_SLOTS_HASH_BITS);
 
 static struct kmem_cache *mm_slot_cache __read_mostly;
 
+#define MAX_PTE_MAPPED_THP 8
+
 /**
  * struct mm_slot - hash lookup from mm to mm_slot
  * @hash: hash collision list
@@ -86,6 +89,10 @@ struct mm_slot {
         struct hlist_node hash;
         struct list_head mm_node;
         struct mm_struct *mm;
+
+        /* pte-mapped THP in this mm */
+        int nr_pte_mapped_thp;
+        unsigned long pte_mapped_thp[MAX_PTE_MAPPED_THP];
 };
 
 /**
@@ -404,7 +411,11 @@ static bool hugepage_vma_check(struct vm_area_struct *vma,
             (vm_flags & VM_NOHUGEPAGE) ||
             test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags))
                 return false;
-        if (shmem_file(vma->vm_file)) {
+
+        if (shmem_file(vma->vm_file) ||
+            (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) &&
+             vma->vm_file &&
+             (vm_flags & VM_DENYWRITE))) {
                 if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE))
                         return false;
                 return IS_ALIGNED((vma->vm_start >> PAGE_SHIFT) - vma->vm_pgoff,
@@ -456,8 +467,9 @@ int khugepaged_enter_vma_merge(struct vm_area_struct *vma,
         unsigned long hstart, hend;
 
         /*
-         * khugepaged does not yet work on non-shmem files or special
-         * mappings. And file-private shmem THP is not supported.
+         * khugepaged only supports read-only files for non-shmem files.
+         * khugepaged does not yet work on special mappings. And
+         * file-private shmem THP is not supported.
          */
         if (!hugepage_vma_check(vma, vm_flags))
                 return 0;
@@ -1248,6 +1260,159 @@ static void collect_mm_slot(struct mm_slot *mm_slot)
 }
 
 #if defined(CONFIG_SHMEM) && defined(CONFIG_TRANSPARENT_HUGE_PAGECACHE)
+/*
+ * Notify khugepaged that given addr of the mm is pte-mapped THP. Then
+ * khugepaged should try to collapse the page table.
+ */
+static int khugepaged_add_pte_mapped_thp(struct mm_struct *mm,
+                                         unsigned long addr)
+{
+        struct mm_slot *mm_slot;
+
+        VM_BUG_ON(addr & ~HPAGE_PMD_MASK);
+
+        spin_lock(&khugepaged_mm_lock);
+        mm_slot = get_mm_slot(mm);
+        if (likely(mm_slot && mm_slot->nr_pte_mapped_thp < MAX_PTE_MAPPED_THP))
+                mm_slot->pte_mapped_thp[mm_slot->nr_pte_mapped_thp++] = addr;
+        spin_unlock(&khugepaged_mm_lock);
+        return 0;
+}
+
+/**
+ * Try to collapse a pte-mapped THP for mm at address haddr.
+ *
+ * This function checks whether all the PTEs in the PMD are pointing to the
+ * right THP. If so, retract the page table so the THP can refault in as
+ * pmd-mapped.
+ */
+void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr)
+{
+        unsigned long haddr = addr & HPAGE_PMD_MASK;
+        struct vm_area_struct *vma = find_vma(mm, haddr);
+        struct page *hpage = NULL;
+        pte_t *start_pte, *pte;
+        pmd_t *pmd, _pmd;
+        spinlock_t *ptl;
+        int count = 0;
+        int i;
+
+        if (!vma || !vma->vm_file ||
+            vma->vm_start > haddr || vma->vm_end < haddr + HPAGE_PMD_SIZE)
+                return;
+
+        /*
+         * This vm_flags may not have VM_HUGEPAGE if the page was not
+         * collapsed by this mm. But we can still collapse if the page is
+         * a valid THP. Add extra VM_HUGEPAGE so hugepage_vma_check()
+         * will not fail the vma for missing VM_HUGEPAGE.
+         */
+        if (!hugepage_vma_check(vma, vma->vm_flags | VM_HUGEPAGE))
+                return;
+
+        pmd = mm_find_pmd(mm, haddr);
+        if (!pmd)
+                return;
+
+        start_pte = pte_offset_map_lock(mm, pmd, haddr, &ptl);
+
+        /* step 1: check all mapped PTEs are to the right huge page */
+        for (i = 0, addr = haddr, pte = start_pte;
+             i < HPAGE_PMD_NR; i++, addr += PAGE_SIZE, pte++) {
+                struct page *page;
+
+                /* empty pte, skip */
+                if (pte_none(*pte))
+                        continue;
+
+                /* page swapped out, abort */
+                if (!pte_present(*pte))
+                        goto abort;
+
+                page = vm_normal_page(vma, addr, *pte);
+
+                if (!page || !PageCompound(page))
+                        goto abort;
+
+                if (!hpage) {
+                        hpage = compound_head(page);
+                        /*
+                         * The mapping of the THP should not change.
+                         *
+                         * Note that uprobe, debugger, or MAP_PRIVATE may
+                         * change the page table, but the new page will
+                         * not pass PageCompound() check.
+                         */
+                        if (WARN_ON(hpage->mapping != vma->vm_file->f_mapping))
+                                goto abort;
+                }
+
+                /*
+                 * Confirm the page maps to the correct subpage.
+                 *
+                 * Note that uprobe, debugger, or MAP_PRIVATE may change
+                 * the page table, but the new page will not pass
+                 * PageCompound() check.
+                 */
+                if (WARN_ON(hpage + i != page))
+                        goto abort;
+                count++;
+        }
+
+        /* step 2: adjust rmap */
+        for (i = 0, addr = haddr, pte = start_pte;
+             i < HPAGE_PMD_NR; i++, addr += PAGE_SIZE, pte++) {
+                struct page *page;
+
+                if (pte_none(*pte))
+                        continue;
+                page = vm_normal_page(vma, addr, *pte);
+                page_remove_rmap(page, false);
+        }
+
+        pte_unmap_unlock(start_pte, ptl);
+
+        /* step 3: set proper refcount and mm_counters. */
+        if (hpage) {
+                page_ref_sub(hpage, count);
+                add_mm_counter(vma->vm_mm, mm_counter_file(hpage), -count);
+        }
+
+        /* step 4: collapse pmd */
+        ptl = pmd_lock(vma->vm_mm, pmd);
+        _pmd = pmdp_collapse_flush(vma, addr, pmd);
+        spin_unlock(ptl);
+        mm_dec_nr_ptes(mm);
+        pte_free(mm, pmd_pgtable(_pmd));
+        return;
+
+abort:
+        pte_unmap_unlock(start_pte, ptl);
+}
+
+static int khugepaged_collapse_pte_mapped_thps(struct mm_slot *mm_slot)
+{
+        struct mm_struct *mm = mm_slot->mm;
+        int i;
+
+        if (likely(mm_slot->nr_pte_mapped_thp == 0))
+                return 0;
+
+        if (!down_write_trylock(&mm->mmap_sem))
+                return -EBUSY;
+
+        if (unlikely(khugepaged_test_exit(mm)))
+                goto out;
+
+        for (i = 0; i < mm_slot->nr_pte_mapped_thp; i++)
+                collapse_pte_mapped_thp(mm, mm_slot->pte_mapped_thp[i]);
+
+out:
+        mm_slot->nr_pte_mapped_thp = 0;
+        up_write(&mm->mmap_sem);
+        return 0;
+}
+
 static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
 {
         struct vm_area_struct *vma;
@@ -1256,7 +1421,22 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
 
         i_mmap_lock_write(mapping);
         vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
-                /* probably overkill */
+                /*
+                 * Check vma->anon_vma to exclude MAP_PRIVATE mappings that
+                 * got written to. These VMAs are likely not worth investing
+                 * down_write(mmap_sem) as PMD-mapping is likely to be split
+                 * later.
+                 *
+                 * Note that the vma->anon_vma check is racy: it can be set up
+                 * after the check but before we took mmap_sem by the fault
+                 * path. But page lock would prevent establishing any new ptes
+                 * of the page, so we are safe.
+                 *
+                 * An alternative would be to drop the check, but check that
+                 * the page table is clear before calling pmdp_collapse_flush()
+                 * under ptl. It has a higher chance to recover THP for the
+                 * VMA, but has higher cost too.
+                 */
                 if (vma->anon_vma)
                         continue;
                 addr = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
@@ -1269,9 +1449,10 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
                         continue;
                 /*
                  * We need exclusive mmap_sem to retract page table.
-                 * If trylock fails we would end up with pte-mapped THP after
-                 * re-fault. Not ideal, but it's more important to not disturb
-                 * the system too much.
+                 *
+                 * We use trylock due to lock inversion: we need to acquire
+                 * mmap_sem while holding page lock. Fault path does it in
+                 * reverse order. Trylock is a way to avoid deadlock.
                  */
                 if (down_write_trylock(&vma->vm_mm->mmap_sem)) {
                         spinlock_t *ptl = pmd_lock(vma->vm_mm, pmd);
@@ -1281,18 +1462,21 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
                         up_write(&vma->vm_mm->mmap_sem);
                         mm_dec_nr_ptes(vma->vm_mm);
                         pte_free(vma->vm_mm, pmd_pgtable(_pmd));
+                } else {
+                        /* Try again later */
+                        khugepaged_add_pte_mapped_thp(vma->vm_mm, addr);
                 }
         }
         i_mmap_unlock_write(mapping);
 }
 
 /**
- * collapse_shmem - collapse small tmpfs/shmem pages into huge one.
+ * collapse_file - collapse filemap/tmpfs/shmem pages into huge one.
  *
  * Basic scheme is simple, details are more complex:
  *  - allocate and lock a new huge page;
  *  - scan page cache replacing old pages with the new one
- *    + swap in pages if necessary;
+ *    + swap/gup in pages if necessary;
  *    + fill in gaps;
  *    + keep old pages around in case rollback is required;
  *  - if replacing succeeds:
@@ -1304,10 +1488,11 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
  *    + restore gaps in the page cache;
  *    + unlock and free huge page;
  */
-static void collapse_shmem(struct mm_struct *mm,
-                struct address_space *mapping, pgoff_t start,
-                struct page **hpage, int node)
+static void collapse_file(struct mm_struct *mm,
+                struct file *file, pgoff_t start,
+                struct page **hpage, int node)
 {
+        struct address_space *mapping = file->f_mapping;
         gfp_t gfp;
         struct page *new_page;
         struct mem_cgroup *memcg;
@@ -1315,7 +1500,9 @@ static void collapse_shmem(struct mm_struct *mm,
         LIST_HEAD(pagelist);
         XA_STATE_ORDER(xas, &mapping->i_pages, start, HPAGE_PMD_ORDER);
         int nr_none = 0, result = SCAN_SUCCEED;
+        bool is_shmem = shmem_file(file);
 
+        VM_BUG_ON(!IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && !is_shmem);
         VM_BUG_ON(start & (HPAGE_PMD_NR - 1));
 
         /* Only allocate from the target node */
@@ -1347,7 +1534,8 @@ static void collapse_shmem(struct mm_struct *mm,
         } while (1);
 
         __SetPageLocked(new_page);
-        __SetPageSwapBacked(new_page);
+        if (is_shmem)
+                __SetPageSwapBacked(new_page);
         new_page->index = start;
         new_page->mapping = mapping;
 
@@ -1362,41 +1550,75 @@ static void collapse_shmem(struct mm_struct *mm,
                 struct page *page = xas_next(&xas);
 
                 VM_BUG_ON(index != xas.xa_index);
-                if (!page) {
-                        /*
-                         * Stop if extent has been truncated or hole-punched,
-                         * and is now completely empty.
-                         */
-                        if (index == start) {
-                                if (!xas_next_entry(&xas, end - 1)) {
-                                        result = SCAN_TRUNCATED;
-                                        goto xa_locked;
-                                }
-                                xas_set(&xas, index);
-                        }
-                        if (!shmem_charge(mapping->host, 1)) {
-                                result = SCAN_FAIL;
-                                goto xa_locked;
-                        }
-                        xas_store(&xas, new_page + (index % HPAGE_PMD_NR));
-                        nr_none++;
-                        continue;
-                }
-
-                if (xa_is_value(page) || !PageUptodate(page)) {
-                        xas_unlock_irq(&xas);
-                        /* swap in or instantiate fallocated page */
-                        if (shmem_getpage(mapping->host, index, &page,
-                                          SGP_NOHUGE)) {
-                                result = SCAN_FAIL;
-                                goto xa_unlocked;
-                        }
-                } else if (trylock_page(page)) {
-                        get_page(page);
-                        xas_unlock_irq(&xas);
-                } else {
-                        result = SCAN_PAGE_LOCK;
-                        goto xa_locked;
-                }
+                if (is_shmem) {
+                        if (!page) {
+                                /*
+                                 * Stop if extent has been truncated or
+                                 * hole-punched, and is now completely
+                                 * empty.
+                                 */
+                                if (index == start) {
+                                        if (!xas_next_entry(&xas, end - 1)) {
+                                                result = SCAN_TRUNCATED;
+                                                goto xa_locked;
+                                        }
+                                        xas_set(&xas, index);
+                                }
+                                if (!shmem_charge(mapping->host, 1)) {
+                                        result = SCAN_FAIL;
+                                        goto xa_locked;
+                                }
+                                xas_store(&xas, new_page);
+                                nr_none++;
+                                continue;
+                        }
+
+                        if (xa_is_value(page) || !PageUptodate(page)) {
+                                xas_unlock_irq(&xas);
+                                /* swap in or instantiate fallocated page */
+                                if (shmem_getpage(mapping->host, index, &page,
+                                                  SGP_NOHUGE)) {
+                                        result = SCAN_FAIL;
+                                        goto xa_unlocked;
+                                }
+                        } else if (trylock_page(page)) {
+                                get_page(page);
+                                xas_unlock_irq(&xas);
+                        } else {
+                                result = SCAN_PAGE_LOCK;
+                                goto xa_locked;
+                        }
+                } else {        /* !is_shmem */
+                        if (!page || xa_is_value(page)) {
+                                xas_unlock_irq(&xas);
+                                page_cache_sync_readahead(mapping, &file->f_ra,
+                                                          file, index,
+                                                          PAGE_SIZE);
+                                /* drain pagevecs to help isolate_lru_page() */
+                                lru_add_drain();
+                                page = find_lock_page(mapping, index);
+                                if (unlikely(page == NULL)) {
+                                        result = SCAN_FAIL;
+                                        goto xa_unlocked;
+                                }
+                        } else if (!PageUptodate(page)) {
+                                xas_unlock_irq(&xas);
+                                wait_on_page_locked(page);
+                                if (!trylock_page(page)) {
+                                        result = SCAN_PAGE_LOCK;
+                                        goto xa_unlocked;
+                                }
+                                get_page(page);
+                        } else if (PageDirty(page)) {
+                                result = SCAN_FAIL;
+                                goto xa_locked;
+                        } else if (trylock_page(page)) {
+                                get_page(page);
+                                xas_unlock_irq(&xas);
+                        } else {
+                                result = SCAN_PAGE_LOCK;
+                                goto xa_locked;
+                        }
+                }
 
                 /*
@@ -1425,6 +1647,12 @@ static void collapse_shmem(struct mm_struct *mm,
                         goto out_unlock;
                 }
 
+                if (page_has_private(page) &&
+                    !try_to_release_page(page, GFP_KERNEL)) {
+                        result = SCAN_PAGE_HAS_PRIVATE;
+                        goto out_unlock;
+                }
+
                 if (page_mapped(page))
                         unmap_mapping_pages(mapping, index, 1, false);
 
@@ -1454,7 +1682,7 @@ static void collapse_shmem(struct mm_struct *mm,
                 list_add_tail(&page->lru, &pagelist);
 
                 /* Finally, replace with the new page. */
-                xas_store(&xas, new_page + (index % HPAGE_PMD_NR));
+                xas_store(&xas, new_page);
                 continue;
 out_unlock:
                 unlock_page(page);
@@ -1462,12 +1690,20 @@ out_unlock:
                 goto xa_unlocked;
         }
 
-        __inc_node_page_state(new_page, NR_SHMEM_THPS);
+        if (is_shmem)
+                __inc_node_page_state(new_page, NR_SHMEM_THPS);
+        else {
+                __inc_node_page_state(new_page, NR_FILE_THPS);
+                filemap_nr_thps_inc(mapping);
+        }
+
         if (nr_none) {
                 struct zone *zone = page_zone(new_page);
 
                 __mod_node_page_state(zone->zone_pgdat, NR_FILE_PAGES, nr_none);
-                __mod_node_page_state(zone->zone_pgdat, NR_SHMEM, nr_none);
+                if (is_shmem)
+                        __mod_node_page_state(zone->zone_pgdat,
+                                              NR_SHMEM, nr_none);
         }
 
 xa_locked:
@@ -1505,10 +1741,15 @@ xa_unlocked:
 
                 SetPageUptodate(new_page);
                 page_ref_add(new_page, HPAGE_PMD_NR - 1);
-                set_page_dirty(new_page);
                 mem_cgroup_commit_charge(new_page, memcg, false, true);
+
+                if (is_shmem) {
+                        set_page_dirty(new_page);
+                        lru_cache_add_anon(new_page);
+                } else {
+                        lru_cache_add_file(new_page);
+                }
                 count_memcg_events(memcg, THP_COLLAPSE_ALLOC, 1);
-                lru_cache_add_anon(new_page);
 
                 /*
                  * Remove pte page tables, so we can re-fault the page as huge.
@@ -1523,7 +1764,9 @@ xa_unlocked:
                 /* Something went wrong: roll back page cache changes */
                 xas_lock_irq(&xas);
                 mapping->nrpages -= nr_none;
-                shmem_uncharge(mapping->host, nr_none);
+
+                if (is_shmem)
+                        shmem_uncharge(mapping->host, nr_none);
 
                 xas_set(&xas, start);
                 xas_for_each(&xas, page, end - 1) {
@@ -1563,11 +1806,11 @@ out:
         /* TODO: tracepoints */
 }
 
-static void khugepaged_scan_shmem(struct mm_struct *mm,
-                struct address_space *mapping,
-                pgoff_t start, struct page **hpage)
+static void khugepaged_scan_file(struct mm_struct *mm,
+                struct file *file, pgoff_t start, struct page **hpage)
 {
         struct page *page = NULL;
+        struct address_space *mapping = file->f_mapping;
         XA_STATE(xas, &mapping->i_pages, start);
         int present, swap;
         int node = NUMA_NO_NODE;
@@ -1606,7 +1849,8 @@ static void khugepaged_scan_shmem(struct mm_struct *mm,
                         break;
                 }
 
-                if (page_count(page) != 1 + page_mapcount(page)) {
+                if (page_count(page) !=
+                                1 + page_mapcount(page) + page_has_private(page)) {
                         result = SCAN_PAGE_COUNT;
                         break;
                 }
@@ -1631,19 +1875,23 @@ static void khugepaged_scan_shmem(struct mm_struct *mm,
                         result = SCAN_EXCEED_NONE_PTE;
                 } else {
                         node = khugepaged_find_target_node();
-                        collapse_shmem(mm, mapping, start, hpage, node);
+                        collapse_file(mm, file, start, hpage, node);
                 }
         }
 
         /* TODO: tracepoints */
 }
 #else
-static void khugepaged_scan_shmem(struct mm_struct *mm,
-                struct address_space *mapping,
-                pgoff_t start, struct page **hpage)
+static void khugepaged_scan_file(struct mm_struct *mm,
+                struct file *file, pgoff_t start, struct page **hpage)
 {
         BUILD_BUG();
 }
+
+static int khugepaged_collapse_pte_mapped_thps(struct mm_slot *mm_slot)
+{
+        return 0;
+}
 #endif
 
 static unsigned int khugepaged_scan_mm_slot(unsigned int pages,
@@ -1668,6 +1916,7 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages,
                 khugepaged_scan.mm_slot = mm_slot;
         }
         spin_unlock(&khugepaged_mm_lock);
+        khugepaged_collapse_pte_mapped_thps(mm_slot);
 
         mm = mm_slot->mm;
         /*
@@ -1713,17 +1962,18 @@ skip:
                         VM_BUG_ON(khugepaged_scan.address < hstart ||
                                   khugepaged_scan.address + HPAGE_PMD_SIZE >
                                   hend);
-                        if (shmem_file(vma->vm_file)) {
+                        if (IS_ENABLED(CONFIG_SHMEM) && vma->vm_file) {
                                 struct file *file;
                                 pgoff_t pgoff = linear_page_index(vma,
                                                 khugepaged_scan.address);
-                                if (!shmem_huge_enabled(vma))
+
+                                if (shmem_file(vma->vm_file)
+                                    && !shmem_huge_enabled(vma))
                                         goto skip;
                                 file = get_file(vma->vm_file);
                                 up_read(&mm->mmap_sem);
                                 ret = 1;
-                                khugepaged_scan_shmem(mm, file->f_mapping,
-                                                pgoff, hpage);
+                                khugepaged_scan_file(mm, file, pgoff, hpage);
                                 fput(file);
                         } else {
                                 ret = khugepaged_scan_pmd(mm, vma,