Diffstat (limited to 'mm/khugepaged.c')
 -rw-r--r--   mm/khugepaged.c   366
 1 file changed, 308 insertions, 58 deletions
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index ccede2425c3f..0a1b4b484ac5 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -48,6 +48,7 @@ enum scan_result {
 	SCAN_CGROUP_CHARGE_FAIL,
 	SCAN_EXCEED_SWAP_PTE,
 	SCAN_TRUNCATED,
+	SCAN_PAGE_HAS_PRIVATE,
 };
 
 #define CREATE_TRACE_POINTS
@@ -76,6 +77,8 @@ static __read_mostly DEFINE_HASHTABLE(mm_slots_hash, MM_SLOTS_HASH_BITS);
 
 static struct kmem_cache *mm_slot_cache __read_mostly;
 
+#define MAX_PTE_MAPPED_THP 8
+
 /**
  * struct mm_slot - hash lookup from mm to mm_slot
  * @hash: hash collision list
@@ -86,6 +89,10 @@ struct mm_slot {
 	struct hlist_node hash;
 	struct list_head mm_node;
 	struct mm_struct *mm;
+
+	/* pte-mapped THP in this mm */
+	int nr_pte_mapped_thp;
+	unsigned long pte_mapped_thp[MAX_PTE_MAPPED_THP];
 };
 
 /**
@@ -404,7 +411,11 @@ static bool hugepage_vma_check(struct vm_area_struct *vma,
 	    (vm_flags & VM_NOHUGEPAGE) ||
 	    test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags))
 		return false;
-	if (shmem_file(vma->vm_file)) {
+
+	if (shmem_file(vma->vm_file) ||
+	    (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) &&
+	     vma->vm_file &&
+	     (vm_flags & VM_DENYWRITE))) {
 		if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE))
 			return false;
 		return IS_ALIGNED((vma->vm_start >> PAGE_SHIFT) - vma->vm_pgoff,
@@ -456,8 +467,9 @@ int khugepaged_enter_vma_merge(struct vm_area_struct *vma,
 	unsigned long hstart, hend;
 
 	/*
-	 * khugepaged does not yet work on non-shmem files or special
-	 * mappings. And file-private shmem THP is not supported.
+	 * khugepaged only supports read-only files for non-shmem files.
+	 * khugepaged does not yet work on special mappings. And
+	 * file-private shmem THP is not supported.
 	 */
 	if (!hugepage_vma_check(vma, vm_flags))
 		return 0;
@@ -1248,6 +1260,159 @@ static void collect_mm_slot(struct mm_slot *mm_slot)
 }
 
 #if defined(CONFIG_SHMEM) && defined(CONFIG_TRANSPARENT_HUGE_PAGECACHE)
+/*
+ * Notify khugepaged that given addr of the mm is pte-mapped THP. Then
+ * khugepaged should try to collapse the page table.
+ */
+static int khugepaged_add_pte_mapped_thp(struct mm_struct *mm,
+					 unsigned long addr)
+{
+	struct mm_slot *mm_slot;
+
+	VM_BUG_ON(addr & ~HPAGE_PMD_MASK);
+
+	spin_lock(&khugepaged_mm_lock);
+	mm_slot = get_mm_slot(mm);
+	if (likely(mm_slot && mm_slot->nr_pte_mapped_thp < MAX_PTE_MAPPED_THP))
+		mm_slot->pte_mapped_thp[mm_slot->nr_pte_mapped_thp++] = addr;
+	spin_unlock(&khugepaged_mm_lock);
+	return 0;
+}
+
+/**
+ * Try to collapse a pte-mapped THP for mm at address haddr.
+ *
+ * This function checks whether all the PTEs in the PMD are pointing to the
+ * right THP. If so, retract the page table so the THP can refault in
+ * as pmd-mapped.
+ */
+void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr)
+{
+	unsigned long haddr = addr & HPAGE_PMD_MASK;
+	struct vm_area_struct *vma = find_vma(mm, haddr);
+	struct page *hpage = NULL;
+	pte_t *start_pte, *pte;
+	pmd_t *pmd, _pmd;
+	spinlock_t *ptl;
+	int count = 0;
+	int i;
+
+	if (!vma || !vma->vm_file ||
+	    vma->vm_start > haddr || vma->vm_end < haddr + HPAGE_PMD_SIZE)
+		return;
+
+	/*
+	 * This vm_flags may not have VM_HUGEPAGE if the page was not
+	 * collapsed by this mm. But we can still collapse if the page is
+	 * the valid THP. Add extra VM_HUGEPAGE so hugepage_vma_check()
+	 * will not fail the vma for missing VM_HUGEPAGE
+	 */
+	if (!hugepage_vma_check(vma, vma->vm_flags | VM_HUGEPAGE))
+		return;
+
+	pmd = mm_find_pmd(mm, haddr);
+	if (!pmd)
+		return;
+
+	start_pte = pte_offset_map_lock(mm, pmd, haddr, &ptl);
+
+	/* step 1: check all mapped PTEs are to the right huge page */
+	for (i = 0, addr = haddr, pte = start_pte;
+	     i < HPAGE_PMD_NR; i++, addr += PAGE_SIZE, pte++) {
+		struct page *page;
+
+		/* empty pte, skip */
+		if (pte_none(*pte))
+			continue;
+
+		/* page swapped out, abort */
+		if (!pte_present(*pte))
+			goto abort;
+
+		page = vm_normal_page(vma, addr, *pte);
+
+		if (!page || !PageCompound(page))
+			goto abort;
+
+		if (!hpage) {
+			hpage = compound_head(page);
+			/*
+			 * The mapping of the THP should not change.
+			 *
+			 * Note that uprobe, debugger, or MAP_PRIVATE may
+			 * change the page table, but the new page will
+			 * not pass PageCompound() check.
+			 */
+			if (WARN_ON(hpage->mapping != vma->vm_file->f_mapping))
+				goto abort;
+		}
+
+		/*
+		 * Confirm the page maps to the correct subpage.
+		 *
+		 * Note that uprobe, debugger, or MAP_PRIVATE may change
+		 * the page table, but the new page will not pass
+		 * PageCompound() check.
+		 */
+		if (WARN_ON(hpage + i != page))
+			goto abort;
+		count++;
+	}
+
+	/* step 2: adjust rmap */
+	for (i = 0, addr = haddr, pte = start_pte;
+	     i < HPAGE_PMD_NR; i++, addr += PAGE_SIZE, pte++) {
+		struct page *page;
+
+		if (pte_none(*pte))
+			continue;
+		page = vm_normal_page(vma, addr, *pte);
+		page_remove_rmap(page, false);
+	}
+
+	pte_unmap_unlock(start_pte, ptl);
+
+	/* step 3: set proper refcount and mm_counters. */
+	if (hpage) {
+		page_ref_sub(hpage, count);
+		add_mm_counter(vma->vm_mm, mm_counter_file(hpage), -count);
+	}
+
+	/* step 4: collapse pmd */
+	ptl = pmd_lock(vma->vm_mm, pmd);
+	_pmd = pmdp_collapse_flush(vma, addr, pmd);
+	spin_unlock(ptl);
+	mm_dec_nr_ptes(mm);
+	pte_free(mm, pmd_pgtable(_pmd));
+	return;
+
+abort:
+	pte_unmap_unlock(start_pte, ptl);
+}
+
+static int khugepaged_collapse_pte_mapped_thps(struct mm_slot *mm_slot)
+{
+	struct mm_struct *mm = mm_slot->mm;
+	int i;
+
+	if (likely(mm_slot->nr_pte_mapped_thp == 0))
+		return 0;
+
+	if (!down_write_trylock(&mm->mmap_sem))
+		return -EBUSY;
+
+	if (unlikely(khugepaged_test_exit(mm)))
+		goto out;
+
+	for (i = 0; i < mm_slot->nr_pte_mapped_thp; i++)
+		collapse_pte_mapped_thp(mm, mm_slot->pte_mapped_thp[i]);
+
+out:
+	mm_slot->nr_pte_mapped_thp = 0;
+	up_write(&mm->mmap_sem);
+	return 0;
+}
+
 static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
 {
 	struct vm_area_struct *vma;
@@ -1256,7 +1421,22 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
 
 	i_mmap_lock_write(mapping);
 	vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
-		/* probably overkill */
+		/*
+		 * Check vma->anon_vma to exclude MAP_PRIVATE mappings that
+		 * got written to. These VMAs are likely not worth investing
+		 * down_write(mmap_sem) as PMD-mapping is likely to be split
+		 * later.
+		 *
+		 * Note that vma->anon_vma check is racy: it can be set up after
+		 * the check but before we took mmap_sem by the fault path.
+		 * But page lock would prevent establishing any new ptes of the
+		 * page, so we are safe.
+		 *
+		 * An alternative would be to drop the check, but to check that
+		 * the page table is clear before calling pmdp_collapse_flush()
+		 * under ptl. It has a higher chance to recover THP for the VMA,
+		 * but has higher cost too.
+		 */
 		if (vma->anon_vma)
 			continue;
 		addr = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
@@ -1269,9 +1449,10 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
 			continue;
 		/*
 		 * We need exclusive mmap_sem to retract page table.
-		 * If trylock fails we would end up with pte-mapped THP after
-		 * re-fault. Not ideal, but it's more important to not disturb
-		 * the system too much.
+		 *
+		 * We use trylock due to lock inversion: we need to acquire
+		 * mmap_sem while holding page lock. Fault path does it in
+		 * reverse order. Trylock is a way to avoid deadlock.
 		 */
 		if (down_write_trylock(&vma->vm_mm->mmap_sem)) {
 			spinlock_t *ptl = pmd_lock(vma->vm_mm, pmd);
@@ -1281,18 +1462,21 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
 			up_write(&vma->vm_mm->mmap_sem);
 			mm_dec_nr_ptes(vma->vm_mm);
 			pte_free(vma->vm_mm, pmd_pgtable(_pmd));
+		} else {
+			/* Try again later */
+			khugepaged_add_pte_mapped_thp(vma->vm_mm, addr);
 		}
 	}
 	i_mmap_unlock_write(mapping);
 }
 
 /**
- * collapse_shmem - collapse small tmpfs/shmem pages into huge one.
+ * collapse_file - collapse filemap/tmpfs/shmem pages into huge one.
  *
  * Basic scheme is simple, details are more complex:
  *  - allocate and lock a new huge page;
  *  - scan page cache replacing old pages with the new one
- *    + swap in pages if necessary;
+ *    + swap/gup in pages if necessary;
  *    + fill in gaps;
  *    + keep old pages around in case rollback is required;
  *  - if replacing succeeds:
@@ -1304,10 +1488,11 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
 *  + restore gaps in the page cache;
 *  + unlock and free huge page;
 */
-static void collapse_shmem(struct mm_struct *mm,
-		struct address_space *mapping, pgoff_t start,
+static void collapse_file(struct mm_struct *mm,
+		struct file *file, pgoff_t start,
 		struct page **hpage, int node)
 {
+	struct address_space *mapping = file->f_mapping;
 	gfp_t gfp;
 	struct page *new_page;
 	struct mem_cgroup *memcg;
@@ -1315,7 +1500,9 @@ static void collapse_shmem(struct mm_struct *mm,
 	LIST_HEAD(pagelist);
 	XA_STATE_ORDER(xas, &mapping->i_pages, start, HPAGE_PMD_ORDER);
 	int nr_none = 0, result = SCAN_SUCCEED;
+	bool is_shmem = shmem_file(file);
 
+	VM_BUG_ON(!IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && !is_shmem);
 	VM_BUG_ON(start & (HPAGE_PMD_NR - 1));
 
 	/* Only allocate from the target node */
@@ -1347,7 +1534,8 @@ static void collapse_shmem(struct mm_struct *mm,
 	} while (1);
 
 	__SetPageLocked(new_page);
-	__SetPageSwapBacked(new_page);
+	if (is_shmem)
+		__SetPageSwapBacked(new_page);
 	new_page->index = start;
 	new_page->mapping = mapping;
 
@@ -1362,41 +1550,75 @@ static void collapse_shmem(struct mm_struct *mm,
 		struct page *page = xas_next(&xas);
 
 		VM_BUG_ON(index != xas.xa_index);
-		if (!page) {
-			/*
-			 * Stop if extent has been truncated or hole-punched,
-			 * and is now completely empty.
-			 */
-			if (index == start) {
-				if (!xas_next_entry(&xas, end - 1)) {
-					result = SCAN_TRUNCATED;
-					goto xa_locked;
-				}
-				xas_set(&xas, index);
-			}
-			if (!shmem_charge(mapping->host, 1)) {
-				result = SCAN_FAIL;
-				goto xa_locked;
-			}
-			xas_store(&xas, new_page + (index % HPAGE_PMD_NR));
-			nr_none++;
-			continue;
-		}
-
-		if (xa_is_value(page) || !PageUptodate(page)) {
-			xas_unlock_irq(&xas);
-			/* swap in or instantiate fallocated page */
-			if (shmem_getpage(mapping->host, index, &page,
-					  SGP_NOHUGE)) {
-				result = SCAN_FAIL;
-				goto xa_unlocked;
-			}
-		} else if (trylock_page(page)) {
-			get_page(page);
-			xas_unlock_irq(&xas);
-		} else {
-			result = SCAN_PAGE_LOCK;
-			goto xa_locked;
-		}
+		if (is_shmem) {
+			if (!page) {
+				/*
+				 * Stop if extent has been truncated or
+				 * hole-punched, and is now completely
+				 * empty.
+				 */
+				if (index == start) {
+					if (!xas_next_entry(&xas, end - 1)) {
+						result = SCAN_TRUNCATED;
+						goto xa_locked;
+					}
+					xas_set(&xas, index);
+				}
+				if (!shmem_charge(mapping->host, 1)) {
+					result = SCAN_FAIL;
+					goto xa_locked;
+				}
+				xas_store(&xas, new_page);
+				nr_none++;
+				continue;
+			}
+
+			if (xa_is_value(page) || !PageUptodate(page)) {
+				xas_unlock_irq(&xas);
+				/* swap in or instantiate fallocated page */
+				if (shmem_getpage(mapping->host, index, &page,
+						  SGP_NOHUGE)) {
+					result = SCAN_FAIL;
+					goto xa_unlocked;
+				}
+			} else if (trylock_page(page)) {
+				get_page(page);
+				xas_unlock_irq(&xas);
+			} else {
+				result = SCAN_PAGE_LOCK;
+				goto xa_locked;
+			}
+		} else {	/* !is_shmem */
+			if (!page || xa_is_value(page)) {
+				xas_unlock_irq(&xas);
+				page_cache_sync_readahead(mapping, &file->f_ra,
+							  file, index,
+							  PAGE_SIZE);
+				/* drain pagevecs to help isolate_lru_page() */
+				lru_add_drain();
+				page = find_lock_page(mapping, index);
+				if (unlikely(page == NULL)) {
+					result = SCAN_FAIL;
+					goto xa_unlocked;
+				}
+			} else if (!PageUptodate(page)) {
+				xas_unlock_irq(&xas);
+				wait_on_page_locked(page);
+				if (!trylock_page(page)) {
+					result = SCAN_PAGE_LOCK;
+					goto xa_unlocked;
+				}
+				get_page(page);
+			} else if (PageDirty(page)) {
+				result = SCAN_FAIL;
+				goto xa_locked;
+			} else if (trylock_page(page)) {
+				get_page(page);
+				xas_unlock_irq(&xas);
+			} else {
+				result = SCAN_PAGE_LOCK;
+				goto xa_locked;
+			}
+		}
 
 		/*
@@ -1425,6 +1647,12 @@ static void collapse_shmem(struct mm_struct *mm,
 			goto out_unlock;
 		}
 
+		if (page_has_private(page) &&
+		    !try_to_release_page(page, GFP_KERNEL)) {
+			result = SCAN_PAGE_HAS_PRIVATE;
+			goto out_unlock;
+		}
+
 		if (page_mapped(page))
 			unmap_mapping_pages(mapping, index, 1, false);
 
@@ -1454,7 +1682,7 @@ static void collapse_shmem(struct mm_struct *mm,
 		list_add_tail(&page->lru, &pagelist);
 
 		/* Finally, replace with the new page. */
-		xas_store(&xas, new_page + (index % HPAGE_PMD_NR));
+		xas_store(&xas, new_page);
 		continue;
 out_unlock:
 		unlock_page(page);
@@ -1462,12 +1690,20 @@ out_unlock:
 		goto xa_unlocked;
 	}
 
-	__inc_node_page_state(new_page, NR_SHMEM_THPS);
+	if (is_shmem)
+		__inc_node_page_state(new_page, NR_SHMEM_THPS);
+	else {
+		__inc_node_page_state(new_page, NR_FILE_THPS);
+		filemap_nr_thps_inc(mapping);
+	}
+
 	if (nr_none) {
 		struct zone *zone = page_zone(new_page);
 
 		__mod_node_page_state(zone->zone_pgdat, NR_FILE_PAGES, nr_none);
-		__mod_node_page_state(zone->zone_pgdat, NR_SHMEM, nr_none);
+		if (is_shmem)
+			__mod_node_page_state(zone->zone_pgdat,
+					      NR_SHMEM, nr_none);
 	}
 
 xa_locked:
@@ -1505,10 +1741,15 @@ xa_unlocked:
 
 	SetPageUptodate(new_page);
 	page_ref_add(new_page, HPAGE_PMD_NR - 1);
-	set_page_dirty(new_page);
 	mem_cgroup_commit_charge(new_page, memcg, false, true);
+
+	if (is_shmem) {
+		set_page_dirty(new_page);
+		lru_cache_add_anon(new_page);
+	} else {
+		lru_cache_add_file(new_page);
+	}
 	count_memcg_events(memcg, THP_COLLAPSE_ALLOC, 1);
-	lru_cache_add_anon(new_page);
 
 	/*
 	 * Remove pte page tables, so we can re-fault the page as huge.
@@ -1523,7 +1764,9 @@ xa_unlocked:
 	/* Something went wrong: roll back page cache changes */
 	xas_lock_irq(&xas);
 	mapping->nrpages -= nr_none;
-	shmem_uncharge(mapping->host, nr_none);
+
+	if (is_shmem)
+		shmem_uncharge(mapping->host, nr_none);
 
 	xas_set(&xas, start);
 	xas_for_each(&xas, page, end - 1) {
@@ -1563,11 +1806,11 @@ out:
 	/* TODO: tracepoints */
 }
 
-static void khugepaged_scan_shmem(struct mm_struct *mm,
-		struct address_space *mapping,
-		pgoff_t start, struct page **hpage)
+static void khugepaged_scan_file(struct mm_struct *mm,
+		struct file *file, pgoff_t start, struct page **hpage)
 {
 	struct page *page = NULL;
+	struct address_space *mapping = file->f_mapping;
 	XA_STATE(xas, &mapping->i_pages, start);
 	int present, swap;
 	int node = NUMA_NO_NODE;
@@ -1606,7 +1849,8 @@ static void khugepaged_scan_shmem(struct mm_struct *mm,
 			break;
 		}
 
-		if (page_count(page) != 1 + page_mapcount(page)) {
+		if (page_count(page) !=
+		    1 + page_mapcount(page) + page_has_private(page)) {
 			result = SCAN_PAGE_COUNT;
 			break;
 		}
@@ -1631,19 +1875,23 @@ static void khugepaged_scan_shmem(struct mm_struct *mm,
 			result = SCAN_EXCEED_NONE_PTE;
 		} else {
 			node = khugepaged_find_target_node();
-			collapse_shmem(mm, mapping, start, hpage, node);
+			collapse_file(mm, file, start, hpage, node);
 		}
 	}
 
 	/* TODO: tracepoints */
 }
 #else
-static void khugepaged_scan_shmem(struct mm_struct *mm,
-		struct address_space *mapping,
-		pgoff_t start, struct page **hpage)
+static void khugepaged_scan_file(struct mm_struct *mm,
+		struct file *file, pgoff_t start, struct page **hpage)
 {
 	BUILD_BUG();
 }
+
+static int khugepaged_collapse_pte_mapped_thps(struct mm_slot *mm_slot)
+{
+	return 0;
+}
 #endif
 
 static unsigned int khugepaged_scan_mm_slot(unsigned int pages,
@@ -1668,6 +1916,7 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages,
 		khugepaged_scan.mm_slot = mm_slot;
 	}
 	spin_unlock(&khugepaged_mm_lock);
+	khugepaged_collapse_pte_mapped_thps(mm_slot);
 
 	mm = mm_slot->mm;
 	/*
@@ -1713,17 +1962,18 @@ skip:
 			VM_BUG_ON(khugepaged_scan.address < hstart ||
 				  khugepaged_scan.address + HPAGE_PMD_SIZE >
 				  hend);
-			if (shmem_file(vma->vm_file)) {
+			if (IS_ENABLED(CONFIG_SHMEM) && vma->vm_file) {
 				struct file *file;
 				pgoff_t pgoff = linear_page_index(vma,
 						khugepaged_scan.address);
-				if (!shmem_huge_enabled(vma))
+
+				if (shmem_file(vma->vm_file)
+				    && !shmem_huge_enabled(vma))
 					goto skip;
 				file = get_file(vma->vm_file);
 				up_read(&mm->mmap_sem);
 				ret = 1;
-				khugepaged_scan_shmem(mm, file->f_mapping,
-						pgoff, hpage);
+				khugepaged_scan_file(mm, file, pgoff, hpage);
 				fput(file);
 			} else {
 				ret = khugepaged_scan_pmd(mm, vma,