author     Kirill A. Shutemov <kirill.shutemov@linux.intel.com>   2017-02-24 17:58:01 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>         2017-02-24 20:46:55 -0500
commit     c7ab0d2fdc840266b39db94538f74207ec2afbf6 (patch)
tree       668097f8e471bdece447f2c334244c4b5e98081e /mm
parent     f27176cfc363d395eea8dc5c4a26e5d6d7d65eaf (diff)
mm: convert try_to_unmap_one() to use page_vma_mapped_walk()
For consistency, it is worth converting all page_check_address() callers
to page_vma_mapped_walk(), so we can drop the former.

It also makes freeze_page() simpler and cheaper, as we now walk the rmap
only once instead of once per subpage.
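
The shape of the conversion is roughly the sketch below: instead of a single
page_check_address() lookup that returns one mapped pte, the caller fills in a
struct page_vma_mapped_walk and loops over every pte in the vma that maps some
subpage of the (possibly compound) page. This is an illustrative sketch only,
not part of the patch; need_to_stop() stands in for whatever early-exit
condition a particular caller has.

        struct page_vma_mapped_walk pvmw = {
                .page = page,           /* (possibly compound) page of interest */
                .vma = vma,
                .address = address,
        };

        /*
         * Each successful iteration returns with pvmw.pte pointing at a
         * mapped entry, pvmw.address set to its virtual address, and the
         * page table lock held.
         */
        while (page_vma_mapped_walk(&pvmw)) {
                /* Which subpage of a THP does this pte actually map? */
                struct page *subpage = page - page_to_pfn(page) +
                                        pte_pfn(*pvmw.pte);

                /* ... operate on subpage / *pvmw.pte at pvmw.address ... */

                if (need_to_stop(subpage)) {    /* hypothetical early exit */
                        /* Drop the lock taken by the walker before bailing out. */
                        page_vma_mapped_walk_done(&pvmw);
                        break;
                }
        }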
Link: http://lkml.kernel.org/r/20170129173858.45174-8-kirill.shutemov@linux.intel.com
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
 -rw-r--r--  mm/huge_memory.c |  16
 -rw-r--r--  mm/rmap.c        | 260
 2 files changed, 137 insertions(+), 139 deletions(-)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index a7bac4f2b78a..efddd02141a8 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2106,24 +2106,16 @@ void vma_adjust_trans_huge(struct vm_area_struct *vma,
 static void freeze_page(struct page *page)
 {
         enum ttu_flags ttu_flags = TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS |
-                TTU_RMAP_LOCKED;
-        int i, ret;
+                TTU_RMAP_LOCKED | TTU_SPLIT_HUGE_PMD;
+        int ret;
 
         VM_BUG_ON_PAGE(!PageHead(page), page);
 
         if (PageAnon(page))
                 ttu_flags |= TTU_MIGRATION;
 
-        /* We only need TTU_SPLIT_HUGE_PMD once */
-        ret = try_to_unmap(page, ttu_flags | TTU_SPLIT_HUGE_PMD);
-        for (i = 1; !ret && i < HPAGE_PMD_NR; i++) {
-                /* Cut short if the page is unmapped */
-                if (page_count(page) == 1)
-                        return;
-
-                ret = try_to_unmap(page + i, ttu_flags);
-        }
-        VM_BUG_ON_PAGE(ret, page + i - 1);
+        ret = try_to_unmap(page, ttu_flags);
+        VM_BUG_ON_PAGE(ret, page);
 }
 
 static void unfreeze_page(struct page *page)
diff --git a/mm/rmap.c b/mm/rmap.c
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -607,8 +607,7 @@ void try_to_unmap_flush_dirty(void)
                 try_to_unmap_flush();
 }
 
-static void set_tlb_ubc_flush_pending(struct mm_struct *mm,
-                struct page *page, bool writable)
+static void set_tlb_ubc_flush_pending(struct mm_struct *mm, bool writable)
 {
         struct tlbflush_unmap_batch *tlb_ubc = &current->tlb_ubc;
 
@@ -643,8 +642,7 @@ static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags)
         return should_defer;
 }
 #else
-static void set_tlb_ubc_flush_pending(struct mm_struct *mm,
-                struct page *page, bool writable)
+static void set_tlb_ubc_flush_pending(struct mm_struct *mm, bool writable)
 {
 }
 
@@ -1459,155 +1457,163 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
                             unsigned long address, void *arg)
 {
         struct mm_struct *mm = vma->vm_mm;
-        pte_t *pte;
+        struct page_vma_mapped_walk pvmw = {
+                .page = page,
+                .vma = vma,
+                .address = address,
+        };
         pte_t pteval;
-        spinlock_t *ptl;
+        struct page *subpage;
         int ret = SWAP_AGAIN;
         struct rmap_private *rp = arg;
         enum ttu_flags flags = rp->flags;
 
         /* munlock has nothing to gain from examining un-locked vmas */
         if ((flags & TTU_MUNLOCK) && !(vma->vm_flags & VM_LOCKED))
-                goto out;
+                return SWAP_AGAIN;
 
         if (flags & TTU_SPLIT_HUGE_PMD) {
                 split_huge_pmd_address(vma, address,
                                 flags & TTU_MIGRATION, page);
-                /* check if we have anything to do after split */
-                if (page_mapcount(page) == 0)
-                        goto out;
         }
 
-        pte = page_check_address(page, mm, address, &ptl,
-                        PageTransCompound(page));
-        if (!pte)
-                goto out;
+        while (page_vma_mapped_walk(&pvmw)) {
+                subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte);
+                address = pvmw.address;
 
-        /*
-         * If the page is mlock()d, we cannot swap it out.
-         * If it's recently referenced (perhaps page_referenced
-         * skipped over this mm) then we should reactivate it.
-         */
-        if (!(flags & TTU_IGNORE_MLOCK)) {
-                if (vma->vm_flags & VM_LOCKED) {
-                        /* PTE-mapped THP are never mlocked */
-                        if (!PageTransCompound(page)) {
-                                /*
-                                 * Holding pte lock, we do *not* need
-                                 * mmap_sem here
-                                 */
-                                mlock_vma_page(page);
+                /* Unexpected PMD-mapped THP? */
+                VM_BUG_ON_PAGE(!pvmw.pte, page);
+
+                /*
+                 * If the page is mlock()d, we cannot swap it out.
+                 * If it's recently referenced (perhaps page_referenced
+                 * skipped over this mm) then we should reactivate it.
+                 */
+                if (!(flags & TTU_IGNORE_MLOCK)) {
+                        if (vma->vm_flags & VM_LOCKED) {
+                                /* PTE-mapped THP are never mlocked */
+                                if (!PageTransCompound(page)) {
+                                        /*
+                                         * Holding pte lock, we do *not* need
+                                         * mmap_sem here
+                                         */
+                                        mlock_vma_page(page);
+                                }
+                                ret = SWAP_MLOCK;
+                                page_vma_mapped_walk_done(&pvmw);
+                                break;
                         }
-                        ret = SWAP_MLOCK;
-                        goto out_unmap;
-                }
-                if (flags & TTU_MUNLOCK)
-                        goto out_unmap;
-        }
-        if (!(flags & TTU_IGNORE_ACCESS)) {
-                if (ptep_clear_flush_young_notify(vma, address, pte)) {
-                        ret = SWAP_FAIL;
-                        goto out_unmap;
+                        if (flags & TTU_MUNLOCK)
+                                continue;
                 }
-        }
 
+                if (!(flags & TTU_IGNORE_ACCESS)) {
+                        if (ptep_clear_flush_young_notify(vma, address,
+                                                pvmw.pte)) {
+                                ret = SWAP_FAIL;
+                                page_vma_mapped_walk_done(&pvmw);
+                                break;
+                        }
+                }
-        }
 
-        /* Nuke the page table entry. */
-        flush_cache_page(vma, address, page_to_pfn(page));
-        if (should_defer_flush(mm, flags)) {
-                /*
-                 * We clear the PTE but do not flush so potentially a remote
-                 * CPU could still be writing to the page. If the entry was
-                 * previously clean then the architecture must guarantee that
-                 * a clear->dirty transition on a cached TLB entry is written
-                 * through and traps if the PTE is unmapped.
-                 */
-                pteval = ptep_get_and_clear(mm, address, pte);
+                /* Nuke the page table entry. */
+                flush_cache_page(vma, address, pte_pfn(*pvmw.pte));
+                if (should_defer_flush(mm, flags)) {
+                        /*
+                         * We clear the PTE but do not flush so potentially
+                         * a remote CPU could still be writing to the page.
+                         * If the entry was previously clean then the
+                         * architecture must guarantee that a clear->dirty
+                         * transition on a cached TLB entry is written through
+                         * and traps if the PTE is unmapped.
+                         */
+                        pteval = ptep_get_and_clear(mm, address, pvmw.pte);
+
+                        set_tlb_ubc_flush_pending(mm, pte_dirty(pteval));
+                } else {
+                        pteval = ptep_clear_flush(vma, address, pvmw.pte);
+                }
 
-                set_tlb_ubc_flush_pending(mm, page, pte_dirty(pteval));
-        } else {
-                pteval = ptep_clear_flush(vma, address, pte);
-        }
+                /* Move the dirty bit to the page. Now the pte is gone. */
+                if (pte_dirty(pteval))
+                        set_page_dirty(page);
 
-        /* Move the dirty bit to the physical page now the pte is gone. */
-        if (pte_dirty(pteval))
-                set_page_dirty(page);
+                /* Update high watermark before we lower rss */
+                update_hiwater_rss(mm);
 
-        /* Update high watermark before we lower rss */
-        update_hiwater_rss(mm);
+                if (PageHWPoison(page) && !(flags & TTU_IGNORE_HWPOISON)) {
+                        if (PageHuge(page)) {
+                                int nr = 1 << compound_order(page);
+                                hugetlb_count_sub(nr, mm);
+                        } else {
+                                dec_mm_counter(mm, mm_counter(page));
+                        }
 
-        if (PageHWPoison(page) && !(flags & TTU_IGNORE_HWPOISON)) {
-                if (PageHuge(page)) {
-                        hugetlb_count_sub(1 << compound_order(page), mm);
-                } else {
+                        pteval = swp_entry_to_pte(make_hwpoison_entry(subpage));
+                        set_pte_at(mm, address, pvmw.pte, pteval);
+                } else if (pte_unused(pteval)) {
+                        /*
+                         * The guest indicated that the page content is of no
+                         * interest anymore. Simply discard the pte, vmscan
+                         * will take care of the rest.
+                         */
                         dec_mm_counter(mm, mm_counter(page));
-                }
-                set_pte_at(mm, address, pte,
-                           swp_entry_to_pte(make_hwpoison_entry(page)));
-        } else if (pte_unused(pteval)) {
-                /*
-                 * The guest indicated that the page content is of no
-                 * interest anymore. Simply discard the pte, vmscan
-                 * will take care of the rest.
-                 */
-                dec_mm_counter(mm, mm_counter(page));
-        } else if (IS_ENABLED(CONFIG_MIGRATION) && (flags & TTU_MIGRATION)) {
-                swp_entry_t entry;
-                pte_t swp_pte;
-                /*
-                 * Store the pfn of the page in a special migration
-                 * pte. do_swap_page() will wait until the migration
-                 * pte is removed and then restart fault handling.
-                 */
-                entry = make_migration_entry(page, pte_write(pteval));
-                swp_pte = swp_entry_to_pte(entry);
-                if (pte_soft_dirty(pteval))
-                        swp_pte = pte_swp_mksoft_dirty(swp_pte);
-                set_pte_at(mm, address, pte, swp_pte);
-        } else if (PageAnon(page)) {
-                swp_entry_t entry = { .val = page_private(page) };
-                pte_t swp_pte;
-                /*
-                 * Store the swap location in the pte.
-                 * See handle_pte_fault() ...
-                 */
-                VM_BUG_ON_PAGE(!PageSwapCache(page), page);
+                } else if (IS_ENABLED(CONFIG_MIGRATION) &&
+                                (flags & TTU_MIGRATION)) {
+                        swp_entry_t entry;
+                        pte_t swp_pte;
+                        /*
+                         * Store the pfn of the page in a special migration
+                         * pte. do_swap_page() will wait until the migration
+                         * pte is removed and then restart fault handling.
+                         */
+                        entry = make_migration_entry(subpage,
+                                        pte_write(pteval));
+                        swp_pte = swp_entry_to_pte(entry);
+                        if (pte_soft_dirty(pteval))
+                                swp_pte = pte_swp_mksoft_dirty(swp_pte);
+                        set_pte_at(mm, address, pvmw.pte, swp_pte);
+                } else if (PageAnon(page)) {
+                        swp_entry_t entry = { .val = page_private(subpage) };
+                        pte_t swp_pte;
+                        /*
+                         * Store the swap location in the pte.
+                         * See handle_pte_fault() ...
+                         */
+                        VM_BUG_ON_PAGE(!PageSwapCache(page), page);
+
+                        if (!PageDirty(page) && (flags & TTU_LZFREE)) {
+                                /* It's a freeable page by MADV_FREE */
+                                dec_mm_counter(mm, MM_ANONPAGES);
+                                rp->lazyfreed++;
+                                goto discard;
+                        }
 
-                if (!PageDirty(page) && (flags & TTU_LZFREE)) {
-                        /* It's a freeable page by MADV_FREE */
+                        if (swap_duplicate(entry) < 0) {
+                                set_pte_at(mm, address, pvmw.pte, pteval);
+                                ret = SWAP_FAIL;
+                                page_vma_mapped_walk_done(&pvmw);
+                                break;
+                        }
+                        if (list_empty(&mm->mmlist)) {
+                                spin_lock(&mmlist_lock);
+                                if (list_empty(&mm->mmlist))
+                                        list_add(&mm->mmlist, &init_mm.mmlist);
+                                spin_unlock(&mmlist_lock);
+                        }
                         dec_mm_counter(mm, MM_ANONPAGES);
-                        rp->lazyfreed++;
-                        goto discard;
-                }
-
-                if (swap_duplicate(entry) < 0) {
-                        set_pte_at(mm, address, pte, pteval);
-                        ret = SWAP_FAIL;
-                        goto out_unmap;
-                }
-                if (list_empty(&mm->mmlist)) {
-                        spin_lock(&mmlist_lock);
-                        if (list_empty(&mm->mmlist))
-                                list_add(&mm->mmlist, &init_mm.mmlist);
-                        spin_unlock(&mmlist_lock);
-                }
-                dec_mm_counter(mm, MM_ANONPAGES);
-                inc_mm_counter(mm, MM_SWAPENTS);
-                swp_pte = swp_entry_to_pte(entry);
-                if (pte_soft_dirty(pteval))
-                        swp_pte = pte_swp_mksoft_dirty(swp_pte);
-                set_pte_at(mm, address, pte, swp_pte);
-        } else
-                dec_mm_counter(mm, mm_counter_file(page));
-
+                        inc_mm_counter(mm, MM_SWAPENTS);
+                        swp_pte = swp_entry_to_pte(entry);
+                        if (pte_soft_dirty(pteval))
+                                swp_pte = pte_swp_mksoft_dirty(swp_pte);
+                        set_pte_at(mm, address, pvmw.pte, swp_pte);
+                } else
+                        dec_mm_counter(mm, mm_counter_file(page));
 discard:
-        page_remove_rmap(page, PageHuge(page));
-        put_page(page);
-
-out_unmap:
-        pte_unmap_unlock(pte, ptl);
-        if (ret != SWAP_FAIL && ret != SWAP_MLOCK && !(flags & TTU_MUNLOCK))
+                page_remove_rmap(subpage, PageHuge(page));
+                put_page(page);
                 mmu_notifier_invalidate_page(mm, address);
-out:
+        }
         return ret;
 }
 
@@ -1632,7 +1638,7 @@ static bool invalid_migration_vma(struct vm_area_struct *vma, void *arg)
 
 static int page_mapcount_is_zero(struct page *page)
 {
-        return !page_mapcount(page);
+        return !total_mapcount(page);
 }
 
 /**