author     Kirill A. Shutemov <kirill.shutemov@linux.intel.com>   2017-02-24 17:58:01 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>         2017-02-24 20:46:55 -0500
commit     c7ab0d2fdc840266b39db94538f74207ec2afbf6 (patch)
tree       668097f8e471bdece447f2c334244c4b5e98081e /mm
parent     f27176cfc363d395eea8dc5c4a26e5d6d7d65eaf (diff)
mm: convert try_to_unmap_one() to use page_vma_mapped_walk()
For consistency, it is worth converting all page_check_address() callers to
page_vma_mapped_walk(), so that the former can be dropped.  It also simplifies
freeze_page(), as we now walk through the rmap only once.

Link: http://lkml.kernel.org/r/20170129173858.45174-8-kirill.shutemov@linux.intel.com
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
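As context for the diff below, this is the general shape of a page_check_address() caller after conversion to page_vma_mapped_walk(). The struct fields and helpers (pvmw.pte, pvmw.address, page_vma_mapped_walk_done()) are the ones used in the patch; the surrounding function and its stop condition are only an illustrative sketch, not kernel code.

/* Illustrative sketch only; not part of this patch. */
static void example_walk_mappings(struct page *page, struct vm_area_struct *vma,
				  unsigned long address)
{
	struct page_vma_mapped_walk pvmw = {
		.page = page,
		.vma = vma,
		.address = address,
	};

	/* Each iteration returns with one PTE mapping of @page mapped and locked. */
	while (page_vma_mapped_walk(&pvmw)) {
		if (example_should_stop(pvmw.pte)) {	/* hypothetical condition */
			/* Drop the PTE map and lock before leaving the loop early. */
			page_vma_mapped_walk_done(&pvmw);
			break;
		}
		/* act on pvmw.pte at pvmw.address here */
	}
}

Compared with page_check_address(), which returned at most one mapped PTE per call, the loop naturally visits every PTE of a PTE-mapped THP inside one VMA, which is what lets freeze_page() issue a single try_to_unmap() call.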
Diffstat (limited to 'mm')
-rw-r--r--  mm/huge_memory.c |  16
-rw-r--r--  mm/rmap.c        | 260
2 files changed, 137 insertions(+), 139 deletions(-)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index a7bac4f2b78a..efddd02141a8 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2106,24 +2106,16 @@ void vma_adjust_trans_huge(struct vm_area_struct *vma,
 static void freeze_page(struct page *page)
 {
 	enum ttu_flags ttu_flags = TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS |
-		TTU_RMAP_LOCKED;
-	int i, ret;
+		TTU_RMAP_LOCKED | TTU_SPLIT_HUGE_PMD;
+	int ret;
 
 	VM_BUG_ON_PAGE(!PageHead(page), page);
 
 	if (PageAnon(page))
 		ttu_flags |= TTU_MIGRATION;
 
-	/* We only need TTU_SPLIT_HUGE_PMD once */
-	ret = try_to_unmap(page, ttu_flags | TTU_SPLIT_HUGE_PMD);
-	for (i = 1; !ret && i < HPAGE_PMD_NR; i++) {
-		/* Cut short if the page is unmapped */
-		if (page_count(page) == 1)
-			return;
-
-		ret = try_to_unmap(page + i, ttu_flags);
-	}
-	VM_BUG_ON_PAGE(ret, page + i - 1);
+	ret = try_to_unmap(page, ttu_flags);
+	VM_BUG_ON_PAGE(ret, page);
 }
 
 static void unfreeze_page(struct page *page)
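For readability, here is the post-patch freeze_page() reassembled from the '+' lines of the hunk above (the explanatory comment is added here and is not in the file): a single try_to_unmap() call with TTU_SPLIT_HUGE_PMD set replaces the old per-subpage loop, because try_to_unmap_one() now walks every PTE mapping of the compound page by itself.

static void freeze_page(struct page *page)
{
	enum ttu_flags ttu_flags = TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS |
		TTU_RMAP_LOCKED | TTU_SPLIT_HUGE_PMD;
	int ret;

	VM_BUG_ON_PAGE(!PageHead(page), page);

	if (PageAnon(page))
		ttu_flags |= TTU_MIGRATION;

	/* One rmap walk now unmaps every subpage of the THP. */
	ret = try_to_unmap(page, ttu_flags);
	VM_BUG_ON_PAGE(ret, page);
}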
diff --git a/mm/rmap.c b/mm/rmap.c
index 58597de049fd..11668fb881d8 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -607,8 +607,7 @@ void try_to_unmap_flush_dirty(void)
 	try_to_unmap_flush();
 }
 
-static void set_tlb_ubc_flush_pending(struct mm_struct *mm,
-		struct page *page, bool writable)
+static void set_tlb_ubc_flush_pending(struct mm_struct *mm, bool writable)
 {
 	struct tlbflush_unmap_batch *tlb_ubc = &current->tlb_ubc;
 
@@ -643,8 +642,7 @@ static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags)
 	return should_defer;
 }
 #else
-static void set_tlb_ubc_flush_pending(struct mm_struct *mm,
-		struct page *page, bool writable)
+static void set_tlb_ubc_flush_pending(struct mm_struct *mm, bool writable)
 {
 }
 
@@ -1459,155 +1457,163 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 		     unsigned long address, void *arg)
 {
 	struct mm_struct *mm = vma->vm_mm;
-	pte_t *pte;
+	struct page_vma_mapped_walk pvmw = {
+		.page = page,
+		.vma = vma,
+		.address = address,
+	};
 	pte_t pteval;
-	spinlock_t *ptl;
+	struct page *subpage;
 	int ret = SWAP_AGAIN;
 	struct rmap_private *rp = arg;
 	enum ttu_flags flags = rp->flags;
 
 	/* munlock has nothing to gain from examining un-locked vmas */
 	if ((flags & TTU_MUNLOCK) && !(vma->vm_flags & VM_LOCKED))
-		goto out;
+		return SWAP_AGAIN;
 
 	if (flags & TTU_SPLIT_HUGE_PMD) {
 		split_huge_pmd_address(vma, address,
 				flags & TTU_MIGRATION, page);
-		/* check if we have anything to do after split */
-		if (page_mapcount(page) == 0)
-			goto out;
 	}
 
-	pte = page_check_address(page, mm, address, &ptl,
-				 PageTransCompound(page));
-	if (!pte)
-		goto out;
+	while (page_vma_mapped_walk(&pvmw)) {
+		subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte);
+		address = pvmw.address;
 
-	/*
-	 * If the page is mlock()d, we cannot swap it out.
-	 * If it's recently referenced (perhaps page_referenced
-	 * skipped over this mm) then we should reactivate it.
-	 */
-	if (!(flags & TTU_IGNORE_MLOCK)) {
-		if (vma->vm_flags & VM_LOCKED) {
-			/* PTE-mapped THP are never mlocked */
-			if (!PageTransCompound(page)) {
-				/*
-				 * Holding pte lock, we do *not* need
-				 * mmap_sem here
-				 */
-				mlock_vma_page(page);
-			}
-			ret = SWAP_MLOCK;
-			goto out_unmap;
-		}
-		if (flags & TTU_MUNLOCK)
-			goto out_unmap;
-	}
-	if (!(flags & TTU_IGNORE_ACCESS)) {
-		if (ptep_clear_flush_young_notify(vma, address, pte)) {
-			ret = SWAP_FAIL;
-			goto out_unmap;
-		}
-	}
+		/* Unexpected PMD-mapped THP? */
+		VM_BUG_ON_PAGE(!pvmw.pte, page);
+
+		/*
+		 * If the page is mlock()d, we cannot swap it out.
+		 * If it's recently referenced (perhaps page_referenced
+		 * skipped over this mm) then we should reactivate it.
+		 */
+		if (!(flags & TTU_IGNORE_MLOCK)) {
+			if (vma->vm_flags & VM_LOCKED) {
+				/* PTE-mapped THP are never mlocked */
+				if (!PageTransCompound(page)) {
+					/*
+					 * Holding pte lock, we do *not* need
+					 * mmap_sem here
+					 */
+					mlock_vma_page(page);
+				}
+				ret = SWAP_MLOCK;
+				page_vma_mapped_walk_done(&pvmw);
+				break;
+			}
+			if (flags & TTU_MUNLOCK)
+				continue;
+		}
+
+		if (!(flags & TTU_IGNORE_ACCESS)) {
+			if (ptep_clear_flush_young_notify(vma, address,
+						pvmw.pte)) {
+				ret = SWAP_FAIL;
+				page_vma_mapped_walk_done(&pvmw);
+				break;
+			}
+		}
 
-	/* Nuke the page table entry. */
-	flush_cache_page(vma, address, page_to_pfn(page));
-	if (should_defer_flush(mm, flags)) {
-		/*
-		 * We clear the PTE but do not flush so potentially a remote
-		 * CPU could still be writing to the page. If the entry was
-		 * previously clean then the architecture must guarantee that
-		 * a clear->dirty transition on a cached TLB entry is written
-		 * through and traps if the PTE is unmapped.
-		 */
-		pteval = ptep_get_and_clear(mm, address, pte);
-
-		set_tlb_ubc_flush_pending(mm, page, pte_dirty(pteval));
-	} else {
-		pteval = ptep_clear_flush(vma, address, pte);
-	}
-
-	/* Move the dirty bit to the physical page now the pte is gone. */
-	if (pte_dirty(pteval))
-		set_page_dirty(page);
-
-	/* Update high watermark before we lower rss */
-	update_hiwater_rss(mm);
+		/* Nuke the page table entry. */
+		flush_cache_page(vma, address, pte_pfn(*pvmw.pte));
+		if (should_defer_flush(mm, flags)) {
+			/*
+			 * We clear the PTE but do not flush so potentially
+			 * a remote CPU could still be writing to the page.
+			 * If the entry was previously clean then the
+			 * architecture must guarantee that a clear->dirty
+			 * transition on a cached TLB entry is written through
+			 * and traps if the PTE is unmapped.
+			 */
+			pteval = ptep_get_and_clear(mm, address, pvmw.pte);
+
+			set_tlb_ubc_flush_pending(mm, pte_dirty(pteval));
+		} else {
+			pteval = ptep_clear_flush(vma, address, pvmw.pte);
+		}
+
+		/* Move the dirty bit to the page. Now the pte is gone. */
+		if (pte_dirty(pteval))
+			set_page_dirty(page);
+
+		/* Update high watermark before we lower rss */
+		update_hiwater_rss(mm);
 
-	if (PageHWPoison(page) && !(flags & TTU_IGNORE_HWPOISON)) {
-		if (PageHuge(page)) {
-			hugetlb_count_sub(1 << compound_order(page), mm);
-		} else {
-			dec_mm_counter(mm, mm_counter(page));
-		}
-		set_pte_at(mm, address, pte,
-			   swp_entry_to_pte(make_hwpoison_entry(page)));
-	} else if (pte_unused(pteval)) {
-		/*
-		 * The guest indicated that the page content is of no
-		 * interest anymore. Simply discard the pte, vmscan
-		 * will take care of the rest.
-		 */
-		dec_mm_counter(mm, mm_counter(page));
-	} else if (IS_ENABLED(CONFIG_MIGRATION) && (flags & TTU_MIGRATION)) {
-		swp_entry_t entry;
-		pte_t swp_pte;
-		/*
-		 * Store the pfn of the page in a special migration
-		 * pte. do_swap_page() will wait until the migration
-		 * pte is removed and then restart fault handling.
-		 */
-		entry = make_migration_entry(page, pte_write(pteval));
-		swp_pte = swp_entry_to_pte(entry);
-		if (pte_soft_dirty(pteval))
-			swp_pte = pte_swp_mksoft_dirty(swp_pte);
-		set_pte_at(mm, address, pte, swp_pte);
-	} else if (PageAnon(page)) {
-		swp_entry_t entry = { .val = page_private(page) };
-		pte_t swp_pte;
-		/*
-		 * Store the swap location in the pte.
-		 * See handle_pte_fault() ...
-		 */
-		VM_BUG_ON_PAGE(!PageSwapCache(page), page);
-
-		if (!PageDirty(page) && (flags & TTU_LZFREE)) {
-			/* It's a freeable page by MADV_FREE */
-			dec_mm_counter(mm, MM_ANONPAGES);
-			rp->lazyfreed++;
-			goto discard;
-		}
-
-		if (swap_duplicate(entry) < 0) {
-			set_pte_at(mm, address, pte, pteval);
-			ret = SWAP_FAIL;
-			goto out_unmap;
-		}
-		if (list_empty(&mm->mmlist)) {
-			spin_lock(&mmlist_lock);
-			if (list_empty(&mm->mmlist))
-				list_add(&mm->mmlist, &init_mm.mmlist);
-			spin_unlock(&mmlist_lock);
-		}
-		dec_mm_counter(mm, MM_ANONPAGES);
-		inc_mm_counter(mm, MM_SWAPENTS);
-		swp_pte = swp_entry_to_pte(entry);
-		if (pte_soft_dirty(pteval))
-			swp_pte = pte_swp_mksoft_dirty(swp_pte);
-		set_pte_at(mm, address, pte, swp_pte);
-	} else
-		dec_mm_counter(mm, mm_counter_file(page));
-
-discard:
-	page_remove_rmap(page, PageHuge(page));
-	put_page(page);
-
-out_unmap:
-	pte_unmap_unlock(pte, ptl);
-	if (ret != SWAP_FAIL && ret != SWAP_MLOCK && !(flags & TTU_MUNLOCK))
+		if (PageHWPoison(page) && !(flags & TTU_IGNORE_HWPOISON)) {
+			if (PageHuge(page)) {
+				int nr = 1 << compound_order(page);
+				hugetlb_count_sub(nr, mm);
+			} else {
+				dec_mm_counter(mm, mm_counter(page));
+			}
+
+			pteval = swp_entry_to_pte(make_hwpoison_entry(subpage));
+			set_pte_at(mm, address, pvmw.pte, pteval);
+		} else if (pte_unused(pteval)) {
+			/*
+			 * The guest indicated that the page content is of no
+			 * interest anymore. Simply discard the pte, vmscan
+			 * will take care of the rest.
+			 */
+			dec_mm_counter(mm, mm_counter(page));
+		} else if (IS_ENABLED(CONFIG_MIGRATION) &&
+				(flags & TTU_MIGRATION)) {
+			swp_entry_t entry;
+			pte_t swp_pte;
+			/*
+			 * Store the pfn of the page in a special migration
+			 * pte. do_swap_page() will wait until the migration
+			 * pte is removed and then restart fault handling.
+			 */
+			entry = make_migration_entry(subpage,
+					pte_write(pteval));
+			swp_pte = swp_entry_to_pte(entry);
+			if (pte_soft_dirty(pteval))
+				swp_pte = pte_swp_mksoft_dirty(swp_pte);
+			set_pte_at(mm, address, pvmw.pte, swp_pte);
+		} else if (PageAnon(page)) {
+			swp_entry_t entry = { .val = page_private(subpage) };
+			pte_t swp_pte;
+			/*
+			 * Store the swap location in the pte.
+			 * See handle_pte_fault() ...
+			 */
+			VM_BUG_ON_PAGE(!PageSwapCache(page), page);
+
+			if (!PageDirty(page) && (flags & TTU_LZFREE)) {
+				/* It's a freeable page by MADV_FREE */
+				dec_mm_counter(mm, MM_ANONPAGES);
+				rp->lazyfreed++;
+				goto discard;
+			}
+
+			if (swap_duplicate(entry) < 0) {
+				set_pte_at(mm, address, pvmw.pte, pteval);
+				ret = SWAP_FAIL;
+				page_vma_mapped_walk_done(&pvmw);
+				break;
+			}
+			if (list_empty(&mm->mmlist)) {
+				spin_lock(&mmlist_lock);
+				if (list_empty(&mm->mmlist))
+					list_add(&mm->mmlist, &init_mm.mmlist);
+				spin_unlock(&mmlist_lock);
+			}
+			dec_mm_counter(mm, MM_ANONPAGES);
+			inc_mm_counter(mm, MM_SWAPENTS);
+			swp_pte = swp_entry_to_pte(entry);
+			if (pte_soft_dirty(pteval))
+				swp_pte = pte_swp_mksoft_dirty(swp_pte);
+			set_pte_at(mm, address, pvmw.pte, swp_pte);
+		} else
+			dec_mm_counter(mm, mm_counter_file(page));
+discard:
+		page_remove_rmap(subpage, PageHuge(page));
+		put_page(page);
 		mmu_notifier_invalidate_page(mm, address);
-out:
+	}
 	return ret;
 }
 
@@ -1632,7 +1638,7 @@ static bool invalid_migration_vma(struct vm_area_struct *vma, void *arg)
 
 static int page_mapcount_is_zero(struct page *page)
 {
-	return !page_mapcount(page);
+	return !total_mapcount(page);
 }
 
 /**
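Two details of the rmap.c hunks are worth spelling out. First, page_mapcount_is_zero() now uses total_mapcount(), so the loop in try_to_unmap() only stops once no subpage of a compound page is mapped anywhere. Second, inside the walk the exact subpage behind the current PTE is recovered from the PFN; the hypothetical helper below (not part of the patch) shows the same arithmetic as the 'subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte)' line above.

/* Illustrative helper, not part of this patch. */
static inline struct page *pte_mapped_subpage(struct page *head, pte_t *pte)
{
	/* Offset from the head page's PFN to the PFN the PTE actually maps. */
	return head + (pte_pfn(*pte) - page_to_pfn(head));
}

For a plain page this returns the page itself; for a PTE-mapped THP it selects the individual subpage, which is what make_hwpoison_entry(), make_migration_entry() and page_remove_rmap() are handed in the new code.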