author		Hugh Dickins <hughd@google.com>		2013-02-22 19:34:33 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2013-02-23 20:50:17 -0500
commit		340ef3902cf20cec43cdcd1e72ae5cb518be7328 (patch)
tree		98051b821bc3749510dc7e27f14051902aa54743
parent		75980e97daccfc6babbac7e180ff118537955f5d (diff)
mm: numa: cleanup flow of transhuge page migration
When correcting commit 04fa5d6a6547 ("mm: migrate: check page_count of
THP before migrating") Hugh Dickins noted that the control flow for
transhuge migration was difficult to follow.  Unconditionally calling
put_page() in numamigrate_isolate_page() made the failure paths of both
migrate_misplaced_transhuge_page() and migrate_misplaced_page() more
complex than they should be.  Further, he was extremely wary of an
unlock_page() ever happening after a put_page(), even if that put_page()
should never be the final put_page().
Hugh implemented the following cleanup to simplify the path: call
putback_lru_page() inside numamigrate_isolate_page() if isolation fails,
and always call unlock_page() within
migrate_misplaced_transhuge_page().
There is no functional change after this patch is applied, but the code
is easier to follow and unlock_page() always happens before put_page().
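
To make that ordering concrete, here is a small self-contained userspace
model of the intended shape.  It is an illustration only, not kernel code:
struct toy_page, toy_isolate() and toy_migrate_thp() are invented names
that merely mirror the roles of the kernel functions discussed above.

/*
 * Toy model of the cleaned-up flow: the isolate step undoes its own
 * failure, and every exit unlocks the page before the caller's
 * reference is dropped.  Illustrative only, not the kernel code.
 */
#include <stdbool.h>
#include <stdio.h>

struct toy_page {
	int refcount;		/* stands in for page_count() */
	bool locked;		/* stands in for PageLocked() */
	bool on_lru;
};

/* Models numamigrate_isolate_page(): returns 1 only if isolation succeeded. */
static int toy_isolate(struct toy_page *page, bool node_has_room)
{
	if (!node_has_room)
		return 0;			/* nothing taken, nothing to undo */
	page->on_lru = false;			/* "isolate_lru_page()" */
	page->refcount++;			/* isolation takes its own reference */
	if (page->refcount != 3) {		/* unexpected extra pin: undo it here */
		page->on_lru = true;
		page->refcount--;
		return 0;
	}
	return 1;
}

/* Models migrate_misplaced_transhuge_page(): page arrives locked, leaves unlocked. */
static bool toy_migrate_thp(struct toy_page *page, bool node_has_room)
{
	bool migrated = false;

	if (toy_isolate(page, node_has_room)) {
		/* ... the copy to the new node would happen here ... */
		page->refcount--;		/* drop the isolation reference */
		migrated = true;
	}
	page->locked = false;			/* unlock_page() comes first ... */
	page->refcount--;			/* ... then the caller's put_page() */
	return migrated;
}

int main(void)
{
	/* Two references to start with: the mapping and the caller's pin. */
	struct toy_page page = { .refcount = 2, .locked = true, .on_lru = true };

	printf("migrated=%d refcount=%d locked=%d\n",
	       toy_migrate_thp(&page, true), page.refcount, page.locked);
	return 0;
}

Whatever toy_isolate() decides, the caller never has to undo the
isolation itself, which is exactly the simplification the patch makes.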
[mgorman@suse.de: changelog only]
Signed-off-by: Mel Gorman <mgorman@suse.de>
Signed-off-by: Hugh Dickins <hughd@google.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Simon Jeons <simon.jeons@gmail.com>
Cc: Wanpeng Li <liwanp@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--	mm/huge_memory.c	28
-rw-r--r--	mm/migrate.c	95
2 files changed, 52 insertions, 71 deletions
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 9db521fa8e8a..f40b2ce23d60 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1296,7 +1296,6 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	int target_nid;
 	int current_nid = -1;
 	bool migrated;
-	bool page_locked = false;
 
 	spin_lock(&mm->page_table_lock);
 	if (unlikely(!pmd_same(pmd, *pmdp)))
@@ -1318,7 +1317,6 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	/* Acquire the page lock to serialise THP migrations */
 	spin_unlock(&mm->page_table_lock);
 	lock_page(page);
-	page_locked = true;
 
 	/* Confirm the PTE did not change while locked */
 	spin_lock(&mm->page_table_lock);
@@ -1331,34 +1329,26 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 
 	/* Migrate the THP to the requested node */
 	migrated = migrate_misplaced_transhuge_page(mm, vma,
-				pmdp, pmd, addr,
-				page, target_nid);
-	if (migrated)
-		current_nid = target_nid;
-	else {
-		spin_lock(&mm->page_table_lock);
-		if (unlikely(!pmd_same(pmd, *pmdp))) {
-			unlock_page(page);
-			goto out_unlock;
-		}
-		goto clear_pmdnuma;
-	}
+				pmdp, pmd, addr, page, target_nid);
+	if (!migrated)
+		goto check_same;
 
-	task_numa_fault(current_nid, HPAGE_PMD_NR, migrated);
+	task_numa_fault(target_nid, HPAGE_PMD_NR, true);
 	return 0;
 
+check_same:
+	spin_lock(&mm->page_table_lock);
+	if (unlikely(!pmd_same(pmd, *pmdp)))
+		goto out_unlock;
 clear_pmdnuma:
 	pmd = pmd_mknonnuma(pmd);
 	set_pmd_at(mm, haddr, pmdp, pmd);
 	VM_BUG_ON(pmd_numa(*pmdp));
 	update_mmu_cache_pmd(vma, addr, pmdp);
-	if (page_locked)
-		unlock_page(page);
-
 out_unlock:
 	spin_unlock(&mm->page_table_lock);
 	if (current_nid != -1)
-		task_numa_fault(current_nid, HPAGE_PMD_NR, migrated);
+		task_numa_fault(current_nid, HPAGE_PMD_NR, false);
 	return 0;
 }
 
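Read off the new column of the hunk above, the tail of
do_huge_pmd_numa_page() after this patch becomes a single forward flow
(a reconstruction, abridged to the lines visible in the diff):

	/* Migrate the THP to the requested node */
	migrated = migrate_misplaced_transhuge_page(mm, vma,
				pmdp, pmd, addr, page, target_nid);
	if (!migrated)
		goto check_same;

	task_numa_fault(target_nid, HPAGE_PMD_NR, true);
	return 0;

check_same:
	spin_lock(&mm->page_table_lock);
	if (unlikely(!pmd_same(pmd, *pmdp)))
		goto out_unlock;
clear_pmdnuma:
	pmd = pmd_mknonnuma(pmd);
	set_pmd_at(mm, haddr, pmdp, pmd);
	VM_BUG_ON(pmd_numa(*pmdp));
	update_mmu_cache_pmd(vma, addr, pmdp);
out_unlock:
	spin_unlock(&mm->page_table_lock);
	if (current_nid != -1)
		task_numa_fault(current_nid, HPAGE_PMD_NR, false);
	return 0;
}

Note that unlock_page() does not appear in this tail at all: after this
patch, migrate_misplaced_transhuge_page() unlocks the page on every
return, whether migration succeeded or not.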
diff --git a/mm/migrate.c b/mm/migrate.c
index 77f4e70df24d..f560071e89c5 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1557,41 +1557,40 @@ bool numamigrate_update_ratelimit(pg_data_t *pgdat, unsigned long nr_pages)
 
 int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page)
 {
-	int ret = 0;
+	int page_lru;
 
 	VM_BUG_ON(compound_order(page) && !PageTransHuge(page));
 
 	/* Avoid migrating to a node that is nearly full */
-	if (migrate_balanced_pgdat(pgdat, 1UL << compound_order(page))) {
-		int page_lru;
+	if (!migrate_balanced_pgdat(pgdat, 1UL << compound_order(page)))
+		return 0;
 
-		if (isolate_lru_page(page)) {
-			put_page(page);
-			return 0;
-		}
+	if (isolate_lru_page(page))
+		return 0;
 
-		/* Page is isolated */
-		ret = 1;
-		page_lru = page_is_file_cache(page);
-		if (!PageTransHuge(page))
-			inc_zone_page_state(page, NR_ISOLATED_ANON + page_lru);
-		else
-			mod_zone_page_state(page_zone(page),
-					NR_ISOLATED_ANON + page_lru,
-					HPAGE_PMD_NR);
+	/*
+	 * migrate_misplaced_transhuge_page() skips page migration's usual
+	 * check on page_count(), so we must do it here, now that the page
+	 * has been isolated: a GUP pin, or any other pin, prevents migration.
+	 * The expected page count is 3: 1 for page's mapcount and 1 for the
+	 * caller's pin and 1 for the reference taken by isolate_lru_page().
+	 */
+	if (PageTransHuge(page) && page_count(page) != 3) {
+		putback_lru_page(page);
+		return 0;
 	}
 
+	page_lru = page_is_file_cache(page);
+	mod_zone_page_state(page_zone(page), NR_ISOLATED_ANON + page_lru,
+				hpage_nr_pages(page));
+
 	/*
-	 * Page is either isolated or there is not enough space on the target
-	 * node. If isolated, then it has taken a reference count and the
-	 * callers reference can be safely dropped without the page
-	 * disappearing underneath us during migration. Otherwise the page is
-	 * not to be migrated but the callers reference should still be
-	 * dropped so it does not leak.
+	 * Isolating the page has taken another reference, so the
+	 * caller's reference can be safely dropped without the page
+	 * disappearing underneath us during migration.
 	 */
 	put_page(page);
-
-	return ret;
+	return 1;
 }
 
@@ -1602,7 +1601,7 @@ int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page)
 int migrate_misplaced_page(struct page *page, int node)
 {
 	pg_data_t *pgdat = NODE_DATA(node);
-	int isolated = 0;
+	int isolated;
 	int nr_remaining;
 	LIST_HEAD(migratepages);
 
@@ -1610,20 +1609,16 @@ int migrate_misplaced_page(struct page *page, int node)
 	 * Don't migrate pages that are mapped in multiple processes.
 	 * TODO: Handle false sharing detection instead of this hammer
 	 */
-	if (page_mapcount(page) != 1) {
-		put_page(page);
+	if (page_mapcount(page) != 1)
 		goto out;
-	}
 
 	/*
 	 * Rate-limit the amount of data that is being migrated to a node.
 	 * Optimal placement is no good if the memory bus is saturated and
 	 * all the time is being spent migrating!
 	 */
-	if (numamigrate_update_ratelimit(pgdat, 1)) {
-		put_page(page);
+	if (numamigrate_update_ratelimit(pgdat, 1))
 		goto out;
-	}
 
 	isolated = numamigrate_isolate_page(pgdat, page);
 	if (!isolated)
@@ -1640,12 +1635,19 @@ int migrate_misplaced_page(struct page *page, int node)
 	} else
 		count_vm_numa_event(NUMA_PAGE_MIGRATE);
 	BUG_ON(!list_empty(&migratepages));
-out:
 	return isolated;
+
+out:
+	put_page(page);
+	return 0;
 }
 #endif /* CONFIG_NUMA_BALANCING */
 
 #if defined(CONFIG_NUMA_BALANCING) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
+/*
+ * Migrates a THP to a given target node. page must be locked and is unlocked
+ * before returning.
+ */
 int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 				struct vm_area_struct *vma,
 				pmd_t *pmd, pmd_t entry,
@@ -1676,29 +1678,15 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 
 	new_page = alloc_pages_node(node,
 		(GFP_TRANSHUGE | GFP_THISNODE) & ~__GFP_WAIT, HPAGE_PMD_ORDER);
-	if (!new_page) {
-		count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR);
-		goto out_dropref;
-	}
+	if (!new_page)
+		goto out_fail;
+
 	page_xchg_last_nid(new_page, page_last_nid(page));
 
 	isolated = numamigrate_isolate_page(pgdat, page);
-
-	/*
-	 * Failing to isolate or a GUP pin prevents migration. The expected
-	 * page count is 2. 1 for anonymous pages without a mapping and 1
-	 * for the callers pin. If the page was isolated, the page will
-	 * need to be put back on the LRU.
-	 */
-	if (!isolated || page_count(page) != 2) {
-		count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR);
+	if (!isolated) {
 		put_page(new_page);
-		if (isolated) {
-			putback_lru_page(page);
-			isolated = 0;
-			goto out;
-		}
-		goto out_keep_locked;
+		goto out_fail;
 	}
 
 	/* Prepare a page as a migration target */
@@ -1730,6 +1718,7 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 		putback_lru_page(page);
 
 		count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR);
+		isolated = 0;
 		goto out;
 	}
 
@@ -1774,9 +1763,11 @@ out:
 			-HPAGE_PMD_NR);
 	return isolated;
 
+out_fail:
+	count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR);
 out_dropref:
+	unlock_page(page);
 	put_page(page);
-out_keep_locked:
 	return 0;
 }
 #endif /* CONFIG_NUMA_BALANCING */
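
For reference, the new side of the first mm/migrate.c hunk assembles into
the following post-patch numamigrate_isolate_page() (a reconstruction from
the hunk above, not a separate change):

int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page)
{
	int page_lru;

	VM_BUG_ON(compound_order(page) && !PageTransHuge(page));

	/* Avoid migrating to a node that is nearly full */
	if (!migrate_balanced_pgdat(pgdat, 1UL << compound_order(page)))
		return 0;

	if (isolate_lru_page(page))
		return 0;

	/*
	 * migrate_misplaced_transhuge_page() skips page migration's usual
	 * check on page_count(), so we must do it here, now that the page
	 * has been isolated: a GUP pin, or any other pin, prevents migration.
	 * The expected page count is 3: 1 for page's mapcount and 1 for the
	 * caller's pin and 1 for the reference taken by isolate_lru_page().
	 */
	if (PageTransHuge(page) && page_count(page) != 3) {
		putback_lru_page(page);
		return 0;
	}

	page_lru = page_is_file_cache(page);
	mod_zone_page_state(page_zone(page), NR_ISOLATED_ANON + page_lru,
				hpage_nr_pages(page));

	/*
	 * Isolating the page has taken another reference, so the
	 * caller's reference can be safely dropped without the page
	 * disappearing underneath us during migration.
	 */
	put_page(page);
	return 1;
}

Every failure return now leaves the page exactly as the caller handed it
over, on the LRU with the caller's pin intact; only the success path drops
the caller's reference, which is safe because isolation has taken a
reference of its own.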