author     Hugh Dickins <hughd@google.com>            2013-02-22 19:34:33 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>  2013-02-23 20:50:17 -0500
commit     340ef3902cf20cec43cdcd1e72ae5cb518be7328 (patch)
tree       98051b821bc3749510dc7e27f14051902aa54743
parent     75980e97daccfc6babbac7e180ff118537955f5d (diff)
mm: numa: cleanup flow of transhuge page migration
When correcting commit 04fa5d6a6547 ("mm: migrate: check page_count of THP before migrating"), Hugh Dickins noted that the control flow for transhuge migration was difficult to follow.  Unconditionally calling put_page() in numamigrate_isolate_page() made the failure paths of both migrate_misplaced_transhuge_page() and migrate_misplaced_page() more complex than they should be.  Further, he was extremely wary that an unlock_page() should ever happen after a put_page(), even if the put_page() should never be the final put_page().

Hugh implemented the following cleanup to simplify the path by calling putback_lru_page() inside numamigrate_isolate_page() if it failed to isolate, and by always calling unlock_page() within migrate_misplaced_transhuge_page().

There is no functional change after this patch is applied, but the code is easier to follow and unlock_page() always happens before put_page().

[mgorman@suse.de: changelog only]
Signed-off-by: Mel Gorman <mgorman@suse.de>
Signed-off-by: Hugh Dickins <hughd@google.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Simon Jeons <simon.jeons@gmail.com>
Cc: Wanpeng Li <liwanp@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--   mm/huge_memory.c   28
-rw-r--r--   mm/migrate.c       95
2 files changed, 52 insertions, 71 deletions
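For readers who want the end state before wading through the hunks, the reshaped numamigrate_isolate_page() after this patch reads roughly as follows. This is a condensed view taken from the mm/migrate.c hunk below (the VM_BUG_ON and the longer comments are trimmed); it shows how every failure path now puts the page back on the LRU inside this helper and returns 0 without consuming the caller's reference:

int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page)
{
	int page_lru;

	/* Avoid migrating to a node that is nearly full */
	if (!migrate_balanced_pgdat(pgdat, 1UL << compound_order(page)))
		return 0;

	if (isolate_lru_page(page))
		return 0;

	/* A pin (expected THP page count is 3) prevents migration */
	if (PageTransHuge(page) && page_count(page) != 3) {
		putback_lru_page(page);		/* failure paths restore the LRU here */
		return 0;
	}

	page_lru = page_is_file_cache(page);
	mod_zone_page_state(page_zone(page), NR_ISOLATED_ANON + page_lru,
				hpage_nr_pages(page));

	/* Isolation took its own reference; drop the caller's pin */
	put_page(page);
	return 1;
}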
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 9db521fa8e8a..f40b2ce23d60 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1296,7 +1296,6 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	int target_nid;
 	int current_nid = -1;
 	bool migrated;
-	bool page_locked = false;
 
 	spin_lock(&mm->page_table_lock);
 	if (unlikely(!pmd_same(pmd, *pmdp)))
@@ -1318,7 +1317,6 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	/* Acquire the page lock to serialise THP migrations */
 	spin_unlock(&mm->page_table_lock);
 	lock_page(page);
-	page_locked = true;
 
 	/* Confirm the PTE did not while locked */
 	spin_lock(&mm->page_table_lock);
@@ -1331,34 +1329,26 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 
 	/* Migrate the THP to the requested node */
 	migrated = migrate_misplaced_transhuge_page(mm, vma,
-				pmdp, pmd, addr,
-				page, target_nid);
-	if (migrated)
-		current_nid = target_nid;
-	else {
-		spin_lock(&mm->page_table_lock);
-		if (unlikely(!pmd_same(pmd, *pmdp))) {
-			unlock_page(page);
-			goto out_unlock;
-		}
-		goto clear_pmdnuma;
-	}
+				pmdp, pmd, addr, page, target_nid);
+	if (!migrated)
+		goto check_same;
 
-	task_numa_fault(current_nid, HPAGE_PMD_NR, migrated);
+	task_numa_fault(target_nid, HPAGE_PMD_NR, true);
 	return 0;
 
+check_same:
+	spin_lock(&mm->page_table_lock);
+	if (unlikely(!pmd_same(pmd, *pmdp)))
+		goto out_unlock;
 clear_pmdnuma:
 	pmd = pmd_mknonnuma(pmd);
 	set_pmd_at(mm, haddr, pmdp, pmd);
 	VM_BUG_ON(pmd_numa(*pmdp));
 	update_mmu_cache_pmd(vma, addr, pmdp);
-	if (page_locked)
-		unlock_page(page);
-
 out_unlock:
 	spin_unlock(&mm->page_table_lock);
 	if (current_nid != -1)
-		task_numa_fault(current_nid, HPAGE_PMD_NR, migrated);
+		task_numa_fault(current_nid, HPAGE_PMD_NR, false);
 	return 0;
 }
 
diff --git a/mm/migrate.c b/mm/migrate.c
index 77f4e70df24d..f560071e89c5 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1557,41 +1557,40 @@ bool numamigrate_update_ratelimit(pg_data_t *pgdat, unsigned long nr_pages)
 
 int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page)
 {
-	int ret = 0;
+	int page_lru;
 
 	VM_BUG_ON(compound_order(page) && !PageTransHuge(page));
 
 	/* Avoid migrating to a node that is nearly full */
-	if (migrate_balanced_pgdat(pgdat, 1UL << compound_order(page))) {
-		int page_lru;
+	if (!migrate_balanced_pgdat(pgdat, 1UL << compound_order(page)))
+		return 0;
 
-		if (isolate_lru_page(page)) {
-			put_page(page);
-			return 0;
-		}
+	if (isolate_lru_page(page))
+		return 0;
 
-		/* Page is isolated */
-		ret = 1;
-		page_lru = page_is_file_cache(page);
-		if (!PageTransHuge(page))
-			inc_zone_page_state(page, NR_ISOLATED_ANON + page_lru);
-		else
-			mod_zone_page_state(page_zone(page),
-					NR_ISOLATED_ANON + page_lru,
-					HPAGE_PMD_NR);
+	/*
+	 * migrate_misplaced_transhuge_page() skips page migration's usual
+	 * check on page_count(), so we must do it here, now that the page
+	 * has been isolated: a GUP pin, or any other pin, prevents migration.
+	 * The expected page count is 3: 1 for page's mapcount and 1 for the
+	 * caller's pin and 1 for the reference taken by isolate_lru_page().
+	 */
+	if (PageTransHuge(page) && page_count(page) != 3) {
+		putback_lru_page(page);
+		return 0;
 	}
 
+	page_lru = page_is_file_cache(page);
+	mod_zone_page_state(page_zone(page), NR_ISOLATED_ANON + page_lru,
+				hpage_nr_pages(page));
+
 	/*
-	 * Page is either isolated or there is not enough space on the target
-	 * node. If isolated, then it has taken a reference count and the
-	 * callers reference can be safely dropped without the page
-	 * disappearing underneath us during migration. Otherwise the page is
-	 * not to be migrated but the callers reference should still be
-	 * dropped so it does not leak.
+	 * Isolating the page has taken another reference, so the
+	 * caller's reference can be safely dropped without the page
+	 * disappearing underneath us during migration.
 	 */
 	put_page(page);
-
-	return ret;
+	return 1;
 }
 
 /*
@@ -1602,7 +1601,7 @@ int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page)
 int migrate_misplaced_page(struct page *page, int node)
 {
 	pg_data_t *pgdat = NODE_DATA(node);
-	int isolated = 0;
+	int isolated;
 	int nr_remaining;
 	LIST_HEAD(migratepages);
 
@@ -1610,20 +1609,16 @@ int migrate_misplaced_page(struct page *page, int node)
 	 * Don't migrate pages that are mapped in multiple processes.
 	 * TODO: Handle false sharing detection instead of this hammer
 	 */
-	if (page_mapcount(page) != 1) {
-		put_page(page);
+	if (page_mapcount(page) != 1)
 		goto out;
-	}
 
 	/*
 	 * Rate-limit the amount of data that is being migrated to a node.
 	 * Optimal placement is no good if the memory bus is saturated and
 	 * all the time is being spent migrating!
 	 */
-	if (numamigrate_update_ratelimit(pgdat, 1)) {
-		put_page(page);
+	if (numamigrate_update_ratelimit(pgdat, 1))
 		goto out;
-	}
 
 	isolated = numamigrate_isolate_page(pgdat, page);
 	if (!isolated)
@@ -1640,12 +1635,19 @@ int migrate_misplaced_page(struct page *page, int node)
 	} else
 		count_vm_numa_event(NUMA_PAGE_MIGRATE);
 	BUG_ON(!list_empty(&migratepages));
-out:
 	return isolated;
+
+out:
+	put_page(page);
+	return 0;
 }
 #endif /* CONFIG_NUMA_BALANCING */
 
 #if defined(CONFIG_NUMA_BALANCING) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
+/*
+ * Migrates a THP to a given target node. page must be locked and is unlocked
+ * before returning.
+ */
 int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 			struct vm_area_struct *vma,
 			pmd_t *pmd, pmd_t entry,
@@ -1676,29 +1678,15 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 
 	new_page = alloc_pages_node(node,
 		(GFP_TRANSHUGE | GFP_THISNODE) & ~__GFP_WAIT, HPAGE_PMD_ORDER);
-	if (!new_page) {
-		count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR);
-		goto out_dropref;
-	}
+	if (!new_page)
+		goto out_fail;
+
 	page_xchg_last_nid(new_page, page_last_nid(page));
 
 	isolated = numamigrate_isolate_page(pgdat, page);
-
-	/*
-	 * Failing to isolate or a GUP pin prevents migration. The expected
-	 * page count is 2. 1 for anonymous pages without a mapping and 1
-	 * for the callers pin. If the page was isolated, the page will
-	 * need to be put back on the LRU.
-	 */
-	if (!isolated || page_count(page) != 2) {
-		count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR);
+	if (!isolated) {
 		put_page(new_page);
-		if (isolated) {
-			putback_lru_page(page);
-			isolated = 0;
-			goto out;
-		}
-		goto out_keep_locked;
+		goto out_fail;
 	}
 
 	/* Prepare a page as a migration target */
@@ -1730,6 +1718,7 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 		putback_lru_page(page);
 
 		count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR);
+		isolated = 0;
 		goto out;
 	}
 
@@ -1774,9 +1763,11 @@ out:
 			-HPAGE_PMD_NR);
 	return isolated;
 
+out_fail:
+	count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR);
 out_dropref:
+	unlock_page(page);
 	put_page(page);
-out_keep_locked:
 	return 0;
 }
 #endif /* CONFIG_NUMA_BALANCING */