author		Mel Gorman <mgorman@suse.de>	2012-11-19 07:35:47 -0500
committer	Mel Gorman <mgorman@suse.de>	2012-12-11 09:42:57 -0500
commit		b32967ff101a7508f70be8de59b278d4df92fa00
tree		b106d5eea06f97d0174f483d6a05a8b7ddd64154 /mm/migrate.c
parent		5bca23035391928c4c7301835accca3551b96cc2
mm: numa: Add THP migration for the NUMA working set scanning fault case.
Note: This is very heavily based on a patch from Peter Zijlstra with
fixes from Ingo Molnar, Hugh Dickins and Johannes Weiner. That patch
put a lot of migration logic into mm/huge_memory.c where it does
not belong. This version tries to share some of the migration
logic with migrate_misplaced_page. However, it should be noted
that mm/migrate.c is now doing more pagetable manipulation
than is preferred. The end result is barely recognisable, so as
before the signed-offs had to be removed, but they will be re-added
if the original authors are ok with it.
Add THP migration for the NUMA working set scanning fault case.
It uses the page lock to serialize. No migration pte dance is
necessary because the pte is already unmapped when we decide
to migrate.
[dhillf@gmail.com: Fix memory leak on isolation failure]
[dhillf@gmail.com: Fix transfer of last_nid information]
Signed-off-by: Mel Gorman <mgorman@suse.de>
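
The key serialization detail in the patch below is worth restating: the huge
page is copied without inserting migration ptes, and the target PMD is then
rechecked under mm->page_table_lock before the copy is wired in. If the PMD
changed while the copy was in flight, the new page is discarded and the
original goes back on the LRU. The following is a minimal userspace sketch of
that copy-then-recheck pattern, assuming nothing beyond POSIX threads; struct
slot, migrate_slot and table_lock are hypothetical stand-ins for the pmd, the
huge page and the page table lock, not kernel interfaces.

/* Sketch only, not kernel code: all names here are hypothetical. */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

struct slot {
	char data[32];		/* stands in for the huge page contents */
	unsigned long version;	/* stands in for the pmd value */
};

static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;

/*
 * Copy outside the lock, then recheck under the lock that the mapping is
 * still the one we decided to migrate (the pmd_same() test in the patch).
 * Install the copy on success; tell the caller to discard it on failure.
 */
static bool migrate_slot(struct slot *src, struct slot *dst,
			 unsigned long snapshot)
{
	memcpy(dst->data, src->data, sizeof(dst->data));	/* unlocked copy */

	pthread_mutex_lock(&table_lock);
	if (src->version != snapshot) {		/* like !pmd_same(*pmd, entry) */
		pthread_mutex_unlock(&table_lock);
		return false;			/* raced: caller frees dst */
	}
	dst->version = snapshot + 1;		/* install the new mapping */
	pthread_mutex_unlock(&table_lock);
	return true;
}

int main(void)
{
	struct slot src = { .data = "data on the wrong node", .version = 42 };
	struct slot dst = { 0 };

	if (migrate_slot(&src, &dst, src.version))
		printf("migrated: %s\n", dst.data);
	else
		printf("raced; kept the original page\n");
	return 0;
}

The kernel path has to undo more state on failure (page flags, mlock, LRU
placement), but the control flow is the same: the expensive copy runs outside
the lock, and the lock covers only the final compare-and-install.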
Diffstat (limited to 'mm/migrate.c')
-rw-r--r--	mm/migrate.c	231
1 file changed, 190 insertions(+), 41 deletions(-)
diff --git a/mm/migrate.c b/mm/migrate.c
index 2a5ce135eef0..c9400960fd52 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -410,7 +410,7 @@ int migrate_huge_page_move_mapping(struct address_space *mapping,
  */
 void migrate_page_copy(struct page *newpage, struct page *page)
 {
-	if (PageHuge(page))
+	if (PageHuge(page) || PageTransHuge(page))
 		copy_huge_page(newpage, page);
 	else
 		copy_highpage(newpage, page);
@@ -1491,25 +1491,10 @@ bool migrate_ratelimited(int node)
 	return true;
 }
 
-/*
- * Attempt to migrate a misplaced page to the specified destination
- * node. Caller is expected to have an elevated reference count on
- * the page that will be dropped by this function before returning.
- */
-int migrate_misplaced_page(struct page *page, int node)
+/* Returns true if the node is migrate rate-limited after the update */
+bool numamigrate_update_ratelimit(pg_data_t *pgdat)
 {
-	pg_data_t *pgdat = NODE_DATA(node);
-	int isolated = 0;
-	LIST_HEAD(migratepages);
-
-	/*
-	 * Don't migrate pages that are mapped in multiple processes.
-	 * TODO: Handle false sharing detection instead of this hammer
-	 */
-	if (page_mapcount(page) != 1) {
-		put_page(page);
-		goto out;
-	}
+	bool rate_limited = false;
 
 	/*
 	 * Rate-limit the amount of data that is being migrated to a node.
@@ -1522,13 +1507,18 @@ int migrate_misplaced_page(struct page *page, int node)
 		pgdat->numabalancing_migrate_next_window = jiffies +
 			msecs_to_jiffies(migrate_interval_millisecs);
 	}
-	if (pgdat->numabalancing_migrate_nr_pages > ratelimit_pages) {
-		spin_unlock(&pgdat->numabalancing_migrate_lock);
-		put_page(page);
-		goto out;
-	}
-	pgdat->numabalancing_migrate_nr_pages++;
+	if (pgdat->numabalancing_migrate_nr_pages > ratelimit_pages)
+		rate_limited = true;
+	else
+		pgdat->numabalancing_migrate_nr_pages++;
 	spin_unlock(&pgdat->numabalancing_migrate_lock);
+
+	return rate_limited;
+}
+
+int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page)
+{
+	int ret = 0;
 
 	/* Avoid migrating to a node that is nearly full */
 	if (migrate_balanced_pgdat(pgdat, 1)) {
@@ -1536,13 +1526,18 @@ int migrate_misplaced_page(struct page *page, int node)
 
 		if (isolate_lru_page(page)) {
 			put_page(page);
-			goto out;
+			return 0;
 		}
-		isolated = 1;
 
+		/* Page is isolated */
+		ret = 1;
 		page_lru = page_is_file_cache(page);
-		inc_zone_page_state(page, NR_ISOLATED_ANON + page_lru);
-		list_add(&page->lru, &migratepages);
+		if (!PageTransHuge(page))
+			inc_zone_page_state(page, NR_ISOLATED_ANON + page_lru);
+		else
+			mod_zone_page_state(page_zone(page),
+					NR_ISOLATED_ANON + page_lru,
+					HPAGE_PMD_NR);
 	}
 
 	/*
@@ -1555,23 +1550,177 @@ int migrate_misplaced_page(struct page *page, int node)
 	 */
 	put_page(page);
 
-	if (isolated) {
-		int nr_remaining;
-
-		nr_remaining = migrate_pages(&migratepages,
-				alloc_misplaced_dst_page,
-				node, false, MIGRATE_ASYNC,
-				MR_NUMA_MISPLACED);
-		if (nr_remaining) {
-			putback_lru_pages(&migratepages);
-			isolated = 0;
-		} else
-			count_vm_numa_event(NUMA_PAGE_MIGRATE);
+	return ret;
+}
+
+/*
+ * Attempt to migrate a misplaced page to the specified destination
+ * node. Caller is expected to have an elevated reference count on
+ * the page that will be dropped by this function before returning.
+ */
+int migrate_misplaced_page(struct page *page, int node)
+{
+	pg_data_t *pgdat = NODE_DATA(node);
+	int isolated = 0;
+	int nr_remaining;
+	LIST_HEAD(migratepages);
+
+	/*
+	 * Don't migrate pages that are mapped in multiple processes.
+	 * TODO: Handle false sharing detection instead of this hammer
+	 */
+	if (page_mapcount(page) != 1) {
+		put_page(page);
+		goto out;
 	}
+
+	/*
+	 * Rate-limit the amount of data that is being migrated to a node.
+	 * Optimal placement is no good if the memory bus is saturated and
+	 * all the time is being spent migrating!
+	 */
+	if (numamigrate_update_ratelimit(pgdat)) {
+		put_page(page);
+		goto out;
+	}
+
+	isolated = numamigrate_isolate_page(pgdat, page);
+	if (!isolated)
+		goto out;
+
+	list_add(&page->lru, &migratepages);
+	nr_remaining = migrate_pages(&migratepages,
+			alloc_misplaced_dst_page,
+			node, false, MIGRATE_ASYNC,
+			MR_NUMA_MISPLACED);
+	if (nr_remaining) {
+		putback_lru_pages(&migratepages);
+		isolated = 0;
+	} else
+		count_vm_numa_event(NUMA_PAGE_MIGRATE);
 	BUG_ON(!list_empty(&migratepages));
 out:
 	return isolated;
 }
+
+int migrate_misplaced_transhuge_page(struct mm_struct *mm,
+				struct vm_area_struct *vma,
+				pmd_t *pmd, pmd_t entry,
+				unsigned long address,
+				struct page *page, int node)
+{
+	unsigned long haddr = address & HPAGE_PMD_MASK;
+	pg_data_t *pgdat = NODE_DATA(node);
+	int isolated = 0;
+	struct page *new_page = NULL;
+	struct mem_cgroup *memcg = NULL;
+	int page_lru = page_is_file_cache(page);
+
+	/*
+	 * Don't migrate pages that are mapped in multiple processes.
+	 * TODO: Handle false sharing detection instead of this hammer
+	 */
+	if (page_mapcount(page) != 1)
+		goto out_dropref;
+
+	/*
+	 * Rate-limit the amount of data that is being migrated to a node.
+	 * Optimal placement is no good if the memory bus is saturated and
+	 * all the time is being spent migrating!
+	 */
+	if (numamigrate_update_ratelimit(pgdat))
+		goto out_dropref;
+
+	new_page = alloc_pages_node(node,
+		(GFP_TRANSHUGE | GFP_THISNODE) & ~__GFP_WAIT, HPAGE_PMD_ORDER);
+	if (!new_page)
+		goto out_dropref;
+	page_xchg_last_nid(new_page, page_last_nid(page));
+
+	isolated = numamigrate_isolate_page(pgdat, page);
+	if (!isolated) {
+		put_page(new_page);
+		goto out_keep_locked;
+	}
+
+	/* Prepare a page as a migration target */
+	__set_page_locked(new_page);
+	SetPageSwapBacked(new_page);
+
+	/* anon mapping, we can simply copy page->mapping to the new page: */
+	new_page->mapping = page->mapping;
+	new_page->index = page->index;
+	migrate_page_copy(new_page, page);
+	WARN_ON(PageLRU(new_page));
+
+	/* Recheck the target PMD */
+	spin_lock(&mm->page_table_lock);
+	if (unlikely(!pmd_same(*pmd, entry))) {
+		spin_unlock(&mm->page_table_lock);
+
+		/* Reverse changes made by migrate_page_copy() */
+		if (TestClearPageActive(new_page))
+			SetPageActive(page);
+		if (TestClearPageUnevictable(new_page))
+			SetPageUnevictable(page);
+		mlock_migrate_page(page, new_page);
+
+		unlock_page(new_page);
+		put_page(new_page);		/* Free it */
+
+		unlock_page(page);
+		putback_lru_page(page);
+
+		count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR);
+		goto out;
+	}
+
+	/*
+	 * Traditional migration needs to prepare the memcg charge
+	 * transaction early to prevent the old page from being
+	 * uncharged when installing migration entries. Here we can
+	 * save the potential rollback and start the charge transfer
+	 * only when migration is already known to end successfully.
+	 */
+	mem_cgroup_prepare_migration(page, new_page, &memcg);
+
+	entry = mk_pmd(new_page, vma->vm_page_prot);
+	entry = pmd_mknonnuma(entry);
+	entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
+	entry = pmd_mkhuge(entry);
+
+	page_add_new_anon_rmap(new_page, vma, haddr);
+
+	set_pmd_at(mm, haddr, pmd, entry);
+	update_mmu_cache_pmd(vma, address, entry);
+	page_remove_rmap(page);
+	/*
+	 * Finish the charge transaction under the page table lock to
+	 * prevent split_huge_page() from dividing up the charge
+	 * before it's fully transferred to the new page.
+	 */
+	mem_cgroup_end_migration(memcg, page, new_page, true);
+	spin_unlock(&mm->page_table_lock);
+
+	unlock_page(new_page);
+	unlock_page(page);
+	put_page(page);			/* Drop the rmap reference */
+	put_page(page);			/* Drop the LRU isolation reference */
+
+	count_vm_events(PGMIGRATE_SUCCESS, HPAGE_PMD_NR);
+	count_vm_numa_events(NUMA_PAGE_MIGRATE, HPAGE_PMD_NR);
+
+out:
+	mod_zone_page_state(page_zone(page),
+			NR_ISOLATED_ANON + page_lru,
+			-HPAGE_PMD_NR);
+	return isolated;
+
+out_dropref:
+	put_page(page);
+out_keep_locked:
+	return 0;
+}
 #endif /* CONFIG_NUMA_BALANCING */
 
 #endif /* CONFIG_NUMA */
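
Both migrate_misplaced_page() and migrate_misplaced_transhuge_page() now
funnel through numamigrate_update_ratelimit(), a fixed-window counter: each
node gets a budget of ratelimit_pages per migrate_interval_millisecs window,
and once the budget is spent further migrations are refused until the window
rolls over. Below is a self-contained userspace sketch of the same scheme,
not the kernel code: ticks stand in for jiffies, tick wraparound (handled by
time_after() in the kernel) is ignored, and the per-pgdat spinlock is dropped
because the sketch is single-threaded.

/* Sketch only, not kernel code: all names here are hypothetical. */
#include <stdbool.h>
#include <stdio.h>

enum {
	WINDOW_TICKS    = 100,	/* stands in for migrate_interval_millisecs */
	RATELIMIT_PAGES = 128,	/* stands in for ratelimit_pages */
};

struct node_ratelimit {
	unsigned long next_window;	/* tick at which the window rolls over */
	unsigned long nr_pages;		/* pages accounted in this window */
};

/* Returns true if the node is rate-limited after the update. */
static bool update_ratelimit(struct node_ratelimit *rl, unsigned long now)
{
	if (now > rl->next_window) {		/* window expired: reset budget */
		rl->nr_pages = 0;
		rl->next_window = now + WINDOW_TICKS;
	}
	if (rl->nr_pages > RATELIMIT_PAGES)
		return true;			/* budget spent: refuse */
	rl->nr_pages++;
	return false;
}

int main(void)
{
	struct node_ratelimit rl = { 0, 0 };
	unsigned long now = 1;
	int refused = 0;

	for (int i = 0; i < 200; i++)		/* 200 requests in one window */
		refused += update_ratelimit(&rl, now);
	printf("refused %d of 200 in the first window\n", refused);

	now += WINDOW_TICKS + 1;		/* window rolls over */
	printf("after rollover, limited=%d\n", update_ratelimit(&rl, now));
	return 0;
}

As in the patch, the counter is bumped by one per call, so a call made for a
transhuge page is accounted as a single page even though it moves
HPAGE_PMD_NR pages.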