Diffstat (limited to 'mm/migrate.c')
 -rw-r--r--  mm/migrate.c  337
 1 file changed, 331 insertions(+), 6 deletions(-)
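
Reading aid, not part of the commit: the interface change below threads a migration "reason" through migrate_pages() so the new tracepoint and vmstat counters can attribute activity. The following is a minimal, hedged sketch of a caller under the updated signature, assuming the declarations from include/linux/migrate.h; it mirrors the sys_move_pages hunk further down, and the wrapper name move_list_to_node() is purely illustrative.

/*
 * Illustrative only: the caller shape after this patch. move_list_to_node()
 * is a hypothetical wrapper; the other identifiers come from the diff below.
 */
static int move_list_to_node(struct list_head *pagelist,
			     new_page_t get_new_page, unsigned long private)
{
	int err;

	if (list_empty(pagelist))
		return 0;

	/* The new trailing argument is the reason recorded by trace_mm_migrate_pages(). */
	err = migrate_pages(pagelist, get_new_page, private,
			    false, MIGRATE_SYNC, MR_SYSCALL);
	if (err)
		putback_lru_pages(pagelist);	/* return unmigrated pages to the LRU */
	return err;
}
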
diff --git a/mm/migrate.c b/mm/migrate.c
index cae02711181d..32efd8028bc9 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -39,6 +39,9 @@
 
 #include <asm/tlbflush.h>
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/migrate.h>
+
 #include "internal.h"
 
 /*
@@ -293,7 +296,7 @@ static int migrate_page_move_mapping(struct address_space *mapping,
 		struct page *newpage, struct page *page,
 		struct buffer_head *head, enum migrate_mode mode)
 {
-	int expected_count;
+	int expected_count = 0;
 	void **pslot;
 
 	if (!mapping) {
@@ -421,7 +424,7 @@ int migrate_huge_page_move_mapping(struct address_space *mapping,
  */
 void migrate_page_copy(struct page *newpage, struct page *page)
 {
-	if (PageHuge(page))
+	if (PageHuge(page) || PageTransHuge(page))
 		copy_huge_page(newpage, page);
 	else
 		copy_highpage(newpage, page);
@@ -765,7 +768,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
 	 */
 	if (PageAnon(page)) {
 		/*
-		 * Only page_lock_anon_vma() understands the subtleties of
+		 * Only page_lock_anon_vma_read() understands the subtleties of
 		 * getting a hold on an anon_vma from outside one of its mms.
 		 */
 		anon_vma = page_get_anon_vma(page);
@@ -998,10 +1001,11 @@ out:
  */
 int migrate_pages(struct list_head *from,
 		new_page_t get_new_page, unsigned long private, bool offlining,
-		enum migrate_mode mode)
+		enum migrate_mode mode, int reason)
 {
 	int retry = 1;
 	int nr_failed = 0;
+	int nr_succeeded = 0;
 	int pass = 0;
 	struct page *page;
 	struct page *page2;
@@ -1028,6 +1032,7 @@ int migrate_pages(struct list_head *from,
 				retry++;
 				break;
 			case MIGRATEPAGE_SUCCESS:
+				nr_succeeded++;
 				break;
 			default:
 				/* Permanent failure */
@@ -1038,6 +1043,12 @@ int migrate_pages(struct list_head *from,
 	}
 	rc = nr_failed + retry;
 out:
+	if (nr_succeeded)
+		count_vm_events(PGMIGRATE_SUCCESS, nr_succeeded);
+	if (nr_failed)
+		count_vm_events(PGMIGRATE_FAIL, nr_failed);
+	trace_mm_migrate_pages(nr_succeeded, nr_failed, mode, reason);
+
 	if (!swapwrite)
 		current->flags &= ~PF_SWAPWRITE;
 
@@ -1176,7 +1187,8 @@ set_status:
 	err = 0;
 	if (!list_empty(&pagelist)) {
 		err = migrate_pages(&pagelist, new_page_node,
-				(unsigned long)pm, 0, MIGRATE_SYNC);
+				(unsigned long)pm, 0, MIGRATE_SYNC,
+				MR_SYSCALL);
 		if (err)
 			putback_lru_pages(&pagelist);
 	}
@@ -1440,4 +1452,317 @@ int migrate_vmas(struct mm_struct *mm, const nodemask_t *to,
 	}
 	return err;
 }
-#endif
+
+#ifdef CONFIG_NUMA_BALANCING
+/*
+ * Returns true if this is a safe migration target node for misplaced NUMA
+ * pages. Currently it only checks the watermarks which crude
+ */
+static bool migrate_balanced_pgdat(struct pglist_data *pgdat,
+				   int nr_migrate_pages)
+{
+	int z;
+	for (z = pgdat->nr_zones - 1; z >= 0; z--) {
+		struct zone *zone = pgdat->node_zones + z;
+
+		if (!populated_zone(zone))
+			continue;
+
+		if (zone->all_unreclaimable)
+			continue;
+
+		/* Avoid waking kswapd by allocating pages_to_migrate pages. */
+		if (!zone_watermark_ok(zone, 0,
+				       high_wmark_pages(zone) +
+				       nr_migrate_pages,
+				       0, 0))
+			continue;
+		return true;
+	}
+	return false;
+}
+
+static struct page *alloc_misplaced_dst_page(struct page *page,
+					   unsigned long data,
+					   int **result)
+{
+	int nid = (int) data;
+	struct page *newpage;
+
+	newpage = alloc_pages_exact_node(nid,
+					 (GFP_HIGHUSER_MOVABLE | GFP_THISNODE |
+					  __GFP_NOMEMALLOC | __GFP_NORETRY |
+					  __GFP_NOWARN) &
+					 ~GFP_IOFS, 0);
+	if (newpage)
+		page_xchg_last_nid(newpage, page_last_nid(page));
+
+	return newpage;
+}
+
+/*
+ * page migration rate limiting control.
+ * Do not migrate more than @pages_to_migrate in a @migrate_interval_millisecs
+ * window of time. Default here says do not migrate more than 1280M per second.
+ * If a node is rate-limited then PTE NUMA updates are also rate-limited. However
+ * as it is faults that reset the window, pte updates will happen unconditionally
+ * if there has not been a fault since @pteupdate_interval_millisecs after the
+ * throttle window closed.
+ */
+static unsigned int migrate_interval_millisecs __read_mostly = 100;
+static unsigned int pteupdate_interval_millisecs __read_mostly = 1000;
+static unsigned int ratelimit_pages __read_mostly = 128 << (20 - PAGE_SHIFT);
+
+/* Returns true if NUMA migration is currently rate limited */
+bool migrate_ratelimited(int node)
+{
+	pg_data_t *pgdat = NODE_DATA(node);
+
+	if (time_after(jiffies, pgdat->numabalancing_migrate_next_window +
+				msecs_to_jiffies(pteupdate_interval_millisecs)))
+		return false;
+
+	if (pgdat->numabalancing_migrate_nr_pages < ratelimit_pages)
+		return false;
+
+	return true;
+}
+
+/* Returns true if the node is migrate rate-limited after the update */
+bool numamigrate_update_ratelimit(pg_data_t *pgdat, unsigned long nr_pages)
+{
+	bool rate_limited = false;
+
+	/*
+	 * Rate-limit the amount of data that is being migrated to a node.
+	 * Optimal placement is no good if the memory bus is saturated and
+	 * all the time is being spent migrating!
+	 */
+	spin_lock(&pgdat->numabalancing_migrate_lock);
+	if (time_after(jiffies, pgdat->numabalancing_migrate_next_window)) {
+		pgdat->numabalancing_migrate_nr_pages = 0;
+		pgdat->numabalancing_migrate_next_window = jiffies +
+			msecs_to_jiffies(migrate_interval_millisecs);
+	}
+	if (pgdat->numabalancing_migrate_nr_pages > ratelimit_pages)
+		rate_limited = true;
+	else
+		pgdat->numabalancing_migrate_nr_pages += nr_pages;
+	spin_unlock(&pgdat->numabalancing_migrate_lock);
+
+	return rate_limited;
+}
+
+int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page)
+{
+	int ret = 0;
+
+	/* Avoid migrating to a node that is nearly full */
+	if (migrate_balanced_pgdat(pgdat, 1)) {
+		int page_lru;
+
+		if (isolate_lru_page(page)) {
+			put_page(page);
+			return 0;
+		}
+
+		/* Page is isolated */
+		ret = 1;
+		page_lru = page_is_file_cache(page);
+		if (!PageTransHuge(page))
+			inc_zone_page_state(page, NR_ISOLATED_ANON + page_lru);
+		else
+			mod_zone_page_state(page_zone(page),
+					NR_ISOLATED_ANON + page_lru,
+					HPAGE_PMD_NR);
+	}
+
+	/*
+	 * Page is either isolated or there is not enough space on the target
+	 * node. If isolated, then it has taken a reference count and the
+	 * callers reference can be safely dropped without the page
+	 * disappearing underneath us during migration. Otherwise the page is
+	 * not to be migrated but the callers reference should still be
+	 * dropped so it does not leak.
+	 */
+	put_page(page);
+
+	return ret;
+}
+
+/*
+ * Attempt to migrate a misplaced page to the specified destination
+ * node. Caller is expected to have an elevated reference count on
+ * the page that will be dropped by this function before returning.
+ */
+int migrate_misplaced_page(struct page *page, int node)
+{
+	pg_data_t *pgdat = NODE_DATA(node);
+	int isolated = 0;
+	int nr_remaining;
+	LIST_HEAD(migratepages);
+
+	/*
+	 * Don't migrate pages that are mapped in multiple processes.
+	 * TODO: Handle false sharing detection instead of this hammer
+	 */
+	if (page_mapcount(page) != 1) {
+		put_page(page);
+		goto out;
+	}
+
+	/*
+	 * Rate-limit the amount of data that is being migrated to a node.
+	 * Optimal placement is no good if the memory bus is saturated and
+	 * all the time is being spent migrating!
+	 */
+	if (numamigrate_update_ratelimit(pgdat, 1)) {
+		put_page(page);
+		goto out;
+	}
+
+	isolated = numamigrate_isolate_page(pgdat, page);
+	if (!isolated)
+		goto out;
+
+	list_add(&page->lru, &migratepages);
+	nr_remaining = migrate_pages(&migratepages,
+			alloc_misplaced_dst_page,
+			node, false, MIGRATE_ASYNC,
+			MR_NUMA_MISPLACED);
+	if (nr_remaining) {
+		putback_lru_pages(&migratepages);
+		isolated = 0;
+	} else
+		count_vm_numa_event(NUMA_PAGE_MIGRATE);
+	BUG_ON(!list_empty(&migratepages));
+out:
+	return isolated;
+}
+#endif /* CONFIG_NUMA_BALANCING */
+
+#if defined(CONFIG_NUMA_BALANCING) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
+int migrate_misplaced_transhuge_page(struct mm_struct *mm,
+				struct vm_area_struct *vma,
+				pmd_t *pmd, pmd_t entry,
+				unsigned long address,
+				struct page *page, int node)
+{
+	unsigned long haddr = address & HPAGE_PMD_MASK;
+	pg_data_t *pgdat = NODE_DATA(node);
+	int isolated = 0;
+	struct page *new_page = NULL;
+	struct mem_cgroup *memcg = NULL;
+	int page_lru = page_is_file_cache(page);
+
+	/*
+	 * Don't migrate pages that are mapped in multiple processes.
+	 * TODO: Handle false sharing detection instead of this hammer
+	 */
+	if (page_mapcount(page) != 1)
+		goto out_dropref;
+
+	/*
+	 * Rate-limit the amount of data that is being migrated to a node.
+	 * Optimal placement is no good if the memory bus is saturated and
+	 * all the time is being spent migrating!
+	 */
+	if (numamigrate_update_ratelimit(pgdat, HPAGE_PMD_NR))
+		goto out_dropref;
+
+	new_page = alloc_pages_node(node,
+		(GFP_TRANSHUGE | GFP_THISNODE) & ~__GFP_WAIT, HPAGE_PMD_ORDER);
+	if (!new_page) {
+		count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR);
+		goto out_dropref;
+	}
+	page_xchg_last_nid(new_page, page_last_nid(page));
+
+	isolated = numamigrate_isolate_page(pgdat, page);
+	if (!isolated) {
+		count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR);
+		put_page(new_page);
+		goto out_keep_locked;
+	}
+
+	/* Prepare a page as a migration target */
+	__set_page_locked(new_page);
+	SetPageSwapBacked(new_page);
+
+	/* anon mapping, we can simply copy page->mapping to the new page: */
+	new_page->mapping = page->mapping;
+	new_page->index = page->index;
+	migrate_page_copy(new_page, page);
+	WARN_ON(PageLRU(new_page));
+
+	/* Recheck the target PMD */
+	spin_lock(&mm->page_table_lock);
+	if (unlikely(!pmd_same(*pmd, entry))) {
+		spin_unlock(&mm->page_table_lock);
+
+		/* Reverse changes made by migrate_page_copy() */
+		if (TestClearPageActive(new_page))
+			SetPageActive(page);
+		if (TestClearPageUnevictable(new_page))
+			SetPageUnevictable(page);
+		mlock_migrate_page(page, new_page);
+
+		unlock_page(new_page);
+		put_page(new_page);		/* Free it */
+
+		unlock_page(page);
+		putback_lru_page(page);
+
+		count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR);
+		goto out;
+	}
+
+	/*
+	 * Traditional migration needs to prepare the memcg charge
+	 * transaction early to prevent the old page from being
+	 * uncharged when installing migration entries. Here we can
+	 * save the potential rollback and start the charge transfer
+	 * only when migration is already known to end successfully.
+	 */
+	mem_cgroup_prepare_migration(page, new_page, &memcg);
+
+	entry = mk_pmd(new_page, vma->vm_page_prot);
+	entry = pmd_mknonnuma(entry);
+	entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
+	entry = pmd_mkhuge(entry);
+
+	page_add_new_anon_rmap(new_page, vma, haddr);
+
+	set_pmd_at(mm, haddr, pmd, entry);
+	update_mmu_cache_pmd(vma, address, entry);
+	page_remove_rmap(page);
+	/*
+	 * Finish the charge transaction under the page table lock to
+	 * prevent split_huge_page() from dividing up the charge
+	 * before it's fully transferred to the new page.
+	 */
+	mem_cgroup_end_migration(memcg, page, new_page, true);
+	spin_unlock(&mm->page_table_lock);
+
+	unlock_page(new_page);
+	unlock_page(page);
+	put_page(page);			/* Drop the rmap reference */
+	put_page(page);			/* Drop the LRU isolation reference */
+
+	count_vm_events(PGMIGRATE_SUCCESS, HPAGE_PMD_NR);
+	count_vm_numa_events(NUMA_PAGE_MIGRATE, HPAGE_PMD_NR);
+
+out:
+	mod_zone_page_state(page_zone(page),
+			NR_ISOLATED_ANON + page_lru,
+			-HPAGE_PMD_NR);
+	return isolated;
+
+out_dropref:
+	put_page(page);
+out_keep_locked:
+	return 0;
+}
+#endif /* CONFIG_NUMA_BALANCING */
+
+#endif /* CONFIG_NUMA */
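
Appended note, not part of the diff above: migrate_misplaced_page() documents that the caller already holds a reference which the function drops before returning. The following is a hedged sketch of how a NUMA hinting fault path might drive it; numa_hint_fault_migrate() and its target_nid argument are assumptions for illustration, while migrate_misplaced_page(), get_page() and page_to_nid() are existing kernel identifiers.

/*
 * Hypothetical caller sketch: the fault handler elevates the refcount and
 * migrate_misplaced_page() drops it whether or not migration happens.
 */
static bool numa_hint_fault_migrate(struct page *page, int target_nid)
{
	if (target_nid == -1 || target_nid == page_to_nid(page))
		return false;			/* already on the preferred node */

	get_page(page);				/* reference consumed by the callee */
	return migrate_misplaced_page(page, target_nid) != 0;
}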