 include/linux/migrate.h |  15
 mm/huge_memory.c        |  59
 mm/internal.h           |   7
 mm/memcontrol.c         |   7
 mm/migrate.c            | 231
 5 files changed, 255 insertions(+), 64 deletions(-)
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 91556889adac..51eac4bdc606 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -79,6 +79,12 @@ static inline int migrate_huge_page_move_mapping(struct address_space *mapping,
 extern int migrate_misplaced_page(struct page *page, int node);
 extern int migrate_misplaced_page(struct page *page, int node);
 extern bool migrate_ratelimited(int node);
+extern int migrate_misplaced_transhuge_page(struct mm_struct *mm,
+			struct vm_area_struct *vma,
+			pmd_t *pmd, pmd_t entry,
+			unsigned long address,
+			struct page *page, int node);
+
 #else
 static inline int migrate_misplaced_page(struct page *page, int node)
 {
@@ -88,6 +94,15 @@ static inline bool migrate_ratelimited(int node)
 {
 	return false;
 }
+
+static inline int migrate_misplaced_transhuge_page(struct mm_struct *mm,
+			struct vm_area_struct *vma,
+			pmd_t *pmd, pmd_t entry,
+			unsigned long address,
+			struct page *page, int node)
+{
+	return -EAGAIN;
+}
 #endif /* CONFIG_NUMA_BALANCING */
 
 #endif /* _LINUX_MIGRATE_H */
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 199b261a257e..711baf84b153 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -600,7 +600,7 @@ out:
 }
 __setup("transparent_hugepage=", setup_transparent_hugepage);
 
-static inline pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma)
+pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma)
 {
 	if (likely(vma->vm_flags & VM_WRITE))
 		pmd = pmd_mkwrite(pmd);
@@ -1023,10 +1023,12 @@ out:
 int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 				unsigned long addr, pmd_t pmd, pmd_t *pmdp)
 {
-	struct page *page = NULL;
+	struct page *page;
 	unsigned long haddr = addr & HPAGE_PMD_MASK;
 	int target_nid;
 	int current_nid = -1;
+	bool migrated;
+	bool page_locked = false;
 
 	spin_lock(&mm->page_table_lock);
 	if (unlikely(!pmd_same(pmd, *pmdp)))
@@ -1034,42 +1036,61 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 
 	page = pmd_page(pmd);
 	get_page(page);
-	spin_unlock(&mm->page_table_lock);
 	current_nid = page_to_nid(page);
 	count_vm_numa_event(NUMA_HINT_FAULTS);
 	if (current_nid == numa_node_id())
 		count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL);
 
 	target_nid = mpol_misplaced(page, vma, haddr);
-	if (target_nid == -1)
+	if (target_nid == -1) {
+		put_page(page);
 		goto clear_pmdnuma;
+	}
 
-	/*
-	 * Due to lacking code to migrate thp pages, we'll split
-	 * (which preserves the special PROT_NONE) and re-take the
-	 * fault on the normal pages.
-	 */
-	split_huge_page(page);
-	put_page(page);
-
-	return 0;
+	/* Acquire the page lock to serialise THP migrations */
+	spin_unlock(&mm->page_table_lock);
+	lock_page(page);
+	page_locked = true;
 
-clear_pmdnuma:
+	/* Confirm the PMD did not change while locked */
 	spin_lock(&mm->page_table_lock);
-	if (unlikely(!pmd_same(pmd, *pmdp)))
+	if (unlikely(!pmd_same(pmd, *pmdp))) {
+		unlock_page(page);
+		put_page(page);
 		goto out_unlock;
+	}
+	spin_unlock(&mm->page_table_lock);
+
+	/* Migrate the THP to the requested node */
+	migrated = migrate_misplaced_transhuge_page(mm, vma,
+				pmdp, pmd, addr,
+				page, target_nid);
+	if (migrated)
+		current_nid = target_nid;
+	else {
+		spin_lock(&mm->page_table_lock);
+		if (unlikely(!pmd_same(pmd, *pmdp))) {
+			unlock_page(page);
+			goto out_unlock;
+		}
+		goto clear_pmdnuma;
+	}
+
+	task_numa_fault(current_nid, HPAGE_PMD_NR, migrated);
+	return 0;
 
+clear_pmdnuma:
 	pmd = pmd_mknonnuma(pmd);
 	set_pmd_at(mm, haddr, pmdp, pmd);
 	VM_BUG_ON(pmd_numa(*pmdp));
 	update_mmu_cache_pmd(vma, addr, pmdp);
+	if (page_locked)
+		unlock_page(page);
 
 out_unlock:
 	spin_unlock(&mm->page_table_lock);
-	if (page) {
-		put_page(page);
-		task_numa_fault(numa_node_id(), HPAGE_PMD_NR, false);
-	}
+	if (current_nid != -1)
+		task_numa_fault(current_nid, HPAGE_PMD_NR, migrated);
 	return 0;
 }
 
diff --git a/mm/internal.h b/mm/internal.h
index a4fa284f6bc2..7e60ac826f2b 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -212,15 +212,18 @@ static inline void mlock_migrate_page(struct page *newpage, struct page *page)
 {
 	if (TestClearPageMlocked(page)) {
 		unsigned long flags;
+		int nr_pages = hpage_nr_pages(page);
 
 		local_irq_save(flags);
-		__dec_zone_page_state(page, NR_MLOCK);
+		__mod_zone_page_state(page_zone(page), NR_MLOCK, -nr_pages);
 		SetPageMlocked(newpage);
-		__inc_zone_page_state(newpage, NR_MLOCK);
+		__mod_zone_page_state(page_zone(newpage), NR_MLOCK, nr_pages);
 		local_irq_restore(flags);
 	}
 }
 
+extern pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma);
+
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 extern unsigned long vma_address(struct page *page,
 				 struct vm_area_struct *vma);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index dd39ba000b31..d97af9636ab2 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3288,15 +3288,18 @@ void mem_cgroup_prepare_migration(struct page *page, struct page *newpage,
 				  struct mem_cgroup **memcgp)
 {
 	struct mem_cgroup *memcg = NULL;
+	unsigned int nr_pages = 1;
 	struct page_cgroup *pc;
 	enum charge_type ctype;
 
 	*memcgp = NULL;
 
-	VM_BUG_ON(PageTransHuge(page));
 	if (mem_cgroup_disabled())
 		return;
 
+	if (PageTransHuge(page))
+		nr_pages <<= compound_order(page);
+
 	pc = lookup_page_cgroup(page);
 	lock_page_cgroup(pc);
 	if (PageCgroupUsed(pc)) {
@@ -3358,7 +3361,7 @@ void mem_cgroup_prepare_migration(struct page *page, struct page *newpage,
 	 * charged to the res_counter since we plan on replacing the
 	 * old one and only one page is going to be left afterwards.
 	 */
-	__mem_cgroup_commit_charge(memcg, newpage, 1, ctype, false);
+	__mem_cgroup_commit_charge(memcg, newpage, nr_pages, ctype, false);
 }
 
 /* remove redundant charge if migration failed*/
diff --git a/mm/migrate.c b/mm/migrate.c
index 2a5ce135eef0..c9400960fd52 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -410,7 +410,7 @@ int migrate_huge_page_move_mapping(struct address_space *mapping,
  */
 void migrate_page_copy(struct page *newpage, struct page *page)
 {
-	if (PageHuge(page))
+	if (PageHuge(page) || PageTransHuge(page))
 		copy_huge_page(newpage, page);
 	else
 		copy_highpage(newpage, page);
@@ -1491,25 +1491,10 @@ bool migrate_ratelimited(int node)
 	return true;
 }
 
-/*
- * Attempt to migrate a misplaced page to the specified destination
- * node. Caller is expected to have an elevated reference count on
- * the page that will be dropped by this function before returning.
- */
-int migrate_misplaced_page(struct page *page, int node)
+/* Returns true if the node is migrate rate-limited after the update */
+bool numamigrate_update_ratelimit(pg_data_t *pgdat)
 {
-	pg_data_t *pgdat = NODE_DATA(node);
-	int isolated = 0;
-	LIST_HEAD(migratepages);
-
-	/*
-	 * Don't migrate pages that are mapped in multiple processes.
-	 * TODO: Handle false sharing detection instead of this hammer
-	 */
-	if (page_mapcount(page) != 1) {
-		put_page(page);
-		goto out;
-	}
+	bool rate_limited = false;
 
 	/*
 	 * Rate-limit the amount of data that is being migrated to a node.
@@ -1522,13 +1507,18 @@ int migrate_misplaced_page(struct page *page, int node)
 		pgdat->numabalancing_migrate_next_window = jiffies +
 			msecs_to_jiffies(migrate_interval_millisecs);
 	}
-	if (pgdat->numabalancing_migrate_nr_pages > ratelimit_pages) {
-		spin_unlock(&pgdat->numabalancing_migrate_lock);
-		put_page(page);
-		goto out;
-	}
-	pgdat->numabalancing_migrate_nr_pages++;
+	if (pgdat->numabalancing_migrate_nr_pages > ratelimit_pages)
+		rate_limited = true;
+	else
+		pgdat->numabalancing_migrate_nr_pages++;
 	spin_unlock(&pgdat->numabalancing_migrate_lock);
+
+	return rate_limited;
+}
+
+int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page)
+{
+	int ret = 0;
 
 	/* Avoid migrating to a node that is nearly full */
 	if (migrate_balanced_pgdat(pgdat, 1)) {
@@ -1536,13 +1526,18 @@ int migrate_misplaced_page(struct page *page, int node)
 
 		if (isolate_lru_page(page)) {
 			put_page(page);
-			goto out;
+			return 0;
 		}
-		isolated = 1;
 
+		/* Page is isolated */
+		ret = 1;
 		page_lru = page_is_file_cache(page);
-		inc_zone_page_state(page, NR_ISOLATED_ANON + page_lru);
-		list_add(&page->lru, &migratepages);
+		if (!PageTransHuge(page))
+			inc_zone_page_state(page, NR_ISOLATED_ANON + page_lru);
+		else
+			mod_zone_page_state(page_zone(page),
+					NR_ISOLATED_ANON + page_lru,
+					HPAGE_PMD_NR);
 	}
 
 	/*
@@ -1555,23 +1550,177 @@ int migrate_misplaced_page(struct page *page, int node)
 	 */
 	put_page(page);
 
-	if (isolated) {
-		int nr_remaining;
-
-		nr_remaining = migrate_pages(&migratepages,
-				alloc_misplaced_dst_page,
-				node, false, MIGRATE_ASYNC,
-				MR_NUMA_MISPLACED);
-		if (nr_remaining) {
-			putback_lru_pages(&migratepages);
-			isolated = 0;
-		} else
-			count_vm_numa_event(NUMA_PAGE_MIGRATE);
+	return ret;
+}
+
+/*
+ * Attempt to migrate a misplaced page to the specified destination
+ * node. Caller is expected to have an elevated reference count on
+ * the page that will be dropped by this function before returning.
+ */
+int migrate_misplaced_page(struct page *page, int node)
+{
+	pg_data_t *pgdat = NODE_DATA(node);
+	int isolated = 0;
+	int nr_remaining;
+	LIST_HEAD(migratepages);
+
+	/*
+	 * Don't migrate pages that are mapped in multiple processes.
+	 * TODO: Handle false sharing detection instead of this hammer
+	 */
+	if (page_mapcount(page) != 1) {
+		put_page(page);
+		goto out;
 	}
+
+	/*
+	 * Rate-limit the amount of data that is being migrated to a node.
+	 * Optimal placement is no good if the memory bus is saturated and
+	 * all the time is being spent migrating!
+	 */
+	if (numamigrate_update_ratelimit(pgdat)) {
+		put_page(page);
+		goto out;
+	}
+
+	isolated = numamigrate_isolate_page(pgdat, page);
+	if (!isolated)
+		goto out;
+
+	list_add(&page->lru, &migratepages);
+	nr_remaining = migrate_pages(&migratepages,
+				alloc_misplaced_dst_page,
+				node, false, MIGRATE_ASYNC,
+				MR_NUMA_MISPLACED);
+	if (nr_remaining) {
+		putback_lru_pages(&migratepages);
+		isolated = 0;
+	} else
+		count_vm_numa_event(NUMA_PAGE_MIGRATE);
 	BUG_ON(!list_empty(&migratepages));
 out:
 	return isolated;
 }
+
+int migrate_misplaced_transhuge_page(struct mm_struct *mm,
+				struct vm_area_struct *vma,
+				pmd_t *pmd, pmd_t entry,
+				unsigned long address,
+				struct page *page, int node)
+{
+	unsigned long haddr = address & HPAGE_PMD_MASK;
+	pg_data_t *pgdat = NODE_DATA(node);
+	int isolated = 0;
+	struct page *new_page = NULL;
+	struct mem_cgroup *memcg = NULL;
+	int page_lru = page_is_file_cache(page);
+
+	/*
+	 * Don't migrate pages that are mapped in multiple processes.
+	 * TODO: Handle false sharing detection instead of this hammer
+	 */
+	if (page_mapcount(page) != 1)
+		goto out_dropref;
+
+	/*
+	 * Rate-limit the amount of data that is being migrated to a node.
+	 * Optimal placement is no good if the memory bus is saturated and
+	 * all the time is being spent migrating!
+	 */
+	if (numamigrate_update_ratelimit(pgdat))
+		goto out_dropref;
+
+	new_page = alloc_pages_node(node,
+		(GFP_TRANSHUGE | GFP_THISNODE) & ~__GFP_WAIT, HPAGE_PMD_ORDER);
+	if (!new_page)
+		goto out_dropref;
+	page_xchg_last_nid(new_page, page_last_nid(page));
+
+	isolated = numamigrate_isolate_page(pgdat, page);
+	if (!isolated) {
+		put_page(new_page);
+		goto out_keep_locked;
+	}
+
+	/* Prepare a page as a migration target */
+	__set_page_locked(new_page);
+	SetPageSwapBacked(new_page);
+
+	/* anon mapping, we can simply copy page->mapping to the new page: */
+	new_page->mapping = page->mapping;
+	new_page->index = page->index;
+	migrate_page_copy(new_page, page);
+	WARN_ON(PageLRU(new_page));
+
+	/* Recheck the target PMD */
+	spin_lock(&mm->page_table_lock);
+	if (unlikely(!pmd_same(*pmd, entry))) {
+		spin_unlock(&mm->page_table_lock);
+
+		/* Reverse changes made by migrate_page_copy() */
+		if (TestClearPageActive(new_page))
+			SetPageActive(page);
+		if (TestClearPageUnevictable(new_page))
+			SetPageUnevictable(page);
+		mlock_migrate_page(page, new_page);
+
+		unlock_page(new_page);
+		put_page(new_page);		/* Free it */
+
+		unlock_page(page);
+		putback_lru_page(page);
+
+		count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR);
+		goto out;
+	}
+
+	/*
+	 * Traditional migration needs to prepare the memcg charge
+	 * transaction early to prevent the old page from being
+	 * uncharged when installing migration entries.  Here we can
+	 * save the potential rollback and start the charge transfer
+	 * only when migration is already known to end successfully.
+	 */
+	mem_cgroup_prepare_migration(page, new_page, &memcg);
+
+	entry = mk_pmd(new_page, vma->vm_page_prot);
+	entry = pmd_mknonnuma(entry);
+	entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
+	entry = pmd_mkhuge(entry);
+
+	page_add_new_anon_rmap(new_page, vma, haddr);
+
+	set_pmd_at(mm, haddr, pmd, entry);
+	update_mmu_cache_pmd(vma, address, entry);
+	page_remove_rmap(page);
+	/*
+	 * Finish the charge transaction under the page table lock to
+	 * prevent split_huge_page() from dividing up the charge
+	 * before it's fully transferred to the new page.
+	 */
+	mem_cgroup_end_migration(memcg, page, new_page, true);
+	spin_unlock(&mm->page_table_lock);
+
+	unlock_page(new_page);
+	unlock_page(page);
+	put_page(page);			/* Drop the rmap reference */
+	put_page(page);			/* Drop the LRU isolation reference */
+
+	count_vm_events(PGMIGRATE_SUCCESS, HPAGE_PMD_NR);
+	count_vm_numa_events(NUMA_PAGE_MIGRATE, HPAGE_PMD_NR);
+
+out:
+	mod_zone_page_state(page_zone(page),
+			NR_ISOLATED_ANON + page_lru,
+			-HPAGE_PMD_NR);
+	return isolated;
+
+out_dropref:
+	put_page(page);
+out_keep_locked:
+	return 0;
+}
 #endif /* CONFIG_NUMA_BALANCING */
 
 #endif /* CONFIG_NUMA */