Diffstat (limited to 'mm/migrate.c')
-rw-r--r--  mm/migrate.c | 142
1 file changed, 86 insertions(+), 56 deletions(-)
diff --git a/mm/migrate.c b/mm/migrate.c
index 9194375b2307..bed48809e5d0 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -72,28 +72,12 @@ int migrate_prep_local(void)
 }
 
 /*
- * Add isolated pages on the list back to the LRU under page lock
- * to avoid leaking evictable pages back onto unevictable list.
- */
-void putback_lru_pages(struct list_head *l)
-{
-	struct page *page;
-	struct page *page2;
-
-	list_for_each_entry_safe(page, page2, l, lru) {
-		list_del(&page->lru);
-		dec_zone_page_state(page, NR_ISOLATED_ANON +
-				page_is_file_cache(page));
-		putback_lru_page(page);
-	}
-}
-
-/*
  * Put previously isolated pages back onto the appropriate lists
  * from where they were once taken off for compaction/migration.
  *
- * This function shall be used instead of putback_lru_pages(),
- * whenever the isolated pageset has been built by isolate_migratepages_range()
+ * This function shall be used whenever the isolated pageset has been
+ * built from lru, balloon, hugetlbfs page. See isolate_migratepages_range()
+ * and isolate_huge_page().
  */
 void putback_movable_pages(struct list_head *l)
 {
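The deleted putback_lru_pages() walks its list with list_for_each_entry_safe(), which caches the next entry so the current one can be unlinked and released inside the loop body. A minimal standalone sketch of that safe-iteration pattern, in plain userspace C with a made-up item type instead of the kernel's list_head helpers:

#include <stdio.h>
#include <stdlib.h>

/* Hypothetical stand-in for a page sitting on an isolation list. */
struct item {
	int id;
	struct item *next;
};

/*
 * Walk the list while deleting entries, in the spirit of
 * list_for_each_entry_safe(): remember the next pointer before the
 * current entry is unlinked and freed.
 */
static void putback_all(struct item **head)
{
	struct item *cur, *next;

	for (cur = *head; cur != NULL; cur = next) {
		next = cur->next;	/* cached before cur is released */
		printf("putting back item %d\n", cur->id);
		free(cur);
	}
	*head = NULL;
}

int main(void)
{
	struct item *head = NULL;

	for (int i = 0; i < 3; i++) {
		struct item *it = malloc(sizeof(*it));
		it->id = i;
		it->next = head;
		head = it;
	}
	putback_all(&head);
	return 0;
}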
@@ -194,12 +178,49 @@ out:
 }
 
 /*
+ * Congratulations to trinity for discovering this bug.
+ * mm/fremap.c's remap_file_pages() accepts any range within a single vma to
+ * convert that vma to VM_NONLINEAR; and generic_file_remap_pages() will then
+ * replace the specified range by file ptes throughout (maybe populated after).
+ * If page migration finds a page within that range, while it's still located
+ * by vma_interval_tree rather than lost to i_mmap_nonlinear list, no problem:
+ * zap_pte() clears the temporary migration entry before mmap_sem is dropped.
+ * But if the migrating page is in a part of the vma outside the range to be
+ * remapped, then it will not be cleared, and remove_migration_ptes() needs to
+ * deal with it. Fortunately, this part of the vma is of course still linear,
+ * so we just need to use linear location on the nonlinear list.
+ */
+static int remove_linear_migration_ptes_from_nonlinear(struct page *page,
+		struct address_space *mapping, void *arg)
+{
+	struct vm_area_struct *vma;
+	/* hugetlbfs does not support remap_pages, so no huge pgoff worries */
+	pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
+	unsigned long addr;
+
+	list_for_each_entry(vma,
+		&mapping->i_mmap_nonlinear, shared.nonlinear) {
+
+		addr = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
+		if (addr >= vma->vm_start && addr < vma->vm_end)
+			remove_migration_pte(page, vma, addr, arg);
+	}
+	return SWAP_AGAIN;
+}
+
+/*
  * Get rid of all migration entries and replace them by
  * references to the indicated page.
  */
 static void remove_migration_ptes(struct page *old, struct page *new)
 {
-	rmap_walk(new, remove_migration_pte, old);
+	struct rmap_walk_control rwc = {
+		.rmap_one = remove_migration_pte,
+		.arg = old,
+		.file_nonlinear = remove_linear_migration_ptes_from_nonlinear,
+	};
+
+	rmap_walk(new, &rwc);
 }
 
 /*
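This hunk is part of the conversion of rmap_walk() from taking a bare callback and argument to taking an rmap_walk_control descriptor, which lets a caller such as remove_migration_ptes() also supply an optional .file_nonlinear hook without widening every call site. A standalone sketch of the same descriptor-of-callbacks pattern, using invented names (walk_control, walk_items) purely for illustration:

#include <stdio.h>

/* Hypothetical control structure bundling callbacks and their argument. */
struct walk_control {
	int (*visit)(int value, void *arg);	/* called for every item  */
	int (*special)(int value, void *arg);	/* optional extra hook    */
	void *arg;				/* opaque caller argument */
};

/* Generic walker: call the hooks the caller provided, skip absent ones. */
static void walk_items(const int *items, int n, const struct walk_control *wc)
{
	for (int i = 0; i < n; i++) {
		if (wc->special && items[i] < 0)
			wc->special(items[i], wc->arg);
		else
			wc->visit(items[i], wc->arg);
	}
}

static int count_visit(int value, void *arg)
{
	(*(int *)arg)++;
	printf("visit %d\n", value);
	return 0;
}

static int count_special(int value, void *arg)
{
	(*(int *)arg)++;
	printf("special-case %d\n", value);
	return 0;
}

int main(void)
{
	int items[] = { 1, -2, 3 };
	int visited = 0;
	struct walk_control wc = {
		.visit = count_visit,
		.special = count_special,
		.arg = &visited,
	};

	walk_items(items, 3, &wc);
	printf("visited %d items\n", visited);
	return 0;
}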
@@ -510,7 +531,7 @@ void migrate_page_copy(struct page *newpage, struct page *page)
 	if (PageUptodate(page))
 		SetPageUptodate(newpage);
 	if (TestClearPageActive(page)) {
-		VM_BUG_ON(PageUnevictable(page));
+		VM_BUG_ON_PAGE(PageUnevictable(page), page);
 		SetPageActive(newpage);
 	} else if (TestClearPageUnevictable(page))
 		SetPageUnevictable(newpage);
@@ -563,14 +584,6 @@ void migrate_page_copy(struct page *newpage, struct page *page)
  *                    Migration functions
  ***********************************************************/
 
-/* Always fail migration. Used for mappings that are not movable */
-int fail_migrate_page(struct address_space *mapping,
-			struct page *newpage, struct page *page)
-{
-	return -EIO;
-}
-EXPORT_SYMBOL(fail_migrate_page);
-
 /*
  * Common logic to directly migrate a single page suitable for
  * pages that do not use PagePrivate/PagePrivate2.
@@ -890,7 +903,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
 	 * free the metadata, so the page can be freed.
 	 */
 	if (!page->mapping) {
-		VM_BUG_ON(PageAnon(page));
+		VM_BUG_ON_PAGE(PageAnon(page), page);
 		if (page_has_private(page)) {
 			try_to_free_buffers(page);
 			goto uncharge;
@@ -1008,7 +1021,7 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
 {
 	int rc = 0;
 	int *result = NULL;
-	struct page *new_hpage = get_new_page(hpage, private, &result);
+	struct page *new_hpage;
 	struct anon_vma *anon_vma = NULL;
 
 	/*
@@ -1018,9 +1031,12 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
 	 * tables or check whether the hugepage is pmd-based or not before
 	 * kicking migration.
 	 */
-	if (!hugepage_migration_support(page_hstate(hpage)))
+	if (!hugepage_migration_support(page_hstate(hpage))) {
+		putback_active_hugepage(hpage);
 		return -ENOSYS;
+	}
 
+	new_hpage = get_new_page(hpage, private, &result);
 	if (!new_hpage)
 		return -ENOMEM;
 
@@ -1120,7 +1136,12 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
 			nr_succeeded++;
 			break;
 		default:
-			/* Permanent failure */
+			/*
+			 * Permanent failure (-EBUSY, -ENOSYS, etc.):
+			 * unlike -EAGAIN case, the failed page is
+			 * removed from migration page list and not
+			 * retried in the next outer loop.
+			 */
 			nr_failed++;
 			break;
 		}
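The expanded comment draws the line between -EAGAIN, where the page stays on the list for the next pass of the outer retry loop, and permanent errors, where the page is dropped and counted as failed. A standalone sketch of that retry shape, with invented return codes and a hypothetical try_one() operation standing in for a single page migration:

#include <stdio.h>

#define RET_OK     0
#define RET_AGAIN  1	/* transient failure: retry on a later pass    */
#define RET_FAIL   2	/* permanent failure: drop and count as failed */

/* Hypothetical per-item operation that may fail transiently or permanently. */
static int try_one(int id, int pass)
{
	if (id == 1 && pass < 2)
		return RET_AGAIN;	/* succeeds once retried */
	if (id == 2)
		return RET_FAIL;	/* never succeeds */
	return RET_OK;
}

int main(void)
{
	int pending[] = { 0, 1, 2 };
	int done[3] = { 0 };
	int nr_failed = 0, nr_succeeded = 0;

	/* A bounded number of passes, like the outer loop in migrate_pages(). */
	for (int pass = 0; pass < 10; pass++) {
		int retry = 0;

		for (int i = 0; i < 3; i++) {
			if (done[i])
				continue;
			switch (try_one(pending[i], pass)) {
			case RET_AGAIN:
				retry++;	/* keep it for the next pass */
				break;
			case RET_OK:
				nr_succeeded++;
				done[i] = 1;
				break;
			default:
				nr_failed++;	/* permanent: not retried */
				done[i] = 1;
				break;
			}
		}
		if (!retry)
			break;
	}
	printf("succeeded=%d failed=%d\n", nr_succeeded, nr_failed);
	return 0;
}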
@@ -1169,7 +1190,7 @@ static struct page *new_page_node(struct page *p, unsigned long private,
 					pm->node);
 	else
 		return alloc_pages_exact_node(pm->node,
-				GFP_HIGHUSER_MOVABLE | GFP_THISNODE, 0);
+				GFP_HIGHUSER_MOVABLE | __GFP_THISNODE, 0);
 }
 
 /*
@@ -1555,12 +1576,10 @@ static struct page *alloc_misplaced_dst_page(struct page *page,
 	struct page *newpage;
 
 	newpage = alloc_pages_exact_node(nid,
-					 (GFP_HIGHUSER_MOVABLE | GFP_THISNODE |
-					  __GFP_NOMEMALLOC | __GFP_NORETRY |
-					  __GFP_NOWARN) &
+					 (GFP_HIGHUSER_MOVABLE |
+					  __GFP_THISNODE | __GFP_NOMEMALLOC |
+					  __GFP_NORETRY | __GFP_NOWARN) &
 					 ~GFP_IOFS, 0);
-	if (newpage)
-		page_cpupid_xchg_last(newpage, page_cpupid_last(page));
 
 	return newpage;
 }
@@ -1594,35 +1613,42 @@ bool migrate_ratelimited(int node)
 }
 
 /* Returns true if the node is migrate rate-limited after the update */
-bool numamigrate_update_ratelimit(pg_data_t *pgdat, unsigned long nr_pages)
+static bool numamigrate_update_ratelimit(pg_data_t *pgdat,
+					 unsigned long nr_pages)
 {
-	bool rate_limited = false;
-
 	/*
 	 * Rate-limit the amount of data that is being migrated to a node.
 	 * Optimal placement is no good if the memory bus is saturated and
 	 * all the time is being spent migrating!
 	 */
-	spin_lock(&pgdat->numabalancing_migrate_lock);
 	if (time_after(jiffies, pgdat->numabalancing_migrate_next_window)) {
+		spin_lock(&pgdat->numabalancing_migrate_lock);
 		pgdat->numabalancing_migrate_nr_pages = 0;
 		pgdat->numabalancing_migrate_next_window = jiffies +
 			msecs_to_jiffies(migrate_interval_millisecs);
+		spin_unlock(&pgdat->numabalancing_migrate_lock);
 	}
-	if (pgdat->numabalancing_migrate_nr_pages > ratelimit_pages)
-		rate_limited = true;
-	else
-		pgdat->numabalancing_migrate_nr_pages += nr_pages;
-	spin_unlock(&pgdat->numabalancing_migrate_lock);
+	if (pgdat->numabalancing_migrate_nr_pages > ratelimit_pages) {
+		trace_mm_numa_migrate_ratelimit(current, pgdat->node_id,
+								nr_pages);
+		return true;
+	}
 
-	return rate_limited;
+	/*
+	 * This is an unlocked non-atomic update so errors are possible.
+	 * The consequences are failing to migrate when we potentially should
+	 * have which is not severe enough to warrant locking. If it is ever
+	 * a problem, it can be converted to a per-cpu counter.
+	 */
+	pgdat->numabalancing_migrate_nr_pages += nr_pages;
+	return false;
 }
 
-int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page)
+static int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page)
 {
 	int page_lru;
 
-	VM_BUG_ON(compound_order(page) && !PageTransHuge(page));
+	VM_BUG_ON_PAGE(compound_order(page) && !PageTransHuge(page), page);
 
 	/* Avoid migrating to a node that is nearly full */
 	if (!migrate_balanced_pgdat(pgdat, 1UL << compound_order(page)))
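The rework above takes numabalancing_migrate_lock only when a new rate-limiting window is opened and accepts a racy, unlocked increment of the per-window page count, as the added comment explains. A single-threaded standalone sketch of the same window-plus-threshold logic (locking omitted), using wall-clock seconds where the kernel uses jiffies; WINDOW_SECS and LIMIT_PAGES are names invented for the example:

#include <stdbool.h>
#include <stdio.h>
#include <time.h>

#define WINDOW_SECS	1
#define LIMIT_PAGES	4

static time_t next_window;
static unsigned long window_pages;

/* Return true if this request would exceed the per-window budget. */
static bool update_ratelimit(unsigned long nr_pages)
{
	time_t now = time(NULL);

	if (now >= next_window) {		/* start a new window */
		window_pages = 0;
		next_window = now + WINDOW_SECS;
	}
	if (window_pages > LIMIT_PAGES)
		return true;			/* over budget: rate limited */

	window_pages += nr_pages;		/* charge this request */
	return false;
}

int main(void)
{
	for (int i = 0; i < 8; i++)
		printf("request %d: %s\n", i,
		       update_ratelimit(1) ? "rate limited" : "allowed");
	return 0;
}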
@@ -1705,7 +1731,12 @@ int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma,
 	nr_remaining = migrate_pages(&migratepages, alloc_misplaced_dst_page,
 				     node, MIGRATE_ASYNC, MR_NUMA_MISPLACED);
 	if (nr_remaining) {
-		putback_lru_pages(&migratepages);
+		if (!list_empty(&migratepages)) {
+			list_del(&page->lru);
+			dec_zone_page_state(page, NR_ISOLATED_ANON +
+					page_is_file_cache(page));
+			putback_lru_page(page);
+		}
 		isolated = 0;
 	} else
 		count_vm_numa_event(NUMA_PAGE_MIGRATE);
@@ -1748,12 +1779,11 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 		goto out_dropref;
 
 	new_page = alloc_pages_node(node,
-		(GFP_TRANSHUGE | GFP_THISNODE) & ~__GFP_WAIT, HPAGE_PMD_ORDER);
+		(GFP_TRANSHUGE | __GFP_THISNODE) & ~__GFP_WAIT,
+		HPAGE_PMD_ORDER);
 	if (!new_page)
 		goto out_fail;
 
-	page_cpupid_xchg_last(new_page, page_cpupid_last(page));
-
 	isolated = numamigrate_isolate_page(pgdat, page);
 	if (!isolated) {
 		put_page(new_page);