diff options
Diffstat (limited to 'mm/migrate.c')
-rw-r--r-- | mm/migrate.c | 82 |
1 files changed, 65 insertions, 17 deletions
diff --git a/mm/migrate.c b/mm/migrate.c
index bb940045fe85..9194375b2307 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -36,6 +36,7 @@ | |||
36 | #include <linux/hugetlb_cgroup.h> | 36 | #include <linux/hugetlb_cgroup.h> |
37 | #include <linux/gfp.h> | 37 | #include <linux/gfp.h> |
38 | #include <linux/balloon_compaction.h> | 38 | #include <linux/balloon_compaction.h> |
39 | #include <linux/mmu_notifier.h> | ||
39 | 40 | ||
40 | #include <asm/tlbflush.h> | 41 | #include <asm/tlbflush.h> |
41 | 42 | ||
@@ -316,14 +317,15 @@ static inline bool buffer_migrate_lock_buffers(struct buffer_head *head, | |||
316 | */ | 317 | */ |
317 | int migrate_page_move_mapping(struct address_space *mapping, | 318 | int migrate_page_move_mapping(struct address_space *mapping, |
318 | struct page *newpage, struct page *page, | 319 | struct page *newpage, struct page *page, |
319 | struct buffer_head *head, enum migrate_mode mode) | 320 | struct buffer_head *head, enum migrate_mode mode, |
321 | int extra_count) | ||
320 | { | 322 | { |
321 | int expected_count = 0; | 323 | int expected_count = 1 + extra_count; |
322 | void **pslot; | 324 | void **pslot; |
323 | 325 | ||
324 | if (!mapping) { | 326 | if (!mapping) { |
325 | /* Anonymous page without mapping */ | 327 | /* Anonymous page without mapping */ |
326 | if (page_count(page) != 1) | 328 | if (page_count(page) != expected_count) |
327 | return -EAGAIN; | 329 | return -EAGAIN; |
328 | return MIGRATEPAGE_SUCCESS; | 330 | return MIGRATEPAGE_SUCCESS; |
329 | } | 331 | } |
@@ -333,7 +335,7 @@ int migrate_page_move_mapping(struct address_space *mapping, | |||
333 | pslot = radix_tree_lookup_slot(&mapping->page_tree, | 335 | pslot = radix_tree_lookup_slot(&mapping->page_tree, |
334 | page_index(page)); | 336 | page_index(page)); |
335 | 337 | ||
336 | expected_count = 2 + page_has_private(page); | 338 | expected_count += 1 + page_has_private(page); |
337 | if (page_count(page) != expected_count || | 339 | if (page_count(page) != expected_count || |
338 | radix_tree_deref_slot_protected(pslot, &mapping->tree_lock) != page) { | 340 | radix_tree_deref_slot_protected(pslot, &mapping->tree_lock) != page) { |
339 | spin_unlock_irq(&mapping->tree_lock); | 341 | spin_unlock_irq(&mapping->tree_lock); |
@@ -583,7 +585,7 @@ int migrate_page(struct address_space *mapping, | |||
583 | 585 | ||
584 | BUG_ON(PageWriteback(page)); /* Writeback must be complete */ | 586 | BUG_ON(PageWriteback(page)); /* Writeback must be complete */ |
585 | 587 | ||
586 | rc = migrate_page_move_mapping(mapping, newpage, page, NULL, mode); | 588 | rc = migrate_page_move_mapping(mapping, newpage, page, NULL, mode, 0); |
587 | 589 | ||
588 | if (rc != MIGRATEPAGE_SUCCESS) | 590 | if (rc != MIGRATEPAGE_SUCCESS) |
589 | return rc; | 591 | return rc; |
@@ -610,7 +612,7 @@ int buffer_migrate_page(struct address_space *mapping, | |||
610 | 612 | ||
611 | head = page_buffers(page); | 613 | head = page_buffers(page); |
612 | 614 | ||
613 | rc = migrate_page_move_mapping(mapping, newpage, page, head, mode); | 615 | rc = migrate_page_move_mapping(mapping, newpage, page, head, mode, 0); |
614 | 616 | ||
615 | if (rc != MIGRATEPAGE_SUCCESS) | 617 | if (rc != MIGRATEPAGE_SUCCESS) |
616 | return rc; | 618 | return rc; |
@@ -1654,6 +1656,18 @@ int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page) | |||
1654 | return 1; | 1656 | return 1; |
1655 | } | 1657 | } |
1656 | 1658 | ||
1659 | bool pmd_trans_migrating(pmd_t pmd) | ||
1660 | { | ||
1661 | struct page *page = pmd_page(pmd); | ||
1662 | return PageLocked(page); | ||
1663 | } | ||
1664 | |||
1665 | void wait_migrate_huge_page(struct anon_vma *anon_vma, pmd_t *pmd) | ||
1666 | { | ||
1667 | struct page *page = pmd_page(*pmd); | ||
1668 | wait_on_page_locked(page); | ||
1669 | } | ||
1670 | |||
1657 | /* | 1671 | /* |
1658 | * Attempt to migrate a misplaced page to the specified destination | 1672 | * Attempt to migrate a misplaced page to the specified destination |
1659 | * node. Caller is expected to have an elevated reference count on | 1673 | * node. Caller is expected to have an elevated reference count on |
@@ -1716,12 +1730,14 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm, | |||
1716 | struct page *page, int node) | 1730 | struct page *page, int node) |
1717 | { | 1731 | { |
1718 | spinlock_t *ptl; | 1732 | spinlock_t *ptl; |
1719 | unsigned long haddr = address & HPAGE_PMD_MASK; | ||
1720 | pg_data_t *pgdat = NODE_DATA(node); | 1733 | pg_data_t *pgdat = NODE_DATA(node); |
1721 | int isolated = 0; | 1734 | int isolated = 0; |
1722 | struct page *new_page = NULL; | 1735 | struct page *new_page = NULL; |
1723 | struct mem_cgroup *memcg = NULL; | 1736 | struct mem_cgroup *memcg = NULL; |
1724 | int page_lru = page_is_file_cache(page); | 1737 | int page_lru = page_is_file_cache(page); |
1738 | unsigned long mmun_start = address & HPAGE_PMD_MASK; | ||
1739 | unsigned long mmun_end = mmun_start + HPAGE_PMD_SIZE; | ||
1740 | pmd_t orig_entry; | ||
1725 | 1741 | ||
1726 | /* | 1742 | /* |
1727 | * Rate-limit the amount of data that is being migrated to a node. | 1743 | * Rate-limit the amount of data that is being migrated to a node. |
@@ -1744,6 +1760,9 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm, | |||
1744 | goto out_fail; | 1760 | goto out_fail; |
1745 | } | 1761 | } |
1746 | 1762 | ||
1763 | if (mm_tlb_flush_pending(mm)) | ||
1764 | flush_tlb_range(vma, mmun_start, mmun_end); | ||
1765 | |||
1747 | /* Prepare a page as a migration target */ | 1766 | /* Prepare a page as a migration target */ |
1748 | __set_page_locked(new_page); | 1767 | __set_page_locked(new_page); |
1749 | SetPageSwapBacked(new_page); | 1768 | SetPageSwapBacked(new_page); |
@@ -1755,9 +1774,12 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm, | |||
1755 | WARN_ON(PageLRU(new_page)); | 1774 | WARN_ON(PageLRU(new_page)); |
1756 | 1775 | ||
1757 | /* Recheck the target PMD */ | 1776 | /* Recheck the target PMD */ |
1777 | mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end); | ||
1758 | ptl = pmd_lock(mm, pmd); | 1778 | ptl = pmd_lock(mm, pmd); |
1759 | if (unlikely(!pmd_same(*pmd, entry))) { | 1779 | if (unlikely(!pmd_same(*pmd, entry) || page_count(page) != 2)) { |
1780 | fail_putback: | ||
1760 | spin_unlock(ptl); | 1781 | spin_unlock(ptl); |
1782 | mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); | ||
1761 | 1783 | ||
1762 | /* Reverse changes made by migrate_page_copy() */ | 1784 | /* Reverse changes made by migrate_page_copy() */ |
1763 | if (TestClearPageActive(new_page)) | 1785 | if (TestClearPageActive(new_page)) |
@@ -1774,7 +1796,8 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm, | |||
1774 | putback_lru_page(page); | 1796 | putback_lru_page(page); |
1775 | mod_zone_page_state(page_zone(page), | 1797 | mod_zone_page_state(page_zone(page), |
1776 | NR_ISOLATED_ANON + page_lru, -HPAGE_PMD_NR); | 1798 | NR_ISOLATED_ANON + page_lru, -HPAGE_PMD_NR); |
1777 | goto out_fail; | 1799 | |
1800 | goto out_unlock; | ||
1778 | } | 1801 | } |
1779 | 1802 | ||
1780 | /* | 1803 | /* |
@@ -1786,16 +1809,35 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm, | |||
1786 | */ | 1809 | */ |
1787 | mem_cgroup_prepare_migration(page, new_page, &memcg); | 1810 | mem_cgroup_prepare_migration(page, new_page, &memcg); |
1788 | 1811 | ||
1812 | orig_entry = *pmd; | ||
1789 | entry = mk_pmd(new_page, vma->vm_page_prot); | 1813 | entry = mk_pmd(new_page, vma->vm_page_prot); |
1790 | entry = pmd_mknonnuma(entry); | ||
1791 | entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); | ||
1792 | entry = pmd_mkhuge(entry); | 1814 | entry = pmd_mkhuge(entry); |
1815 | entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); | ||
1793 | 1816 | ||
1794 | pmdp_clear_flush(vma, haddr, pmd); | 1817 | /* |
1795 | set_pmd_at(mm, haddr, pmd, entry); | 1818 | * Clear the old entry under pagetable lock and establish the new PTE. |
1796 | page_add_new_anon_rmap(new_page, vma, haddr); | 1819 | * Any parallel GUP will either observe the old page blocking on the |
1820 | * page lock, block on the page table lock or observe the new page. | ||
1821 | * The SetPageUptodate on the new page and page_add_new_anon_rmap | ||
1822 | * guarantee the copy is visible before the pagetable update. | ||
1823 | */ | ||
1824 | flush_cache_range(vma, mmun_start, mmun_end); | ||
1825 | page_add_new_anon_rmap(new_page, vma, mmun_start); | ||
1826 | pmdp_clear_flush(vma, mmun_start, pmd); | ||
1827 | set_pmd_at(mm, mmun_start, pmd, entry); | ||
1828 | flush_tlb_range(vma, mmun_start, mmun_end); | ||
1797 | update_mmu_cache_pmd(vma, address, &entry); | 1829 | update_mmu_cache_pmd(vma, address, &entry); |
1830 | |||
1831 | if (page_count(page) != 2) { | ||
1832 | set_pmd_at(mm, mmun_start, pmd, orig_entry); | ||
1833 | flush_tlb_range(vma, mmun_start, mmun_end); | ||
1834 | update_mmu_cache_pmd(vma, address, &entry); | ||
1835 | page_remove_rmap(new_page); | ||
1836 | goto fail_putback; | ||
1837 | } | ||
1838 | |||
1798 | page_remove_rmap(page); | 1839 | page_remove_rmap(page); |
1840 | |||
1799 | /* | 1841 | /* |
1800 | * Finish the charge transaction under the page table lock to | 1842 | * Finish the charge transaction under the page table lock to |
1801 | * prevent split_huge_page() from dividing up the charge | 1843 | * prevent split_huge_page() from dividing up the charge |
@@ -1803,6 +1845,7 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm, | |||
1803 | */ | 1845 | */ |
1804 | mem_cgroup_end_migration(memcg, page, new_page, true); | 1846 | mem_cgroup_end_migration(memcg, page, new_page, true); |
1805 | spin_unlock(ptl); | 1847 | spin_unlock(ptl); |
1848 | mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); | ||
1806 | 1849 | ||
1807 | unlock_page(new_page); | 1850 | unlock_page(new_page); |
1808 | unlock_page(page); | 1851 | unlock_page(page); |
@@ -1820,10 +1863,15 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm, | |||
1820 | out_fail: | 1863 | out_fail: |
1821 | count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR); | 1864 | count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR); |
1822 | out_dropref: | 1865 | out_dropref: |
1823 | entry = pmd_mknonnuma(entry); | 1866 | ptl = pmd_lock(mm, pmd); |
1824 | set_pmd_at(mm, haddr, pmd, entry); | 1867 | if (pmd_same(*pmd, entry)) { |
1825 | update_mmu_cache_pmd(vma, address, &entry); | 1868 | entry = pmd_mknonnuma(entry); |
1869 | set_pmd_at(mm, mmun_start, pmd, entry); | ||
1870 | update_mmu_cache_pmd(vma, address, &entry); | ||
1871 | } | ||
1872 | spin_unlock(ptl); | ||
1826 | 1873 | ||
1874 | out_unlock: | ||
1827 | unlock_page(page); | 1875 | unlock_page(page); |
1828 | put_page(page); | 1876 | put_page(page); |
1829 | return 0; | 1877 | return 0; |