Diffstat (limited to 'mm/migrate.c')
 mm/migrate.c | 82 +++++++++++++++++++++++++++++++++---------
 1 file changed, 65 insertions(+), 17 deletions(-)
diff --git a/mm/migrate.c b/mm/migrate.c
index bb940045fe85..9194375b2307 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -36,6 +36,7 @@
 #include <linux/hugetlb_cgroup.h>
 #include <linux/gfp.h>
 #include <linux/balloon_compaction.h>
+#include <linux/mmu_notifier.h>
 
 #include <asm/tlbflush.h>
 
@@ -316,14 +317,15 @@ static inline bool buffer_migrate_lock_buffers(struct buffer_head *head,
  */
 int migrate_page_move_mapping(struct address_space *mapping,
 		struct page *newpage, struct page *page,
-		struct buffer_head *head, enum migrate_mode mode)
+		struct buffer_head *head, enum migrate_mode mode,
+		int extra_count)
 {
-	int expected_count = 0;
+	int expected_count = 1 + extra_count;
 	void **pslot;
 
 	if (!mapping) {
 		/* Anonymous page without mapping */
-		if (page_count(page) != 1)
+		if (page_count(page) != expected_count)
 			return -EAGAIN;
 		return MIGRATEPAGE_SUCCESS;
 	}
@@ -333,7 +335,7 @@ int migrate_page_move_mapping(struct address_space *mapping,
 	pslot = radix_tree_lookup_slot(&mapping->page_tree,
 					page_index(page));
 
-	expected_count = 2 + page_has_private(page);
+	expected_count += 1 + page_has_private(page);
 	if (page_count(page) != expected_count ||
 		radix_tree_deref_slot_protected(pslot, &mapping->tree_lock) != page) {
 		spin_unlock_irq(&mapping->tree_lock);
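
The reworked check keeps the invariant that migration may only proceed when every reference to the page is accounted for; what changes is that a caller holding additional pins can now declare them through extra_count instead of the function hardcoding the total. A minimal userspace model of the resulting arithmetic, with illustrative names (can_migrate() is not a kernel function):

#include <stdbool.h>
#include <stdio.h>

/*
 * Userspace model of the post-patch expected_count logic in
 * migrate_page_move_mapping(). Illustrative only, not kernel API.
 */
static bool can_migrate(int page_count, bool has_mapping,
			bool has_private, int extra_count)
{
	/* One reference for the migration caller, plus declared pins. */
	int expected = 1 + extra_count;

	if (!has_mapping)
		return page_count == expected;	/* anonymous page */

	/* The mapping's radix-tree slot, plus buffers if present. */
	expected += 1 + (has_private ? 1 : 0);
	return page_count == expected;
}

int main(void)
{
	/* File-backed page, no buffers, no extra pins: expect 2. */
	printf("%d\n", can_migrate(2, true, false, 0));	/* 1: ok */
	/* A concurrent get_user_pages() pin makes the count unexpected. */
	printf("%d\n", can_migrate(3, true, false, 0));	/* 0: -EAGAIN */
	return 0;
}
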
@@ -583,7 +585,7 @@ int migrate_page(struct address_space *mapping,
 
 	BUG_ON(PageWriteback(page));	/* Writeback must be complete */
 
-	rc = migrate_page_move_mapping(mapping, newpage, page, NULL, mode);
+	rc = migrate_page_move_mapping(mapping, newpage, page, NULL, mode, 0);
 
 	if (rc != MIGRATEPAGE_SUCCESS)
 		return rc;
@@ -610,7 +612,7 @@ int buffer_migrate_page(struct address_space *mapping,
 
 	head = page_buffers(page);
 
-	rc = migrate_page_move_mapping(mapping, newpage, page, head, mode);
+	rc = migrate_page_move_mapping(mapping, newpage, page, head, mode, 0);
 
 	if (rc != MIGRATEPAGE_SUCCESS)
 		return rc;
@@ -1654,6 +1656,18 @@ int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page)
 	return 1;
 }
 
+bool pmd_trans_migrating(pmd_t pmd)
+{
+	struct page *page = pmd_page(pmd);
+	return PageLocked(page);
+}
+
+void wait_migrate_huge_page(struct anon_vma *anon_vma, pmd_t *pmd)
+{
+	struct page *page = pmd_page(*pmd);
+	wait_on_page_locked(page);
+}
+
 /*
  * Attempt to migrate a misplaced page to the specified destination
  * node. Caller is expected to have an elevated reference count on
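
These two helpers rely on migrate_misplaced_transhuge_page() keeping the THP locked for the whole migration (note the unlock_page(page) on the exit paths further down), so PageLocked() on pmd_page() doubles as a migration-in-flight flag and the page lock is the natural thing to wait on. A compilable userspace model of that check-then-wait pattern, with a pthread mutex standing in for the page lock (all names illustrative):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

/*
 * Model of pmd_trans_migrating()/wait_migrate_huge_page(): the
 * migrator holds a lock across the whole operation; waiters detect
 * the in-flight migration cheaply and block until it completes.
 */
struct huge_page {
	pthread_mutex_t lock;	/* stands in for the page lock */
	bool locked;		/* stands in for PageLocked() */
};

static bool trans_migrating(struct huge_page *p)
{
	return p->locked;	/* cheap, lockless check */
}

static void wait_migration(struct huge_page *p)
{
	/* wait_on_page_locked() analogue: block until the holder drops it */
	pthread_mutex_lock(&p->lock);
	pthread_mutex_unlock(&p->lock);
}

int main(void)
{
	struct huge_page hp = { PTHREAD_MUTEX_INITIALIZER, false };

	if (trans_migrating(&hp))
		wait_migration(&hp);	/* caller would then retry the fault */
	puts("no migration in flight, proceed");
	return 0;
}
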
@@ -1716,12 +1730,14 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 				struct page *page, int node)
 {
 	spinlock_t *ptl;
-	unsigned long haddr = address & HPAGE_PMD_MASK;
 	pg_data_t *pgdat = NODE_DATA(node);
 	int isolated = 0;
 	struct page *new_page = NULL;
 	struct mem_cgroup *memcg = NULL;
 	int page_lru = page_is_file_cache(page);
+	unsigned long mmun_start = address & HPAGE_PMD_MASK;
+	unsigned long mmun_end = mmun_start + HPAGE_PMD_SIZE;
+	pmd_t orig_entry;
 
 	/*
 	 * Rate-limit the amount of data that is being migrated to a node.
@@ -1744,6 +1760,9 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 		goto out_fail;
 	}
 
+	if (mm_tlb_flush_pending(mm))
+		flush_tlb_range(vma, mmun_start, mmun_end);
+
 	/* Prepare a page as a migration target */
 	__set_page_locked(new_page);
 	SetPageSwapBacked(new_page);
@@ -1755,9 +1774,12 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 	WARN_ON(PageLRU(new_page));
 
 	/* Recheck the target PMD */
+	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
 	ptl = pmd_lock(mm, pmd);
-	if (unlikely(!pmd_same(*pmd, entry))) {
+	if (unlikely(!pmd_same(*pmd, entry) || page_count(page) != 2)) {
+fail_putback:
 		spin_unlock(ptl);
+		mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
 
 		/* Reverse changes made by migrate_page_copy() */
 		if (TestClearPageActive(new_page))
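
Three things change at the recheck: the pmd update is now bracketed by mmu_notifier_invalidate_range_start()/end() so secondary MMUs such as KVM drop stale translations; the start/end calls sit outside the page-table spinlock because notifier callbacks may sleep; and page_count(page) != 2 bails out when a parallel get_user_pages() pinned the page before it could be locked. A stub sketch of that lock ordering (the stubs only print the sequence; they are placeholders, not kernel calls):

#include <stdio.h>

/* Stubs standing in for kernel primitives; the point is the order. */
static void invalidate_range_start(void) { puts("mmu notifier start"); }
static void take_pmd_lock(void)          { puts("take page-table lock"); }
static void rewrite_pmd(void)            { puts("pmdp_clear_flush + set_pmd_at"); }
static void drop_pmd_lock(void)          { puts("drop page-table lock"); }
static void invalidate_range_end(void)   { puts("mmu notifier end"); }

int main(void)
{
	invalidate_range_start();	/* before the spinlock: may sleep */
	take_pmd_lock();
	rewrite_pmd();			/* the actual pagetable update */
	drop_pmd_lock();
	invalidate_range_end();		/* after the spinlock drops */
	return 0;
}
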
@@ -1774,7 +1796,8 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 		putback_lru_page(page);
 		mod_zone_page_state(page_zone(page),
 			 NR_ISOLATED_ANON + page_lru, -HPAGE_PMD_NR);
-		goto out_fail;
+
+		goto out_unlock;
 	}
 
 	/*
@@ -1786,16 +1809,35 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 	 */
 	mem_cgroup_prepare_migration(page, new_page, &memcg);
 
+	orig_entry = *pmd;
 	entry = mk_pmd(new_page, vma->vm_page_prot);
-	entry = pmd_mknonnuma(entry);
-	entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
 	entry = pmd_mkhuge(entry);
+	entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
 
-	pmdp_clear_flush(vma, haddr, pmd);
-	set_pmd_at(mm, haddr, pmd, entry);
-	page_add_new_anon_rmap(new_page, vma, haddr);
+	/*
+	 * Clear the old entry under pagetable lock and establish the new PTE.
+	 * Any parallel GUP will either observe the old page blocking on the
+	 * page lock, block on the page table lock or observe the new page.
+	 * The SetPageUptodate on the new page and page_add_new_anon_rmap
+	 * guarantee the copy is visible before the pagetable update.
+	 */
+	flush_cache_range(vma, mmun_start, mmun_end);
+	page_add_new_anon_rmap(new_page, vma, mmun_start);
+	pmdp_clear_flush(vma, mmun_start, pmd);
+	set_pmd_at(mm, mmun_start, pmd, entry);
+	flush_tlb_range(vma, mmun_start, mmun_end);
 	update_mmu_cache_pmd(vma, address, &entry);
+
+	if (page_count(page) != 2) {
+		set_pmd_at(mm, mmun_start, pmd, orig_entry);
+		flush_tlb_range(vma, mmun_start, mmun_end);
+		update_mmu_cache_pmd(vma, address, &entry);
+		page_remove_rmap(new_page);
+		goto fail_putback;
+	}
+
 	page_remove_rmap(page);
+
 	/*
 	 * Finish the charge transaction under the page table lock to
 	 * prevent split_huge_page() from dividing up the charge
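
The reordering here is the heart of the fix: the new page is added to the rmap before the old pmd is cleared, so, together with the SetPageUptodate noted in the new comment, a concurrent lookup sees either the old page (and blocks on its lock) or the fully initialised new one, never neither. The page_count(page) != 2 recheck after the switch catches a get_user_pages() pin that raced in anyway; the pmd saved in orig_entry is restored and the unwind goes through fail_putback. A simplified userspace model of this publish-then-verify-then-rollback shape (the int-sized "pmd" and all names are illustrative):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Model: publish the new entry, re-check for racing pins, and roll
 * back to the saved original on conflict. Illustrative only. */
static _Atomic int refcount = 2;	/* isolation ref + pagetable ref */

static bool switch_mapping(int *pmd_slot, int new_entry)
{
	int orig_entry = *pmd_slot;	/* like orig_entry = *pmd */

	*pmd_slot = new_entry;		/* like set_pmd_at() */
	if (atomic_load(&refcount) != 2) {
		*pmd_slot = orig_entry;	/* racing pin: roll back */
		return false;
	}
	return true;
}

int main(void)
{
	int pmd = 100;

	printf("switched: %d, pmd=%d\n", switch_mapping(&pmd, 200), pmd);
	atomic_fetch_add(&refcount, 1);	/* simulate a GUP pin arriving */
	printf("switched: %d, pmd=%d\n", switch_mapping(&pmd, 300), pmd);
	return 0;
}
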
@@ -1803,6 +1845,7 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 	 */
 	mem_cgroup_end_migration(memcg, page, new_page, true);
 	spin_unlock(ptl);
+	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
 
 	unlock_page(new_page);
 	unlock_page(page);
@@ -1820,10 +1863,15 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 out_fail:
 	count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR);
 out_dropref:
-	entry = pmd_mknonnuma(entry);
-	set_pmd_at(mm, haddr, pmd, entry);
-	update_mmu_cache_pmd(vma, address, &entry);
+	ptl = pmd_lock(mm, pmd);
+	if (pmd_same(*pmd, entry)) {
+		entry = pmd_mknonnuma(entry);
+		set_pmd_at(mm, mmun_start, pmd, entry);
+		update_mmu_cache_pmd(vma, address, &entry);
+	}
+	spin_unlock(ptl);
 
+out_unlock:
 	unlock_page(page);
 	put_page(page);
 	return 0;
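
The failure path gets safer too: previously out_dropref rewrote the pmd unconditionally, which could clobber an entry that had already changed underneath us. It now retakes the page-table lock and restores the non-NUMA entry only if pmd_same(*pmd, entry) shows the slot is untouched. A standalone model of that compare-before-restore step (illustrative names; in the kernel the comparison happens under pmd_lock()):

#include <stdio.h>

/* Only write the entry back if the slot still holds the value we
 * started from; otherwise leave the newer value alone. */
static int restore_if_same(int *slot, int expected, int replacement)
{
	if (*slot != expected)		/* pmd_same(*pmd, entry) analogue */
		return 0;		/* someone changed it; do nothing */
	*slot = replacement;		/* set_pmd_at() analogue */
	return 1;
}

int main(void)
{
	int pmd = 42;

	printf("restored: %d, pmd=%d\n", restore_if_same(&pmd, 42, 7), pmd);
	pmd = 99;	/* concurrent modification */
	printf("restored: %d, pmd=%d\n", restore_if_same(&pmd, 42, 7), pmd);
	return 0;
}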