 arch/x86/mm/gup.c | 13
 mm/huge_memory.c  | 24
 mm/migrate.c      | 38
 3 files changed, 60 insertions(+), 15 deletions(-)
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
index dd74e46828c0..0596e8e0cc19 100644
--- a/arch/x86/mm/gup.c
+++ b/arch/x86/mm/gup.c
@@ -83,6 +83,12 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
 		pte_t pte = gup_get_pte(ptep);
 		struct page *page;
 
+		/* Similar to the PMD case, NUMA hinting must take slow path */
+		if (pte_numa(pte)) {
+			pte_unmap(ptep);
+			return 0;
+		}
+
 		if ((pte_flags(pte) & (mask | _PAGE_SPECIAL)) != mask) {
 			pte_unmap(ptep);
 			return 0;
@@ -167,6 +173,13 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
 		if (pmd_none(pmd) || pmd_trans_splitting(pmd))
 			return 0;
 		if (unlikely(pmd_large(pmd))) {
+			/*
+			 * NUMA hinting faults need to be handled in the GUP
+			 * slowpath for accounting purposes and so that they
+			 * can be serialised against THP migration.
+			 */
+			if (pmd_numa(pmd))
+				return 0;
 			if (!gup_huge_pmd(pmd, addr, next, write, pages, nr))
 				return 0;
 		} else {
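
The early return 0 paths added above are safe because users of the fast walk fall back to the slow path for anything it could not pin: get_user_pages_fast() retries the remaining range under mmap_sem, where the NUMA hinting fault is resolved (and accounted) via handle_mm_fault(). A minimal sketch of that fallback pattern, assuming the 3.13-era x86 gup API; the function name below is illustrative and not part of this patch:

/*
 * Illustrative sketch only, not part of this patch.
 * Assumes the 3.13-era API; requires <linux/mm.h> and <linux/sched.h>.
 */
static long pin_user_range_with_fallback(unsigned long start, int nr_pages,
					 int write, struct page **pages)
{
	struct mm_struct *mm = current->mm;
	int nr;
	long ret;

	/* Lockless fast walk; bails out early when it sees pte_numa()/pmd_numa() */
	nr = __get_user_pages_fast(start, nr_pages, write, pages);
	if (nr == nr_pages)
		return nr;

	/* Slow path for the unpinned tail; handles the NUMA hinting fault */
	down_read(&mm->mmap_sem);
	ret = get_user_pages(current, mm,
			     start + ((unsigned long)nr << PAGE_SHIFT),
			     nr_pages - nr, write, 0, pages + nr, NULL);
	up_read(&mm->mmap_sem);

	if (ret < 0)
		return nr ? nr : ret;
	return nr + ret;
}
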
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 33a5dc492810..51f069303ab9 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1243,6 +1243,10 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
 	if ((flags & FOLL_DUMP) && is_huge_zero_pmd(*pmd))
 		return ERR_PTR(-EFAULT);
 
+	/* Full NUMA hinting faults to serialise migration in fault paths */
+	if ((flags & FOLL_NUMA) && pmd_numa(*pmd))
+		goto out;
+
 	page = pmd_page(*pmd);
 	VM_BUG_ON(!PageHead(page));
 	if (flags & FOLL_TOUCH) {
@@ -1323,23 +1327,27 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		/* If the page was locked, there are no parallel migrations */
 		if (page_locked)
 			goto clear_pmdnuma;
+	}
 
-		/*
-		 * Otherwise wait for potential migrations and retry. We do
-		 * relock and check_same as the page may no longer be mapped.
-		 * As the fault is being retried, do not account for it.
-		 */
+	/*
+	 * If there are potential migrations, wait for completion and retry. We
+	 * do not relock and check_same as the page may no longer be mapped.
+	 * Furthermore, even if the page is currently misplaced, there is no
+	 * guarantee it is still misplaced after the migration completes.
+	 */
+	if (!page_locked) {
 		spin_unlock(ptl);
 		wait_on_page_locked(page);
 		page_nid = -1;
 		goto out;
 	}
 
-	/* Page is misplaced, serialise migrations and parallel THP splits */
+	/*
+	 * Page is misplaced. Page lock serialises migrations. Acquire anon_vma
+	 * to serialise splits.
+	 */
 	get_page(page);
 	spin_unlock(ptl);
-	if (!page_locked)
-		lock_page(page);
 	anon_vma = page_lock_anon_vma_read(page);
 
 	/* Confirm the PMD did not change while page_table_lock was released */
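
Taken together, the do_huge_pmd_numa_page() changes above impose a fixed order once a THP looks misplaced: hold the page lock (or give up, wait for the holder, and let the fault be retried), pin the page, drop the page table lock, then take the anon_vma read lock before revalidating the PMD. A simplified, hedged sketch of that ordering as a standalone helper; the function name and exact shape are illustrative, not the patch's code:

/*
 * Illustrative helper, not part of the patch. Returns true with the page
 * lock and an anon_vma read lock held; returns false (after releasing the
 * page table lock 'ptl') when a parallel migration may be in flight and the
 * fault should simply be retried.
 */
static bool thp_numa_lock_for_migration(struct page *page, spinlock_t *ptl,
					struct anon_vma **anon_vma)
{
	/* The page lock excludes a parallel THP migration */
	if (!trylock_page(page)) {
		spin_unlock(ptl);
		wait_on_page_locked(page);
		return false;
	}

	/* Pin the page, drop the ptl, then exclude parallel THP splits */
	get_page(page);
	spin_unlock(ptl);
	*anon_vma = page_lock_anon_vma_read(page);
	return true;
}

The caller must still retake the page table lock and recheck pmd_same() before touching the entry, exactly as the hunk above does.
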
diff --git a/mm/migrate.c b/mm/migrate.c
index bb940045fe85..2cabbd5fa5bf 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1722,6 +1722,7 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 	struct page *new_page = NULL;
 	struct mem_cgroup *memcg = NULL;
 	int page_lru = page_is_file_cache(page);
+	pmd_t orig_entry;
 
 	/*
 	 * Rate-limit the amount of data that is being migrated to a node.
@@ -1756,7 +1757,8 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 
 	/* Recheck the target PMD */
 	ptl = pmd_lock(mm, pmd);
-	if (unlikely(!pmd_same(*pmd, entry))) {
+	if (unlikely(!pmd_same(*pmd, entry) || page_count(page) != 2)) {
+fail_putback:
 		spin_unlock(ptl);
 
 		/* Reverse changes made by migrate_page_copy() */
@@ -1786,16 +1788,34 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 	 */
 	mem_cgroup_prepare_migration(page, new_page, &memcg);
 
+	orig_entry = *pmd;
 	entry = mk_pmd(new_page, vma->vm_page_prot);
-	entry = pmd_mknonnuma(entry);
-	entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
 	entry = pmd_mkhuge(entry);
+	entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
 
+	/*
+	 * Clear the old entry under pagetable lock and establish the new PTE.
+	 * Any parallel GUP will either observe the old page blocking on the
+	 * page lock, block on the page table lock or observe the new page.
+	 * The SetPageUptodate on the new page and page_add_new_anon_rmap
+	 * guarantee the copy is visible before the pagetable update.
+	 */
+	flush_cache_range(vma, haddr, haddr + HPAGE_PMD_SIZE);
+	page_add_new_anon_rmap(new_page, vma, haddr);
 	pmdp_clear_flush(vma, haddr, pmd);
 	set_pmd_at(mm, haddr, pmd, entry);
-	page_add_new_anon_rmap(new_page, vma, haddr);
 	update_mmu_cache_pmd(vma, address, &entry);
+
+	if (page_count(page) != 2) {
+		set_pmd_at(mm, haddr, pmd, orig_entry);
+		flush_tlb_range(vma, haddr, haddr + HPAGE_PMD_SIZE);
+		update_mmu_cache_pmd(vma, address, &entry);
+		page_remove_rmap(new_page);
+		goto fail_putback;
+	}
+
 	page_remove_rmap(page);
+
 	/*
 	 * Finish the charge transaction under the page table lock to
 	 * prevent split_huge_page() from dividing up the charge
@@ -1820,9 +1840,13 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 out_fail:
 	count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR);
 out_dropref:
-	entry = pmd_mknonnuma(entry);
-	set_pmd_at(mm, haddr, pmd, entry);
-	update_mmu_cache_pmd(vma, address, &entry);
+	ptl = pmd_lock(mm, pmd);
+	if (pmd_same(*pmd, entry)) {
+		entry = pmd_mknonnuma(entry);
+		set_pmd_at(mm, haddr, pmd, entry);
+		update_mmu_cache_pmd(vma, address, &entry);
+	}
+	spin_unlock(ptl);
 
 	unlock_page(page);
 	put_page(page);
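
Both new page_count() checks in migrate_misplaced_transhuge_page() encode one invariant: at this stage the THP should be referenced exactly twice, once by the isolation performed for migration and once by its single PMD mapping, so any extra reference can only belong to a racing fast GUP and the migration must be backed out. A hedged sketch of that check; the helper name is illustrative and not in the patch:

/*
 * Illustrative helper, not part of the patch. An isolated THP mapped by a
 * single PMD is expected to hold exactly two references: one taken when it
 * was isolated from the LRU for migration and one for the mapping itself.
 * Anything more means a parallel get_user_pages_fast() (or similar) pinned
 * the page, so the migration is aborted and the original PMD restored.
 */
static inline bool thp_migration_raced_with_gup(struct page *page)
{
	return page_count(page) != 2;
}
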