-rw-r--r--  arch/x86/mm/gup.c | 13
-rw-r--r--  mm/huge_memory.c  | 24
-rw-r--r--  mm/migrate.c      | 38
3 files changed, 60 insertions, 15 deletions
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
index dd74e46828c0..0596e8e0cc19 100644
--- a/arch/x86/mm/gup.c
+++ b/arch/x86/mm/gup.c
@@ -83,6 +83,12 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
 		pte_t pte = gup_get_pte(ptep);
 		struct page *page;
 
+		/* Similar to the PMD case, NUMA hinting must take slow path */
+		if (pte_numa(pte)) {
+			pte_unmap(ptep);
+			return 0;
+		}
+
 		if ((pte_flags(pte) & (mask | _PAGE_SPECIAL)) != mask) {
 			pte_unmap(ptep);
 			return 0;
@@ -167,6 +173,13 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
 		if (pmd_none(pmd) || pmd_trans_splitting(pmd))
 			return 0;
 		if (unlikely(pmd_large(pmd))) {
+			/*
+			 * NUMA hinting faults need to be handled in the GUP
+			 * slowpath for accounting purposes and so that they
+			 * can be serialised against THP migration.
+			 */
+			if (pmd_numa(pmd))
+				return 0;
 			if (!gup_huge_pmd(pmd, addr, next, write, pages, nr))
 				return 0;
 		} else {
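Note: both hunks above follow one pattern: the lockless fast path refuses any entry it cannot handle safely without locks, and returning 0 makes the caller fall back to the regular, lock-taking GUP path where the hinting fault can be accounted and serialised against migration. The userspace sketch below is a rough model of that bail-out shape only; the entry flags and helper names are invented for illustration, not kernel API.

#include <stdbool.h>
#include <stdio.h>

#define ENTRY_PRESENT 0x1
#define ENTRY_NUMA    0x2	/* stands in for pte_numa()/pmd_numa() */

/* Lockless path: refuse anything that needs accounting or serialisation. */
static bool fast_lookup(unsigned long entry)
{
	if (!(entry & ENTRY_PRESENT) || (entry & ENTRY_NUMA))
		return false;	/* like returning 0 from gup_pte_range() */
	return true;
}

/* Lock-taking path: free to handle the hinting marker properly. */
static bool slow_lookup(unsigned long entry)
{
	return (entry & ENTRY_PRESENT) != 0;
}

int main(void)
{
	unsigned long entry = ENTRY_PRESENT | ENTRY_NUMA;

	if (!fast_lookup(entry))
		printf("fast path bailed, slow path result: %d\n",
		       slow_lookup(entry));
	return 0;
}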
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 33a5dc492810..51f069303ab9 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1243,6 +1243,10 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
 	if ((flags & FOLL_DUMP) && is_huge_zero_pmd(*pmd))
 		return ERR_PTR(-EFAULT);
 
+	/* Full NUMA hinting faults to serialise migration in fault paths */
+	if ((flags & FOLL_NUMA) && pmd_numa(*pmd))
+		goto out;
+
 	page = pmd_page(*pmd);
 	VM_BUG_ON(!PageHead(page));
 	if (flags & FOLL_TOUCH) {
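Note: the FOLL_NUMA test makes follow-page-style lookups fail on hinting PMDs, forcing the caller through the fault path where migration is serialised. A minimal sketch of that flag-gated refusal follows; the demo flag values are invented, not the kernel's.

#include <stdbool.h>
#include <stdio.h>

#define DEMO_FOLL_NUMA	0x1	/* invented value, models FOLL_NUMA */
#define DEMO_PMD_NUMA	0x2	/* invented value, models pmd_numa() */

/* Refuse the lookup so the caller falls into the fault path. */
static bool follow_huge_entry(unsigned int flags, unsigned int pmdval)
{
	if ((flags & DEMO_FOLL_NUMA) && (pmdval & DEMO_PMD_NUMA))
		return false;	/* like the "goto out" above */
	return true;
}

int main(void)
{
	printf("followed: %d\n",
	       follow_huge_entry(DEMO_FOLL_NUMA, DEMO_PMD_NUMA));
	return 0;
}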
@@ -1323,23 +1327,27 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		/* If the page was locked, there are no parallel migrations */
 		if (page_locked)
 			goto clear_pmdnuma;
+	}
 
-		/*
-		 * Otherwise wait for potential migrations and retry. We do
-		 * relock and check_same as the page may no longer be mapped.
-		 * As the fault is being retried, do not account for it.
-		 */
+	/*
+	 * If there are potential migrations, wait for completion and retry. We
+	 * do not relock and check_same as the page may no longer be mapped.
+	 * Furthermore, even if the page is currently misplaced, there is no
+	 * guarantee it is still misplaced after the migration completes.
+	 */
+	if (!page_locked) {
 		spin_unlock(ptl);
 		wait_on_page_locked(page);
 		page_nid = -1;
 		goto out;
 	}
 
-	/* Page is misplaced, serialise migrations and parallel THP splits */
+	/*
+	 * Page is misplaced. Page lock serialises migrations. Acquire anon_vma
+	 * to serialise splits.
+	 */
 	get_page(page);
 	spin_unlock(ptl);
-	if (!page_locked)
-		lock_page(page);
 	anon_vma = page_lock_anon_vma_read(page);
 
 	/* Confirm the PMD did not change while page_table_lock was released */
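Note: the rework boils down to this: trylock the page under the page-table lock; if someone else holds the page lock a migration may be in flight, so drop everything, wait for the lock to clear, and retry the fault rather than sleeping with the PTL held. The pthreads sketch below models only that shape: wait_on_page_locked() is approximated by a lock/unlock pair and all names are invented.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t page_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t ptl = PTHREAD_MUTEX_INITIALIZER;	/* page-table lock */

/* One fault attempt; returns false if the caller must retry. */
static bool handle_fault_once(void)
{
	pthread_mutex_lock(&ptl);
	if (pthread_mutex_trylock(&page_lock) != 0) {
		/* Possible parallel migration: drop the PTL and wait. */
		pthread_mutex_unlock(&ptl);
		pthread_mutex_lock(&page_lock);	/* ~wait_on_page_locked() */
		pthread_mutex_unlock(&page_lock);
		return false;			/* retry the fault */
	}
	/* Page lock held: no migration can start against this page now. */
	pthread_mutex_unlock(&ptl);
	/* ... migrate the page or clear the hinting bit here ... */
	pthread_mutex_unlock(&page_lock);
	return true;
}

int main(void)
{
	while (!handle_fault_once())
		;
	puts("fault handled");
	return 0;
}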
diff --git a/mm/migrate.c b/mm/migrate.c
index bb940045fe85..2cabbd5fa5bf 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1722,6 +1722,7 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 	struct page *new_page = NULL;
 	struct mem_cgroup *memcg = NULL;
 	int page_lru = page_is_file_cache(page);
+	pmd_t orig_entry;
 
 	/*
 	 * Rate-limit the amount of data that is being migrated to a node.
@@ -1756,7 +1757,8 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 
 	/* Recheck the target PMD */
 	ptl = pmd_lock(mm, pmd);
-	if (unlikely(!pmd_same(*pmd, entry))) {
+	if (unlikely(!pmd_same(*pmd, entry) || page_count(page) != 2)) {
+fail_putback:
 		spin_unlock(ptl);
 
 		/* Reverse changes made by migrate_page_copy() */
@@ -1786,16 +1788,34 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 	 */
 	mem_cgroup_prepare_migration(page, new_page, &memcg);
 
+	orig_entry = *pmd;
 	entry = mk_pmd(new_page, vma->vm_page_prot);
-	entry = pmd_mknonnuma(entry);
-	entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
 	entry = pmd_mkhuge(entry);
+	entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
 
+	/*
+	 * Clear the old entry under pagetable lock and establish the new PTE.
+	 * Any parallel GUP will either observe the old page blocking on the
+	 * page lock, block on the page table lock or observe the new page.
+	 * The SetPageUptodate on the new page and page_add_new_anon_rmap
+	 * guarantee the copy is visible before the pagetable update.
+	 */
+	flush_cache_range(vma, haddr, haddr + HPAGE_PMD_SIZE);
+	page_add_new_anon_rmap(new_page, vma, haddr);
 	pmdp_clear_flush(vma, haddr, pmd);
 	set_pmd_at(mm, haddr, pmd, entry);
-	page_add_new_anon_rmap(new_page, vma, haddr);
 	update_mmu_cache_pmd(vma, address, &entry);
+
+	if (page_count(page) != 2) {
+		set_pmd_at(mm, haddr, pmd, orig_entry);
+		flush_tlb_range(vma, haddr, haddr + HPAGE_PMD_SIZE);
+		update_mmu_cache_pmd(vma, address, &entry);
+		page_remove_rmap(new_page);
+		goto fail_putback;
+	}
+
 	page_remove_rmap(page);
+
 	/*
 	 * Finish the charge transaction under the page table lock to
 	 * prevent split_huge_page() from dividing up the charge
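Note: the heart of this hunk is a publish-then-recheck sequence: install the new entry, then look at the old page's reference count again; anything other than the expected count (2 here) means a parallel fast GUP grabbed the page between the earlier check and the PMD switch, so the whole operation is rolled back through fail_putback. The C11-atomics sketch below models only the rollback, with an invented page type and an EXPECTED_REFS constant standing in for the hard-coded 2.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define EXPECTED_REFS 2	/* the only references the migration path expects */

struct demo_page {
	atomic_int refcount;
};

static _Atomic(struct demo_page *) pmd_slot;	/* models the PMD entry */

static bool try_migrate(struct demo_page *oldp, struct demo_page *newp)
{
	struct demo_page *orig_entry = atomic_load(&pmd_slot);

	atomic_store(&pmd_slot, newp);		/* like set_pmd_at(new) */
	if (atomic_load(&oldp->refcount) != EXPECTED_REFS) {
		/* Raced with a concurrent reference holder: restore. */
		atomic_store(&pmd_slot, orig_entry);
		return false;			/* like goto fail_putback */
	}
	return true;
}

int main(void)
{
	struct demo_page oldp = { .refcount = EXPECTED_REFS };
	struct demo_page newp = { .refcount = 1 };

	atomic_store(&pmd_slot, &oldp);
	printf("migrated: %d\n", try_migrate(&oldp, &newp));
	return 0;
}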
@@ -1820,9 +1840,13 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 out_fail:
 	count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR);
 out_dropref:
-	entry = pmd_mknonnuma(entry);
-	set_pmd_at(mm, haddr, pmd, entry);
-	update_mmu_cache_pmd(vma, address, &entry);
+	ptl = pmd_lock(mm, pmd);
+	if (pmd_same(*pmd, entry)) {
+		entry = pmd_mknonnuma(entry);
+		set_pmd_at(mm, haddr, pmd, entry);
+		update_mmu_cache_pmd(vma, address, &entry);
+	}
+	spin_unlock(ptl);
 
 	unlock_page(page);
 	put_page(page);
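Note: the out_dropref change applies the classic revalidate-before-write rule: after the lock was dropped, the PMD may have been changed by someone else, so retake the lock and only rewrite the entry if pmd_same() still holds. A small mutex-based model of that rule, with invented names and an invented flag bit:

#include <pthread.h>
#include <stdio.h>

#define DEMO_NUMA_BIT 0x1UL	/* invented, models the NUMA hinting bit */

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned long slot = 0xA000 | DEMO_NUMA_BIT;	/* models *pmd */

/* Only rewrite the entry if it is still the one we saw earlier. */
static void drop_numa_bit(unsigned long seen)
{
	pthread_mutex_lock(&lock);
	if (slot == seen)			/* pmd_same(*pmd, entry) */
		slot = seen & ~DEMO_NUMA_BIT;	/* ~pmd_mknonnuma() */
	pthread_mutex_unlock(&lock);	/* else leave the changed entry alone */
}

int main(void)
{
	drop_numa_bit(0xA000 | DEMO_NUMA_BIT);
	printf("slot = %#lx\n", slot);
	return 0;
}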