| author | Mel Gorman <mgorman@suse.de> | 2013-12-18 20:08:32 -0500 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-12-18 22:04:50 -0500 |
| commit | 2b4847e73004c10ae6666c2e27b5c5430aed8698 (patch) | |
| tree | f5062cda19087b1d9a6830feffc4089aeb5b7fc8 /mm/huge_memory.c | |
| parent | c97102ba96324da330078ad8619ba4dfe840dbe3 (diff) | |
mm: numa: serialise parallel get_user_page against THP migration
Base pages are unmapped and flushed from cache and TLB during normal
page migration and replaced with a migration entry that causes any
parallel NUMA hinting fault or gup to block until migration completes.
THP does not unmap pages due to a lack of support for migration entries
at a PMD level. This allows races with get_user_pages and
get_user_pages_fast, which commit 3f926ab945b6 ("mm: Close races between
THP migration and PMD numa clearing") made worse by introducing a
pmd_clear_flush().
This patch forces get_user_page (fast and normal) on a pmd_numa page to
go through the slow get_user_page path where it will serialise against
THP migration and properly account for the NUMA hinting fault. On the
migration side the page table lock is taken for each PTE update.
Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Alex Thorlton <athorlton@sgi.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
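The diffstat below is limited to mm/huge_memory.c, but the change described above also has a fast-gup side: the lockless walker must give up on NUMA hinting PMDs so that get_user_pages_fast() falls back to the slow path. A minimal sketch of that idea, assuming the 3.13-era x86 gup_pmd_range() walker in arch/x86/mm/gup.c (paraphrased, not the verbatim hunk from this commit):

```c
/*
 * Sketch only: paraphrase of the fast-gup bail-out, assuming the 3.13-era
 * x86 gup_pmd_range() walker. Not the literal hunk from this commit.
 */
		pmd_t pmd = *pmdp;

		/*
		 * NUMA hinting faults must be handled in the gup slow path so
		 * they can be accounted and serialised against THP migration.
		 * Returning 0 makes get_user_pages_fast() fall back to the
		 * slower get_user_pages() path.
		 */
		if (pmd_numa(pmd))
			return 0;
```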
Diffstat (limited to 'mm/huge_memory.c')
-rw-r--r--  mm/huge_memory.c | 24 ++++++++++++++++--------
1 file changed, 16 insertions, 8 deletions
```diff
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 33a5dc492810..51f069303ab9 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1243,6 +1243,10 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
 	if ((flags & FOLL_DUMP) && is_huge_zero_pmd(*pmd))
 		return ERR_PTR(-EFAULT);
 
+	/* Full NUMA hinting faults to serialise migration in fault paths */
+	if ((flags & FOLL_NUMA) && pmd_numa(*pmd))
+		goto out;
+
 	page = pmd_page(*pmd);
 	VM_BUG_ON(!PageHead(page));
 	if (flags & FOLL_TOUCH) {
@@ -1323,23 +1327,27 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		/* If the page was locked, there are no parallel migrations */
 		if (page_locked)
 			goto clear_pmdnuma;
+	}
 
-		/*
-		 * Otherwise wait for potential migrations and retry. We do
-		 * relock and check_same as the page may no longer be mapped.
-		 * As the fault is being retried, do not account for it.
-		 */
+	/*
+	 * If there are potential migrations, wait for completion and retry. We
+	 * do not relock and check_same as the page may no longer be mapped.
+	 * Furtermore, even if the page is currently misplaced, there is no
+	 * guarantee it is still misplaced after the migration completes.
+	 */
+	if (!page_locked) {
 		spin_unlock(ptl);
 		wait_on_page_locked(page);
 		page_nid = -1;
 		goto out;
 	}
 
-	/* Page is misplaced, serialise migrations and parallel THP splits */
+	/*
+	 * Page is misplaced. Page lock serialises migrations. Acquire anon_vma
+	 * to serialises splits
+	 */
 	get_page(page);
 	spin_unlock(ptl);
-	if (!page_locked)
-		lock_page(page);
 	anon_vma = page_lock_anon_vma_read(page);
 
 	/* Confirm the PMD did not change while page_table_lock was released */
```
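For orientation, the path by which an ordinary get_user_pages() call reaches the code above: __get_user_pages() enables FOLL_NUMA by default, follow_trans_huge_pmd() then refuses to return a page for a pmd_numa PMD, and the resulting handle_mm_fault() lands in do_huge_pmd_numa_page(), which either takes the page lock itself or waits for the migration that holds it. A hedged sketch, paraphrased from 3.13-era mm/memory.c and not part of this commit:

```c
/*
 * Paraphrased from __get_user_pages() in 3.13-era mm/memory.c (not part of
 * this commit): FOLL_NUMA is set by default, so the check added to
 * follow_trans_huge_pmd() above fires for ordinary gup callers.
 */
	if (!(gup_flags & FOLL_FORCE))
		gup_flags |= FOLL_NUMA;

/*
 * Rough call chain for the serialisation described in the changelog:
 *
 *   get_user_pages()
 *     __get_user_pages()          // sets FOLL_NUMA as above
 *       follow_page_mask()
 *         follow_trans_huge_pmd() // pmd_numa: returns no page (first hunk)
 *       handle_mm_fault()
 *         do_huge_pmd_numa_page() // takes, or waits on, the page lock held
 *                                 // across THP migration (second hunk)
 */
```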