author	Mel Gorman <mgorman@suse.de>	2013-10-07 06:28:46 -0400
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>	2013-11-12 22:05:34 -0500
commit	a490bb33b51d0fc625c87ed5fff1edfd3a2afeb0 (patch)
tree	32ad68b600db77e4484fc9f8def93de5eeb67f0b /mm/huge_memory.c
parent	174dfa40d63b2250e299d5376937d200e4662b7c (diff)
mm: Close races between THP migration and PMD numa clearing
commit 3f926ab945b60a5824369d21add7710622a2eac0 upstream.

THP migration uses the page lock to guard against parallel allocations
but there are cases like this still open

  Task A                          Task B
  ---------------------           ---------------------
  do_huge_pmd_numa_page           do_huge_pmd_numa_page
  lock_page
  mpol_misplaced == -1
  unlock_page
  goto clear_pmdnuma
                                  lock_page
                                  mpol_misplaced == 2
                                  migrate_misplaced_transhuge
  pmd = pmd_mknonnuma
  set_pmd_at

During hours of testing, one crashed with weird errors and while I have
no direct evidence, I suspect something like the race above happened.

This patch extends the page lock to being held until the pmd_numa is
cleared to prevent migration starting in parallel while the pmd_numa is
being cleared. It also flushes the old pmd entry and orders pagetable
insertion before rmap insertion.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1381141781-10992-9-git-send-email-mgorman@suse.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
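The locking pattern the patch relies on can be sketched outside the kernel. The following is a minimal, self-contained userspace model (not kernel code): page_lock, pmd_numa and the "migrate" branch are simplified stand-ins for the page lock, the NUMA pmd bit and THP migration. It only illustrates why clearing the flag while still holding the lock closes the window in which a parallel fault could start a migration.

	/*
	 * Simplified model of the fix: clear the "numa" flag while the page
	 * lock is still held, so a concurrent fault that wants to migrate
	 * must take the lock first and will then see the flag already clear.
	 */
	#include <pthread.h>
	#include <stdbool.h>
	#include <stdio.h>

	static pthread_mutex_t page_lock = PTHREAD_MUTEX_INITIALIZER;
	static bool pmd_numa = true;	/* stand-in for the pmd_numa bit */

	/* Fault path that finds the page is not misplaced. */
	static void fault_not_misplaced(void)
	{
		pthread_mutex_lock(&page_lock);
		/* Flag is cleared under the lock; no migration can start here. */
		pmd_numa = false;
		pthread_mutex_unlock(&page_lock);
	}

	/* Fault path that decides the page is misplaced and wants to migrate. */
	static void *fault_misplaced(void *arg)
	{
		(void)arg;
		pthread_mutex_lock(&page_lock);
		if (pmd_numa)
			printf("migrating THP\n");	/* migration would run here */
		else
			printf("pmd_numa already cleared, skip migration\n");
		pthread_mutex_unlock(&page_lock);
		return NULL;
	}

	int main(void)
	{
		pthread_t t;

		pthread_create(&t, NULL, fault_misplaced, NULL);
		fault_not_misplaced();
		pthread_join(t, NULL);
		return 0;
	}

Whichever thread wins the lock, the other observes a consistent state; the unfixed ordering (unlock, then clear the flag) is what allowed the migration in Task B to race with Task A's set_pmd_at above.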
Diffstat (limited to 'mm/huge_memory.c')
-rw-r--r--	mm/huge_memory.c	33
1 file changed, 15 insertions(+), 18 deletions(-)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index f276efe68c82..c403a74e4bee 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1314,24 +1314,25 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	target_nid = mpol_misplaced(page, vma, haddr);
 	if (target_nid == -1) {
 		/* If the page was locked, there are no parallel migrations */
-		if (page_locked) {
-			unlock_page(page);
+		if (page_locked)
 			goto clear_pmdnuma;
-		}
 
-		/* Otherwise wait for potential migrations and retry fault */
+		/*
+		 * Otherwise wait for potential migrations and retry. We do
+		 * relock and check_same as the page may no longer be mapped.
+		 * As the fault is being retried, do not account for it.
+		 */
 		spin_unlock(&mm->page_table_lock);
 		wait_on_page_locked(page);
+		page_nid = -1;
 		goto out;
 	}
 
 	/* Page is misplaced, serialise migrations and parallel THP splits */
 	get_page(page);
 	spin_unlock(&mm->page_table_lock);
-	if (!page_locked) {
+	if (!page_locked)
 		lock_page(page);
-		page_locked = true;
-	}
 	anon_vma = page_lock_anon_vma_read(page);
 
 	/* Confirm the PTE did not while locked */
@@ -1339,32 +1340,28 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (unlikely(!pmd_same(pmd, *pmdp))) {
 		unlock_page(page);
 		put_page(page);
+		page_nid = -1;
 		goto out_unlock;
 	}
 
-	/* Migrate the THP to the requested node */
+	/*
+	 * Migrate the THP to the requested node, returns with page unlocked
+	 * and pmd_numa cleared.
+	 */
 	spin_unlock(&mm->page_table_lock);
 	migrated = migrate_misplaced_transhuge_page(mm, vma,
 				pmdp, pmd, addr, page, target_nid);
 	if (migrated)
 		page_nid = target_nid;
-	else
-		goto check_same;
 
 	goto out;
-
-check_same:
-	spin_lock(&mm->page_table_lock);
-	if (unlikely(!pmd_same(pmd, *pmdp))) {
-		/* Someone else took our fault */
-		page_nid = -1;
-		goto out_unlock;
-	}
 clear_pmdnuma:
+	BUG_ON(!PageLocked(page));
 	pmd = pmd_mknonnuma(pmd);
 	set_pmd_at(mm, haddr, pmdp, pmd);
 	VM_BUG_ON(pmd_numa(*pmdp));
 	update_mmu_cache_pmd(vma, addr, pmdp);
+	unlock_page(page);
 out_unlock:
 	spin_unlock(&mm->page_table_lock);
 