author		Mel Gorman <mgorman@suse.de>	2013-10-07 06:28:46 -0400
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>	2013-11-12 22:05:34 -0500
commit		a490bb33b51d0fc625c87ed5fff1edfd3a2afeb0 (patch)
tree		32ad68b600db77e4484fc9f8def93de5eeb67f0b /mm
parent		174dfa40d63b2250e299d5376937d200e4662b7c (diff)
mm: Close races between THP migration and PMD numa clearing
commit 3f926ab945b60a5824369d21add7710622a2eac0 upstream.

THP migration uses the page lock to guard against parallel allocations
but there are cases like this still open

  Task A					Task B
  ---------------------			---------------------
  do_huge_pmd_numa_page			do_huge_pmd_numa_page
  lock_page
  mpol_misplaced == -1
  unlock_page
  goto clear_pmdnuma
					lock_page
					mpol_misplaced == 2
					migrate_misplaced_transhuge
  pmd = pmd_mknonnuma
  set_pmd_at

During hours of testing, one crashed with weird errors and while I have
no direct evidence, I suspect something like the race above happened.
This patch extends the page lock to being held until the pmd_numa is
cleared to prevent migration starting in parallel while the pmd_numa is
being cleared. It also flushes the old pmd entry and orders pagetable
insertion before rmap insertion.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1381141781-10992-9-git-send-email-mgorman@suse.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
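To make the window concrete, here is a deliberately simplified userspace analogy of the race (illustrative only: the mutex stands in for the page lock, and pmd_numa, migration_started, task_a and task_b are names invented for this demo, not kernel APIs). Task A decides no migration is needed, drops the "page lock", and only then clears the NUMA marker; Task B can begin a "migration" inside that window.

/* Build with: cc -pthread thp_race_demo.c -o thp_race_demo */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t page_lock = PTHREAD_MUTEX_INITIALIZER; /* the "page lock" */
static atomic_bool pmd_numa = true;            /* the NUMA hinting marker */
static atomic_bool migration_started = false;

/* Task A, buggy shape: unlock first, clear the marker afterwards */
static void *task_a(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&page_lock);
	/* mpol_misplaced() == -1: page is fine where it is */
	pthread_mutex_unlock(&page_lock);

	usleep(1000);                          /* widen the window for the demo */
	atomic_store(&pmd_numa, false);        /* clear_pmdnuma, outside the lock */
	if (atomic_load(&migration_started))
		printf("race hit: migration began while the PMD was being cleared\n");
	return NULL;
}

/* Task B: takes the lock inside the window and sees the stale marker */
static void *task_b(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&page_lock);
	if (atomic_load(&pmd_numa))
		atomic_store(&migration_started, true); /* migrate_misplaced_transhuge */
	pthread_mutex_unlock(&page_lock);
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, task_a, NULL);
	pthread_create(&b, NULL, task_b, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	/* The fix in these terms: task A must hold page_lock until after
	 * pmd_numa is cleared, so task B cannot start inside the window.
	 * Being a race, the message above may not print on every run. */
	return 0;
}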
Diffstat (limited to 'mm')
 mm/huge_memory.c | 33 +++++++++++++++------------------
 mm/migrate.c     | 19 +++++++++++--------
 2 files changed, 26 insertions(+), 26 deletions(-)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index f276efe68c82..c403a74e4bee 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1314,24 +1314,25 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	target_nid = mpol_misplaced(page, vma, haddr);
 	if (target_nid == -1) {
 		/* If the page was locked, there are no parallel migrations */
-		if (page_locked) {
-			unlock_page(page);
+		if (page_locked)
 			goto clear_pmdnuma;
-		}
 
-		/* Otherwise wait for potential migrations and retry fault */
+		/*
+		 * Otherwise wait for potential migrations and retry. We do
+		 * relock and check_same as the page may no longer be mapped.
+		 * As the fault is being retried, do not account for it.
+		 */
 		spin_unlock(&mm->page_table_lock);
 		wait_on_page_locked(page);
+		page_nid = -1;
 		goto out;
 	}
 
 	/* Page is misplaced, serialise migrations and parallel THP splits */
 	get_page(page);
 	spin_unlock(&mm->page_table_lock);
-	if (!page_locked) {
+	if (!page_locked)
 		lock_page(page);
-		page_locked = true;
-	}
 	anon_vma = page_lock_anon_vma_read(page);
 
 	/* Confirm the PTE did not while locked */
@@ -1339,32 +1340,28 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (unlikely(!pmd_same(pmd, *pmdp))) {
 		unlock_page(page);
 		put_page(page);
+		page_nid = -1;
 		goto out_unlock;
 	}
 
-	/* Migrate the THP to the requested node */
+	/*
+	 * Migrate the THP to the requested node, returns with page unlocked
+	 * and pmd_numa cleared.
+	 */
 	spin_unlock(&mm->page_table_lock);
 	migrated = migrate_misplaced_transhuge_page(mm, vma,
 				pmdp, pmd, addr, page, target_nid);
 	if (migrated)
 		page_nid = target_nid;
-	else
-		goto check_same;
 
 	goto out;
-
-check_same:
-	spin_lock(&mm->page_table_lock);
-	if (unlikely(!pmd_same(pmd, *pmdp))) {
-		/* Someone else took our fault */
-		page_nid = -1;
-		goto out_unlock;
-	}
 clear_pmdnuma:
+	BUG_ON(!PageLocked(page));
 	pmd = pmd_mknonnuma(pmd);
 	set_pmd_at(mm, haddr, pmdp, pmd);
 	VM_BUG_ON(pmd_numa(*pmdp));
 	update_mmu_cache_pmd(vma, addr, pmdp);
+	unlock_page(page);
 out_unlock:
 	spin_unlock(&mm->page_table_lock);
 
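Read together, the new lines give the tail of do_huge_pmd_numa_page() the following shape; every path into clear_pmdnuma now arrives with the page locked, and the lock is dropped only after the pmd has been rewritten. This is a reconstruction from the hunk above, with editorial comments added:

clear_pmdnuma:
	BUG_ON(!PageLocked(page));	/* the lock that excludes parallel migration */
	pmd = pmd_mknonnuma(pmd);
	set_pmd_at(mm, haddr, pmdp, pmd);
	VM_BUG_ON(pmd_numa(*pmdp));
	update_mmu_cache_pmd(vma, addr, pmdp);
	unlock_page(page);		/* only now may a migration begin */
out_unlock:
	spin_unlock(&mm->page_table_lock);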
diff --git a/mm/migrate.c b/mm/migrate.c
index 25ca7caf9092..bf436c15f055 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1710,12 +1710,12 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 		unlock_page(new_page);
 		put_page(new_page);		/* Free it */
 
-		unlock_page(page);
+		/* Retake the callers reference and putback on LRU */
+		get_page(page);
 		putback_lru_page(page);
-
-		count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR);
-		isolated = 0;
-		goto out;
+		mod_zone_page_state(page_zone(page),
+			 NR_ISOLATED_ANON + page_lru, -HPAGE_PMD_NR);
+		goto out_fail;
 	}
 
 	/*
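A note on the failure branch above: putback_lru_page() consumes a page reference, and since the shared out: label is removed in a later hunk, this branch now retakes the caller's reference and undoes the NR_ISOLATED_ANON accounting itself before jumping to out_fail (which bumps PGMIGRATE_FAIL). The same new lines with editorial annotations:

		/* Retake the callers reference and putback on LRU */
		get_page(page);			/* putback_lru_page() drops one ref */
		putback_lru_page(page);
		mod_zone_page_state(page_zone(page),	/* page is no longer isolated */
			 NR_ISOLATED_ANON + page_lru, -HPAGE_PMD_NR);
		goto out_fail;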
@@ -1732,9 +1732,9 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 	entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
 	entry = pmd_mkhuge(entry);
 
-	page_add_new_anon_rmap(new_page, vma, haddr);
-
+	pmdp_clear_flush(vma, haddr, pmd);
 	set_pmd_at(mm, haddr, pmd, entry);
+	page_add_new_anon_rmap(new_page, vma, haddr);
 	update_mmu_cache_pmd(vma, address, &entry);
 	page_remove_rmap(page);
 	/*
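This hunk is the "flushes the old pmd entry and orders pagetable insertion before rmap insertion" part of the changelog: the stale entry is flushed before the replacement is installed, and the new page becomes visible to rmap walkers only once it is actually mapped. The same new lines with editorial annotations:

	pmdp_clear_flush(vma, haddr, pmd);	/* remove the old entry and flush the TLB first */
	set_pmd_at(mm, haddr, pmd, entry);	/* install the mapping of new_page */
	page_add_new_anon_rmap(new_page, vma, haddr);	/* rmap insertion after pagetable insertion */
	update_mmu_cache_pmd(vma, address, &entry);
	page_remove_rmap(page);			/* the old page's rmap goes last */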
@@ -1753,7 +1753,6 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 	count_vm_events(PGMIGRATE_SUCCESS, HPAGE_PMD_NR);
 	count_vm_numa_events(NUMA_PAGE_MIGRATE, HPAGE_PMD_NR);
 
-out:
 	mod_zone_page_state(page_zone(page),
 			NR_ISOLATED_ANON + page_lru,
 			-HPAGE_PMD_NR);
@@ -1762,6 +1761,10 @@ out:
 out_fail:
 	count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR);
 out_dropref:
+	entry = pmd_mknonnuma(entry);
+	set_pmd_at(mm, haddr, pmd, entry);
+	update_mmu_cache_pmd(vma, address, &entry);
+
 	unlock_page(page);
 	put_page(page);
 	return 0;
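A closing note on the new out_dropref lines, matching the changelog's "returns with page unlocked and pmd_numa cleared" (the reasoning about why is editorial): with the fault handler's check_same fallback deleted in the huge_memory.c hunk, nothing upstream will clear the marker after a failed migration, so the migration code must leave a usable non-NUMA pmd behind before it unlocks the page:

out_dropref:
	entry = pmd_mknonnuma(entry);		/* clear the NUMA marker ourselves */
	set_pmd_at(mm, haddr, pmd, entry);	/* reinstate a normal huge pmd */
	update_mmu_cache_pmd(vma, address, &entry);

	unlock_page(page);			/* pairs with the lock taken in the fault path */
	put_page(page);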