author		Mel Gorman <mgorman@suse.de>	2013-10-07 06:28:46 -0400
committer	Ingo Molnar <mingo@kernel.org>	2013-10-09 06:39:45 -0400
commit		a54a407fbf7735fd8f7841375574f5d9b0375f93 (patch)
tree		29a4af92708dfc99f1693e9a313b53474d50c496 /mm
parent		8191acbd30c73e45c24ad16c372e0b42cc7ac8f8 (diff)
mm: Close races between THP migration and PMD numa clearing
THP migration uses the page lock to guard against parallel allocations
but there are cases like this still open

  Task A					Task B
  ---------------------			---------------------
  do_huge_pmd_numa_page			do_huge_pmd_numa_page
  lock_page
  mpol_misplaced == -1
  unlock_page
  goto clear_pmdnuma
						lock_page
						mpol_misplaced == 2
						migrate_misplaced_transhuge
  pmd = pmd_mknonnuma
  set_pmd_at

During hours of testing, one crashed with weird errors and while I have
no direct evidence, I suspect something like the race above happened.
This patch extends the page lock to being held until the pmd_numa is
cleared to prevent migration starting in parallel while the pmd_numa is
being cleared. It also flushes the old pmd entry and orders pagetable
insertion before rmap insertion.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1381141781-10992-9-git-send-email-mgorman@suse.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
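[Editor's illustration, not part of the patch: a minimal userspace sketch of the locking pattern the fix adopts, with a pthread mutex standing in for the page lock and a bool for the pmd_numa bit. The names fault_ctx, handle_numa_fault and start_migration are hypothetical analogues, not kernel interfaces. The point is only the ordering: the flag is cleared while the lock is still held, so a second task can no longer start a migration in the window between the placement check and the clear.]

	/* Sketch only: pthread mutex in place of lock_page()/unlock_page(). */
	#include <pthread.h>
	#include <stdbool.h>

	struct fault_ctx {
		pthread_mutex_t page_lock;	/* analogue of the page lock */
		bool pmd_numa;			/* analogue of the pmd_numa bit */
		bool migrating;
	};

	/* Analogue of migrate_misplaced_transhuge_page(): only ever started
	 * while page_lock is held, so it cannot race with the clearing path. */
	static void start_migration(struct fault_ctx *ctx)
	{
		ctx->migrating = true;
	}

	static void handle_numa_fault(struct fault_ctx *ctx, int target_nid)
	{
		pthread_mutex_lock(&ctx->page_lock);
		if (target_nid == -1) {
			/* The patched ordering: clear pmd_numa *before*
			 * dropping the lock. The buggy ordering unlocked
			 * first, giving another task a window to begin a
			 * migration against a pmd about to go non-NUMA. */
			ctx->pmd_numa = false;
			pthread_mutex_unlock(&ctx->page_lock);
			return;
		}
		start_migration(ctx);
		pthread_mutex_unlock(&ctx->page_lock);
	}

	int main(void)
	{
		struct fault_ctx ctx = { PTHREAD_MUTEX_INITIALIZER, true, false };

		handle_numa_fault(&ctx, -1);	/* Task A: clears pmd_numa under the lock */
		handle_numa_fault(&ctx, 2);	/* Task B: serialised behind Task A */
		return 0;
	}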
Diffstat (limited to 'mm')
-rw-r--r--	mm/huge_memory.c	33
-rw-r--r--	mm/migrate.c	19
2 files changed, 26 insertions(+), 26 deletions(-)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index c3bb65f284d5..d4928769680f 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1304,24 +1304,25 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	target_nid = mpol_misplaced(page, vma, haddr);
 	if (target_nid == -1) {
 		/* If the page was locked, there are no parallel migrations */
-		if (page_locked) {
-			unlock_page(page);
+		if (page_locked)
 			goto clear_pmdnuma;
-		}
 
-		/* Otherwise wait for potential migrations and retry fault */
+		/*
+		 * Otherwise wait for potential migrations and retry. We do
+		 * relock and check_same as the page may no longer be mapped.
+		 * As the fault is being retried, do not account for it.
+		 */
 		spin_unlock(&mm->page_table_lock);
 		wait_on_page_locked(page);
+		page_nid = -1;
 		goto out;
 	}
 
 	/* Page is misplaced, serialise migrations and parallel THP splits */
 	get_page(page);
 	spin_unlock(&mm->page_table_lock);
-	if (!page_locked) {
+	if (!page_locked)
 		lock_page(page);
-		page_locked = true;
-	}
 	anon_vma = page_lock_anon_vma_read(page);
 
 	/* Confirm the PMD did not change while page_table_lock was released */
@@ -1329,32 +1330,28 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (unlikely(!pmd_same(pmd, *pmdp))) {
 		unlock_page(page);
 		put_page(page);
+		page_nid = -1;
 		goto out_unlock;
 	}
 
-	/* Migrate the THP to the requested node */
+	/*
+	 * Migrate the THP to the requested node, returns with page unlocked
+	 * and pmd_numa cleared.
+	 */
 	spin_unlock(&mm->page_table_lock);
 	migrated = migrate_misplaced_transhuge_page(mm, vma,
 				pmdp, pmd, addr, page, target_nid);
 	if (migrated)
 		page_nid = target_nid;
-	else
-		goto check_same;
 
 	goto out;
-
-check_same:
-	spin_lock(&mm->page_table_lock);
-	if (unlikely(!pmd_same(pmd, *pmdp))) {
-		/* Someone else took our fault */
-		page_nid = -1;
-		goto out_unlock;
-	}
 clear_pmdnuma:
+	BUG_ON(!PageLocked(page));
 	pmd = pmd_mknonnuma(pmd);
 	set_pmd_at(mm, haddr, pmdp, pmd);
 	VM_BUG_ON(pmd_numa(*pmdp));
 	update_mmu_cache_pmd(vma, addr, pmdp);
+	unlock_page(page);
 out_unlock:
 	spin_unlock(&mm->page_table_lock);
 
diff --git a/mm/migrate.c b/mm/migrate.c
index a26bccd44ccb..7bd90d3b16bb 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1713,12 +1713,12 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 		unlock_page(new_page);
 		put_page(new_page);		/* Free it */
 
-		unlock_page(page);
+		/* Retake the callers reference and putback on LRU */
+		get_page(page);
 		putback_lru_page(page);
-
-		count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR);
-		isolated = 0;
-		goto out;
+		mod_zone_page_state(page_zone(page),
+			 NR_ISOLATED_ANON + page_lru, -HPAGE_PMD_NR);
+		goto out_fail;
 	}
 
 	/*
@@ -1735,9 +1735,9 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 	entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
 	entry = pmd_mkhuge(entry);
 
-	page_add_new_anon_rmap(new_page, vma, haddr);
-
+	pmdp_clear_flush(vma, haddr, pmd);
 	set_pmd_at(mm, haddr, pmd, entry);
+	page_add_new_anon_rmap(new_page, vma, haddr);
 	update_mmu_cache_pmd(vma, address, &entry);
 	page_remove_rmap(page);
 	/*
@@ -1756,7 +1756,6 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 	count_vm_events(PGMIGRATE_SUCCESS, HPAGE_PMD_NR);
 	count_vm_numa_events(NUMA_PAGE_MIGRATE, HPAGE_PMD_NR);
 
-out:
 	mod_zone_page_state(page_zone(page),
 			NR_ISOLATED_ANON + page_lru,
 			-HPAGE_PMD_NR);
@@ -1765,6 +1764,10 @@ out:
 out_fail:
 	count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR);
 out_dropref:
+	entry = pmd_mknonnuma(entry);
+	set_pmd_at(mm, haddr, pmd, entry);
+	update_mmu_cache_pmd(vma, address, &entry);
+
 	unlock_page(page);
 	put_page(page);
 	return 0;
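
[Editor's illustration, not part of the patch: the migrate.c hunks also fix a publication order, moving set_pmd_at() before page_add_new_anon_rmap() so anything that finds the new page through the rmap also finds a valid page-table entry. A loose userspace analogue of that order, sketched with C11 release/acquire atomics; page_map, rmap_head and the function names are hypothetical, and the flush step (pmdp_clear_flush) is not modelled.]

	/* Sketch only: publish-after-initialise, an analogue of
	 * "set_pmd_at() before page_add_new_anon_rmap()". */
	#include <stdatomic.h>
	#include <stddef.h>

	struct page_map {
		_Atomic(void *) pte;	/* stands in for the pmd entry */
	};

	/* Stands in for the anon rmap: the index observers walk. */
	_Atomic(struct page_map *) rmap_head;

	void install_and_publish(struct page_map *pm, void *new_page)
	{
		/* 1. Install the translation first (the set_pmd_at() step). */
		atomic_store_explicit(&pm->pte, new_page, memory_order_release);

		/* 2. Only then make it discoverable through the reverse map
		 * (the page_add_new_anon_rmap() step). The release store
		 * guarantees that an observer who sees pm via rmap_head
		 * also sees pm->pte already set. */
		atomic_store_explicit(&rmap_head, pm, memory_order_release);
	}

	void *lookup_via_rmap(void)
	{
		struct page_map *pm =
			atomic_load_explicit(&rmap_head, memory_order_acquire);

		/* Never observes a published mapping with a still-NULL pte. */
		return pm ? atomic_load_explicit(&pm->pte, memory_order_relaxed)
			  : NULL;
	}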