diff options
author | Mel Gorman <mgorman@suse.de> | 2013-10-07 06:28:46 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2013-10-29 06:38:05 -0400 |
commit | 3f926ab945b60a5824369d21add7710622a2eac0 (patch) | |
tree | c3098df81f687abec56d42d864a07c214ff7f017 /mm | |
parent | c61109e34f60f6e85bb43c5a1cd51c0e3db40847 (diff) |
mm: Close races between THP migration and PMD numa clearing
THP migration uses the page lock to guard against parallel allocations
but there are cases like this still open
Task A Task B
--------------------- ---------------------
do_huge_pmd_numa_page do_huge_pmd_numa_page
lock_page
mpol_misplaced == -1
unlock_page
goto clear_pmdnuma
lock_page
mpol_misplaced == 2
migrate_misplaced_transhuge
pmd = pmd_mknonnuma
set_pmd_at
During hours of testing, one crashed with weird errors and while I have
no direct evidence, I suspect something like the race above happened.
This patch extends the page lock to being held until the pmd_numa is
cleared to prevent migration starting in parallel while the pmd_numa is
being cleared. It also flushes the old pmd entry and orders pagetable
insertion before rmap insertion.
Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: <stable@kernel.org>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1381141781-10992-9-git-send-email-mgorman@suse.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'mm')
-rw-r--r-- | mm/huge_memory.c | 33 | ||||
-rw-r--r-- | mm/migrate.c | 19 |
2 files changed, 26 insertions, 26 deletions
diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 00ddfcdd810e..cca80d96e509 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c | |||
@@ -1304,24 +1304,25 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
1304 | target_nid = mpol_misplaced(page, vma, haddr); | 1304 | target_nid = mpol_misplaced(page, vma, haddr); |
1305 | if (target_nid == -1) { | 1305 | if (target_nid == -1) { |
1306 | /* If the page was locked, there are no parallel migrations */ | 1306 | /* If the page was locked, there are no parallel migrations */ |
1307 | if (page_locked) { | 1307 | if (page_locked) |
1308 | unlock_page(page); | ||
1309 | goto clear_pmdnuma; | 1308 | goto clear_pmdnuma; |
1310 | } | ||
1311 | 1309 | ||
1312 | /* Otherwise wait for potential migrations and retry fault */ | 1310 | /* |
1311 | * Otherwise wait for potential migrations and retry. We do | ||
1312 | * relock and check_same as the page may no longer be mapped. | ||
1313 | * As the fault is being retried, do not account for it. | ||
1314 | */ | ||
1313 | spin_unlock(&mm->page_table_lock); | 1315 | spin_unlock(&mm->page_table_lock); |
1314 | wait_on_page_locked(page); | 1316 | wait_on_page_locked(page); |
1317 | page_nid = -1; | ||
1315 | goto out; | 1318 | goto out; |
1316 | } | 1319 | } |
1317 | 1320 | ||
1318 | /* Page is misplaced, serialise migrations and parallel THP splits */ | 1321 | /* Page is misplaced, serialise migrations and parallel THP splits */ |
1319 | get_page(page); | 1322 | get_page(page); |
1320 | spin_unlock(&mm->page_table_lock); | 1323 | spin_unlock(&mm->page_table_lock); |
1321 | if (!page_locked) { | 1324 | if (!page_locked) |
1322 | lock_page(page); | 1325 | lock_page(page); |
1323 | page_locked = true; | ||
1324 | } | ||
1325 | anon_vma = page_lock_anon_vma_read(page); | 1326 | anon_vma = page_lock_anon_vma_read(page); |
1326 | 1327 | ||
1327 | /* Confirm the PTE did not while locked */ | 1328 | /* Confirm the PTE did not while locked */ |
@@ -1329,32 +1330,28 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
1329 | if (unlikely(!pmd_same(pmd, *pmdp))) { | 1330 | if (unlikely(!pmd_same(pmd, *pmdp))) { |
1330 | unlock_page(page); | 1331 | unlock_page(page); |
1331 | put_page(page); | 1332 | put_page(page); |
1333 | page_nid = -1; | ||
1332 | goto out_unlock; | 1334 | goto out_unlock; |
1333 | } | 1335 | } |
1334 | 1336 | ||
1335 | /* Migrate the THP to the requested node */ | 1337 | /* |
1338 | * Migrate the THP to the requested node, returns with page unlocked | ||
1339 | * and pmd_numa cleared. | ||
1340 | */ | ||
1336 | spin_unlock(&mm->page_table_lock); | 1341 | spin_unlock(&mm->page_table_lock); |
1337 | migrated = migrate_misplaced_transhuge_page(mm, vma, | 1342 | migrated = migrate_misplaced_transhuge_page(mm, vma, |
1338 | pmdp, pmd, addr, page, target_nid); | 1343 | pmdp, pmd, addr, page, target_nid); |
1339 | if (migrated) | 1344 | if (migrated) |
1340 | page_nid = target_nid; | 1345 | page_nid = target_nid; |
1341 | else | ||
1342 | goto check_same; | ||
1343 | 1346 | ||
1344 | goto out; | 1347 | goto out; |
1345 | |||
1346 | check_same: | ||
1347 | spin_lock(&mm->page_table_lock); | ||
1348 | if (unlikely(!pmd_same(pmd, *pmdp))) { | ||
1349 | /* Someone else took our fault */ | ||
1350 | page_nid = -1; | ||
1351 | goto out_unlock; | ||
1352 | } | ||
1353 | clear_pmdnuma: | 1348 | clear_pmdnuma: |
1349 | BUG_ON(!PageLocked(page)); | ||
1354 | pmd = pmd_mknonnuma(pmd); | 1350 | pmd = pmd_mknonnuma(pmd); |
1355 | set_pmd_at(mm, haddr, pmdp, pmd); | 1351 | set_pmd_at(mm, haddr, pmdp, pmd); |
1356 | VM_BUG_ON(pmd_numa(*pmdp)); | 1352 | VM_BUG_ON(pmd_numa(*pmdp)); |
1357 | update_mmu_cache_pmd(vma, addr, pmdp); | 1353 | update_mmu_cache_pmd(vma, addr, pmdp); |
1354 | unlock_page(page); | ||
1358 | out_unlock: | 1355 | out_unlock: |
1359 | spin_unlock(&mm->page_table_lock); | 1356 | spin_unlock(&mm->page_table_lock); |
1360 | 1357 | ||
diff --git a/mm/migrate.c b/mm/migrate.c index 7a7325ee1d08..c04692774e88 100644 --- a/mm/migrate.c +++ b/mm/migrate.c | |||
@@ -1715,12 +1715,12 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm, | |||
1715 | unlock_page(new_page); | 1715 | unlock_page(new_page); |
1716 | put_page(new_page); /* Free it */ | 1716 | put_page(new_page); /* Free it */ |
1717 | 1717 | ||
1718 | unlock_page(page); | 1718 | /* Retake the callers reference and putback on LRU */ |
1719 | get_page(page); | ||
1719 | putback_lru_page(page); | 1720 | putback_lru_page(page); |
1720 | 1721 | mod_zone_page_state(page_zone(page), | |
1721 | count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR); | 1722 | NR_ISOLATED_ANON + page_lru, -HPAGE_PMD_NR); |
1722 | isolated = 0; | 1723 | goto out_fail; |
1723 | goto out; | ||
1724 | } | 1724 | } |
1725 | 1725 | ||
1726 | /* | 1726 | /* |
@@ -1737,9 +1737,9 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm, | |||
1737 | entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); | 1737 | entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); |
1738 | entry = pmd_mkhuge(entry); | 1738 | entry = pmd_mkhuge(entry); |
1739 | 1739 | ||
1740 | page_add_new_anon_rmap(new_page, vma, haddr); | 1740 | pmdp_clear_flush(vma, haddr, pmd); |
1741 | |||
1742 | set_pmd_at(mm, haddr, pmd, entry); | 1741 | set_pmd_at(mm, haddr, pmd, entry); |
1742 | page_add_new_anon_rmap(new_page, vma, haddr); | ||
1743 | update_mmu_cache_pmd(vma, address, &entry); | 1743 | update_mmu_cache_pmd(vma, address, &entry); |
1744 | page_remove_rmap(page); | 1744 | page_remove_rmap(page); |
1745 | /* | 1745 | /* |
@@ -1758,7 +1758,6 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm, | |||
1758 | count_vm_events(PGMIGRATE_SUCCESS, HPAGE_PMD_NR); | 1758 | count_vm_events(PGMIGRATE_SUCCESS, HPAGE_PMD_NR); |
1759 | count_vm_numa_events(NUMA_PAGE_MIGRATE, HPAGE_PMD_NR); | 1759 | count_vm_numa_events(NUMA_PAGE_MIGRATE, HPAGE_PMD_NR); |
1760 | 1760 | ||
1761 | out: | ||
1762 | mod_zone_page_state(page_zone(page), | 1761 | mod_zone_page_state(page_zone(page), |
1763 | NR_ISOLATED_ANON + page_lru, | 1762 | NR_ISOLATED_ANON + page_lru, |
1764 | -HPAGE_PMD_NR); | 1763 | -HPAGE_PMD_NR); |
@@ -1767,6 +1766,10 @@ out: | |||
1767 | out_fail: | 1766 | out_fail: |
1768 | count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR); | 1767 | count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR); |
1769 | out_dropref: | 1768 | out_dropref: |
1769 | entry = pmd_mknonnuma(entry); | ||
1770 | set_pmd_at(mm, haddr, pmd, entry); | ||
1771 | update_mmu_cache_pmd(vma, address, &entry); | ||
1772 | |||
1770 | unlock_page(page); | 1773 | unlock_page(page); |
1771 | put_page(page); | 1774 | put_page(page); |
1772 | return 0; | 1775 | return 0; |