author     Mel Gorman <mgorman@suse.de>     2013-10-07 06:28:46 -0400
committer  Ingo Molnar <mingo@kernel.org>   2013-10-09 06:39:45 -0400
commit     a54a407fbf7735fd8f7841375574f5d9b0375f93
tree       29a4af92708dfc99f1693e9a313b53474d50c496 /mm/huge_memory.c
parent     8191acbd30c73e45c24ad16c372e0b42cc7ac8f8
mm: Close races between THP migration and PMD numa clearing
THP migration uses the page lock to guard against parallel allocations,
but there are cases like this still open:
  Task A                              Task B
  ---------------------               ---------------------
  do_huge_pmd_numa_page               do_huge_pmd_numa_page
    lock_page
    mpol_misplaced == -1
    unlock_page
    goto clear_pmdnuma
                                        lock_page
                                        mpol_misplaced == 2
                                        migrate_misplaced_transhuge
    pmd = pmd_mknonnuma
    set_pmd_at
During hours of testing, one crash occurred with weird errors and, while I
have no direct evidence, I suspect something like the race above happened.
This patch extends the page lock hold until pmd_numa is cleared, preventing
a migration from starting in parallel while pmd_numa is being cleared (see
the condensed sketch after the tags below). It also flushes the old pmd
entry and orders pagetable insertion before rmap insertion.
Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1381141781-10992-9-git-send-email-mgorman@suse.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
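The core of the fix is visible in the mm/huge_memory.c hunks below: the page
lock is now held all the way across the pmd_numa clearing, so a parallel
fault can no longer start a migration between the misplacement check and
set_pmd_at(). A condensed sketch of the patched flow (abridged for
illustration; declarations and the misplaced-page migration path are elided,
so this is not the verbatim kernel source):

```c
/*
 * Condensed sketch of the patched do_huge_pmd_numa_page() flow,
 * abridged from the diff below; not verbatim kernel source.
 */
	target_nid = mpol_misplaced(page, vma, haddr);
	if (target_nid == -1) {
		/* If the page was locked, there are no parallel migrations */
		if (page_locked)
			goto clear_pmdnuma;	/* page lock stays held */

		/* A migration may be in flight: wait, then retry the fault */
		spin_unlock(&mm->page_table_lock);
		wait_on_page_locked(page);
		page_nid = -1;		/* retried fault is not accounted */
		goto out;
	}

	/* ... misplaced page: lock, revalidate the PMD, migrate ... */

clear_pmdnuma:
	BUG_ON(!PageLocked(page));	/* the lock is still ours here */
	pmd = pmd_mknonnuma(pmd);
	set_pmd_at(mm, haddr, pmdp, pmd);
	update_mmu_cache_pmd(vma, addr, pmdp);
	unlock_page(page);	/* only now may a parallel migration begin */
out_unlock:
	spin_unlock(&mm->page_table_lock);
out:
	/* accounting and return elided */
```

With the lock held until after set_pmd_at(), Task B's lock_page() in the
diagram above blocks until Task A has finished clearing pmd_numa, which
closes the window.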
Diffstat (limited to 'mm/huge_memory.c')
 mm/huge_memory.c | 33 +++++++++++++++------------------
 1 file changed, 15 insertions(+), 18 deletions(-)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index c3bb65f284d5..d4928769680f 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1304,24 +1304,25 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	target_nid = mpol_misplaced(page, vma, haddr);
 	if (target_nid == -1) {
 		/* If the page was locked, there are no parallel migrations */
-		if (page_locked) {
-			unlock_page(page);
+		if (page_locked)
 			goto clear_pmdnuma;
-		}
 
-		/* Otherwise wait for potential migrations and retry fault */
+		/*
+		 * Otherwise wait for potential migrations and retry. We do
+		 * relock and check_same as the page may no longer be mapped.
+		 * As the fault is being retried, do not account for it.
+		 */
 		spin_unlock(&mm->page_table_lock);
 		wait_on_page_locked(page);
+		page_nid = -1;
 		goto out;
 	}
 
 	/* Page is misplaced, serialise migrations and parallel THP splits */
 	get_page(page);
 	spin_unlock(&mm->page_table_lock);
-	if (!page_locked) {
+	if (!page_locked)
 		lock_page(page);
-		page_locked = true;
-	}
 	anon_vma = page_lock_anon_vma_read(page);
 
 	/* Confirm the PMD did not change while page_table_lock was released */
@@ -1329,32 +1330,28 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (unlikely(!pmd_same(pmd, *pmdp))) {
 		unlock_page(page);
 		put_page(page);
+		page_nid = -1;
 		goto out_unlock;
 	}
 
-	/* Migrate the THP to the requested node */
+	/*
+	 * Migrate the THP to the requested node, returns with page unlocked
+	 * and pmd_numa cleared.
+	 */
 	spin_unlock(&mm->page_table_lock);
 	migrated = migrate_misplaced_transhuge_page(mm, vma,
 				pmdp, pmd, addr, page, target_nid);
 	if (migrated)
 		page_nid = target_nid;
-	else
-		goto check_same;
 
 	goto out;
-
-check_same:
-	spin_lock(&mm->page_table_lock);
-	if (unlikely(!pmd_same(pmd, *pmdp))) {
-		/* Someone else took our fault */
-		page_nid = -1;
-		goto out_unlock;
-	}
 clear_pmdnuma:
+	BUG_ON(!PageLocked(page));
 	pmd = pmd_mknonnuma(pmd);
 	set_pmd_at(mm, haddr, pmdp, pmd);
 	VM_BUG_ON(pmd_numa(*pmdp));
 	update_mmu_cache_pmd(vma, addr, pmdp);
+	unlock_page(page);
 out_unlock:
 	spin_unlock(&mm->page_table_lock);
 