aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRoss Zwisler <ross.zwisler@linux.intel.com>2017-04-07 19:04:57 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2017-04-08 03:47:49 -0400
commite11f8b7b6c4ea13bf8af6b8f42b45e15b554a92b (patch)
treea19bf4df6a3475546fe5cf81c9ebe89e49ddf17d
parent4fad7fb6b0279a85e16e6a63e0f1f7d98cedddf1 (diff)
dax: fix radix tree insertion race
While running generic/340 in my test setup I hit the following race. It can happen with kernels that support FS DAX PMDs, so v4.10 thru v4.11-rc5. Thread 1 Thread 2 -------- -------- dax_iomap_pmd_fault() grab_mapping_entry() spin_lock_irq() get_unlocked_mapping_entry() 'entry' is NULL, can't call lock_slot() spin_unlock_irq() radix_tree_preload() dax_iomap_pmd_fault() grab_mapping_entry() spin_lock_irq() get_unlocked_mapping_entry() ... lock_slot() spin_unlock_irq() dax_pmd_insert_mapping() <inserts a PMD mapping> spin_lock_irq() __radix_tree_insert() fails with -EEXIST <fall back to 4k fault, and die horribly when inserting a 4k entry where a PMD exists> The issue is that we have to drop mapping->tree_lock while calling radix_tree_preload(), but since we didn't have a radix tree entry to lock (unlike in the pmd_downgrade case) we have no protection against Thread 2 coming along and inserting a PMD at the same index. For 4k entries we handled this with a special-case response to -EEXIST coming from the __radix_tree_insert(), but this doesn't save us for PMDs because the -EEXIST case can also mean that we collided with a 4k entry in the radix tree at a different index, but one that is covered by our PMD range. So, correctly handle both the 4k and 2M collision cases by explicitly re-checking the radix tree for an entry at our index once we reacquire mapping->tree_lock. This patch has made it through a clean xfstests run with the current v4.11-rc5 based linux/master, and it also ran generic/340 500 times in a loop. It used to fail within the first 10 iterations. Link: http://lkml.kernel.org/r/20170406212944.2866-1-ross.zwisler@linux.intel.com Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com> Cc: "Darrick J. Wong" <darrick.wong@oracle.com> Cc: Alexander Viro <viro@zeniv.linux.org.uk> Cc: Christoph Hellwig <hch@lst.de> Cc: Dan Williams <dan.j.williams@intel.com> Cc: Jan Kara <jack@suse.cz> Cc: Matthew Wilcox <mawilcox@microsoft.com> Cc: <stable@vger.kernel.org> [4.10+] Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--fs/dax.c35
1 files changed, 22 insertions, 13 deletions
diff --git a/fs/dax.c b/fs/dax.c
index de622d4282a6..85abd741253d 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -373,6 +373,22 @@ restart:
373 } 373 }
374 spin_lock_irq(&mapping->tree_lock); 374 spin_lock_irq(&mapping->tree_lock);
375 375
376 if (!entry) {
377 /*
378 * We needed to drop the page_tree lock while calling
379 * radix_tree_preload() and we didn't have an entry to
380 * lock. See if another thread inserted an entry at
381 * our index during this time.
382 */
383 entry = __radix_tree_lookup(&mapping->page_tree, index,
384 NULL, &slot);
385 if (entry) {
386 radix_tree_preload_end();
387 spin_unlock_irq(&mapping->tree_lock);
388 goto restart;
389 }
390 }
391
376 if (pmd_downgrade) { 392 if (pmd_downgrade) {
377 radix_tree_delete(&mapping->page_tree, index); 393 radix_tree_delete(&mapping->page_tree, index);
378 mapping->nrexceptional--; 394 mapping->nrexceptional--;
@@ -388,19 +404,12 @@ restart:
388 if (err) { 404 if (err) {
389 spin_unlock_irq(&mapping->tree_lock); 405 spin_unlock_irq(&mapping->tree_lock);
390 /* 406 /*
391 * Someone already created the entry? This is a 407 * Our insertion of a DAX entry failed, most likely
392 * normal failure when inserting PMDs in a range 408 * because we were inserting a PMD entry and it
393 * that already contains PTEs. In that case we want 409 * collided with a PTE sized entry at a different
394 * to return -EEXIST immediately. 410 * index in the PMD range. We haven't inserted
395 */ 411 * anything into the radix tree and have no waiters to
396 if (err == -EEXIST && !(size_flag & RADIX_DAX_PMD)) 412 * wake.
397 goto restart;
398 /*
399 * Our insertion of a DAX PMD entry failed, most
400 * likely because it collided with a PTE sized entry
401 * at a different index in the PMD range. We haven't
402 * inserted anything into the radix tree and have no
403 * waiters to wake.
404 */ 413 */
405 return ERR_PTR(err); 414 return ERR_PTR(err);
406 } 415 }