author     Jan Kara <jack@suse.cz>                         2016-12-14 18:07:47 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>  2016-12-14 19:04:09 -0500
commit     a6abc2c0e77b16480f4d2c1eb7925e5287ae1526
tree       10bf5b877550bdcb4aac550b4bb9fd14772129fb
parent     cae1240257d9ba4b40eb240124c530de8ee349bc
dax: make cache flushing protected by entry lock
Currently, flushing of caches for DAX mappings ignores the entry lock. So
far this has been OK (modulo a bug where a difference in the entry lock
could cause cache flushing to be mistakenly skipped), but in the following
patches we will write-protect PTEs on cache flushing and clear dirty tags.
For that we will need more exclusion, so do the cache flushing under an
entry lock. As a bonus, this allows us to remove one lock-unlock pair of
mapping->tree_lock.

Link: http://lkml.kernel.org/r/1479460644-25076-19-git-send-email-jack@suse.cz
Signed-off-by: Jan Kara <jack@suse.cz>
Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--   fs/dax.c   61
1 file changed, 39 insertions(+), 22 deletions(-)
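
For orientation, here is a condensed sketch of the locking order dax_writeback_one() follows once this patch is applied. It is abbreviated from the diff below (declarations, the dax_map_atomic() size checks, and most error paths are trimmed), so treat it as an outline of the protocol rather than the literal code:

	/* Condensed outline of dax_writeback_one() after this patch */
	spin_lock_irq(&mapping->tree_lock);
	/* Revalidate under tree_lock; bail out to put_unlocked if the entry
	 * was punched out, reallocated, or already written back. */
	entry2 = get_unlocked_mapping_entry(mapping, index, &slot);
	...
	/* Lock the entry so page faults and concurrent dax_writeback_one()
	 * calls for this index wait until the flush below has finished. */
	entry = lock_slot(mapping, slot);
	/* Clearing TOWRITE before flushing is safe: anyone inspecting the
	 * entry does so under tree_lock, sees it locked, and waits. */
	radix_tree_tag_clear(page_tree, index, PAGECACHE_TAG_TOWRITE);
	spin_unlock_irq(&mapping->tree_lock);

	/* Flush caches for the mapped range without holding tree_lock */
	wb_cache_pmem(dax.addr, dax.size);
	dax_unmap_atomic(bdev, &dax);

	/* Unlock the entry, waking any waiters */
	put_locked_mapping_entry(mapping, index, entry);
	return ret;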
diff --git a/fs/dax.c b/fs/dax.c
index e83aa4077df4..df5c0daba698 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -618,32 +618,50 @@ static int dax_writeback_one(struct block_device *bdev,
 		struct address_space *mapping, pgoff_t index, void *entry)
 {
 	struct radix_tree_root *page_tree = &mapping->page_tree;
-	struct radix_tree_node *node;
 	struct blk_dax_ctl dax;
-	void **slot;
+	void *entry2, **slot;
 	int ret = 0;
 
-	spin_lock_irq(&mapping->tree_lock);
 	/*
-	 * Regular page slots are stabilized by the page lock even
-	 * without the tree itself locked.  These unlocked entries
-	 * need verification under the tree lock.
+	 * A page got tagged dirty in DAX mapping? Something is seriously
+	 * wrong.
 	 */
-	if (!__radix_tree_lookup(page_tree, index, &node, &slot))
-		goto unlock;
-	if (*slot != entry)
-		goto unlock;
-
-	/* another fsync thread may have already written back this entry */
-	if (!radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_TOWRITE))
-		goto unlock;
+	if (WARN_ON(!radix_tree_exceptional_entry(entry)))
+		return -EIO;
 
+	spin_lock_irq(&mapping->tree_lock);
+	entry2 = get_unlocked_mapping_entry(mapping, index, &slot);
+	/* Entry got punched out / reallocated? */
+	if (!entry2 || !radix_tree_exceptional_entry(entry2))
+		goto put_unlocked;
+	/*
+	 * Entry got reallocated elsewhere? No need to writeback. We have to
+	 * compare sectors as we must not bail out due to difference in lockbit
+	 * or entry type.
+	 */
+	if (dax_radix_sector(entry2) != dax_radix_sector(entry))
+		goto put_unlocked;
 	if (WARN_ON_ONCE(dax_is_empty_entry(entry) ||
 				dax_is_zero_entry(entry))) {
 		ret = -EIO;
-		goto unlock;
+		goto put_unlocked;
 	}
 
+	/* Another fsync thread may have already written back this entry */
+	if (!radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_TOWRITE))
+		goto put_unlocked;
+	/* Lock the entry to serialize with page faults */
+	entry = lock_slot(mapping, slot);
+	/*
+	 * We can clear the tag now but we have to be careful so that concurrent
+	 * dax_writeback_one() calls for the same index cannot finish before we
+	 * actually flush the caches. This is achieved as the calls will look
+	 * at the entry only under tree_lock and once they do that they will
+	 * see the entry locked and wait for it to unlock.
+	 */
+	radix_tree_tag_clear(page_tree, index, PAGECACHE_TAG_TOWRITE);
+	spin_unlock_irq(&mapping->tree_lock);
+
 	/*
 	 * Even if dax_writeback_mapping_range() was given a wbc->range_start
 	 * in the middle of a PMD, the 'index' we are given will be aligned to
@@ -653,15 +671,16 @@ static int dax_writeback_one(struct block_device *bdev,
 	 */
 	dax.sector = dax_radix_sector(entry);
 	dax.size = PAGE_SIZE << dax_radix_order(entry);
-	spin_unlock_irq(&mapping->tree_lock);
 
 	/*
 	 * We cannot hold tree_lock while calling dax_map_atomic() because it
 	 * eventually calls cond_resched().
 	 */
 	ret = dax_map_atomic(bdev, &dax);
-	if (ret < 0)
+	if (ret < 0) {
+		put_locked_mapping_entry(mapping, index, entry);
 		return ret;
+	}
 
 	if (WARN_ON_ONCE(ret < dax.size)) {
 		ret = -EIO;
@@ -669,15 +688,13 @@ static int dax_writeback_one(struct block_device *bdev,
 	}
 
 	wb_cache_pmem(dax.addr, dax.size);
-
-	spin_lock_irq(&mapping->tree_lock);
-	radix_tree_tag_clear(page_tree, index, PAGECACHE_TAG_TOWRITE);
-	spin_unlock_irq(&mapping->tree_lock);
  unmap:
 	dax_unmap_atomic(bdev, &dax);
+	put_locked_mapping_entry(mapping, index, entry);
 	return ret;
 
- unlock:
+ put_unlocked:
+	put_unlocked_mapping_entry(mapping, index, entry2);
 	spin_unlock_irq(&mapping->tree_lock);
 	return ret;
 }