author		Dave Chinner <david@fromorbit.com>	2016-11-29 22:39:29 -0500
committer	Dave Chinner <david@fromorbit.com>	2016-11-29 22:39:29 -0500
commit		5f1c6d28cfcd11c9df67dad45992fd523727fe1e (patch)
tree		6d1e2ec57356653992937381cd3b5753b22f2ca4
parent		b7b26110edf88bad41b87e96a9f0148bed5e2ff8 (diff)
parent		acdda3aae146d9b69d30e9d8a32a8d8937055523 (diff)

Merge branch 'iomap-4.10-directio' into for-next
-rw-r--r--	block/bio.c			|  49
-rw-r--r--	fs/direct-io.c			|   2
-rw-r--r--	fs/internal.h			|   3
-rw-r--r--	fs/iomap.c			| 373
-rw-r--r--	fs/xfs/xfs_aops.c		| 298
-rw-r--r--	fs/xfs/xfs_aops.h		|   6
-rw-r--r--	fs/xfs/xfs_bmap_util.c		|  12
-rw-r--r--	fs/xfs/xfs_dir2_readdir.c	|   2
-rw-r--r--	fs/xfs/xfs_file.c		| 226
-rw-r--r--	fs/xfs/xfs_icache.c		|   6
-rw-r--r--	fs/xfs/xfs_inode.c		|  82
-rw-r--r--	fs/xfs/xfs_inode.h		|   7
-rw-r--r--	fs/xfs/xfs_ioctl.c		|   2
-rw-r--r--	fs/xfs/xfs_iomap.c		|  50
-rw-r--r--	fs/xfs/xfs_iops.c		|  14
-rw-r--r--	fs/xfs/xfs_pnfs.c		|   7
-rw-r--r--	fs/xfs/xfs_pnfs.h		|   4
-rw-r--r--	fs/xfs/xfs_reflink.c		|  14
-rw-r--r--	fs/xfs/xfs_super.c		|   2
-rw-r--r--	fs/xfs/xfs_symlink.c		|   7
-rw-r--r--	include/linux/bio.h		|   1
-rw-r--r--	include/linux/iomap.h		|  11
-rw-r--r--	include/linux/lockdep.h		|  25
-rw-r--r--	kernel/locking/lockdep.c	|  20
24 files changed, 668 insertions(+), 555 deletions(-)
diff --git a/block/bio.c b/block/bio.c
index db85c5753a76..2cf6ebabc68c 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -847,6 +847,55 @@ done:
 }
 EXPORT_SYMBOL(bio_add_page);
 
+/**
+ * bio_iov_iter_get_pages - pin user or kernel pages and add them to a bio
+ * @bio: bio to add pages to
+ * @iter: iov iterator describing the region to be mapped
+ *
+ * Pins as many pages from *iter as possible and appends them to @bio's bvec
+ * array. The pages will have to be released using put_page() when done.
+ */
+int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
+{
+	unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt;
+	struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt;
+	struct page **pages = (struct page **)bv;
+	size_t offset, diff;
+	ssize_t size;
+
+	size = iov_iter_get_pages(iter, pages, LONG_MAX, nr_pages, &offset);
+	if (unlikely(size <= 0))
+		return size ? size : -EFAULT;
+	nr_pages = (size + offset + PAGE_SIZE - 1) / PAGE_SIZE;
+
+	/*
+	 * Deep magic below: We need to walk the pinned pages backwards
+	 * because we are abusing the space allocated for the bio_vecs
+	 * for the page array. Because the bio_vecs are larger than the
+	 * page pointers by definition this will always work. But it also
+	 * means we can't use bio_add_page, so any changes to its semantics
+	 * need to be reflected here as well.
+	 */
+	bio->bi_iter.bi_size += size;
+	bio->bi_vcnt += nr_pages;
+
+	diff = (nr_pages * PAGE_SIZE - offset) - size;
+	while (nr_pages--) {
+		bv[nr_pages].bv_page = pages[nr_pages];
+		bv[nr_pages].bv_len = PAGE_SIZE;
+		bv[nr_pages].bv_offset = 0;
+	}
+
+	bv[0].bv_offset += offset;
+	bv[0].bv_len -= offset;
+	if (diff)
+		bv[bio->bi_vcnt - 1].bv_len -= diff;
+
+	iov_iter_advance(iter, size);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(bio_iov_iter_get_pages);
+
 struct submit_bio_ret {
 	struct completion event;
 	int error;
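The new helper above replaces per-page bio_add_page() loops in direct I/O submission paths. A minimal sketch of a caller, assuming a hypothetical completion handler example_end_io(); this is not part of the patch:

	/*
	 * Hedged sketch of a bio_iov_iter_get_pages() caller.  Looping over
	 * iterators larger than one bio is elided; example_end_io() is
	 * hypothetical and must put_page() each pinned page when the bio
	 * completes.
	 */
	static int example_submit_dio_read(struct block_device *bdev,
					   sector_t sector, struct iov_iter *iter)
	{
		struct bio *bio;
		int ret;

		bio = bio_alloc(GFP_KERNEL, iov_iter_npages(iter, BIO_MAX_PAGES));
		bio->bi_bdev = bdev;
		bio->bi_iter.bi_sector = sector;
		bio->bi_end_io = example_end_io;	/* hypothetical */

		ret = bio_iov_iter_get_pages(bio, iter); /* pins pages, advances iter */
		if (ret) {
			bio_put(bio);
			return ret;
		}

		bio_set_op_attrs(bio, REQ_OP_READ, 0);
		submit_bio(bio);		/* pages are put in end_io */
		return 0;
	}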
diff --git a/fs/direct-io.c b/fs/direct-io.c
index fb9aa16a7727..19aa448fde6a 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -554,7 +554,7 @@ static inline int dio_bio_reap(struct dio *dio, struct dio_submit *sdio)
  * filesystems that don't need it and also allows us to create the workqueue
  * late enough so the we can include s_id in the name of the workqueue.
  */
-static int sb_init_dio_done_wq(struct super_block *sb)
+int sb_init_dio_done_wq(struct super_block *sb)
 {
 	struct workqueue_struct *old;
 	struct workqueue_struct *wq = alloc_workqueue("dio/%s",
diff --git a/fs/internal.h b/fs/internal.h
index f4da3341b4a3..4fcf51766d4a 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -184,3 +184,6 @@ typedef loff_t (*iomap_actor_t)(struct inode *inode, loff_t pos, loff_t len,
 loff_t iomap_apply(struct inode *inode, loff_t pos, loff_t length,
 		unsigned flags, struct iomap_ops *ops, void *data,
 		iomap_actor_t actor);
+
+/* direct-io.c: */
+int sb_init_dio_done_wq(struct super_block *sb);
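Exposing sb_init_dio_done_wq() through fs/internal.h lets iomap.c create the per-superblock completion workqueue lazily, on the first async direct write that needs it. A hedged sketch of that usage pattern; the helper resolves creation races internally via cmpxchg, so a double call is harmless:

	/*
	 * Hedged sketch: create sb->s_dio_done_wq on demand.  The unlocked
	 * check is only an optimisation; sb_init_dio_done_wq() handles the
	 * race and destroys the losing workqueue.
	 */
	static int example_ensure_dio_done_wq(struct super_block *sb)
	{
		if (sb->s_dio_done_wq)
			return 0;
		return sb_init_dio_done_wq(sb);
	}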
diff --git a/fs/iomap.c b/fs/iomap.c
index 13dd413b2b9c..fc2446242935 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -24,6 +24,7 @@
 #include <linux/uio.h>
 #include <linux/backing-dev.h>
 #include <linux/buffer_head.h>
+#include <linux/task_io_accounting_ops.h>
 #include <linux/dax.h>
 #include "internal.h"
 
@@ -584,3 +585,375 @@ int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fi,
 	return 0;
 }
 EXPORT_SYMBOL_GPL(iomap_fiemap);
+
+/*
+ * Private flags for iomap_dio, must not overlap with the public ones in
+ * iomap.h:
+ */
+#define IOMAP_DIO_WRITE		(1 << 30)
+#define IOMAP_DIO_DIRTY		(1 << 31)
+
+struct iomap_dio {
+	struct kiocb		*iocb;
+	iomap_dio_end_io_t	*end_io;
+	loff_t			i_size;
+	loff_t			size;
+	atomic_t		ref;
+	unsigned		flags;
+	int			error;
+
+	union {
+		/* used during submission and for synchronous completion: */
+		struct {
+			struct iov_iter		*iter;
+			struct task_struct	*waiter;
+			struct request_queue	*last_queue;
+			blk_qc_t		cookie;
+		} submit;
+
+		/* used for aio completion: */
+		struct {
+			struct work_struct	work;
+		} aio;
+	};
+};
+
+static ssize_t iomap_dio_complete(struct iomap_dio *dio)
+{
+	struct kiocb *iocb = dio->iocb;
+	ssize_t ret;
+
+	if (dio->end_io) {
+		ret = dio->end_io(iocb,
+				dio->error ? dio->error : dio->size,
+				dio->flags);
+	} else {
+		ret = dio->error;
+	}
+
+	if (likely(!ret)) {
+		ret = dio->size;
+		/* check for short read */
+		if (iocb->ki_pos + ret > dio->i_size &&
+		    !(dio->flags & IOMAP_DIO_WRITE))
+			ret = dio->i_size - iocb->ki_pos;
+		iocb->ki_pos += ret;
+	}
+
+	inode_dio_end(file_inode(iocb->ki_filp));
+	kfree(dio);
+
+	return ret;
+}
+
+static void iomap_dio_complete_work(struct work_struct *work)
+{
+	struct iomap_dio *dio = container_of(work, struct iomap_dio, aio.work);
+	struct kiocb *iocb = dio->iocb;
+	bool is_write = (dio->flags & IOMAP_DIO_WRITE);
+	ssize_t ret;
+
+	ret = iomap_dio_complete(dio);
+	if (is_write && ret > 0)
+		ret = generic_write_sync(iocb, ret);
+	iocb->ki_complete(iocb, ret, 0);
+}
+
+/*
+ * Set an error in the dio if none is set yet.  We have to use cmpxchg
+ * as the submission context and the completion context(s) can race to
+ * update the error.
+ */
+static inline void iomap_dio_set_error(struct iomap_dio *dio, int ret)
+{
+	cmpxchg(&dio->error, 0, ret);
+}
+
+static void iomap_dio_bio_end_io(struct bio *bio)
+{
+	struct iomap_dio *dio = bio->bi_private;
+	bool should_dirty = (dio->flags & IOMAP_DIO_DIRTY);
+
+	if (bio->bi_error)
+		iomap_dio_set_error(dio, bio->bi_error);
+
+	if (atomic_dec_and_test(&dio->ref)) {
+		if (is_sync_kiocb(dio->iocb)) {
+			struct task_struct *waiter = dio->submit.waiter;
+
+			WRITE_ONCE(dio->submit.waiter, NULL);
+			wake_up_process(waiter);
+		} else if (dio->flags & IOMAP_DIO_WRITE) {
+			struct inode *inode = file_inode(dio->iocb->ki_filp);
+
+			INIT_WORK(&dio->aio.work, iomap_dio_complete_work);
+			queue_work(inode->i_sb->s_dio_done_wq, &dio->aio.work);
+		} else {
+			iomap_dio_complete_work(&dio->aio.work);
+		}
+	}
+
+	if (should_dirty) {
+		bio_check_pages_dirty(bio);
+	} else {
+		struct bio_vec *bvec;
+		int i;
+
+		bio_for_each_segment_all(bvec, bio, i)
+			put_page(bvec->bv_page);
+		bio_put(bio);
+	}
+}
+
+static blk_qc_t
+iomap_dio_zero(struct iomap_dio *dio, struct iomap *iomap, loff_t pos,
+		unsigned len)
+{
+	struct page *page = ZERO_PAGE(0);
+	struct bio *bio;
+
+	bio = bio_alloc(GFP_KERNEL, 1);
+	bio->bi_bdev = iomap->bdev;
+	bio->bi_iter.bi_sector =
+		iomap->blkno + ((pos - iomap->offset) >> 9);
+	bio->bi_private = dio;
+	bio->bi_end_io = iomap_dio_bio_end_io;
+
+	get_page(page);
+	if (bio_add_page(bio, page, len, 0) != len)
+		BUG();
+	bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_ODIRECT);
+
+	atomic_inc(&dio->ref);
+	return submit_bio(bio);
+}
+
+static loff_t
+iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
+		void *data, struct iomap *iomap)
+{
+	struct iomap_dio *dio = data;
+	unsigned blkbits = blksize_bits(bdev_logical_block_size(iomap->bdev));
+	unsigned fs_block_size = (1 << inode->i_blkbits), pad;
+	unsigned align = iov_iter_alignment(dio->submit.iter);
+	struct iov_iter iter;
+	struct bio *bio;
+	bool need_zeroout = false;
+	int nr_pages, ret;
+
+	if ((pos | length | align) & ((1 << blkbits) - 1))
+		return -EINVAL;
+
+	switch (iomap->type) {
+	case IOMAP_HOLE:
+		if (WARN_ON_ONCE(dio->flags & IOMAP_DIO_WRITE))
+			return -EIO;
+		/*FALLTHRU*/
+	case IOMAP_UNWRITTEN:
+		if (!(dio->flags & IOMAP_DIO_WRITE)) {
+			iov_iter_zero(length, dio->submit.iter);
+			dio->size += length;
+			return length;
+		}
+		dio->flags |= IOMAP_DIO_UNWRITTEN;
+		need_zeroout = true;
+		break;
+	case IOMAP_MAPPED:
+		if (iomap->flags & IOMAP_F_SHARED)
+			dio->flags |= IOMAP_DIO_COW;
+		if (iomap->flags & IOMAP_F_NEW)
+			need_zeroout = true;
+		break;
+	default:
+		WARN_ON_ONCE(1);
+		return -EIO;
+	}
+
+	/*
+	 * Operate on a partial iter trimmed to the extent we were called for.
+	 * We'll update the iter in the dio once we're done with this extent.
+	 */
+	iter = *dio->submit.iter;
+	iov_iter_truncate(&iter, length);
+
+	nr_pages = iov_iter_npages(&iter, BIO_MAX_PAGES);
+	if (nr_pages <= 0)
+		return nr_pages;
+
+	if (need_zeroout) {
+		/* zero out from the start of the block to the write offset */
+		pad = pos & (fs_block_size - 1);
+		if (pad)
+			iomap_dio_zero(dio, iomap, pos - pad, pad);
+	}
+
+	do {
+		if (dio->error)
+			return 0;
+
+		bio = bio_alloc(GFP_KERNEL, nr_pages);
+		bio->bi_bdev = iomap->bdev;
+		bio->bi_iter.bi_sector =
+			iomap->blkno + ((pos - iomap->offset) >> 9);
+		bio->bi_private = dio;
+		bio->bi_end_io = iomap_dio_bio_end_io;
+
+		ret = bio_iov_iter_get_pages(bio, &iter);
+		if (unlikely(ret)) {
+			bio_put(bio);
+			return ret;
+		}
+
+		if (dio->flags & IOMAP_DIO_WRITE) {
+			bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_ODIRECT);
+			task_io_account_write(bio->bi_iter.bi_size);
+		} else {
+			bio_set_op_attrs(bio, REQ_OP_READ, 0);
+			if (dio->flags & IOMAP_DIO_DIRTY)
+				bio_set_pages_dirty(bio);
+		}
+
+		dio->size += bio->bi_iter.bi_size;
+		pos += bio->bi_iter.bi_size;
+
+		nr_pages = iov_iter_npages(&iter, BIO_MAX_PAGES);
+
+		atomic_inc(&dio->ref);
+
+		dio->submit.last_queue = bdev_get_queue(iomap->bdev);
+		dio->submit.cookie = submit_bio(bio);
+	} while (nr_pages);
+
+	if (need_zeroout) {
+		/* zero out from the end of the write to the end of the block */
+		pad = pos & (fs_block_size - 1);
+		if (pad)
+			iomap_dio_zero(dio, iomap, pos, fs_block_size - pad);
+	}
+
+	iov_iter_advance(dio->submit.iter, length);
+	return length;
+}
+
+ssize_t
+iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, struct iomap_ops *ops,
+		iomap_dio_end_io_t end_io)
+{
+	struct address_space *mapping = iocb->ki_filp->f_mapping;
+	struct inode *inode = file_inode(iocb->ki_filp);
+	size_t count = iov_iter_count(iter);
+	loff_t pos = iocb->ki_pos, end = iocb->ki_pos + count - 1, ret = 0;
+	unsigned int flags = IOMAP_DIRECT;
+	struct blk_plug plug;
+	struct iomap_dio *dio;
+
+	lockdep_assert_held(&inode->i_rwsem);
+
+	if (!count)
+		return 0;
+
+	dio = kmalloc(sizeof(*dio), GFP_KERNEL);
+	if (!dio)
+		return -ENOMEM;
+
+	dio->iocb = iocb;
+	atomic_set(&dio->ref, 1);
+	dio->size = 0;
+	dio->i_size = i_size_read(inode);
+	dio->end_io = end_io;
+	dio->error = 0;
+	dio->flags = 0;
+
+	dio->submit.iter = iter;
+	if (is_sync_kiocb(iocb)) {
+		dio->submit.waiter = current;
+		dio->submit.cookie = BLK_QC_T_NONE;
+		dio->submit.last_queue = NULL;
+	}
+
+	if (iov_iter_rw(iter) == READ) {
+		if (pos >= dio->i_size)
+			goto out_free_dio;
+
+		if (iter->type == ITER_IOVEC)
+			dio->flags |= IOMAP_DIO_DIRTY;
+	} else {
+		dio->flags |= IOMAP_DIO_WRITE;
+		flags |= IOMAP_WRITE;
+	}
+
+	if (mapping->nrpages) {
+		ret = filemap_write_and_wait_range(mapping, iocb->ki_pos, end);
+		if (ret)
+			goto out_free_dio;
+
+		ret = invalidate_inode_pages2_range(mapping,
+				iocb->ki_pos >> PAGE_SHIFT, end >> PAGE_SHIFT);
+		WARN_ON_ONCE(ret);
+		ret = 0;
+	}
+
+	inode_dio_begin(inode);
+
+	blk_start_plug(&plug);
+	do {
+		ret = iomap_apply(inode, pos, count, flags, ops, dio,
+				iomap_dio_actor);
+		if (ret <= 0) {
+			/* magic error code to fall back to buffered I/O */
+			if (ret == -ENOTBLK)
+				ret = 0;
+			break;
+		}
+		pos += ret;
+	} while ((count = iov_iter_count(iter)) > 0);
+	blk_finish_plug(&plug);
+
+	if (ret < 0)
+		iomap_dio_set_error(dio, ret);
+
+	if (ret >= 0 && iov_iter_rw(iter) == WRITE && !is_sync_kiocb(iocb) &&
+	    !inode->i_sb->s_dio_done_wq) {
+		ret = sb_init_dio_done_wq(inode->i_sb);
+		if (ret < 0)
+			iomap_dio_set_error(dio, ret);
+	}
+
+	if (!atomic_dec_and_test(&dio->ref)) {
+		if (!is_sync_kiocb(iocb))
+			return -EIOCBQUEUED;
+
+		for (;;) {
+			set_current_state(TASK_UNINTERRUPTIBLE);
+			if (!READ_ONCE(dio->submit.waiter))
+				break;
+
+			if (!(iocb->ki_flags & IOCB_HIPRI) ||
+			    !dio->submit.last_queue ||
+			    !blk_poll(dio->submit.last_queue,
+					dio->submit.cookie))
+				io_schedule();
+		}
+		__set_current_state(TASK_RUNNING);
+	}
+
+	/*
+	 * Try again to invalidate clean pages which might have been cached by
+	 * non-direct readahead, or faulted in by get_user_pages() if the source
+	 * of the write was an mmap'ed region of the file we're writing.  Either
+	 * one is a pretty crazy thing to do, so we don't support it 100%.  If
+	 * this invalidation fails, tough, the write still worked...
+	 */
+	if (iov_iter_rw(iter) == WRITE && mapping->nrpages) {
+		ret = invalidate_inode_pages2_range(mapping,
+				iocb->ki_pos >> PAGE_SHIFT, end >> PAGE_SHIFT);
+		WARN_ON_ONCE(ret);
+	}
+
+	return iomap_dio_complete(dio);
+
+out_free_dio:
+	kfree(dio);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(iomap_dio_rw);
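As a usage sketch, a filesystem wires iomap_dio_rw() into its ->read_iter path roughly as follows (the xfs_file.c hunks below do exactly this); example_iomap_ops stands in for the filesystem's iomap_ops table:

	/*
	 * Hedged sketch of a caller.  iomap_dio_rw() asserts i_rwsem is
	 * held, maps extents through the filesystem's iomap_ops, and
	 * returns a byte count, -EIOCBQUEUED for in-flight AIO, or a
	 * negative error.
	 */
	static ssize_t example_file_dio_read(struct kiocb *iocb,
					     struct iov_iter *to)
	{
		struct inode *inode = file_inode(iocb->ki_filp);
		ssize_t ret;

		inode_lock_shared(inode);	/* satisfies the lockdep assert */
		ret = iomap_dio_rw(iocb, to, &example_iomap_ops, NULL);
		inode_unlock_shared(inode);

		return ret;
	}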
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index ab266d66124d..265000a09327 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -37,11 +37,6 @@
 #include <linux/pagevec.h>
 #include <linux/writeback.h>
 
-/* flags for direct write completions */
-#define XFS_DIO_FLAG_UNWRITTEN	(1 << 0)
-#define XFS_DIO_FLAG_APPEND	(1 << 1)
-#define XFS_DIO_FLAG_COW	(1 << 2)
-
 /*
  * structure owned by writepages passed to individual writepage calls
  */
@@ -1176,45 +1171,6 @@ xfs_vm_releasepage(
 }
 
 /*
- * When we map a DIO buffer, we may need to pass flags to
- * xfs_end_io_direct_write to tell it what kind of write IO we are doing.
- *
- * Note that for DIO, an IO to the highest supported file block offset (i.e.
- * 2^63 - 1FSB bytes) will result in the offset + count overflowing a signed 64
- * bit variable. Hence if we see this overflow, we have to assume that the IO is
- * extending the file size. We won't know for sure until IO completion is run
- * and the actual max write offset is communicated to the IO completion
- * routine.
- */
-static void
-xfs_map_direct(
-	struct inode		*inode,
-	struct buffer_head	*bh_result,
-	struct xfs_bmbt_irec	*imap,
-	xfs_off_t		offset,
-	bool			is_cow)
-{
-	uintptr_t		*flags = (uintptr_t *)&bh_result->b_private;
-	xfs_off_t		size = bh_result->b_size;
-
-	trace_xfs_get_blocks_map_direct(XFS_I(inode), offset, size,
-		ISUNWRITTEN(imap) ? XFS_IO_UNWRITTEN : is_cow ? XFS_IO_COW :
-		XFS_IO_OVERWRITE, imap);
-
-	if (ISUNWRITTEN(imap)) {
-		*flags |= XFS_DIO_FLAG_UNWRITTEN;
-		set_buffer_defer_completion(bh_result);
-	} else if (is_cow) {
-		*flags |= XFS_DIO_FLAG_COW;
-		set_buffer_defer_completion(bh_result);
-	}
-	if (offset + size > i_size_read(inode) || offset + size < 0) {
-		*flags |= XFS_DIO_FLAG_APPEND;
-		set_buffer_defer_completion(bh_result);
-	}
-}
-
-/*
  * If this is O_DIRECT or the mpage code calling tell them how large the mapping
  * is, so that we can avoid repeated get_blocks calls.
  *
@@ -1254,51 +1210,12 @@ xfs_map_trim_size(
 	bh_result->b_size = mapping_size;
 }
 
-/* Bounce unaligned directio writes to the page cache. */
 static int
-xfs_bounce_unaligned_dio_write(
-	struct xfs_inode	*ip,
-	xfs_fileoff_t		offset_fsb,
-	struct xfs_bmbt_irec	*imap)
-{
-	struct xfs_bmbt_irec	irec;
-	xfs_fileoff_t		delta;
-	bool			shared;
-	bool			x;
-	int			error;
-
-	irec = *imap;
-	if (offset_fsb > irec.br_startoff) {
-		delta = offset_fsb - irec.br_startoff;
-		irec.br_blockcount -= delta;
-		irec.br_startblock += delta;
-		irec.br_startoff = offset_fsb;
-	}
-	error = xfs_reflink_trim_around_shared(ip, &irec, &shared, &x);
-	if (error)
-		return error;
-
-	/*
-	 * We're here because we're trying to do a directio write to a
-	 * region that isn't aligned to a filesystem block. If any part
-	 * of the extent is shared, fall back to buffered mode to handle
-	 * the RMW. This is done by returning -EREMCHG ("remote addr
-	 * changed"), which is caught further up the call stack.
-	 */
-	if (shared) {
-		trace_xfs_reflink_bounce_dio_write(ip, imap);
-		return -EREMCHG;
-	}
-	return 0;
-}
-
-STATIC int
-__xfs_get_blocks(
+xfs_get_blocks(
 	struct inode		*inode,
 	sector_t		iblock,
 	struct buffer_head	*bh_result,
-	int			create,
-	bool			direct)
+	int			create)
 {
 	struct xfs_inode	*ip = XFS_I(inode);
 	struct xfs_mount	*mp = ip->i_mount;
@@ -1309,10 +1226,8 @@ __xfs_get_blocks(
 	int			nimaps = 1;
 	xfs_off_t		offset;
 	ssize_t			size;
-	int			new = 0;
-	bool			is_cow = false;
 
-	BUG_ON(create && !direct);
+	BUG_ON(create);
 
 	if (XFS_FORCED_SHUTDOWN(mp))
 		return -EIO;
@@ -1321,7 +1236,7 @@ __xfs_get_blocks(
 	ASSERT(bh_result->b_size >= (1 << inode->i_blkbits));
 	size = bh_result->b_size;
 
-	if (!create && offset >= i_size_read(inode))
+	if (offset >= i_size_read(inode))
 		return 0;
 
 	/*
@@ -1336,73 +1251,12 @@ __xfs_get_blocks(
 	end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size);
 	offset_fsb = XFS_B_TO_FSBT(mp, offset);
 
-	if (create && direct && xfs_is_reflink_inode(ip)) {
-		is_cow = xfs_reflink_find_cow_mapping(ip, offset, &imap);
-		ASSERT(!is_cow || !isnullstartblock(imap.br_startblock));
-	}
-
-	if (!is_cow) {
-		error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb,
-				&imap, &nimaps, XFS_BMAPI_ENTIRE);
-		/*
-		 * Truncate an overwrite extent if there's a pending CoW
-		 * reservation before the end of this extent.  This
-		 * forces us to come back to get_blocks to take care of
-		 * the CoW.
-		 */
-		if (create && direct && nimaps &&
-		    imap.br_startblock != HOLESTARTBLOCK &&
-		    imap.br_startblock != DELAYSTARTBLOCK &&
-		    !ISUNWRITTEN(&imap))
-			xfs_reflink_trim_irec_to_next_cow(ip, offset_fsb,
-					&imap);
-	}
+	error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb,
+			&imap, &nimaps, XFS_BMAPI_ENTIRE);
 	if (error)
 		goto out_unlock;
 
-	/*
-	 * The only time we can ever safely find delalloc blocks on direct I/O
-	 * is a dio write to post-eof speculative preallocation. All other
-	 * scenarios are indicative of a problem or misuse (such as mixing
-	 * direct and mapped I/O).
-	 *
-	 * The file may be unmapped by the time we get here so we cannot
-	 * reliably fail the I/O based on mapping. Instead, fail the I/O if this
-	 * is a read or a write within eof. Otherwise, carry on but warn as a
-	 * precuation if the file happens to be mapped.
-	 */
-	if (direct && imap.br_startblock == DELAYSTARTBLOCK) {
-		if (!create || offset < i_size_read(VFS_I(ip))) {
-			WARN_ON_ONCE(1);
-			error = -EIO;
-			goto out_unlock;
-		}
-		WARN_ON_ONCE(mapping_mapped(VFS_I(ip)->i_mapping));
-	}
-
-	/* for DAX, we convert unwritten extents directly */
-	if (create &&
-	    (!nimaps ||
-	     (imap.br_startblock == HOLESTARTBLOCK ||
-	      imap.br_startblock == DELAYSTARTBLOCK) ||
-	     (IS_DAX(inode) && ISUNWRITTEN(&imap)))) {
-		/*
-		 * xfs_iomap_write_direct() expects the shared lock. It
-		 * is unlocked on return.
-		 */
-		if (lockmode == XFS_ILOCK_EXCL)
-			xfs_ilock_demote(ip, lockmode);
-
-		error = xfs_iomap_write_direct(ip, offset, size,
-				&imap, nimaps);
-		if (error)
-			return error;
-		new = 1;
-
-		trace_xfs_get_blocks_alloc(ip, offset, size,
-				ISUNWRITTEN(&imap) ? XFS_IO_UNWRITTEN
-				: XFS_IO_DELALLOC, &imap);
-	} else if (nimaps) {
+	if (nimaps) {
 		trace_xfs_get_blocks_found(ip, offset, size,
 				ISUNWRITTEN(&imap) ? XFS_IO_UNWRITTEN
 				: XFS_IO_OVERWRITE, &imap);
@@ -1412,12 +1266,6 @@ __xfs_get_blocks(
 		goto out_unlock;
 	}
 
-	if (IS_DAX(inode) && create) {
-		ASSERT(!ISUNWRITTEN(&imap));
-		/* zeroing is not needed at a higher layer */
-		new = 0;
-	}
-
 	/* trim mapping down to size requested */
 	xfs_map_trim_size(inode, iblock, bh_result, &imap, offset, size);
 
@@ -1427,43 +1275,14 @@ __xfs_get_blocks(
 	 */
 	if (imap.br_startblock != HOLESTARTBLOCK &&
 	    imap.br_startblock != DELAYSTARTBLOCK &&
-	    (create || !ISUNWRITTEN(&imap))) {
-		if (create && direct && !is_cow) {
-			error = xfs_bounce_unaligned_dio_write(ip, offset_fsb,
-					&imap);
-			if (error)
-				return error;
-		}
-
+	    !ISUNWRITTEN(&imap))
 		xfs_map_buffer(inode, bh_result, &imap, offset);
-		if (ISUNWRITTEN(&imap))
-			set_buffer_unwritten(bh_result);
-		/* direct IO needs special help */
-		if (create)
-			xfs_map_direct(inode, bh_result, &imap, offset, is_cow);
-	}
 
 	/*
 	 * If this is a realtime file, data may be on a different device.
 	 * to that pointed to from the buffer_head b_bdev currently.
 	 */
 	bh_result->b_bdev = xfs_find_bdev_for_inode(inode);
-
-	/*
-	 * If we previously allocated a block out beyond eof and we are now
-	 * coming back to use it then we will need to flag it as new even if it
-	 * has a disk address.
-	 *
-	 * With sub-block writes into unwritten extents we also need to mark
-	 * the buffer as new so that the unwritten parts of the buffer gets
-	 * correctly zeroed.
-	 */
-	if (create &&
-	    ((!buffer_mapped(bh_result) && !buffer_uptodate(bh_result)) ||
-	     (offset >= i_size_read(inode)) ||
-	     (new || ISUNWRITTEN(&imap))))
-		set_buffer_new(bh_result);
-
 	return 0;
 
 out_unlock:
@@ -1471,100 +1290,6 @@ out_unlock:
 	return error;
 }
 
-int
-xfs_get_blocks(
-	struct inode		*inode,
-	sector_t		iblock,
-	struct buffer_head	*bh_result,
-	int			create)
-{
-	return __xfs_get_blocks(inode, iblock, bh_result, create, false);
-}
-
-int
-xfs_get_blocks_direct(
-	struct inode		*inode,
-	sector_t		iblock,
-	struct buffer_head	*bh_result,
-	int			create)
-{
-	return __xfs_get_blocks(inode, iblock, bh_result, create, true);
-}
-
-/*
- * Complete a direct I/O write request.
- *
- * xfs_map_direct passes us some flags in the private data to tell us what to
- * do. If no flags are set, then the write IO is an overwrite wholly within
- * the existing allocated file size and so there is nothing for us to do.
- *
- * Note that in this case the completion can be called in interrupt context,
- * whereas if we have flags set we will always be called in task context
- * (i.e. from a workqueue).
- */
-int
-xfs_end_io_direct_write(
-	struct kiocb		*iocb,
-	loff_t			offset,
-	ssize_t			size,
-	void			*private)
-{
-	struct inode		*inode = file_inode(iocb->ki_filp);
-	struct xfs_inode	*ip = XFS_I(inode);
-	uintptr_t		flags = (uintptr_t)private;
-	int			error = 0;
-
-	trace_xfs_end_io_direct_write(ip, offset, size);
-
-	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
-		return -EIO;
-
-	if (size <= 0)
-		return size;
-
-	/*
-	 * The flags tell us whether we are doing unwritten extent conversions
-	 * or an append transaction that updates the on-disk file size. These
-	 * cases are the only cases where we should *potentially* be needing
-	 * to update the VFS inode size.
-	 */
-	if (flags == 0) {
-		ASSERT(offset + size <= i_size_read(inode));
-		return 0;
-	}
-
-	/*
-	 * We need to update the in-core inode size here so that we don't end up
-	 * with the on-disk inode size being outside the in-core inode size. We
-	 * have no other method of updating EOF for AIO, so always do it here
-	 * if necessary.
-	 *
-	 * We need to lock the test/set EOF update as we can be racing with
-	 * other IO completions here to update the EOF. Failing to serialise
-	 * here can result in EOF moving backwards and Bad Things Happen when
-	 * that occurs.
-	 */
-	spin_lock(&ip->i_flags_lock);
-	if (offset + size > i_size_read(inode))
-		i_size_write(inode, offset + size);
-	spin_unlock(&ip->i_flags_lock);
-
-	if (flags & XFS_DIO_FLAG_COW)
-		error = xfs_reflink_end_cow(ip, offset, size);
-	if (flags & XFS_DIO_FLAG_UNWRITTEN) {
-		trace_xfs_end_io_direct_write_unwritten(ip, offset, size);
-
-		error = xfs_iomap_write_unwritten(ip, offset, size);
-	}
-	if (flags & XFS_DIO_FLAG_APPEND) {
-		trace_xfs_end_io_direct_write_append(ip, offset, size);
-
-		error = xfs_setfilesize(ip, offset, size);
-	}
-
-	return error;
-}
-
 STATIC ssize_t
 xfs_vm_direct_IO(
 	struct kiocb		*iocb,
@@ -1585,7 +1310,6 @@ xfs_vm_bmap(
 	struct xfs_inode	*ip = XFS_I(inode);
 
 	trace_xfs_vm_bmap(XFS_I(inode));
-	xfs_ilock(ip, XFS_IOLOCK_SHARED);
 
 	/*
 	 * The swap code (ab-)uses ->bmap to get a block mapping and then
@@ -1593,12 +1317,10 @@ xfs_vm_bmap(
 	 * that on reflinks inodes, so we have to skip out here.  And yes,
 	 * 0 is the magic code for a bmap error..
 	 */
-	if (xfs_is_reflink_inode(ip)) {
-		xfs_iunlock(ip, XFS_IOLOCK_SHARED);
+	if (xfs_is_reflink_inode(ip))
 		return 0;
-	}
+
 	filemap_write_and_wait(mapping);
-	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
 	return generic_block_bmap(mapping, block, xfs_get_blocks);
 }
 
diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h
index 34dc00dfb91d..cc174ec6c2fd 100644
--- a/fs/xfs/xfs_aops.h
+++ b/fs/xfs/xfs_aops.h
@@ -55,12 +55,6 @@ struct xfs_ioend {
 
 extern const struct address_space_operations xfs_address_space_operations;
 
-int	xfs_get_blocks(struct inode *inode, sector_t offset,
-		       struct buffer_head *map_bh, int create);
-int	xfs_get_blocks_direct(struct inode *inode, sector_t offset,
-			      struct buffer_head *map_bh, int create);
-int	xfs_end_io_direct_write(struct kiocb *iocb, loff_t offset,
-			ssize_t size, void *private);
 int	xfs_setfilesize(struct xfs_inode *ip, xfs_off_t offset, size_t size);
 
 extern void xfs_count_page_state(struct page *, int *, int *);
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 0670a8bd5818..b9abce524c33 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1935,8 +1935,8 @@ xfs_swap_extents(
 	 * page cache safely. Once we have done this we can take the ilocks and
 	 * do the rest of the checks.
 	 */
-	lock_flags = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
-	xfs_lock_two_inodes(ip, tip, XFS_IOLOCK_EXCL);
+	lock_two_nondirectories(VFS_I(ip), VFS_I(tip));
+	lock_flags = XFS_MMAPLOCK_EXCL;
 	xfs_lock_two_inodes(ip, tip, XFS_MMAPLOCK_EXCL);
 
 	/* Verify that both files have the same format */
@@ -2076,15 +2076,13 @@ xfs_swap_extents(
 	trace_xfs_swap_extent_after(ip, 0);
 	trace_xfs_swap_extent_after(tip, 1);
 
+out_unlock:
 	xfs_iunlock(ip, lock_flags);
 	xfs_iunlock(tip, lock_flags);
+	unlock_two_nondirectories(VFS_I(ip), VFS_I(tip));
 	return error;
 
 out_trans_cancel:
 	xfs_trans_cancel(tp);
-
-out_unlock:
-	xfs_iunlock(ip, lock_flags);
-	xfs_iunlock(tip, lock_flags);
-	return error;
+	goto out_unlock;
 }
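The change above replaces the XFS-private IOLOCK pair with the VFS i_rwsem pair. A hedged schematic of the resulting lock ordering (not part of the patch):

	/*
	 * Hedged schematic of the ordering xfs_swap_extents() now uses:
	 * both VFS i_rwsems first (consistently ordered by the VFS
	 * helper), then both XFS MMAPLOCKs, released in reverse.
	 */
	static void example_swap_lock_order(struct xfs_inode *ip,
					    struct xfs_inode *tip)
	{
		lock_two_nondirectories(VFS_I(ip), VFS_I(tip));
		xfs_lock_two_inodes(ip, tip, XFS_MMAPLOCK_EXCL);

		/* ... extent swap work ... */

		xfs_iunlock(ip, XFS_MMAPLOCK_EXCL);
		xfs_iunlock(tip, XFS_MMAPLOCK_EXCL);
		unlock_two_nondirectories(VFS_I(ip), VFS_I(tip));
	}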
diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c
index 29816981b50a..003a99b83bd8 100644
--- a/fs/xfs/xfs_dir2_readdir.c
+++ b/fs/xfs/xfs_dir2_readdir.c
@@ -677,7 +677,6 @@ xfs_readdir(
 	args.dp = dp;
 	args.geo = dp->i_mount->m_dir_geo;
 
-	xfs_ilock(dp, XFS_IOLOCK_SHARED);
 	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
 		rval = xfs_dir2_sf_getdents(&args, ctx);
 	else if ((rval = xfs_dir2_isblock(&args, &v)))
@@ -686,7 +685,6 @@ xfs_readdir(
 		rval = xfs_dir2_block_getdents(&args, ctx);
 	else
 		rval = xfs_dir2_leaf_getdents(&args, ctx, bufsize);
-	xfs_iunlock(dp, XFS_IOLOCK_SHARED);
 
 	return rval;
 }
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index d818c160451f..f5effa68e037 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -48,40 +48,6 @@
 static const struct vm_operations_struct xfs_file_vm_ops;
 
 /*
- * Locking primitives for read and write IO paths to ensure we consistently use
- * and order the inode->i_mutex, ip->i_lock and ip->i_iolock.
- */
-static inline void
-xfs_rw_ilock(
-	struct xfs_inode	*ip,
-	int			type)
-{
-	if (type & XFS_IOLOCK_EXCL)
-		inode_lock(VFS_I(ip));
-	xfs_ilock(ip, type);
-}
-
-static inline void
-xfs_rw_iunlock(
-	struct xfs_inode	*ip,
-	int			type)
-{
-	xfs_iunlock(ip, type);
-	if (type & XFS_IOLOCK_EXCL)
-		inode_unlock(VFS_I(ip));
-}
-
-static inline void
-xfs_rw_ilock_demote(
-	struct xfs_inode	*ip,
-	int			type)
-{
-	xfs_ilock_demote(ip, type);
-	if (type & XFS_IOLOCK_EXCL)
-		inode_unlock(VFS_I(ip));
-}
-
-/*
  * Clear the specified ranges to zero through either the pagecache or DAX.
  * Holes and unwritten extents will be left as-is as they already are zeroed.
  */
@@ -244,62 +210,21 @@ xfs_file_dio_aio_read(
 	struct kiocb		*iocb,
 	struct iov_iter		*to)
 {
-	struct address_space	*mapping = iocb->ki_filp->f_mapping;
-	struct inode		*inode = mapping->host;
-	struct xfs_inode	*ip = XFS_I(inode);
-	loff_t			isize = i_size_read(inode);
+	struct xfs_inode	*ip = XFS_I(file_inode(iocb->ki_filp));
 	size_t			count = iov_iter_count(to);
-	loff_t			end = iocb->ki_pos + count - 1;
-	struct iov_iter		data;
-	struct xfs_buftarg	*target;
-	ssize_t			ret = 0;
+	ssize_t			ret;
 
 	trace_xfs_file_direct_read(ip, count, iocb->ki_pos);
 
 	if (!count)
 		return 0; /* skip atime */
 
-	if (XFS_IS_REALTIME_INODE(ip))
-		target = ip->i_mount->m_rtdev_targp;
-	else
-		target = ip->i_mount->m_ddev_targp;
-
-	/* DIO must be aligned to device logical sector size */
-	if ((iocb->ki_pos | count) & target->bt_logical_sectormask) {
-		if (iocb->ki_pos == isize)
-			return 0;
-		return -EINVAL;
-	}
-
 	file_accessed(iocb->ki_filp);
 
-	xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
-	if (mapping->nrpages) {
-		ret = filemap_write_and_wait_range(mapping, iocb->ki_pos, end);
-		if (ret)
-			goto out_unlock;
-
-		/*
-		 * Invalidate whole pages. This can return an error if we fail
-		 * to invalidate a page, but this should never happen on XFS.
-		 * Warn if it does fail.
-		 */
-		ret = invalidate_inode_pages2_range(mapping,
-				iocb->ki_pos >> PAGE_SHIFT, end >> PAGE_SHIFT);
-		WARN_ON_ONCE(ret);
-		ret = 0;
-	}
-
-	data = *to;
-	ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data,
-			xfs_get_blocks_direct, NULL, NULL, 0);
-	if (ret >= 0) {
-		iocb->ki_pos += ret;
-		iov_iter_advance(to, ret);
-	}
+	xfs_ilock(ip, XFS_IOLOCK_SHARED);
+	ret = iomap_dio_rw(iocb, to, &xfs_iomap_ops, NULL);
+	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
 
-out_unlock:
-	xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
 	return ret;
 }
 
@@ -317,9 +242,9 @@ xfs_file_dax_read(
 	if (!count)
 		return 0; /* skip atime */
 
-	xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
+	xfs_ilock(ip, XFS_IOLOCK_SHARED);
 	ret = dax_iomap_rw(iocb, to, &xfs_iomap_ops);
-	xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
+	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
 
 	file_accessed(iocb->ki_filp);
 	return ret;
@@ -335,9 +260,9 @@ xfs_file_buffered_aio_read(
 
 	trace_xfs_file_buffered_read(ip, iov_iter_count(to), iocb->ki_pos);
 
-	xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
+	xfs_ilock(ip, XFS_IOLOCK_SHARED);
 	ret = generic_file_read_iter(iocb, to);
-	xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
+	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
 
 	return ret;
 }
@@ -418,15 +343,18 @@ restart:
 	if (error <= 0)
 		return error;
 
-	error = xfs_break_layouts(inode, iolock, true);
+	error = xfs_break_layouts(inode, iolock);
 	if (error)
 		return error;
 
-	/* For changing security info in file_remove_privs() we need i_mutex */
+	/*
+	 * For changing security info in file_remove_privs() we need i_rwsem
+	 * exclusively.
+	 */
 	if (*iolock == XFS_IOLOCK_SHARED && !IS_NOSEC(inode)) {
-		xfs_rw_iunlock(ip, *iolock);
+		xfs_iunlock(ip, *iolock);
 		*iolock = XFS_IOLOCK_EXCL;
-		xfs_rw_ilock(ip, *iolock);
+		xfs_ilock(ip, *iolock);
 		goto restart;
 	}
 	/*
@@ -451,9 +379,9 @@ restart:
 		spin_unlock(&ip->i_flags_lock);
 		if (!drained_dio) {
 			if (*iolock == XFS_IOLOCK_SHARED) {
-				xfs_rw_iunlock(ip, *iolock);
+				xfs_iunlock(ip, *iolock);
 				*iolock = XFS_IOLOCK_EXCL;
-				xfs_rw_ilock(ip, *iolock);
+				xfs_ilock(ip, *iolock);
 				iov_iter_reexpand(from, count);
 			}
 			/*
@@ -496,6 +424,58 @@ restart:
 	return 0;
 }
 
+static int
+xfs_dio_write_end_io(
+	struct kiocb		*iocb,
+	ssize_t			size,
+	unsigned		flags)
+{
+	struct inode		*inode = file_inode(iocb->ki_filp);
+	struct xfs_inode	*ip = XFS_I(inode);
+	loff_t			offset = iocb->ki_pos;
+	bool			update_size = false;
+	int			error = 0;
+
+	trace_xfs_end_io_direct_write(ip, offset, size);
+
+	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+		return -EIO;
+
+	if (size <= 0)
+		return size;
+
+	/*
+	 * We need to update the in-core inode size here so that we don't end up
+	 * with the on-disk inode size being outside the in-core inode size. We
+	 * have no other method of updating EOF for AIO, so always do it here
+	 * if necessary.
+	 *
+	 * We need to lock the test/set EOF update as we can be racing with
+	 * other IO completions here to update the EOF. Failing to serialise
+	 * here can result in EOF moving backwards and Bad Things Happen when
+	 * that occurs.
+	 */
+	spin_lock(&ip->i_flags_lock);
+	if (offset + size > i_size_read(inode)) {
+		i_size_write(inode, offset + size);
+		update_size = true;
+	}
+	spin_unlock(&ip->i_flags_lock);
+
+	if (flags & IOMAP_DIO_COW) {
+		error = xfs_reflink_end_cow(ip, offset, size);
+		if (error)
+			return error;
+	}
+
+	if (flags & IOMAP_DIO_UNWRITTEN)
+		error = xfs_iomap_write_unwritten(ip, offset, size);
+	else if (update_size)
+		error = xfs_setfilesize(ip, offset, size);
+
+	return error;
+}
+
 /*
  * xfs_file_dio_aio_write - handle direct IO writes
  *
@@ -535,9 +515,7 @@ xfs_file_dio_aio_write(
 	int			unaligned_io = 0;
 	int			iolock;
 	size_t			count = iov_iter_count(from);
-	loff_t			end;
-	struct iov_iter		data;
-	struct xfs_buftarg	*target = XFS_IS_REALTIME_INODE(ip) ?
+	struct xfs_buftarg	*target = XFS_IS_REALTIME_INODE(ip) ?
 					mp->m_rtdev_targp : mp->m_ddev_targp;
 
 	/* DIO must be aligned to device logical sector size */
@@ -559,29 +537,12 @@ xfs_file_dio_aio_write(
 		iolock = XFS_IOLOCK_SHARED;
 	}
 
-	xfs_rw_ilock(ip, iolock);
+	xfs_ilock(ip, iolock);
 
 	ret = xfs_file_aio_write_checks(iocb, from, &iolock);
 	if (ret)
 		goto out;
 	count = iov_iter_count(from);
-	end = iocb->ki_pos + count - 1;
-
-	if (mapping->nrpages) {
-		ret = filemap_write_and_wait_range(mapping, iocb->ki_pos, end);
-		if (ret)
-			goto out;
-
-		/*
-		 * Invalidate whole pages. This can return an error if we fail
-		 * to invalidate a page, but this should never happen on XFS.
-		 * Warn if it does fail.
-		 */
-		ret = invalidate_inode_pages2_range(mapping,
-				iocb->ki_pos >> PAGE_SHIFT, end >> PAGE_SHIFT);
-		WARN_ON_ONCE(ret);
-		ret = 0;
-	}
 
 	/*
 	 * If we are doing unaligned IO, wait for all other IO to drain,
@@ -591,7 +552,7 @@ xfs_file_dio_aio_write(
 	if (unaligned_io)
 		inode_dio_wait(inode);
 	else if (iolock == XFS_IOLOCK_EXCL) {
-		xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
+		xfs_ilock_demote(ip, XFS_IOLOCK_EXCL);
 		iolock = XFS_IOLOCK_SHARED;
 	}
 
@@ -604,24 +565,9 @@ xfs_file_dio_aio_write(
 		goto out;
 	}
 
-	data = *from;
-	ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data,
-			xfs_get_blocks_direct, xfs_end_io_direct_write,
-			NULL, DIO_ASYNC_EXTEND);
-
-	/* see generic_file_direct_write() for why this is necessary */
-	if (mapping->nrpages) {
-		invalidate_inode_pages2_range(mapping,
-				iocb->ki_pos >> PAGE_SHIFT,
-				end >> PAGE_SHIFT);
-	}
-
-	if (ret > 0) {
-		iocb->ki_pos += ret;
-		iov_iter_advance(from, ret);
-	}
+	ret = iomap_dio_rw(iocb, from, &xfs_iomap_ops, xfs_dio_write_end_io);
 out:
-	xfs_rw_iunlock(ip, iolock);
+	xfs_iunlock(ip, iolock);
 
 	/*
 	 * No fallback to buffered IO on errors for XFS, direct IO will either
@@ -643,7 +589,7 @@ xfs_file_dax_write(
 	size_t			count;
 	loff_t			pos;
 
-	xfs_rw_ilock(ip, iolock);
+	xfs_ilock(ip, iolock);
 	ret = xfs_file_aio_write_checks(iocb, from, &iolock);
 	if (ret)
 		goto out;
@@ -652,15 +598,13 @@ xfs_file_dax_write(
 	count = iov_iter_count(from);
 
 	trace_xfs_file_dax_write(ip, count, pos);
-
 	ret = dax_iomap_rw(iocb, from, &xfs_iomap_ops);
 	if (ret > 0 && iocb->ki_pos > i_size_read(inode)) {
 		i_size_write(inode, iocb->ki_pos);
 		error = xfs_setfilesize(ip, pos, ret);
 	}
-
 out:
-	xfs_rw_iunlock(ip, iolock);
+	xfs_iunlock(ip, iolock);
 	return error ? error : ret;
 }
 
@@ -677,7 +621,7 @@ xfs_file_buffered_aio_write(
 	int			enospc = 0;
 	int			iolock = XFS_IOLOCK_EXCL;
 
-	xfs_rw_ilock(ip, iolock);
+	xfs_ilock(ip, iolock);
 
 	ret = xfs_file_aio_write_checks(iocb, from, &iolock);
 	if (ret)
@@ -721,7 +665,7 @@ write_retry:
 
 	current->backing_dev_info = NULL;
 out:
-	xfs_rw_iunlock(ip, iolock);
+	xfs_iunlock(ip, iolock);
 	return ret;
 }
 
@@ -797,7 +741,7 @@ xfs_file_fallocate(
 		return -EOPNOTSUPP;
 
 	xfs_ilock(ip, iolock);
-	error = xfs_break_layouts(inode, &iolock, false);
+	error = xfs_break_layouts(inode, &iolock);
 	if (error)
 		goto out_unlock;
 
@@ -1501,15 +1445,9 @@ xfs_filemap_fault(
 		return xfs_filemap_page_mkwrite(vma, vmf);
 
 	xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
-	if (IS_DAX(inode)) {
-		/*
-		 * we do not want to trigger unwritten extent conversion on read
-		 * faults - that is unnecessary overhead and would also require
-		 * changes to xfs_get_blocks_direct() to map unwritten extent
-		 * ioend for conversion on read-only mappings.
-		 */
+	if (IS_DAX(inode))
 		ret = dax_iomap_fault(vma, vmf, &xfs_iomap_ops);
-	} else
+	else
 		ret = filemap_fault(vma, vmf);
 	xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
 
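xfs_dio_write_end_io() above is the reference consumer of the iomap_dio_end_io_t hook added by this series. A minimal sketch of the callback contract for a filesystem with no unwritten-extent or CoW post-processing (names hypothetical):

	/*
	 * Hedged sketch: the hook receives the iocb, the byte count on
	 * success or a negative errno, and the IOMAP_DIO_* result flags.
	 * A trivial filesystem only needs to propagate errors.
	 */
	static int example_dio_write_end_io(struct kiocb *iocb, ssize_t size,
					    unsigned flags)
	{
		if (size <= 0)
			return size;	/* error (or nothing written): pass through */
		if (flags & IOMAP_DIO_UNWRITTEN)
			return -EIO;	/* this sketch never maps unwritten extents */
		return 0;
	}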
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 9c3e5c6ddf20..ff4d6311c7f4 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -70,8 +70,6 @@ xfs_inode_alloc(
 	ASSERT(!xfs_isiflocked(ip));
 	ASSERT(ip->i_ino == 0);
 
-	mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
-
 	/* initialise the xfs inode */
 	ip->i_ino = ino;
 	ip->i_mount = mp;
@@ -394,8 +392,8 @@ xfs_iget_cache_hit(
 		xfs_inode_clear_reclaim_tag(pag, ip->i_ino);
 		inode->i_state = I_NEW;
 
-		ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));
-		mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
+		ASSERT(!rwsem_is_locked(&inode->i_rwsem));
+		init_rwsem(&inode->i_rwsem);
 
 		spin_unlock(&ip->i_flags_lock);
 		spin_unlock(&pag->pag_ici_lock);
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 4e560e6a12c1..e9ab42d8965b 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c | |||
@@ -142,31 +142,31 @@ xfs_ilock_attr_map_shared( | |||
142 | } | 142 | } |
143 | 143 | ||
144 | /* | 144 | /* |
145 | * The xfs inode contains 3 multi-reader locks: the i_iolock, the i_mmap_lock and | 145 | * In addition to i_rwsem in the VFS inode, the xfs inode contains 2 |
146 | * the i_lock. This routine allows various combinations of the locks to be | 146 | * multi-reader locks: i_mmap_lock and the i_lock. This routine allows |
147 | * obtained. | 147 | * various combinations of the locks to be obtained. |
148 | * | 148 | * |
149 | * The 3 locks should always be ordered so that the IO lock is obtained first, | 149 | * The 3 locks should always be ordered so that the IO lock is obtained first, |
150 | * the mmap lock second and the ilock last in order to prevent deadlock. | 150 | * the mmap lock second and the ilock last in order to prevent deadlock. |
151 | * | 151 | * |
152 | * Basic locking order: | 152 | * Basic locking order: |
153 | * | 153 | * |
154 | * i_iolock -> i_mmap_lock -> page_lock -> i_ilock | 154 | * i_rwsem -> i_mmap_lock -> page_lock -> i_ilock |
155 | * | 155 | * |
156 | * mmap_sem locking order: | 156 | * mmap_sem locking order: |
157 | * | 157 | * |
158 | * i_iolock -> page lock -> mmap_sem | 158 | * i_rwsem -> page lock -> mmap_sem |
159 | * mmap_sem -> i_mmap_lock -> page_lock | 159 | * mmap_sem -> i_mmap_lock -> page_lock |
160 | * | 160 | * |
161 | * The difference in mmap_sem locking order means that we cannot hold the | 161 | * The difference in mmap_sem locking order means that we cannot hold the |
162 | * i_mmap_lock over syscall based read(2)/write(2) based IO. These IO paths can | 162 | * i_mmap_lock over syscall based read(2)/write(2) based IO. These IO paths can |
163 | * fault in pages during copy in/out (for buffered IO) or require the mmap_sem | 163 | * fault in pages during copy in/out (for buffered IO) or require the mmap_sem |
164 | * in get_user_pages() to map the user pages into the kernel address space for | 164 | * in get_user_pages() to map the user pages into the kernel address space for |
165 | * direct IO. Similarly the i_iolock cannot be taken inside a page fault because | 165 | * direct IO. Similarly the i_rwsem cannot be taken inside a page fault because |
166 | * page faults already hold the mmap_sem. | 166 | * page faults already hold the mmap_sem. |
167 | * | 167 | * |
168 | * Hence to serialise fully against both syscall and mmap based IO, we need to | 168 | * Hence to serialise fully against both syscall and mmap based IO, we need to |
169 | * take both the i_iolock and the i_mmap_lock. These locks should *only* be both | 169 | * take both the i_rwsem and the i_mmap_lock. These locks should *only* be both |
170 | * taken in places where we need to invalidate the page cache in a race | 170 | * taken in places where we need to invalidate the page cache in a race |
171 | * free manner (e.g. truncate, hole punch and other extent manipulation | 171 | * free manner (e.g. truncate, hole punch and other extent manipulation |
172 | * functions). | 172 | * functions). |
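Concretely, a path that must invalidate the page cache takes the locks in the documented order. A minimal illustrative sketch (not part of this diff):

	/* truncate-style path: serialise against both syscall and mmap IO */
	xfs_ilock(ip, XFS_IOLOCK_EXCL);		/* i_rwsem, taken first */
	xfs_ilock(ip, XFS_MMAPLOCK_EXCL);	/* mmap lock second */
	truncate_setsize(VFS_I(ip), newsize);	/* page locks nest inside */
	xfs_ilock(ip, XFS_ILOCK_EXCL);		/* ilock last, for metadata */
	/* ... modify the inode core ... */
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	xfs_iunlock(ip, XFS_MMAPLOCK_EXCL | XFS_IOLOCK_EXCL);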
@@ -191,10 +191,13 @@ xfs_ilock( | |||
191 | (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); | 191 | (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); |
192 | ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0); | 192 | ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0); |
193 | 193 | ||
194 | if (lock_flags & XFS_IOLOCK_EXCL) | 194 | if (lock_flags & XFS_IOLOCK_EXCL) { |
195 | mrupdate_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags)); | 195 | down_write_nested(&VFS_I(ip)->i_rwsem, |
196 | else if (lock_flags & XFS_IOLOCK_SHARED) | 196 | XFS_IOLOCK_DEP(lock_flags)); |
197 | mraccess_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags)); | 197 | } else if (lock_flags & XFS_IOLOCK_SHARED) { |
198 | down_read_nested(&VFS_I(ip)->i_rwsem, | ||
199 | XFS_IOLOCK_DEP(lock_flags)); | ||
200 | } | ||
198 | 201 | ||
199 | if (lock_flags & XFS_MMAPLOCK_EXCL) | 202 | if (lock_flags & XFS_MMAPLOCK_EXCL) |
200 | mrupdate_nested(&ip->i_mmaplock, XFS_MMAPLOCK_DEP(lock_flags)); | 203 | mrupdate_nested(&ip->i_mmaplock, XFS_MMAPLOCK_DEP(lock_flags)); |
@@ -240,10 +243,10 @@ xfs_ilock_nowait( | |||
240 | ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0); | 243 | ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0); |
241 | 244 | ||
242 | if (lock_flags & XFS_IOLOCK_EXCL) { | 245 | if (lock_flags & XFS_IOLOCK_EXCL) { |
243 | if (!mrtryupdate(&ip->i_iolock)) | 246 | if (!down_write_trylock(&VFS_I(ip)->i_rwsem)) |
244 | goto out; | 247 | goto out; |
245 | } else if (lock_flags & XFS_IOLOCK_SHARED) { | 248 | } else if (lock_flags & XFS_IOLOCK_SHARED) { |
246 | if (!mrtryaccess(&ip->i_iolock)) | 249 | if (!down_read_trylock(&VFS_I(ip)->i_rwsem)) |
247 | goto out; | 250 | goto out; |
248 | } | 251 | } |
249 | 252 | ||
@@ -271,9 +274,9 @@ out_undo_mmaplock: | |||
271 | mrunlock_shared(&ip->i_mmaplock); | 274 | mrunlock_shared(&ip->i_mmaplock); |
272 | out_undo_iolock: | 275 | out_undo_iolock: |
273 | if (lock_flags & XFS_IOLOCK_EXCL) | 276 | if (lock_flags & XFS_IOLOCK_EXCL) |
274 | mrunlock_excl(&ip->i_iolock); | 277 | up_write(&VFS_I(ip)->i_rwsem); |
275 | else if (lock_flags & XFS_IOLOCK_SHARED) | 278 | else if (lock_flags & XFS_IOLOCK_SHARED) |
276 | mrunlock_shared(&ip->i_iolock); | 279 | up_read(&VFS_I(ip)->i_rwsem); |
277 | out: | 280 | out: |
278 | return 0; | 281 | return 0; |
279 | } | 282 | } |
@@ -310,9 +313,9 @@ xfs_iunlock( | |||
310 | ASSERT(lock_flags != 0); | 313 | ASSERT(lock_flags != 0); |
311 | 314 | ||
312 | if (lock_flags & XFS_IOLOCK_EXCL) | 315 | if (lock_flags & XFS_IOLOCK_EXCL) |
313 | mrunlock_excl(&ip->i_iolock); | 316 | up_write(&VFS_I(ip)->i_rwsem); |
314 | else if (lock_flags & XFS_IOLOCK_SHARED) | 317 | else if (lock_flags & XFS_IOLOCK_SHARED) |
315 | mrunlock_shared(&ip->i_iolock); | 318 | up_read(&VFS_I(ip)->i_rwsem); |
316 | 319 | ||
317 | if (lock_flags & XFS_MMAPLOCK_EXCL) | 320 | if (lock_flags & XFS_MMAPLOCK_EXCL) |
318 | mrunlock_excl(&ip->i_mmaplock); | 321 | mrunlock_excl(&ip->i_mmaplock); |
@@ -345,7 +348,7 @@ xfs_ilock_demote( | |||
345 | if (lock_flags & XFS_MMAPLOCK_EXCL) | 348 | if (lock_flags & XFS_MMAPLOCK_EXCL) |
346 | mrdemote(&ip->i_mmaplock); | 349 | mrdemote(&ip->i_mmaplock); |
347 | if (lock_flags & XFS_IOLOCK_EXCL) | 350 | if (lock_flags & XFS_IOLOCK_EXCL) |
348 | mrdemote(&ip->i_iolock); | 351 | downgrade_write(&VFS_I(ip)->i_rwsem); |
349 | 352 | ||
350 | trace_xfs_ilock_demote(ip, lock_flags, _RET_IP_); | 353 | trace_xfs_ilock_demote(ip, lock_flags, _RET_IP_); |
351 | } | 354 | } |
@@ -370,8 +373,9 @@ xfs_isilocked( | |||
370 | 373 | ||
371 | if (lock_flags & (XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)) { | 374 | if (lock_flags & (XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)) { |
372 | if (!(lock_flags & XFS_IOLOCK_SHARED)) | 375 | if (!(lock_flags & XFS_IOLOCK_SHARED)) |
373 | return !!ip->i_iolock.mr_writer; | 376 | return !debug_locks || |
374 | return rwsem_is_locked(&ip->i_iolock.mr_lock); | 377 | lockdep_is_held_type(&VFS_I(ip)->i_rwsem, 0); |
378 | return rwsem_is_locked(&VFS_I(ip)->i_rwsem); | ||
375 | } | 379 | } |
376 | 380 | ||
377 | ASSERT(0); | 381 | ASSERT(0); |
@@ -421,11 +425,7 @@ xfs_lock_inumorder(int lock_mode, int subclass) | |||
421 | 425 | ||
422 | if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) { | 426 | if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) { |
423 | ASSERT(subclass <= XFS_IOLOCK_MAX_SUBCLASS); | 427 | ASSERT(subclass <= XFS_IOLOCK_MAX_SUBCLASS); |
424 | ASSERT(xfs_lockdep_subclass_ok(subclass + | ||
425 | XFS_IOLOCK_PARENT_VAL)); | ||
426 | class += subclass << XFS_IOLOCK_SHIFT; | 428 | class += subclass << XFS_IOLOCK_SHIFT; |
427 | if (lock_mode & XFS_IOLOCK_PARENT) | ||
428 | class += XFS_IOLOCK_PARENT_VAL << XFS_IOLOCK_SHIFT; | ||
429 | } | 429 | } |
430 | 430 | ||
431 | if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) { | 431 | if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) { |
@@ -477,8 +477,6 @@ xfs_lock_inodes( | |||
477 | XFS_ILOCK_EXCL)); | 477 | XFS_ILOCK_EXCL)); |
478 | ASSERT(!(lock_mode & (XFS_IOLOCK_SHARED | XFS_MMAPLOCK_SHARED | | 478 | ASSERT(!(lock_mode & (XFS_IOLOCK_SHARED | XFS_MMAPLOCK_SHARED | |
479 | XFS_ILOCK_SHARED))); | 479 | XFS_ILOCK_SHARED))); |
480 | ASSERT(!(lock_mode & XFS_IOLOCK_EXCL) || | ||
481 | inodes <= XFS_IOLOCK_MAX_SUBCLASS + 1); | ||
482 | ASSERT(!(lock_mode & XFS_MMAPLOCK_EXCL) || | 480 | ASSERT(!(lock_mode & XFS_MMAPLOCK_EXCL) || |
483 | inodes <= XFS_MMAPLOCK_MAX_SUBCLASS + 1); | 481 | inodes <= XFS_MMAPLOCK_MAX_SUBCLASS + 1); |
484 | ASSERT(!(lock_mode & XFS_ILOCK_EXCL) || | 482 | ASSERT(!(lock_mode & XFS_ILOCK_EXCL) || |
@@ -581,10 +579,8 @@ xfs_lock_two_inodes( | |||
581 | int attempts = 0; | 579 | int attempts = 0; |
582 | xfs_log_item_t *lp; | 580 | xfs_log_item_t *lp; |
583 | 581 | ||
584 | if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) { | 582 | ASSERT(!(lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL))); |
585 | ASSERT(!(lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL))); | 583 | if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) |
586 | ASSERT(!(lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))); | ||
587 | } else if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) | ||
588 | ASSERT(!(lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))); | 584 | ASSERT(!(lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))); |
589 | 585 | ||
590 | ASSERT(ip0->i_ino != ip1->i_ino); | 586 | ASSERT(ip0->i_ino != ip1->i_ino); |
@@ -715,7 +711,6 @@ xfs_lookup( | |||
715 | if (XFS_FORCED_SHUTDOWN(dp->i_mount)) | 711 | if (XFS_FORCED_SHUTDOWN(dp->i_mount)) |
716 | return -EIO; | 712 | return -EIO; |
717 | 713 | ||
718 | xfs_ilock(dp, XFS_IOLOCK_SHARED); | ||
719 | error = xfs_dir_lookup(NULL, dp, name, &inum, ci_name); | 714 | error = xfs_dir_lookup(NULL, dp, name, &inum, ci_name); |
720 | if (error) | 715 | if (error) |
721 | goto out_unlock; | 716 | goto out_unlock; |
@@ -724,14 +719,12 @@ xfs_lookup( | |||
724 | if (error) | 719 | if (error) |
725 | goto out_free_name; | 720 | goto out_free_name; |
726 | 721 | ||
727 | xfs_iunlock(dp, XFS_IOLOCK_SHARED); | ||
728 | return 0; | 722 | return 0; |
729 | 723 | ||
730 | out_free_name: | 724 | out_free_name: |
731 | if (ci_name) | 725 | if (ci_name) |
732 | kmem_free(ci_name->name); | 726 | kmem_free(ci_name->name); |
733 | out_unlock: | 727 | out_unlock: |
734 | xfs_iunlock(dp, XFS_IOLOCK_SHARED); | ||
735 | *ipp = NULL; | 728 | *ipp = NULL; |
736 | return error; | 729 | return error; |
737 | } | 730 | } |
@@ -1215,8 +1208,7 @@ xfs_create( | |||
1215 | if (error) | 1208 | if (error) |
1216 | goto out_release_inode; | 1209 | goto out_release_inode; |
1217 | 1210 | ||
1218 | xfs_ilock(dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL | | 1211 | xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); |
1219 | XFS_IOLOCK_PARENT | XFS_ILOCK_PARENT); | ||
1220 | unlock_dp_on_error = true; | 1212 | unlock_dp_on_error = true; |
1221 | 1213 | ||
1222 | xfs_defer_init(&dfops, &first_block); | 1214 | xfs_defer_init(&dfops, &first_block); |
@@ -1252,7 +1244,7 @@ xfs_create( | |||
1252 | * the transaction cancel unlocking dp so don't do it explicitly in the | 1244 | * the transaction cancel unlocking dp so don't do it explicitly in the |
1253 | * error path. | 1245 | * error path. |
1254 | */ | 1246 | */ |
1255 | xfs_trans_ijoin(tp, dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); | 1247 | xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); |
1256 | unlock_dp_on_error = false; | 1248 | unlock_dp_on_error = false; |
1257 | 1249 | ||
1258 | error = xfs_dir_createname(tp, dp, name, ip->i_ino, | 1250 | error = xfs_dir_createname(tp, dp, name, ip->i_ino, |
@@ -1325,7 +1317,7 @@ xfs_create( | |||
1325 | xfs_qm_dqrele(pdqp); | 1317 | xfs_qm_dqrele(pdqp); |
1326 | 1318 | ||
1327 | if (unlock_dp_on_error) | 1319 | if (unlock_dp_on_error) |
1328 | xfs_iunlock(dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); | 1320 | xfs_iunlock(dp, XFS_ILOCK_EXCL); |
1329 | return error; | 1321 | return error; |
1330 | } | 1322 | } |
1331 | 1323 | ||
@@ -1466,11 +1458,10 @@ xfs_link( | |||
1466 | if (error) | 1458 | if (error) |
1467 | goto std_return; | 1459 | goto std_return; |
1468 | 1460 | ||
1469 | xfs_ilock(tdp, XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT); | ||
1470 | xfs_lock_two_inodes(sip, tdp, XFS_ILOCK_EXCL); | 1461 | xfs_lock_two_inodes(sip, tdp, XFS_ILOCK_EXCL); |
1471 | 1462 | ||
1472 | xfs_trans_ijoin(tp, sip, XFS_ILOCK_EXCL); | 1463 | xfs_trans_ijoin(tp, sip, XFS_ILOCK_EXCL); |
1473 | xfs_trans_ijoin(tp, tdp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); | 1464 | xfs_trans_ijoin(tp, tdp, XFS_ILOCK_EXCL); |
1474 | 1465 | ||
1475 | /* | 1466 | /* |
1476 | * If we are using project inheritance, we only allow hard link | 1467 | * If we are using project inheritance, we only allow hard link |
@@ -2579,10 +2570,9 @@ xfs_remove( | |||
2579 | goto std_return; | 2570 | goto std_return; |
2580 | } | 2571 | } |
2581 | 2572 | ||
2582 | xfs_ilock(dp, XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT); | ||
2583 | xfs_lock_two_inodes(dp, ip, XFS_ILOCK_EXCL); | 2573 | xfs_lock_two_inodes(dp, ip, XFS_ILOCK_EXCL); |
2584 | 2574 | ||
2585 | xfs_trans_ijoin(tp, dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); | 2575 | xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); |
2586 | xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); | 2576 | xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); |
2587 | 2577 | ||
2588 | /* | 2578 | /* |
@@ -2963,12 +2953,6 @@ xfs_rename( | |||
2963 | * whether the target directory is the same as the source | 2953 | * whether the target directory is the same as the source |
2964 | * directory, we can lock from 2 to 4 inodes. | 2954 | * directory, we can lock from 2 to 4 inodes. |
2965 | */ | 2955 | */ |
2966 | if (!new_parent) | ||
2967 | xfs_ilock(src_dp, XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT); | ||
2968 | else | ||
2969 | xfs_lock_two_inodes(src_dp, target_dp, | ||
2970 | XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT); | ||
2971 | |||
2972 | xfs_lock_inodes(inodes, num_inodes, XFS_ILOCK_EXCL); | 2956 | xfs_lock_inodes(inodes, num_inodes, XFS_ILOCK_EXCL); |
2973 | 2957 | ||
2974 | /* | 2958 | /* |
@@ -2976,9 +2960,9 @@ xfs_rename( | |||
2976 | * we can rely on either trans_commit or trans_cancel to unlock | 2960 | * we can rely on either trans_commit or trans_cancel to unlock |
2977 | * them. | 2961 | * them. |
2978 | */ | 2962 | */ |
2979 | xfs_trans_ijoin(tp, src_dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); | 2963 | xfs_trans_ijoin(tp, src_dp, XFS_ILOCK_EXCL); |
2980 | if (new_parent) | 2964 | if (new_parent) |
2981 | xfs_trans_ijoin(tp, target_dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); | 2965 | xfs_trans_ijoin(tp, target_dp, XFS_ILOCK_EXCL); |
2982 | xfs_trans_ijoin(tp, src_ip, XFS_ILOCK_EXCL); | 2966 | xfs_trans_ijoin(tp, src_ip, XFS_ILOCK_EXCL); |
2983 | if (target_ip) | 2967 | if (target_ip) |
2984 | xfs_trans_ijoin(tp, target_ip, XFS_ILOCK_EXCL); | 2968 | xfs_trans_ijoin(tp, target_ip, XFS_ILOCK_EXCL); |
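The directory-operation hunks above (lookup, create, link, remove, rename) all shed their XFS_IOLOCK_* handling for the same reason: with the iolock now being i_rwsem itself, the VFS already holds that lock on the directory when it calls into these entry points, so taking it again would self-deadlock. A simplified sketch of the caller-side pattern, shaped after fs/namei.c (not part of this diff):

	/* VFS create path: the parent's i_rwsem is held across ->create() */
	inode_lock_nested(dir, I_MUTEX_PARENT);
	/* ... dentry lookup ... */
	error = dir->i_op->create(dir, dentry, mode, want_excl);
	inode_unlock(dir);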
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 71e8a81c91a3..10dcf27b4c85 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h | |||
@@ -56,7 +56,6 @@ typedef struct xfs_inode { | |||
56 | /* Transaction and locking information. */ | 56 | /* Transaction and locking information. */ |
57 | struct xfs_inode_log_item *i_itemp; /* logging information */ | 57 | struct xfs_inode_log_item *i_itemp; /* logging information */ |
58 | mrlock_t i_lock; /* inode lock */ | 58 | mrlock_t i_lock; /* inode lock */ |
59 | mrlock_t i_iolock; /* inode IO lock */ | ||
60 | mrlock_t i_mmaplock; /* inode mmap IO lock */ | 59 | mrlock_t i_mmaplock; /* inode mmap IO lock */ |
61 | atomic_t i_pincount; /* inode pin count */ | 60 | atomic_t i_pincount; /* inode pin count */ |
62 | spinlock_t i_flags_lock; /* inode i_flags lock */ | 61 | spinlock_t i_flags_lock; /* inode i_flags lock */ |
@@ -333,7 +332,7 @@ static inline void xfs_ifunlock(struct xfs_inode *ip) | |||
333 | * IOLOCK values | 332 | * IOLOCK values |
334 | * | 333 | * |
335 | * 0-3 subclass value | 334 | * 0-3 subclass value |
336 | * 4-7 PARENT subclass values | 335 | * 4-7 unused |
337 | * | 336 | * |
338 | * MMAPLOCK values | 337 | * MMAPLOCK values |
339 | * | 338 | * |
@@ -348,10 +347,8 @@ static inline void xfs_ifunlock(struct xfs_inode *ip) | |||
348 | * | 347 | * |
349 | */ | 348 | */ |
350 | #define XFS_IOLOCK_SHIFT 16 | 349 | #define XFS_IOLOCK_SHIFT 16 |
351 | #define XFS_IOLOCK_PARENT_VAL 4 | 350 | #define XFS_IOLOCK_MAX_SUBCLASS 3 |
352 | #define XFS_IOLOCK_MAX_SUBCLASS (XFS_IOLOCK_PARENT_VAL - 1) | ||
353 | #define XFS_IOLOCK_DEP_MASK 0x000f0000 | 351 | #define XFS_IOLOCK_DEP_MASK 0x000f0000 |
354 | #define XFS_IOLOCK_PARENT (XFS_IOLOCK_PARENT_VAL << XFS_IOLOCK_SHIFT) | ||
355 | 352 | ||
356 | #define XFS_MMAPLOCK_SHIFT 20 | 353 | #define XFS_MMAPLOCK_SHIFT 20 |
357 | #define XFS_MMAPLOCK_NUMORDER 0 | 354 | #define XFS_MMAPLOCK_NUMORDER 0 |
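With the PARENT value gone, the iolock subclass range shrinks to 0-3. A short worked example of the encoding, assuming XFS_IOLOCK_DEP() is the usual mask-and-shift over XFS_IOLOCK_DEP_MASK:

	/* subclass 2 for the iolock lands in bits 16-19 of lock_flags */
	uint	flags = XFS_IOLOCK_EXCL | (2 << XFS_IOLOCK_SHIFT);

	/* (flags & 0x000f0000) >> 16 == 2, which xfs_ilock() passes to
	 * down_write_nested() as the lockdep subclass */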
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index a39197501a7c..fc563b82aea6 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c | |||
@@ -639,7 +639,7 @@ xfs_ioc_space( | |||
639 | return error; | 639 | return error; |
640 | 640 | ||
641 | xfs_ilock(ip, iolock); | 641 | xfs_ilock(ip, iolock); |
642 | error = xfs_break_layouts(inode, &iolock, false); | 642 | error = xfs_break_layouts(inode, &iolock); |
643 | if (error) | 643 | if (error) |
644 | goto out_unlock; | 644 | goto out_unlock; |
645 | 645 | ||
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 15a83813b708..0d147428971e 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c | |||
@@ -950,6 +950,19 @@ static inline bool imap_needs_alloc(struct inode *inode, | |||
950 | (IS_DAX(inode) && ISUNWRITTEN(imap)); | 950 | (IS_DAX(inode) && ISUNWRITTEN(imap)); |
951 | } | 951 | } |
952 | 952 | ||
953 | static inline bool need_excl_ilock(struct xfs_inode *ip, unsigned flags) | ||
954 | { | ||
955 | /* | ||
956 | * COW writes will allocate delalloc space, so we need to make sure | ||
957 | * to take the lock exclusively here. | ||
958 | */ | ||
959 | if (xfs_is_reflink_inode(ip) && (flags & (IOMAP_WRITE | IOMAP_ZERO))) | ||
960 | return true; | ||
961 | if ((flags & IOMAP_DIRECT) && (flags & IOMAP_WRITE)) | ||
962 | return true; | ||
963 | return false; | ||
964 | } | ||
965 | |||
953 | static int | 966 | static int |
954 | xfs_file_iomap_begin( | 967 | xfs_file_iomap_begin( |
955 | struct inode *inode, | 968 | struct inode *inode, |
@@ -969,18 +982,14 @@ xfs_file_iomap_begin( | |||
969 | if (XFS_FORCED_SHUTDOWN(mp)) | 982 | if (XFS_FORCED_SHUTDOWN(mp)) |
970 | return -EIO; | 983 | return -EIO; |
971 | 984 | ||
972 | if ((flags & IOMAP_WRITE) && !IS_DAX(inode) && | 985 | if (((flags & (IOMAP_WRITE | IOMAP_DIRECT)) == IOMAP_WRITE) && |
973 | !xfs_get_extsz_hint(ip)) { | 986 | !IS_DAX(inode) && !xfs_get_extsz_hint(ip)) { |
974 | /* Reserve delalloc blocks for regular writeback. */ | 987 | /* Reserve delalloc blocks for regular writeback. */ |
975 | return xfs_file_iomap_begin_delay(inode, offset, length, flags, | 988 | return xfs_file_iomap_begin_delay(inode, offset, length, flags, |
976 | iomap); | 989 | iomap); |
977 | } | 990 | } |
978 | 991 | ||
979 | /* | 992 | if (need_excl_ilock(ip, flags)) { |
980 | * COW writes will allocate delalloc space, so we need to make sure | ||
981 | * to take the lock exclusively here. | ||
982 | */ | ||
983 | if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && xfs_is_reflink_inode(ip)) { | ||
984 | lockmode = XFS_ILOCK_EXCL; | 993 | lockmode = XFS_ILOCK_EXCL; |
985 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 994 | xfs_ilock(ip, XFS_ILOCK_EXCL); |
986 | } else { | 995 | } else { |
@@ -993,17 +1002,41 @@ xfs_file_iomap_begin( | |||
993 | offset_fsb = XFS_B_TO_FSBT(mp, offset); | 1002 | offset_fsb = XFS_B_TO_FSBT(mp, offset); |
994 | end_fsb = XFS_B_TO_FSB(mp, offset + length); | 1003 | end_fsb = XFS_B_TO_FSB(mp, offset + length); |
995 | 1004 | ||
1005 | if (xfs_is_reflink_inode(ip) && | ||
1006 | (flags & IOMAP_WRITE) && (flags & IOMAP_DIRECT)) { | ||
1007 | shared = xfs_reflink_find_cow_mapping(ip, offset, &imap); | ||
1008 | if (shared) { | ||
1009 | xfs_iunlock(ip, lockmode); | ||
1010 | goto alloc_done; | ||
1011 | } | ||
1012 | ASSERT(!isnullstartblock(imap.br_startblock)); | ||
1013 | } | ||
1014 | |||
996 | error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap, | 1015 | error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap, |
997 | &nimaps, 0); | 1016 | &nimaps, 0); |
998 | if (error) | 1017 | if (error) |
999 | goto out_unlock; | 1018 | goto out_unlock; |
1000 | 1019 | ||
1001 | if (flags & IOMAP_REPORT) { | 1020 | if ((flags & IOMAP_REPORT) || |
1021 | (xfs_is_reflink_inode(ip) && | ||
1022 | (flags & IOMAP_WRITE) && (flags & IOMAP_DIRECT))) { | ||
1002 | /* Trim the mapping to the nearest shared extent boundary. */ | 1023 | /* Trim the mapping to the nearest shared extent boundary. */ |
1003 | error = xfs_reflink_trim_around_shared(ip, &imap, &shared, | 1024 | error = xfs_reflink_trim_around_shared(ip, &imap, &shared, |
1004 | &trimmed); | 1025 | &trimmed); |
1005 | if (error) | 1026 | if (error) |
1006 | goto out_unlock; | 1027 | goto out_unlock; |
1028 | |||
1029 | /* | ||
1030 | * We're here because we're trying to do a directio write to a | ||
1031 | * region that isn't aligned to a filesystem block. If the | ||
1032 | * extent is shared, fall back to buffered mode to handle the | ||
1033 | * RMW. | ||
1034 | */ | ||
1035 | if (!(flags & IOMAP_REPORT) && shared) { | ||
1036 | trace_xfs_reflink_bounce_dio_write(ip, &imap); | ||
1037 | error = -EREMCHG; | ||
1038 | goto out_unlock; | ||
1039 | } | ||
1007 | } | 1040 | } |
1008 | 1041 | ||
1009 | if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && xfs_is_reflink_inode(ip)) { | 1042 | if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && xfs_is_reflink_inode(ip)) { |
@@ -1038,6 +1071,7 @@ xfs_file_iomap_begin( | |||
1038 | if (error) | 1071 | if (error) |
1039 | return error; | 1072 | return error; |
1040 | 1073 | ||
1074 | alloc_done: | ||
1041 | iomap->flags = IOMAP_F_NEW; | 1075 | iomap->flags = IOMAP_F_NEW; |
1042 | trace_xfs_iomap_alloc(ip, offset, length, 0, &imap); | 1076 | trace_xfs_iomap_alloc(ip, offset, length, 0, &imap); |
1043 | } else { | 1077 | } else { |
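The -EREMCHG return above is consumed one level up: a misaligned direct write into a shared extent is retried through the page cache so the read-modify-write happens under buffered IO rules. A minimal sketch of the caller, simplified from the xfs_file_write_iter() shape in this series:

	ret = xfs_file_dio_aio_write(iocb, from);
	if (ret == -EREMCHG) {
		/* unaligned dio hit a shared (COW) extent: fall back to
		 * buffered IO to do the RMW safely */
		ret = xfs_file_buffered_aio_write(iocb, from);
	}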
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 405a65cd9d6b..c962999a87ab 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c | |||
@@ -983,15 +983,13 @@ xfs_vn_setattr( | |||
983 | struct xfs_inode *ip = XFS_I(d_inode(dentry)); | 983 | struct xfs_inode *ip = XFS_I(d_inode(dentry)); |
984 | uint iolock = XFS_IOLOCK_EXCL; | 984 | uint iolock = XFS_IOLOCK_EXCL; |
985 | 985 | ||
986 | xfs_ilock(ip, iolock); | 986 | error = xfs_break_layouts(d_inode(dentry), &iolock); |
987 | error = xfs_break_layouts(d_inode(dentry), &iolock, true); | 987 | if (error) |
988 | if (!error) { | 988 | return error; |
989 | xfs_ilock(ip, XFS_MMAPLOCK_EXCL); | ||
990 | iolock |= XFS_MMAPLOCK_EXCL; | ||
991 | 989 | ||
992 | error = xfs_vn_setattr_size(dentry, iattr); | 990 | xfs_ilock(ip, XFS_MMAPLOCK_EXCL); |
993 | } | 991 | error = xfs_setattr_size(ip, iattr); |
994 | xfs_iunlock(ip, iolock); | 992 | xfs_iunlock(ip, XFS_MMAPLOCK_EXCL); |
995 | } else { | 993 | } else { |
996 | error = xfs_vn_setattr_nonsize(dentry, iattr); | 994 | error = xfs_vn_setattr_nonsize(dentry, iattr); |
997 | } | 995 | } |
diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c index 93a7aafa56d6..2f2dc3c09ad0 100644 --- a/fs/xfs/xfs_pnfs.c +++ b/fs/xfs/xfs_pnfs.c | |||
@@ -32,8 +32,7 @@ | |||
32 | int | 32 | int |
33 | xfs_break_layouts( | 33 | xfs_break_layouts( |
34 | struct inode *inode, | 34 | struct inode *inode, |
35 | uint *iolock, | 35 | uint *iolock) |
36 | bool with_imutex) | ||
37 | { | 36 | { |
38 | struct xfs_inode *ip = XFS_I(inode); | 37 | struct xfs_inode *ip = XFS_I(inode); |
39 | int error; | 38 | int error; |
@@ -42,12 +41,8 @@ xfs_break_layouts( | |||
42 | 41 | ||
43 | while ((error = break_layout(inode, false)) == -EWOULDBLOCK) { | 42 | while ((error = break_layout(inode, false)) == -EWOULDBLOCK) { |
44 | xfs_iunlock(ip, *iolock); | 43 | xfs_iunlock(ip, *iolock); |
45 | if (with_imutex && (*iolock & XFS_IOLOCK_EXCL)) | ||
46 | inode_unlock(inode); | ||
47 | error = break_layout(inode, true); | 44 | error = break_layout(inode, true); |
48 | *iolock = XFS_IOLOCK_EXCL; | 45 | *iolock = XFS_IOLOCK_EXCL; |
49 | if (with_imutex) | ||
50 | inode_lock(inode); | ||
51 | xfs_ilock(ip, *iolock); | 46 | xfs_ilock(ip, *iolock); |
52 | } | 47 | } |
53 | 48 | ||
diff --git a/fs/xfs/xfs_pnfs.h b/fs/xfs/xfs_pnfs.h index e8339f74966b..b587cb99b2b7 100644 --- a/fs/xfs/xfs_pnfs.h +++ b/fs/xfs/xfs_pnfs.h | |||
@@ -8,10 +8,10 @@ int xfs_fs_map_blocks(struct inode *inode, loff_t offset, u64 length, | |||
8 | int xfs_fs_commit_blocks(struct inode *inode, struct iomap *maps, int nr_maps, | 8 | int xfs_fs_commit_blocks(struct inode *inode, struct iomap *maps, int nr_maps, |
9 | struct iattr *iattr); | 9 | struct iattr *iattr); |
10 | 10 | ||
11 | int xfs_break_layouts(struct inode *inode, uint *iolock, bool with_imutex); | 11 | int xfs_break_layouts(struct inode *inode, uint *iolock); |
12 | #else | 12 | #else |
13 | static inline int | 13 | static inline int |
14 | xfs_break_layouts(struct inode *inode, uint *iolock, bool with_imutex) | 14 | xfs_break_layouts(struct inode *inode, uint *iolock) |
15 | { | 15 | { |
16 | return 0; | 16 | return 0; |
17 | } | 17 | } |
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index becf2465dd23..88fd03c66e99 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c | |||
@@ -1251,13 +1251,11 @@ xfs_reflink_remap_range( | |||
1251 | return -EIO; | 1251 | return -EIO; |
1252 | 1252 | ||
1253 | /* Lock both files against IO */ | 1253 | /* Lock both files against IO */ |
1254 | if (same_inode) { | 1254 | lock_two_nondirectories(inode_in, inode_out); |
1255 | xfs_ilock(src, XFS_IOLOCK_EXCL); | 1255 | if (same_inode) |
1256 | xfs_ilock(src, XFS_MMAPLOCK_EXCL); | 1256 | xfs_ilock(src, XFS_MMAPLOCK_EXCL); |
1257 | } else { | 1257 | else |
1258 | xfs_lock_two_inodes(src, dest, XFS_IOLOCK_EXCL); | ||
1259 | xfs_lock_two_inodes(src, dest, XFS_MMAPLOCK_EXCL); | 1258 | xfs_lock_two_inodes(src, dest, XFS_MMAPLOCK_EXCL); |
1260 | } | ||
1261 | 1259 | ||
1262 | /* Don't touch certain kinds of inodes */ | 1260 | /* Don't touch certain kinds of inodes */ |
1263 | ret = -EPERM; | 1261 | ret = -EPERM; |
@@ -1402,11 +1400,9 @@ xfs_reflink_remap_range( | |||
1402 | 1400 | ||
1403 | out_unlock: | 1401 | out_unlock: |
1404 | xfs_iunlock(src, XFS_MMAPLOCK_EXCL); | 1402 | xfs_iunlock(src, XFS_MMAPLOCK_EXCL); |
1405 | xfs_iunlock(src, XFS_IOLOCK_EXCL); | 1403 | if (!same_inode) |
1406 | if (src->i_ino != dest->i_ino) { | ||
1407 | xfs_iunlock(dest, XFS_MMAPLOCK_EXCL); | 1404 | xfs_iunlock(dest, XFS_MMAPLOCK_EXCL); |
1408 | xfs_iunlock(dest, XFS_IOLOCK_EXCL); | 1405 | unlock_two_nondirectories(inode_in, inode_out); |
1409 | } | ||
1410 | if (ret) | 1406 | if (ret) |
1411 | trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_); | 1407 | trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_); |
1412 | return ret; | 1408 | return ret; |
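lock_two_nondirectories() provides the IO-level exclusion the removed xfs_lock_two_inodes(..., XFS_IOLOCK_EXCL) calls used to, but via the VFS i_rwsem directly. For reference, the helper is roughly this, simplified from fs/inode.c of this era:

	void lock_two_nondirectories(struct inode *inode1, struct inode *inode2)
	{
		if (inode1 > inode2)
			swap(inode1, inode2);	/* fixed order prevents ABBA deadlocks */
		if (inode1 && !S_ISDIR(inode1->i_mode))
			inode_lock(inode1);
		if (inode2 && !S_ISDIR(inode2->i_mode) && inode2 != inode1)
			inode_lock_nested(inode2, I_MUTEX_NONDIR2);
	}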
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index ade4691e3f74..563d1d146b8c 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c | |||
@@ -943,7 +943,7 @@ xfs_fs_destroy_inode( | |||
943 | 943 | ||
944 | trace_xfs_destroy_inode(ip); | 944 | trace_xfs_destroy_inode(ip); |
945 | 945 | ||
946 | ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock)); | 946 | ASSERT(!rwsem_is_locked(&inode->i_rwsem)); |
947 | XFS_STATS_INC(ip->i_mount, vn_rele); | 947 | XFS_STATS_INC(ip->i_mount, vn_rele); |
948 | XFS_STATS_INC(ip->i_mount, vn_remove); | 948 | XFS_STATS_INC(ip->i_mount, vn_remove); |
949 | 949 | ||
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c index 58142aeeeea6..f2cb45ed1d54 100644 --- a/fs/xfs/xfs_symlink.c +++ b/fs/xfs/xfs_symlink.c | |||
@@ -238,8 +238,7 @@ xfs_symlink( | |||
238 | if (error) | 238 | if (error) |
239 | goto out_release_inode; | 239 | goto out_release_inode; |
240 | 240 | ||
241 | xfs_ilock(dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL | | 241 | xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); |
242 | XFS_IOLOCK_PARENT | XFS_ILOCK_PARENT); | ||
243 | unlock_dp_on_error = true; | 242 | unlock_dp_on_error = true; |
244 | 243 | ||
245 | /* | 244 | /* |
@@ -287,7 +286,7 @@ xfs_symlink( | |||
287 | * the transaction cancel unlocking dp so don't do it explicitly in the | 286 | * the transaction cancel unlocking dp so don't do it explicitly in the |
288 | * error path. | 287 | * error path. |
289 | */ | 288 | */ |
290 | xfs_trans_ijoin(tp, dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); | 289 | xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); |
291 | unlock_dp_on_error = false; | 290 | unlock_dp_on_error = false; |
292 | 291 | ||
293 | /* | 292 | /* |
@@ -412,7 +411,7 @@ out_release_inode: | |||
412 | xfs_qm_dqrele(pdqp); | 411 | xfs_qm_dqrele(pdqp); |
413 | 412 | ||
414 | if (unlock_dp_on_error) | 413 | if (unlock_dp_on_error) |
415 | xfs_iunlock(dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); | 414 | xfs_iunlock(dp, XFS_ILOCK_EXCL); |
416 | return error; | 415 | return error; |
417 | } | 416 | } |
418 | 417 | ||
diff --git a/include/linux/bio.h b/include/linux/bio.h index 97cb48f03dc7..66228c28c621 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h | |||
@@ -430,6 +430,7 @@ void bio_chain(struct bio *, struct bio *); | |||
430 | extern int bio_add_page(struct bio *, struct page *, unsigned int,unsigned int); | 430 | extern int bio_add_page(struct bio *, struct page *, unsigned int,unsigned int); |
431 | extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *, | 431 | extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *, |
432 | unsigned int, unsigned int); | 432 | unsigned int, unsigned int); |
433 | int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter); | ||
433 | struct rq_map_data; | 434 | struct rq_map_data; |
434 | extern struct bio *bio_map_user_iov(struct request_queue *, | 435 | extern struct bio *bio_map_user_iov(struct request_queue *, |
435 | const struct iov_iter *, gfp_t); | 436 | const struct iov_iter *, gfp_t); |
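A hedged usage sketch for the new helper: a direct IO submission path allocates a bio sized to the iterator and lets bio_iov_iter_get_pages() pin pages straight into the bvec array (the surrounding variable names here are illustrative):

	struct bio *bio = bio_alloc(GFP_KERNEL, nr_pages);

	bio->bi_bdev = bdev;
	bio->bi_iter.bi_sector = sector;
	bio_set_op_attrs(bio, REQ_OP_WRITE, 0);

	ret = bio_iov_iter_get_pages(bio, iter);	/* pins the user pages */
	if (unlikely(ret))
		return ret;

	iov_iter_advance(iter, bio->bi_iter.bi_size);	/* helper does not advance */
	submit_bio(bio);
	/* each pinned page is dropped with put_page() on bio completion */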
diff --git a/include/linux/iomap.h b/include/linux/iomap.h index f185156de74d..a4c94b86401e 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h | |||
@@ -50,6 +50,7 @@ struct iomap { | |||
50 | #define IOMAP_ZERO (1 << 1) /* zeroing operation, may skip holes */ | 50 | #define IOMAP_ZERO (1 << 1) /* zeroing operation, may skip holes */ |
51 | #define IOMAP_REPORT (1 << 2) /* report extent status, e.g. FIEMAP */ | 51 | #define IOMAP_REPORT (1 << 2) /* report extent status, e.g. FIEMAP */ |
52 | #define IOMAP_FAULT (1 << 3) /* mapping for page fault */ | 52 | #define IOMAP_FAULT (1 << 3) /* mapping for page fault */ |
53 | #define IOMAP_DIRECT (1 << 4) /* direct I/O */ | ||
53 | 54 | ||
54 | struct iomap_ops { | 55 | struct iomap_ops { |
55 | /* | 56 | /* |
@@ -83,4 +84,14 @@ int iomap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, | |||
83 | int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | 84 | int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, |
84 | loff_t start, loff_t len, struct iomap_ops *ops); | 85 | loff_t start, loff_t len, struct iomap_ops *ops); |
85 | 86 | ||
87 | /* | ||
88 | * Flags for direct I/O ->end_io: | ||
89 | */ | ||
90 | #define IOMAP_DIO_UNWRITTEN (1 << 0) /* covers unwritten extent(s) */ | ||
91 | #define IOMAP_DIO_COW (1 << 1) /* covers COW extent(s) */ | ||
92 | typedef int (iomap_dio_end_io_t)(struct kiocb *iocb, ssize_t ret, | ||
93 | unsigned flags); | ||
94 | ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, | ||
95 | struct iomap_ops *ops, iomap_dio_end_io_t end_io); | ||
96 | |||
86 | #endif /* LINUX_IOMAP_H */ | 97 | #endif /* LINUX_IOMAP_H */ |
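iomap_dio_rw() wires a filesystem's iomap_ops into the shared direct IO implementation; the optional end_io callback receives the IOMAP_DIO_* flags above once the IO completes. A minimal caller sketch (the my_* names are illustrative, not from this diff):

	static int
	my_dio_write_end_io(struct kiocb *iocb, ssize_t size, unsigned flags)
	{
		struct inode	*inode = file_inode(iocb->ki_filp);

		if (flags & IOMAP_DIO_COW) {
			/* the data landed in COW fork space: remap it into place */
		}
		if (flags & IOMAP_DIO_UNWRITTEN) {
			/* convert the now-written unwritten extents */
		}
		return 0;
	}

	/* in ->write_iter(), with the inode's IO lock already held: */
	ret = iomap_dio_rw(iocb, from, &my_iomap_ops, my_dio_write_end_io);

Read paths that need no completion work can pass NULL for end_io, as the XFS direct read path in this series does.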
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index c1458fede1f9..1e327bb80838 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h | |||
@@ -338,9 +338,18 @@ extern void lock_acquire(struct lockdep_map *lock, unsigned int subclass, | |||
338 | extern void lock_release(struct lockdep_map *lock, int nested, | 338 | extern void lock_release(struct lockdep_map *lock, int nested, |
339 | unsigned long ip); | 339 | unsigned long ip); |
340 | 340 | ||
341 | #define lockdep_is_held(lock) lock_is_held(&(lock)->dep_map) | 341 | /* |
342 | * Same "read" as for lock_acquire(), except -1 means any. | ||
343 | */ | ||
344 | extern int lock_is_held_type(struct lockdep_map *lock, int read); | ||
345 | |||
346 | static inline int lock_is_held(struct lockdep_map *lock) | ||
347 | { | ||
348 | return lock_is_held_type(lock, -1); | ||
349 | } | ||
342 | 350 | ||
343 | extern int lock_is_held(struct lockdep_map *lock); | 351 | #define lockdep_is_held(lock) lock_is_held(&(lock)->dep_map) |
352 | #define lockdep_is_held_type(lock, r) lock_is_held_type(&(lock)->dep_map, (r)) | ||
344 | 353 | ||
345 | extern void lock_set_class(struct lockdep_map *lock, const char *name, | 354 | extern void lock_set_class(struct lockdep_map *lock, const char *name, |
346 | struct lock_class_key *key, unsigned int subclass, | 355 | struct lock_class_key *key, unsigned int subclass, |
@@ -372,6 +381,14 @@ extern void lock_unpin_lock(struct lockdep_map *lock, struct pin_cookie); | |||
372 | WARN_ON(debug_locks && !lockdep_is_held(l)); \ | 381 | WARN_ON(debug_locks && !lockdep_is_held(l)); \ |
373 | } while (0) | 382 | } while (0) |
374 | 383 | ||
384 | #define lockdep_assert_held_exclusive(l) do { \ | ||
385 | WARN_ON(debug_locks && !lockdep_is_held_type(l, 0)); \ | ||
386 | } while (0) | ||
387 | |||
388 | #define lockdep_assert_held_read(l) do { \ | ||
389 | WARN_ON(debug_locks && !lockdep_is_held_type(l, 1)); \ | ||
390 | } while (0) | ||
391 | |||
375 | #define lockdep_assert_held_once(l) do { \ | 392 | #define lockdep_assert_held_once(l) do { \ |
376 | WARN_ON_ONCE(debug_locks && !lockdep_is_held(l)); \ | 393 | WARN_ON_ONCE(debug_locks && !lockdep_is_held(l)); \ |
377 | } while (0) | 394 | } while (0) |
@@ -428,7 +445,11 @@ struct lock_class_key { }; | |||
428 | 445 | ||
429 | #define lockdep_depth(tsk) (0) | 446 | #define lockdep_depth(tsk) (0) |
430 | 447 | ||
448 | #define lockdep_is_held_type(l, r) (1) | ||
449 | |||
431 | #define lockdep_assert_held(l) do { (void)(l); } while (0) | 450 | #define lockdep_assert_held(l) do { (void)(l); } while (0) |
451 | #define lockdep_assert_held_exclusive(l) do { (void)(l); } while (0) | ||
452 | #define lockdep_assert_held_read(l) do { (void)(l); } while (0) | ||
432 | #define lockdep_assert_held_once(l) do { (void)(l); } while (0) | 453 | #define lockdep_assert_held_once(l) do { (void)(l); } while (0) |
433 | 454 | ||
434 | #define lockdep_recursing(tsk) (0) | 455 | #define lockdep_recursing(tsk) (0) |
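The new assertions let code document not just that a lock is held, but in which mode. A small illustrative sketch (the my_* names are made up):

	static void my_update(struct my_obj *obj)
	{
		/* writer-side invariant: obj->sem must be held exclusively */
		lockdep_assert_held_exclusive(&obj->sem);
		/* ... mutate obj ... */
	}

	static u64 my_peek(struct my_obj *obj)
	{
		/* a shared hold is sufficient here, and asserted as such */
		lockdep_assert_held_read(&obj->sem);
		return obj->val;
	}

xfs_isilocked() above uses the same machinery, via lockdep_is_held_type(), to verify that i_rwsem is held for write.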
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 589d763a49b3..cff580a6edf9 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c | |||
@@ -3188,7 +3188,7 @@ print_lock_nested_lock_not_held(struct task_struct *curr, | |||
3188 | return 0; | 3188 | return 0; |
3189 | } | 3189 | } |
3190 | 3190 | ||
3191 | static int __lock_is_held(struct lockdep_map *lock); | 3191 | static int __lock_is_held(struct lockdep_map *lock, int read); |
3192 | 3192 | ||
3193 | /* | 3193 | /* |
3194 | * This gets called for every mutex_lock*()/spin_lock*() operation. | 3194 | * This gets called for every mutex_lock*()/spin_lock*() operation. |
@@ -3329,7 +3329,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, | |||
3329 | } | 3329 | } |
3330 | chain_key = iterate_chain_key(chain_key, class_idx); | 3330 | chain_key = iterate_chain_key(chain_key, class_idx); |
3331 | 3331 | ||
3332 | if (nest_lock && !__lock_is_held(nest_lock)) | 3332 | if (nest_lock && !__lock_is_held(nest_lock, -1)) |
3333 | return print_lock_nested_lock_not_held(curr, hlock, ip); | 3333 | return print_lock_nested_lock_not_held(curr, hlock, ip); |
3334 | 3334 | ||
3335 | if (!validate_chain(curr, lock, hlock, chain_head, chain_key)) | 3335 | if (!validate_chain(curr, lock, hlock, chain_head, chain_key)) |
@@ -3576,7 +3576,7 @@ found_it: | |||
3576 | return 1; | 3576 | return 1; |
3577 | } | 3577 | } |
3578 | 3578 | ||
3579 | static int __lock_is_held(struct lockdep_map *lock) | 3579 | static int __lock_is_held(struct lockdep_map *lock, int read) |
3580 | { | 3580 | { |
3581 | struct task_struct *curr = current; | 3581 | struct task_struct *curr = current; |
3582 | int i; | 3582 | int i; |
@@ -3584,8 +3584,12 @@ static int __lock_is_held(struct lockdep_map *lock) | |||
3584 | for (i = 0; i < curr->lockdep_depth; i++) { | 3584 | for (i = 0; i < curr->lockdep_depth; i++) { |
3585 | struct held_lock *hlock = curr->held_locks + i; | 3585 | struct held_lock *hlock = curr->held_locks + i; |
3586 | 3586 | ||
3587 | if (match_held_lock(hlock, lock)) | 3587 | if (match_held_lock(hlock, lock)) { |
3588 | return 1; | 3588 | if (read == -1 || hlock->read == read) |
3589 | return 1; | ||
3590 | |||
3591 | return 0; | ||
3592 | } | ||
3589 | } | 3593 | } |
3590 | 3594 | ||
3591 | return 0; | 3595 | return 0; |
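Note the exact-match semantics: read == -1 matches any held state, otherwise hlock->read must equal the query. In terms of the values lock_acquire() records (illustrative):

	down_write(&sem);			/* recorded with read == 0 */
	lockdep_is_held_type(&sem, 0);		/* -> 1, held for write */
	lockdep_is_held_type(&sem, 1);		/* -> 0, not a read hold */
	lockdep_is_held_type(&sem, -1);		/* -> 1, any hold counts */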
@@ -3769,7 +3773,7 @@ void lock_release(struct lockdep_map *lock, int nested, | |||
3769 | } | 3773 | } |
3770 | EXPORT_SYMBOL_GPL(lock_release); | 3774 | EXPORT_SYMBOL_GPL(lock_release); |
3771 | 3775 | ||
3772 | int lock_is_held(struct lockdep_map *lock) | 3776 | int lock_is_held_type(struct lockdep_map *lock, int read) |
3773 | { | 3777 | { |
3774 | unsigned long flags; | 3778 | unsigned long flags; |
3775 | int ret = 0; | 3779 | int ret = 0; |
@@ -3781,13 +3785,13 @@ int lock_is_held(struct lockdep_map *lock) | |||
3781 | check_flags(flags); | 3785 | check_flags(flags); |
3782 | 3786 | ||
3783 | current->lockdep_recursion = 1; | 3787 | current->lockdep_recursion = 1; |
3784 | ret = __lock_is_held(lock); | 3788 | ret = __lock_is_held(lock, read); |
3785 | current->lockdep_recursion = 0; | 3789 | current->lockdep_recursion = 0; |
3786 | raw_local_irq_restore(flags); | 3790 | raw_local_irq_restore(flags); |
3787 | 3791 | ||
3788 | return ret; | 3792 | return ret; |
3789 | } | 3793 | } |
3790 | EXPORT_SYMBOL_GPL(lock_is_held); | 3794 | EXPORT_SYMBOL_GPL(lock_is_held_type); |
3791 | 3795 | ||
3792 | struct pin_cookie lock_pin_lock(struct lockdep_map *lock) | 3796 | struct pin_cookie lock_pin_lock(struct lockdep_map *lock) |
3793 | { | 3797 | { |