diff options
Diffstat (limited to 'fs')
-rw-r--r-- | fs/btrfs/ioctl.c | 90 |
1 files changed, 76 insertions, 14 deletions
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 83c9ad3f2621..1d6767c4c092 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
@@ -2811,7 +2811,6 @@ static struct page *extent_same_get_page(struct inode *inode, pgoff_t index) | |||
2811 | return NULL; | 2811 | return NULL; |
2812 | } | 2812 | } |
2813 | } | 2813 | } |
2814 | unlock_page(page); | ||
2815 | 2814 | ||
2816 | return page; | 2815 | return page; |
2817 | } | 2816 | } |
@@ -2830,10 +2829,17 @@ static int gather_extent_pages(struct inode *inode, struct page **pages, | |||
2830 | return 0; | 2829 | return 0; |
2831 | } | 2830 | } |
2832 | 2831 | ||
2833 | static inline void lock_extent_range(struct inode *inode, u64 off, u64 len) | 2832 | static int lock_extent_range(struct inode *inode, u64 off, u64 len, |
2833 | bool retry_range_locking) | ||
2834 | { | 2834 | { |
2835 | /* do any pending delalloc/csum calc on src, one way or | 2835 | /* |
2836 | another, and lock file content */ | 2836 | * Do any pending delalloc/csum calculations on inode, one way or |
2837 | * another, and lock file content. | ||
2838 | * The locking order is: | ||
2839 | * | ||
2840 | * 1) pages | ||
2841 | * 2) range in the inode's io tree | ||
2842 | */ | ||
2837 | while (1) { | 2843 | while (1) { |
2838 | struct btrfs_ordered_extent *ordered; | 2844 | struct btrfs_ordered_extent *ordered; |
2839 | lock_extent(&BTRFS_I(inode)->io_tree, off, off + len - 1); | 2845 | lock_extent(&BTRFS_I(inode)->io_tree, off, off + len - 1); |
@@ -2851,8 +2857,11 @@ static inline void lock_extent_range(struct inode *inode, u64 off, u64 len) | |||
2851 | unlock_extent(&BTRFS_I(inode)->io_tree, off, off + len - 1); | 2857 | unlock_extent(&BTRFS_I(inode)->io_tree, off, off + len - 1); |
2852 | if (ordered) | 2858 | if (ordered) |
2853 | btrfs_put_ordered_extent(ordered); | 2859 | btrfs_put_ordered_extent(ordered); |
2860 | if (!retry_range_locking) | ||
2861 | return -EAGAIN; | ||
2854 | btrfs_wait_ordered_range(inode, off, len); | 2862 | btrfs_wait_ordered_range(inode, off, len); |
2855 | } | 2863 | } |
2864 | return 0; | ||
2856 | } | 2865 | } |
2857 | 2866 | ||
2858 | static void btrfs_double_inode_unlock(struct inode *inode1, struct inode *inode2) | 2867 | static void btrfs_double_inode_unlock(struct inode *inode1, struct inode *inode2) |
@@ -2877,15 +2886,24 @@ static void btrfs_double_extent_unlock(struct inode *inode1, u64 loff1, | |||
2877 | unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1); | 2886 | unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1); |
2878 | } | 2887 | } |
2879 | 2888 | ||
2880 | static void btrfs_double_extent_lock(struct inode *inode1, u64 loff1, | 2889 | static int btrfs_double_extent_lock(struct inode *inode1, u64 loff1, |
2881 | struct inode *inode2, u64 loff2, u64 len) | 2890 | struct inode *inode2, u64 loff2, u64 len, |
2891 | bool retry_range_locking) | ||
2882 | { | 2892 | { |
2893 | int ret; | ||
2894 | |||
2883 | if (inode1 < inode2) { | 2895 | if (inode1 < inode2) { |
2884 | swap(inode1, inode2); | 2896 | swap(inode1, inode2); |
2885 | swap(loff1, loff2); | 2897 | swap(loff1, loff2); |
2886 | } | 2898 | } |
2887 | lock_extent_range(inode1, loff1, len); | 2899 | ret = lock_extent_range(inode1, loff1, len, retry_range_locking); |
2888 | lock_extent_range(inode2, loff2, len); | 2900 | if (ret) |
2901 | return ret; | ||
2902 | ret = lock_extent_range(inode2, loff2, len, retry_range_locking); | ||
2903 | if (ret) | ||
2904 | unlock_extent(&BTRFS_I(inode1)->io_tree, loff1, | ||
2905 | loff1 + len - 1); | ||
2906 | return ret; | ||
2889 | } | 2907 | } |
2890 | 2908 | ||
2891 | struct cmp_pages { | 2909 | struct cmp_pages { |
@@ -2901,11 +2919,15 @@ static void btrfs_cmp_data_free(struct cmp_pages *cmp) | |||
2901 | 2919 | ||
2902 | for (i = 0; i < cmp->num_pages; i++) { | 2920 | for (i = 0; i < cmp->num_pages; i++) { |
2903 | pg = cmp->src_pages[i]; | 2921 | pg = cmp->src_pages[i]; |
2904 | if (pg) | 2922 | if (pg) { |
2923 | unlock_page(pg); | ||
2905 | page_cache_release(pg); | 2924 | page_cache_release(pg); |
2925 | } | ||
2906 | pg = cmp->dst_pages[i]; | 2926 | pg = cmp->dst_pages[i]; |
2907 | if (pg) | 2927 | if (pg) { |
2928 | unlock_page(pg); | ||
2908 | page_cache_release(pg); | 2929 | page_cache_release(pg); |
2930 | } | ||
2909 | } | 2931 | } |
2910 | kfree(cmp->src_pages); | 2932 | kfree(cmp->src_pages); |
2911 | kfree(cmp->dst_pages); | 2933 | kfree(cmp->dst_pages); |
@@ -2966,6 +2988,8 @@ static int btrfs_cmp_data(struct inode *src, u64 loff, struct inode *dst, | |||
2966 | 2988 | ||
2967 | src_page = cmp->src_pages[i]; | 2989 | src_page = cmp->src_pages[i]; |
2968 | dst_page = cmp->dst_pages[i]; | 2990 | dst_page = cmp->dst_pages[i]; |
2991 | ASSERT(PageLocked(src_page)); | ||
2992 | ASSERT(PageLocked(dst_page)); | ||
2969 | 2993 | ||
2970 | addr = kmap_atomic(src_page); | 2994 | addr = kmap_atomic(src_page); |
2971 | dst_addr = kmap_atomic(dst_page); | 2995 | dst_addr = kmap_atomic(dst_page); |
@@ -3078,14 +3102,46 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen, | |||
3078 | goto out_unlock; | 3102 | goto out_unlock; |
3079 | } | 3103 | } |
3080 | 3104 | ||
3105 | again: | ||
3081 | ret = btrfs_cmp_data_prepare(src, loff, dst, dst_loff, olen, &cmp); | 3106 | ret = btrfs_cmp_data_prepare(src, loff, dst, dst_loff, olen, &cmp); |
3082 | if (ret) | 3107 | if (ret) |
3083 | goto out_unlock; | 3108 | goto out_unlock; |
3084 | 3109 | ||
3085 | if (same_inode) | 3110 | if (same_inode) |
3086 | lock_extent_range(src, same_lock_start, same_lock_len); | 3111 | ret = lock_extent_range(src, same_lock_start, same_lock_len, |
3112 | false); | ||
3087 | else | 3113 | else |
3088 | btrfs_double_extent_lock(src, loff, dst, dst_loff, len); | 3114 | ret = btrfs_double_extent_lock(src, loff, dst, dst_loff, len, |
3115 | false); | ||
3116 | /* | ||
3117 | * If one of the inodes has dirty pages in the respective range or | ||
3118 | * ordered extents, we need to flush delalloc and wait for all ordered | ||
3119 | * extents in the range. We must unlock the pages and the ranges in the | ||
3120 | * io trees to avoid deadlocks when flushing delalloc (requires locking | ||
3121 | * pages) and when waiting for ordered extents to complete (they require | ||
3122 | * range locking). | ||
3123 | */ | ||
3124 | if (ret == -EAGAIN) { | ||
3125 | /* | ||
3126 | * Ranges in the io trees already unlocked. Now unlock all | ||
3127 | * pages before waiting for all IO to complete. | ||
3128 | */ | ||
3129 | btrfs_cmp_data_free(&cmp); | ||
3130 | if (same_inode) { | ||
3131 | btrfs_wait_ordered_range(src, same_lock_start, | ||
3132 | same_lock_len); | ||
3133 | } else { | ||
3134 | btrfs_wait_ordered_range(src, loff, len); | ||
3135 | btrfs_wait_ordered_range(dst, dst_loff, len); | ||
3136 | } | ||
3137 | goto again; | ||
3138 | } | ||
3139 | ASSERT(ret == 0); | ||
3140 | if (WARN_ON(ret)) { | ||
3141 | /* ranges in the io trees already unlocked */ | ||
3142 | btrfs_cmp_data_free(&cmp); | ||
3143 | return ret; | ||
3144 | } | ||
3089 | 3145 | ||
3090 | /* pass original length for comparison so we stay within i_size */ | 3146 | /* pass original length for comparison so we stay within i_size */ |
3091 | ret = btrfs_cmp_data(src, loff, dst, dst_loff, olen, &cmp); | 3147 | ret = btrfs_cmp_data(src, loff, dst, dst_loff, olen, &cmp); |
@@ -3907,9 +3963,15 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
3907 | u64 lock_start = min_t(u64, off, destoff); | 3963 | u64 lock_start = min_t(u64, off, destoff); |
3908 | u64 lock_len = max_t(u64, off, destoff) + len - lock_start; | 3964 | u64 lock_len = max_t(u64, off, destoff) + len - lock_start; |
3909 | 3965 | ||
3910 | lock_extent_range(src, lock_start, lock_len); | 3966 | ret = lock_extent_range(src, lock_start, lock_len, true); |
3911 | } else { | 3967 | } else { |
3912 | btrfs_double_extent_lock(src, off, inode, destoff, len); | 3968 | ret = btrfs_double_extent_lock(src, off, inode, destoff, len, |
3969 | true); | ||
3970 | } | ||
3971 | ASSERT(ret == 0); | ||
3972 | if (WARN_ON(ret)) { | ||
3973 | /* ranges in the io trees already unlocked */ | ||
3974 | goto out_unlock; | ||
3913 | } | 3975 | } |
3914 | 3976 | ||
3915 | ret = btrfs_clone(src, inode, off, olen, len, destoff, 0); | 3977 | ret = btrfs_clone(src, inode, off, olen, len, destoff, 0); |