diff options
| -rw-r--r-- | fs/btrfs/ioctl.c | 90 |
1 files changed, 76 insertions, 14 deletions
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 83c9ad3f2621..1d6767c4c092 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
| @@ -2811,7 +2811,6 @@ static struct page *extent_same_get_page(struct inode *inode, pgoff_t index) | |||
| 2811 | return NULL; | 2811 | return NULL; |
| 2812 | } | 2812 | } |
| 2813 | } | 2813 | } |
| 2814 | unlock_page(page); | ||
| 2815 | 2814 | ||
| 2816 | return page; | 2815 | return page; |
| 2817 | } | 2816 | } |
| @@ -2830,10 +2829,17 @@ static int gather_extent_pages(struct inode *inode, struct page **pages, | |||
| 2830 | return 0; | 2829 | return 0; |
| 2831 | } | 2830 | } |
| 2832 | 2831 | ||
| 2833 | static inline void lock_extent_range(struct inode *inode, u64 off, u64 len) | 2832 | static int lock_extent_range(struct inode *inode, u64 off, u64 len, |
| 2833 | bool retry_range_locking) | ||
| 2834 | { | 2834 | { |
| 2835 | /* do any pending delalloc/csum calc on src, one way or | 2835 | /* |
| 2836 | another, and lock file content */ | 2836 | * Do any pending delalloc/csum calculations on inode, one way or |
| 2837 | * another, and lock file content. | ||
| 2838 | * The locking order is: | ||
| 2839 | * | ||
| 2840 | * 1) pages | ||
| 2841 | * 2) range in the inode's io tree | ||
| 2842 | */ | ||
| 2837 | while (1) { | 2843 | while (1) { |
| 2838 | struct btrfs_ordered_extent *ordered; | 2844 | struct btrfs_ordered_extent *ordered; |
| 2839 | lock_extent(&BTRFS_I(inode)->io_tree, off, off + len - 1); | 2845 | lock_extent(&BTRFS_I(inode)->io_tree, off, off + len - 1); |
| @@ -2851,8 +2857,11 @@ static inline void lock_extent_range(struct inode *inode, u64 off, u64 len) | |||
| 2851 | unlock_extent(&BTRFS_I(inode)->io_tree, off, off + len - 1); | 2857 | unlock_extent(&BTRFS_I(inode)->io_tree, off, off + len - 1); |
| 2852 | if (ordered) | 2858 | if (ordered) |
| 2853 | btrfs_put_ordered_extent(ordered); | 2859 | btrfs_put_ordered_extent(ordered); |
| 2860 | if (!retry_range_locking) | ||
| 2861 | return -EAGAIN; | ||
| 2854 | btrfs_wait_ordered_range(inode, off, len); | 2862 | btrfs_wait_ordered_range(inode, off, len); |
| 2855 | } | 2863 | } |
| 2864 | return 0; | ||
| 2856 | } | 2865 | } |
| 2857 | 2866 | ||
| 2858 | static void btrfs_double_inode_unlock(struct inode *inode1, struct inode *inode2) | 2867 | static void btrfs_double_inode_unlock(struct inode *inode1, struct inode *inode2) |
| @@ -2877,15 +2886,24 @@ static void btrfs_double_extent_unlock(struct inode *inode1, u64 loff1, | |||
| 2877 | unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1); | 2886 | unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1); |
| 2878 | } | 2887 | } |
| 2879 | 2888 | ||
| 2880 | static void btrfs_double_extent_lock(struct inode *inode1, u64 loff1, | 2889 | static int btrfs_double_extent_lock(struct inode *inode1, u64 loff1, |
| 2881 | struct inode *inode2, u64 loff2, u64 len) | 2890 | struct inode *inode2, u64 loff2, u64 len, |
| 2891 | bool retry_range_locking) | ||
| 2882 | { | 2892 | { |
| 2893 | int ret; | ||
| 2894 | |||
| 2883 | if (inode1 < inode2) { | 2895 | if (inode1 < inode2) { |
| 2884 | swap(inode1, inode2); | 2896 | swap(inode1, inode2); |
| 2885 | swap(loff1, loff2); | 2897 | swap(loff1, loff2); |
| 2886 | } | 2898 | } |
| 2887 | lock_extent_range(inode1, loff1, len); | 2899 | ret = lock_extent_range(inode1, loff1, len, retry_range_locking); |
| 2888 | lock_extent_range(inode2, loff2, len); | 2900 | if (ret) |
| 2901 | return ret; | ||
| 2902 | ret = lock_extent_range(inode2, loff2, len, retry_range_locking); | ||
| 2903 | if (ret) | ||
| 2904 | unlock_extent(&BTRFS_I(inode1)->io_tree, loff1, | ||
| 2905 | loff1 + len - 1); | ||
| 2906 | return ret; | ||
| 2889 | } | 2907 | } |
| 2890 | 2908 | ||
| 2891 | struct cmp_pages { | 2909 | struct cmp_pages { |
| @@ -2901,11 +2919,15 @@ static void btrfs_cmp_data_free(struct cmp_pages *cmp) | |||
| 2901 | 2919 | ||
| 2902 | for (i = 0; i < cmp->num_pages; i++) { | 2920 | for (i = 0; i < cmp->num_pages; i++) { |
| 2903 | pg = cmp->src_pages[i]; | 2921 | pg = cmp->src_pages[i]; |
| 2904 | if (pg) | 2922 | if (pg) { |
| 2923 | unlock_page(pg); | ||
| 2905 | page_cache_release(pg); | 2924 | page_cache_release(pg); |
| 2925 | } | ||
| 2906 | pg = cmp->dst_pages[i]; | 2926 | pg = cmp->dst_pages[i]; |
| 2907 | if (pg) | 2927 | if (pg) { |
| 2928 | unlock_page(pg); | ||
| 2908 | page_cache_release(pg); | 2929 | page_cache_release(pg); |
| 2930 | } | ||
| 2909 | } | 2931 | } |
| 2910 | kfree(cmp->src_pages); | 2932 | kfree(cmp->src_pages); |
| 2911 | kfree(cmp->dst_pages); | 2933 | kfree(cmp->dst_pages); |
| @@ -2966,6 +2988,8 @@ static int btrfs_cmp_data(struct inode *src, u64 loff, struct inode *dst, | |||
| 2966 | 2988 | ||
| 2967 | src_page = cmp->src_pages[i]; | 2989 | src_page = cmp->src_pages[i]; |
| 2968 | dst_page = cmp->dst_pages[i]; | 2990 | dst_page = cmp->dst_pages[i]; |
| 2991 | ASSERT(PageLocked(src_page)); | ||
| 2992 | ASSERT(PageLocked(dst_page)); | ||
| 2969 | 2993 | ||
| 2970 | addr = kmap_atomic(src_page); | 2994 | addr = kmap_atomic(src_page); |
| 2971 | dst_addr = kmap_atomic(dst_page); | 2995 | dst_addr = kmap_atomic(dst_page); |
| @@ -3078,14 +3102,46 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen, | |||
| 3078 | goto out_unlock; | 3102 | goto out_unlock; |
| 3079 | } | 3103 | } |
| 3080 | 3104 | ||
| 3105 | again: | ||
| 3081 | ret = btrfs_cmp_data_prepare(src, loff, dst, dst_loff, olen, &cmp); | 3106 | ret = btrfs_cmp_data_prepare(src, loff, dst, dst_loff, olen, &cmp); |
| 3082 | if (ret) | 3107 | if (ret) |
| 3083 | goto out_unlock; | 3108 | goto out_unlock; |
| 3084 | 3109 | ||
| 3085 | if (same_inode) | 3110 | if (same_inode) |
| 3086 | lock_extent_range(src, same_lock_start, same_lock_len); | 3111 | ret = lock_extent_range(src, same_lock_start, same_lock_len, |
| 3112 | false); | ||
| 3087 | else | 3113 | else |
| 3088 | btrfs_double_extent_lock(src, loff, dst, dst_loff, len); | 3114 | ret = btrfs_double_extent_lock(src, loff, dst, dst_loff, len, |
| 3115 | false); | ||
| 3116 | /* | ||
| 3117 | * If one of the inodes has dirty pages in the respective range or | ||
| 3118 | * ordered extents, we need to flush delalloc and wait for all ordered | ||
| 3119 | * extents in the range. We must unlock the pages and the ranges in the | ||
| 3120 | * io trees to avoid deadlocks when flushing delalloc (requires locking | ||
| 3121 | * pages) and when waiting for ordered extents to complete (they require | ||
| 3122 | * range locking). | ||
| 3123 | */ | ||
| 3124 | if (ret == -EAGAIN) { | ||
| 3125 | /* | ||
| 3126 | * Ranges in the io trees already unlocked. Now unlock all | ||
| 3127 | * pages before waiting for all IO to complete. | ||
| 3128 | */ | ||
| 3129 | btrfs_cmp_data_free(&cmp); | ||
| 3130 | if (same_inode) { | ||
| 3131 | btrfs_wait_ordered_range(src, same_lock_start, | ||
| 3132 | same_lock_len); | ||
| 3133 | } else { | ||
| 3134 | btrfs_wait_ordered_range(src, loff, len); | ||
| 3135 | btrfs_wait_ordered_range(dst, dst_loff, len); | ||
| 3136 | } | ||
| 3137 | goto again; | ||
| 3138 | } | ||
| 3139 | ASSERT(ret == 0); | ||
| 3140 | if (WARN_ON(ret)) { | ||
| 3141 | /* ranges in the io trees already unlocked */ | ||
| 3142 | btrfs_cmp_data_free(&cmp); | ||
| 3143 | return ret; | ||
| 3144 | } | ||
| 3089 | 3145 | ||
| 3090 | /* pass original length for comparison so we stay within i_size */ | 3146 | /* pass original length for comparison so we stay within i_size */ |
| 3091 | ret = btrfs_cmp_data(src, loff, dst, dst_loff, olen, &cmp); | 3147 | ret = btrfs_cmp_data(src, loff, dst, dst_loff, olen, &cmp); |
| @@ -3907,9 +3963,15 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
| 3907 | u64 lock_start = min_t(u64, off, destoff); | 3963 | u64 lock_start = min_t(u64, off, destoff); |
| 3908 | u64 lock_len = max_t(u64, off, destoff) + len - lock_start; | 3964 | u64 lock_len = max_t(u64, off, destoff) + len - lock_start; |
| 3909 | 3965 | ||
| 3910 | lock_extent_range(src, lock_start, lock_len); | 3966 | ret = lock_extent_range(src, lock_start, lock_len, true); |
| 3911 | } else { | 3967 | } else { |
| 3912 | btrfs_double_extent_lock(src, off, inode, destoff, len); | 3968 | ret = btrfs_double_extent_lock(src, off, inode, destoff, len, |
| 3969 | true); | ||
| 3970 | } | ||
| 3971 | ASSERT(ret == 0); | ||
| 3972 | if (WARN_ON(ret)) { | ||
| 3973 | /* ranges in the io trees already unlocked */ | ||
| 3974 | goto out_unlock; | ||
| 3913 | } | 3975 | } |
| 3914 | 3976 | ||
| 3915 | ret = btrfs_clone(src, inode, off, olen, len, destoff, 0); | 3977 | ret = btrfs_clone(src, inode, off, olen, len, destoff, 0); |
