aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorMark Fasheh <mfasheh@suse.de>2015-06-30 17:42:05 -0400
committerChris Mason <clm@fb.com>2015-07-01 20:17:14 -0400
commitf441460202cb787c49963bcc1f54cb48c52f7512 (patch)
treeb66f909b1cf80dc1e2736f5716fb5566ec368a06 /fs
parent207910ddeeda38fd54544d94f8c8ca5a9632cc25 (diff)
btrfs: fix deadlock with extent-same and readpage
->readpage() does page_lock() before extent_lock(), we do the opposite in extent-same. We want to reverse the order in btrfs_extent_same() but it's not quite straightforward since the page locks are taken inside btrfs_cmp_data(). So I split btrfs_cmp_data() into 3 parts with a small context structure that is passed between them. The first, btrfs_cmp_data_prepare() gathers up the pages needed (taking page lock as required) and puts them on our context structure. At this point, we are safe to lock the extent range. Afterwards, we use btrfs_cmp_data() to do the data compare as usual and btrfs_cmp_data_free() to clean up our context. Signed-off-by: Mark Fasheh <mfasheh@suse.de> Reviewed-by: David Sterba <dsterba@suse.cz> Signed-off-by: Chris Mason <clm@fb.com>
Diffstat (limited to 'fs')
-rw-r--r--fs/btrfs/ioctl.c148
1 files changed, 117 insertions, 31 deletions
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 55504338491d..9ebe2dd31f2a 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -2765,14 +2765,11 @@ out:
2765 return ret; 2765 return ret;
2766} 2766}
2767 2767
2768static struct page *extent_same_get_page(struct inode *inode, u64 off) 2768static struct page *extent_same_get_page(struct inode *inode, pgoff_t index)
2769{ 2769{
2770 struct page *page; 2770 struct page *page;
2771 pgoff_t index;
2772 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; 2771 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
2773 2772
2774 index = off >> PAGE_CACHE_SHIFT;
2775
2776 page = grab_cache_page(inode->i_mapping, index); 2773 page = grab_cache_page(inode->i_mapping, index);
2777 if (!page) 2774 if (!page)
2778 return NULL; 2775 return NULL;
@@ -2793,6 +2790,20 @@ static struct page *extent_same_get_page(struct inode *inode, u64 off)
2793 return page; 2790 return page;
2794} 2791}
2795 2792
2793static int gather_extent_pages(struct inode *inode, struct page **pages,
2794 int num_pages, u64 off)
2795{
2796 int i;
2797 pgoff_t index = off >> PAGE_CACHE_SHIFT;
2798
2799 for (i = 0; i < num_pages; i++) {
2800 pages[i] = extent_same_get_page(inode, index + i);
2801 if (!pages[i])
2802 return -ENOMEM;
2803 }
2804 return 0;
2805}
2806
2796static inline void lock_extent_range(struct inode *inode, u64 off, u64 len) 2807static inline void lock_extent_range(struct inode *inode, u64 off, u64 len)
2797{ 2808{
2798 /* do any pending delalloc/csum calc on src, one way or 2809 /* do any pending delalloc/csum calc on src, one way or
@@ -2818,52 +2829,120 @@ static inline void lock_extent_range(struct inode *inode, u64 off, u64 len)
2818 } 2829 }
2819} 2830}
2820 2831
2821static void btrfs_double_unlock(struct inode *inode1, u64 loff1, 2832static void btrfs_double_inode_unlock(struct inode *inode1, struct inode *inode2)
2822 struct inode *inode2, u64 loff2, u64 len)
2823{ 2833{
2824 unlock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1);
2825 unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1);
2826
2827 mutex_unlock(&inode1->i_mutex); 2834 mutex_unlock(&inode1->i_mutex);
2828 mutex_unlock(&inode2->i_mutex); 2835 mutex_unlock(&inode2->i_mutex);
2829} 2836}
2830 2837
2831static void btrfs_double_lock(struct inode *inode1, u64 loff1, 2838static void btrfs_double_inode_lock(struct inode *inode1, struct inode *inode2)
2832 struct inode *inode2, u64 loff2, u64 len) 2839{
2840 if (inode1 < inode2)
2841 swap(inode1, inode2);
2842
2843 mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT);
2844 if (inode1 != inode2)
2845 mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD);
2846}
2847
2848static void btrfs_double_extent_unlock(struct inode *inode1, u64 loff1,
2849 struct inode *inode2, u64 loff2, u64 len)
2850{
2851 unlock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1);
2852 unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1);
2853}
2854
2855static void btrfs_double_extent_lock(struct inode *inode1, u64 loff1,
2856 struct inode *inode2, u64 loff2, u64 len)
2833{ 2857{
2834 if (inode1 < inode2) { 2858 if (inode1 < inode2) {
2835 swap(inode1, inode2); 2859 swap(inode1, inode2);
2836 swap(loff1, loff2); 2860 swap(loff1, loff2);
2837 } 2861 }
2838
2839 mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT);
2840 lock_extent_range(inode1, loff1, len); 2862 lock_extent_range(inode1, loff1, len);
2841 if (inode1 != inode2) { 2863 if (inode1 != inode2)
2842 mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD);
2843 lock_extent_range(inode2, loff2, len); 2864 lock_extent_range(inode2, loff2, len);
2865}
2866
/*
 * Context handed between btrfs_cmp_data_prepare(), btrfs_cmp_data() and
 * btrfs_cmp_data_free(): two parallel arrays of page pointers, one per
 * file being compared.
 */
struct cmp_pages {
	int		num_pages;	/* number of slots in each array */
	struct page	**src_pages;	/* pages gathered from the source inode */
	struct page	**dst_pages;	/* pages gathered from the destination inode */
};
2872
2873static void btrfs_cmp_data_free(struct cmp_pages *cmp)
2874{
2875 int i;
2876 struct page *pg;
2877
2878 for (i = 0; i < cmp->num_pages; i++) {
2879 pg = cmp->src_pages[i];
2880 if (pg)
2881 page_cache_release(pg);
2882 pg = cmp->dst_pages[i];
2883 if (pg)
2884 page_cache_release(pg);
2885 }
2886 kfree(cmp->src_pages);
2887 kfree(cmp->dst_pages);
2888}
2889
2890static int btrfs_cmp_data_prepare(struct inode *src, u64 loff,
2891 struct inode *dst, u64 dst_loff,
2892 u64 len, struct cmp_pages *cmp)
2893{
2894 int ret;
2895 int num_pages = PAGE_CACHE_ALIGN(len) >> PAGE_CACHE_SHIFT;
2896 struct page **src_pgarr, **dst_pgarr;
2897
2898 /*
2899 * We must gather up all the pages before we initiate our
2900 * extent locking. We use an array for the page pointers. Size
2901 * of the array is bounded by len, which is in turn bounded by
2902 * BTRFS_MAX_DEDUPE_LEN.
2903 */
2904 src_pgarr = kzalloc(num_pages * sizeof(struct page *), GFP_NOFS);
2905 dst_pgarr = kzalloc(num_pages * sizeof(struct page *), GFP_NOFS);
2906 if (!src_pgarr || !dst_pgarr) {
2907 kfree(src_pgarr);
2908 kfree(dst_pgarr);
2909 return -ENOMEM;
2844 } 2910 }
2911 cmp->num_pages = num_pages;
2912 cmp->src_pages = src_pgarr;
2913 cmp->dst_pages = dst_pgarr;
2914
2915 ret = gather_extent_pages(src, cmp->src_pages, cmp->num_pages, loff);
2916 if (ret)
2917 goto out;
2918
2919 ret = gather_extent_pages(dst, cmp->dst_pages, cmp->num_pages, dst_loff);
2920
2921out:
2922 if (ret)
2923 btrfs_cmp_data_free(cmp);
2924 return 0;
2845} 2925}
2846 2926
2847static int btrfs_cmp_data(struct inode *src, u64 loff, struct inode *dst, 2927static int btrfs_cmp_data(struct inode *src, u64 loff, struct inode *dst,
2848 u64 dst_loff, u64 len) 2928 u64 dst_loff, u64 len, struct cmp_pages *cmp)
2849{ 2929{
2850 int ret = 0; 2930 int ret = 0;
2931 int i;
2851 struct page *src_page, *dst_page; 2932 struct page *src_page, *dst_page;
2852 unsigned int cmp_len = PAGE_CACHE_SIZE; 2933 unsigned int cmp_len = PAGE_CACHE_SIZE;
2853 void *addr, *dst_addr; 2934 void *addr, *dst_addr;
2854 2935
2936 i = 0;
2855 while (len) { 2937 while (len) {
2856 if (len < PAGE_CACHE_SIZE) 2938 if (len < PAGE_CACHE_SIZE)
2857 cmp_len = len; 2939 cmp_len = len;
2858 2940
2859 src_page = extent_same_get_page(src, loff); 2941 BUG_ON(i >= cmp->num_pages);
2860 if (!src_page) 2942
2861 return -EINVAL; 2943 src_page = cmp->src_pages[i];
2862 dst_page = extent_same_get_page(dst, dst_loff); 2944 dst_page = cmp->dst_pages[i];
2863 if (!dst_page) { 2945
2864 page_cache_release(src_page);
2865 return -EINVAL;
2866 }
2867 addr = kmap_atomic(src_page); 2946 addr = kmap_atomic(src_page);
2868 dst_addr = kmap_atomic(dst_page); 2947 dst_addr = kmap_atomic(dst_page);
2869 2948
@@ -2875,15 +2954,12 @@ static int btrfs_cmp_data(struct inode *src, u64 loff, struct inode *dst,
2875 2954
2876 kunmap_atomic(addr); 2955 kunmap_atomic(addr);
2877 kunmap_atomic(dst_addr); 2956 kunmap_atomic(dst_addr);
2878 page_cache_release(src_page);
2879 page_cache_release(dst_page);
2880 2957
2881 if (ret) 2958 if (ret)
2882 break; 2959 break;
2883 2960
2884 loff += cmp_len;
2885 dst_loff += cmp_len;
2886 len -= cmp_len; 2961 len -= cmp_len;
2962 i++;
2887 } 2963 }
2888 2964
2889 return ret; 2965 return ret;
@@ -2914,6 +2990,7 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
2914{ 2990{
2915 int ret; 2991 int ret;
2916 u64 len = olen; 2992 u64 len = olen;
2993 struct cmp_pages cmp;
2917 2994
2918 /* 2995 /*
2919 * btrfs_clone() can't handle extents in the same file 2996 * btrfs_clone() can't handle extents in the same file
@@ -2926,7 +3003,7 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
2926 if (len == 0) 3003 if (len == 0)
2927 return 0; 3004 return 0;
2928 3005
2929 btrfs_double_lock(src, loff, dst, dst_loff, len); 3006 btrfs_double_inode_lock(src, dst);
2930 3007
2931 ret = extent_same_check_offsets(src, loff, &len, olen); 3008 ret = extent_same_check_offsets(src, loff, &len, olen);
2932 if (ret) 3009 if (ret)
@@ -2943,13 +3020,22 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
2943 goto out_unlock; 3020 goto out_unlock;
2944 } 3021 }
2945 3022
3023 ret = btrfs_cmp_data_prepare(src, loff, dst, dst_loff, olen, &cmp);
3024 if (ret)
3025 goto out_unlock;
3026
3027 btrfs_double_extent_lock(src, loff, dst, dst_loff, len);
3028
2946 /* pass original length for comparison so we stay within i_size */ 3029 /* pass original length for comparison so we stay within i_size */
2947 ret = btrfs_cmp_data(src, loff, dst, dst_loff, olen); 3030 ret = btrfs_cmp_data(src, loff, dst, dst_loff, olen, &cmp);
2948 if (ret == 0) 3031 if (ret == 0)
2949 ret = btrfs_clone(src, dst, loff, olen, len, dst_loff); 3032 ret = btrfs_clone(src, dst, loff, olen, len, dst_loff);
2950 3033
3034 btrfs_double_extent_unlock(src, loff, dst, dst_loff, len);
3035
3036 btrfs_cmp_data_free(&cmp);
2951out_unlock: 3037out_unlock:
2952 btrfs_double_unlock(src, loff, dst, dst_loff, len); 3038 btrfs_double_inode_unlock(src, dst);
2953 3039
2954 return ret; 3040 return ret;
2955} 3041}