diff options
Diffstat (limited to 'fs/btrfs/ioctl.c')
-rw-r--r-- | fs/btrfs/ioctl.c | 119 |
1 files changed, 97 insertions, 22 deletions
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 952172ca7e45..48aee9846329 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
@@ -2794,24 +2794,29 @@ out: | |||
2794 | static struct page *extent_same_get_page(struct inode *inode, pgoff_t index) | 2794 | static struct page *extent_same_get_page(struct inode *inode, pgoff_t index) |
2795 | { | 2795 | { |
2796 | struct page *page; | 2796 | struct page *page; |
2797 | struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; | ||
2798 | 2797 | ||
2799 | page = grab_cache_page(inode->i_mapping, index); | 2798 | page = grab_cache_page(inode->i_mapping, index); |
2800 | if (!page) | 2799 | if (!page) |
2801 | return NULL; | 2800 | return ERR_PTR(-ENOMEM); |
2802 | 2801 | ||
2803 | if (!PageUptodate(page)) { | 2802 | if (!PageUptodate(page)) { |
2804 | if (extent_read_full_page_nolock(tree, page, btrfs_get_extent, | 2803 | int ret; |
2805 | 0)) | 2804 | |
2806 | return NULL; | 2805 | ret = btrfs_readpage(NULL, page); |
2806 | if (ret) | ||
2807 | return ERR_PTR(ret); | ||
2807 | lock_page(page); | 2808 | lock_page(page); |
2808 | if (!PageUptodate(page)) { | 2809 | if (!PageUptodate(page)) { |
2809 | unlock_page(page); | 2810 | unlock_page(page); |
2810 | page_cache_release(page); | 2811 | page_cache_release(page); |
2811 | return NULL; | 2812 | return ERR_PTR(-EIO); |
2813 | } | ||
2814 | if (page->mapping != inode->i_mapping) { | ||
2815 | unlock_page(page); | ||
2816 | page_cache_release(page); | ||
2817 | return ERR_PTR(-EAGAIN); | ||
2812 | } | 2818 | } |
2813 | } | 2819 | } |
2814 | unlock_page(page); | ||
2815 | 2820 | ||
2816 | return page; | 2821 | return page; |
2817 | } | 2822 | } |
@@ -2823,17 +2828,31 @@ static int gather_extent_pages(struct inode *inode, struct page **pages, | |||
2823 | pgoff_t index = off >> PAGE_CACHE_SHIFT; | 2828 | pgoff_t index = off >> PAGE_CACHE_SHIFT; |
2824 | 2829 | ||
2825 | for (i = 0; i < num_pages; i++) { | 2830 | for (i = 0; i < num_pages; i++) { |
2831 | again: | ||
2826 | pages[i] = extent_same_get_page(inode, index + i); | 2832 | pages[i] = extent_same_get_page(inode, index + i); |
2827 | if (!pages[i]) | 2833 | if (IS_ERR(pages[i])) { |
2828 | return -ENOMEM; | 2834 | int err = PTR_ERR(pages[i]); |
2835 | |||
2836 | if (err == -EAGAIN) | ||
2837 | goto again; | ||
2838 | pages[i] = NULL; | ||
2839 | return err; | ||
2840 | } | ||
2829 | } | 2841 | } |
2830 | return 0; | 2842 | return 0; |
2831 | } | 2843 | } |
2832 | 2844 | ||
2833 | static inline void lock_extent_range(struct inode *inode, u64 off, u64 len) | 2845 | static int lock_extent_range(struct inode *inode, u64 off, u64 len, |
2846 | bool retry_range_locking) | ||
2834 | { | 2847 | { |
2835 | /* do any pending delalloc/csum calc on src, one way or | 2848 | /* |
2836 | another, and lock file content */ | 2849 | * Do any pending delalloc/csum calculations on inode, one way or |
2850 | * another, and lock file content. | ||
2851 | * The locking order is: | ||
2852 | * | ||
2853 | * 1) pages | ||
2854 | * 2) range in the inode's io tree | ||
2855 | */ | ||
2837 | while (1) { | 2856 | while (1) { |
2838 | struct btrfs_ordered_extent *ordered; | 2857 | struct btrfs_ordered_extent *ordered; |
2839 | lock_extent(&BTRFS_I(inode)->io_tree, off, off + len - 1); | 2858 | lock_extent(&BTRFS_I(inode)->io_tree, off, off + len - 1); |
@@ -2851,8 +2870,11 @@ static inline void lock_extent_range(struct inode *inode, u64 off, u64 len) | |||
2851 | unlock_extent(&BTRFS_I(inode)->io_tree, off, off + len - 1); | 2870 | unlock_extent(&BTRFS_I(inode)->io_tree, off, off + len - 1); |
2852 | if (ordered) | 2871 | if (ordered) |
2853 | btrfs_put_ordered_extent(ordered); | 2872 | btrfs_put_ordered_extent(ordered); |
2873 | if (!retry_range_locking) | ||
2874 | return -EAGAIN; | ||
2854 | btrfs_wait_ordered_range(inode, off, len); | 2875 | btrfs_wait_ordered_range(inode, off, len); |
2855 | } | 2876 | } |
2877 | return 0; | ||
2856 | } | 2878 | } |
2857 | 2879 | ||
2858 | static void btrfs_double_inode_unlock(struct inode *inode1, struct inode *inode2) | 2880 | static void btrfs_double_inode_unlock(struct inode *inode1, struct inode *inode2) |
@@ -2877,15 +2899,24 @@ static void btrfs_double_extent_unlock(struct inode *inode1, u64 loff1, | |||
2877 | unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1); | 2899 | unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1); |
2878 | } | 2900 | } |
2879 | 2901 | ||
2880 | static void btrfs_double_extent_lock(struct inode *inode1, u64 loff1, | 2902 | static int btrfs_double_extent_lock(struct inode *inode1, u64 loff1, |
2881 | struct inode *inode2, u64 loff2, u64 len) | 2903 | struct inode *inode2, u64 loff2, u64 len, |
2904 | bool retry_range_locking) | ||
2882 | { | 2905 | { |
2906 | int ret; | ||
2907 | |||
2883 | if (inode1 < inode2) { | 2908 | if (inode1 < inode2) { |
2884 | swap(inode1, inode2); | 2909 | swap(inode1, inode2); |
2885 | swap(loff1, loff2); | 2910 | swap(loff1, loff2); |
2886 | } | 2911 | } |
2887 | lock_extent_range(inode1, loff1, len); | 2912 | ret = lock_extent_range(inode1, loff1, len, retry_range_locking); |
2888 | lock_extent_range(inode2, loff2, len); | 2913 | if (ret) |
2914 | return ret; | ||
2915 | ret = lock_extent_range(inode2, loff2, len, retry_range_locking); | ||
2916 | if (ret) | ||
2917 | unlock_extent(&BTRFS_I(inode1)->io_tree, loff1, | ||
2918 | loff1 + len - 1); | ||
2919 | return ret; | ||
2889 | } | 2920 | } |
2890 | 2921 | ||
2891 | struct cmp_pages { | 2922 | struct cmp_pages { |
@@ -2901,11 +2932,15 @@ static void btrfs_cmp_data_free(struct cmp_pages *cmp) | |||
2901 | 2932 | ||
2902 | for (i = 0; i < cmp->num_pages; i++) { | 2933 | for (i = 0; i < cmp->num_pages; i++) { |
2903 | pg = cmp->src_pages[i]; | 2934 | pg = cmp->src_pages[i]; |
2904 | if (pg) | 2935 | if (pg) { |
2936 | unlock_page(pg); | ||
2905 | page_cache_release(pg); | 2937 | page_cache_release(pg); |
2938 | } | ||
2906 | pg = cmp->dst_pages[i]; | 2939 | pg = cmp->dst_pages[i]; |
2907 | if (pg) | 2940 | if (pg) { |
2941 | unlock_page(pg); | ||
2908 | page_cache_release(pg); | 2942 | page_cache_release(pg); |
2943 | } | ||
2909 | } | 2944 | } |
2910 | kfree(cmp->src_pages); | 2945 | kfree(cmp->src_pages); |
2911 | kfree(cmp->dst_pages); | 2946 | kfree(cmp->dst_pages); |
@@ -2966,6 +3001,8 @@ static int btrfs_cmp_data(struct inode *src, u64 loff, struct inode *dst, | |||
2966 | 3001 | ||
2967 | src_page = cmp->src_pages[i]; | 3002 | src_page = cmp->src_pages[i]; |
2968 | dst_page = cmp->dst_pages[i]; | 3003 | dst_page = cmp->dst_pages[i]; |
3004 | ASSERT(PageLocked(src_page)); | ||
3005 | ASSERT(PageLocked(dst_page)); | ||
2969 | 3006 | ||
2970 | addr = kmap_atomic(src_page); | 3007 | addr = kmap_atomic(src_page); |
2971 | dst_addr = kmap_atomic(dst_page); | 3008 | dst_addr = kmap_atomic(dst_page); |
@@ -3078,14 +3115,46 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen, | |||
3078 | goto out_unlock; | 3115 | goto out_unlock; |
3079 | } | 3116 | } |
3080 | 3117 | ||
3118 | again: | ||
3081 | ret = btrfs_cmp_data_prepare(src, loff, dst, dst_loff, olen, &cmp); | 3119 | ret = btrfs_cmp_data_prepare(src, loff, dst, dst_loff, olen, &cmp); |
3082 | if (ret) | 3120 | if (ret) |
3083 | goto out_unlock; | 3121 | goto out_unlock; |
3084 | 3122 | ||
3085 | if (same_inode) | 3123 | if (same_inode) |
3086 | lock_extent_range(src, same_lock_start, same_lock_len); | 3124 | ret = lock_extent_range(src, same_lock_start, same_lock_len, |
3125 | false); | ||
3087 | else | 3126 | else |
3088 | btrfs_double_extent_lock(src, loff, dst, dst_loff, len); | 3127 | ret = btrfs_double_extent_lock(src, loff, dst, dst_loff, len, |
3128 | false); | ||
3129 | /* | ||
3130 | * If one of the inodes has dirty pages in the respective range or | ||
3131 | * ordered extents, we need to flush delalloc and wait for all ordered | ||
3132 | * extents in the range. We must unlock the pages and the ranges in the | ||
3133 | * io trees to avoid deadlocks when flushing delalloc (requires locking | ||
3134 | * pages) and when waiting for ordered extents to complete (they require | ||
3135 | * range locking). | ||
3136 | */ | ||
3137 | if (ret == -EAGAIN) { | ||
3138 | /* | ||
3139 | * Ranges in the io trees already unlocked. Now unlock all | ||
3140 | * pages before waiting for all IO to complete. | ||
3141 | */ | ||
3142 | btrfs_cmp_data_free(&cmp); | ||
3143 | if (same_inode) { | ||
3144 | btrfs_wait_ordered_range(src, same_lock_start, | ||
3145 | same_lock_len); | ||
3146 | } else { | ||
3147 | btrfs_wait_ordered_range(src, loff, len); | ||
3148 | btrfs_wait_ordered_range(dst, dst_loff, len); | ||
3149 | } | ||
3150 | goto again; | ||
3151 | } | ||
3152 | ASSERT(ret == 0); | ||
3153 | if (WARN_ON(ret)) { | ||
3154 | /* ranges in the io trees already unlocked */ | ||
3155 | btrfs_cmp_data_free(&cmp); | ||
3156 | return ret; | ||
3157 | } | ||
3089 | 3158 | ||
3090 | /* pass original length for comparison so we stay within i_size */ | 3159 | /* pass original length for comparison so we stay within i_size */ |
3091 | ret = btrfs_cmp_data(src, loff, dst, dst_loff, olen, &cmp); | 3160 | ret = btrfs_cmp_data(src, loff, dst, dst_loff, olen, &cmp); |
@@ -3795,9 +3864,15 @@ static noinline int btrfs_clone_files(struct file *file, struct file *file_src, | |||
3795 | u64 lock_start = min_t(u64, off, destoff); | 3864 | u64 lock_start = min_t(u64, off, destoff); |
3796 | u64 lock_len = max_t(u64, off, destoff) + len - lock_start; | 3865 | u64 lock_len = max_t(u64, off, destoff) + len - lock_start; |
3797 | 3866 | ||
3798 | lock_extent_range(src, lock_start, lock_len); | 3867 | ret = lock_extent_range(src, lock_start, lock_len, true); |
3799 | } else { | 3868 | } else { |
3800 | btrfs_double_extent_lock(src, off, inode, destoff, len); | 3869 | ret = btrfs_double_extent_lock(src, off, inode, destoff, len, |
3870 | true); | ||
3871 | } | ||
3872 | ASSERT(ret == 0); | ||
3873 | if (WARN_ON(ret)) { | ||
3874 | /* ranges in the io trees already unlocked */ | ||
3875 | goto out_unlock; | ||
3801 | } | 3876 | } |
3802 | 3877 | ||
3803 | ret = btrfs_clone(src, inode, off, olen, len, destoff, 0); | 3878 | ret = btrfs_clone(src, inode, off, olen, len, destoff, 0); |