aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/ioctl.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/ioctl.c')
-rw-r--r--fs/btrfs/ioctl.c259
1 files changed, 205 insertions, 54 deletions
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index c86b835da7a8..0770c91586ca 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -87,7 +87,8 @@ struct btrfs_ioctl_received_subvol_args_32 {
87 87
88 88
89static int btrfs_clone(struct inode *src, struct inode *inode, 89static int btrfs_clone(struct inode *src, struct inode *inode,
90 u64 off, u64 olen, u64 olen_aligned, u64 destoff); 90 u64 off, u64 olen, u64 olen_aligned, u64 destoff,
91 int no_time_update);
91 92
92/* Mask out flags that are inappropriate for the given type of inode. */ 93/* Mask out flags that are inappropriate for the given type of inode. */
93static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags) 94static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags)
@@ -2765,14 +2766,11 @@ out:
2765 return ret; 2766 return ret;
2766} 2767}
2767 2768
2768static struct page *extent_same_get_page(struct inode *inode, u64 off) 2769static struct page *extent_same_get_page(struct inode *inode, pgoff_t index)
2769{ 2770{
2770 struct page *page; 2771 struct page *page;
2771 pgoff_t index;
2772 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; 2772 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
2773 2773
2774 index = off >> PAGE_CACHE_SHIFT;
2775
2776 page = grab_cache_page(inode->i_mapping, index); 2774 page = grab_cache_page(inode->i_mapping, index);
2777 if (!page) 2775 if (!page)
2778 return NULL; 2776 return NULL;
@@ -2793,6 +2791,20 @@ static struct page *extent_same_get_page(struct inode *inode, u64 off)
2793 return page; 2791 return page;
2794} 2792}
2795 2793
2794static int gather_extent_pages(struct inode *inode, struct page **pages,
2795 int num_pages, u64 off)
2796{
2797 int i;
2798 pgoff_t index = off >> PAGE_CACHE_SHIFT;
2799
2800 for (i = 0; i < num_pages; i++) {
2801 pages[i] = extent_same_get_page(inode, index + i);
2802 if (!pages[i])
2803 return -ENOMEM;
2804 }
2805 return 0;
2806}
2807
2796static inline void lock_extent_range(struct inode *inode, u64 off, u64 len) 2808static inline void lock_extent_range(struct inode *inode, u64 off, u64 len)
2797{ 2809{
2798 /* do any pending delalloc/csum calc on src, one way or 2810 /* do any pending delalloc/csum calc on src, one way or
@@ -2818,52 +2830,120 @@ static inline void lock_extent_range(struct inode *inode, u64 off, u64 len)
2818 } 2830 }
2819} 2831}
2820 2832
2821static void btrfs_double_unlock(struct inode *inode1, u64 loff1, 2833static void btrfs_double_inode_unlock(struct inode *inode1, struct inode *inode2)
2822 struct inode *inode2, u64 loff2, u64 len)
2823{ 2834{
2824 unlock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1);
2825 unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1);
2826
2827 mutex_unlock(&inode1->i_mutex); 2835 mutex_unlock(&inode1->i_mutex);
2828 mutex_unlock(&inode2->i_mutex); 2836 mutex_unlock(&inode2->i_mutex);
2829} 2837}
2830 2838
2831static void btrfs_double_lock(struct inode *inode1, u64 loff1, 2839static void btrfs_double_inode_lock(struct inode *inode1, struct inode *inode2)
2832 struct inode *inode2, u64 loff2, u64 len) 2840{
2841 if (inode1 < inode2)
2842 swap(inode1, inode2);
2843
2844 mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT);
2845 if (inode1 != inode2)
2846 mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD);
2847}
2848
2849static void btrfs_double_extent_unlock(struct inode *inode1, u64 loff1,
2850 struct inode *inode2, u64 loff2, u64 len)
2851{
2852 unlock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1);
2853 unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1);
2854}
2855
2856static void btrfs_double_extent_lock(struct inode *inode1, u64 loff1,
2857 struct inode *inode2, u64 loff2, u64 len)
2833{ 2858{
2834 if (inode1 < inode2) { 2859 if (inode1 < inode2) {
2835 swap(inode1, inode2); 2860 swap(inode1, inode2);
2836 swap(loff1, loff2); 2861 swap(loff1, loff2);
2837 } 2862 }
2838
2839 mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT);
2840 lock_extent_range(inode1, loff1, len); 2863 lock_extent_range(inode1, loff1, len);
2841 if (inode1 != inode2) { 2864 if (inode1 != inode2)
2842 mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD);
2843 lock_extent_range(inode2, loff2, len); 2865 lock_extent_range(inode2, loff2, len);
2866}
2867
2868struct cmp_pages {
2869 int num_pages;
2870 struct page **src_pages;
2871 struct page **dst_pages;
2872};
2873
2874static void btrfs_cmp_data_free(struct cmp_pages *cmp)
2875{
2876 int i;
2877 struct page *pg;
2878
2879 for (i = 0; i < cmp->num_pages; i++) {
2880 pg = cmp->src_pages[i];
2881 if (pg)
2882 page_cache_release(pg);
2883 pg = cmp->dst_pages[i];
2884 if (pg)
2885 page_cache_release(pg);
2886 }
2887 kfree(cmp->src_pages);
2888 kfree(cmp->dst_pages);
2889}
2890
2891static int btrfs_cmp_data_prepare(struct inode *src, u64 loff,
2892 struct inode *dst, u64 dst_loff,
2893 u64 len, struct cmp_pages *cmp)
2894{
2895 int ret;
2896 int num_pages = PAGE_CACHE_ALIGN(len) >> PAGE_CACHE_SHIFT;
2897 struct page **src_pgarr, **dst_pgarr;
2898
2899 /*
2900 * We must gather up all the pages before we initiate our
2901 * extent locking. We use an array for the page pointers. Size
2902 * of the array is bounded by len, which is in turn bounded by
2903 * BTRFS_MAX_DEDUPE_LEN.
2904 */
2905 src_pgarr = kzalloc(num_pages * sizeof(struct page *), GFP_NOFS);
2906 dst_pgarr = kzalloc(num_pages * sizeof(struct page *), GFP_NOFS);
2907 if (!src_pgarr || !dst_pgarr) {
2908 kfree(src_pgarr);
2909 kfree(dst_pgarr);
2910 return -ENOMEM;
2844 } 2911 }
2912 cmp->num_pages = num_pages;
2913 cmp->src_pages = src_pgarr;
2914 cmp->dst_pages = dst_pgarr;
2915
2916 ret = gather_extent_pages(src, cmp->src_pages, cmp->num_pages, loff);
2917 if (ret)
2918 goto out;
2919
2920 ret = gather_extent_pages(dst, cmp->dst_pages, cmp->num_pages, dst_loff);
2921
2922out:
2923 if (ret)
2924 btrfs_cmp_data_free(cmp);
2925 return 0;
2845} 2926}
2846 2927
2847static int btrfs_cmp_data(struct inode *src, u64 loff, struct inode *dst, 2928static int btrfs_cmp_data(struct inode *src, u64 loff, struct inode *dst,
2848 u64 dst_loff, u64 len) 2929 u64 dst_loff, u64 len, struct cmp_pages *cmp)
2849{ 2930{
2850 int ret = 0; 2931 int ret = 0;
2932 int i;
2851 struct page *src_page, *dst_page; 2933 struct page *src_page, *dst_page;
2852 unsigned int cmp_len = PAGE_CACHE_SIZE; 2934 unsigned int cmp_len = PAGE_CACHE_SIZE;
2853 void *addr, *dst_addr; 2935 void *addr, *dst_addr;
2854 2936
2937 i = 0;
2855 while (len) { 2938 while (len) {
2856 if (len < PAGE_CACHE_SIZE) 2939 if (len < PAGE_CACHE_SIZE)
2857 cmp_len = len; 2940 cmp_len = len;
2858 2941
2859 src_page = extent_same_get_page(src, loff); 2942 BUG_ON(i >= cmp->num_pages);
2860 if (!src_page) 2943
2861 return -EINVAL; 2944 src_page = cmp->src_pages[i];
2862 dst_page = extent_same_get_page(dst, dst_loff); 2945 dst_page = cmp->dst_pages[i];
2863 if (!dst_page) { 2946
2864 page_cache_release(src_page);
2865 return -EINVAL;
2866 }
2867 addr = kmap_atomic(src_page); 2947 addr = kmap_atomic(src_page);
2868 dst_addr = kmap_atomic(dst_page); 2948 dst_addr = kmap_atomic(dst_page);
2869 2949
@@ -2875,15 +2955,12 @@ static int btrfs_cmp_data(struct inode *src, u64 loff, struct inode *dst,
2875 2955
2876 kunmap_atomic(addr); 2956 kunmap_atomic(addr);
2877 kunmap_atomic(dst_addr); 2957 kunmap_atomic(dst_addr);
2878 page_cache_release(src_page);
2879 page_cache_release(dst_page);
2880 2958
2881 if (ret) 2959 if (ret)
2882 break; 2960 break;
2883 2961
2884 loff += cmp_len;
2885 dst_loff += cmp_len;
2886 len -= cmp_len; 2962 len -= cmp_len;
2963 i++;
2887 } 2964 }
2888 2965
2889 return ret; 2966 return ret;
@@ -2914,27 +2991,62 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
2914{ 2991{
2915 int ret; 2992 int ret;
2916 u64 len = olen; 2993 u64 len = olen;
2994 struct cmp_pages cmp;
2995 int same_inode = 0;
2996 u64 same_lock_start = 0;
2997 u64 same_lock_len = 0;
2917 2998
2918 /*
2919 * btrfs_clone() can't handle extents in the same file
2920 * yet. Once that works, we can drop this check and replace it
2921 * with a check for the same inode, but overlapping extents.
2922 */
2923 if (src == dst) 2999 if (src == dst)
2924 return -EINVAL; 3000 same_inode = 1;
2925 3001
2926 if (len == 0) 3002 if (len == 0)
2927 return 0; 3003 return 0;
2928 3004
2929 btrfs_double_lock(src, loff, dst, dst_loff, len); 3005 if (same_inode) {
3006 mutex_lock(&src->i_mutex);
2930 3007
2931 ret = extent_same_check_offsets(src, loff, &len, olen); 3008 ret = extent_same_check_offsets(src, loff, &len, olen);
2932 if (ret) 3009 if (ret)
2933 goto out_unlock; 3010 goto out_unlock;
2934 3011
2935 ret = extent_same_check_offsets(dst, dst_loff, &len, olen); 3012 /*
2936 if (ret) 3013 * Single inode case wants the same checks, except we
2937 goto out_unlock; 3014 * don't want our length pushed out past i_size as
3015 * comparing that data range makes no sense.
3016 *
3017 * extent_same_check_offsets() will do this for an
3018 * unaligned length at i_size, so catch it here and
3019 * reject the request.
3020 *
3021 * This effectively means we require aligned extents
3022 * for the single-inode case, whereas the other cases
3023 * allow an unaligned length so long as it ends at
3024 * i_size.
3025 */
3026 if (len != olen) {
3027 ret = -EINVAL;
3028 goto out_unlock;
3029 }
3030
3031 /* Check for overlapping ranges */
3032 if (dst_loff + len > loff && dst_loff < loff + len) {
3033 ret = -EINVAL;
3034 goto out_unlock;
3035 }
3036
3037 same_lock_start = min_t(u64, loff, dst_loff);
3038 same_lock_len = max_t(u64, loff, dst_loff) + len - same_lock_start;
3039 } else {
3040 btrfs_double_inode_lock(src, dst);
3041
3042 ret = extent_same_check_offsets(src, loff, &len, olen);
3043 if (ret)
3044 goto out_unlock;
3045
3046 ret = extent_same_check_offsets(dst, dst_loff, &len, olen);
3047 if (ret)
3048 goto out_unlock;
3049 }
2938 3050
2939 /* don't make the dst file partly checksummed */ 3051 /* don't make the dst file partly checksummed */
2940 if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) != 3052 if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) !=
@@ -2943,12 +3055,32 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
2943 goto out_unlock; 3055 goto out_unlock;
2944 } 3056 }
2945 3057
2946 ret = btrfs_cmp_data(src, loff, dst, dst_loff, len); 3058 ret = btrfs_cmp_data_prepare(src, loff, dst, dst_loff, olen, &cmp);
3059 if (ret)
3060 goto out_unlock;
3061
3062 if (same_inode)
3063 lock_extent_range(src, same_lock_start, same_lock_len);
3064 else
3065 btrfs_double_extent_lock(src, loff, dst, dst_loff, len);
3066
3067 /* pass original length for comparison so we stay within i_size */
3068 ret = btrfs_cmp_data(src, loff, dst, dst_loff, olen, &cmp);
2947 if (ret == 0) 3069 if (ret == 0)
2948 ret = btrfs_clone(src, dst, loff, olen, len, dst_loff); 3070 ret = btrfs_clone(src, dst, loff, olen, len, dst_loff, 1);
3071
3072 if (same_inode)
3073 unlock_extent(&BTRFS_I(src)->io_tree, same_lock_start,
3074 same_lock_start + same_lock_len - 1);
3075 else
3076 btrfs_double_extent_unlock(src, loff, dst, dst_loff, len);
2949 3077
3078 btrfs_cmp_data_free(&cmp);
2950out_unlock: 3079out_unlock:
2951 btrfs_double_unlock(src, loff, dst, dst_loff, len); 3080 if (same_inode)
3081 mutex_unlock(&src->i_mutex);
3082 else
3083 btrfs_double_inode_unlock(src, dst);
2952 3084
2953 return ret; 3085 return ret;
2954} 3086}
@@ -2958,7 +3090,7 @@ out_unlock:
2958static long btrfs_ioctl_file_extent_same(struct file *file, 3090static long btrfs_ioctl_file_extent_same(struct file *file,
2959 struct btrfs_ioctl_same_args __user *argp) 3091 struct btrfs_ioctl_same_args __user *argp)
2960{ 3092{
2961 struct btrfs_ioctl_same_args *same; 3093 struct btrfs_ioctl_same_args *same = NULL;
2962 struct btrfs_ioctl_same_extent_info *info; 3094 struct btrfs_ioctl_same_extent_info *info;
2963 struct inode *src = file_inode(file); 3095 struct inode *src = file_inode(file);
2964 u64 off; 3096 u64 off;
@@ -2988,6 +3120,7 @@ static long btrfs_ioctl_file_extent_same(struct file *file,
2988 3120
2989 if (IS_ERR(same)) { 3121 if (IS_ERR(same)) {
2990 ret = PTR_ERR(same); 3122 ret = PTR_ERR(same);
3123 same = NULL;
2991 goto out; 3124 goto out;
2992 } 3125 }
2993 3126
@@ -3058,6 +3191,7 @@ static long btrfs_ioctl_file_extent_same(struct file *file,
3058 3191
3059out: 3192out:
3060 mnt_drop_write_file(file); 3193 mnt_drop_write_file(file);
3194 kfree(same);
3061 return ret; 3195 return ret;
3062} 3196}
3063 3197
@@ -3100,13 +3234,15 @@ static int clone_finish_inode_update(struct btrfs_trans_handle *trans,
3100 struct inode *inode, 3234 struct inode *inode,
3101 u64 endoff, 3235 u64 endoff,
3102 const u64 destoff, 3236 const u64 destoff,
3103 const u64 olen) 3237 const u64 olen,
3238 int no_time_update)
3104{ 3239{
3105 struct btrfs_root *root = BTRFS_I(inode)->root; 3240 struct btrfs_root *root = BTRFS_I(inode)->root;
3106 int ret; 3241 int ret;
3107 3242
3108 inode_inc_iversion(inode); 3243 inode_inc_iversion(inode);
3109 inode->i_mtime = inode->i_ctime = CURRENT_TIME; 3244 if (!no_time_update)
3245 inode->i_mtime = inode->i_ctime = CURRENT_TIME;
3110 /* 3246 /*
3111 * We round up to the block size at eof when determining which 3247 * We round up to the block size at eof when determining which
3112 * extents to clone above, but shouldn't round up the file size. 3248 * extents to clone above, but shouldn't round up the file size.
@@ -3191,13 +3327,13 @@ static void clone_update_extent_map(struct inode *inode,
3191 * @inode: Inode to clone to 3327 * @inode: Inode to clone to
3192 * @off: Offset within source to start clone from 3328 * @off: Offset within source to start clone from
3193 * @olen: Original length, passed by user, of range to clone 3329 * @olen: Original length, passed by user, of range to clone
3194 * @olen_aligned: Block-aligned value of olen, extent_same uses 3330 * @olen_aligned: Block-aligned value of olen
3195 * identical values here
3196 * @destoff: Offset within @inode to start clone 3331 * @destoff: Offset within @inode to start clone
3332 * @no_time_update: Whether to update mtime/ctime on the target inode
3197 */ 3333 */
3198static int btrfs_clone(struct inode *src, struct inode *inode, 3334static int btrfs_clone(struct inode *src, struct inode *inode,
3199 const u64 off, const u64 olen, const u64 olen_aligned, 3335 const u64 off, const u64 olen, const u64 olen_aligned,
3200 const u64 destoff) 3336 const u64 destoff, int no_time_update)
3201{ 3337{
3202 struct btrfs_root *root = BTRFS_I(inode)->root; 3338 struct btrfs_root *root = BTRFS_I(inode)->root;
3203 struct btrfs_path *path = NULL; 3339 struct btrfs_path *path = NULL;
@@ -3452,6 +3588,20 @@ process_slot:
3452 u64 trim = 0; 3588 u64 trim = 0;
3453 u64 aligned_end = 0; 3589 u64 aligned_end = 0;
3454 3590
3591 /*
3592 * Don't copy an inline extent into an offset
3593 * greater than zero. Having an inline extent
3594 * at such an offset results in chaos as btrfs
3595 * isn't prepared for such cases. Just skip
3596 * this case for the same reasons as commented
3597 * at btrfs_ioctl_clone().
3598 */
3599 if (last_dest_end > 0) {
3600 ret = -EOPNOTSUPP;
3601 btrfs_end_transaction(trans, root);
3602 goto out;
3603 }
3604
3455 if (off > key.offset) { 3605 if (off > key.offset) {
3456 skip = off - key.offset; 3606 skip = off - key.offset;
3457 new_key.offset += skip; 3607 new_key.offset += skip;
@@ -3521,7 +3671,8 @@ process_slot:
3521 root->sectorsize); 3671 root->sectorsize);
3522 ret = clone_finish_inode_update(trans, inode, 3672 ret = clone_finish_inode_update(trans, inode,
3523 last_dest_end, 3673 last_dest_end,
3524 destoff, olen); 3674 destoff, olen,
3675 no_time_update);
3525 if (ret) 3676 if (ret)
3526 goto out; 3677 goto out;
3527 if (new_key.offset + datal >= destoff + len) 3678 if (new_key.offset + datal >= destoff + len)
@@ -3559,7 +3710,7 @@ process_slot:
3559 clone_update_extent_map(inode, trans, NULL, last_dest_end, 3710 clone_update_extent_map(inode, trans, NULL, last_dest_end,
3560 destoff + len - last_dest_end); 3711 destoff + len - last_dest_end);
3561 ret = clone_finish_inode_update(trans, inode, destoff + len, 3712 ret = clone_finish_inode_update(trans, inode, destoff + len,
3562 destoff, olen); 3713 destoff, olen, no_time_update);
3563 } 3714 }
3564 3715
3565out: 3716out:
@@ -3696,7 +3847,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
3696 lock_extent_range(inode, destoff, len); 3847 lock_extent_range(inode, destoff, len);
3697 } 3848 }
3698 3849
3699 ret = btrfs_clone(src, inode, off, olen, len, destoff); 3850 ret = btrfs_clone(src, inode, off, olen, len, destoff, 0);
3700 3851
3701 if (same_inode) { 3852 if (same_inode) {
3702 u64 lock_start = min_t(u64, off, destoff); 3853 u64 lock_start = min_t(u64, off, destoff);