about | summary | refs | log | tree | commit | diff | stats
path: root/fs/btrfs/inode.c
diff options
context:
space:
mode:
author: Yan, Zheng <zheng.yan@oracle.com> 2009-11-12 04:35:36 -0500
committer: Chris Mason <chris.mason@oracle.com> 2009-12-17 12:33:34 -0500
commit: 8082510e7124cc50d728f1b875639cb4e22312cc (patch)
tree: e9f0a0a4504a87689b4765368b508fff5ae2ddf8 /fs/btrfs/inode.c
parent: 5a303d5d4b8055d2e5a03e92d04745bfc5881a22 (diff)
Btrfs: Make truncate(2) more ENOSPC friendly
Truncating and deleting regular files are unbounded operations, so it is not good to do them in a single transaction. This patch makes btrfs_truncate and btrfs_delete_inode start a new transaction after all items in a tree leaf are deleted.
Signed-off-by: Yan Zheng <zheng.yan@oracle.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs/inode.c')
-rw-r--r-- fs/btrfs/inode.c 316
1 files changed, 192 insertions, 124 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 8d8baaa61504..dcec42ee8cf2 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2848,37 +2848,40 @@ out:
2848 * min_type is the minimum key type to truncate down to. If set to 0, this 2848 * min_type is the minimum key type to truncate down to. If set to 0, this
2849 * will kill all the items on this inode, including the INODE_ITEM_KEY. 2849 * will kill all the items on this inode, including the INODE_ITEM_KEY.
2850 */ 2850 */
2851noinline int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, 2851int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
2852 struct btrfs_root *root, 2852 struct btrfs_root *root,
2853 struct inode *inode, 2853 struct inode *inode,
2854 u64 new_size, u32 min_type) 2854 u64 new_size, u32 min_type)
2855{ 2855{
2856 int ret;
2857 struct btrfs_path *path; 2856 struct btrfs_path *path;
2858 struct btrfs_key key;
2859 struct btrfs_key found_key;
2860 u32 found_type = (u8)-1;
2861 struct extent_buffer *leaf; 2857 struct extent_buffer *leaf;
2862 struct btrfs_file_extent_item *fi; 2858 struct btrfs_file_extent_item *fi;
2859 struct btrfs_key key;
2860 struct btrfs_key found_key;
2863 u64 extent_start = 0; 2861 u64 extent_start = 0;
2864 u64 extent_num_bytes = 0; 2862 u64 extent_num_bytes = 0;
2865 u64 extent_offset = 0; 2863 u64 extent_offset = 0;
2866 u64 item_end = 0; 2864 u64 item_end = 0;
2865 u64 mask = root->sectorsize - 1;
2866 u32 found_type = (u8)-1;
2867 int found_extent; 2867 int found_extent;
2868 int del_item; 2868 int del_item;
2869 int pending_del_nr = 0; 2869 int pending_del_nr = 0;
2870 int pending_del_slot = 0; 2870 int pending_del_slot = 0;
2871 int extent_type = -1; 2871 int extent_type = -1;
2872 int encoding; 2872 int encoding;
2873 u64 mask = root->sectorsize - 1; 2873 int ret;
2874 int err = 0;
2875
2876 BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY);
2874 2877
2875 if (root->ref_cows) 2878 if (root->ref_cows)
2876 btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0); 2879 btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0);
2880
2877 path = btrfs_alloc_path(); 2881 path = btrfs_alloc_path();
2878 BUG_ON(!path); 2882 BUG_ON(!path);
2879 path->reada = -1; 2883 path->reada = -1;
2880 2884
2881 /* FIXME, add redo link to tree so we don't leak on crash */
2882 key.objectid = inode->i_ino; 2885 key.objectid = inode->i_ino;
2883 key.offset = (u64)-1; 2886 key.offset = (u64)-1;
2884 key.type = (u8)-1; 2887 key.type = (u8)-1;
@@ -2886,17 +2889,17 @@ noinline int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
2886search_again: 2889search_again:
2887 path->leave_spinning = 1; 2890 path->leave_spinning = 1;
2888 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 2891 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2889 if (ret < 0) 2892 if (ret < 0) {
2890 goto error; 2893 err = ret;
2894 goto out;
2895 }
2891 2896
2892 if (ret > 0) { 2897 if (ret > 0) {
2893 /* there are no items in the tree for us to truncate, we're 2898 /* there are no items in the tree for us to truncate, we're
2894 * done 2899 * done
2895 */ 2900 */
2896 if (path->slots[0] == 0) { 2901 if (path->slots[0] == 0)
2897 ret = 0; 2902 goto out;
2898 goto error;
2899 }
2900 path->slots[0]--; 2903 path->slots[0]--;
2901 } 2904 }
2902 2905
@@ -2931,28 +2934,17 @@ search_again:
2931 } 2934 }
2932 item_end--; 2935 item_end--;
2933 } 2936 }
2934 if (item_end < new_size) { 2937 if (found_type > min_type) {
2935 if (found_type == BTRFS_DIR_ITEM_KEY) 2938 del_item = 1;
2936 found_type = BTRFS_INODE_ITEM_KEY; 2939 } else {
2937 else if (found_type == BTRFS_EXTENT_ITEM_KEY) 2940 if (item_end < new_size)
2938 found_type = BTRFS_EXTENT_DATA_KEY;
2939 else if (found_type == BTRFS_EXTENT_DATA_KEY)
2940 found_type = BTRFS_XATTR_ITEM_KEY;
2941 else if (found_type == BTRFS_XATTR_ITEM_KEY)
2942 found_type = BTRFS_INODE_REF_KEY;
2943 else if (found_type)
2944 found_type--;
2945 else
2946 break; 2941 break;
2947 btrfs_set_key_type(&key, found_type); 2942 if (found_key.offset >= new_size)
2948 goto next; 2943 del_item = 1;
2944 else
2945 del_item = 0;
2949 } 2946 }
2950 if (found_key.offset >= new_size)
2951 del_item = 1;
2952 else
2953 del_item = 0;
2954 found_extent = 0; 2947 found_extent = 0;
2955
2956 /* FIXME, shrink the extent if the ref count is only 1 */ 2948 /* FIXME, shrink the extent if the ref count is only 1 */
2957 if (found_type != BTRFS_EXTENT_DATA_KEY) 2949 if (found_type != BTRFS_EXTENT_DATA_KEY)
2958 goto delete; 2950 goto delete;
@@ -3039,42 +3031,36 @@ delete:
3039 inode->i_ino, extent_offset); 3031 inode->i_ino, extent_offset);
3040 BUG_ON(ret); 3032 BUG_ON(ret);
3041 } 3033 }
3042next:
3043 if (path->slots[0] == 0) {
3044 if (pending_del_nr)
3045 goto del_pending;
3046 btrfs_release_path(root, path);
3047 if (found_type == BTRFS_INODE_ITEM_KEY)
3048 break;
3049 goto search_again;
3050 }
3051 3034
3052 path->slots[0]--; 3035 if (found_type == BTRFS_INODE_ITEM_KEY)
3053 if (pending_del_nr && 3036 break;
3054 path->slots[0] + 1 != pending_del_slot) { 3037
3055 struct btrfs_key debug; 3038 if (path->slots[0] == 0 ||
3056del_pending: 3039 path->slots[0] != pending_del_slot) {
3057 btrfs_item_key_to_cpu(path->nodes[0], &debug, 3040 if (root->ref_cows) {
3058 pending_del_slot); 3041 err = -EAGAIN;
3059 ret = btrfs_del_items(trans, root, path, 3042 goto out;
3060 pending_del_slot, 3043 }
3061 pending_del_nr); 3044 if (pending_del_nr) {
3062 BUG_ON(ret); 3045 ret = btrfs_del_items(trans, root, path,
3063 pending_del_nr = 0; 3046 pending_del_slot,
3047 pending_del_nr);
3048 BUG_ON(ret);
3049 pending_del_nr = 0;
3050 }
3064 btrfs_release_path(root, path); 3051 btrfs_release_path(root, path);
3065 if (found_type == BTRFS_INODE_ITEM_KEY)
3066 break;
3067 goto search_again; 3052 goto search_again;
3053 } else {
3054 path->slots[0]--;
3068 } 3055 }
3069 } 3056 }
3070 ret = 0; 3057out:
3071error:
3072 if (pending_del_nr) { 3058 if (pending_del_nr) {
3073 ret = btrfs_del_items(trans, root, path, pending_del_slot, 3059 ret = btrfs_del_items(trans, root, path, pending_del_slot,
3074 pending_del_nr); 3060 pending_del_nr);
3075 } 3061 }
3076 btrfs_free_path(path); 3062 btrfs_free_path(path);
3077 return ret; 3063 return err;
3078} 3064}
3079 3065
3080/* 3066/*
@@ -3194,10 +3180,6 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
3194 if (size <= hole_start) 3180 if (size <= hole_start)
3195 return 0; 3181 return 0;
3196 3182
3197 err = btrfs_truncate_page(inode->i_mapping, inode->i_size);
3198 if (err)
3199 return err;
3200
3201 while (1) { 3183 while (1) {
3202 struct btrfs_ordered_extent *ordered; 3184 struct btrfs_ordered_extent *ordered;
3203 btrfs_wait_ordered_range(inode, hole_start, 3185 btrfs_wait_ordered_range(inode, hole_start,
@@ -3210,9 +3192,6 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
3210 btrfs_put_ordered_extent(ordered); 3192 btrfs_put_ordered_extent(ordered);
3211 } 3193 }
3212 3194
3213 trans = btrfs_start_transaction(root, 1);
3214 btrfs_set_trans_block_group(trans, inode);
3215
3216 cur_offset = hole_start; 3195 cur_offset = hole_start;
3217 while (1) { 3196 while (1) {
3218 em = btrfs_get_extent(inode, NULL, 0, cur_offset, 3197 em = btrfs_get_extent(inode, NULL, 0, cur_offset,
@@ -3220,38 +3199,120 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
3220 BUG_ON(IS_ERR(em) || !em); 3199 BUG_ON(IS_ERR(em) || !em);
3221 last_byte = min(extent_map_end(em), block_end); 3200 last_byte = min(extent_map_end(em), block_end);
3222 last_byte = (last_byte + mask) & ~mask; 3201 last_byte = (last_byte + mask) & ~mask;
3223 if (test_bit(EXTENT_FLAG_VACANCY, &em->flags)) { 3202 if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
3224 u64 hint_byte = 0; 3203 u64 hint_byte = 0;
3225 hole_size = last_byte - cur_offset; 3204 hole_size = last_byte - cur_offset;
3226 err = btrfs_drop_extents(trans, inode, cur_offset,
3227 cur_offset + hole_size,
3228 &hint_byte, 1);
3229 if (err)
3230 break;
3231 3205
3232 err = btrfs_reserve_metadata_space(root, 1); 3206 err = btrfs_reserve_metadata_space(root, 2);
3233 if (err) 3207 if (err)
3234 break; 3208 break;
3235 3209
3210 trans = btrfs_start_transaction(root, 1);
3211 btrfs_set_trans_block_group(trans, inode);
3212
3213 err = btrfs_drop_extents(trans, inode, cur_offset,
3214 cur_offset + hole_size,
3215 &hint_byte, 1);
3216 BUG_ON(err);
3217
3236 err = btrfs_insert_file_extent(trans, root, 3218 err = btrfs_insert_file_extent(trans, root,
3237 inode->i_ino, cur_offset, 0, 3219 inode->i_ino, cur_offset, 0,
3238 0, hole_size, 0, hole_size, 3220 0, hole_size, 0, hole_size,
3239 0, 0, 0); 3221 0, 0, 0);
3222 BUG_ON(err);
3223
3240 btrfs_drop_extent_cache(inode, hole_start, 3224 btrfs_drop_extent_cache(inode, hole_start,
3241 last_byte - 1, 0); 3225 last_byte - 1, 0);
3242 btrfs_unreserve_metadata_space(root, 1); 3226
3227 btrfs_end_transaction(trans, root);
3228 btrfs_unreserve_metadata_space(root, 2);
3243 } 3229 }
3244 free_extent_map(em); 3230 free_extent_map(em);
3245 cur_offset = last_byte; 3231 cur_offset = last_byte;
3246 if (err || cur_offset >= block_end) 3232 if (cur_offset >= block_end)
3247 break; 3233 break;
3248 } 3234 }
3249 3235
3250 btrfs_end_transaction(trans, root);
3251 unlock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS); 3236 unlock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS);
3252 return err; 3237 return err;
3253} 3238}
3254 3239
3240static int btrfs_setattr_size(struct inode *inode, struct iattr *attr)
3241{
3242 struct btrfs_root *root = BTRFS_I(inode)->root;
3243 struct btrfs_trans_handle *trans;
3244 unsigned long nr;
3245 int ret;
3246
3247 if (attr->ia_size == inode->i_size)
3248 return 0;
3249
3250 if (attr->ia_size > inode->i_size) {
3251 unsigned long limit;
3252 limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
3253 if (attr->ia_size > inode->i_sb->s_maxbytes)
3254 return -EFBIG;
3255 if (limit != RLIM_INFINITY && attr->ia_size > limit) {
3256 send_sig(SIGXFSZ, current, 0);
3257 return -EFBIG;
3258 }
3259 }
3260
3261 ret = btrfs_reserve_metadata_space(root, 1);
3262 if (ret)
3263 return ret;
3264
3265 trans = btrfs_start_transaction(root, 1);
3266 btrfs_set_trans_block_group(trans, inode);
3267
3268 ret = btrfs_orphan_add(trans, inode);
3269 BUG_ON(ret);
3270
3271 nr = trans->blocks_used;
3272 btrfs_end_transaction(trans, root);
3273 btrfs_unreserve_metadata_space(root, 1);
3274 btrfs_btree_balance_dirty(root, nr);
3275
3276 if (attr->ia_size > inode->i_size) {
3277 ret = btrfs_cont_expand(inode, attr->ia_size);
3278 if (ret) {
3279 btrfs_truncate(inode);
3280 return ret;
3281 }
3282
3283 i_size_write(inode, attr->ia_size);
3284 btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
3285
3286 trans = btrfs_start_transaction(root, 1);
3287 btrfs_set_trans_block_group(trans, inode);
3288
3289 ret = btrfs_update_inode(trans, root, inode);
3290 BUG_ON(ret);
3291 if (inode->i_nlink > 0) {
3292 ret = btrfs_orphan_del(trans, inode);
3293 BUG_ON(ret);
3294 }
3295 nr = trans->blocks_used;
3296 btrfs_end_transaction(trans, root);
3297 btrfs_btree_balance_dirty(root, nr);
3298 return 0;
3299 }
3300
3301 /*
3302 * We're truncating a file that used to have good data down to
3303 * zero. Make sure it gets into the ordered flush list so that
3304 * any new writes get down to disk quickly.
3305 */
3306 if (attr->ia_size == 0)
3307 BTRFS_I(inode)->ordered_data_close = 1;
3308
3309 /* we don't support swapfiles, so vmtruncate shouldn't fail */
3310 ret = vmtruncate(inode, attr->ia_size);
3311 BUG_ON(ret);
3312
3313 return 0;
3314}
3315
3255static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) 3316static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
3256{ 3317{
3257 struct inode *inode = dentry->d_inode; 3318 struct inode *inode = dentry->d_inode;
@@ -3262,23 +3323,14 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
3262 return err; 3323 return err;
3263 3324
3264 if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) { 3325 if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) {
3265 if (attr->ia_size > inode->i_size) { 3326 err = btrfs_setattr_size(inode, attr);
3266 err = btrfs_cont_expand(inode, attr->ia_size); 3327 if (err)
3267 if (err) 3328 return err;
3268 return err;
3269 } else if (inode->i_size > 0 &&
3270 attr->ia_size == 0) {
3271
3272 /* we're truncating a file that used to have good
3273 * data down to zero. Make sure it gets into
3274 * the ordered flush list so that any new writes
3275 * get down to disk quickly.
3276 */
3277 BTRFS_I(inode)->ordered_data_close = 1;
3278 }
3279 } 3329 }
3330 attr->ia_valid &= ~ATTR_SIZE;
3280 3331
3281 err = inode_setattr(inode, attr); 3332 if (attr->ia_valid)
3333 err = inode_setattr(inode, attr);
3282 3334
3283 if (!err && ((attr->ia_valid & ATTR_MODE))) 3335 if (!err && ((attr->ia_valid & ATTR_MODE)))
3284 err = btrfs_acl_chmod(inode); 3336 err = btrfs_acl_chmod(inode);
@@ -3310,30 +3362,32 @@ void btrfs_delete_inode(struct inode *inode)
3310 } 3362 }
3311 3363
3312 btrfs_i_size_write(inode, 0); 3364 btrfs_i_size_write(inode, 0);
3313 trans = btrfs_join_transaction(root, 1);
3314 3365
3315 btrfs_set_trans_block_group(trans, inode); 3366 while (1) {
3316 ret = btrfs_truncate_inode_items(trans, root, inode, inode->i_size, 0); 3367 trans = btrfs_start_transaction(root, 1);
3317 if (ret) { 3368 btrfs_set_trans_block_group(trans, inode);
3318 btrfs_orphan_del(NULL, inode); 3369 ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0);
3319 goto no_delete_lock;
3320 }
3321 3370
3322 btrfs_orphan_del(trans, inode); 3371 if (ret != -EAGAIN)
3372 break;
3323 3373
3324 nr = trans->blocks_used; 3374 nr = trans->blocks_used;
3325 clear_inode(inode); 3375 btrfs_end_transaction(trans, root);
3376 trans = NULL;
3377 btrfs_btree_balance_dirty(root, nr);
3378 }
3326 3379
3327 btrfs_end_transaction(trans, root); 3380 if (ret == 0) {
3328 btrfs_btree_balance_dirty(root, nr); 3381 ret = btrfs_orphan_del(trans, inode);
3329 return; 3382 BUG_ON(ret);
3383 }
3330 3384
3331no_delete_lock:
3332 nr = trans->blocks_used; 3385 nr = trans->blocks_used;
3333 btrfs_end_transaction(trans, root); 3386 btrfs_end_transaction(trans, root);
3334 btrfs_btree_balance_dirty(root, nr); 3387 btrfs_btree_balance_dirty(root, nr);
3335no_delete: 3388no_delete:
3336 clear_inode(inode); 3389 clear_inode(inode);
3390 return;
3337} 3391}
3338 3392
3339/* 3393/*
@@ -5097,17 +5151,20 @@ static void btrfs_truncate(struct inode *inode)
5097 unsigned long nr; 5151 unsigned long nr;
5098 u64 mask = root->sectorsize - 1; 5152 u64 mask = root->sectorsize - 1;
5099 5153
5100 if (!S_ISREG(inode->i_mode)) 5154 if (!S_ISREG(inode->i_mode)) {
5101 return; 5155 WARN_ON(1);
5102 if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
5103 return; 5156 return;
5157 }
5104 5158
5105 ret = btrfs_truncate_page(inode->i_mapping, inode->i_size); 5159 ret = btrfs_truncate_page(inode->i_mapping, inode->i_size);
5106 if (ret) 5160 if (ret)
5107 return; 5161 return;
5162
5108 btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); 5163 btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1);
5164 btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
5109 5165
5110 trans = btrfs_start_transaction(root, 1); 5166 trans = btrfs_start_transaction(root, 1);
5167 btrfs_set_trans_block_group(trans, inode);
5111 5168
5112 /* 5169 /*
5113 * setattr is responsible for setting the ordered_data_close flag, 5170 * setattr is responsible for setting the ordered_data_close flag,
@@ -5129,21 +5186,32 @@ static void btrfs_truncate(struct inode *inode)
5129 if (inode->i_size == 0 && BTRFS_I(inode)->ordered_data_close) 5186 if (inode->i_size == 0 && BTRFS_I(inode)->ordered_data_close)
5130 btrfs_add_ordered_operation(trans, root, inode); 5187 btrfs_add_ordered_operation(trans, root, inode);
5131 5188
5132 btrfs_set_trans_block_group(trans, inode); 5189 while (1) {
5133 btrfs_i_size_write(inode, inode->i_size); 5190 ret = btrfs_truncate_inode_items(trans, root, inode,
5191 inode->i_size,
5192 BTRFS_EXTENT_DATA_KEY);
5193 if (ret != -EAGAIN)
5194 break;
5134 5195
5135 ret = btrfs_orphan_add(trans, inode); 5196 ret = btrfs_update_inode(trans, root, inode);
5136 if (ret) 5197 BUG_ON(ret);
5137 goto out; 5198
5138 /* FIXME, add redo link to tree so we don't leak on crash */ 5199 nr = trans->blocks_used;
5139 ret = btrfs_truncate_inode_items(trans, root, inode, inode->i_size, 5200 btrfs_end_transaction(trans, root);
5140 BTRFS_EXTENT_DATA_KEY); 5201 btrfs_btree_balance_dirty(root, nr);
5141 btrfs_update_inode(trans, root, inode); 5202
5203 trans = btrfs_start_transaction(root, 1);
5204 btrfs_set_trans_block_group(trans, inode);
5205 }
5142 5206
5143 ret = btrfs_orphan_del(trans, inode); 5207 if (ret == 0 && inode->i_nlink > 0) {
5208 ret = btrfs_orphan_del(trans, inode);
5209 BUG_ON(ret);
5210 }
5211
5212 ret = btrfs_update_inode(trans, root, inode);
5144 BUG_ON(ret); 5213 BUG_ON(ret);
5145 5214
5146out:
5147 nr = trans->blocks_used; 5215 nr = trans->blocks_used;
5148 ret = btrfs_end_transaction_throttle(trans, root); 5216 ret = btrfs_end_transaction_throttle(trans, root);
5149 BUG_ON(ret); 5217 BUG_ON(ret);
@@ -5240,9 +5308,9 @@ void btrfs_destroy_inode(struct inode *inode)
5240 5308
5241 spin_lock(&root->list_lock); 5309 spin_lock(&root->list_lock);
5242 if (!list_empty(&BTRFS_I(inode)->i_orphan)) { 5310 if (!list_empty(&BTRFS_I(inode)->i_orphan)) {
5243 printk(KERN_ERR "BTRFS: inode %lu: inode still on the orphan" 5311 printk(KERN_INFO "BTRFS: inode %lu still on the orphan list\n",
5244 " list\n", inode->i_ino); 5312 inode->i_ino);
5245 dump_stack(); 5313 list_del_init(&BTRFS_I(inode)->i_orphan);
5246 } 5314 }
5247 spin_unlock(&root->list_lock); 5315 spin_unlock(&root->list_lock);
5248 5316