aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2009-12-17 19:01:03 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2009-12-17 19:01:03 -0500
commit7c508e50be47737b9a72d0f15c3ef1146925e2d2 (patch)
tree767a7b1427e6f150e46ce99fd5b8d2463dda479c /fs
parentdcc7cd011220d7425a265c9bbf04c5731dacec1b (diff)
parent7a5d24b1067823e870bf23e62bf8a788bd73818d (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable: Btrfs: make sure fallocate properly starts a transaction Btrfs: make metadata chunks smaller Btrfs: Show discard option in /proc/mounts Btrfs: deny sys_link across subvolumes. Btrfs: fail mount on bad mount options Btrfs: don't add extent 0 to the free space cache v2 Btrfs: Fix per root used space accounting Btrfs: Fix btrfs_drop_extent_cache for skip pinned case Btrfs: Add delayed iput Btrfs: Pass transaction handle to security and ACL initialization functions Btrfs: Make truncate(2) more ENOSPC friendly Btrfs: Make fallocate(2) more ENOSPC friendly Btrfs: Avoid orphan inodes cleanup during committing transaction Btrfs: Avoid orphan inodes cleanup while replaying log Btrfs: Fix disk_i_size update corner case Btrfs: Rewrite btrfs_drop_extents Btrfs: Add btrfs_duplicate_item Btrfs: Avoid superfluous tree-log writeout
Diffstat (limited to 'fs')
-rw-r--r--fs/btrfs/acl.c23
-rw-r--r--fs/btrfs/btrfs_inode.h5
-rw-r--r--fs/btrfs/ctree.c229
-rw-r--r--fs/btrfs/ctree.h40
-rw-r--r--fs/btrfs/dir-item.c19
-rw-r--r--fs/btrfs/disk-io.c27
-rw-r--r--fs/btrfs/extent-tree.c72
-rw-r--r--fs/btrfs/file.c669
-rw-r--r--fs/btrfs/inode.c567
-rw-r--r--fs/btrfs/ioctl.c34
-rw-r--r--fs/btrfs/ordered-data.c115
-rw-r--r--fs/btrfs/ordered-data.h5
-rw-r--r--fs/btrfs/relocation.c38
-rw-r--r--fs/btrfs/super.c15
-rw-r--r--fs/btrfs/transaction.c44
-rw-r--r--fs/btrfs/transaction.h6
-rw-r--r--fs/btrfs/tree-log.c86
-rw-r--r--fs/btrfs/volumes.c2
-rw-r--r--fs/btrfs/xattr.c80
-rw-r--r--fs/btrfs/xattr.h9
20 files changed, 1171 insertions, 914 deletions
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 52cbe47022bf..2e9e69987a82 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -94,7 +94,8 @@ static int btrfs_xattr_acl_get(struct dentry *dentry, const char *name,
94/* 94/*
95 * Needs to be called with fs_mutex held 95 * Needs to be called with fs_mutex held
96 */ 96 */
97static int btrfs_set_acl(struct inode *inode, struct posix_acl *acl, int type) 97static int btrfs_set_acl(struct btrfs_trans_handle *trans,
98 struct inode *inode, struct posix_acl *acl, int type)
98{ 99{
99 int ret, size = 0; 100 int ret, size = 0;
100 const char *name; 101 const char *name;
@@ -140,8 +141,7 @@ static int btrfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
140 goto out; 141 goto out;
141 } 142 }
142 143
143 ret = __btrfs_setxattr(inode, name, value, size, 0); 144 ret = __btrfs_setxattr(trans, inode, name, value, size, 0);
144
145out: 145out:
146 kfree(value); 146 kfree(value);
147 147
@@ -154,7 +154,7 @@ out:
154static int btrfs_xattr_acl_set(struct dentry *dentry, const char *name, 154static int btrfs_xattr_acl_set(struct dentry *dentry, const char *name,
155 const void *value, size_t size, int flags, int type) 155 const void *value, size_t size, int flags, int type)
156{ 156{
157 int ret = 0; 157 int ret;
158 struct posix_acl *acl = NULL; 158 struct posix_acl *acl = NULL;
159 159
160 if (value) { 160 if (value) {
@@ -167,7 +167,7 @@ static int btrfs_xattr_acl_set(struct dentry *dentry, const char *name,
167 } 167 }
168 } 168 }
169 169
170 ret = btrfs_set_acl(dentry->d_inode, acl, type); 170 ret = btrfs_set_acl(NULL, dentry->d_inode, acl, type);
171 171
172 posix_acl_release(acl); 172 posix_acl_release(acl);
173 173
@@ -196,7 +196,8 @@ int btrfs_check_acl(struct inode *inode, int mask)
196 * stuff has been fixed to work with that. If the locking stuff changes, we 196 * stuff has been fixed to work with that. If the locking stuff changes, we
197 * need to re-evaluate the acl locking stuff. 197 * need to re-evaluate the acl locking stuff.
198 */ 198 */
199int btrfs_init_acl(struct inode *inode, struct inode *dir) 199int btrfs_init_acl(struct btrfs_trans_handle *trans,
200 struct inode *inode, struct inode *dir)
200{ 201{
201 struct posix_acl *acl = NULL; 202 struct posix_acl *acl = NULL;
202 int ret = 0; 203 int ret = 0;
@@ -221,7 +222,8 @@ int btrfs_init_acl(struct inode *inode, struct inode *dir)
221 mode_t mode; 222 mode_t mode;
222 223
223 if (S_ISDIR(inode->i_mode)) { 224 if (S_ISDIR(inode->i_mode)) {
224 ret = btrfs_set_acl(inode, acl, ACL_TYPE_DEFAULT); 225 ret = btrfs_set_acl(trans, inode, acl,
226 ACL_TYPE_DEFAULT);
225 if (ret) 227 if (ret)
226 goto failed; 228 goto failed;
227 } 229 }
@@ -236,7 +238,7 @@ int btrfs_init_acl(struct inode *inode, struct inode *dir)
236 inode->i_mode = mode; 238 inode->i_mode = mode;
237 if (ret > 0) { 239 if (ret > 0) {
238 /* we need an acl */ 240 /* we need an acl */
239 ret = btrfs_set_acl(inode, clone, 241 ret = btrfs_set_acl(trans, inode, clone,
240 ACL_TYPE_ACCESS); 242 ACL_TYPE_ACCESS);
241 } 243 }
242 } 244 }
@@ -269,7 +271,7 @@ int btrfs_acl_chmod(struct inode *inode)
269 271
270 ret = posix_acl_chmod_masq(clone, inode->i_mode); 272 ret = posix_acl_chmod_masq(clone, inode->i_mode);
271 if (!ret) 273 if (!ret)
272 ret = btrfs_set_acl(inode, clone, ACL_TYPE_ACCESS); 274 ret = btrfs_set_acl(NULL, inode, clone, ACL_TYPE_ACCESS);
273 275
274 posix_acl_release(clone); 276 posix_acl_release(clone);
275 277
@@ -297,7 +299,8 @@ int btrfs_acl_chmod(struct inode *inode)
297 return 0; 299 return 0;
298} 300}
299 301
300int btrfs_init_acl(struct inode *inode, struct inode *dir) 302int btrfs_init_acl(struct btrfs_trans_handle *trans,
303 struct inode *inode, struct inode *dir)
301{ 304{
302 return 0; 305 return 0;
303} 306}
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index f6783a42f010..3f1f50d9d916 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -44,9 +44,6 @@ struct btrfs_inode {
44 */ 44 */
45 struct extent_io_tree io_failure_tree; 45 struct extent_io_tree io_failure_tree;
46 46
47 /* held while inesrting or deleting extents from files */
48 struct mutex extent_mutex;
49
50 /* held while logging the inode in tree-log.c */ 47 /* held while logging the inode in tree-log.c */
51 struct mutex log_mutex; 48 struct mutex log_mutex;
52 49
@@ -166,7 +163,7 @@ static inline struct btrfs_inode *BTRFS_I(struct inode *inode)
166 163
167static inline void btrfs_i_size_write(struct inode *inode, u64 size) 164static inline void btrfs_i_size_write(struct inode *inode, u64 size)
168{ 165{
169 inode->i_size = size; 166 i_size_write(inode, size);
170 BTRFS_I(inode)->disk_i_size = size; 167 BTRFS_I(inode)->disk_i_size = size;
171} 168}
172 169
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index ec96f3a6d536..c4bc570a396e 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -37,6 +37,11 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
37 struct extent_buffer *src_buf); 37 struct extent_buffer *src_buf);
38static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, 38static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
39 struct btrfs_path *path, int level, int slot); 39 struct btrfs_path *path, int level, int slot);
40static int setup_items_for_insert(struct btrfs_trans_handle *trans,
41 struct btrfs_root *root, struct btrfs_path *path,
42 struct btrfs_key *cpu_key, u32 *data_size,
43 u32 total_data, u32 total_size, int nr);
44
40 45
41struct btrfs_path *btrfs_alloc_path(void) 46struct btrfs_path *btrfs_alloc_path(void)
42{ 47{
@@ -451,9 +456,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
451 extent_buffer_get(cow); 456 extent_buffer_get(cow);
452 spin_unlock(&root->node_lock); 457 spin_unlock(&root->node_lock);
453 458
454 btrfs_free_extent(trans, root, buf->start, buf->len, 459 btrfs_free_tree_block(trans, root, buf->start, buf->len,
455 parent_start, root->root_key.objectid, 460 parent_start, root->root_key.objectid, level);
456 level, 0);
457 free_extent_buffer(buf); 461 free_extent_buffer(buf);
458 add_root_to_dirty_list(root); 462 add_root_to_dirty_list(root);
459 } else { 463 } else {
@@ -468,9 +472,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
468 btrfs_set_node_ptr_generation(parent, parent_slot, 472 btrfs_set_node_ptr_generation(parent, parent_slot,
469 trans->transid); 473 trans->transid);
470 btrfs_mark_buffer_dirty(parent); 474 btrfs_mark_buffer_dirty(parent);
471 btrfs_free_extent(trans, root, buf->start, buf->len, 475 btrfs_free_tree_block(trans, root, buf->start, buf->len,
472 parent_start, root->root_key.objectid, 476 parent_start, root->root_key.objectid, level);
473 level, 0);
474 } 477 }
475 if (unlock_orig) 478 if (unlock_orig)
476 btrfs_tree_unlock(buf); 479 btrfs_tree_unlock(buf);
@@ -1030,8 +1033,8 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
1030 btrfs_tree_unlock(mid); 1033 btrfs_tree_unlock(mid);
1031 /* once for the path */ 1034 /* once for the path */
1032 free_extent_buffer(mid); 1035 free_extent_buffer(mid);
1033 ret = btrfs_free_extent(trans, root, mid->start, mid->len, 1036 ret = btrfs_free_tree_block(trans, root, mid->start, mid->len,
1034 0, root->root_key.objectid, level, 1); 1037 0, root->root_key.objectid, level);
1035 /* once for the root ptr */ 1038 /* once for the root ptr */
1036 free_extent_buffer(mid); 1039 free_extent_buffer(mid);
1037 return ret; 1040 return ret;
@@ -1095,10 +1098,10 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
1095 1); 1098 1);
1096 if (wret) 1099 if (wret)
1097 ret = wret; 1100 ret = wret;
1098 wret = btrfs_free_extent(trans, root, bytenr, 1101 wret = btrfs_free_tree_block(trans, root,
1099 blocksize, 0, 1102 bytenr, blocksize, 0,
1100 root->root_key.objectid, 1103 root->root_key.objectid,
1101 level, 0); 1104 level);
1102 if (wret) 1105 if (wret)
1103 ret = wret; 1106 ret = wret;
1104 } else { 1107 } else {
@@ -1143,9 +1146,8 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
1143 wret = del_ptr(trans, root, path, level + 1, pslot); 1146 wret = del_ptr(trans, root, path, level + 1, pslot);
1144 if (wret) 1147 if (wret)
1145 ret = wret; 1148 ret = wret;
1146 wret = btrfs_free_extent(trans, root, bytenr, blocksize, 1149 wret = btrfs_free_tree_block(trans, root, bytenr, blocksize,
1147 0, root->root_key.objectid, 1150 0, root->root_key.objectid, level);
1148 level, 0);
1149 if (wret) 1151 if (wret)
1150 ret = wret; 1152 ret = wret;
1151 } else { 1153 } else {
@@ -2997,75 +2999,85 @@ again:
2997 return ret; 2999 return ret;
2998} 3000}
2999 3001
3000/* 3002static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,
3001 * This function splits a single item into two items, 3003 struct btrfs_root *root,
3002 * giving 'new_key' to the new item and splitting the 3004 struct btrfs_path *path, int ins_len)
3003 * old one at split_offset (from the start of the item).
3004 *
3005 * The path may be released by this operation. After
3006 * the split, the path is pointing to the old item. The
3007 * new item is going to be in the same node as the old one.
3008 *
3009 * Note, the item being split must be smaller enough to live alone on
3010 * a tree block with room for one extra struct btrfs_item
3011 *
3012 * This allows us to split the item in place, keeping a lock on the
3013 * leaf the entire time.
3014 */
3015int btrfs_split_item(struct btrfs_trans_handle *trans,
3016 struct btrfs_root *root,
3017 struct btrfs_path *path,
3018 struct btrfs_key *new_key,
3019 unsigned long split_offset)
3020{ 3005{
3021 u32 item_size; 3006 struct btrfs_key key;
3022 struct extent_buffer *leaf; 3007 struct extent_buffer *leaf;
3023 struct btrfs_key orig_key; 3008 struct btrfs_file_extent_item *fi;
3024 struct btrfs_item *item; 3009 u64 extent_len = 0;
3025 struct btrfs_item *new_item; 3010 u32 item_size;
3026 int ret = 0; 3011 int ret;
3027 int slot;
3028 u32 nritems;
3029 u32 orig_offset;
3030 struct btrfs_disk_key disk_key;
3031 char *buf;
3032 3012
3033 leaf = path->nodes[0]; 3013 leaf = path->nodes[0];
3034 btrfs_item_key_to_cpu(leaf, &orig_key, path->slots[0]); 3014 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
3035 if (btrfs_leaf_free_space(root, leaf) >= sizeof(struct btrfs_item)) 3015
3036 goto split; 3016 BUG_ON(key.type != BTRFS_EXTENT_DATA_KEY &&
3017 key.type != BTRFS_EXTENT_CSUM_KEY);
3018
3019 if (btrfs_leaf_free_space(root, leaf) >= ins_len)
3020 return 0;
3037 3021
3038 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 3022 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
3023 if (key.type == BTRFS_EXTENT_DATA_KEY) {
3024 fi = btrfs_item_ptr(leaf, path->slots[0],
3025 struct btrfs_file_extent_item);
3026 extent_len = btrfs_file_extent_num_bytes(leaf, fi);
3027 }
3039 btrfs_release_path(root, path); 3028 btrfs_release_path(root, path);
3040 3029
3041 path->search_for_split = 1;
3042 path->keep_locks = 1; 3030 path->keep_locks = 1;
3043 3031 path->search_for_split = 1;
3044 ret = btrfs_search_slot(trans, root, &orig_key, path, 0, 1); 3032 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3045 path->search_for_split = 0; 3033 path->search_for_split = 0;
3034 if (ret < 0)
3035 goto err;
3046 3036
3037 ret = -EAGAIN;
3038 leaf = path->nodes[0];
3047 /* if our item isn't there or got smaller, return now */ 3039 /* if our item isn't there or got smaller, return now */
3048 if (ret != 0 || item_size != btrfs_item_size_nr(path->nodes[0], 3040 if (ret > 0 || item_size != btrfs_item_size_nr(leaf, path->slots[0]))
3049 path->slots[0])) { 3041 goto err;
3050 path->keep_locks = 0; 3042
3051 return -EAGAIN; 3043 if (key.type == BTRFS_EXTENT_DATA_KEY) {
3044 fi = btrfs_item_ptr(leaf, path->slots[0],
3045 struct btrfs_file_extent_item);
3046 if (extent_len != btrfs_file_extent_num_bytes(leaf, fi))
3047 goto err;
3052 } 3048 }
3053 3049
3054 btrfs_set_path_blocking(path); 3050 btrfs_set_path_blocking(path);
3055 ret = split_leaf(trans, root, &orig_key, path, 3051 ret = split_leaf(trans, root, &key, path, ins_len, 1);
3056 sizeof(struct btrfs_item), 1);
3057 path->keep_locks = 0;
3058 BUG_ON(ret); 3052 BUG_ON(ret);
3059 3053
3054 path->keep_locks = 0;
3060 btrfs_unlock_up_safe(path, 1); 3055 btrfs_unlock_up_safe(path, 1);
3056 return 0;
3057err:
3058 path->keep_locks = 0;
3059 return ret;
3060}
3061
3062static noinline int split_item(struct btrfs_trans_handle *trans,
3063 struct btrfs_root *root,
3064 struct btrfs_path *path,
3065 struct btrfs_key *new_key,
3066 unsigned long split_offset)
3067{
3068 struct extent_buffer *leaf;
3069 struct btrfs_item *item;
3070 struct btrfs_item *new_item;
3071 int slot;
3072 char *buf;
3073 u32 nritems;
3074 u32 item_size;
3075 u32 orig_offset;
3076 struct btrfs_disk_key disk_key;
3077
3061 leaf = path->nodes[0]; 3078 leaf = path->nodes[0];
3062 BUG_ON(btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item)); 3079 BUG_ON(btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item));
3063 3080
3064split:
3065 /*
3066 * make sure any changes to the path from split_leaf leave it
3067 * in a blocking state
3068 */
3069 btrfs_set_path_blocking(path); 3081 btrfs_set_path_blocking(path);
3070 3082
3071 item = btrfs_item_nr(leaf, path->slots[0]); 3083 item = btrfs_item_nr(leaf, path->slots[0]);
@@ -3073,19 +3085,19 @@ split:
3073 item_size = btrfs_item_size(leaf, item); 3085 item_size = btrfs_item_size(leaf, item);
3074 3086
3075 buf = kmalloc(item_size, GFP_NOFS); 3087 buf = kmalloc(item_size, GFP_NOFS);
3088 if (!buf)
3089 return -ENOMEM;
3090
3076 read_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf, 3091 read_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf,
3077 path->slots[0]), item_size); 3092 path->slots[0]), item_size);
3078 slot = path->slots[0] + 1;
3079 leaf = path->nodes[0];
3080 3093
3094 slot = path->slots[0] + 1;
3081 nritems = btrfs_header_nritems(leaf); 3095 nritems = btrfs_header_nritems(leaf);
3082
3083 if (slot != nritems) { 3096 if (slot != nritems) {
3084 /* shift the items */ 3097 /* shift the items */
3085 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1), 3098 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1),
3086 btrfs_item_nr_offset(slot), 3099 btrfs_item_nr_offset(slot),
3087 (nritems - slot) * sizeof(struct btrfs_item)); 3100 (nritems - slot) * sizeof(struct btrfs_item));
3088
3089 } 3101 }
3090 3102
3091 btrfs_cpu_key_to_disk(&disk_key, new_key); 3103 btrfs_cpu_key_to_disk(&disk_key, new_key);
@@ -3113,16 +3125,81 @@ split:
3113 item_size - split_offset); 3125 item_size - split_offset);
3114 btrfs_mark_buffer_dirty(leaf); 3126 btrfs_mark_buffer_dirty(leaf);
3115 3127
3116 ret = 0; 3128 BUG_ON(btrfs_leaf_free_space(root, leaf) < 0);
3117 if (btrfs_leaf_free_space(root, leaf) < 0) {
3118 btrfs_print_leaf(root, leaf);
3119 BUG();
3120 }
3121 kfree(buf); 3129 kfree(buf);
3130 return 0;
3131}
3132
3133/*
3134 * This function splits a single item into two items,
3135 * giving 'new_key' to the new item and splitting the
3136 * old one at split_offset (from the start of the item).
3137 *
3138 * The path may be released by this operation. After
3139 * the split, the path is pointing to the old item. The
3140 * new item is going to be in the same node as the old one.
3141 *
3142 * Note, the item being split must be smaller enough to live alone on
3143 * a tree block with room for one extra struct btrfs_item
3144 *
3145 * This allows us to split the item in place, keeping a lock on the
3146 * leaf the entire time.
3147 */
3148int btrfs_split_item(struct btrfs_trans_handle *trans,
3149 struct btrfs_root *root,
3150 struct btrfs_path *path,
3151 struct btrfs_key *new_key,
3152 unsigned long split_offset)
3153{
3154 int ret;
3155 ret = setup_leaf_for_split(trans, root, path,
3156 sizeof(struct btrfs_item));
3157 if (ret)
3158 return ret;
3159
3160 ret = split_item(trans, root, path, new_key, split_offset);
3122 return ret; 3161 return ret;
3123} 3162}
3124 3163
3125/* 3164/*
3165 * This function duplicate a item, giving 'new_key' to the new item.
3166 * It guarantees both items live in the same tree leaf and the new item
3167 * is contiguous with the original item.
3168 *
3169 * This allows us to split file extent in place, keeping a lock on the
3170 * leaf the entire time.
3171 */
3172int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
3173 struct btrfs_root *root,
3174 struct btrfs_path *path,
3175 struct btrfs_key *new_key)
3176{
3177 struct extent_buffer *leaf;
3178 int ret;
3179 u32 item_size;
3180
3181 leaf = path->nodes[0];
3182 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
3183 ret = setup_leaf_for_split(trans, root, path,
3184 item_size + sizeof(struct btrfs_item));
3185 if (ret)
3186 return ret;
3187
3188 path->slots[0]++;
3189 ret = setup_items_for_insert(trans, root, path, new_key, &item_size,
3190 item_size, item_size +
3191 sizeof(struct btrfs_item), 1);
3192 BUG_ON(ret);
3193
3194 leaf = path->nodes[0];
3195 memcpy_extent_buffer(leaf,
3196 btrfs_item_ptr_offset(leaf, path->slots[0]),
3197 btrfs_item_ptr_offset(leaf, path->slots[0] - 1),
3198 item_size);
3199 return 0;
3200}
3201
3202/*
3126 * make the item pointed to by the path smaller. new_size indicates 3203 * make the item pointed to by the path smaller. new_size indicates
3127 * how small to make it, and from_end tells us if we just chop bytes 3204 * how small to make it, and from_end tells us if we just chop bytes
3128 * off the end of the item or if we shift the item to chop bytes off 3205 * off the end of the item or if we shift the item to chop bytes off
@@ -3714,8 +3791,8 @@ static noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans,
3714 */ 3791 */
3715 btrfs_unlock_up_safe(path, 0); 3792 btrfs_unlock_up_safe(path, 0);
3716 3793
3717 ret = btrfs_free_extent(trans, root, leaf->start, leaf->len, 3794 ret = btrfs_free_tree_block(trans, root, leaf->start, leaf->len,
3718 0, root->root_key.objectid, 0, 0); 3795 0, root->root_key.objectid, 0);
3719 return ret; 3796 return ret;
3720} 3797}
3721/* 3798/*
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 444b3e9b92a4..9f806dd04c27 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -310,6 +310,9 @@ struct btrfs_header {
310#define BTRFS_MAX_INLINE_DATA_SIZE(r) (BTRFS_LEAF_DATA_SIZE(r) - \ 310#define BTRFS_MAX_INLINE_DATA_SIZE(r) (BTRFS_LEAF_DATA_SIZE(r) - \
311 sizeof(struct btrfs_item) - \ 311 sizeof(struct btrfs_item) - \
312 sizeof(struct btrfs_file_extent_item)) 312 sizeof(struct btrfs_file_extent_item))
313#define BTRFS_MAX_XATTR_SIZE(r) (BTRFS_LEAF_DATA_SIZE(r) - \
314 sizeof(struct btrfs_item) -\
315 sizeof(struct btrfs_dir_item))
313 316
314 317
315/* 318/*
@@ -859,8 +862,9 @@ struct btrfs_fs_info {
859 struct mutex ordered_operations_mutex; 862 struct mutex ordered_operations_mutex;
860 struct rw_semaphore extent_commit_sem; 863 struct rw_semaphore extent_commit_sem;
861 864
862 struct rw_semaphore subvol_sem; 865 struct rw_semaphore cleanup_work_sem;
863 866
867 struct rw_semaphore subvol_sem;
864 struct srcu_struct subvol_srcu; 868 struct srcu_struct subvol_srcu;
865 869
866 struct list_head trans_list; 870 struct list_head trans_list;
@@ -868,6 +872,9 @@ struct btrfs_fs_info {
868 struct list_head dead_roots; 872 struct list_head dead_roots;
869 struct list_head caching_block_groups; 873 struct list_head caching_block_groups;
870 874
875 spinlock_t delayed_iput_lock;
876 struct list_head delayed_iputs;
877
871 atomic_t nr_async_submits; 878 atomic_t nr_async_submits;
872 atomic_t async_submit_draining; 879 atomic_t async_submit_draining;
873 atomic_t nr_async_bios; 880 atomic_t nr_async_bios;
@@ -1034,12 +1041,12 @@ struct btrfs_root {
1034 int ref_cows; 1041 int ref_cows;
1035 int track_dirty; 1042 int track_dirty;
1036 int in_radix; 1043 int in_radix;
1044 int clean_orphans;
1037 1045
1038 u64 defrag_trans_start; 1046 u64 defrag_trans_start;
1039 struct btrfs_key defrag_progress; 1047 struct btrfs_key defrag_progress;
1040 struct btrfs_key defrag_max; 1048 struct btrfs_key defrag_max;
1041 int defrag_running; 1049 int defrag_running;
1042 int defrag_level;
1043 char *name; 1050 char *name;
1044 int in_sysfs; 1051 int in_sysfs;
1045 1052
@@ -1975,6 +1982,10 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
1975 u64 parent, u64 root_objectid, 1982 u64 parent, u64 root_objectid,
1976 struct btrfs_disk_key *key, int level, 1983 struct btrfs_disk_key *key, int level,
1977 u64 hint, u64 empty_size); 1984 u64 hint, u64 empty_size);
1985int btrfs_free_tree_block(struct btrfs_trans_handle *trans,
1986 struct btrfs_root *root,
1987 u64 bytenr, u32 blocksize,
1988 u64 parent, u64 root_objectid, int level);
1978struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, 1989struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
1979 struct btrfs_root *root, 1990 struct btrfs_root *root,
1980 u64 bytenr, u32 blocksize, 1991 u64 bytenr, u32 blocksize,
@@ -2089,6 +2100,10 @@ int btrfs_split_item(struct btrfs_trans_handle *trans,
2089 struct btrfs_path *path, 2100 struct btrfs_path *path,
2090 struct btrfs_key *new_key, 2101 struct btrfs_key *new_key,
2091 unsigned long split_offset); 2102 unsigned long split_offset);
2103int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
2104 struct btrfs_root *root,
2105 struct btrfs_path *path,
2106 struct btrfs_key *new_key);
2092int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root 2107int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
2093 *root, struct btrfs_key *key, struct btrfs_path *p, int 2108 *root, struct btrfs_key *key, struct btrfs_path *p, int
2094 ins_len, int cow); 2109 ins_len, int cow);
@@ -2196,9 +2211,10 @@ int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans,
2196 struct btrfs_path *path, 2211 struct btrfs_path *path,
2197 struct btrfs_dir_item *di); 2212 struct btrfs_dir_item *di);
2198int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans, 2213int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
2199 struct btrfs_root *root, const char *name, 2214 struct btrfs_root *root,
2200 u16 name_len, const void *data, u16 data_len, 2215 struct btrfs_path *path, u64 objectid,
2201 u64 dir); 2216 const char *name, u16 name_len,
2217 const void *data, u16 data_len);
2202struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans, 2218struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans,
2203 struct btrfs_root *root, 2219 struct btrfs_root *root,
2204 struct btrfs_path *path, u64 dir, 2220 struct btrfs_path *path, u64 dir,
@@ -2292,7 +2308,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
2292 struct inode *inode, u64 new_size, 2308 struct inode *inode, u64 new_size,
2293 u32 min_type); 2309 u32 min_type);
2294 2310
2295int btrfs_start_delalloc_inodes(struct btrfs_root *root); 2311int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput);
2296int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end); 2312int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end);
2297int btrfs_writepages(struct address_space *mapping, 2313int btrfs_writepages(struct address_space *mapping,
2298 struct writeback_control *wbc); 2314 struct writeback_control *wbc);
@@ -2332,6 +2348,8 @@ int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode);
2332void btrfs_orphan_cleanup(struct btrfs_root *root); 2348void btrfs_orphan_cleanup(struct btrfs_root *root);
2333int btrfs_cont_expand(struct inode *inode, loff_t size); 2349int btrfs_cont_expand(struct inode *inode, loff_t size);
2334int btrfs_invalidate_inodes(struct btrfs_root *root); 2350int btrfs_invalidate_inodes(struct btrfs_root *root);
2351void btrfs_add_delayed_iput(struct inode *inode);
2352void btrfs_run_delayed_iputs(struct btrfs_root *root);
2335extern const struct dentry_operations btrfs_dentry_operations; 2353extern const struct dentry_operations btrfs_dentry_operations;
2336 2354
2337/* ioctl.c */ 2355/* ioctl.c */
@@ -2345,12 +2363,9 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
2345 int skip_pinned); 2363 int skip_pinned);
2346int btrfs_check_file(struct btrfs_root *root, struct inode *inode); 2364int btrfs_check_file(struct btrfs_root *root, struct inode *inode);
2347extern const struct file_operations btrfs_file_operations; 2365extern const struct file_operations btrfs_file_operations;
2348int btrfs_drop_extents(struct btrfs_trans_handle *trans, 2366int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode,
2349 struct btrfs_root *root, struct inode *inode, 2367 u64 start, u64 end, u64 *hint_byte, int drop_cache);
2350 u64 start, u64 end, u64 locked_end,
2351 u64 inline_limit, u64 *hint_block, int drop_cache);
2352int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, 2368int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
2353 struct btrfs_root *root,
2354 struct inode *inode, u64 start, u64 end); 2369 struct inode *inode, u64 start, u64 end);
2355int btrfs_release_file(struct inode *inode, struct file *file); 2370int btrfs_release_file(struct inode *inode, struct file *file);
2356 2371
@@ -2380,7 +2395,8 @@ int btrfs_check_acl(struct inode *inode, int mask);
2380#else 2395#else
2381#define btrfs_check_acl NULL 2396#define btrfs_check_acl NULL
2382#endif 2397#endif
2383int btrfs_init_acl(struct inode *inode, struct inode *dir); 2398int btrfs_init_acl(struct btrfs_trans_handle *trans,
2399 struct inode *inode, struct inode *dir);
2384int btrfs_acl_chmod(struct inode *inode); 2400int btrfs_acl_chmod(struct inode *inode);
2385 2401
2386/* relocation.c */ 2402/* relocation.c */
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index f3a6075519cc..e9103b3baa49 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -68,12 +68,12 @@ static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle
68 * into the tree 68 * into the tree
69 */ 69 */
70int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans, 70int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
71 struct btrfs_root *root, const char *name, 71 struct btrfs_root *root,
72 u16 name_len, const void *data, u16 data_len, 72 struct btrfs_path *path, u64 objectid,
73 u64 dir) 73 const char *name, u16 name_len,
74 const void *data, u16 data_len)
74{ 75{
75 int ret = 0; 76 int ret = 0;
76 struct btrfs_path *path;
77 struct btrfs_dir_item *dir_item; 77 struct btrfs_dir_item *dir_item;
78 unsigned long name_ptr, data_ptr; 78 unsigned long name_ptr, data_ptr;
79 struct btrfs_key key, location; 79 struct btrfs_key key, location;
@@ -81,15 +81,11 @@ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
81 struct extent_buffer *leaf; 81 struct extent_buffer *leaf;
82 u32 data_size; 82 u32 data_size;
83 83
84 key.objectid = dir; 84 BUG_ON(name_len + data_len > BTRFS_MAX_XATTR_SIZE(root));
85
86 key.objectid = objectid;
85 btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY); 87 btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY);
86 key.offset = btrfs_name_hash(name, name_len); 88 key.offset = btrfs_name_hash(name, name_len);
87 path = btrfs_alloc_path();
88 if (!path)
89 return -ENOMEM;
90 if (name_len + data_len + sizeof(struct btrfs_dir_item) >
91 BTRFS_LEAF_DATA_SIZE(root) - sizeof(struct btrfs_item))
92 return -ENOSPC;
93 89
94 data_size = sizeof(*dir_item) + name_len + data_len; 90 data_size = sizeof(*dir_item) + name_len + data_len;
95 dir_item = insert_with_overflow(trans, root, path, &key, data_size, 91 dir_item = insert_with_overflow(trans, root, path, &key, data_size,
@@ -117,7 +113,6 @@ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
117 write_extent_buffer(leaf, data, data_ptr, data_len); 113 write_extent_buffer(leaf, data, data_ptr, data_len);
118 btrfs_mark_buffer_dirty(path->nodes[0]); 114 btrfs_mark_buffer_dirty(path->nodes[0]);
119 115
120 btrfs_free_path(path);
121 return ret; 116 return ret;
122} 117}
123 118
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 02b6afbd7450..009e3bd18f23 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -892,6 +892,8 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
892 root->stripesize = stripesize; 892 root->stripesize = stripesize;
893 root->ref_cows = 0; 893 root->ref_cows = 0;
894 root->track_dirty = 0; 894 root->track_dirty = 0;
895 root->in_radix = 0;
896 root->clean_orphans = 0;
895 897
896 root->fs_info = fs_info; 898 root->fs_info = fs_info;
897 root->objectid = objectid; 899 root->objectid = objectid;
@@ -928,7 +930,6 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
928 root->defrag_trans_start = fs_info->generation; 930 root->defrag_trans_start = fs_info->generation;
929 init_completion(&root->kobj_unregister); 931 init_completion(&root->kobj_unregister);
930 root->defrag_running = 0; 932 root->defrag_running = 0;
931 root->defrag_level = 0;
932 root->root_key.objectid = objectid; 933 root->root_key.objectid = objectid;
933 root->anon_super.s_root = NULL; 934 root->anon_super.s_root = NULL;
934 root->anon_super.s_dev = 0; 935 root->anon_super.s_dev = 0;
@@ -980,12 +981,12 @@ int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans,
980 981
981 while (1) { 982 while (1) {
982 ret = find_first_extent_bit(&log_root_tree->dirty_log_pages, 983 ret = find_first_extent_bit(&log_root_tree->dirty_log_pages,
983 0, &start, &end, EXTENT_DIRTY); 984 0, &start, &end, EXTENT_DIRTY | EXTENT_NEW);
984 if (ret) 985 if (ret)
985 break; 986 break;
986 987
987 clear_extent_dirty(&log_root_tree->dirty_log_pages, 988 clear_extent_bits(&log_root_tree->dirty_log_pages, start, end,
988 start, end, GFP_NOFS); 989 EXTENT_DIRTY | EXTENT_NEW, GFP_NOFS);
989 } 990 }
990 eb = fs_info->log_root_tree->node; 991 eb = fs_info->log_root_tree->node;
991 992
@@ -1210,8 +1211,10 @@ again:
1210 ret = radix_tree_insert(&fs_info->fs_roots_radix, 1211 ret = radix_tree_insert(&fs_info->fs_roots_radix,
1211 (unsigned long)root->root_key.objectid, 1212 (unsigned long)root->root_key.objectid,
1212 root); 1213 root);
1213 if (ret == 0) 1214 if (ret == 0) {
1214 root->in_radix = 1; 1215 root->in_radix = 1;
1216 root->clean_orphans = 1;
1217 }
1215 spin_unlock(&fs_info->fs_roots_radix_lock); 1218 spin_unlock(&fs_info->fs_roots_radix_lock);
1216 radix_tree_preload_end(); 1219 radix_tree_preload_end();
1217 if (ret) { 1220 if (ret) {
@@ -1225,10 +1228,6 @@ again:
1225 ret = btrfs_find_dead_roots(fs_info->tree_root, 1228 ret = btrfs_find_dead_roots(fs_info->tree_root,
1226 root->root_key.objectid); 1229 root->root_key.objectid);
1227 WARN_ON(ret); 1230 WARN_ON(ret);
1228
1229 if (!(fs_info->sb->s_flags & MS_RDONLY))
1230 btrfs_orphan_cleanup(root);
1231
1232 return root; 1231 return root;
1233fail: 1232fail:
1234 free_fs_root(root); 1233 free_fs_root(root);
@@ -1477,6 +1476,7 @@ static int cleaner_kthread(void *arg)
1477 1476
1478 if (!(root->fs_info->sb->s_flags & MS_RDONLY) && 1477 if (!(root->fs_info->sb->s_flags & MS_RDONLY) &&
1479 mutex_trylock(&root->fs_info->cleaner_mutex)) { 1478 mutex_trylock(&root->fs_info->cleaner_mutex)) {
1479 btrfs_run_delayed_iputs(root);
1480 btrfs_clean_old_snapshots(root); 1480 btrfs_clean_old_snapshots(root);
1481 mutex_unlock(&root->fs_info->cleaner_mutex); 1481 mutex_unlock(&root->fs_info->cleaner_mutex);
1482 } 1482 }
@@ -1606,6 +1606,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1606 INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC); 1606 INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC);
1607 INIT_LIST_HEAD(&fs_info->trans_list); 1607 INIT_LIST_HEAD(&fs_info->trans_list);
1608 INIT_LIST_HEAD(&fs_info->dead_roots); 1608 INIT_LIST_HEAD(&fs_info->dead_roots);
1609 INIT_LIST_HEAD(&fs_info->delayed_iputs);
1609 INIT_LIST_HEAD(&fs_info->hashers); 1610 INIT_LIST_HEAD(&fs_info->hashers);
1610 INIT_LIST_HEAD(&fs_info->delalloc_inodes); 1611 INIT_LIST_HEAD(&fs_info->delalloc_inodes);
1611 INIT_LIST_HEAD(&fs_info->ordered_operations); 1612 INIT_LIST_HEAD(&fs_info->ordered_operations);
@@ -1614,6 +1615,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1614 spin_lock_init(&fs_info->new_trans_lock); 1615 spin_lock_init(&fs_info->new_trans_lock);
1615 spin_lock_init(&fs_info->ref_cache_lock); 1616 spin_lock_init(&fs_info->ref_cache_lock);
1616 spin_lock_init(&fs_info->fs_roots_radix_lock); 1617 spin_lock_init(&fs_info->fs_roots_radix_lock);
1618 spin_lock_init(&fs_info->delayed_iput_lock);
1617 1619
1618 init_completion(&fs_info->kobj_unregister); 1620 init_completion(&fs_info->kobj_unregister);
1619 fs_info->tree_root = tree_root; 1621 fs_info->tree_root = tree_root;
@@ -1689,6 +1691,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1689 mutex_init(&fs_info->cleaner_mutex); 1691 mutex_init(&fs_info->cleaner_mutex);
1690 mutex_init(&fs_info->volume_mutex); 1692 mutex_init(&fs_info->volume_mutex);
1691 init_rwsem(&fs_info->extent_commit_sem); 1693 init_rwsem(&fs_info->extent_commit_sem);
1694 init_rwsem(&fs_info->cleanup_work_sem);
1692 init_rwsem(&fs_info->subvol_sem); 1695 init_rwsem(&fs_info->subvol_sem);
1693 1696
1694 btrfs_init_free_cluster(&fs_info->meta_alloc_cluster); 1697 btrfs_init_free_cluster(&fs_info->meta_alloc_cluster);
@@ -2386,8 +2389,14 @@ int btrfs_commit_super(struct btrfs_root *root)
2386 int ret; 2389 int ret;
2387 2390
2388 mutex_lock(&root->fs_info->cleaner_mutex); 2391 mutex_lock(&root->fs_info->cleaner_mutex);
2392 btrfs_run_delayed_iputs(root);
2389 btrfs_clean_old_snapshots(root); 2393 btrfs_clean_old_snapshots(root);
2390 mutex_unlock(&root->fs_info->cleaner_mutex); 2394 mutex_unlock(&root->fs_info->cleaner_mutex);
2395
2396 /* wait until ongoing cleanup work done */
2397 down_write(&root->fs_info->cleanup_work_sem);
2398 up_write(&root->fs_info->cleanup_work_sem);
2399
2391 trans = btrfs_start_transaction(root, 1); 2400 trans = btrfs_start_transaction(root, 1);
2392 ret = btrfs_commit_transaction(trans, root); 2401 ret = btrfs_commit_transaction(trans, root);
2393 BUG_ON(ret); 2402 BUG_ON(ret);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 94627c4cc193..56e50137d0e6 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -195,6 +195,14 @@ static int exclude_super_stripes(struct btrfs_root *root,
195 int stripe_len; 195 int stripe_len;
196 int i, nr, ret; 196 int i, nr, ret;
197 197
198 if (cache->key.objectid < BTRFS_SUPER_INFO_OFFSET) {
199 stripe_len = BTRFS_SUPER_INFO_OFFSET - cache->key.objectid;
200 cache->bytes_super += stripe_len;
201 ret = add_excluded_extent(root, cache->key.objectid,
202 stripe_len);
203 BUG_ON(ret);
204 }
205
198 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { 206 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
199 bytenr = btrfs_sb_offset(i); 207 bytenr = btrfs_sb_offset(i);
200 ret = btrfs_rmap_block(&root->fs_info->mapping_tree, 208 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
@@ -255,7 +263,7 @@ static u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
255 if (ret) 263 if (ret)
256 break; 264 break;
257 265
258 if (extent_start == start) { 266 if (extent_start <= start) {
259 start = extent_end + 1; 267 start = extent_end + 1;
260 } else if (extent_start > start && extent_start < end) { 268 } else if (extent_start > start && extent_start < end) {
261 size = extent_start - start; 269 size = extent_start - start;
@@ -2880,9 +2888,9 @@ static noinline void flush_delalloc_async(struct btrfs_work *work)
2880 root = async->root; 2888 root = async->root;
2881 info = async->info; 2889 info = async->info;
2882 2890
2883 btrfs_start_delalloc_inodes(root); 2891 btrfs_start_delalloc_inodes(root, 0);
2884 wake_up(&info->flush_wait); 2892 wake_up(&info->flush_wait);
2885 btrfs_wait_ordered_extents(root, 0); 2893 btrfs_wait_ordered_extents(root, 0, 0);
2886 2894
2887 spin_lock(&info->lock); 2895 spin_lock(&info->lock);
2888 info->flushing = 0; 2896 info->flushing = 0;
@@ -2956,8 +2964,8 @@ static void flush_delalloc(struct btrfs_root *root,
2956 return; 2964 return;
2957 2965
2958flush: 2966flush:
2959 btrfs_start_delalloc_inodes(root); 2967 btrfs_start_delalloc_inodes(root, 0);
2960 btrfs_wait_ordered_extents(root, 0); 2968 btrfs_wait_ordered_extents(root, 0, 0);
2961 2969
2962 spin_lock(&info->lock); 2970 spin_lock(&info->lock);
2963 info->flushing = 0; 2971 info->flushing = 0;
@@ -3454,14 +3462,6 @@ static int update_block_group(struct btrfs_trans_handle *trans,
3454 else 3462 else
3455 old_val -= num_bytes; 3463 old_val -= num_bytes;
3456 btrfs_set_super_bytes_used(&info->super_copy, old_val); 3464 btrfs_set_super_bytes_used(&info->super_copy, old_val);
3457
3458 /* block accounting for root item */
3459 old_val = btrfs_root_used(&root->root_item);
3460 if (alloc)
3461 old_val += num_bytes;
3462 else
3463 old_val -= num_bytes;
3464 btrfs_set_root_used(&root->root_item, old_val);
3465 spin_unlock(&info->delalloc_lock); 3465 spin_unlock(&info->delalloc_lock);
3466 3466
3467 while (total) { 3467 while (total) {
@@ -4049,6 +4049,21 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,
4049 return ret; 4049 return ret;
4050} 4050}
4051 4051
4052int btrfs_free_tree_block(struct btrfs_trans_handle *trans,
4053 struct btrfs_root *root,
4054 u64 bytenr, u32 blocksize,
4055 u64 parent, u64 root_objectid, int level)
4056{
4057 u64 used;
4058 spin_lock(&root->node_lock);
4059 used = btrfs_root_used(&root->root_item) - blocksize;
4060 btrfs_set_root_used(&root->root_item, used);
4061 spin_unlock(&root->node_lock);
4062
4063 return btrfs_free_extent(trans, root, bytenr, blocksize,
4064 parent, root_objectid, level, 0);
4065}
4066
4052static u64 stripe_align(struct btrfs_root *root, u64 val) 4067static u64 stripe_align(struct btrfs_root *root, u64 val)
4053{ 4068{
4054 u64 mask = ((u64)root->stripesize - 1); 4069 u64 mask = ((u64)root->stripesize - 1);
@@ -4578,7 +4593,6 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
4578{ 4593{
4579 int ret; 4594 int ret;
4580 u64 search_start = 0; 4595 u64 search_start = 0;
4581 struct btrfs_fs_info *info = root->fs_info;
4582 4596
4583 data = btrfs_get_alloc_profile(root, data); 4597 data = btrfs_get_alloc_profile(root, data);
4584again: 4598again:
@@ -4586,17 +4600,9 @@ again:
4586 * the only place that sets empty_size is btrfs_realloc_node, which 4600 * the only place that sets empty_size is btrfs_realloc_node, which
4587 * is not called recursively on allocations 4601 * is not called recursively on allocations
4588 */ 4602 */
4589 if (empty_size || root->ref_cows) { 4603 if (empty_size || root->ref_cows)
4590 if (!(data & BTRFS_BLOCK_GROUP_METADATA)) {
4591 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
4592 2 * 1024 * 1024,
4593 BTRFS_BLOCK_GROUP_METADATA |
4594 (info->metadata_alloc_profile &
4595 info->avail_metadata_alloc_bits), 0);
4596 }
4597 ret = do_chunk_alloc(trans, root->fs_info->extent_root, 4604 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
4598 num_bytes + 2 * 1024 * 1024, data, 0); 4605 num_bytes + 2 * 1024 * 1024, data, 0);
4599 }
4600 4606
4601 WARN_ON(num_bytes < root->sectorsize); 4607 WARN_ON(num_bytes < root->sectorsize);
4602 ret = find_free_extent(trans, root, num_bytes, empty_size, 4608 ret = find_free_extent(trans, root, num_bytes, empty_size,
@@ -4897,6 +4903,14 @@ static int alloc_tree_block(struct btrfs_trans_handle *trans,
4897 extent_op); 4903 extent_op);
4898 BUG_ON(ret); 4904 BUG_ON(ret);
4899 } 4905 }
4906
4907 if (root_objectid == root->root_key.objectid) {
4908 u64 used;
4909 spin_lock(&root->node_lock);
4910 used = btrfs_root_used(&root->root_item) + num_bytes;
4911 btrfs_set_root_used(&root->root_item, used);
4912 spin_unlock(&root->node_lock);
4913 }
4900 return ret; 4914 return ret;
4901} 4915}
4902 4916
@@ -4919,8 +4933,16 @@ struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
4919 btrfs_set_buffer_uptodate(buf); 4933 btrfs_set_buffer_uptodate(buf);
4920 4934
4921 if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { 4935 if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
4922 set_extent_dirty(&root->dirty_log_pages, buf->start, 4936 /*
4923 buf->start + buf->len - 1, GFP_NOFS); 4937 * we allow two log transactions at a time, use different
4938 * EXENT bit to differentiate dirty pages.
4939 */
4940 if (root->log_transid % 2 == 0)
4941 set_extent_dirty(&root->dirty_log_pages, buf->start,
4942 buf->start + buf->len - 1, GFP_NOFS);
4943 else
4944 set_extent_new(&root->dirty_log_pages, buf->start,
4945 buf->start + buf->len - 1, GFP_NOFS);
4924 } else { 4946 } else {
4925 set_extent_dirty(&trans->transaction->dirty_pages, buf->start, 4947 set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
4926 buf->start + buf->len - 1, GFP_NOFS); 4948 buf->start + buf->len - 1, GFP_NOFS);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 77f759302e12..feaa13b105d9 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -179,18 +179,14 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
179 } 179 }
180 flags = em->flags; 180 flags = em->flags;
181 if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) { 181 if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) {
182 if (em->start <= start && 182 if (testend && em->start + em->len >= start + len) {
183 (!testend || em->start + em->len >= start + len)) {
184 free_extent_map(em); 183 free_extent_map(em);
185 write_unlock(&em_tree->lock); 184 write_unlock(&em_tree->lock);
186 break; 185 break;
187 } 186 }
188 if (start < em->start) { 187 start = em->start + em->len;
189 len = em->start - start; 188 if (testend)
190 } else {
191 len = start + len - (em->start + em->len); 189 len = start + len - (em->start + em->len);
192 start = em->start + em->len;
193 }
194 free_extent_map(em); 190 free_extent_map(em);
195 write_unlock(&em_tree->lock); 191 write_unlock(&em_tree->lock);
196 continue; 192 continue;
@@ -265,319 +261,247 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
265 * If an extent intersects the range but is not entirely inside the range 261 * If an extent intersects the range but is not entirely inside the range
266 * it is either truncated or split. Anything entirely inside the range 262 * it is either truncated or split. Anything entirely inside the range
267 * is deleted from the tree. 263 * is deleted from the tree.
268 *
269 * inline_limit is used to tell this code which offsets in the file to keep
270 * if they contain inline extents.
271 */ 264 */
272noinline int btrfs_drop_extents(struct btrfs_trans_handle *trans, 265int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode,
273 struct btrfs_root *root, struct inode *inode, 266 u64 start, u64 end, u64 *hint_byte, int drop_cache)
274 u64 start, u64 end, u64 locked_end,
275 u64 inline_limit, u64 *hint_byte, int drop_cache)
276{ 267{
277 u64 extent_end = 0; 268 struct btrfs_root *root = BTRFS_I(inode)->root;
278 u64 search_start = start;
279 u64 ram_bytes = 0;
280 u64 disk_bytenr = 0;
281 u64 orig_locked_end = locked_end;
282 u8 compression;
283 u8 encryption;
284 u16 other_encoding = 0;
285 struct extent_buffer *leaf; 269 struct extent_buffer *leaf;
286 struct btrfs_file_extent_item *extent; 270 struct btrfs_file_extent_item *fi;
287 struct btrfs_path *path; 271 struct btrfs_path *path;
288 struct btrfs_key key; 272 struct btrfs_key key;
289 struct btrfs_file_extent_item old; 273 struct btrfs_key new_key;
290 int keep; 274 u64 search_start = start;
291 int slot; 275 u64 disk_bytenr = 0;
292 int bookend; 276 u64 num_bytes = 0;
293 int found_type = 0; 277 u64 extent_offset = 0;
294 int found_extent; 278 u64 extent_end = 0;
295 int found_inline; 279 int del_nr = 0;
280 int del_slot = 0;
281 int extent_type;
296 int recow; 282 int recow;
297 int ret; 283 int ret;
298 284
299 inline_limit = 0;
300 if (drop_cache) 285 if (drop_cache)
301 btrfs_drop_extent_cache(inode, start, end - 1, 0); 286 btrfs_drop_extent_cache(inode, start, end - 1, 0);
302 287
303 path = btrfs_alloc_path(); 288 path = btrfs_alloc_path();
304 if (!path) 289 if (!path)
305 return -ENOMEM; 290 return -ENOMEM;
291
306 while (1) { 292 while (1) {
307 recow = 0; 293 recow = 0;
308 btrfs_release_path(root, path);
309 ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino, 294 ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino,
310 search_start, -1); 295 search_start, -1);
311 if (ret < 0) 296 if (ret < 0)
312 goto out; 297 break;
313 if (ret > 0) { 298 if (ret > 0 && path->slots[0] > 0 && search_start == start) {
314 if (path->slots[0] == 0) { 299 leaf = path->nodes[0];
315 ret = 0; 300 btrfs_item_key_to_cpu(leaf, &key, path->slots[0] - 1);
316 goto out; 301 if (key.objectid == inode->i_ino &&
317 } 302 key.type == BTRFS_EXTENT_DATA_KEY)
318 path->slots[0]--; 303 path->slots[0]--;
319 } 304 }
305 ret = 0;
320next_slot: 306next_slot:
321 keep = 0;
322 bookend = 0;
323 found_extent = 0;
324 found_inline = 0;
325 compression = 0;
326 encryption = 0;
327 extent = NULL;
328 leaf = path->nodes[0]; 307 leaf = path->nodes[0];
329 slot = path->slots[0]; 308 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
330 ret = 0; 309 BUG_ON(del_nr > 0);
331 btrfs_item_key_to_cpu(leaf, &key, slot); 310 ret = btrfs_next_leaf(root, path);
332 if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY && 311 if (ret < 0)
333 key.offset >= end) { 312 break;
334 goto out; 313 if (ret > 0) {
335 } 314 ret = 0;
336 if (btrfs_key_type(&key) > BTRFS_EXTENT_DATA_KEY || 315 break;
337 key.objectid != inode->i_ino) {
338 goto out;
339 }
340 if (recow) {
341 search_start = max(key.offset, start);
342 continue;
343 }
344 if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) {
345 extent = btrfs_item_ptr(leaf, slot,
346 struct btrfs_file_extent_item);
347 found_type = btrfs_file_extent_type(leaf, extent);
348 compression = btrfs_file_extent_compression(leaf,
349 extent);
350 encryption = btrfs_file_extent_encryption(leaf,
351 extent);
352 other_encoding = btrfs_file_extent_other_encoding(leaf,
353 extent);
354 if (found_type == BTRFS_FILE_EXTENT_REG ||
355 found_type == BTRFS_FILE_EXTENT_PREALLOC) {
356 extent_end =
357 btrfs_file_extent_disk_bytenr(leaf,
358 extent);
359 if (extent_end)
360 *hint_byte = extent_end;
361
362 extent_end = key.offset +
363 btrfs_file_extent_num_bytes(leaf, extent);
364 ram_bytes = btrfs_file_extent_ram_bytes(leaf,
365 extent);
366 found_extent = 1;
367 } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
368 found_inline = 1;
369 extent_end = key.offset +
370 btrfs_file_extent_inline_len(leaf, extent);
371 } 316 }
317 leaf = path->nodes[0];
318 recow = 1;
319 }
320
321 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
322 if (key.objectid > inode->i_ino ||
323 key.type > BTRFS_EXTENT_DATA_KEY || key.offset >= end)
324 break;
325
326 fi = btrfs_item_ptr(leaf, path->slots[0],
327 struct btrfs_file_extent_item);
328 extent_type = btrfs_file_extent_type(leaf, fi);
329
330 if (extent_type == BTRFS_FILE_EXTENT_REG ||
331 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
332 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
333 num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
334 extent_offset = btrfs_file_extent_offset(leaf, fi);
335 extent_end = key.offset +
336 btrfs_file_extent_num_bytes(leaf, fi);
337 } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
338 extent_end = key.offset +
339 btrfs_file_extent_inline_len(leaf, fi);
372 } else { 340 } else {
341 WARN_ON(1);
373 extent_end = search_start; 342 extent_end = search_start;
374 } 343 }
375 344
376 /* we found nothing we can drop */ 345 if (extent_end <= search_start) {
377 if ((!found_extent && !found_inline) || 346 path->slots[0]++;
378 search_start >= extent_end) {
379 int nextret;
380 u32 nritems;
381 nritems = btrfs_header_nritems(leaf);
382 if (slot >= nritems - 1) {
383 nextret = btrfs_next_leaf(root, path);
384 if (nextret)
385 goto out;
386 recow = 1;
387 } else {
388 path->slots[0]++;
389 }
390 goto next_slot; 347 goto next_slot;
391 } 348 }
392 349
393 if (end <= extent_end && start >= key.offset && found_inline) 350 search_start = max(key.offset, start);
394 *hint_byte = EXTENT_MAP_INLINE; 351 if (recow) {
395 352 btrfs_release_path(root, path);
396 if (found_extent) { 353 continue;
397 read_extent_buffer(leaf, &old, (unsigned long)extent,
398 sizeof(old));
399 }
400
401 if (end < extent_end && end >= key.offset) {
402 bookend = 1;
403 if (found_inline && start <= key.offset)
404 keep = 1;
405 } 354 }
406 355
407 if (bookend && found_extent) { 356 /*
408 if (locked_end < extent_end) { 357 * | - range to drop - |
409 ret = try_lock_extent(&BTRFS_I(inode)->io_tree, 358 * | -------- extent -------- |
410 locked_end, extent_end - 1, 359 */
411 GFP_NOFS); 360 if (start > key.offset && end < extent_end) {
412 if (!ret) { 361 BUG_ON(del_nr > 0);
413 btrfs_release_path(root, path); 362 BUG_ON(extent_type == BTRFS_FILE_EXTENT_INLINE);
414 lock_extent(&BTRFS_I(inode)->io_tree, 363
415 locked_end, extent_end - 1, 364 memcpy(&new_key, &key, sizeof(new_key));
416 GFP_NOFS); 365 new_key.offset = start;
417 locked_end = extent_end; 366 ret = btrfs_duplicate_item(trans, root, path,
418 continue; 367 &new_key);
419 } 368 if (ret == -EAGAIN) {
420 locked_end = extent_end; 369 btrfs_release_path(root, path);
370 continue;
421 } 371 }
422 disk_bytenr = le64_to_cpu(old.disk_bytenr); 372 if (ret < 0)
423 if (disk_bytenr != 0) { 373 break;
374
375 leaf = path->nodes[0];
376 fi = btrfs_item_ptr(leaf, path->slots[0] - 1,
377 struct btrfs_file_extent_item);
378 btrfs_set_file_extent_num_bytes(leaf, fi,
379 start - key.offset);
380
381 fi = btrfs_item_ptr(leaf, path->slots[0],
382 struct btrfs_file_extent_item);
383
384 extent_offset += start - key.offset;
385 btrfs_set_file_extent_offset(leaf, fi, extent_offset);
386 btrfs_set_file_extent_num_bytes(leaf, fi,
387 extent_end - start);
388 btrfs_mark_buffer_dirty(leaf);
389
390 if (disk_bytenr > 0) {
424 ret = btrfs_inc_extent_ref(trans, root, 391 ret = btrfs_inc_extent_ref(trans, root,
425 disk_bytenr, 392 disk_bytenr, num_bytes, 0,
426 le64_to_cpu(old.disk_num_bytes), 0, 393 root->root_key.objectid,
427 root->root_key.objectid, 394 new_key.objectid,
428 key.objectid, key.offset - 395 start - extent_offset);
429 le64_to_cpu(old.offset));
430 BUG_ON(ret); 396 BUG_ON(ret);
397 *hint_byte = disk_bytenr;
431 } 398 }
399 key.offset = start;
432 } 400 }
401 /*
402 * | ---- range to drop ----- |
403 * | -------- extent -------- |
404 */
405 if (start <= key.offset && end < extent_end) {
406 BUG_ON(extent_type == BTRFS_FILE_EXTENT_INLINE);
433 407
434 if (found_inline) { 408 memcpy(&new_key, &key, sizeof(new_key));
435 u64 mask = root->sectorsize - 1; 409 new_key.offset = end;
436 search_start = (extent_end + mask) & ~mask; 410 btrfs_set_item_key_safe(trans, root, path, &new_key);
437 } else 411
438 search_start = extent_end; 412 extent_offset += end - key.offset;
439 413 btrfs_set_file_extent_offset(leaf, fi, extent_offset);
440 /* truncate existing extent */ 414 btrfs_set_file_extent_num_bytes(leaf, fi,
441 if (start > key.offset) { 415 extent_end - end);
442 u64 new_num; 416 btrfs_mark_buffer_dirty(leaf);
443 u64 old_num; 417 if (disk_bytenr > 0) {
444 keep = 1; 418 inode_sub_bytes(inode, end - key.offset);
445 WARN_ON(start & (root->sectorsize - 1)); 419 *hint_byte = disk_bytenr;
446 if (found_extent) {
447 new_num = start - key.offset;
448 old_num = btrfs_file_extent_num_bytes(leaf,
449 extent);
450 *hint_byte =
451 btrfs_file_extent_disk_bytenr(leaf,
452 extent);
453 if (btrfs_file_extent_disk_bytenr(leaf,
454 extent)) {
455 inode_sub_bytes(inode, old_num -
456 new_num);
457 }
458 btrfs_set_file_extent_num_bytes(leaf,
459 extent, new_num);
460 btrfs_mark_buffer_dirty(leaf);
461 } else if (key.offset < inline_limit &&
462 (end > extent_end) &&
463 (inline_limit < extent_end)) {
464 u32 new_size;
465 new_size = btrfs_file_extent_calc_inline_size(
466 inline_limit - key.offset);
467 inode_sub_bytes(inode, extent_end -
468 inline_limit);
469 btrfs_set_file_extent_ram_bytes(leaf, extent,
470 new_size);
471 if (!compression && !encryption) {
472 btrfs_truncate_item(trans, root, path,
473 new_size, 1);
474 }
475 } 420 }
421 break;
476 } 422 }
477 /* delete the entire extent */
478 if (!keep) {
479 if (found_inline)
480 inode_sub_bytes(inode, extent_end -
481 key.offset);
482 ret = btrfs_del_item(trans, root, path);
483 /* TODO update progress marker and return */
484 BUG_ON(ret);
485 extent = NULL;
486 btrfs_release_path(root, path);
487 /* the extent will be freed later */
488 }
489 if (bookend && found_inline && start <= key.offset) {
490 u32 new_size;
491 new_size = btrfs_file_extent_calc_inline_size(
492 extent_end - end);
493 inode_sub_bytes(inode, end - key.offset);
494 btrfs_set_file_extent_ram_bytes(leaf, extent,
495 new_size);
496 if (!compression && !encryption)
497 ret = btrfs_truncate_item(trans, root, path,
498 new_size, 0);
499 BUG_ON(ret);
500 }
501 /* create bookend, splitting the extent in two */
502 if (bookend && found_extent) {
503 struct btrfs_key ins;
504 ins.objectid = inode->i_ino;
505 ins.offset = end;
506 btrfs_set_key_type(&ins, BTRFS_EXTENT_DATA_KEY);
507 423
508 btrfs_release_path(root, path); 424 search_start = extent_end;
509 path->leave_spinning = 1; 425 /*
510 ret = btrfs_insert_empty_item(trans, root, path, &ins, 426 * | ---- range to drop ----- |
511 sizeof(*extent)); 427 * | -------- extent -------- |
512 BUG_ON(ret); 428 */
429 if (start > key.offset && end >= extent_end) {
430 BUG_ON(del_nr > 0);
431 BUG_ON(extent_type == BTRFS_FILE_EXTENT_INLINE);
513 432
514 leaf = path->nodes[0]; 433 btrfs_set_file_extent_num_bytes(leaf, fi,
515 extent = btrfs_item_ptr(leaf, path->slots[0], 434 start - key.offset);
516 struct btrfs_file_extent_item); 435 btrfs_mark_buffer_dirty(leaf);
517 write_extent_buffer(leaf, &old, 436 if (disk_bytenr > 0) {
518 (unsigned long)extent, sizeof(old)); 437 inode_sub_bytes(inode, extent_end - start);
519 438 *hint_byte = disk_bytenr;
520 btrfs_set_file_extent_compression(leaf, extent, 439 }
521 compression); 440 if (end == extent_end)
522 btrfs_set_file_extent_encryption(leaf, extent, 441 break;
523 encryption);
524 btrfs_set_file_extent_other_encoding(leaf, extent,
525 other_encoding);
526 btrfs_set_file_extent_offset(leaf, extent,
527 le64_to_cpu(old.offset) + end - key.offset);
528 WARN_ON(le64_to_cpu(old.num_bytes) <
529 (extent_end - end));
530 btrfs_set_file_extent_num_bytes(leaf, extent,
531 extent_end - end);
532 442
533 /* 443 path->slots[0]++;
534 * set the ram bytes to the size of the full extent 444 goto next_slot;
535 * before splitting. This is a worst case flag,
536 * but its the best we can do because we don't know
537 * how splitting affects compression
538 */
539 btrfs_set_file_extent_ram_bytes(leaf, extent,
540 ram_bytes);
541 btrfs_set_file_extent_type(leaf, extent, found_type);
542
543 btrfs_unlock_up_safe(path, 1);
544 btrfs_mark_buffer_dirty(path->nodes[0]);
545 btrfs_set_lock_blocking(path->nodes[0]);
546
547 path->leave_spinning = 0;
548 btrfs_release_path(root, path);
549 if (disk_bytenr != 0)
550 inode_add_bytes(inode, extent_end - end);
551 } 445 }
552 446
553 if (found_extent && !keep) { 447 /*
554 u64 old_disk_bytenr = le64_to_cpu(old.disk_bytenr); 448 * | ---- range to drop ----- |
449 * | ------ extent ------ |
450 */
451 if (start <= key.offset && end >= extent_end) {
452 if (del_nr == 0) {
453 del_slot = path->slots[0];
454 del_nr = 1;
455 } else {
456 BUG_ON(del_slot + del_nr != path->slots[0]);
457 del_nr++;
458 }
555 459
556 if (old_disk_bytenr != 0) { 460 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
557 inode_sub_bytes(inode, 461 inode_sub_bytes(inode,
558 le64_to_cpu(old.num_bytes)); 462 extent_end - key.offset);
463 extent_end = ALIGN(extent_end,
464 root->sectorsize);
465 } else if (disk_bytenr > 0) {
559 ret = btrfs_free_extent(trans, root, 466 ret = btrfs_free_extent(trans, root,
560 old_disk_bytenr, 467 disk_bytenr, num_bytes, 0,
561 le64_to_cpu(old.disk_num_bytes), 468 root->root_key.objectid,
562 0, root->root_key.objectid,
563 key.objectid, key.offset - 469 key.objectid, key.offset -
564 le64_to_cpu(old.offset)); 470 extent_offset);
565 BUG_ON(ret); 471 BUG_ON(ret);
566 *hint_byte = old_disk_bytenr; 472 inode_sub_bytes(inode,
473 extent_end - key.offset);
474 *hint_byte = disk_bytenr;
567 } 475 }
568 }
569 476
570 if (search_start >= end) { 477 if (end == extent_end)
571 ret = 0; 478 break;
572 goto out; 479
480 if (path->slots[0] + 1 < btrfs_header_nritems(leaf)) {
481 path->slots[0]++;
482 goto next_slot;
483 }
484
485 ret = btrfs_del_items(trans, root, path, del_slot,
486 del_nr);
487 BUG_ON(ret);
488
489 del_nr = 0;
490 del_slot = 0;
491
492 btrfs_release_path(root, path);
493 continue;
573 } 494 }
495
496 BUG_ON(1);
574 } 497 }
575out: 498
576 btrfs_free_path(path); 499 if (del_nr > 0) {
577 if (locked_end > orig_locked_end) { 500 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
578 unlock_extent(&BTRFS_I(inode)->io_tree, orig_locked_end, 501 BUG_ON(ret);
579 locked_end - 1, GFP_NOFS);
580 } 502 }
503
504 btrfs_free_path(path);
581 return ret; 505 return ret;
582} 506}
583 507
@@ -620,23 +544,23 @@ static int extent_mergeable(struct extent_buffer *leaf, int slot,
620 * two or three. 544 * two or three.
621 */ 545 */
622int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, 546int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
623 struct btrfs_root *root,
624 struct inode *inode, u64 start, u64 end) 547 struct inode *inode, u64 start, u64 end)
625{ 548{
549 struct btrfs_root *root = BTRFS_I(inode)->root;
626 struct extent_buffer *leaf; 550 struct extent_buffer *leaf;
627 struct btrfs_path *path; 551 struct btrfs_path *path;
628 struct btrfs_file_extent_item *fi; 552 struct btrfs_file_extent_item *fi;
629 struct btrfs_key key; 553 struct btrfs_key key;
554 struct btrfs_key new_key;
630 u64 bytenr; 555 u64 bytenr;
631 u64 num_bytes; 556 u64 num_bytes;
632 u64 extent_end; 557 u64 extent_end;
633 u64 orig_offset; 558 u64 orig_offset;
634 u64 other_start; 559 u64 other_start;
635 u64 other_end; 560 u64 other_end;
636 u64 split = start; 561 u64 split;
637 u64 locked_end = end; 562 int del_nr = 0;
638 int extent_type; 563 int del_slot = 0;
639 int split_end = 1;
640 int ret; 564 int ret;
641 565
642 btrfs_drop_extent_cache(inode, start, end - 1, 0); 566 btrfs_drop_extent_cache(inode, start, end - 1, 0);
@@ -644,12 +568,10 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
644 path = btrfs_alloc_path(); 568 path = btrfs_alloc_path();
645 BUG_ON(!path); 569 BUG_ON(!path);
646again: 570again:
571 split = start;
647 key.objectid = inode->i_ino; 572 key.objectid = inode->i_ino;
648 key.type = BTRFS_EXTENT_DATA_KEY; 573 key.type = BTRFS_EXTENT_DATA_KEY;
649 if (split == start) 574 key.offset = split;
650 key.offset = split;
651 else
652 key.offset = split - 1;
653 575
654 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 576 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
655 if (ret > 0 && path->slots[0] > 0) 577 if (ret > 0 && path->slots[0] > 0)
@@ -661,8 +583,8 @@ again:
661 key.type != BTRFS_EXTENT_DATA_KEY); 583 key.type != BTRFS_EXTENT_DATA_KEY);
662 fi = btrfs_item_ptr(leaf, path->slots[0], 584 fi = btrfs_item_ptr(leaf, path->slots[0],
663 struct btrfs_file_extent_item); 585 struct btrfs_file_extent_item);
664 extent_type = btrfs_file_extent_type(leaf, fi); 586 BUG_ON(btrfs_file_extent_type(leaf, fi) !=
665 BUG_ON(extent_type != BTRFS_FILE_EXTENT_PREALLOC); 587 BTRFS_FILE_EXTENT_PREALLOC);
666 extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi); 588 extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
667 BUG_ON(key.offset > start || extent_end < end); 589 BUG_ON(key.offset > start || extent_end < end);
668 590
@@ -670,150 +592,91 @@ again:
670 num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi); 592 num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
671 orig_offset = key.offset - btrfs_file_extent_offset(leaf, fi); 593 orig_offset = key.offset - btrfs_file_extent_offset(leaf, fi);
672 594
673 if (key.offset == start) 595 while (start > key.offset || end < extent_end) {
674 split = end; 596 if (key.offset == start)
675 597 split = end;
676 if (key.offset == start && extent_end == end) { 598
677 int del_nr = 0; 599 memcpy(&new_key, &key, sizeof(new_key));
678 int del_slot = 0; 600 new_key.offset = split;
679 other_start = end; 601 ret = btrfs_duplicate_item(trans, root, path, &new_key);
680 other_end = 0; 602 if (ret == -EAGAIN) {
681 if (extent_mergeable(leaf, path->slots[0] + 1, inode->i_ino, 603 btrfs_release_path(root, path);
682 bytenr, &other_start, &other_end)) { 604 goto again;
683 extent_end = other_end;
684 del_slot = path->slots[0] + 1;
685 del_nr++;
686 ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
687 0, root->root_key.objectid,
688 inode->i_ino, orig_offset);
689 BUG_ON(ret);
690 }
691 other_start = 0;
692 other_end = start;
693 if (extent_mergeable(leaf, path->slots[0] - 1, inode->i_ino,
694 bytenr, &other_start, &other_end)) {
695 key.offset = other_start;
696 del_slot = path->slots[0];
697 del_nr++;
698 ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
699 0, root->root_key.objectid,
700 inode->i_ino, orig_offset);
701 BUG_ON(ret);
702 }
703 split_end = 0;
704 if (del_nr == 0) {
705 btrfs_set_file_extent_type(leaf, fi,
706 BTRFS_FILE_EXTENT_REG);
707 goto done;
708 } 605 }
606 BUG_ON(ret < 0);
709 607
710 fi = btrfs_item_ptr(leaf, del_slot - 1, 608 leaf = path->nodes[0];
609 fi = btrfs_item_ptr(leaf, path->slots[0] - 1,
711 struct btrfs_file_extent_item); 610 struct btrfs_file_extent_item);
712 btrfs_set_file_extent_type(leaf, fi, BTRFS_FILE_EXTENT_REG);
713 btrfs_set_file_extent_num_bytes(leaf, fi, 611 btrfs_set_file_extent_num_bytes(leaf, fi,
714 extent_end - key.offset); 612 split - key.offset);
613
614 fi = btrfs_item_ptr(leaf, path->slots[0],
615 struct btrfs_file_extent_item);
616
617 btrfs_set_file_extent_offset(leaf, fi, split - orig_offset);
618 btrfs_set_file_extent_num_bytes(leaf, fi,
619 extent_end - split);
715 btrfs_mark_buffer_dirty(leaf); 620 btrfs_mark_buffer_dirty(leaf);
716 621
717 ret = btrfs_del_items(trans, root, path, del_slot, del_nr); 622 ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0,
623 root->root_key.objectid,
624 inode->i_ino, orig_offset);
718 BUG_ON(ret); 625 BUG_ON(ret);
719 goto release;
720 } else if (split == start) {
721 if (locked_end < extent_end) {
722 ret = try_lock_extent(&BTRFS_I(inode)->io_tree,
723 locked_end, extent_end - 1, GFP_NOFS);
724 if (!ret) {
725 btrfs_release_path(root, path);
726 lock_extent(&BTRFS_I(inode)->io_tree,
727 locked_end, extent_end - 1, GFP_NOFS);
728 locked_end = extent_end;
729 goto again;
730 }
731 locked_end = extent_end;
732 }
733 btrfs_set_file_extent_num_bytes(leaf, fi, split - key.offset);
734 } else {
735 BUG_ON(key.offset != start);
736 key.offset = split;
737 btrfs_set_file_extent_offset(leaf, fi, key.offset -
738 orig_offset);
739 btrfs_set_file_extent_num_bytes(leaf, fi, extent_end - split);
740 btrfs_set_item_key_safe(trans, root, path, &key);
741 extent_end = split;
742 }
743 626
744 if (extent_end == end) { 627 if (split == start) {
745 split_end = 0; 628 key.offset = start;
746 extent_type = BTRFS_FILE_EXTENT_REG; 629 } else {
747 } 630 BUG_ON(start != key.offset);
748 if (extent_end == end && split == start) {
749 other_start = end;
750 other_end = 0;
751 if (extent_mergeable(leaf, path->slots[0] + 1, inode->i_ino,
752 bytenr, &other_start, &other_end)) {
753 path->slots[0]++;
754 fi = btrfs_item_ptr(leaf, path->slots[0],
755 struct btrfs_file_extent_item);
756 key.offset = split;
757 btrfs_set_item_key_safe(trans, root, path, &key);
758 btrfs_set_file_extent_offset(leaf, fi, key.offset -
759 orig_offset);
760 btrfs_set_file_extent_num_bytes(leaf, fi,
761 other_end - split);
762 goto done;
763 }
764 }
765 if (extent_end == end && split == end) {
766 other_start = 0;
767 other_end = start;
768 if (extent_mergeable(leaf, path->slots[0] - 1 , inode->i_ino,
769 bytenr, &other_start, &other_end)) {
770 path->slots[0]--; 631 path->slots[0]--;
771 fi = btrfs_item_ptr(leaf, path->slots[0], 632 extent_end = end;
772 struct btrfs_file_extent_item);
773 btrfs_set_file_extent_num_bytes(leaf, fi, extent_end -
774 other_start);
775 goto done;
776 } 633 }
777 } 634 }
778 635
779 btrfs_mark_buffer_dirty(leaf);
780
781 ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0,
782 root->root_key.objectid,
783 inode->i_ino, orig_offset);
784 BUG_ON(ret);
785 btrfs_release_path(root, path);
786
787 key.offset = start;
788 ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*fi));
789 BUG_ON(ret);
790
791 leaf = path->nodes[0];
792 fi = btrfs_item_ptr(leaf, path->slots[0], 636 fi = btrfs_item_ptr(leaf, path->slots[0],
793 struct btrfs_file_extent_item); 637 struct btrfs_file_extent_item);
794 btrfs_set_file_extent_generation(leaf, fi, trans->transid);
795 btrfs_set_file_extent_type(leaf, fi, extent_type);
796 btrfs_set_file_extent_disk_bytenr(leaf, fi, bytenr);
797 btrfs_set_file_extent_disk_num_bytes(leaf, fi, num_bytes);
798 btrfs_set_file_extent_offset(leaf, fi, key.offset - orig_offset);
799 btrfs_set_file_extent_num_bytes(leaf, fi, extent_end - key.offset);
800 btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes);
801 btrfs_set_file_extent_compression(leaf, fi, 0);
802 btrfs_set_file_extent_encryption(leaf, fi, 0);
803 btrfs_set_file_extent_other_encoding(leaf, fi, 0);
804done:
805 btrfs_mark_buffer_dirty(leaf);
806 638
807release: 639 other_start = end;
808 btrfs_release_path(root, path); 640 other_end = 0;
809 if (split_end && split == start) { 641 if (extent_mergeable(leaf, path->slots[0] + 1, inode->i_ino,
810 split = end; 642 bytenr, &other_start, &other_end)) {
811 goto again; 643 extent_end = other_end;
644 del_slot = path->slots[0] + 1;
645 del_nr++;
646 ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
647 0, root->root_key.objectid,
648 inode->i_ino, orig_offset);
649 BUG_ON(ret);
812 } 650 }
813 if (locked_end > end) { 651 other_start = 0;
814 unlock_extent(&BTRFS_I(inode)->io_tree, end, locked_end - 1, 652 other_end = start;
815 GFP_NOFS); 653 if (extent_mergeable(leaf, path->slots[0] - 1, inode->i_ino,
654 bytenr, &other_start, &other_end)) {
655 key.offset = other_start;
656 del_slot = path->slots[0];
657 del_nr++;
658 ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
659 0, root->root_key.objectid,
660 inode->i_ino, orig_offset);
661 BUG_ON(ret);
816 } 662 }
663 if (del_nr == 0) {
664 btrfs_set_file_extent_type(leaf, fi,
665 BTRFS_FILE_EXTENT_REG);
666 btrfs_mark_buffer_dirty(leaf);
667 goto out;
668 }
669
670 fi = btrfs_item_ptr(leaf, del_slot - 1,
671 struct btrfs_file_extent_item);
672 btrfs_set_file_extent_type(leaf, fi, BTRFS_FILE_EXTENT_REG);
673 btrfs_set_file_extent_num_bytes(leaf, fi,
674 extent_end - key.offset);
675 btrfs_mark_buffer_dirty(leaf);
676
677 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
678 BUG_ON(ret);
679out:
817 btrfs_free_path(path); 680 btrfs_free_path(path);
818 return 0; 681 return 0;
819} 682}
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index b3ad168a0bfc..5440bab23635 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -88,13 +88,14 @@ static noinline int cow_file_range(struct inode *inode,
88 u64 start, u64 end, int *page_started, 88 u64 start, u64 end, int *page_started,
89 unsigned long *nr_written, int unlock); 89 unsigned long *nr_written, int unlock);
90 90
91static int btrfs_init_inode_security(struct inode *inode, struct inode *dir) 91static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
92 struct inode *inode, struct inode *dir)
92{ 93{
93 int err; 94 int err;
94 95
95 err = btrfs_init_acl(inode, dir); 96 err = btrfs_init_acl(trans, inode, dir);
96 if (!err) 97 if (!err)
97 err = btrfs_xattr_security_init(inode, dir); 98 err = btrfs_xattr_security_init(trans, inode, dir);
98 return err; 99 return err;
99} 100}
100 101
@@ -188,8 +189,18 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
188 btrfs_mark_buffer_dirty(leaf); 189 btrfs_mark_buffer_dirty(leaf);
189 btrfs_free_path(path); 190 btrfs_free_path(path);
190 191
192 /*
193 * we're an inline extent, so nobody can
194 * extend the file past i_size without locking
195 * a page we already have locked.
196 *
197 * We must do any isize and inode updates
198 * before we unlock the pages. Otherwise we
199 * could end up racing with unlink.
200 */
191 BTRFS_I(inode)->disk_i_size = inode->i_size; 201 BTRFS_I(inode)->disk_i_size = inode->i_size;
192 btrfs_update_inode(trans, root, inode); 202 btrfs_update_inode(trans, root, inode);
203
193 return 0; 204 return 0;
194fail: 205fail:
195 btrfs_free_path(path); 206 btrfs_free_path(path);
@@ -230,8 +241,7 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
230 return 1; 241 return 1;
231 } 242 }
232 243
233 ret = btrfs_drop_extents(trans, root, inode, start, 244 ret = btrfs_drop_extents(trans, inode, start, aligned_end,
234 aligned_end, aligned_end, start,
235 &hint_byte, 1); 245 &hint_byte, 1);
236 BUG_ON(ret); 246 BUG_ON(ret);
237 247
@@ -416,7 +426,6 @@ again:
416 start, end, 426 start, end,
417 total_compressed, pages); 427 total_compressed, pages);
418 } 428 }
419 btrfs_end_transaction(trans, root);
420 if (ret == 0) { 429 if (ret == 0) {
421 /* 430 /*
422 * inline extent creation worked, we don't need 431 * inline extent creation worked, we don't need
@@ -430,9 +439,11 @@ again:
430 EXTENT_CLEAR_DELALLOC | 439 EXTENT_CLEAR_DELALLOC |
431 EXTENT_CLEAR_ACCOUNTING | 440 EXTENT_CLEAR_ACCOUNTING |
432 EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK); 441 EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK);
433 ret = 0; 442
443 btrfs_end_transaction(trans, root);
434 goto free_pages_out; 444 goto free_pages_out;
435 } 445 }
446 btrfs_end_transaction(trans, root);
436 } 447 }
437 448
438 if (will_compress) { 449 if (will_compress) {
@@ -543,7 +554,6 @@ static noinline int submit_compressed_extents(struct inode *inode,
543 if (list_empty(&async_cow->extents)) 554 if (list_empty(&async_cow->extents))
544 return 0; 555 return 0;
545 556
546 trans = btrfs_join_transaction(root, 1);
547 557
548 while (!list_empty(&async_cow->extents)) { 558 while (!list_empty(&async_cow->extents)) {
549 async_extent = list_entry(async_cow->extents.next, 559 async_extent = list_entry(async_cow->extents.next,
@@ -590,19 +600,15 @@ retry:
590 lock_extent(io_tree, async_extent->start, 600 lock_extent(io_tree, async_extent->start,
591 async_extent->start + async_extent->ram_size - 1, 601 async_extent->start + async_extent->ram_size - 1,
592 GFP_NOFS); 602 GFP_NOFS);
593 /*
594 * here we're doing allocation and writeback of the
595 * compressed pages
596 */
597 btrfs_drop_extent_cache(inode, async_extent->start,
598 async_extent->start +
599 async_extent->ram_size - 1, 0);
600 603
604 trans = btrfs_join_transaction(root, 1);
601 ret = btrfs_reserve_extent(trans, root, 605 ret = btrfs_reserve_extent(trans, root,
602 async_extent->compressed_size, 606 async_extent->compressed_size,
603 async_extent->compressed_size, 607 async_extent->compressed_size,
604 0, alloc_hint, 608 0, alloc_hint,
605 (u64)-1, &ins, 1); 609 (u64)-1, &ins, 1);
610 btrfs_end_transaction(trans, root);
611
606 if (ret) { 612 if (ret) {
607 int i; 613 int i;
608 for (i = 0; i < async_extent->nr_pages; i++) { 614 for (i = 0; i < async_extent->nr_pages; i++) {
@@ -618,6 +624,14 @@ retry:
618 goto retry; 624 goto retry;
619 } 625 }
620 626
627 /*
628 * here we're doing allocation and writeback of the
629 * compressed pages
630 */
631 btrfs_drop_extent_cache(inode, async_extent->start,
632 async_extent->start +
633 async_extent->ram_size - 1, 0);
634
621 em = alloc_extent_map(GFP_NOFS); 635 em = alloc_extent_map(GFP_NOFS);
622 em->start = async_extent->start; 636 em->start = async_extent->start;
623 em->len = async_extent->ram_size; 637 em->len = async_extent->ram_size;
@@ -649,8 +663,6 @@ retry:
649 BTRFS_ORDERED_COMPRESSED); 663 BTRFS_ORDERED_COMPRESSED);
650 BUG_ON(ret); 664 BUG_ON(ret);
651 665
652 btrfs_end_transaction(trans, root);
653
654 /* 666 /*
655 * clear dirty, set writeback and unlock the pages. 667 * clear dirty, set writeback and unlock the pages.
656 */ 668 */
@@ -672,13 +684,11 @@ retry:
672 async_extent->nr_pages); 684 async_extent->nr_pages);
673 685
674 BUG_ON(ret); 686 BUG_ON(ret);
675 trans = btrfs_join_transaction(root, 1);
676 alloc_hint = ins.objectid + ins.offset; 687 alloc_hint = ins.objectid + ins.offset;
677 kfree(async_extent); 688 kfree(async_extent);
678 cond_resched(); 689 cond_resched();
679 } 690 }
680 691
681 btrfs_end_transaction(trans, root);
682 return 0; 692 return 0;
683} 693}
684 694
@@ -742,6 +752,7 @@ static noinline int cow_file_range(struct inode *inode,
742 EXTENT_CLEAR_DIRTY | 752 EXTENT_CLEAR_DIRTY |
743 EXTENT_SET_WRITEBACK | 753 EXTENT_SET_WRITEBACK |
744 EXTENT_END_WRITEBACK); 754 EXTENT_END_WRITEBACK);
755
745 *nr_written = *nr_written + 756 *nr_written = *nr_written +
746 (end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE; 757 (end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE;
747 *page_started = 1; 758 *page_started = 1;
@@ -1596,7 +1607,6 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
1596 struct inode *inode, u64 file_pos, 1607 struct inode *inode, u64 file_pos,
1597 u64 disk_bytenr, u64 disk_num_bytes, 1608 u64 disk_bytenr, u64 disk_num_bytes,
1598 u64 num_bytes, u64 ram_bytes, 1609 u64 num_bytes, u64 ram_bytes,
1599 u64 locked_end,
1600 u8 compression, u8 encryption, 1610 u8 compression, u8 encryption,
1601 u16 other_encoding, int extent_type) 1611 u16 other_encoding, int extent_type)
1602{ 1612{
@@ -1622,9 +1632,8 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
1622 * the caller is expected to unpin it and allow it to be merged 1632 * the caller is expected to unpin it and allow it to be merged
1623 * with the others. 1633 * with the others.
1624 */ 1634 */
1625 ret = btrfs_drop_extents(trans, root, inode, file_pos, 1635 ret = btrfs_drop_extents(trans, inode, file_pos, file_pos + num_bytes,
1626 file_pos + num_bytes, locked_end, 1636 &hint, 0);
1627 file_pos, &hint, 0);
1628 BUG_ON(ret); 1637 BUG_ON(ret);
1629 1638
1630 ins.objectid = inode->i_ino; 1639 ins.objectid = inode->i_ino;
@@ -1730,23 +1739,32 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1730 } 1739 }
1731 } 1740 }
1732 1741
1733 trans = btrfs_join_transaction(root, 1);
1734
1735 if (!ordered_extent) 1742 if (!ordered_extent)
1736 ordered_extent = btrfs_lookup_ordered_extent(inode, start); 1743 ordered_extent = btrfs_lookup_ordered_extent(inode, start);
1737 BUG_ON(!ordered_extent); 1744 BUG_ON(!ordered_extent);
1738 if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) 1745 if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
1739 goto nocow; 1746 BUG_ON(!list_empty(&ordered_extent->list));
1747 ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
1748 if (!ret) {
1749 trans = btrfs_join_transaction(root, 1);
1750 ret = btrfs_update_inode(trans, root, inode);
1751 BUG_ON(ret);
1752 btrfs_end_transaction(trans, root);
1753 }
1754 goto out;
1755 }
1740 1756
1741 lock_extent(io_tree, ordered_extent->file_offset, 1757 lock_extent(io_tree, ordered_extent->file_offset,
1742 ordered_extent->file_offset + ordered_extent->len - 1, 1758 ordered_extent->file_offset + ordered_extent->len - 1,
1743 GFP_NOFS); 1759 GFP_NOFS);
1744 1760
1761 trans = btrfs_join_transaction(root, 1);
1762
1745 if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags)) 1763 if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags))
1746 compressed = 1; 1764 compressed = 1;
1747 if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) { 1765 if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) {
1748 BUG_ON(compressed); 1766 BUG_ON(compressed);
1749 ret = btrfs_mark_extent_written(trans, root, inode, 1767 ret = btrfs_mark_extent_written(trans, inode,
1750 ordered_extent->file_offset, 1768 ordered_extent->file_offset,
1751 ordered_extent->file_offset + 1769 ordered_extent->file_offset +
1752 ordered_extent->len); 1770 ordered_extent->len);
@@ -1758,8 +1776,6 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1758 ordered_extent->disk_len, 1776 ordered_extent->disk_len,
1759 ordered_extent->len, 1777 ordered_extent->len,
1760 ordered_extent->len, 1778 ordered_extent->len,
1761 ordered_extent->file_offset +
1762 ordered_extent->len,
1763 compressed, 0, 0, 1779 compressed, 0, 0,
1764 BTRFS_FILE_EXTENT_REG); 1780 BTRFS_FILE_EXTENT_REG);
1765 unpin_extent_cache(&BTRFS_I(inode)->extent_tree, 1781 unpin_extent_cache(&BTRFS_I(inode)->extent_tree,
@@ -1770,22 +1786,20 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1770 unlock_extent(io_tree, ordered_extent->file_offset, 1786 unlock_extent(io_tree, ordered_extent->file_offset,
1771 ordered_extent->file_offset + ordered_extent->len - 1, 1787 ordered_extent->file_offset + ordered_extent->len - 1,
1772 GFP_NOFS); 1788 GFP_NOFS);
1773nocow:
1774 add_pending_csums(trans, inode, ordered_extent->file_offset, 1789 add_pending_csums(trans, inode, ordered_extent->file_offset,
1775 &ordered_extent->list); 1790 &ordered_extent->list);
1776 1791
1777 mutex_lock(&BTRFS_I(inode)->extent_mutex); 1792 /* this also removes the ordered extent from the tree */
1778 btrfs_ordered_update_i_size(inode, ordered_extent); 1793 btrfs_ordered_update_i_size(inode, 0, ordered_extent);
1779 btrfs_update_inode(trans, root, inode); 1794 ret = btrfs_update_inode(trans, root, inode);
1780 btrfs_remove_ordered_extent(inode, ordered_extent); 1795 BUG_ON(ret);
1781 mutex_unlock(&BTRFS_I(inode)->extent_mutex); 1796 btrfs_end_transaction(trans, root);
1782 1797out:
1783 /* once for us */ 1798 /* once for us */
1784 btrfs_put_ordered_extent(ordered_extent); 1799 btrfs_put_ordered_extent(ordered_extent);
1785 /* once for the tree */ 1800 /* once for the tree */
1786 btrfs_put_ordered_extent(ordered_extent); 1801 btrfs_put_ordered_extent(ordered_extent);
1787 1802
1788 btrfs_end_transaction(trans, root);
1789 return 0; 1803 return 0;
1790} 1804}
1791 1805
@@ -2008,6 +2022,54 @@ zeroit:
2008 return -EIO; 2022 return -EIO;
2009} 2023}
2010 2024
2025struct delayed_iput {
2026 struct list_head list;
2027 struct inode *inode;
2028};
2029
2030void btrfs_add_delayed_iput(struct inode *inode)
2031{
2032 struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
2033 struct delayed_iput *delayed;
2034
2035 if (atomic_add_unless(&inode->i_count, -1, 1))
2036 return;
2037
2038 delayed = kmalloc(sizeof(*delayed), GFP_NOFS | __GFP_NOFAIL);
2039 delayed->inode = inode;
2040
2041 spin_lock(&fs_info->delayed_iput_lock);
2042 list_add_tail(&delayed->list, &fs_info->delayed_iputs);
2043 spin_unlock(&fs_info->delayed_iput_lock);
2044}
2045
2046void btrfs_run_delayed_iputs(struct btrfs_root *root)
2047{
2048 LIST_HEAD(list);
2049 struct btrfs_fs_info *fs_info = root->fs_info;
2050 struct delayed_iput *delayed;
2051 int empty;
2052
2053 spin_lock(&fs_info->delayed_iput_lock);
2054 empty = list_empty(&fs_info->delayed_iputs);
2055 spin_unlock(&fs_info->delayed_iput_lock);
2056 if (empty)
2057 return;
2058
2059 down_read(&root->fs_info->cleanup_work_sem);
2060 spin_lock(&fs_info->delayed_iput_lock);
2061 list_splice_init(&fs_info->delayed_iputs, &list);
2062 spin_unlock(&fs_info->delayed_iput_lock);
2063
2064 while (!list_empty(&list)) {
2065 delayed = list_entry(list.next, struct delayed_iput, list);
2066 list_del(&delayed->list);
2067 iput(delayed->inode);
2068 kfree(delayed);
2069 }
2070 up_read(&root->fs_info->cleanup_work_sem);
2071}
2072
2011/* 2073/*
2012 * This creates an orphan entry for the given inode in case something goes 2074 * This creates an orphan entry for the given inode in case something goes
2013 * wrong in the middle of an unlink/truncate. 2075 * wrong in the middle of an unlink/truncate.
@@ -2080,16 +2142,17 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
2080 struct inode *inode; 2142 struct inode *inode;
2081 int ret = 0, nr_unlink = 0, nr_truncate = 0; 2143 int ret = 0, nr_unlink = 0, nr_truncate = 0;
2082 2144
2083 path = btrfs_alloc_path(); 2145 if (!xchg(&root->clean_orphans, 0))
2084 if (!path)
2085 return; 2146 return;
2147
2148 path = btrfs_alloc_path();
2149 BUG_ON(!path);
2086 path->reada = -1; 2150 path->reada = -1;
2087 2151
2088 key.objectid = BTRFS_ORPHAN_OBJECTID; 2152 key.objectid = BTRFS_ORPHAN_OBJECTID;
2089 btrfs_set_key_type(&key, BTRFS_ORPHAN_ITEM_KEY); 2153 btrfs_set_key_type(&key, BTRFS_ORPHAN_ITEM_KEY);
2090 key.offset = (u64)-1; 2154 key.offset = (u64)-1;
2091 2155
2092
2093 while (1) { 2156 while (1) {
2094 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 2157 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2095 if (ret < 0) { 2158 if (ret < 0) {
@@ -2834,37 +2897,40 @@ out:
2834 * min_type is the minimum key type to truncate down to. If set to 0, this 2897 * min_type is the minimum key type to truncate down to. If set to 0, this
2835 * will kill all the items on this inode, including the INODE_ITEM_KEY. 2898 * will kill all the items on this inode, including the INODE_ITEM_KEY.
2836 */ 2899 */
2837noinline int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, 2900int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
2838 struct btrfs_root *root, 2901 struct btrfs_root *root,
2839 struct inode *inode, 2902 struct inode *inode,
2840 u64 new_size, u32 min_type) 2903 u64 new_size, u32 min_type)
2841{ 2904{
2842 int ret;
2843 struct btrfs_path *path; 2905 struct btrfs_path *path;
2844 struct btrfs_key key;
2845 struct btrfs_key found_key;
2846 u32 found_type = (u8)-1;
2847 struct extent_buffer *leaf; 2906 struct extent_buffer *leaf;
2848 struct btrfs_file_extent_item *fi; 2907 struct btrfs_file_extent_item *fi;
2908 struct btrfs_key key;
2909 struct btrfs_key found_key;
2849 u64 extent_start = 0; 2910 u64 extent_start = 0;
2850 u64 extent_num_bytes = 0; 2911 u64 extent_num_bytes = 0;
2851 u64 extent_offset = 0; 2912 u64 extent_offset = 0;
2852 u64 item_end = 0; 2913 u64 item_end = 0;
2914 u64 mask = root->sectorsize - 1;
2915 u32 found_type = (u8)-1;
2853 int found_extent; 2916 int found_extent;
2854 int del_item; 2917 int del_item;
2855 int pending_del_nr = 0; 2918 int pending_del_nr = 0;
2856 int pending_del_slot = 0; 2919 int pending_del_slot = 0;
2857 int extent_type = -1; 2920 int extent_type = -1;
2858 int encoding; 2921 int encoding;
2859 u64 mask = root->sectorsize - 1; 2922 int ret;
2923 int err = 0;
2924
2925 BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY);
2860 2926
2861 if (root->ref_cows) 2927 if (root->ref_cows)
2862 btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0); 2928 btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0);
2929
2863 path = btrfs_alloc_path(); 2930 path = btrfs_alloc_path();
2864 BUG_ON(!path); 2931 BUG_ON(!path);
2865 path->reada = -1; 2932 path->reada = -1;
2866 2933
2867 /* FIXME, add redo link to tree so we don't leak on crash */
2868 key.objectid = inode->i_ino; 2934 key.objectid = inode->i_ino;
2869 key.offset = (u64)-1; 2935 key.offset = (u64)-1;
2870 key.type = (u8)-1; 2936 key.type = (u8)-1;
@@ -2872,17 +2938,17 @@ noinline int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
2872search_again: 2938search_again:
2873 path->leave_spinning = 1; 2939 path->leave_spinning = 1;
2874 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 2940 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2875 if (ret < 0) 2941 if (ret < 0) {
2876 goto error; 2942 err = ret;
2943 goto out;
2944 }
2877 2945
2878 if (ret > 0) { 2946 if (ret > 0) {
2879 /* there are no items in the tree for us to truncate, we're 2947 /* there are no items in the tree for us to truncate, we're
2880 * done 2948 * done
2881 */ 2949 */
2882 if (path->slots[0] == 0) { 2950 if (path->slots[0] == 0)
2883 ret = 0; 2951 goto out;
2884 goto error;
2885 }
2886 path->slots[0]--; 2952 path->slots[0]--;
2887 } 2953 }
2888 2954
@@ -2917,28 +2983,17 @@ search_again:
2917 } 2983 }
2918 item_end--; 2984 item_end--;
2919 } 2985 }
2920 if (item_end < new_size) { 2986 if (found_type > min_type) {
2921 if (found_type == BTRFS_DIR_ITEM_KEY) 2987 del_item = 1;
2922 found_type = BTRFS_INODE_ITEM_KEY; 2988 } else {
2923 else if (found_type == BTRFS_EXTENT_ITEM_KEY) 2989 if (item_end < new_size)
2924 found_type = BTRFS_EXTENT_DATA_KEY;
2925 else if (found_type == BTRFS_EXTENT_DATA_KEY)
2926 found_type = BTRFS_XATTR_ITEM_KEY;
2927 else if (found_type == BTRFS_XATTR_ITEM_KEY)
2928 found_type = BTRFS_INODE_REF_KEY;
2929 else if (found_type)
2930 found_type--;
2931 else
2932 break; 2990 break;
2933 btrfs_set_key_type(&key, found_type); 2991 if (found_key.offset >= new_size)
2934 goto next; 2992 del_item = 1;
2993 else
2994 del_item = 0;
2935 } 2995 }
2936 if (found_key.offset >= new_size)
2937 del_item = 1;
2938 else
2939 del_item = 0;
2940 found_extent = 0; 2996 found_extent = 0;
2941
2942 /* FIXME, shrink the extent if the ref count is only 1 */ 2997 /* FIXME, shrink the extent if the ref count is only 1 */
2943 if (found_type != BTRFS_EXTENT_DATA_KEY) 2998 if (found_type != BTRFS_EXTENT_DATA_KEY)
2944 goto delete; 2999 goto delete;
@@ -3025,42 +3080,36 @@ delete:
3025 inode->i_ino, extent_offset); 3080 inode->i_ino, extent_offset);
3026 BUG_ON(ret); 3081 BUG_ON(ret);
3027 } 3082 }
3028next:
3029 if (path->slots[0] == 0) {
3030 if (pending_del_nr)
3031 goto del_pending;
3032 btrfs_release_path(root, path);
3033 if (found_type == BTRFS_INODE_ITEM_KEY)
3034 break;
3035 goto search_again;
3036 }
3037 3083
3038 path->slots[0]--; 3084 if (found_type == BTRFS_INODE_ITEM_KEY)
3039 if (pending_del_nr && 3085 break;
3040 path->slots[0] + 1 != pending_del_slot) { 3086
3041 struct btrfs_key debug; 3087 if (path->slots[0] == 0 ||
3042del_pending: 3088 path->slots[0] != pending_del_slot) {
3043 btrfs_item_key_to_cpu(path->nodes[0], &debug, 3089 if (root->ref_cows) {
3044 pending_del_slot); 3090 err = -EAGAIN;
3045 ret = btrfs_del_items(trans, root, path, 3091 goto out;
3046 pending_del_slot, 3092 }
3047 pending_del_nr); 3093 if (pending_del_nr) {
3048 BUG_ON(ret); 3094 ret = btrfs_del_items(trans, root, path,
3049 pending_del_nr = 0; 3095 pending_del_slot,
3096 pending_del_nr);
3097 BUG_ON(ret);
3098 pending_del_nr = 0;
3099 }
3050 btrfs_release_path(root, path); 3100 btrfs_release_path(root, path);
3051 if (found_type == BTRFS_INODE_ITEM_KEY)
3052 break;
3053 goto search_again; 3101 goto search_again;
3102 } else {
3103 path->slots[0]--;
3054 } 3104 }
3055 } 3105 }
3056 ret = 0; 3106out:
3057error:
3058 if (pending_del_nr) { 3107 if (pending_del_nr) {
3059 ret = btrfs_del_items(trans, root, path, pending_del_slot, 3108 ret = btrfs_del_items(trans, root, path, pending_del_slot,
3060 pending_del_nr); 3109 pending_del_nr);
3061 } 3110 }
3062 btrfs_free_path(path); 3111 btrfs_free_path(path);
3063 return ret; 3112 return err;
3064} 3113}
3065 3114
3066/* 3115/*
@@ -3180,10 +3229,6 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
3180 if (size <= hole_start) 3229 if (size <= hole_start)
3181 return 0; 3230 return 0;
3182 3231
3183 err = btrfs_truncate_page(inode->i_mapping, inode->i_size);
3184 if (err)
3185 return err;
3186
3187 while (1) { 3232 while (1) {
3188 struct btrfs_ordered_extent *ordered; 3233 struct btrfs_ordered_extent *ordered;
3189 btrfs_wait_ordered_range(inode, hole_start, 3234 btrfs_wait_ordered_range(inode, hole_start,
@@ -3196,9 +3241,6 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
3196 btrfs_put_ordered_extent(ordered); 3241 btrfs_put_ordered_extent(ordered);
3197 } 3242 }
3198 3243
3199 trans = btrfs_start_transaction(root, 1);
3200 btrfs_set_trans_block_group(trans, inode);
3201
3202 cur_offset = hole_start; 3244 cur_offset = hole_start;
3203 while (1) { 3245 while (1) {
3204 em = btrfs_get_extent(inode, NULL, 0, cur_offset, 3246 em = btrfs_get_extent(inode, NULL, 0, cur_offset,
@@ -3206,40 +3248,120 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
3206 BUG_ON(IS_ERR(em) || !em); 3248 BUG_ON(IS_ERR(em) || !em);
3207 last_byte = min(extent_map_end(em), block_end); 3249 last_byte = min(extent_map_end(em), block_end);
3208 last_byte = (last_byte + mask) & ~mask; 3250 last_byte = (last_byte + mask) & ~mask;
3209 if (test_bit(EXTENT_FLAG_VACANCY, &em->flags)) { 3251 if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
3210 u64 hint_byte = 0; 3252 u64 hint_byte = 0;
3211 hole_size = last_byte - cur_offset; 3253 hole_size = last_byte - cur_offset;
3212 err = btrfs_drop_extents(trans, root, inode,
3213 cur_offset,
3214 cur_offset + hole_size,
3215 block_end,
3216 cur_offset, &hint_byte, 1);
3217 if (err)
3218 break;
3219 3254
3220 err = btrfs_reserve_metadata_space(root, 1); 3255 err = btrfs_reserve_metadata_space(root, 2);
3221 if (err) 3256 if (err)
3222 break; 3257 break;
3223 3258
3259 trans = btrfs_start_transaction(root, 1);
3260 btrfs_set_trans_block_group(trans, inode);
3261
3262 err = btrfs_drop_extents(trans, inode, cur_offset,
3263 cur_offset + hole_size,
3264 &hint_byte, 1);
3265 BUG_ON(err);
3266
3224 err = btrfs_insert_file_extent(trans, root, 3267 err = btrfs_insert_file_extent(trans, root,
3225 inode->i_ino, cur_offset, 0, 3268 inode->i_ino, cur_offset, 0,
3226 0, hole_size, 0, hole_size, 3269 0, hole_size, 0, hole_size,
3227 0, 0, 0); 3270 0, 0, 0);
3271 BUG_ON(err);
3272
3228 btrfs_drop_extent_cache(inode, hole_start, 3273 btrfs_drop_extent_cache(inode, hole_start,
3229 last_byte - 1, 0); 3274 last_byte - 1, 0);
3230 btrfs_unreserve_metadata_space(root, 1); 3275
3276 btrfs_end_transaction(trans, root);
3277 btrfs_unreserve_metadata_space(root, 2);
3231 } 3278 }
3232 free_extent_map(em); 3279 free_extent_map(em);
3233 cur_offset = last_byte; 3280 cur_offset = last_byte;
3234 if (err || cur_offset >= block_end) 3281 if (cur_offset >= block_end)
3235 break; 3282 break;
3236 } 3283 }
3237 3284
3238 btrfs_end_transaction(trans, root);
3239 unlock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS); 3285 unlock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS);
3240 return err; 3286 return err;
3241} 3287}
3242 3288
3289static int btrfs_setattr_size(struct inode *inode, struct iattr *attr)
3290{
3291 struct btrfs_root *root = BTRFS_I(inode)->root;
3292 struct btrfs_trans_handle *trans;
3293 unsigned long nr;
3294 int ret;
3295
3296 if (attr->ia_size == inode->i_size)
3297 return 0;
3298
3299 if (attr->ia_size > inode->i_size) {
3300 unsigned long limit;
3301 limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
3302 if (attr->ia_size > inode->i_sb->s_maxbytes)
3303 return -EFBIG;
3304 if (limit != RLIM_INFINITY && attr->ia_size > limit) {
3305 send_sig(SIGXFSZ, current, 0);
3306 return -EFBIG;
3307 }
3308 }
3309
3310 ret = btrfs_reserve_metadata_space(root, 1);
3311 if (ret)
3312 return ret;
3313
3314 trans = btrfs_start_transaction(root, 1);
3315 btrfs_set_trans_block_group(trans, inode);
3316
3317 ret = btrfs_orphan_add(trans, inode);
3318 BUG_ON(ret);
3319
3320 nr = trans->blocks_used;
3321 btrfs_end_transaction(trans, root);
3322 btrfs_unreserve_metadata_space(root, 1);
3323 btrfs_btree_balance_dirty(root, nr);
3324
3325 if (attr->ia_size > inode->i_size) {
3326 ret = btrfs_cont_expand(inode, attr->ia_size);
3327 if (ret) {
3328 btrfs_truncate(inode);
3329 return ret;
3330 }
3331
3332 i_size_write(inode, attr->ia_size);
3333 btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
3334
3335 trans = btrfs_start_transaction(root, 1);
3336 btrfs_set_trans_block_group(trans, inode);
3337
3338 ret = btrfs_update_inode(trans, root, inode);
3339 BUG_ON(ret);
3340 if (inode->i_nlink > 0) {
3341 ret = btrfs_orphan_del(trans, inode);
3342 BUG_ON(ret);
3343 }
3344 nr = trans->blocks_used;
3345 btrfs_end_transaction(trans, root);
3346 btrfs_btree_balance_dirty(root, nr);
3347 return 0;
3348 }
3349
3350 /*
3351 * We're truncating a file that used to have good data down to
3352 * zero. Make sure it gets into the ordered flush list so that
3353 * any new writes get down to disk quickly.
3354 */
3355 if (attr->ia_size == 0)
3356 BTRFS_I(inode)->ordered_data_close = 1;
3357
3358 /* we don't support swapfiles, so vmtruncate shouldn't fail */
3359 ret = vmtruncate(inode, attr->ia_size);
3360 BUG_ON(ret);
3361
3362 return 0;
3363}
3364
3243static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) 3365static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
3244{ 3366{
3245 struct inode *inode = dentry->d_inode; 3367 struct inode *inode = dentry->d_inode;
@@ -3250,23 +3372,14 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
3250 return err; 3372 return err;
3251 3373
3252 if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) { 3374 if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) {
3253 if (attr->ia_size > inode->i_size) { 3375 err = btrfs_setattr_size(inode, attr);
3254 err = btrfs_cont_expand(inode, attr->ia_size); 3376 if (err)
3255 if (err) 3377 return err;
3256 return err;
3257 } else if (inode->i_size > 0 &&
3258 attr->ia_size == 0) {
3259
3260 /* we're truncating a file that used to have good
3261 * data down to zero. Make sure it gets into
3262 * the ordered flush list so that any new writes
3263 * get down to disk quickly.
3264 */
3265 BTRFS_I(inode)->ordered_data_close = 1;
3266 }
3267 } 3378 }
3379 attr->ia_valid &= ~ATTR_SIZE;
3268 3380
3269 err = inode_setattr(inode, attr); 3381 if (attr->ia_valid)
3382 err = inode_setattr(inode, attr);
3270 3383
3271 if (!err && ((attr->ia_valid & ATTR_MODE))) 3384 if (!err && ((attr->ia_valid & ATTR_MODE)))
3272 err = btrfs_acl_chmod(inode); 3385 err = btrfs_acl_chmod(inode);
@@ -3287,36 +3400,43 @@ void btrfs_delete_inode(struct inode *inode)
3287 } 3400 }
3288 btrfs_wait_ordered_range(inode, 0, (u64)-1); 3401 btrfs_wait_ordered_range(inode, 0, (u64)-1);
3289 3402
3403 if (root->fs_info->log_root_recovering) {
3404 BUG_ON(!list_empty(&BTRFS_I(inode)->i_orphan));
3405 goto no_delete;
3406 }
3407
3290 if (inode->i_nlink > 0) { 3408 if (inode->i_nlink > 0) {
3291 BUG_ON(btrfs_root_refs(&root->root_item) != 0); 3409 BUG_ON(btrfs_root_refs(&root->root_item) != 0);
3292 goto no_delete; 3410 goto no_delete;
3293 } 3411 }
3294 3412
3295 btrfs_i_size_write(inode, 0); 3413 btrfs_i_size_write(inode, 0);
3296 trans = btrfs_join_transaction(root, 1);
3297 3414
3298 btrfs_set_trans_block_group(trans, inode); 3415 while (1) {
3299 ret = btrfs_truncate_inode_items(trans, root, inode, inode->i_size, 0); 3416 trans = btrfs_start_transaction(root, 1);
3300 if (ret) { 3417 btrfs_set_trans_block_group(trans, inode);
3301 btrfs_orphan_del(NULL, inode); 3418 ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0);
3302 goto no_delete_lock;
3303 }
3304 3419
3305 btrfs_orphan_del(trans, inode); 3420 if (ret != -EAGAIN)
3421 break;
3306 3422
3307 nr = trans->blocks_used; 3423 nr = trans->blocks_used;
3308 clear_inode(inode); 3424 btrfs_end_transaction(trans, root);
3425 trans = NULL;
3426 btrfs_btree_balance_dirty(root, nr);
3427 }
3309 3428
3310 btrfs_end_transaction(trans, root); 3429 if (ret == 0) {
3311 btrfs_btree_balance_dirty(root, nr); 3430 ret = btrfs_orphan_del(trans, inode);
3312 return; 3431 BUG_ON(ret);
3432 }
3313 3433
3314no_delete_lock:
3315 nr = trans->blocks_used; 3434 nr = trans->blocks_used;
3316 btrfs_end_transaction(trans, root); 3435 btrfs_end_transaction(trans, root);
3317 btrfs_btree_balance_dirty(root, nr); 3436 btrfs_btree_balance_dirty(root, nr);
3318no_delete: 3437no_delete:
3319 clear_inode(inode); 3438 clear_inode(inode);
3439 return;
3320} 3440}
3321 3441
3322/* 3442/*
@@ -3569,7 +3689,6 @@ static noinline void init_btrfs_i(struct inode *inode)
3569 INIT_LIST_HEAD(&BTRFS_I(inode)->ordered_operations); 3689 INIT_LIST_HEAD(&BTRFS_I(inode)->ordered_operations);
3570 RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node); 3690 RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node);
3571 btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree); 3691 btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree);
3572 mutex_init(&BTRFS_I(inode)->extent_mutex);
3573 mutex_init(&BTRFS_I(inode)->log_mutex); 3692 mutex_init(&BTRFS_I(inode)->log_mutex);
3574} 3693}
3575 3694
@@ -3695,6 +3814,13 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
3695 } 3814 }
3696 srcu_read_unlock(&root->fs_info->subvol_srcu, index); 3815 srcu_read_unlock(&root->fs_info->subvol_srcu, index);
3697 3816
3817 if (root != sub_root) {
3818 down_read(&root->fs_info->cleanup_work_sem);
3819 if (!(inode->i_sb->s_flags & MS_RDONLY))
3820 btrfs_orphan_cleanup(sub_root);
3821 up_read(&root->fs_info->cleanup_work_sem);
3822 }
3823
3698 return inode; 3824 return inode;
3699} 3825}
3700 3826
@@ -4219,7 +4345,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
4219 if (IS_ERR(inode)) 4345 if (IS_ERR(inode))
4220 goto out_unlock; 4346 goto out_unlock;
4221 4347
4222 err = btrfs_init_inode_security(inode, dir); 4348 err = btrfs_init_inode_security(trans, inode, dir);
4223 if (err) { 4349 if (err) {
4224 drop_inode = 1; 4350 drop_inode = 1;
4225 goto out_unlock; 4351 goto out_unlock;
@@ -4290,7 +4416,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
4290 if (IS_ERR(inode)) 4416 if (IS_ERR(inode))
4291 goto out_unlock; 4417 goto out_unlock;
4292 4418
4293 err = btrfs_init_inode_security(inode, dir); 4419 err = btrfs_init_inode_security(trans, inode, dir);
4294 if (err) { 4420 if (err) {
4295 drop_inode = 1; 4421 drop_inode = 1;
4296 goto out_unlock; 4422 goto out_unlock;
@@ -4336,6 +4462,10 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
4336 if (inode->i_nlink == 0) 4462 if (inode->i_nlink == 0)
4337 return -ENOENT; 4463 return -ENOENT;
4338 4464
4465 /* do not allow sys_link's with other subvols of the same device */
4466 if (root->objectid != BTRFS_I(inode)->root->objectid)
4467 return -EPERM;
4468
4339 /* 4469 /*
4340 * 1 item for inode ref 4470 * 1 item for inode ref
4341 * 2 items for dir items 4471 * 2 items for dir items
@@ -4423,7 +4553,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
4423 4553
4424 drop_on_err = 1; 4554 drop_on_err = 1;
4425 4555
4426 err = btrfs_init_inode_security(inode, dir); 4556 err = btrfs_init_inode_security(trans, inode, dir);
4427 if (err) 4557 if (err)
4428 goto out_fail; 4558 goto out_fail;
4429 4559
@@ -5074,17 +5204,20 @@ static void btrfs_truncate(struct inode *inode)
5074 unsigned long nr; 5204 unsigned long nr;
5075 u64 mask = root->sectorsize - 1; 5205 u64 mask = root->sectorsize - 1;
5076 5206
5077 if (!S_ISREG(inode->i_mode)) 5207 if (!S_ISREG(inode->i_mode)) {
5078 return; 5208 WARN_ON(1);
5079 if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
5080 return; 5209 return;
5210 }
5081 5211
5082 ret = btrfs_truncate_page(inode->i_mapping, inode->i_size); 5212 ret = btrfs_truncate_page(inode->i_mapping, inode->i_size);
5083 if (ret) 5213 if (ret)
5084 return; 5214 return;
5215
5085 btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); 5216 btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1);
5217 btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
5086 5218
5087 trans = btrfs_start_transaction(root, 1); 5219 trans = btrfs_start_transaction(root, 1);
5220 btrfs_set_trans_block_group(trans, inode);
5088 5221
5089 /* 5222 /*
5090 * setattr is responsible for setting the ordered_data_close flag, 5223 * setattr is responsible for setting the ordered_data_close flag,
@@ -5106,21 +5239,32 @@ static void btrfs_truncate(struct inode *inode)
5106 if (inode->i_size == 0 && BTRFS_I(inode)->ordered_data_close) 5239 if (inode->i_size == 0 && BTRFS_I(inode)->ordered_data_close)
5107 btrfs_add_ordered_operation(trans, root, inode); 5240 btrfs_add_ordered_operation(trans, root, inode);
5108 5241
5109 btrfs_set_trans_block_group(trans, inode); 5242 while (1) {
5110 btrfs_i_size_write(inode, inode->i_size); 5243 ret = btrfs_truncate_inode_items(trans, root, inode,
5244 inode->i_size,
5245 BTRFS_EXTENT_DATA_KEY);
5246 if (ret != -EAGAIN)
5247 break;
5111 5248
5112 ret = btrfs_orphan_add(trans, inode); 5249 ret = btrfs_update_inode(trans, root, inode);
5113 if (ret) 5250 BUG_ON(ret);
5114 goto out; 5251
5115 /* FIXME, add redo link to tree so we don't leak on crash */ 5252 nr = trans->blocks_used;
5116 ret = btrfs_truncate_inode_items(trans, root, inode, inode->i_size, 5253 btrfs_end_transaction(trans, root);
5117 BTRFS_EXTENT_DATA_KEY); 5254 btrfs_btree_balance_dirty(root, nr);
5118 btrfs_update_inode(trans, root, inode); 5255
5256 trans = btrfs_start_transaction(root, 1);
5257 btrfs_set_trans_block_group(trans, inode);
5258 }
5119 5259
5120 ret = btrfs_orphan_del(trans, inode); 5260 if (ret == 0 && inode->i_nlink > 0) {
5261 ret = btrfs_orphan_del(trans, inode);
5262 BUG_ON(ret);
5263 }
5264
5265 ret = btrfs_update_inode(trans, root, inode);
5121 BUG_ON(ret); 5266 BUG_ON(ret);
5122 5267
5123out:
5124 nr = trans->blocks_used; 5268 nr = trans->blocks_used;
5125 ret = btrfs_end_transaction_throttle(trans, root); 5269 ret = btrfs_end_transaction_throttle(trans, root);
5126 BUG_ON(ret); 5270 BUG_ON(ret);
@@ -5217,9 +5361,9 @@ void btrfs_destroy_inode(struct inode *inode)
5217 5361
5218 spin_lock(&root->list_lock); 5362 spin_lock(&root->list_lock);
5219 if (!list_empty(&BTRFS_I(inode)->i_orphan)) { 5363 if (!list_empty(&BTRFS_I(inode)->i_orphan)) {
5220 printk(KERN_ERR "BTRFS: inode %lu: inode still on the orphan" 5364 printk(KERN_INFO "BTRFS: inode %lu still on the orphan list\n",
5221 " list\n", inode->i_ino); 5365 inode->i_ino);
5222 dump_stack(); 5366 list_del_init(&BTRFS_I(inode)->i_orphan);
5223 } 5367 }
5224 spin_unlock(&root->list_lock); 5368 spin_unlock(&root->list_lock);
5225 5369
@@ -5476,7 +5620,7 @@ out_fail:
5476 * some fairly slow code that needs optimization. This walks the list 5620 * some fairly slow code that needs optimization. This walks the list
5477 * of all the inodes with pending delalloc and forces them to disk. 5621 * of all the inodes with pending delalloc and forces them to disk.
5478 */ 5622 */
5479int btrfs_start_delalloc_inodes(struct btrfs_root *root) 5623int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
5480{ 5624{
5481 struct list_head *head = &root->fs_info->delalloc_inodes; 5625 struct list_head *head = &root->fs_info->delalloc_inodes;
5482 struct btrfs_inode *binode; 5626 struct btrfs_inode *binode;
@@ -5495,7 +5639,10 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root)
5495 spin_unlock(&root->fs_info->delalloc_lock); 5639 spin_unlock(&root->fs_info->delalloc_lock);
5496 if (inode) { 5640 if (inode) {
5497 filemap_flush(inode->i_mapping); 5641 filemap_flush(inode->i_mapping);
5498 iput(inode); 5642 if (delay_iput)
5643 btrfs_add_delayed_iput(inode);
5644 else
5645 iput(inode);
5499 } 5646 }
5500 cond_resched(); 5647 cond_resched();
5501 spin_lock(&root->fs_info->delalloc_lock); 5648 spin_lock(&root->fs_info->delalloc_lock);
@@ -5569,7 +5716,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
5569 if (IS_ERR(inode)) 5716 if (IS_ERR(inode))
5570 goto out_unlock; 5717 goto out_unlock;
5571 5718
5572 err = btrfs_init_inode_security(inode, dir); 5719 err = btrfs_init_inode_security(trans, inode, dir);
5573 if (err) { 5720 if (err) {
5574 drop_inode = 1; 5721 drop_inode = 1;
5575 goto out_unlock; 5722 goto out_unlock;
@@ -5641,10 +5788,10 @@ out_fail:
5641 return err; 5788 return err;
5642} 5789}
5643 5790
5644static int prealloc_file_range(struct btrfs_trans_handle *trans, 5791static int prealloc_file_range(struct inode *inode, u64 start, u64 end,
5645 struct inode *inode, u64 start, u64 end, 5792 u64 alloc_hint, int mode)
5646 u64 locked_end, u64 alloc_hint, int mode)
5647{ 5793{
5794 struct btrfs_trans_handle *trans;
5648 struct btrfs_root *root = BTRFS_I(inode)->root; 5795 struct btrfs_root *root = BTRFS_I(inode)->root;
5649 struct btrfs_key ins; 5796 struct btrfs_key ins;
5650 u64 alloc_size; 5797 u64 alloc_size;
@@ -5655,43 +5802,56 @@ static int prealloc_file_range(struct btrfs_trans_handle *trans,
5655 while (num_bytes > 0) { 5802 while (num_bytes > 0) {
5656 alloc_size = min(num_bytes, root->fs_info->max_extent); 5803 alloc_size = min(num_bytes, root->fs_info->max_extent);
5657 5804
5658 ret = btrfs_reserve_metadata_space(root, 1); 5805 trans = btrfs_start_transaction(root, 1);
5659 if (ret)
5660 goto out;
5661 5806
5662 ret = btrfs_reserve_extent(trans, root, alloc_size, 5807 ret = btrfs_reserve_extent(trans, root, alloc_size,
5663 root->sectorsize, 0, alloc_hint, 5808 root->sectorsize, 0, alloc_hint,
5664 (u64)-1, &ins, 1); 5809 (u64)-1, &ins, 1);
5665 if (ret) { 5810 if (ret) {
5666 WARN_ON(1); 5811 WARN_ON(1);
5667 goto out; 5812 goto stop_trans;
5813 }
5814
5815 ret = btrfs_reserve_metadata_space(root, 3);
5816 if (ret) {
5817 btrfs_free_reserved_extent(root, ins.objectid,
5818 ins.offset);
5819 goto stop_trans;
5668 } 5820 }
5821
5669 ret = insert_reserved_file_extent(trans, inode, 5822 ret = insert_reserved_file_extent(trans, inode,
5670 cur_offset, ins.objectid, 5823 cur_offset, ins.objectid,
5671 ins.offset, ins.offset, 5824 ins.offset, ins.offset,
5672 ins.offset, locked_end, 5825 ins.offset, 0, 0, 0,
5673 0, 0, 0,
5674 BTRFS_FILE_EXTENT_PREALLOC); 5826 BTRFS_FILE_EXTENT_PREALLOC);
5675 BUG_ON(ret); 5827 BUG_ON(ret);
5676 btrfs_drop_extent_cache(inode, cur_offset, 5828 btrfs_drop_extent_cache(inode, cur_offset,
5677 cur_offset + ins.offset -1, 0); 5829 cur_offset + ins.offset -1, 0);
5830
5678 num_bytes -= ins.offset; 5831 num_bytes -= ins.offset;
5679 cur_offset += ins.offset; 5832 cur_offset += ins.offset;
5680 alloc_hint = ins.objectid + ins.offset; 5833 alloc_hint = ins.objectid + ins.offset;
5681 btrfs_unreserve_metadata_space(root, 1); 5834
5682 }
5683out:
5684 if (cur_offset > start) {
5685 inode->i_ctime = CURRENT_TIME; 5835 inode->i_ctime = CURRENT_TIME;
5686 BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC; 5836 BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC;
5687 if (!(mode & FALLOC_FL_KEEP_SIZE) && 5837 if (!(mode & FALLOC_FL_KEEP_SIZE) &&
5688 cur_offset > i_size_read(inode)) 5838 cur_offset > inode->i_size) {
5689 btrfs_i_size_write(inode, cur_offset); 5839 i_size_write(inode, cur_offset);
5840 btrfs_ordered_update_i_size(inode, cur_offset, NULL);
5841 }
5842
5690 ret = btrfs_update_inode(trans, root, inode); 5843 ret = btrfs_update_inode(trans, root, inode);
5691 BUG_ON(ret); 5844 BUG_ON(ret);
5845
5846 btrfs_end_transaction(trans, root);
5847 btrfs_unreserve_metadata_space(root, 3);
5692 } 5848 }
5849 return ret;
5693 5850
5851stop_trans:
5852 btrfs_end_transaction(trans, root);
5694 return ret; 5853 return ret;
5854
5695} 5855}
5696 5856
5697static long btrfs_fallocate(struct inode *inode, int mode, 5857static long btrfs_fallocate(struct inode *inode, int mode,
@@ -5705,8 +5865,6 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5705 u64 locked_end; 5865 u64 locked_end;
5706 u64 mask = BTRFS_I(inode)->root->sectorsize - 1; 5866 u64 mask = BTRFS_I(inode)->root->sectorsize - 1;
5707 struct extent_map *em; 5867 struct extent_map *em;
5708 struct btrfs_trans_handle *trans;
5709 struct btrfs_root *root;
5710 int ret; 5868 int ret;
5711 5869
5712 alloc_start = offset & ~mask; 5870 alloc_start = offset & ~mask;
@@ -5725,9 +5883,7 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5725 goto out; 5883 goto out;
5726 } 5884 }
5727 5885
5728 root = BTRFS_I(inode)->root; 5886 ret = btrfs_check_data_free_space(BTRFS_I(inode)->root, inode,
5729
5730 ret = btrfs_check_data_free_space(root, inode,
5731 alloc_end - alloc_start); 5887 alloc_end - alloc_start);
5732 if (ret) 5888 if (ret)
5733 goto out; 5889 goto out;
@@ -5736,12 +5892,6 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5736 while (1) { 5892 while (1) {
5737 struct btrfs_ordered_extent *ordered; 5893 struct btrfs_ordered_extent *ordered;
5738 5894
5739 trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1);
5740 if (!trans) {
5741 ret = -EIO;
5742 goto out_free;
5743 }
5744
5745 /* the extent lock is ordered inside the running 5895 /* the extent lock is ordered inside the running
5746 * transaction 5896 * transaction
5747 */ 5897 */
@@ -5755,8 +5905,6 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5755 btrfs_put_ordered_extent(ordered); 5905 btrfs_put_ordered_extent(ordered);
5756 unlock_extent(&BTRFS_I(inode)->io_tree, 5906 unlock_extent(&BTRFS_I(inode)->io_tree,
5757 alloc_start, locked_end, GFP_NOFS); 5907 alloc_start, locked_end, GFP_NOFS);
5758 btrfs_end_transaction(trans, BTRFS_I(inode)->root);
5759
5760 /* 5908 /*
5761 * we can't wait on the range with the transaction 5909 * we can't wait on the range with the transaction
5762 * running or with the extent lock held 5910 * running or with the extent lock held
@@ -5777,10 +5925,12 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5777 BUG_ON(IS_ERR(em) || !em); 5925 BUG_ON(IS_ERR(em) || !em);
5778 last_byte = min(extent_map_end(em), alloc_end); 5926 last_byte = min(extent_map_end(em), alloc_end);
5779 last_byte = (last_byte + mask) & ~mask; 5927 last_byte = (last_byte + mask) & ~mask;
5780 if (em->block_start == EXTENT_MAP_HOLE) { 5928 if (em->block_start == EXTENT_MAP_HOLE ||
5781 ret = prealloc_file_range(trans, inode, cur_offset, 5929 (cur_offset >= inode->i_size &&
5782 last_byte, locked_end + 1, 5930 !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
5783 alloc_hint, mode); 5931 ret = prealloc_file_range(inode,
5932 cur_offset, last_byte,
5933 alloc_hint, mode);
5784 if (ret < 0) { 5934 if (ret < 0) {
5785 free_extent_map(em); 5935 free_extent_map(em);
5786 break; 5936 break;
@@ -5799,9 +5949,8 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5799 unlock_extent(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, 5949 unlock_extent(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
5800 GFP_NOFS); 5950 GFP_NOFS);
5801 5951
5802 btrfs_end_transaction(trans, BTRFS_I(inode)->root); 5952 btrfs_free_reserved_data_space(BTRFS_I(inode)->root, inode,
5803out_free: 5953 alloc_end - alloc_start);
5804 btrfs_free_reserved_data_space(root, inode, alloc_end - alloc_start);
5805out: 5954out:
5806 mutex_unlock(&inode->i_mutex); 5955 mutex_unlock(&inode->i_mutex);
5807 return ret; 5956 return ret;
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index cdbb054102b9..645a17927a8f 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -237,7 +237,6 @@ static noinline int create_subvol(struct btrfs_root *root,
237 u64 objectid; 237 u64 objectid;
238 u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID; 238 u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID;
239 u64 index = 0; 239 u64 index = 0;
240 unsigned long nr = 1;
241 240
242 /* 241 /*
243 * 1 - inode item 242 * 1 - inode item
@@ -290,7 +289,7 @@ static noinline int create_subvol(struct btrfs_root *root,
290 btrfs_set_root_generation(&root_item, trans->transid); 289 btrfs_set_root_generation(&root_item, trans->transid);
291 btrfs_set_root_level(&root_item, 0); 290 btrfs_set_root_level(&root_item, 0);
292 btrfs_set_root_refs(&root_item, 1); 291 btrfs_set_root_refs(&root_item, 1);
293 btrfs_set_root_used(&root_item, 0); 292 btrfs_set_root_used(&root_item, leaf->len);
294 btrfs_set_root_last_snapshot(&root_item, 0); 293 btrfs_set_root_last_snapshot(&root_item, 0);
295 294
296 memset(&root_item.drop_progress, 0, sizeof(root_item.drop_progress)); 295 memset(&root_item.drop_progress, 0, sizeof(root_item.drop_progress));
@@ -342,24 +341,21 @@ static noinline int create_subvol(struct btrfs_root *root,
342 341
343 d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry)); 342 d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry));
344fail: 343fail:
345 nr = trans->blocks_used;
346 err = btrfs_commit_transaction(trans, root); 344 err = btrfs_commit_transaction(trans, root);
347 if (err && !ret) 345 if (err && !ret)
348 ret = err; 346 ret = err;
349 347
350 btrfs_unreserve_metadata_space(root, 6); 348 btrfs_unreserve_metadata_space(root, 6);
351 btrfs_btree_balance_dirty(root, nr);
352 return ret; 349 return ret;
353} 350}
354 351
355static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, 352static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
356 char *name, int namelen) 353 char *name, int namelen)
357{ 354{
355 struct inode *inode;
358 struct btrfs_pending_snapshot *pending_snapshot; 356 struct btrfs_pending_snapshot *pending_snapshot;
359 struct btrfs_trans_handle *trans; 357 struct btrfs_trans_handle *trans;
360 int ret = 0; 358 int ret;
361 int err;
362 unsigned long nr = 0;
363 359
364 if (!root->ref_cows) 360 if (!root->ref_cows)
365 return -EINVAL; 361 return -EINVAL;
@@ -372,20 +368,20 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
372 */ 368 */
373 ret = btrfs_reserve_metadata_space(root, 6); 369 ret = btrfs_reserve_metadata_space(root, 6);
374 if (ret) 370 if (ret)
375 goto fail_unlock; 371 goto fail;
376 372
377 pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS); 373 pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS);
378 if (!pending_snapshot) { 374 if (!pending_snapshot) {
379 ret = -ENOMEM; 375 ret = -ENOMEM;
380 btrfs_unreserve_metadata_space(root, 6); 376 btrfs_unreserve_metadata_space(root, 6);
381 goto fail_unlock; 377 goto fail;
382 } 378 }
383 pending_snapshot->name = kmalloc(namelen + 1, GFP_NOFS); 379 pending_snapshot->name = kmalloc(namelen + 1, GFP_NOFS);
384 if (!pending_snapshot->name) { 380 if (!pending_snapshot->name) {
385 ret = -ENOMEM; 381 ret = -ENOMEM;
386 kfree(pending_snapshot); 382 kfree(pending_snapshot);
387 btrfs_unreserve_metadata_space(root, 6); 383 btrfs_unreserve_metadata_space(root, 6);
388 goto fail_unlock; 384 goto fail;
389 } 385 }
390 memcpy(pending_snapshot->name, name, namelen); 386 memcpy(pending_snapshot->name, name, namelen);
391 pending_snapshot->name[namelen] = '\0'; 387 pending_snapshot->name[namelen] = '\0';
@@ -395,10 +391,19 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
395 pending_snapshot->root = root; 391 pending_snapshot->root = root;
396 list_add(&pending_snapshot->list, 392 list_add(&pending_snapshot->list,
397 &trans->transaction->pending_snapshots); 393 &trans->transaction->pending_snapshots);
398 err = btrfs_commit_transaction(trans, root); 394 ret = btrfs_commit_transaction(trans, root);
395 BUG_ON(ret);
396 btrfs_unreserve_metadata_space(root, 6);
399 397
400fail_unlock: 398 inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry);
401 btrfs_btree_balance_dirty(root, nr); 399 if (IS_ERR(inode)) {
400 ret = PTR_ERR(inode);
401 goto fail;
402 }
403 BUG_ON(!inode);
404 d_instantiate(dentry, inode);
405 ret = 0;
406fail:
402 return ret; 407 return ret;
403} 408}
404 409
@@ -1027,8 +1032,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
1027 BUG_ON(!trans); 1032 BUG_ON(!trans);
1028 1033
1029 /* punch hole in destination first */ 1034 /* punch hole in destination first */
1030 btrfs_drop_extents(trans, root, inode, off, off + len, 1035 btrfs_drop_extents(trans, inode, off, off + len, &hint_byte, 1);
1031 off + len, 0, &hint_byte, 1);
1032 1036
1033 /* clone data */ 1037 /* clone data */
1034 key.objectid = src->i_ino; 1038 key.objectid = src->i_ino;
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 5799bc46a309..b10a49d4bc6a 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -291,16 +291,16 @@ int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
291 291
292/* 292/*
293 * remove an ordered extent from the tree. No references are dropped 293 * remove an ordered extent from the tree. No references are dropped
294 * but, anyone waiting on this extent is woken up. 294 * and you must wake_up entry->wait. You must hold the tree mutex
295 * while you call this function.
295 */ 296 */
296int btrfs_remove_ordered_extent(struct inode *inode, 297static int __btrfs_remove_ordered_extent(struct inode *inode,
297 struct btrfs_ordered_extent *entry) 298 struct btrfs_ordered_extent *entry)
298{ 299{
299 struct btrfs_ordered_inode_tree *tree; 300 struct btrfs_ordered_inode_tree *tree;
300 struct rb_node *node; 301 struct rb_node *node;
301 302
302 tree = &BTRFS_I(inode)->ordered_tree; 303 tree = &BTRFS_I(inode)->ordered_tree;
303 mutex_lock(&tree->mutex);
304 node = &entry->rb_node; 304 node = &entry->rb_node;
305 rb_erase(node, &tree->tree); 305 rb_erase(node, &tree->tree);
306 tree->last = NULL; 306 tree->last = NULL;
@@ -326,16 +326,34 @@ int btrfs_remove_ordered_extent(struct inode *inode,
326 } 326 }
327 spin_unlock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); 327 spin_unlock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock);
328 328
329 return 0;
330}
331
332/*
333 * remove an ordered extent from the tree. No references are dropped
334 * but any waiters are woken.
335 */
336int btrfs_remove_ordered_extent(struct inode *inode,
337 struct btrfs_ordered_extent *entry)
338{
339 struct btrfs_ordered_inode_tree *tree;
340 int ret;
341
342 tree = &BTRFS_I(inode)->ordered_tree;
343 mutex_lock(&tree->mutex);
344 ret = __btrfs_remove_ordered_extent(inode, entry);
329 mutex_unlock(&tree->mutex); 345 mutex_unlock(&tree->mutex);
330 wake_up(&entry->wait); 346 wake_up(&entry->wait);
331 return 0; 347
348 return ret;
332} 349}
333 350
334/* 351/*
335 * wait for all the ordered extents in a root. This is done when balancing 352 * wait for all the ordered extents in a root. This is done when balancing
336 * space between drives. 353 * space between drives.
337 */ 354 */
338int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only) 355int btrfs_wait_ordered_extents(struct btrfs_root *root,
356 int nocow_only, int delay_iput)
339{ 357{
340 struct list_head splice; 358 struct list_head splice;
341 struct list_head *cur; 359 struct list_head *cur;
@@ -372,7 +390,10 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only)
372 if (inode) { 390 if (inode) {
373 btrfs_start_ordered_extent(inode, ordered, 1); 391 btrfs_start_ordered_extent(inode, ordered, 1);
374 btrfs_put_ordered_extent(ordered); 392 btrfs_put_ordered_extent(ordered);
375 iput(inode); 393 if (delay_iput)
394 btrfs_add_delayed_iput(inode);
395 else
396 iput(inode);
376 } else { 397 } else {
377 btrfs_put_ordered_extent(ordered); 398 btrfs_put_ordered_extent(ordered);
378 } 399 }
@@ -430,7 +451,7 @@ again:
430 btrfs_wait_ordered_range(inode, 0, (u64)-1); 451 btrfs_wait_ordered_range(inode, 0, (u64)-1);
431 else 452 else
432 filemap_flush(inode->i_mapping); 453 filemap_flush(inode->i_mapping);
433 iput(inode); 454 btrfs_add_delayed_iput(inode);
434 } 455 }
435 456
436 cond_resched(); 457 cond_resched();
@@ -589,7 +610,7 @@ out:
589 * After an extent is done, call this to conditionally update the on disk 610 * After an extent is done, call this to conditionally update the on disk
590 * i_size. i_size is updated to cover any fully written part of the file. 611 * i_size. i_size is updated to cover any fully written part of the file.
591 */ 612 */
592int btrfs_ordered_update_i_size(struct inode *inode, 613int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
593 struct btrfs_ordered_extent *ordered) 614 struct btrfs_ordered_extent *ordered)
594{ 615{
595 struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree; 616 struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree;
@@ -597,18 +618,30 @@ int btrfs_ordered_update_i_size(struct inode *inode,
597 u64 disk_i_size; 618 u64 disk_i_size;
598 u64 new_i_size; 619 u64 new_i_size;
599 u64 i_size_test; 620 u64 i_size_test;
621 u64 i_size = i_size_read(inode);
600 struct rb_node *node; 622 struct rb_node *node;
623 struct rb_node *prev = NULL;
601 struct btrfs_ordered_extent *test; 624 struct btrfs_ordered_extent *test;
625 int ret = 1;
626
627 if (ordered)
628 offset = entry_end(ordered);
602 629
603 mutex_lock(&tree->mutex); 630 mutex_lock(&tree->mutex);
604 disk_i_size = BTRFS_I(inode)->disk_i_size; 631 disk_i_size = BTRFS_I(inode)->disk_i_size;
605 632
633 /* truncate file */
634 if (disk_i_size > i_size) {
635 BTRFS_I(inode)->disk_i_size = i_size;
636 ret = 0;
637 goto out;
638 }
639
606 /* 640 /*
607 * if the disk i_size is already at the inode->i_size, or 641 * if the disk i_size is already at the inode->i_size, or
608 * this ordered extent is inside the disk i_size, we're done 642 * this ordered extent is inside the disk i_size, we're done
609 */ 643 */
610 if (disk_i_size >= inode->i_size || 644 if (disk_i_size == i_size || offset <= disk_i_size) {
611 ordered->file_offset + ordered->len <= disk_i_size) {
612 goto out; 645 goto out;
613 } 646 }
614 647
@@ -616,8 +649,7 @@ int btrfs_ordered_update_i_size(struct inode *inode,
616 * we can't update the disk_isize if there are delalloc bytes 649 * we can't update the disk_isize if there are delalloc bytes
617 * between disk_i_size and this ordered extent 650 * between disk_i_size and this ordered extent
618 */ 651 */
619 if (test_range_bit(io_tree, disk_i_size, 652 if (test_range_bit(io_tree, disk_i_size, offset - 1,
620 ordered->file_offset + ordered->len - 1,
621 EXTENT_DELALLOC, 0, NULL)) { 653 EXTENT_DELALLOC, 0, NULL)) {
622 goto out; 654 goto out;
623 } 655 }
@@ -626,20 +658,32 @@ int btrfs_ordered_update_i_size(struct inode *inode,
626 * if we find an ordered extent then we can't update disk i_size 658 * if we find an ordered extent then we can't update disk i_size
627 * yet 659 * yet
628 */ 660 */
629 node = &ordered->rb_node; 661 if (ordered) {
630 while (1) { 662 node = rb_prev(&ordered->rb_node);
631 node = rb_prev(node); 663 } else {
632 if (!node) 664 prev = tree_search(tree, offset);
633 break; 665 /*
666 * we insert file extents without involving ordered struct,
667 * so there should be no ordered struct cover this offset
668 */
669 if (prev) {
670 test = rb_entry(prev, struct btrfs_ordered_extent,
671 rb_node);
672 BUG_ON(offset_in_entry(test, offset));
673 }
674 node = prev;
675 }
676 while (node) {
634 test = rb_entry(node, struct btrfs_ordered_extent, rb_node); 677 test = rb_entry(node, struct btrfs_ordered_extent, rb_node);
635 if (test->file_offset + test->len <= disk_i_size) 678 if (test->file_offset + test->len <= disk_i_size)
636 break; 679 break;
637 if (test->file_offset >= inode->i_size) 680 if (test->file_offset >= i_size)
638 break; 681 break;
639 if (test->file_offset >= disk_i_size) 682 if (test->file_offset >= disk_i_size)
640 goto out; 683 goto out;
684 node = rb_prev(node);
641 } 685 }
642 new_i_size = min_t(u64, entry_end(ordered), i_size_read(inode)); 686 new_i_size = min_t(u64, offset, i_size);
643 687
644 /* 688 /*
645 * at this point, we know we can safely update i_size to at least 689 * at this point, we know we can safely update i_size to at least
@@ -647,7 +691,14 @@ int btrfs_ordered_update_i_size(struct inode *inode,
647 * walk forward and see if ios from higher up in the file have 691 * walk forward and see if ios from higher up in the file have
648 * finished. 692 * finished.
649 */ 693 */
650 node = rb_next(&ordered->rb_node); 694 if (ordered) {
695 node = rb_next(&ordered->rb_node);
696 } else {
697 if (prev)
698 node = rb_next(prev);
699 else
700 node = rb_first(&tree->tree);
701 }
651 i_size_test = 0; 702 i_size_test = 0;
652 if (node) { 703 if (node) {
653 /* 704 /*
@@ -655,10 +706,10 @@ int btrfs_ordered_update_i_size(struct inode *inode,
655 * between our ordered extent and the next one. 706 * between our ordered extent and the next one.
656 */ 707 */
657 test = rb_entry(node, struct btrfs_ordered_extent, rb_node); 708 test = rb_entry(node, struct btrfs_ordered_extent, rb_node);
658 if (test->file_offset > entry_end(ordered)) 709 if (test->file_offset > offset)
659 i_size_test = test->file_offset; 710 i_size_test = test->file_offset;
660 } else { 711 } else {
661 i_size_test = i_size_read(inode); 712 i_size_test = i_size;
662 } 713 }
663 714
664 /* 715 /*
@@ -667,15 +718,25 @@ int btrfs_ordered_update_i_size(struct inode *inode,
667 * are no delalloc bytes in this area, it is safe to update 718 * are no delalloc bytes in this area, it is safe to update
668 * disk_i_size to the end of the region. 719 * disk_i_size to the end of the region.
669 */ 720 */
670 if (i_size_test > entry_end(ordered) && 721 if (i_size_test > offset &&
671 !test_range_bit(io_tree, entry_end(ordered), i_size_test - 1, 722 !test_range_bit(io_tree, offset, i_size_test - 1,
672 EXTENT_DELALLOC, 0, NULL)) { 723 EXTENT_DELALLOC, 0, NULL)) {
673 new_i_size = min_t(u64, i_size_test, i_size_read(inode)); 724 new_i_size = min_t(u64, i_size_test, i_size);
674 } 725 }
675 BTRFS_I(inode)->disk_i_size = new_i_size; 726 BTRFS_I(inode)->disk_i_size = new_i_size;
727 ret = 0;
676out: 728out:
729 /*
730 * we need to remove the ordered extent with the tree lock held
731 * so that other people calling this function don't find our fully
732 * processed ordered entry and skip updating the i_size
733 */
734 if (ordered)
735 __btrfs_remove_ordered_extent(inode, ordered);
677 mutex_unlock(&tree->mutex); 736 mutex_unlock(&tree->mutex);
678 return 0; 737 if (ordered)
738 wake_up(&ordered->wait);
739 return ret;
679} 740}
680 741
681/* 742/*
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index f82e87488ca8..1fe1282ef47c 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -150,12 +150,13 @@ void btrfs_start_ordered_extent(struct inode *inode,
150int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len); 150int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len);
151struct btrfs_ordered_extent * 151struct btrfs_ordered_extent *
152btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset); 152btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset);
153int btrfs_ordered_update_i_size(struct inode *inode, 153int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
154 struct btrfs_ordered_extent *ordered); 154 struct btrfs_ordered_extent *ordered);
155int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum); 155int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum);
156int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only);
157int btrfs_run_ordered_operations(struct btrfs_root *root, int wait); 156int btrfs_run_ordered_operations(struct btrfs_root *root, int wait);
158int btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, 157int btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
159 struct btrfs_root *root, 158 struct btrfs_root *root,
160 struct inode *inode); 159 struct inode *inode);
160int btrfs_wait_ordered_extents(struct btrfs_root *root,
161 int nocow_only, int delay_iput);
161#endif 162#endif
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index cfcc93c93a7b..a9728680eca8 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -1561,6 +1561,20 @@ static int invalidate_extent_cache(struct btrfs_root *root,
1561 return 0; 1561 return 0;
1562} 1562}
1563 1563
1564static void put_inodes(struct list_head *list)
1565{
1566 struct inodevec *ivec;
1567 while (!list_empty(list)) {
1568 ivec = list_entry(list->next, struct inodevec, list);
1569 list_del(&ivec->list);
1570 while (ivec->nr > 0) {
1571 ivec->nr--;
1572 iput(ivec->inode[ivec->nr]);
1573 }
1574 kfree(ivec);
1575 }
1576}
1577
1564static int find_next_key(struct btrfs_path *path, int level, 1578static int find_next_key(struct btrfs_path *path, int level,
1565 struct btrfs_key *key) 1579 struct btrfs_key *key)
1566 1580
@@ -1723,6 +1737,11 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
1723 1737
1724 btrfs_btree_balance_dirty(root, nr); 1738 btrfs_btree_balance_dirty(root, nr);
1725 1739
1740 /*
1741 * put inodes outside transaction, otherwise we may deadlock.
1742 */
1743 put_inodes(&inode_list);
1744
1726 if (replaced && rc->stage == UPDATE_DATA_PTRS) 1745 if (replaced && rc->stage == UPDATE_DATA_PTRS)
1727 invalidate_extent_cache(root, &key, &next_key); 1746 invalidate_extent_cache(root, &key, &next_key);
1728 } 1747 }
@@ -1752,19 +1771,7 @@ out:
1752 1771
1753 btrfs_btree_balance_dirty(root, nr); 1772 btrfs_btree_balance_dirty(root, nr);
1754 1773
1755 /* 1774 put_inodes(&inode_list);
1756 * put inodes while we aren't holding the tree locks
1757 */
1758 while (!list_empty(&inode_list)) {
1759 struct inodevec *ivec;
1760 ivec = list_entry(inode_list.next, struct inodevec, list);
1761 list_del(&ivec->list);
1762 while (ivec->nr > 0) {
1763 ivec->nr--;
1764 iput(ivec->inode[ivec->nr]);
1765 }
1766 kfree(ivec);
1767 }
1768 1775
1769 if (replaced && rc->stage == UPDATE_DATA_PTRS) 1776 if (replaced && rc->stage == UPDATE_DATA_PTRS)
1770 invalidate_extent_cache(root, &key, &next_key); 1777 invalidate_extent_cache(root, &key, &next_key);
@@ -3534,8 +3541,8 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
3534 (unsigned long long)rc->block_group->key.objectid, 3541 (unsigned long long)rc->block_group->key.objectid,
3535 (unsigned long long)rc->block_group->flags); 3542 (unsigned long long)rc->block_group->flags);
3536 3543
3537 btrfs_start_delalloc_inodes(fs_info->tree_root); 3544 btrfs_start_delalloc_inodes(fs_info->tree_root, 0);
3538 btrfs_wait_ordered_extents(fs_info->tree_root, 0); 3545 btrfs_wait_ordered_extents(fs_info->tree_root, 0, 0);
3539 3546
3540 while (1) { 3547 while (1) {
3541 rc->extents_found = 0; 3548 rc->extents_found = 0;
@@ -3755,6 +3762,7 @@ out:
3755 BTRFS_DATA_RELOC_TREE_OBJECTID); 3762 BTRFS_DATA_RELOC_TREE_OBJECTID);
3756 if (IS_ERR(fs_root)) 3763 if (IS_ERR(fs_root))
3757 err = PTR_ERR(fs_root); 3764 err = PTR_ERR(fs_root);
3765 btrfs_orphan_cleanup(fs_root);
3758 } 3766 }
3759 return err; 3767 return err;
3760} 3768}
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 752a5463bf53..3f9b45704fcd 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -128,6 +128,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
128 substring_t args[MAX_OPT_ARGS]; 128 substring_t args[MAX_OPT_ARGS];
129 char *p, *num; 129 char *p, *num;
130 int intarg; 130 int intarg;
131 int ret = 0;
131 132
132 if (!options) 133 if (!options)
133 return 0; 134 return 0;
@@ -262,12 +263,18 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
262 case Opt_discard: 263 case Opt_discard:
263 btrfs_set_opt(info->mount_opt, DISCARD); 264 btrfs_set_opt(info->mount_opt, DISCARD);
264 break; 265 break;
266 case Opt_err:
267 printk(KERN_INFO "btrfs: unrecognized mount option "
268 "'%s'\n", p);
269 ret = -EINVAL;
270 goto out;
265 default: 271 default:
266 break; 272 break;
267 } 273 }
268 } 274 }
275out:
269 kfree(options); 276 kfree(options);
270 return 0; 277 return ret;
271} 278}
272 279
273/* 280/*
@@ -405,8 +412,8 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
405 return 0; 412 return 0;
406 } 413 }
407 414
408 btrfs_start_delalloc_inodes(root); 415 btrfs_start_delalloc_inodes(root, 0);
409 btrfs_wait_ordered_extents(root, 0); 416 btrfs_wait_ordered_extents(root, 0, 0);
410 417
411 trans = btrfs_start_transaction(root, 1); 418 trans = btrfs_start_transaction(root, 1);
412 ret = btrfs_commit_transaction(trans, root); 419 ret = btrfs_commit_transaction(trans, root);
@@ -450,6 +457,8 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
450 seq_puts(seq, ",notreelog"); 457 seq_puts(seq, ",notreelog");
451 if (btrfs_test_opt(root, FLUSHONCOMMIT)) 458 if (btrfs_test_opt(root, FLUSHONCOMMIT))
452 seq_puts(seq, ",flushoncommit"); 459 seq_puts(seq, ",flushoncommit");
460 if (btrfs_test_opt(root, DISCARD))
461 seq_puts(seq, ",discard");
453 if (!(root->fs_info->sb->s_flags & MS_POSIXACL)) 462 if (!(root->fs_info->sb->s_flags & MS_POSIXACL))
454 seq_puts(seq, ",noacl"); 463 seq_puts(seq, ",noacl");
455 return 0; 464 return 0;
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index c207e8c32c9b..b2acc79f1b34 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -333,6 +333,9 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
333 memset(trans, 0, sizeof(*trans)); 333 memset(trans, 0, sizeof(*trans));
334 kmem_cache_free(btrfs_trans_handle_cachep, trans); 334 kmem_cache_free(btrfs_trans_handle_cachep, trans);
335 335
336 if (throttle)
337 btrfs_run_delayed_iputs(root);
338
336 return 0; 339 return 0;
337} 340}
338 341
@@ -354,7 +357,7 @@ int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
354 * those extents are sent to disk but does not wait on them 357 * those extents are sent to disk but does not wait on them
355 */ 358 */
356int btrfs_write_marked_extents(struct btrfs_root *root, 359int btrfs_write_marked_extents(struct btrfs_root *root,
357 struct extent_io_tree *dirty_pages) 360 struct extent_io_tree *dirty_pages, int mark)
358{ 361{
359 int ret; 362 int ret;
360 int err = 0; 363 int err = 0;
@@ -367,7 +370,7 @@ int btrfs_write_marked_extents(struct btrfs_root *root,
367 370
368 while (1) { 371 while (1) {
369 ret = find_first_extent_bit(dirty_pages, start, &start, &end, 372 ret = find_first_extent_bit(dirty_pages, start, &start, &end,
370 EXTENT_DIRTY); 373 mark);
371 if (ret) 374 if (ret)
372 break; 375 break;
373 while (start <= end) { 376 while (start <= end) {
@@ -413,7 +416,7 @@ int btrfs_write_marked_extents(struct btrfs_root *root,
413 * on all the pages and clear them from the dirty pages state tree 416 * on all the pages and clear them from the dirty pages state tree
414 */ 417 */
415int btrfs_wait_marked_extents(struct btrfs_root *root, 418int btrfs_wait_marked_extents(struct btrfs_root *root,
416 struct extent_io_tree *dirty_pages) 419 struct extent_io_tree *dirty_pages, int mark)
417{ 420{
418 int ret; 421 int ret;
419 int err = 0; 422 int err = 0;
@@ -425,12 +428,12 @@ int btrfs_wait_marked_extents(struct btrfs_root *root,
425 unsigned long index; 428 unsigned long index;
426 429
427 while (1) { 430 while (1) {
428 ret = find_first_extent_bit(dirty_pages, 0, &start, &end, 431 ret = find_first_extent_bit(dirty_pages, start, &start, &end,
429 EXTENT_DIRTY); 432 mark);
430 if (ret) 433 if (ret)
431 break; 434 break;
432 435
433 clear_extent_dirty(dirty_pages, start, end, GFP_NOFS); 436 clear_extent_bits(dirty_pages, start, end, mark, GFP_NOFS);
434 while (start <= end) { 437 while (start <= end) {
435 index = start >> PAGE_CACHE_SHIFT; 438 index = start >> PAGE_CACHE_SHIFT;
436 start = (u64)(index + 1) << PAGE_CACHE_SHIFT; 439 start = (u64)(index + 1) << PAGE_CACHE_SHIFT;
@@ -460,13 +463,13 @@ int btrfs_wait_marked_extents(struct btrfs_root *root,
460 * those extents are on disk for transaction or log commit 463 * those extents are on disk for transaction or log commit
461 */ 464 */
462int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, 465int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
463 struct extent_io_tree *dirty_pages) 466 struct extent_io_tree *dirty_pages, int mark)
464{ 467{
465 int ret; 468 int ret;
466 int ret2; 469 int ret2;
467 470
468 ret = btrfs_write_marked_extents(root, dirty_pages); 471 ret = btrfs_write_marked_extents(root, dirty_pages, mark);
469 ret2 = btrfs_wait_marked_extents(root, dirty_pages); 472 ret2 = btrfs_wait_marked_extents(root, dirty_pages, mark);
470 return ret || ret2; 473 return ret || ret2;
471} 474}
472 475
@@ -479,7 +482,8 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
479 return filemap_write_and_wait(btree_inode->i_mapping); 482 return filemap_write_and_wait(btree_inode->i_mapping);
480 } 483 }
481 return btrfs_write_and_wait_marked_extents(root, 484 return btrfs_write_and_wait_marked_extents(root,
482 &trans->transaction->dirty_pages); 485 &trans->transaction->dirty_pages,
486 EXTENT_DIRTY);
483} 487}
484 488
485/* 489/*
@@ -497,13 +501,16 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
497{ 501{
498 int ret; 502 int ret;
499 u64 old_root_bytenr; 503 u64 old_root_bytenr;
504 u64 old_root_used;
500 struct btrfs_root *tree_root = root->fs_info->tree_root; 505 struct btrfs_root *tree_root = root->fs_info->tree_root;
501 506
507 old_root_used = btrfs_root_used(&root->root_item);
502 btrfs_write_dirty_block_groups(trans, root); 508 btrfs_write_dirty_block_groups(trans, root);
503 509
504 while (1) { 510 while (1) {
505 old_root_bytenr = btrfs_root_bytenr(&root->root_item); 511 old_root_bytenr = btrfs_root_bytenr(&root->root_item);
506 if (old_root_bytenr == root->node->start) 512 if (old_root_bytenr == root->node->start &&
513 old_root_used == btrfs_root_used(&root->root_item))
507 break; 514 break;
508 515
509 btrfs_set_root_node(&root->root_item, root->node); 516 btrfs_set_root_node(&root->root_item, root->node);
@@ -512,6 +519,7 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
512 &root->root_item); 519 &root->root_item);
513 BUG_ON(ret); 520 BUG_ON(ret);
514 521
522 old_root_used = btrfs_root_used(&root->root_item);
515 ret = btrfs_write_dirty_block_groups(trans, root); 523 ret = btrfs_write_dirty_block_groups(trans, root);
516 BUG_ON(ret); 524 BUG_ON(ret);
517 } 525 }
@@ -795,7 +803,6 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
795 memcpy(&pending->root_key, &key, sizeof(key)); 803 memcpy(&pending->root_key, &key, sizeof(key));
796fail: 804fail:
797 kfree(new_root_item); 805 kfree(new_root_item);
798 btrfs_unreserve_metadata_space(root, 6);
799 return ret; 806 return ret;
800} 807}
801 808
@@ -807,7 +814,6 @@ static noinline int finish_pending_snapshot(struct btrfs_fs_info *fs_info,
807 u64 index = 0; 814 u64 index = 0;
808 struct btrfs_trans_handle *trans; 815 struct btrfs_trans_handle *trans;
809 struct inode *parent_inode; 816 struct inode *parent_inode;
810 struct inode *inode;
811 struct btrfs_root *parent_root; 817 struct btrfs_root *parent_root;
812 818
813 parent_inode = pending->dentry->d_parent->d_inode; 819 parent_inode = pending->dentry->d_parent->d_inode;
@@ -839,8 +845,6 @@ static noinline int finish_pending_snapshot(struct btrfs_fs_info *fs_info,
839 845
840 BUG_ON(ret); 846 BUG_ON(ret);
841 847
842 inode = btrfs_lookup_dentry(parent_inode, pending->dentry);
843 d_instantiate(pending->dentry, inode);
844fail: 848fail:
845 btrfs_end_transaction(trans, fs_info->fs_root); 849 btrfs_end_transaction(trans, fs_info->fs_root);
846 return ret; 850 return ret;
@@ -994,11 +998,11 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
994 mutex_unlock(&root->fs_info->trans_mutex); 998 mutex_unlock(&root->fs_info->trans_mutex);
995 999
996 if (flush_on_commit) { 1000 if (flush_on_commit) {
997 btrfs_start_delalloc_inodes(root); 1001 btrfs_start_delalloc_inodes(root, 1);
998 ret = btrfs_wait_ordered_extents(root, 0); 1002 ret = btrfs_wait_ordered_extents(root, 0, 1);
999 BUG_ON(ret); 1003 BUG_ON(ret);
1000 } else if (snap_pending) { 1004 } else if (snap_pending) {
1001 ret = btrfs_wait_ordered_extents(root, 1); 1005 ret = btrfs_wait_ordered_extents(root, 0, 1);
1002 BUG_ON(ret); 1006 BUG_ON(ret);
1003 } 1007 }
1004 1008
@@ -1116,6 +1120,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1116 current->journal_info = NULL; 1120 current->journal_info = NULL;
1117 1121
1118 kmem_cache_free(btrfs_trans_handle_cachep, trans); 1122 kmem_cache_free(btrfs_trans_handle_cachep, trans);
1123
1124 if (current != root->fs_info->transaction_kthread)
1125 btrfs_run_delayed_iputs(root);
1126
1119 return ret; 1127 return ret;
1120} 1128}
1121 1129
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index d4e3e7a6938c..93c7ccb33118 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -107,10 +107,10 @@ void btrfs_throttle(struct btrfs_root *root);
107int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans, 107int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
108 struct btrfs_root *root); 108 struct btrfs_root *root);
109int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, 109int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
110 struct extent_io_tree *dirty_pages); 110 struct extent_io_tree *dirty_pages, int mark);
111int btrfs_write_marked_extents(struct btrfs_root *root, 111int btrfs_write_marked_extents(struct btrfs_root *root,
112 struct extent_io_tree *dirty_pages); 112 struct extent_io_tree *dirty_pages, int mark);
113int btrfs_wait_marked_extents(struct btrfs_root *root, 113int btrfs_wait_marked_extents(struct btrfs_root *root,
114 struct extent_io_tree *dirty_pages); 114 struct extent_io_tree *dirty_pages, int mark);
115int btrfs_transaction_in_commit(struct btrfs_fs_info *info); 115int btrfs_transaction_in_commit(struct btrfs_fs_info *info);
116#endif 116#endif
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 741666a7676a..4a9434b622ec 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -542,8 +542,8 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
542 542
543 saved_nbytes = inode_get_bytes(inode); 543 saved_nbytes = inode_get_bytes(inode);
544 /* drop any overlapping extents */ 544 /* drop any overlapping extents */
545 ret = btrfs_drop_extents(trans, root, inode, 545 ret = btrfs_drop_extents(trans, inode, start, extent_end,
546 start, extent_end, extent_end, start, &alloc_hint, 1); 546 &alloc_hint, 1);
547 BUG_ON(ret); 547 BUG_ON(ret);
548 548
549 if (found_type == BTRFS_FILE_EXTENT_REG || 549 if (found_type == BTRFS_FILE_EXTENT_REG ||
@@ -930,6 +930,17 @@ out_nowrite:
930 return 0; 930 return 0;
931} 931}
932 932
933static int insert_orphan_item(struct btrfs_trans_handle *trans,
934 struct btrfs_root *root, u64 offset)
935{
936 int ret;
937 ret = btrfs_find_orphan_item(root, offset);
938 if (ret > 0)
939 ret = btrfs_insert_orphan_item(trans, root, offset);
940 return ret;
941}
942
943
933/* 944/*
934 * There are a few corners where the link count of the file can't 945 * There are a few corners where the link count of the file can't
935 * be properly maintained during replay. So, instead of adding 946 * be properly maintained during replay. So, instead of adding
@@ -997,9 +1008,13 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
997 } 1008 }
998 BTRFS_I(inode)->index_cnt = (u64)-1; 1009 BTRFS_I(inode)->index_cnt = (u64)-1;
999 1010
1000 if (inode->i_nlink == 0 && S_ISDIR(inode->i_mode)) { 1011 if (inode->i_nlink == 0) {
1001 ret = replay_dir_deletes(trans, root, NULL, path, 1012 if (S_ISDIR(inode->i_mode)) {
1002 inode->i_ino, 1); 1013 ret = replay_dir_deletes(trans, root, NULL, path,
1014 inode->i_ino, 1);
1015 BUG_ON(ret);
1016 }
1017 ret = insert_orphan_item(trans, root, inode->i_ino);
1003 BUG_ON(ret); 1018 BUG_ON(ret);
1004 } 1019 }
1005 btrfs_free_path(path); 1020 btrfs_free_path(path);
@@ -1587,7 +1602,6 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
1587 /* inode keys are done during the first stage */ 1602 /* inode keys are done during the first stage */
1588 if (key.type == BTRFS_INODE_ITEM_KEY && 1603 if (key.type == BTRFS_INODE_ITEM_KEY &&
1589 wc->stage == LOG_WALK_REPLAY_INODES) { 1604 wc->stage == LOG_WALK_REPLAY_INODES) {
1590 struct inode *inode;
1591 struct btrfs_inode_item *inode_item; 1605 struct btrfs_inode_item *inode_item;
1592 u32 mode; 1606 u32 mode;
1593 1607
@@ -1603,31 +1617,16 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
1603 eb, i, &key); 1617 eb, i, &key);
1604 BUG_ON(ret); 1618 BUG_ON(ret);
1605 1619
1606 /* for regular files, truncate away 1620 /* for regular files, make sure corresponding
1607 * extents past the new EOF 1621 * orhpan item exist. extents past the new EOF
1622 * will be truncated later by orphan cleanup.
1608 */ 1623 */
1609 if (S_ISREG(mode)) { 1624 if (S_ISREG(mode)) {
1610 inode = read_one_inode(root, 1625 ret = insert_orphan_item(wc->trans, root,
1611 key.objectid); 1626 key.objectid);
1612 BUG_ON(!inode);
1613
1614 ret = btrfs_truncate_inode_items(wc->trans,
1615 root, inode, inode->i_size,
1616 BTRFS_EXTENT_DATA_KEY);
1617 BUG_ON(ret); 1627 BUG_ON(ret);
1618
1619 /* if the nlink count is zero here, the iput
1620 * will free the inode. We bump it to make
1621 * sure it doesn't get freed until the link
1622 * count fixup is done
1623 */
1624 if (inode->i_nlink == 0) {
1625 btrfs_inc_nlink(inode);
1626 btrfs_update_inode(wc->trans,
1627 root, inode);
1628 }
1629 iput(inode);
1630 } 1628 }
1629
1631 ret = link_to_fixup_dir(wc->trans, root, 1630 ret = link_to_fixup_dir(wc->trans, root,
1632 path, key.objectid); 1631 path, key.objectid);
1633 BUG_ON(ret); 1632 BUG_ON(ret);
@@ -1977,10 +1976,11 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
1977{ 1976{
1978 int index1; 1977 int index1;
1979 int index2; 1978 int index2;
1979 int mark;
1980 int ret; 1980 int ret;
1981 struct btrfs_root *log = root->log_root; 1981 struct btrfs_root *log = root->log_root;
1982 struct btrfs_root *log_root_tree = root->fs_info->log_root_tree; 1982 struct btrfs_root *log_root_tree = root->fs_info->log_root_tree;
1983 u64 log_transid = 0; 1983 unsigned long log_transid = 0;
1984 1984
1985 mutex_lock(&root->log_mutex); 1985 mutex_lock(&root->log_mutex);
1986 index1 = root->log_transid % 2; 1986 index1 = root->log_transid % 2;
@@ -2014,24 +2014,29 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2014 goto out; 2014 goto out;
2015 } 2015 }
2016 2016
2017 log_transid = root->log_transid;
2018 if (log_transid % 2 == 0)
2019 mark = EXTENT_DIRTY;
2020 else
2021 mark = EXTENT_NEW;
2022
2017 /* we start IO on all the marked extents here, but we don't actually 2023 /* we start IO on all the marked extents here, but we don't actually
2018 * wait for them until later. 2024 * wait for them until later.
2019 */ 2025 */
2020 ret = btrfs_write_marked_extents(log, &log->dirty_log_pages); 2026 ret = btrfs_write_marked_extents(log, &log->dirty_log_pages, mark);
2021 BUG_ON(ret); 2027 BUG_ON(ret);
2022 2028
2023 btrfs_set_root_node(&log->root_item, log->node); 2029 btrfs_set_root_node(&log->root_item, log->node);
2024 2030
2025 root->log_batch = 0; 2031 root->log_batch = 0;
2026 log_transid = root->log_transid;
2027 root->log_transid++; 2032 root->log_transid++;
2028 log->log_transid = root->log_transid; 2033 log->log_transid = root->log_transid;
2029 root->log_start_pid = 0; 2034 root->log_start_pid = 0;
2030 smp_mb(); 2035 smp_mb();
2031 /* 2036 /*
2032 * log tree has been flushed to disk, new modifications of 2037 * IO has been started, blocks of the log tree have WRITTEN flag set
2033 * the log will be written to new positions. so it's safe to 2038 * in their headers. new modifications of the log will be written to
2034 * allow log writers to go in. 2039 * new positions. so it's safe to allow log writers to go in.
2035 */ 2040 */
2036 mutex_unlock(&root->log_mutex); 2041 mutex_unlock(&root->log_mutex);
2037 2042
@@ -2052,7 +2057,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2052 2057
2053 index2 = log_root_tree->log_transid % 2; 2058 index2 = log_root_tree->log_transid % 2;
2054 if (atomic_read(&log_root_tree->log_commit[index2])) { 2059 if (atomic_read(&log_root_tree->log_commit[index2])) {
2055 btrfs_wait_marked_extents(log, &log->dirty_log_pages); 2060 btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
2056 wait_log_commit(trans, log_root_tree, 2061 wait_log_commit(trans, log_root_tree,
2057 log_root_tree->log_transid); 2062 log_root_tree->log_transid);
2058 mutex_unlock(&log_root_tree->log_mutex); 2063 mutex_unlock(&log_root_tree->log_mutex);
@@ -2072,16 +2077,17 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2072 * check the full commit flag again 2077 * check the full commit flag again
2073 */ 2078 */
2074 if (root->fs_info->last_trans_log_full_commit == trans->transid) { 2079 if (root->fs_info->last_trans_log_full_commit == trans->transid) {
2075 btrfs_wait_marked_extents(log, &log->dirty_log_pages); 2080 btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
2076 mutex_unlock(&log_root_tree->log_mutex); 2081 mutex_unlock(&log_root_tree->log_mutex);
2077 ret = -EAGAIN; 2082 ret = -EAGAIN;
2078 goto out_wake_log_root; 2083 goto out_wake_log_root;
2079 } 2084 }
2080 2085
2081 ret = btrfs_write_and_wait_marked_extents(log_root_tree, 2086 ret = btrfs_write_and_wait_marked_extents(log_root_tree,
2082 &log_root_tree->dirty_log_pages); 2087 &log_root_tree->dirty_log_pages,
2088 EXTENT_DIRTY | EXTENT_NEW);
2083 BUG_ON(ret); 2089 BUG_ON(ret);
2084 btrfs_wait_marked_extents(log, &log->dirty_log_pages); 2090 btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
2085 2091
2086 btrfs_set_super_log_root(&root->fs_info->super_for_commit, 2092 btrfs_set_super_log_root(&root->fs_info->super_for_commit,
2087 log_root_tree->node->start); 2093 log_root_tree->node->start);
@@ -2147,12 +2153,12 @@ int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root)
2147 2153
2148 while (1) { 2154 while (1) {
2149 ret = find_first_extent_bit(&log->dirty_log_pages, 2155 ret = find_first_extent_bit(&log->dirty_log_pages,
2150 0, &start, &end, EXTENT_DIRTY); 2156 0, &start, &end, EXTENT_DIRTY | EXTENT_NEW);
2151 if (ret) 2157 if (ret)
2152 break; 2158 break;
2153 2159
2154 clear_extent_dirty(&log->dirty_log_pages, 2160 clear_extent_bits(&log->dirty_log_pages, start, end,
2155 start, end, GFP_NOFS); 2161 EXTENT_DIRTY | EXTENT_NEW, GFP_NOFS);
2156 } 2162 }
2157 2163
2158 if (log->log_transid > 0) { 2164 if (log->log_transid > 0) {
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 7eda483d7b5a..198cff28766d 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2209,7 +2209,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
2209 max_chunk_size = 10 * calc_size; 2209 max_chunk_size = 10 * calc_size;
2210 min_stripe_size = 64 * 1024 * 1024; 2210 min_stripe_size = 64 * 1024 * 1024;
2211 } else if (type & BTRFS_BLOCK_GROUP_METADATA) { 2211 } else if (type & BTRFS_BLOCK_GROUP_METADATA) {
2212 max_chunk_size = 4 * calc_size; 2212 max_chunk_size = 256 * 1024 * 1024;
2213 min_stripe_size = 32 * 1024 * 1024; 2213 min_stripe_size = 32 * 1024 * 1024;
2214 } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) { 2214 } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) {
2215 calc_size = 8 * 1024 * 1024; 2215 calc_size = 8 * 1024 * 1024;
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index b6dd5967c48a..193b58f7d3f3 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -85,22 +85,23 @@ out:
85 return ret; 85 return ret;
86} 86}
87 87
88int __btrfs_setxattr(struct inode *inode, const char *name, 88static int do_setxattr(struct btrfs_trans_handle *trans,
89 const void *value, size_t size, int flags) 89 struct inode *inode, const char *name,
90 const void *value, size_t size, int flags)
90{ 91{
91 struct btrfs_dir_item *di; 92 struct btrfs_dir_item *di;
92 struct btrfs_root *root = BTRFS_I(inode)->root; 93 struct btrfs_root *root = BTRFS_I(inode)->root;
93 struct btrfs_trans_handle *trans;
94 struct btrfs_path *path; 94 struct btrfs_path *path;
95 int ret = 0, mod = 0; 95 size_t name_len = strlen(name);
96 int ret = 0;
97
98 if (name_len + size > BTRFS_MAX_XATTR_SIZE(root))
99 return -ENOSPC;
96 100
97 path = btrfs_alloc_path(); 101 path = btrfs_alloc_path();
98 if (!path) 102 if (!path)
99 return -ENOMEM; 103 return -ENOMEM;
100 104
101 trans = btrfs_join_transaction(root, 1);
102 btrfs_set_trans_block_group(trans, inode);
103
104 /* first lets see if we already have this xattr */ 105 /* first lets see if we already have this xattr */
105 di = btrfs_lookup_xattr(trans, root, path, inode->i_ino, name, 106 di = btrfs_lookup_xattr(trans, root, path, inode->i_ino, name,
106 strlen(name), -1); 107 strlen(name), -1);
@@ -118,15 +119,12 @@ int __btrfs_setxattr(struct inode *inode, const char *name,
118 } 119 }
119 120
120 ret = btrfs_delete_one_dir_name(trans, root, path, di); 121 ret = btrfs_delete_one_dir_name(trans, root, path, di);
121 if (ret) 122 BUG_ON(ret);
122 goto out;
123 btrfs_release_path(root, path); 123 btrfs_release_path(root, path);
124 124
125 /* if we don't have a value then we are removing the xattr */ 125 /* if we don't have a value then we are removing the xattr */
126 if (!value) { 126 if (!value)
127 mod = 1;
128 goto out; 127 goto out;
129 }
130 } else { 128 } else {
131 btrfs_release_path(root, path); 129 btrfs_release_path(root, path);
132 130
@@ -138,20 +136,45 @@ int __btrfs_setxattr(struct inode *inode, const char *name,
138 } 136 }
139 137
140 /* ok we have to create a completely new xattr */ 138 /* ok we have to create a completely new xattr */
141 ret = btrfs_insert_xattr_item(trans, root, name, strlen(name), 139 ret = btrfs_insert_xattr_item(trans, root, path, inode->i_ino,
142 value, size, inode->i_ino); 140 name, name_len, value, size);
141 BUG_ON(ret);
142out:
143 btrfs_free_path(path);
144 return ret;
145}
146
147int __btrfs_setxattr(struct btrfs_trans_handle *trans,
148 struct inode *inode, const char *name,
149 const void *value, size_t size, int flags)
150{
151 struct btrfs_root *root = BTRFS_I(inode)->root;
152 int ret;
153
154 if (trans)
155 return do_setxattr(trans, inode, name, value, size, flags);
156
157 ret = btrfs_reserve_metadata_space(root, 2);
143 if (ret) 158 if (ret)
144 goto out; 159 return ret;
145 mod = 1;
146 160
147out: 161 trans = btrfs_start_transaction(root, 1);
148 if (mod) { 162 if (!trans) {
149 inode->i_ctime = CURRENT_TIME; 163 ret = -ENOMEM;
150 ret = btrfs_update_inode(trans, root, inode); 164 goto out;
151 } 165 }
166 btrfs_set_trans_block_group(trans, inode);
152 167
153 btrfs_end_transaction(trans, root); 168 ret = do_setxattr(trans, inode, name, value, size, flags);
154 btrfs_free_path(path); 169 if (ret)
170 goto out;
171
172 inode->i_ctime = CURRENT_TIME;
173 ret = btrfs_update_inode(trans, root, inode);
174 BUG_ON(ret);
175out:
176 btrfs_end_transaction_throttle(trans, root);
177 btrfs_unreserve_metadata_space(root, 2);
155 return ret; 178 return ret;
156} 179}
157 180
@@ -314,7 +337,9 @@ int btrfs_setxattr(struct dentry *dentry, const char *name, const void *value,
314 337
315 if (size == 0) 338 if (size == 0)
316 value = ""; /* empty EA, do not remove */ 339 value = ""; /* empty EA, do not remove */
317 return __btrfs_setxattr(dentry->d_inode, name, value, size, flags); 340
341 return __btrfs_setxattr(NULL, dentry->d_inode, name, value, size,
342 flags);
318} 343}
319 344
320int btrfs_removexattr(struct dentry *dentry, const char *name) 345int btrfs_removexattr(struct dentry *dentry, const char *name)
@@ -329,10 +354,13 @@ int btrfs_removexattr(struct dentry *dentry, const char *name)
329 354
330 if (!btrfs_is_valid_xattr(name)) 355 if (!btrfs_is_valid_xattr(name))
331 return -EOPNOTSUPP; 356 return -EOPNOTSUPP;
332 return __btrfs_setxattr(dentry->d_inode, name, NULL, 0, XATTR_REPLACE); 357
358 return __btrfs_setxattr(NULL, dentry->d_inode, name, NULL, 0,
359 XATTR_REPLACE);
333} 360}
334 361
335int btrfs_xattr_security_init(struct inode *inode, struct inode *dir) 362int btrfs_xattr_security_init(struct btrfs_trans_handle *trans,
363 struct inode *inode, struct inode *dir)
336{ 364{
337 int err; 365 int err;
338 size_t len; 366 size_t len;
@@ -354,7 +382,7 @@ int btrfs_xattr_security_init(struct inode *inode, struct inode *dir)
354 } else { 382 } else {
355 strcpy(name, XATTR_SECURITY_PREFIX); 383 strcpy(name, XATTR_SECURITY_PREFIX);
356 strcpy(name + XATTR_SECURITY_PREFIX_LEN, suffix); 384 strcpy(name + XATTR_SECURITY_PREFIX_LEN, suffix);
357 err = __btrfs_setxattr(inode, name, value, len, 0); 385 err = __btrfs_setxattr(trans, inode, name, value, len, 0);
358 kfree(name); 386 kfree(name);
359 } 387 }
360 388
diff --git a/fs/btrfs/xattr.h b/fs/btrfs/xattr.h
index c71e9c3cf3f7..721efa0346e0 100644
--- a/fs/btrfs/xattr.h
+++ b/fs/btrfs/xattr.h
@@ -27,15 +27,16 @@ extern struct xattr_handler *btrfs_xattr_handlers[];
27 27
28extern ssize_t __btrfs_getxattr(struct inode *inode, const char *name, 28extern ssize_t __btrfs_getxattr(struct inode *inode, const char *name,
29 void *buffer, size_t size); 29 void *buffer, size_t size);
30extern int __btrfs_setxattr(struct inode *inode, const char *name, 30extern int __btrfs_setxattr(struct btrfs_trans_handle *trans,
31 const void *value, size_t size, int flags); 31 struct inode *inode, const char *name,
32 32 const void *value, size_t size, int flags);
33extern ssize_t btrfs_getxattr(struct dentry *dentry, const char *name, 33extern ssize_t btrfs_getxattr(struct dentry *dentry, const char *name,
34 void *buffer, size_t size); 34 void *buffer, size_t size);
35extern int btrfs_setxattr(struct dentry *dentry, const char *name, 35extern int btrfs_setxattr(struct dentry *dentry, const char *name,
36 const void *value, size_t size, int flags); 36 const void *value, size_t size, int flags);
37extern int btrfs_removexattr(struct dentry *dentry, const char *name); 37extern int btrfs_removexattr(struct dentry *dentry, const char *name);
38 38
39extern int btrfs_xattr_security_init(struct inode *inode, struct inode *dir); 39extern int btrfs_xattr_security_init(struct btrfs_trans_handle *trans,
40 struct inode *inode, struct inode *dir);
40 41
41#endif /* __XATTR__ */ 42#endif /* __XATTR__ */