aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs
diff options
context:
space:
mode:
authorMark Brown <broonie@opensource.wolfsonmicro.com>2009-12-26 09:52:54 -0500
committerMark Brown <broonie@opensource.wolfsonmicro.com>2009-12-26 09:52:54 -0500
commit7f50548abb5454bd82c25aae15f0a3bf6a530f46 (patch)
tree175b5d695437151f0f9f778ad8eb7f274468842f /fs/btrfs
parentb3172f222ab5afdc91ea058bd11c42cf169728f3 (diff)
parent6b7b284958d47b77d06745b36bc7f36dab769d9b (diff)
Merge commit 'v2.6.33-rc2' into for-2.6.33
Diffstat (limited to 'fs/btrfs')
-rw-r--r--fs/btrfs/acl.c68
-rw-r--r--fs/btrfs/btrfs_inode.h5
-rw-r--r--fs/btrfs/ctree.c229
-rw-r--r--fs/btrfs/ctree.h40
-rw-r--r--fs/btrfs/dir-item.c19
-rw-r--r--fs/btrfs/disk-io.c27
-rw-r--r--fs/btrfs/extent-tree.c185
-rw-r--r--fs/btrfs/extent_map.c4
-rw-r--r--fs/btrfs/file.c673
-rw-r--r--fs/btrfs/free-space-cache.c2
-rw-r--r--fs/btrfs/inode.c658
-rw-r--r--fs/btrfs/ioctl.c34
-rw-r--r--fs/btrfs/ordered-data.c115
-rw-r--r--fs/btrfs/ordered-data.h5
-rw-r--r--fs/btrfs/relocation.c38
-rw-r--r--fs/btrfs/root-tree.c2
-rw-r--r--fs/btrfs/super.c15
-rw-r--r--fs/btrfs/transaction.c63
-rw-r--r--fs/btrfs/transaction.h6
-rw-r--r--fs/btrfs/tree-log.c86
-rw-r--r--fs/btrfs/volumes.c2
-rw-r--r--fs/btrfs/xattr.c80
-rw-r--r--fs/btrfs/xattr.h9
23 files changed, 1366 insertions, 999 deletions
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 361604244271..2e9e69987a82 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -73,13 +73,13 @@ static struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
73 return acl; 73 return acl;
74} 74}
75 75
76static int btrfs_xattr_get_acl(struct inode *inode, int type, 76static int btrfs_xattr_acl_get(struct dentry *dentry, const char *name,
77 void *value, size_t size) 77 void *value, size_t size, int type)
78{ 78{
79 struct posix_acl *acl; 79 struct posix_acl *acl;
80 int ret = 0; 80 int ret = 0;
81 81
82 acl = btrfs_get_acl(inode, type); 82 acl = btrfs_get_acl(dentry->d_inode, type);
83 83
84 if (IS_ERR(acl)) 84 if (IS_ERR(acl))
85 return PTR_ERR(acl); 85 return PTR_ERR(acl);
@@ -94,7 +94,8 @@ static int btrfs_xattr_get_acl(struct inode *inode, int type,
94/* 94/*
95 * Needs to be called with fs_mutex held 95 * Needs to be called with fs_mutex held
96 */ 96 */
97static int btrfs_set_acl(struct inode *inode, struct posix_acl *acl, int type) 97static int btrfs_set_acl(struct btrfs_trans_handle *trans,
98 struct inode *inode, struct posix_acl *acl, int type)
98{ 99{
99 int ret, size = 0; 100 int ret, size = 0;
100 const char *name; 101 const char *name;
@@ -140,8 +141,7 @@ static int btrfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
140 goto out; 141 goto out;
141 } 142 }
142 143
143 ret = __btrfs_setxattr(inode, name, value, size, 0); 144 ret = __btrfs_setxattr(trans, inode, name, value, size, 0);
144
145out: 145out:
146 kfree(value); 146 kfree(value);
147 147
@@ -151,10 +151,10 @@ out:
151 return ret; 151 return ret;
152} 152}
153 153
154static int btrfs_xattr_set_acl(struct inode *inode, int type, 154static int btrfs_xattr_acl_set(struct dentry *dentry, const char *name,
155 const void *value, size_t size) 155 const void *value, size_t size, int flags, int type)
156{ 156{
157 int ret = 0; 157 int ret;
158 struct posix_acl *acl = NULL; 158 struct posix_acl *acl = NULL;
159 159
160 if (value) { 160 if (value) {
@@ -167,38 +167,13 @@ static int btrfs_xattr_set_acl(struct inode *inode, int type,
167 } 167 }
168 } 168 }
169 169
170 ret = btrfs_set_acl(inode, acl, type); 170 ret = btrfs_set_acl(NULL, dentry->d_inode, acl, type);
171 171
172 posix_acl_release(acl); 172 posix_acl_release(acl);
173 173
174 return ret; 174 return ret;
175} 175}
176 176
177
178static int btrfs_xattr_acl_access_get(struct inode *inode, const char *name,
179 void *value, size_t size)
180{
181 return btrfs_xattr_get_acl(inode, ACL_TYPE_ACCESS, value, size);
182}
183
184static int btrfs_xattr_acl_access_set(struct inode *inode, const char *name,
185 const void *value, size_t size, int flags)
186{
187 return btrfs_xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size);
188}
189
190static int btrfs_xattr_acl_default_get(struct inode *inode, const char *name,
191 void *value, size_t size)
192{
193 return btrfs_xattr_get_acl(inode, ACL_TYPE_DEFAULT, value, size);
194}
195
196static int btrfs_xattr_acl_default_set(struct inode *inode, const char *name,
197 const void *value, size_t size, int flags)
198{
199 return btrfs_xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size);
200}
201
202int btrfs_check_acl(struct inode *inode, int mask) 177int btrfs_check_acl(struct inode *inode, int mask)
203{ 178{
204 struct posix_acl *acl; 179 struct posix_acl *acl;
@@ -221,7 +196,8 @@ int btrfs_check_acl(struct inode *inode, int mask)
221 * stuff has been fixed to work with that. If the locking stuff changes, we 196 * stuff has been fixed to work with that. If the locking stuff changes, we
222 * need to re-evaluate the acl locking stuff. 197 * need to re-evaluate the acl locking stuff.
223 */ 198 */
224int btrfs_init_acl(struct inode *inode, struct inode *dir) 199int btrfs_init_acl(struct btrfs_trans_handle *trans,
200 struct inode *inode, struct inode *dir)
225{ 201{
226 struct posix_acl *acl = NULL; 202 struct posix_acl *acl = NULL;
227 int ret = 0; 203 int ret = 0;
@@ -246,7 +222,8 @@ int btrfs_init_acl(struct inode *inode, struct inode *dir)
246 mode_t mode; 222 mode_t mode;
247 223
248 if (S_ISDIR(inode->i_mode)) { 224 if (S_ISDIR(inode->i_mode)) {
249 ret = btrfs_set_acl(inode, acl, ACL_TYPE_DEFAULT); 225 ret = btrfs_set_acl(trans, inode, acl,
226 ACL_TYPE_DEFAULT);
250 if (ret) 227 if (ret)
251 goto failed; 228 goto failed;
252 } 229 }
@@ -261,7 +238,7 @@ int btrfs_init_acl(struct inode *inode, struct inode *dir)
261 inode->i_mode = mode; 238 inode->i_mode = mode;
262 if (ret > 0) { 239 if (ret > 0) {
263 /* we need an acl */ 240 /* we need an acl */
264 ret = btrfs_set_acl(inode, clone, 241 ret = btrfs_set_acl(trans, inode, clone,
265 ACL_TYPE_ACCESS); 242 ACL_TYPE_ACCESS);
266 } 243 }
267 } 244 }
@@ -294,7 +271,7 @@ int btrfs_acl_chmod(struct inode *inode)
294 271
295 ret = posix_acl_chmod_masq(clone, inode->i_mode); 272 ret = posix_acl_chmod_masq(clone, inode->i_mode);
296 if (!ret) 273 if (!ret)
297 ret = btrfs_set_acl(inode, clone, ACL_TYPE_ACCESS); 274 ret = btrfs_set_acl(NULL, inode, clone, ACL_TYPE_ACCESS);
298 275
299 posix_acl_release(clone); 276 posix_acl_release(clone);
300 277
@@ -303,14 +280,16 @@ int btrfs_acl_chmod(struct inode *inode)
303 280
304struct xattr_handler btrfs_xattr_acl_default_handler = { 281struct xattr_handler btrfs_xattr_acl_default_handler = {
305 .prefix = POSIX_ACL_XATTR_DEFAULT, 282 .prefix = POSIX_ACL_XATTR_DEFAULT,
306 .get = btrfs_xattr_acl_default_get, 283 .flags = ACL_TYPE_DEFAULT,
307 .set = btrfs_xattr_acl_default_set, 284 .get = btrfs_xattr_acl_get,
285 .set = btrfs_xattr_acl_set,
308}; 286};
309 287
310struct xattr_handler btrfs_xattr_acl_access_handler = { 288struct xattr_handler btrfs_xattr_acl_access_handler = {
311 .prefix = POSIX_ACL_XATTR_ACCESS, 289 .prefix = POSIX_ACL_XATTR_ACCESS,
312 .get = btrfs_xattr_acl_access_get, 290 .flags = ACL_TYPE_ACCESS,
313 .set = btrfs_xattr_acl_access_set, 291 .get = btrfs_xattr_acl_get,
292 .set = btrfs_xattr_acl_set,
314}; 293};
315 294
316#else /* CONFIG_BTRFS_FS_POSIX_ACL */ 295#else /* CONFIG_BTRFS_FS_POSIX_ACL */
@@ -320,7 +299,8 @@ int btrfs_acl_chmod(struct inode *inode)
320 return 0; 299 return 0;
321} 300}
322 301
323int btrfs_init_acl(struct inode *inode, struct inode *dir) 302int btrfs_init_acl(struct btrfs_trans_handle *trans,
303 struct inode *inode, struct inode *dir)
324{ 304{
325 return 0; 305 return 0;
326} 306}
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index f6783a42f010..3f1f50d9d916 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -44,9 +44,6 @@ struct btrfs_inode {
44 */ 44 */
45 struct extent_io_tree io_failure_tree; 45 struct extent_io_tree io_failure_tree;
46 46
47 /* held while inesrting or deleting extents from files */
48 struct mutex extent_mutex;
49
50 /* held while logging the inode in tree-log.c */ 47 /* held while logging the inode in tree-log.c */
51 struct mutex log_mutex; 48 struct mutex log_mutex;
52 49
@@ -166,7 +163,7 @@ static inline struct btrfs_inode *BTRFS_I(struct inode *inode)
166 163
167static inline void btrfs_i_size_write(struct inode *inode, u64 size) 164static inline void btrfs_i_size_write(struct inode *inode, u64 size)
168{ 165{
169 inode->i_size = size; 166 i_size_write(inode, size);
170 BTRFS_I(inode)->disk_i_size = size; 167 BTRFS_I(inode)->disk_i_size = size;
171} 168}
172 169
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index ec96f3a6d536..c4bc570a396e 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -37,6 +37,11 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
37 struct extent_buffer *src_buf); 37 struct extent_buffer *src_buf);
38static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, 38static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
39 struct btrfs_path *path, int level, int slot); 39 struct btrfs_path *path, int level, int slot);
40static int setup_items_for_insert(struct btrfs_trans_handle *trans,
41 struct btrfs_root *root, struct btrfs_path *path,
42 struct btrfs_key *cpu_key, u32 *data_size,
43 u32 total_data, u32 total_size, int nr);
44
40 45
41struct btrfs_path *btrfs_alloc_path(void) 46struct btrfs_path *btrfs_alloc_path(void)
42{ 47{
@@ -451,9 +456,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
451 extent_buffer_get(cow); 456 extent_buffer_get(cow);
452 spin_unlock(&root->node_lock); 457 spin_unlock(&root->node_lock);
453 458
454 btrfs_free_extent(trans, root, buf->start, buf->len, 459 btrfs_free_tree_block(trans, root, buf->start, buf->len,
455 parent_start, root->root_key.objectid, 460 parent_start, root->root_key.objectid, level);
456 level, 0);
457 free_extent_buffer(buf); 461 free_extent_buffer(buf);
458 add_root_to_dirty_list(root); 462 add_root_to_dirty_list(root);
459 } else { 463 } else {
@@ -468,9 +472,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
468 btrfs_set_node_ptr_generation(parent, parent_slot, 472 btrfs_set_node_ptr_generation(parent, parent_slot,
469 trans->transid); 473 trans->transid);
470 btrfs_mark_buffer_dirty(parent); 474 btrfs_mark_buffer_dirty(parent);
471 btrfs_free_extent(trans, root, buf->start, buf->len, 475 btrfs_free_tree_block(trans, root, buf->start, buf->len,
472 parent_start, root->root_key.objectid, 476 parent_start, root->root_key.objectid, level);
473 level, 0);
474 } 477 }
475 if (unlock_orig) 478 if (unlock_orig)
476 btrfs_tree_unlock(buf); 479 btrfs_tree_unlock(buf);
@@ -1030,8 +1033,8 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
1030 btrfs_tree_unlock(mid); 1033 btrfs_tree_unlock(mid);
1031 /* once for the path */ 1034 /* once for the path */
1032 free_extent_buffer(mid); 1035 free_extent_buffer(mid);
1033 ret = btrfs_free_extent(trans, root, mid->start, mid->len, 1036 ret = btrfs_free_tree_block(trans, root, mid->start, mid->len,
1034 0, root->root_key.objectid, level, 1); 1037 0, root->root_key.objectid, level);
1035 /* once for the root ptr */ 1038 /* once for the root ptr */
1036 free_extent_buffer(mid); 1039 free_extent_buffer(mid);
1037 return ret; 1040 return ret;
@@ -1095,10 +1098,10 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
1095 1); 1098 1);
1096 if (wret) 1099 if (wret)
1097 ret = wret; 1100 ret = wret;
1098 wret = btrfs_free_extent(trans, root, bytenr, 1101 wret = btrfs_free_tree_block(trans, root,
1099 blocksize, 0, 1102 bytenr, blocksize, 0,
1100 root->root_key.objectid, 1103 root->root_key.objectid,
1101 level, 0); 1104 level);
1102 if (wret) 1105 if (wret)
1103 ret = wret; 1106 ret = wret;
1104 } else { 1107 } else {
@@ -1143,9 +1146,8 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
1143 wret = del_ptr(trans, root, path, level + 1, pslot); 1146 wret = del_ptr(trans, root, path, level + 1, pslot);
1144 if (wret) 1147 if (wret)
1145 ret = wret; 1148 ret = wret;
1146 wret = btrfs_free_extent(trans, root, bytenr, blocksize, 1149 wret = btrfs_free_tree_block(trans, root, bytenr, blocksize,
1147 0, root->root_key.objectid, 1150 0, root->root_key.objectid, level);
1148 level, 0);
1149 if (wret) 1151 if (wret)
1150 ret = wret; 1152 ret = wret;
1151 } else { 1153 } else {
@@ -2997,75 +2999,85 @@ again:
2997 return ret; 2999 return ret;
2998} 3000}
2999 3001
3000/* 3002static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,
3001 * This function splits a single item into two items, 3003 struct btrfs_root *root,
3002 * giving 'new_key' to the new item and splitting the 3004 struct btrfs_path *path, int ins_len)
3003 * old one at split_offset (from the start of the item).
3004 *
3005 * The path may be released by this operation. After
3006 * the split, the path is pointing to the old item. The
3007 * new item is going to be in the same node as the old one.
3008 *
3009 * Note, the item being split must be smaller enough to live alone on
3010 * a tree block with room for one extra struct btrfs_item
3011 *
3012 * This allows us to split the item in place, keeping a lock on the
3013 * leaf the entire time.
3014 */
3015int btrfs_split_item(struct btrfs_trans_handle *trans,
3016 struct btrfs_root *root,
3017 struct btrfs_path *path,
3018 struct btrfs_key *new_key,
3019 unsigned long split_offset)
3020{ 3005{
3021 u32 item_size; 3006 struct btrfs_key key;
3022 struct extent_buffer *leaf; 3007 struct extent_buffer *leaf;
3023 struct btrfs_key orig_key; 3008 struct btrfs_file_extent_item *fi;
3024 struct btrfs_item *item; 3009 u64 extent_len = 0;
3025 struct btrfs_item *new_item; 3010 u32 item_size;
3026 int ret = 0; 3011 int ret;
3027 int slot;
3028 u32 nritems;
3029 u32 orig_offset;
3030 struct btrfs_disk_key disk_key;
3031 char *buf;
3032 3012
3033 leaf = path->nodes[0]; 3013 leaf = path->nodes[0];
3034 btrfs_item_key_to_cpu(leaf, &orig_key, path->slots[0]); 3014 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
3035 if (btrfs_leaf_free_space(root, leaf) >= sizeof(struct btrfs_item)) 3015
3036 goto split; 3016 BUG_ON(key.type != BTRFS_EXTENT_DATA_KEY &&
3017 key.type != BTRFS_EXTENT_CSUM_KEY);
3018
3019 if (btrfs_leaf_free_space(root, leaf) >= ins_len)
3020 return 0;
3037 3021
3038 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 3022 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
3023 if (key.type == BTRFS_EXTENT_DATA_KEY) {
3024 fi = btrfs_item_ptr(leaf, path->slots[0],
3025 struct btrfs_file_extent_item);
3026 extent_len = btrfs_file_extent_num_bytes(leaf, fi);
3027 }
3039 btrfs_release_path(root, path); 3028 btrfs_release_path(root, path);
3040 3029
3041 path->search_for_split = 1;
3042 path->keep_locks = 1; 3030 path->keep_locks = 1;
3043 3031 path->search_for_split = 1;
3044 ret = btrfs_search_slot(trans, root, &orig_key, path, 0, 1); 3032 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3045 path->search_for_split = 0; 3033 path->search_for_split = 0;
3034 if (ret < 0)
3035 goto err;
3046 3036
3037 ret = -EAGAIN;
3038 leaf = path->nodes[0];
3047 /* if our item isn't there or got smaller, return now */ 3039 /* if our item isn't there or got smaller, return now */
3048 if (ret != 0 || item_size != btrfs_item_size_nr(path->nodes[0], 3040 if (ret > 0 || item_size != btrfs_item_size_nr(leaf, path->slots[0]))
3049 path->slots[0])) { 3041 goto err;
3050 path->keep_locks = 0; 3042
3051 return -EAGAIN; 3043 if (key.type == BTRFS_EXTENT_DATA_KEY) {
3044 fi = btrfs_item_ptr(leaf, path->slots[0],
3045 struct btrfs_file_extent_item);
3046 if (extent_len != btrfs_file_extent_num_bytes(leaf, fi))
3047 goto err;
3052 } 3048 }
3053 3049
3054 btrfs_set_path_blocking(path); 3050 btrfs_set_path_blocking(path);
3055 ret = split_leaf(trans, root, &orig_key, path, 3051 ret = split_leaf(trans, root, &key, path, ins_len, 1);
3056 sizeof(struct btrfs_item), 1);
3057 path->keep_locks = 0;
3058 BUG_ON(ret); 3052 BUG_ON(ret);
3059 3053
3054 path->keep_locks = 0;
3060 btrfs_unlock_up_safe(path, 1); 3055 btrfs_unlock_up_safe(path, 1);
3056 return 0;
3057err:
3058 path->keep_locks = 0;
3059 return ret;
3060}
3061
3062static noinline int split_item(struct btrfs_trans_handle *trans,
3063 struct btrfs_root *root,
3064 struct btrfs_path *path,
3065 struct btrfs_key *new_key,
3066 unsigned long split_offset)
3067{
3068 struct extent_buffer *leaf;
3069 struct btrfs_item *item;
3070 struct btrfs_item *new_item;
3071 int slot;
3072 char *buf;
3073 u32 nritems;
3074 u32 item_size;
3075 u32 orig_offset;
3076 struct btrfs_disk_key disk_key;
3077
3061 leaf = path->nodes[0]; 3078 leaf = path->nodes[0];
3062 BUG_ON(btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item)); 3079 BUG_ON(btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item));
3063 3080
3064split:
3065 /*
3066 * make sure any changes to the path from split_leaf leave it
3067 * in a blocking state
3068 */
3069 btrfs_set_path_blocking(path); 3081 btrfs_set_path_blocking(path);
3070 3082
3071 item = btrfs_item_nr(leaf, path->slots[0]); 3083 item = btrfs_item_nr(leaf, path->slots[0]);
@@ -3073,19 +3085,19 @@ split:
3073 item_size = btrfs_item_size(leaf, item); 3085 item_size = btrfs_item_size(leaf, item);
3074 3086
3075 buf = kmalloc(item_size, GFP_NOFS); 3087 buf = kmalloc(item_size, GFP_NOFS);
3088 if (!buf)
3089 return -ENOMEM;
3090
3076 read_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf, 3091 read_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf,
3077 path->slots[0]), item_size); 3092 path->slots[0]), item_size);
3078 slot = path->slots[0] + 1;
3079 leaf = path->nodes[0];
3080 3093
3094 slot = path->slots[0] + 1;
3081 nritems = btrfs_header_nritems(leaf); 3095 nritems = btrfs_header_nritems(leaf);
3082
3083 if (slot != nritems) { 3096 if (slot != nritems) {
3084 /* shift the items */ 3097 /* shift the items */
3085 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1), 3098 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1),
3086 btrfs_item_nr_offset(slot), 3099 btrfs_item_nr_offset(slot),
3087 (nritems - slot) * sizeof(struct btrfs_item)); 3100 (nritems - slot) * sizeof(struct btrfs_item));
3088
3089 } 3101 }
3090 3102
3091 btrfs_cpu_key_to_disk(&disk_key, new_key); 3103 btrfs_cpu_key_to_disk(&disk_key, new_key);
@@ -3113,16 +3125,81 @@ split:
3113 item_size - split_offset); 3125 item_size - split_offset);
3114 btrfs_mark_buffer_dirty(leaf); 3126 btrfs_mark_buffer_dirty(leaf);
3115 3127
3116 ret = 0; 3128 BUG_ON(btrfs_leaf_free_space(root, leaf) < 0);
3117 if (btrfs_leaf_free_space(root, leaf) < 0) {
3118 btrfs_print_leaf(root, leaf);
3119 BUG();
3120 }
3121 kfree(buf); 3129 kfree(buf);
3130 return 0;
3131}
3132
3133/*
3134 * This function splits a single item into two items,
3135 * giving 'new_key' to the new item and splitting the
3136 * old one at split_offset (from the start of the item).
3137 *
3138 * The path may be released by this operation. After
3139 * the split, the path is pointing to the old item. The
3140 * new item is going to be in the same node as the old one.
3141 *
3142 * Note, the item being split must be smaller enough to live alone on
3143 * a tree block with room for one extra struct btrfs_item
3144 *
3145 * This allows us to split the item in place, keeping a lock on the
3146 * leaf the entire time.
3147 */
3148int btrfs_split_item(struct btrfs_trans_handle *trans,
3149 struct btrfs_root *root,
3150 struct btrfs_path *path,
3151 struct btrfs_key *new_key,
3152 unsigned long split_offset)
3153{
3154 int ret;
3155 ret = setup_leaf_for_split(trans, root, path,
3156 sizeof(struct btrfs_item));
3157 if (ret)
3158 return ret;
3159
3160 ret = split_item(trans, root, path, new_key, split_offset);
3122 return ret; 3161 return ret;
3123} 3162}
3124 3163
3125/* 3164/*
3165 * This function duplicate a item, giving 'new_key' to the new item.
3166 * It guarantees both items live in the same tree leaf and the new item
3167 * is contiguous with the original item.
3168 *
3169 * This allows us to split file extent in place, keeping a lock on the
3170 * leaf the entire time.
3171 */
3172int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
3173 struct btrfs_root *root,
3174 struct btrfs_path *path,
3175 struct btrfs_key *new_key)
3176{
3177 struct extent_buffer *leaf;
3178 int ret;
3179 u32 item_size;
3180
3181 leaf = path->nodes[0];
3182 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
3183 ret = setup_leaf_for_split(trans, root, path,
3184 item_size + sizeof(struct btrfs_item));
3185 if (ret)
3186 return ret;
3187
3188 path->slots[0]++;
3189 ret = setup_items_for_insert(trans, root, path, new_key, &item_size,
3190 item_size, item_size +
3191 sizeof(struct btrfs_item), 1);
3192 BUG_ON(ret);
3193
3194 leaf = path->nodes[0];
3195 memcpy_extent_buffer(leaf,
3196 btrfs_item_ptr_offset(leaf, path->slots[0]),
3197 btrfs_item_ptr_offset(leaf, path->slots[0] - 1),
3198 item_size);
3199 return 0;
3200}
3201
3202/*
3126 * make the item pointed to by the path smaller. new_size indicates 3203 * make the item pointed to by the path smaller. new_size indicates
3127 * how small to make it, and from_end tells us if we just chop bytes 3204 * how small to make it, and from_end tells us if we just chop bytes
3128 * off the end of the item or if we shift the item to chop bytes off 3205 * off the end of the item or if we shift the item to chop bytes off
@@ -3714,8 +3791,8 @@ static noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans,
3714 */ 3791 */
3715 btrfs_unlock_up_safe(path, 0); 3792 btrfs_unlock_up_safe(path, 0);
3716 3793
3717 ret = btrfs_free_extent(trans, root, leaf->start, leaf->len, 3794 ret = btrfs_free_tree_block(trans, root, leaf->start, leaf->len,
3718 0, root->root_key.objectid, 0, 0); 3795 0, root->root_key.objectid, 0);
3719 return ret; 3796 return ret;
3720} 3797}
3721/* 3798/*
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 444b3e9b92a4..9f806dd04c27 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -310,6 +310,9 @@ struct btrfs_header {
310#define BTRFS_MAX_INLINE_DATA_SIZE(r) (BTRFS_LEAF_DATA_SIZE(r) - \ 310#define BTRFS_MAX_INLINE_DATA_SIZE(r) (BTRFS_LEAF_DATA_SIZE(r) - \
311 sizeof(struct btrfs_item) - \ 311 sizeof(struct btrfs_item) - \
312 sizeof(struct btrfs_file_extent_item)) 312 sizeof(struct btrfs_file_extent_item))
313#define BTRFS_MAX_XATTR_SIZE(r) (BTRFS_LEAF_DATA_SIZE(r) - \
314 sizeof(struct btrfs_item) -\
315 sizeof(struct btrfs_dir_item))
313 316
314 317
315/* 318/*
@@ -859,8 +862,9 @@ struct btrfs_fs_info {
859 struct mutex ordered_operations_mutex; 862 struct mutex ordered_operations_mutex;
860 struct rw_semaphore extent_commit_sem; 863 struct rw_semaphore extent_commit_sem;
861 864
862 struct rw_semaphore subvol_sem; 865 struct rw_semaphore cleanup_work_sem;
863 866
867 struct rw_semaphore subvol_sem;
864 struct srcu_struct subvol_srcu; 868 struct srcu_struct subvol_srcu;
865 869
866 struct list_head trans_list; 870 struct list_head trans_list;
@@ -868,6 +872,9 @@ struct btrfs_fs_info {
868 struct list_head dead_roots; 872 struct list_head dead_roots;
869 struct list_head caching_block_groups; 873 struct list_head caching_block_groups;
870 874
875 spinlock_t delayed_iput_lock;
876 struct list_head delayed_iputs;
877
871 atomic_t nr_async_submits; 878 atomic_t nr_async_submits;
872 atomic_t async_submit_draining; 879 atomic_t async_submit_draining;
873 atomic_t nr_async_bios; 880 atomic_t nr_async_bios;
@@ -1034,12 +1041,12 @@ struct btrfs_root {
1034 int ref_cows; 1041 int ref_cows;
1035 int track_dirty; 1042 int track_dirty;
1036 int in_radix; 1043 int in_radix;
1044 int clean_orphans;
1037 1045
1038 u64 defrag_trans_start; 1046 u64 defrag_trans_start;
1039 struct btrfs_key defrag_progress; 1047 struct btrfs_key defrag_progress;
1040 struct btrfs_key defrag_max; 1048 struct btrfs_key defrag_max;
1041 int defrag_running; 1049 int defrag_running;
1042 int defrag_level;
1043 char *name; 1050 char *name;
1044 int in_sysfs; 1051 int in_sysfs;
1045 1052
@@ -1975,6 +1982,10 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
1975 u64 parent, u64 root_objectid, 1982 u64 parent, u64 root_objectid,
1976 struct btrfs_disk_key *key, int level, 1983 struct btrfs_disk_key *key, int level,
1977 u64 hint, u64 empty_size); 1984 u64 hint, u64 empty_size);
1985int btrfs_free_tree_block(struct btrfs_trans_handle *trans,
1986 struct btrfs_root *root,
1987 u64 bytenr, u32 blocksize,
1988 u64 parent, u64 root_objectid, int level);
1978struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, 1989struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
1979 struct btrfs_root *root, 1990 struct btrfs_root *root,
1980 u64 bytenr, u32 blocksize, 1991 u64 bytenr, u32 blocksize,
@@ -2089,6 +2100,10 @@ int btrfs_split_item(struct btrfs_trans_handle *trans,
2089 struct btrfs_path *path, 2100 struct btrfs_path *path,
2090 struct btrfs_key *new_key, 2101 struct btrfs_key *new_key,
2091 unsigned long split_offset); 2102 unsigned long split_offset);
2103int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
2104 struct btrfs_root *root,
2105 struct btrfs_path *path,
2106 struct btrfs_key *new_key);
2092int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root 2107int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
2093 *root, struct btrfs_key *key, struct btrfs_path *p, int 2108 *root, struct btrfs_key *key, struct btrfs_path *p, int
2094 ins_len, int cow); 2109 ins_len, int cow);
@@ -2196,9 +2211,10 @@ int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans,
2196 struct btrfs_path *path, 2211 struct btrfs_path *path,
2197 struct btrfs_dir_item *di); 2212 struct btrfs_dir_item *di);
2198int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans, 2213int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
2199 struct btrfs_root *root, const char *name, 2214 struct btrfs_root *root,
2200 u16 name_len, const void *data, u16 data_len, 2215 struct btrfs_path *path, u64 objectid,
2201 u64 dir); 2216 const char *name, u16 name_len,
2217 const void *data, u16 data_len);
2202struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans, 2218struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans,
2203 struct btrfs_root *root, 2219 struct btrfs_root *root,
2204 struct btrfs_path *path, u64 dir, 2220 struct btrfs_path *path, u64 dir,
@@ -2292,7 +2308,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
2292 struct inode *inode, u64 new_size, 2308 struct inode *inode, u64 new_size,
2293 u32 min_type); 2309 u32 min_type);
2294 2310
2295int btrfs_start_delalloc_inodes(struct btrfs_root *root); 2311int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput);
2296int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end); 2312int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end);
2297int btrfs_writepages(struct address_space *mapping, 2313int btrfs_writepages(struct address_space *mapping,
2298 struct writeback_control *wbc); 2314 struct writeback_control *wbc);
@@ -2332,6 +2348,8 @@ int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode);
2332void btrfs_orphan_cleanup(struct btrfs_root *root); 2348void btrfs_orphan_cleanup(struct btrfs_root *root);
2333int btrfs_cont_expand(struct inode *inode, loff_t size); 2349int btrfs_cont_expand(struct inode *inode, loff_t size);
2334int btrfs_invalidate_inodes(struct btrfs_root *root); 2350int btrfs_invalidate_inodes(struct btrfs_root *root);
2351void btrfs_add_delayed_iput(struct inode *inode);
2352void btrfs_run_delayed_iputs(struct btrfs_root *root);
2335extern const struct dentry_operations btrfs_dentry_operations; 2353extern const struct dentry_operations btrfs_dentry_operations;
2336 2354
2337/* ioctl.c */ 2355/* ioctl.c */
@@ -2345,12 +2363,9 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
2345 int skip_pinned); 2363 int skip_pinned);
2346int btrfs_check_file(struct btrfs_root *root, struct inode *inode); 2364int btrfs_check_file(struct btrfs_root *root, struct inode *inode);
2347extern const struct file_operations btrfs_file_operations; 2365extern const struct file_operations btrfs_file_operations;
2348int btrfs_drop_extents(struct btrfs_trans_handle *trans, 2366int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode,
2349 struct btrfs_root *root, struct inode *inode, 2367 u64 start, u64 end, u64 *hint_byte, int drop_cache);
2350 u64 start, u64 end, u64 locked_end,
2351 u64 inline_limit, u64 *hint_block, int drop_cache);
2352int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, 2368int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
2353 struct btrfs_root *root,
2354 struct inode *inode, u64 start, u64 end); 2369 struct inode *inode, u64 start, u64 end);
2355int btrfs_release_file(struct inode *inode, struct file *file); 2370int btrfs_release_file(struct inode *inode, struct file *file);
2356 2371
@@ -2380,7 +2395,8 @@ int btrfs_check_acl(struct inode *inode, int mask);
2380#else 2395#else
2381#define btrfs_check_acl NULL 2396#define btrfs_check_acl NULL
2382#endif 2397#endif
2383int btrfs_init_acl(struct inode *inode, struct inode *dir); 2398int btrfs_init_acl(struct btrfs_trans_handle *trans,
2399 struct inode *inode, struct inode *dir);
2384int btrfs_acl_chmod(struct inode *inode); 2400int btrfs_acl_chmod(struct inode *inode);
2385 2401
2386/* relocation.c */ 2402/* relocation.c */
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index f3a6075519cc..e9103b3baa49 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -68,12 +68,12 @@ static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle
68 * into the tree 68 * into the tree
69 */ 69 */
70int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans, 70int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
71 struct btrfs_root *root, const char *name, 71 struct btrfs_root *root,
72 u16 name_len, const void *data, u16 data_len, 72 struct btrfs_path *path, u64 objectid,
73 u64 dir) 73 const char *name, u16 name_len,
74 const void *data, u16 data_len)
74{ 75{
75 int ret = 0; 76 int ret = 0;
76 struct btrfs_path *path;
77 struct btrfs_dir_item *dir_item; 77 struct btrfs_dir_item *dir_item;
78 unsigned long name_ptr, data_ptr; 78 unsigned long name_ptr, data_ptr;
79 struct btrfs_key key, location; 79 struct btrfs_key key, location;
@@ -81,15 +81,11 @@ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
81 struct extent_buffer *leaf; 81 struct extent_buffer *leaf;
82 u32 data_size; 82 u32 data_size;
83 83
84 key.objectid = dir; 84 BUG_ON(name_len + data_len > BTRFS_MAX_XATTR_SIZE(root));
85
86 key.objectid = objectid;
85 btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY); 87 btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY);
86 key.offset = btrfs_name_hash(name, name_len); 88 key.offset = btrfs_name_hash(name, name_len);
87 path = btrfs_alloc_path();
88 if (!path)
89 return -ENOMEM;
90 if (name_len + data_len + sizeof(struct btrfs_dir_item) >
91 BTRFS_LEAF_DATA_SIZE(root) - sizeof(struct btrfs_item))
92 return -ENOSPC;
93 89
94 data_size = sizeof(*dir_item) + name_len + data_len; 90 data_size = sizeof(*dir_item) + name_len + data_len;
95 dir_item = insert_with_overflow(trans, root, path, &key, data_size, 91 dir_item = insert_with_overflow(trans, root, path, &key, data_size,
@@ -117,7 +113,6 @@ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
117 write_extent_buffer(leaf, data, data_ptr, data_len); 113 write_extent_buffer(leaf, data, data_ptr, data_len);
118 btrfs_mark_buffer_dirty(path->nodes[0]); 114 btrfs_mark_buffer_dirty(path->nodes[0]);
119 115
120 btrfs_free_path(path);
121 return ret; 116 return ret;
122} 117}
123 118
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 02b6afbd7450..009e3bd18f23 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -892,6 +892,8 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
892 root->stripesize = stripesize; 892 root->stripesize = stripesize;
893 root->ref_cows = 0; 893 root->ref_cows = 0;
894 root->track_dirty = 0; 894 root->track_dirty = 0;
895 root->in_radix = 0;
896 root->clean_orphans = 0;
895 897
896 root->fs_info = fs_info; 898 root->fs_info = fs_info;
897 root->objectid = objectid; 899 root->objectid = objectid;
@@ -928,7 +930,6 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
928 root->defrag_trans_start = fs_info->generation; 930 root->defrag_trans_start = fs_info->generation;
929 init_completion(&root->kobj_unregister); 931 init_completion(&root->kobj_unregister);
930 root->defrag_running = 0; 932 root->defrag_running = 0;
931 root->defrag_level = 0;
932 root->root_key.objectid = objectid; 933 root->root_key.objectid = objectid;
933 root->anon_super.s_root = NULL; 934 root->anon_super.s_root = NULL;
934 root->anon_super.s_dev = 0; 935 root->anon_super.s_dev = 0;
@@ -980,12 +981,12 @@ int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans,
980 981
981 while (1) { 982 while (1) {
982 ret = find_first_extent_bit(&log_root_tree->dirty_log_pages, 983 ret = find_first_extent_bit(&log_root_tree->dirty_log_pages,
983 0, &start, &end, EXTENT_DIRTY); 984 0, &start, &end, EXTENT_DIRTY | EXTENT_NEW);
984 if (ret) 985 if (ret)
985 break; 986 break;
986 987
987 clear_extent_dirty(&log_root_tree->dirty_log_pages, 988 clear_extent_bits(&log_root_tree->dirty_log_pages, start, end,
988 start, end, GFP_NOFS); 989 EXTENT_DIRTY | EXTENT_NEW, GFP_NOFS);
989 } 990 }
990 eb = fs_info->log_root_tree->node; 991 eb = fs_info->log_root_tree->node;
991 992
@@ -1210,8 +1211,10 @@ again:
1210 ret = radix_tree_insert(&fs_info->fs_roots_radix, 1211 ret = radix_tree_insert(&fs_info->fs_roots_radix,
1211 (unsigned long)root->root_key.objectid, 1212 (unsigned long)root->root_key.objectid,
1212 root); 1213 root);
1213 if (ret == 0) 1214 if (ret == 0) {
1214 root->in_radix = 1; 1215 root->in_radix = 1;
1216 root->clean_orphans = 1;
1217 }
1215 spin_unlock(&fs_info->fs_roots_radix_lock); 1218 spin_unlock(&fs_info->fs_roots_radix_lock);
1216 radix_tree_preload_end(); 1219 radix_tree_preload_end();
1217 if (ret) { 1220 if (ret) {
@@ -1225,10 +1228,6 @@ again:
1225 ret = btrfs_find_dead_roots(fs_info->tree_root, 1228 ret = btrfs_find_dead_roots(fs_info->tree_root,
1226 root->root_key.objectid); 1229 root->root_key.objectid);
1227 WARN_ON(ret); 1230 WARN_ON(ret);
1228
1229 if (!(fs_info->sb->s_flags & MS_RDONLY))
1230 btrfs_orphan_cleanup(root);
1231
1232 return root; 1231 return root;
1233fail: 1232fail:
1234 free_fs_root(root); 1233 free_fs_root(root);
@@ -1477,6 +1476,7 @@ static int cleaner_kthread(void *arg)
1477 1476
1478 if (!(root->fs_info->sb->s_flags & MS_RDONLY) && 1477 if (!(root->fs_info->sb->s_flags & MS_RDONLY) &&
1479 mutex_trylock(&root->fs_info->cleaner_mutex)) { 1478 mutex_trylock(&root->fs_info->cleaner_mutex)) {
1479 btrfs_run_delayed_iputs(root);
1480 btrfs_clean_old_snapshots(root); 1480 btrfs_clean_old_snapshots(root);
1481 mutex_unlock(&root->fs_info->cleaner_mutex); 1481 mutex_unlock(&root->fs_info->cleaner_mutex);
1482 } 1482 }
@@ -1606,6 +1606,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1606 INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC); 1606 INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC);
1607 INIT_LIST_HEAD(&fs_info->trans_list); 1607 INIT_LIST_HEAD(&fs_info->trans_list);
1608 INIT_LIST_HEAD(&fs_info->dead_roots); 1608 INIT_LIST_HEAD(&fs_info->dead_roots);
1609 INIT_LIST_HEAD(&fs_info->delayed_iputs);
1609 INIT_LIST_HEAD(&fs_info->hashers); 1610 INIT_LIST_HEAD(&fs_info->hashers);
1610 INIT_LIST_HEAD(&fs_info->delalloc_inodes); 1611 INIT_LIST_HEAD(&fs_info->delalloc_inodes);
1611 INIT_LIST_HEAD(&fs_info->ordered_operations); 1612 INIT_LIST_HEAD(&fs_info->ordered_operations);
@@ -1614,6 +1615,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1614 spin_lock_init(&fs_info->new_trans_lock); 1615 spin_lock_init(&fs_info->new_trans_lock);
1615 spin_lock_init(&fs_info->ref_cache_lock); 1616 spin_lock_init(&fs_info->ref_cache_lock);
1616 spin_lock_init(&fs_info->fs_roots_radix_lock); 1617 spin_lock_init(&fs_info->fs_roots_radix_lock);
1618 spin_lock_init(&fs_info->delayed_iput_lock);
1617 1619
1618 init_completion(&fs_info->kobj_unregister); 1620 init_completion(&fs_info->kobj_unregister);
1619 fs_info->tree_root = tree_root; 1621 fs_info->tree_root = tree_root;
@@ -1689,6 +1691,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1689 mutex_init(&fs_info->cleaner_mutex); 1691 mutex_init(&fs_info->cleaner_mutex);
1690 mutex_init(&fs_info->volume_mutex); 1692 mutex_init(&fs_info->volume_mutex);
1691 init_rwsem(&fs_info->extent_commit_sem); 1693 init_rwsem(&fs_info->extent_commit_sem);
1694 init_rwsem(&fs_info->cleanup_work_sem);
1692 init_rwsem(&fs_info->subvol_sem); 1695 init_rwsem(&fs_info->subvol_sem);
1693 1696
1694 btrfs_init_free_cluster(&fs_info->meta_alloc_cluster); 1697 btrfs_init_free_cluster(&fs_info->meta_alloc_cluster);
@@ -2386,8 +2389,14 @@ int btrfs_commit_super(struct btrfs_root *root)
2386 int ret; 2389 int ret;
2387 2390
2388 mutex_lock(&root->fs_info->cleaner_mutex); 2391 mutex_lock(&root->fs_info->cleaner_mutex);
2392 btrfs_run_delayed_iputs(root);
2389 btrfs_clean_old_snapshots(root); 2393 btrfs_clean_old_snapshots(root);
2390 mutex_unlock(&root->fs_info->cleaner_mutex); 2394 mutex_unlock(&root->fs_info->cleaner_mutex);
2395
2396 /* wait until ongoing cleanup work done */
2397 down_write(&root->fs_info->cleanup_work_sem);
2398 up_write(&root->fs_info->cleanup_work_sem);
2399
2391 trans = btrfs_start_transaction(root, 1); 2400 trans = btrfs_start_transaction(root, 1);
2392 ret = btrfs_commit_transaction(trans, root); 2401 ret = btrfs_commit_transaction(trans, root);
2393 BUG_ON(ret); 2402 BUG_ON(ret);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index e238a0cdac67..56e50137d0e6 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -195,6 +195,14 @@ static int exclude_super_stripes(struct btrfs_root *root,
195 int stripe_len; 195 int stripe_len;
196 int i, nr, ret; 196 int i, nr, ret;
197 197
198 if (cache->key.objectid < BTRFS_SUPER_INFO_OFFSET) {
199 stripe_len = BTRFS_SUPER_INFO_OFFSET - cache->key.objectid;
200 cache->bytes_super += stripe_len;
201 ret = add_excluded_extent(root, cache->key.objectid,
202 stripe_len);
203 BUG_ON(ret);
204 }
205
198 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { 206 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
199 bytenr = btrfs_sb_offset(i); 207 bytenr = btrfs_sb_offset(i);
200 ret = btrfs_rmap_block(&root->fs_info->mapping_tree, 208 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
@@ -255,7 +263,7 @@ static u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
255 if (ret) 263 if (ret)
256 break; 264 break;
257 265
258 if (extent_start == start) { 266 if (extent_start <= start) {
259 start = extent_end + 1; 267 start = extent_end + 1;
260 } else if (extent_start > start && extent_start < end) { 268 } else if (extent_start > start && extent_start < end) {
261 size = extent_start - start; 269 size = extent_start - start;
@@ -2880,9 +2888,9 @@ static noinline void flush_delalloc_async(struct btrfs_work *work)
2880 root = async->root; 2888 root = async->root;
2881 info = async->info; 2889 info = async->info;
2882 2890
2883 btrfs_start_delalloc_inodes(root); 2891 btrfs_start_delalloc_inodes(root, 0);
2884 wake_up(&info->flush_wait); 2892 wake_up(&info->flush_wait);
2885 btrfs_wait_ordered_extents(root, 0); 2893 btrfs_wait_ordered_extents(root, 0, 0);
2886 2894
2887 spin_lock(&info->lock); 2895 spin_lock(&info->lock);
2888 info->flushing = 0; 2896 info->flushing = 0;
@@ -2956,8 +2964,8 @@ static void flush_delalloc(struct btrfs_root *root,
2956 return; 2964 return;
2957 2965
2958flush: 2966flush:
2959 btrfs_start_delalloc_inodes(root); 2967 btrfs_start_delalloc_inodes(root, 0);
2960 btrfs_wait_ordered_extents(root, 0); 2968 btrfs_wait_ordered_extents(root, 0, 0);
2961 2969
2962 spin_lock(&info->lock); 2970 spin_lock(&info->lock);
2963 info->flushing = 0; 2971 info->flushing = 0;
@@ -2977,10 +2985,10 @@ static int maybe_allocate_chunk(struct btrfs_root *root,
2977 2985
2978 free_space = btrfs_super_total_bytes(disk_super); 2986 free_space = btrfs_super_total_bytes(disk_super);
2979 /* 2987 /*
2980 * we allow the metadata to grow to a max of either 5gb or 5% of the 2988 * we allow the metadata to grow to a max of either 10gb or 5% of the
2981 * space in the volume. 2989 * space in the volume.
2982 */ 2990 */
2983 min_metadata = min((u64)5 * 1024 * 1024 * 1024, 2991 min_metadata = min((u64)10 * 1024 * 1024 * 1024,
2984 div64_u64(free_space * 5, 100)); 2992 div64_u64(free_space * 5, 100));
2985 if (info->total_bytes >= min_metadata) { 2993 if (info->total_bytes >= min_metadata) {
2986 spin_unlock(&info->lock); 2994 spin_unlock(&info->lock);
@@ -3454,14 +3462,6 @@ static int update_block_group(struct btrfs_trans_handle *trans,
3454 else 3462 else
3455 old_val -= num_bytes; 3463 old_val -= num_bytes;
3456 btrfs_set_super_bytes_used(&info->super_copy, old_val); 3464 btrfs_set_super_bytes_used(&info->super_copy, old_val);
3457
3458 /* block accounting for root item */
3459 old_val = btrfs_root_used(&root->root_item);
3460 if (alloc)
3461 old_val += num_bytes;
3462 else
3463 old_val -= num_bytes;
3464 btrfs_set_root_used(&root->root_item, old_val);
3465 spin_unlock(&info->delalloc_lock); 3465 spin_unlock(&info->delalloc_lock);
3466 3466
3467 while (total) { 3467 while (total) {
@@ -4049,6 +4049,21 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,
4049 return ret; 4049 return ret;
4050} 4050}
4051 4051
4052int btrfs_free_tree_block(struct btrfs_trans_handle *trans,
4053 struct btrfs_root *root,
4054 u64 bytenr, u32 blocksize,
4055 u64 parent, u64 root_objectid, int level)
4056{
4057 u64 used;
4058 spin_lock(&root->node_lock);
4059 used = btrfs_root_used(&root->root_item) - blocksize;
4060 btrfs_set_root_used(&root->root_item, used);
4061 spin_unlock(&root->node_lock);
4062
4063 return btrfs_free_extent(trans, root, bytenr, blocksize,
4064 parent, root_objectid, level, 0);
4065}
4066
4052static u64 stripe_align(struct btrfs_root *root, u64 val) 4067static u64 stripe_align(struct btrfs_root *root, u64 val)
4053{ 4068{
4054 u64 mask = ((u64)root->stripesize - 1); 4069 u64 mask = ((u64)root->stripesize - 1);
@@ -4102,7 +4117,7 @@ wait_block_group_cache_done(struct btrfs_block_group_cache *cache)
4102} 4117}
4103 4118
4104enum btrfs_loop_type { 4119enum btrfs_loop_type {
4105 LOOP_CACHED_ONLY = 0, 4120 LOOP_FIND_IDEAL = 0,
4106 LOOP_CACHING_NOWAIT = 1, 4121 LOOP_CACHING_NOWAIT = 1,
4107 LOOP_CACHING_WAIT = 2, 4122 LOOP_CACHING_WAIT = 2,
4108 LOOP_ALLOC_CHUNK = 3, 4123 LOOP_ALLOC_CHUNK = 3,
@@ -4131,12 +4146,15 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
4131 struct btrfs_block_group_cache *block_group = NULL; 4146 struct btrfs_block_group_cache *block_group = NULL;
4132 int empty_cluster = 2 * 1024 * 1024; 4147 int empty_cluster = 2 * 1024 * 1024;
4133 int allowed_chunk_alloc = 0; 4148 int allowed_chunk_alloc = 0;
4149 int done_chunk_alloc = 0;
4134 struct btrfs_space_info *space_info; 4150 struct btrfs_space_info *space_info;
4135 int last_ptr_loop = 0; 4151 int last_ptr_loop = 0;
4136 int loop = 0; 4152 int loop = 0;
4137 bool found_uncached_bg = false; 4153 bool found_uncached_bg = false;
4138 bool failed_cluster_refill = false; 4154 bool failed_cluster_refill = false;
4139 bool failed_alloc = false; 4155 bool failed_alloc = false;
4156 u64 ideal_cache_percent = 0;
4157 u64 ideal_cache_offset = 0;
4140 4158
4141 WARN_ON(num_bytes < root->sectorsize); 4159 WARN_ON(num_bytes < root->sectorsize);
4142 btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); 4160 btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
@@ -4172,14 +4190,19 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
4172 empty_cluster = 0; 4190 empty_cluster = 0;
4173 4191
4174 if (search_start == hint_byte) { 4192 if (search_start == hint_byte) {
4193ideal_cache:
4175 block_group = btrfs_lookup_block_group(root->fs_info, 4194 block_group = btrfs_lookup_block_group(root->fs_info,
4176 search_start); 4195 search_start);
4177 /* 4196 /*
4178 * we don't want to use the block group if it doesn't match our 4197 * we don't want to use the block group if it doesn't match our
4179 * allocation bits, or if its not cached. 4198 * allocation bits, or if its not cached.
4199 *
4200 * However if we are re-searching with an ideal block group
4201 * picked out then we don't care that the block group is cached.
4180 */ 4202 */
4181 if (block_group && block_group_bits(block_group, data) && 4203 if (block_group && block_group_bits(block_group, data) &&
4182 block_group_cache_done(block_group)) { 4204 (block_group->cached != BTRFS_CACHE_NO ||
4205 search_start == ideal_cache_offset)) {
4183 down_read(&space_info->groups_sem); 4206 down_read(&space_info->groups_sem);
4184 if (list_empty(&block_group->list) || 4207 if (list_empty(&block_group->list) ||
4185 block_group->ro) { 4208 block_group->ro) {
@@ -4191,13 +4214,13 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
4191 */ 4214 */
4192 btrfs_put_block_group(block_group); 4215 btrfs_put_block_group(block_group);
4193 up_read(&space_info->groups_sem); 4216 up_read(&space_info->groups_sem);
4194 } else 4217 } else {
4195 goto have_block_group; 4218 goto have_block_group;
4219 }
4196 } else if (block_group) { 4220 } else if (block_group) {
4197 btrfs_put_block_group(block_group); 4221 btrfs_put_block_group(block_group);
4198 } 4222 }
4199 } 4223 }
4200
4201search: 4224search:
4202 down_read(&space_info->groups_sem); 4225 down_read(&space_info->groups_sem);
4203 list_for_each_entry(block_group, &space_info->block_groups, list) { 4226 list_for_each_entry(block_group, &space_info->block_groups, list) {
@@ -4209,28 +4232,45 @@ search:
4209 4232
4210have_block_group: 4233have_block_group:
4211 if (unlikely(block_group->cached == BTRFS_CACHE_NO)) { 4234 if (unlikely(block_group->cached == BTRFS_CACHE_NO)) {
4235 u64 free_percent;
4236
4237 free_percent = btrfs_block_group_used(&block_group->item);
4238 free_percent *= 100;
4239 free_percent = div64_u64(free_percent,
4240 block_group->key.offset);
4241 free_percent = 100 - free_percent;
4242 if (free_percent > ideal_cache_percent &&
4243 likely(!block_group->ro)) {
4244 ideal_cache_offset = block_group->key.objectid;
4245 ideal_cache_percent = free_percent;
4246 }
4247
4212 /* 4248 /*
4213 * we want to start caching kthreads, but not too many 4249 * We only want to start kthread caching if we are at
4214 * right off the bat so we don't overwhelm the system, 4250 * the point where we will wait for caching to make
4215 * so only start them if there are less than 2 and we're 4251 * progress, or if our ideal search is over and we've
4216 * in the initial allocation phase. 4252 * found somebody to start caching.
4217 */ 4253 */
4218 if (loop > LOOP_CACHING_NOWAIT || 4254 if (loop > LOOP_CACHING_NOWAIT ||
4219 atomic_read(&space_info->caching_threads) < 2) { 4255 (loop > LOOP_FIND_IDEAL &&
4256 atomic_read(&space_info->caching_threads) < 2)) {
4220 ret = cache_block_group(block_group); 4257 ret = cache_block_group(block_group);
4221 BUG_ON(ret); 4258 BUG_ON(ret);
4222 } 4259 }
4223 }
4224
4225 cached = block_group_cache_done(block_group);
4226 if (unlikely(!cached)) {
4227 found_uncached_bg = true; 4260 found_uncached_bg = true;
4228 4261
4229 /* if we only want cached bgs, loop */ 4262 /*
4230 if (loop == LOOP_CACHED_ONLY) 4263 * If loop is set for cached only, try the next block
4264 * group.
4265 */
4266 if (loop == LOOP_FIND_IDEAL)
4231 goto loop; 4267 goto loop;
4232 } 4268 }
4233 4269
4270 cached = block_group_cache_done(block_group);
4271 if (unlikely(!cached))
4272 found_uncached_bg = true;
4273
4234 if (unlikely(block_group->ro)) 4274 if (unlikely(block_group->ro))
4235 goto loop; 4275 goto loop;
4236 4276
@@ -4410,9 +4450,11 @@ loop:
4410 } 4450 }
4411 up_read(&space_info->groups_sem); 4451 up_read(&space_info->groups_sem);
4412 4452
4413 /* LOOP_CACHED_ONLY, only search fully cached block groups 4453 /* LOOP_FIND_IDEAL, only search caching/cached bg's, and don't wait for
4414 * LOOP_CACHING_NOWAIT, search partially cached block groups, but 4454 * for them to make caching progress. Also
4415 * dont wait foR them to finish caching 4455 * determine the best possible bg to cache
4456 * LOOP_CACHING_NOWAIT, search partially cached block groups, kicking
4457 * caching kthreads as we move along
4416 * LOOP_CACHING_WAIT, search everything, and wait if our bg is caching 4458 * LOOP_CACHING_WAIT, search everything, and wait if our bg is caching
4417 * LOOP_ALLOC_CHUNK, force a chunk allocation and try again 4459 * LOOP_ALLOC_CHUNK, force a chunk allocation and try again
4418 * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try 4460 * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try
@@ -4421,12 +4463,47 @@ loop:
4421 if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE && 4463 if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE &&
4422 (found_uncached_bg || empty_size || empty_cluster || 4464 (found_uncached_bg || empty_size || empty_cluster ||
4423 allowed_chunk_alloc)) { 4465 allowed_chunk_alloc)) {
4424 if (found_uncached_bg) { 4466 if (loop == LOOP_FIND_IDEAL && found_uncached_bg) {
4425 found_uncached_bg = false; 4467 found_uncached_bg = false;
4426 if (loop < LOOP_CACHING_WAIT) { 4468 loop++;
4427 loop++; 4469 if (!ideal_cache_percent &&
4470 atomic_read(&space_info->caching_threads))
4428 goto search; 4471 goto search;
4429 } 4472
4473 /*
4474 * 1 of the following 2 things have happened so far
4475 *
4476 * 1) We found an ideal block group for caching that
4477 * is mostly full and will cache quickly, so we might
4478 * as well wait for it.
4479 *
4480 * 2) We searched for cached only and we didn't find
4481 * anything, and we didn't start any caching kthreads
4482 * either, so chances are we will loop through and
4483 * start a couple caching kthreads, and then come back
4484 * around and just wait for them. This will be slower
4485 * because we will have 2 caching kthreads reading at
4486 * the same time when we could have just started one
4487 * and waited for it to get far enough to give us an
4488 * allocation, so go ahead and go to the wait caching
4489 * loop.
4490 */
4491 loop = LOOP_CACHING_WAIT;
4492 search_start = ideal_cache_offset;
4493 ideal_cache_percent = 0;
4494 goto ideal_cache;
4495 } else if (loop == LOOP_FIND_IDEAL) {
4496 /*
4497 * Didn't find a uncached bg, wait on anything we find
4498 * next.
4499 */
4500 loop = LOOP_CACHING_WAIT;
4501 goto search;
4502 }
4503
4504 if (loop < LOOP_CACHING_WAIT) {
4505 loop++;
4506 goto search;
4430 } 4507 }
4431 4508
4432 if (loop == LOOP_ALLOC_CHUNK) { 4509 if (loop == LOOP_ALLOC_CHUNK) {
@@ -4438,7 +4515,8 @@ loop:
4438 ret = do_chunk_alloc(trans, root, num_bytes + 4515 ret = do_chunk_alloc(trans, root, num_bytes +
4439 2 * 1024 * 1024, data, 1); 4516 2 * 1024 * 1024, data, 1);
4440 allowed_chunk_alloc = 0; 4517 allowed_chunk_alloc = 0;
4441 } else { 4518 done_chunk_alloc = 1;
4519 } else if (!done_chunk_alloc) {
4442 space_info->force_alloc = 1; 4520 space_info->force_alloc = 1;
4443 } 4521 }
4444 4522
@@ -4515,7 +4593,6 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
4515{ 4593{
4516 int ret; 4594 int ret;
4517 u64 search_start = 0; 4595 u64 search_start = 0;
4518 struct btrfs_fs_info *info = root->fs_info;
4519 4596
4520 data = btrfs_get_alloc_profile(root, data); 4597 data = btrfs_get_alloc_profile(root, data);
4521again: 4598again:
@@ -4523,17 +4600,9 @@ again:
4523 * the only place that sets empty_size is btrfs_realloc_node, which 4600 * the only place that sets empty_size is btrfs_realloc_node, which
4524 * is not called recursively on allocations 4601 * is not called recursively on allocations
4525 */ 4602 */
4526 if (empty_size || root->ref_cows) { 4603 if (empty_size || root->ref_cows)
4527 if (!(data & BTRFS_BLOCK_GROUP_METADATA)) {
4528 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
4529 2 * 1024 * 1024,
4530 BTRFS_BLOCK_GROUP_METADATA |
4531 (info->metadata_alloc_profile &
4532 info->avail_metadata_alloc_bits), 0);
4533 }
4534 ret = do_chunk_alloc(trans, root->fs_info->extent_root, 4604 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
4535 num_bytes + 2 * 1024 * 1024, data, 0); 4605 num_bytes + 2 * 1024 * 1024, data, 0);
4536 }
4537 4606
4538 WARN_ON(num_bytes < root->sectorsize); 4607 WARN_ON(num_bytes < root->sectorsize);
4539 ret = find_free_extent(trans, root, num_bytes, empty_size, 4608 ret = find_free_extent(trans, root, num_bytes, empty_size,
@@ -4834,6 +4903,14 @@ static int alloc_tree_block(struct btrfs_trans_handle *trans,
4834 extent_op); 4903 extent_op);
4835 BUG_ON(ret); 4904 BUG_ON(ret);
4836 } 4905 }
4906
4907 if (root_objectid == root->root_key.objectid) {
4908 u64 used;
4909 spin_lock(&root->node_lock);
4910 used = btrfs_root_used(&root->root_item) + num_bytes;
4911 btrfs_set_root_used(&root->root_item, used);
4912 spin_unlock(&root->node_lock);
4913 }
4837 return ret; 4914 return ret;
4838} 4915}
4839 4916
@@ -4856,8 +4933,16 @@ struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
4856 btrfs_set_buffer_uptodate(buf); 4933 btrfs_set_buffer_uptodate(buf);
4857 4934
4858 if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { 4935 if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
4859 set_extent_dirty(&root->dirty_log_pages, buf->start, 4936 /*
4860 buf->start + buf->len - 1, GFP_NOFS); 4937 * we allow two log transactions at a time, use different
4938 * EXENT bit to differentiate dirty pages.
4939 */
4940 if (root->log_transid % 2 == 0)
4941 set_extent_dirty(&root->dirty_log_pages, buf->start,
4942 buf->start + buf->len - 1, GFP_NOFS);
4943 else
4944 set_extent_new(&root->dirty_log_pages, buf->start,
4945 buf->start + buf->len - 1, GFP_NOFS);
4861 } else { 4946 } else {
4862 set_extent_dirty(&trans->transaction->dirty_pages, buf->start, 4947 set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
4863 buf->start + buf->len - 1, GFP_NOFS); 4948 buf->start + buf->len - 1, GFP_NOFS);
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 2c726b7b9faa..46bea0f4dc7b 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -208,7 +208,7 @@ int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len)
208 write_lock(&tree->lock); 208 write_lock(&tree->lock);
209 em = lookup_extent_mapping(tree, start, len); 209 em = lookup_extent_mapping(tree, start, len);
210 210
211 WARN_ON(em->start != start || !em); 211 WARN_ON(!em || em->start != start);
212 212
213 if (!em) 213 if (!em)
214 goto out; 214 goto out;
@@ -256,7 +256,7 @@ out:
256 * Insert @em into @tree or perform a simple forward/backward merge with 256 * Insert @em into @tree or perform a simple forward/backward merge with
257 * existing mappings. The extent_map struct passed in will be inserted 257 * existing mappings. The extent_map struct passed in will be inserted
258 * into the tree directly, with an additional reference taken, or a 258 * into the tree directly, with an additional reference taken, or a
259 * reference dropped if the merge attempt was sucessfull. 259 * reference dropped if the merge attempt was successfull.
260 */ 260 */
261int add_extent_mapping(struct extent_map_tree *tree, 261int add_extent_mapping(struct extent_map_tree *tree,
262 struct extent_map *em) 262 struct extent_map *em)
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 06550affbd27..feaa13b105d9 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -179,18 +179,14 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
179 } 179 }
180 flags = em->flags; 180 flags = em->flags;
181 if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) { 181 if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) {
182 if (em->start <= start && 182 if (testend && em->start + em->len >= start + len) {
183 (!testend || em->start + em->len >= start + len)) {
184 free_extent_map(em); 183 free_extent_map(em);
185 write_unlock(&em_tree->lock); 184 write_unlock(&em_tree->lock);
186 break; 185 break;
187 } 186 }
188 if (start < em->start) { 187 start = em->start + em->len;
189 len = em->start - start; 188 if (testend)
190 } else {
191 len = start + len - (em->start + em->len); 189 len = start + len - (em->start + em->len);
192 start = em->start + em->len;
193 }
194 free_extent_map(em); 190 free_extent_map(em);
195 write_unlock(&em_tree->lock); 191 write_unlock(&em_tree->lock);
196 continue; 192 continue;
@@ -265,319 +261,247 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
265 * If an extent intersects the range but is not entirely inside the range 261 * If an extent intersects the range but is not entirely inside the range
266 * it is either truncated or split. Anything entirely inside the range 262 * it is either truncated or split. Anything entirely inside the range
267 * is deleted from the tree. 263 * is deleted from the tree.
268 *
269 * inline_limit is used to tell this code which offsets in the file to keep
270 * if they contain inline extents.
271 */ 264 */
272noinline int btrfs_drop_extents(struct btrfs_trans_handle *trans, 265int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode,
273 struct btrfs_root *root, struct inode *inode, 266 u64 start, u64 end, u64 *hint_byte, int drop_cache)
274 u64 start, u64 end, u64 locked_end,
275 u64 inline_limit, u64 *hint_byte, int drop_cache)
276{ 267{
277 u64 extent_end = 0; 268 struct btrfs_root *root = BTRFS_I(inode)->root;
278 u64 search_start = start;
279 u64 ram_bytes = 0;
280 u64 disk_bytenr = 0;
281 u64 orig_locked_end = locked_end;
282 u8 compression;
283 u8 encryption;
284 u16 other_encoding = 0;
285 struct extent_buffer *leaf; 269 struct extent_buffer *leaf;
286 struct btrfs_file_extent_item *extent; 270 struct btrfs_file_extent_item *fi;
287 struct btrfs_path *path; 271 struct btrfs_path *path;
288 struct btrfs_key key; 272 struct btrfs_key key;
289 struct btrfs_file_extent_item old; 273 struct btrfs_key new_key;
290 int keep; 274 u64 search_start = start;
291 int slot; 275 u64 disk_bytenr = 0;
292 int bookend; 276 u64 num_bytes = 0;
293 int found_type = 0; 277 u64 extent_offset = 0;
294 int found_extent; 278 u64 extent_end = 0;
295 int found_inline; 279 int del_nr = 0;
280 int del_slot = 0;
281 int extent_type;
296 int recow; 282 int recow;
297 int ret; 283 int ret;
298 284
299 inline_limit = 0;
300 if (drop_cache) 285 if (drop_cache)
301 btrfs_drop_extent_cache(inode, start, end - 1, 0); 286 btrfs_drop_extent_cache(inode, start, end - 1, 0);
302 287
303 path = btrfs_alloc_path(); 288 path = btrfs_alloc_path();
304 if (!path) 289 if (!path)
305 return -ENOMEM; 290 return -ENOMEM;
291
306 while (1) { 292 while (1) {
307 recow = 0; 293 recow = 0;
308 btrfs_release_path(root, path);
309 ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino, 294 ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino,
310 search_start, -1); 295 search_start, -1);
311 if (ret < 0) 296 if (ret < 0)
312 goto out; 297 break;
313 if (ret > 0) { 298 if (ret > 0 && path->slots[0] > 0 && search_start == start) {
314 if (path->slots[0] == 0) { 299 leaf = path->nodes[0];
315 ret = 0; 300 btrfs_item_key_to_cpu(leaf, &key, path->slots[0] - 1);
316 goto out; 301 if (key.objectid == inode->i_ino &&
317 } 302 key.type == BTRFS_EXTENT_DATA_KEY)
318 path->slots[0]--; 303 path->slots[0]--;
319 } 304 }
305 ret = 0;
320next_slot: 306next_slot:
321 keep = 0;
322 bookend = 0;
323 found_extent = 0;
324 found_inline = 0;
325 compression = 0;
326 encryption = 0;
327 extent = NULL;
328 leaf = path->nodes[0]; 307 leaf = path->nodes[0];
329 slot = path->slots[0]; 308 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
330 ret = 0; 309 BUG_ON(del_nr > 0);
331 btrfs_item_key_to_cpu(leaf, &key, slot); 310 ret = btrfs_next_leaf(root, path);
332 if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY && 311 if (ret < 0)
333 key.offset >= end) { 312 break;
334 goto out; 313 if (ret > 0) {
335 } 314 ret = 0;
336 if (btrfs_key_type(&key) > BTRFS_EXTENT_DATA_KEY || 315 break;
337 key.objectid != inode->i_ino) {
338 goto out;
339 }
340 if (recow) {
341 search_start = max(key.offset, start);
342 continue;
343 }
344 if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) {
345 extent = btrfs_item_ptr(leaf, slot,
346 struct btrfs_file_extent_item);
347 found_type = btrfs_file_extent_type(leaf, extent);
348 compression = btrfs_file_extent_compression(leaf,
349 extent);
350 encryption = btrfs_file_extent_encryption(leaf,
351 extent);
352 other_encoding = btrfs_file_extent_other_encoding(leaf,
353 extent);
354 if (found_type == BTRFS_FILE_EXTENT_REG ||
355 found_type == BTRFS_FILE_EXTENT_PREALLOC) {
356 extent_end =
357 btrfs_file_extent_disk_bytenr(leaf,
358 extent);
359 if (extent_end)
360 *hint_byte = extent_end;
361
362 extent_end = key.offset +
363 btrfs_file_extent_num_bytes(leaf, extent);
364 ram_bytes = btrfs_file_extent_ram_bytes(leaf,
365 extent);
366 found_extent = 1;
367 } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
368 found_inline = 1;
369 extent_end = key.offset +
370 btrfs_file_extent_inline_len(leaf, extent);
371 } 316 }
317 leaf = path->nodes[0];
318 recow = 1;
319 }
320
321 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
322 if (key.objectid > inode->i_ino ||
323 key.type > BTRFS_EXTENT_DATA_KEY || key.offset >= end)
324 break;
325
326 fi = btrfs_item_ptr(leaf, path->slots[0],
327 struct btrfs_file_extent_item);
328 extent_type = btrfs_file_extent_type(leaf, fi);
329
330 if (extent_type == BTRFS_FILE_EXTENT_REG ||
331 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
332 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
333 num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
334 extent_offset = btrfs_file_extent_offset(leaf, fi);
335 extent_end = key.offset +
336 btrfs_file_extent_num_bytes(leaf, fi);
337 } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
338 extent_end = key.offset +
339 btrfs_file_extent_inline_len(leaf, fi);
372 } else { 340 } else {
341 WARN_ON(1);
373 extent_end = search_start; 342 extent_end = search_start;
374 } 343 }
375 344
376 /* we found nothing we can drop */ 345 if (extent_end <= search_start) {
377 if ((!found_extent && !found_inline) || 346 path->slots[0]++;
378 search_start >= extent_end) {
379 int nextret;
380 u32 nritems;
381 nritems = btrfs_header_nritems(leaf);
382 if (slot >= nritems - 1) {
383 nextret = btrfs_next_leaf(root, path);
384 if (nextret)
385 goto out;
386 recow = 1;
387 } else {
388 path->slots[0]++;
389 }
390 goto next_slot; 347 goto next_slot;
391 } 348 }
392 349
393 if (end <= extent_end && start >= key.offset && found_inline) 350 search_start = max(key.offset, start);
394 *hint_byte = EXTENT_MAP_INLINE; 351 if (recow) {
395 352 btrfs_release_path(root, path);
396 if (found_extent) { 353 continue;
397 read_extent_buffer(leaf, &old, (unsigned long)extent,
398 sizeof(old));
399 }
400
401 if (end < extent_end && end >= key.offset) {
402 bookend = 1;
403 if (found_inline && start <= key.offset)
404 keep = 1;
405 } 354 }
406 355
407 if (bookend && found_extent) { 356 /*
408 if (locked_end < extent_end) { 357 * | - range to drop - |
409 ret = try_lock_extent(&BTRFS_I(inode)->io_tree, 358 * | -------- extent -------- |
410 locked_end, extent_end - 1, 359 */
411 GFP_NOFS); 360 if (start > key.offset && end < extent_end) {
412 if (!ret) { 361 BUG_ON(del_nr > 0);
413 btrfs_release_path(root, path); 362 BUG_ON(extent_type == BTRFS_FILE_EXTENT_INLINE);
414 lock_extent(&BTRFS_I(inode)->io_tree, 363
415 locked_end, extent_end - 1, 364 memcpy(&new_key, &key, sizeof(new_key));
416 GFP_NOFS); 365 new_key.offset = start;
417 locked_end = extent_end; 366 ret = btrfs_duplicate_item(trans, root, path,
418 continue; 367 &new_key);
419 } 368 if (ret == -EAGAIN) {
420 locked_end = extent_end; 369 btrfs_release_path(root, path);
370 continue;
421 } 371 }
422 disk_bytenr = le64_to_cpu(old.disk_bytenr); 372 if (ret < 0)
423 if (disk_bytenr != 0) { 373 break;
374
375 leaf = path->nodes[0];
376 fi = btrfs_item_ptr(leaf, path->slots[0] - 1,
377 struct btrfs_file_extent_item);
378 btrfs_set_file_extent_num_bytes(leaf, fi,
379 start - key.offset);
380
381 fi = btrfs_item_ptr(leaf, path->slots[0],
382 struct btrfs_file_extent_item);
383
384 extent_offset += start - key.offset;
385 btrfs_set_file_extent_offset(leaf, fi, extent_offset);
386 btrfs_set_file_extent_num_bytes(leaf, fi,
387 extent_end - start);
388 btrfs_mark_buffer_dirty(leaf);
389
390 if (disk_bytenr > 0) {
424 ret = btrfs_inc_extent_ref(trans, root, 391 ret = btrfs_inc_extent_ref(trans, root,
425 disk_bytenr, 392 disk_bytenr, num_bytes, 0,
426 le64_to_cpu(old.disk_num_bytes), 0, 393 root->root_key.objectid,
427 root->root_key.objectid, 394 new_key.objectid,
428 key.objectid, key.offset - 395 start - extent_offset);
429 le64_to_cpu(old.offset));
430 BUG_ON(ret); 396 BUG_ON(ret);
397 *hint_byte = disk_bytenr;
431 } 398 }
399 key.offset = start;
432 } 400 }
401 /*
402 * | ---- range to drop ----- |
403 * | -------- extent -------- |
404 */
405 if (start <= key.offset && end < extent_end) {
406 BUG_ON(extent_type == BTRFS_FILE_EXTENT_INLINE);
433 407
434 if (found_inline) { 408 memcpy(&new_key, &key, sizeof(new_key));
435 u64 mask = root->sectorsize - 1; 409 new_key.offset = end;
436 search_start = (extent_end + mask) & ~mask; 410 btrfs_set_item_key_safe(trans, root, path, &new_key);
437 } else 411
438 search_start = extent_end; 412 extent_offset += end - key.offset;
439 413 btrfs_set_file_extent_offset(leaf, fi, extent_offset);
440 /* truncate existing extent */ 414 btrfs_set_file_extent_num_bytes(leaf, fi,
441 if (start > key.offset) { 415 extent_end - end);
442 u64 new_num; 416 btrfs_mark_buffer_dirty(leaf);
443 u64 old_num; 417 if (disk_bytenr > 0) {
444 keep = 1; 418 inode_sub_bytes(inode, end - key.offset);
445 WARN_ON(start & (root->sectorsize - 1)); 419 *hint_byte = disk_bytenr;
446 if (found_extent) {
447 new_num = start - key.offset;
448 old_num = btrfs_file_extent_num_bytes(leaf,
449 extent);
450 *hint_byte =
451 btrfs_file_extent_disk_bytenr(leaf,
452 extent);
453 if (btrfs_file_extent_disk_bytenr(leaf,
454 extent)) {
455 inode_sub_bytes(inode, old_num -
456 new_num);
457 }
458 btrfs_set_file_extent_num_bytes(leaf,
459 extent, new_num);
460 btrfs_mark_buffer_dirty(leaf);
461 } else if (key.offset < inline_limit &&
462 (end > extent_end) &&
463 (inline_limit < extent_end)) {
464 u32 new_size;
465 new_size = btrfs_file_extent_calc_inline_size(
466 inline_limit - key.offset);
467 inode_sub_bytes(inode, extent_end -
468 inline_limit);
469 btrfs_set_file_extent_ram_bytes(leaf, extent,
470 new_size);
471 if (!compression && !encryption) {
472 btrfs_truncate_item(trans, root, path,
473 new_size, 1);
474 }
475 } 420 }
421 break;
476 } 422 }
477 /* delete the entire extent */
478 if (!keep) {
479 if (found_inline)
480 inode_sub_bytes(inode, extent_end -
481 key.offset);
482 ret = btrfs_del_item(trans, root, path);
483 /* TODO update progress marker and return */
484 BUG_ON(ret);
485 extent = NULL;
486 btrfs_release_path(root, path);
487 /* the extent will be freed later */
488 }
489 if (bookend && found_inline && start <= key.offset) {
490 u32 new_size;
491 new_size = btrfs_file_extent_calc_inline_size(
492 extent_end - end);
493 inode_sub_bytes(inode, end - key.offset);
494 btrfs_set_file_extent_ram_bytes(leaf, extent,
495 new_size);
496 if (!compression && !encryption)
497 ret = btrfs_truncate_item(trans, root, path,
498 new_size, 0);
499 BUG_ON(ret);
500 }
501 /* create bookend, splitting the extent in two */
502 if (bookend && found_extent) {
503 struct btrfs_key ins;
504 ins.objectid = inode->i_ino;
505 ins.offset = end;
506 btrfs_set_key_type(&ins, BTRFS_EXTENT_DATA_KEY);
507 423
508 btrfs_release_path(root, path); 424 search_start = extent_end;
509 path->leave_spinning = 1; 425 /*
510 ret = btrfs_insert_empty_item(trans, root, path, &ins, 426 * | ---- range to drop ----- |
511 sizeof(*extent)); 427 * | -------- extent -------- |
512 BUG_ON(ret); 428 */
429 if (start > key.offset && end >= extent_end) {
430 BUG_ON(del_nr > 0);
431 BUG_ON(extent_type == BTRFS_FILE_EXTENT_INLINE);
513 432
514 leaf = path->nodes[0]; 433 btrfs_set_file_extent_num_bytes(leaf, fi,
515 extent = btrfs_item_ptr(leaf, path->slots[0], 434 start - key.offset);
516 struct btrfs_file_extent_item); 435 btrfs_mark_buffer_dirty(leaf);
517 write_extent_buffer(leaf, &old, 436 if (disk_bytenr > 0) {
518 (unsigned long)extent, sizeof(old)); 437 inode_sub_bytes(inode, extent_end - start);
519 438 *hint_byte = disk_bytenr;
520 btrfs_set_file_extent_compression(leaf, extent, 439 }
521 compression); 440 if (end == extent_end)
522 btrfs_set_file_extent_encryption(leaf, extent, 441 break;
523 encryption);
524 btrfs_set_file_extent_other_encoding(leaf, extent,
525 other_encoding);
526 btrfs_set_file_extent_offset(leaf, extent,
527 le64_to_cpu(old.offset) + end - key.offset);
528 WARN_ON(le64_to_cpu(old.num_bytes) <
529 (extent_end - end));
530 btrfs_set_file_extent_num_bytes(leaf, extent,
531 extent_end - end);
532 442
533 /* 443 path->slots[0]++;
534 * set the ram bytes to the size of the full extent 444 goto next_slot;
535 * before splitting. This is a worst case flag,
536 * but its the best we can do because we don't know
537 * how splitting affects compression
538 */
539 btrfs_set_file_extent_ram_bytes(leaf, extent,
540 ram_bytes);
541 btrfs_set_file_extent_type(leaf, extent, found_type);
542
543 btrfs_unlock_up_safe(path, 1);
544 btrfs_mark_buffer_dirty(path->nodes[0]);
545 btrfs_set_lock_blocking(path->nodes[0]);
546
547 path->leave_spinning = 0;
548 btrfs_release_path(root, path);
549 if (disk_bytenr != 0)
550 inode_add_bytes(inode, extent_end - end);
551 } 445 }
552 446
553 if (found_extent && !keep) { 447 /*
554 u64 old_disk_bytenr = le64_to_cpu(old.disk_bytenr); 448 * | ---- range to drop ----- |
449 * | ------ extent ------ |
450 */
451 if (start <= key.offset && end >= extent_end) {
452 if (del_nr == 0) {
453 del_slot = path->slots[0];
454 del_nr = 1;
455 } else {
456 BUG_ON(del_slot + del_nr != path->slots[0]);
457 del_nr++;
458 }
555 459
556 if (old_disk_bytenr != 0) { 460 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
557 inode_sub_bytes(inode, 461 inode_sub_bytes(inode,
558 le64_to_cpu(old.num_bytes)); 462 extent_end - key.offset);
463 extent_end = ALIGN(extent_end,
464 root->sectorsize);
465 } else if (disk_bytenr > 0) {
559 ret = btrfs_free_extent(trans, root, 466 ret = btrfs_free_extent(trans, root,
560 old_disk_bytenr, 467 disk_bytenr, num_bytes, 0,
561 le64_to_cpu(old.disk_num_bytes), 468 root->root_key.objectid,
562 0, root->root_key.objectid,
563 key.objectid, key.offset - 469 key.objectid, key.offset -
564 le64_to_cpu(old.offset)); 470 extent_offset);
565 BUG_ON(ret); 471 BUG_ON(ret);
566 *hint_byte = old_disk_bytenr; 472 inode_sub_bytes(inode,
473 extent_end - key.offset);
474 *hint_byte = disk_bytenr;
567 } 475 }
568 }
569 476
570 if (search_start >= end) { 477 if (end == extent_end)
571 ret = 0; 478 break;
572 goto out; 479
480 if (path->slots[0] + 1 < btrfs_header_nritems(leaf)) {
481 path->slots[0]++;
482 goto next_slot;
483 }
484
485 ret = btrfs_del_items(trans, root, path, del_slot,
486 del_nr);
487 BUG_ON(ret);
488
489 del_nr = 0;
490 del_slot = 0;
491
492 btrfs_release_path(root, path);
493 continue;
573 } 494 }
495
496 BUG_ON(1);
574 } 497 }
575out: 498
576 btrfs_free_path(path); 499 if (del_nr > 0) {
577 if (locked_end > orig_locked_end) { 500 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
578 unlock_extent(&BTRFS_I(inode)->io_tree, orig_locked_end, 501 BUG_ON(ret);
579 locked_end - 1, GFP_NOFS);
580 } 502 }
503
504 btrfs_free_path(path);
581 return ret; 505 return ret;
582} 506}
583 507
@@ -620,23 +544,23 @@ static int extent_mergeable(struct extent_buffer *leaf, int slot,
620 * two or three. 544 * two or three.
621 */ 545 */
622int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, 546int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
623 struct btrfs_root *root,
624 struct inode *inode, u64 start, u64 end) 547 struct inode *inode, u64 start, u64 end)
625{ 548{
549 struct btrfs_root *root = BTRFS_I(inode)->root;
626 struct extent_buffer *leaf; 550 struct extent_buffer *leaf;
627 struct btrfs_path *path; 551 struct btrfs_path *path;
628 struct btrfs_file_extent_item *fi; 552 struct btrfs_file_extent_item *fi;
629 struct btrfs_key key; 553 struct btrfs_key key;
554 struct btrfs_key new_key;
630 u64 bytenr; 555 u64 bytenr;
631 u64 num_bytes; 556 u64 num_bytes;
632 u64 extent_end; 557 u64 extent_end;
633 u64 orig_offset; 558 u64 orig_offset;
634 u64 other_start; 559 u64 other_start;
635 u64 other_end; 560 u64 other_end;
636 u64 split = start; 561 u64 split;
637 u64 locked_end = end; 562 int del_nr = 0;
638 int extent_type; 563 int del_slot = 0;
639 int split_end = 1;
640 int ret; 564 int ret;
641 565
642 btrfs_drop_extent_cache(inode, start, end - 1, 0); 566 btrfs_drop_extent_cache(inode, start, end - 1, 0);
@@ -644,12 +568,10 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
644 path = btrfs_alloc_path(); 568 path = btrfs_alloc_path();
645 BUG_ON(!path); 569 BUG_ON(!path);
646again: 570again:
571 split = start;
647 key.objectid = inode->i_ino; 572 key.objectid = inode->i_ino;
648 key.type = BTRFS_EXTENT_DATA_KEY; 573 key.type = BTRFS_EXTENT_DATA_KEY;
649 if (split == start) 574 key.offset = split;
650 key.offset = split;
651 else
652 key.offset = split - 1;
653 575
654 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 576 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
655 if (ret > 0 && path->slots[0] > 0) 577 if (ret > 0 && path->slots[0] > 0)
@@ -661,8 +583,8 @@ again:
661 key.type != BTRFS_EXTENT_DATA_KEY); 583 key.type != BTRFS_EXTENT_DATA_KEY);
662 fi = btrfs_item_ptr(leaf, path->slots[0], 584 fi = btrfs_item_ptr(leaf, path->slots[0],
663 struct btrfs_file_extent_item); 585 struct btrfs_file_extent_item);
664 extent_type = btrfs_file_extent_type(leaf, fi); 586 BUG_ON(btrfs_file_extent_type(leaf, fi) !=
665 BUG_ON(extent_type != BTRFS_FILE_EXTENT_PREALLOC); 587 BTRFS_FILE_EXTENT_PREALLOC);
666 extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi); 588 extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
667 BUG_ON(key.offset > start || extent_end < end); 589 BUG_ON(key.offset > start || extent_end < end);
668 590
@@ -670,150 +592,91 @@ again:
670 num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi); 592 num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
671 orig_offset = key.offset - btrfs_file_extent_offset(leaf, fi); 593 orig_offset = key.offset - btrfs_file_extent_offset(leaf, fi);
672 594
673 if (key.offset == start) 595 while (start > key.offset || end < extent_end) {
674 split = end; 596 if (key.offset == start)
675 597 split = end;
676 if (key.offset == start && extent_end == end) { 598
677 int del_nr = 0; 599 memcpy(&new_key, &key, sizeof(new_key));
678 int del_slot = 0; 600 new_key.offset = split;
679 other_start = end; 601 ret = btrfs_duplicate_item(trans, root, path, &new_key);
680 other_end = 0; 602 if (ret == -EAGAIN) {
681 if (extent_mergeable(leaf, path->slots[0] + 1, inode->i_ino, 603 btrfs_release_path(root, path);
682 bytenr, &other_start, &other_end)) { 604 goto again;
683 extent_end = other_end;
684 del_slot = path->slots[0] + 1;
685 del_nr++;
686 ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
687 0, root->root_key.objectid,
688 inode->i_ino, orig_offset);
689 BUG_ON(ret);
690 }
691 other_start = 0;
692 other_end = start;
693 if (extent_mergeable(leaf, path->slots[0] - 1, inode->i_ino,
694 bytenr, &other_start, &other_end)) {
695 key.offset = other_start;
696 del_slot = path->slots[0];
697 del_nr++;
698 ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
699 0, root->root_key.objectid,
700 inode->i_ino, orig_offset);
701 BUG_ON(ret);
702 }
703 split_end = 0;
704 if (del_nr == 0) {
705 btrfs_set_file_extent_type(leaf, fi,
706 BTRFS_FILE_EXTENT_REG);
707 goto done;
708 } 605 }
606 BUG_ON(ret < 0);
709 607
710 fi = btrfs_item_ptr(leaf, del_slot - 1, 608 leaf = path->nodes[0];
609 fi = btrfs_item_ptr(leaf, path->slots[0] - 1,
711 struct btrfs_file_extent_item); 610 struct btrfs_file_extent_item);
712 btrfs_set_file_extent_type(leaf, fi, BTRFS_FILE_EXTENT_REG);
713 btrfs_set_file_extent_num_bytes(leaf, fi, 611 btrfs_set_file_extent_num_bytes(leaf, fi,
714 extent_end - key.offset); 612 split - key.offset);
613
614 fi = btrfs_item_ptr(leaf, path->slots[0],
615 struct btrfs_file_extent_item);
616
617 btrfs_set_file_extent_offset(leaf, fi, split - orig_offset);
618 btrfs_set_file_extent_num_bytes(leaf, fi,
619 extent_end - split);
715 btrfs_mark_buffer_dirty(leaf); 620 btrfs_mark_buffer_dirty(leaf);
716 621
717 ret = btrfs_del_items(trans, root, path, del_slot, del_nr); 622 ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0,
623 root->root_key.objectid,
624 inode->i_ino, orig_offset);
718 BUG_ON(ret); 625 BUG_ON(ret);
719 goto release;
720 } else if (split == start) {
721 if (locked_end < extent_end) {
722 ret = try_lock_extent(&BTRFS_I(inode)->io_tree,
723 locked_end, extent_end - 1, GFP_NOFS);
724 if (!ret) {
725 btrfs_release_path(root, path);
726 lock_extent(&BTRFS_I(inode)->io_tree,
727 locked_end, extent_end - 1, GFP_NOFS);
728 locked_end = extent_end;
729 goto again;
730 }
731 locked_end = extent_end;
732 }
733 btrfs_set_file_extent_num_bytes(leaf, fi, split - key.offset);
734 } else {
735 BUG_ON(key.offset != start);
736 key.offset = split;
737 btrfs_set_file_extent_offset(leaf, fi, key.offset -
738 orig_offset);
739 btrfs_set_file_extent_num_bytes(leaf, fi, extent_end - split);
740 btrfs_set_item_key_safe(trans, root, path, &key);
741 extent_end = split;
742 }
743 626
744 if (extent_end == end) { 627 if (split == start) {
745 split_end = 0; 628 key.offset = start;
746 extent_type = BTRFS_FILE_EXTENT_REG; 629 } else {
747 } 630 BUG_ON(start != key.offset);
748 if (extent_end == end && split == start) {
749 other_start = end;
750 other_end = 0;
751 if (extent_mergeable(leaf, path->slots[0] + 1, inode->i_ino,
752 bytenr, &other_start, &other_end)) {
753 path->slots[0]++;
754 fi = btrfs_item_ptr(leaf, path->slots[0],
755 struct btrfs_file_extent_item);
756 key.offset = split;
757 btrfs_set_item_key_safe(trans, root, path, &key);
758 btrfs_set_file_extent_offset(leaf, fi, key.offset -
759 orig_offset);
760 btrfs_set_file_extent_num_bytes(leaf, fi,
761 other_end - split);
762 goto done;
763 }
764 }
765 if (extent_end == end && split == end) {
766 other_start = 0;
767 other_end = start;
768 if (extent_mergeable(leaf, path->slots[0] - 1 , inode->i_ino,
769 bytenr, &other_start, &other_end)) {
770 path->slots[0]--; 631 path->slots[0]--;
771 fi = btrfs_item_ptr(leaf, path->slots[0], 632 extent_end = end;
772 struct btrfs_file_extent_item);
773 btrfs_set_file_extent_num_bytes(leaf, fi, extent_end -
774 other_start);
775 goto done;
776 } 633 }
777 } 634 }
778 635
779 btrfs_mark_buffer_dirty(leaf);
780
781 ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0,
782 root->root_key.objectid,
783 inode->i_ino, orig_offset);
784 BUG_ON(ret);
785 btrfs_release_path(root, path);
786
787 key.offset = start;
788 ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*fi));
789 BUG_ON(ret);
790
791 leaf = path->nodes[0];
792 fi = btrfs_item_ptr(leaf, path->slots[0], 636 fi = btrfs_item_ptr(leaf, path->slots[0],
793 struct btrfs_file_extent_item); 637 struct btrfs_file_extent_item);
794 btrfs_set_file_extent_generation(leaf, fi, trans->transid);
795 btrfs_set_file_extent_type(leaf, fi, extent_type);
796 btrfs_set_file_extent_disk_bytenr(leaf, fi, bytenr);
797 btrfs_set_file_extent_disk_num_bytes(leaf, fi, num_bytes);
798 btrfs_set_file_extent_offset(leaf, fi, key.offset - orig_offset);
799 btrfs_set_file_extent_num_bytes(leaf, fi, extent_end - key.offset);
800 btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes);
801 btrfs_set_file_extent_compression(leaf, fi, 0);
802 btrfs_set_file_extent_encryption(leaf, fi, 0);
803 btrfs_set_file_extent_other_encoding(leaf, fi, 0);
804done:
805 btrfs_mark_buffer_dirty(leaf);
806 638
807release: 639 other_start = end;
808 btrfs_release_path(root, path); 640 other_end = 0;
809 if (split_end && split == start) { 641 if (extent_mergeable(leaf, path->slots[0] + 1, inode->i_ino,
810 split = end; 642 bytenr, &other_start, &other_end)) {
811 goto again; 643 extent_end = other_end;
644 del_slot = path->slots[0] + 1;
645 del_nr++;
646 ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
647 0, root->root_key.objectid,
648 inode->i_ino, orig_offset);
649 BUG_ON(ret);
812 } 650 }
813 if (locked_end > end) { 651 other_start = 0;
814 unlock_extent(&BTRFS_I(inode)->io_tree, end, locked_end - 1, 652 other_end = start;
815 GFP_NOFS); 653 if (extent_mergeable(leaf, path->slots[0] - 1, inode->i_ino,
654 bytenr, &other_start, &other_end)) {
655 key.offset = other_start;
656 del_slot = path->slots[0];
657 del_nr++;
658 ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
659 0, root->root_key.objectid,
660 inode->i_ino, orig_offset);
661 BUG_ON(ret);
816 } 662 }
663 if (del_nr == 0) {
664 btrfs_set_file_extent_type(leaf, fi,
665 BTRFS_FILE_EXTENT_REG);
666 btrfs_mark_buffer_dirty(leaf);
667 goto out;
668 }
669
670 fi = btrfs_item_ptr(leaf, del_slot - 1,
671 struct btrfs_file_extent_item);
672 btrfs_set_file_extent_type(leaf, fi, BTRFS_FILE_EXTENT_REG);
673 btrfs_set_file_extent_num_bytes(leaf, fi,
674 extent_end - key.offset);
675 btrfs_mark_buffer_dirty(leaf);
676
677 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
678 BUG_ON(ret);
679out:
817 btrfs_free_path(path); 680 btrfs_free_path(path);
818 return 0; 681 return 0;
819} 682}
@@ -909,7 +772,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
909 unsigned long last_index; 772 unsigned long last_index;
910 int will_write; 773 int will_write;
911 774
912 will_write = ((file->f_flags & O_SYNC) || IS_SYNC(inode) || 775 will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) ||
913 (file->f_flags & O_DIRECT)); 776 (file->f_flags & O_DIRECT));
914 777
915 nrptrs = min((count + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE, 778 nrptrs = min((count + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE,
@@ -1076,7 +939,7 @@ out_nolock:
1076 if (err) 939 if (err)
1077 num_written = err; 940 num_written = err;
1078 941
1079 if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) { 942 if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) {
1080 trans = btrfs_start_transaction(root, 1); 943 trans = btrfs_start_transaction(root, 1);
1081 ret = btrfs_log_dentry_safe(trans, root, 944 ret = btrfs_log_dentry_safe(trans, root,
1082 file->f_dentry); 945 file->f_dentry);
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 5c2caad76212..cb2849f03251 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -1296,7 +1296,7 @@ again:
1296 window_start = entry->offset; 1296 window_start = entry->offset;
1297 window_free = entry->bytes; 1297 window_free = entry->bytes;
1298 last = entry; 1298 last = entry;
1299 max_extent = 0; 1299 max_extent = entry->bytes;
1300 } else { 1300 } else {
1301 last = next; 1301 last = next;
1302 window_free += next->bytes; 1302 window_free += next->bytes;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index dae12dc7e159..5440bab23635 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -88,13 +88,14 @@ static noinline int cow_file_range(struct inode *inode,
88 u64 start, u64 end, int *page_started, 88 u64 start, u64 end, int *page_started,
89 unsigned long *nr_written, int unlock); 89 unsigned long *nr_written, int unlock);
90 90
91static int btrfs_init_inode_security(struct inode *inode, struct inode *dir) 91static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
92 struct inode *inode, struct inode *dir)
92{ 93{
93 int err; 94 int err;
94 95
95 err = btrfs_init_acl(inode, dir); 96 err = btrfs_init_acl(trans, inode, dir);
96 if (!err) 97 if (!err)
97 err = btrfs_xattr_security_init(inode, dir); 98 err = btrfs_xattr_security_init(trans, inode, dir);
98 return err; 99 return err;
99} 100}
100 101
@@ -188,8 +189,18 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
188 btrfs_mark_buffer_dirty(leaf); 189 btrfs_mark_buffer_dirty(leaf);
189 btrfs_free_path(path); 190 btrfs_free_path(path);
190 191
192 /*
193 * we're an inline extent, so nobody can
194 * extend the file past i_size without locking
195 * a page we already have locked.
196 *
197 * We must do any isize and inode updates
198 * before we unlock the pages. Otherwise we
199 * could end up racing with unlink.
200 */
191 BTRFS_I(inode)->disk_i_size = inode->i_size; 201 BTRFS_I(inode)->disk_i_size = inode->i_size;
192 btrfs_update_inode(trans, root, inode); 202 btrfs_update_inode(trans, root, inode);
203
193 return 0; 204 return 0;
194fail: 205fail:
195 btrfs_free_path(path); 206 btrfs_free_path(path);
@@ -230,8 +241,7 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
230 return 1; 241 return 1;
231 } 242 }
232 243
233 ret = btrfs_drop_extents(trans, root, inode, start, 244 ret = btrfs_drop_extents(trans, inode, start, aligned_end,
234 aligned_end, aligned_end, start,
235 &hint_byte, 1); 245 &hint_byte, 1);
236 BUG_ON(ret); 246 BUG_ON(ret);
237 247
@@ -416,7 +426,6 @@ again:
416 start, end, 426 start, end,
417 total_compressed, pages); 427 total_compressed, pages);
418 } 428 }
419 btrfs_end_transaction(trans, root);
420 if (ret == 0) { 429 if (ret == 0) {
421 /* 430 /*
422 * inline extent creation worked, we don't need 431 * inline extent creation worked, we don't need
@@ -430,9 +439,11 @@ again:
430 EXTENT_CLEAR_DELALLOC | 439 EXTENT_CLEAR_DELALLOC |
431 EXTENT_CLEAR_ACCOUNTING | 440 EXTENT_CLEAR_ACCOUNTING |
432 EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK); 441 EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK);
433 ret = 0; 442
443 btrfs_end_transaction(trans, root);
434 goto free_pages_out; 444 goto free_pages_out;
435 } 445 }
446 btrfs_end_transaction(trans, root);
436 } 447 }
437 448
438 if (will_compress) { 449 if (will_compress) {
@@ -538,12 +549,11 @@ static noinline int submit_compressed_extents(struct inode *inode,
538 struct btrfs_root *root = BTRFS_I(inode)->root; 549 struct btrfs_root *root = BTRFS_I(inode)->root;
539 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; 550 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
540 struct extent_io_tree *io_tree; 551 struct extent_io_tree *io_tree;
541 int ret; 552 int ret = 0;
542 553
543 if (list_empty(&async_cow->extents)) 554 if (list_empty(&async_cow->extents))
544 return 0; 555 return 0;
545 556
546 trans = btrfs_join_transaction(root, 1);
547 557
548 while (!list_empty(&async_cow->extents)) { 558 while (!list_empty(&async_cow->extents)) {
549 async_extent = list_entry(async_cow->extents.next, 559 async_extent = list_entry(async_cow->extents.next,
@@ -552,6 +562,7 @@ static noinline int submit_compressed_extents(struct inode *inode,
552 562
553 io_tree = &BTRFS_I(inode)->io_tree; 563 io_tree = &BTRFS_I(inode)->io_tree;
554 564
565retry:
555 /* did the compression code fall back to uncompressed IO? */ 566 /* did the compression code fall back to uncompressed IO? */
556 if (!async_extent->pages) { 567 if (!async_extent->pages) {
557 int page_started = 0; 568 int page_started = 0;
@@ -562,11 +573,11 @@ static noinline int submit_compressed_extents(struct inode *inode,
562 async_extent->ram_size - 1, GFP_NOFS); 573 async_extent->ram_size - 1, GFP_NOFS);
563 574
564 /* allocate blocks */ 575 /* allocate blocks */
565 cow_file_range(inode, async_cow->locked_page, 576 ret = cow_file_range(inode, async_cow->locked_page,
566 async_extent->start, 577 async_extent->start,
567 async_extent->start + 578 async_extent->start +
568 async_extent->ram_size - 1, 579 async_extent->ram_size - 1,
569 &page_started, &nr_written, 0); 580 &page_started, &nr_written, 0);
570 581
571 /* 582 /*
572 * if page_started, cow_file_range inserted an 583 * if page_started, cow_file_range inserted an
@@ -574,7 +585,7 @@ static noinline int submit_compressed_extents(struct inode *inode,
574 * and IO for us. Otherwise, we need to submit 585 * and IO for us. Otherwise, we need to submit
575 * all those pages down to the drive. 586 * all those pages down to the drive.
576 */ 587 */
577 if (!page_started) 588 if (!page_started && !ret)
578 extent_write_locked_range(io_tree, 589 extent_write_locked_range(io_tree,
579 inode, async_extent->start, 590 inode, async_extent->start,
580 async_extent->start + 591 async_extent->start +
@@ -589,6 +600,30 @@ static noinline int submit_compressed_extents(struct inode *inode,
589 lock_extent(io_tree, async_extent->start, 600 lock_extent(io_tree, async_extent->start,
590 async_extent->start + async_extent->ram_size - 1, 601 async_extent->start + async_extent->ram_size - 1,
591 GFP_NOFS); 602 GFP_NOFS);
603
604 trans = btrfs_join_transaction(root, 1);
605 ret = btrfs_reserve_extent(trans, root,
606 async_extent->compressed_size,
607 async_extent->compressed_size,
608 0, alloc_hint,
609 (u64)-1, &ins, 1);
610 btrfs_end_transaction(trans, root);
611
612 if (ret) {
613 int i;
614 for (i = 0; i < async_extent->nr_pages; i++) {
615 WARN_ON(async_extent->pages[i]->mapping);
616 page_cache_release(async_extent->pages[i]);
617 }
618 kfree(async_extent->pages);
619 async_extent->nr_pages = 0;
620 async_extent->pages = NULL;
621 unlock_extent(io_tree, async_extent->start,
622 async_extent->start +
623 async_extent->ram_size - 1, GFP_NOFS);
624 goto retry;
625 }
626
592 /* 627 /*
593 * here we're doing allocation and writeback of the 628 * here we're doing allocation and writeback of the
594 * compressed pages 629 * compressed pages
@@ -597,12 +632,6 @@ static noinline int submit_compressed_extents(struct inode *inode,
597 async_extent->start + 632 async_extent->start +
598 async_extent->ram_size - 1, 0); 633 async_extent->ram_size - 1, 0);
599 634
600 ret = btrfs_reserve_extent(trans, root,
601 async_extent->compressed_size,
602 async_extent->compressed_size,
603 0, alloc_hint,
604 (u64)-1, &ins, 1);
605 BUG_ON(ret);
606 em = alloc_extent_map(GFP_NOFS); 635 em = alloc_extent_map(GFP_NOFS);
607 em->start = async_extent->start; 636 em->start = async_extent->start;
608 em->len = async_extent->ram_size; 637 em->len = async_extent->ram_size;
@@ -634,8 +663,6 @@ static noinline int submit_compressed_extents(struct inode *inode,
634 BTRFS_ORDERED_COMPRESSED); 663 BTRFS_ORDERED_COMPRESSED);
635 BUG_ON(ret); 664 BUG_ON(ret);
636 665
637 btrfs_end_transaction(trans, root);
638
639 /* 666 /*
640 * clear dirty, set writeback and unlock the pages. 667 * clear dirty, set writeback and unlock the pages.
641 */ 668 */
@@ -657,13 +684,11 @@ static noinline int submit_compressed_extents(struct inode *inode,
657 async_extent->nr_pages); 684 async_extent->nr_pages);
658 685
659 BUG_ON(ret); 686 BUG_ON(ret);
660 trans = btrfs_join_transaction(root, 1);
661 alloc_hint = ins.objectid + ins.offset; 687 alloc_hint = ins.objectid + ins.offset;
662 kfree(async_extent); 688 kfree(async_extent);
663 cond_resched(); 689 cond_resched();
664 } 690 }
665 691
666 btrfs_end_transaction(trans, root);
667 return 0; 692 return 0;
668} 693}
669 694
@@ -727,6 +752,7 @@ static noinline int cow_file_range(struct inode *inode,
727 EXTENT_CLEAR_DIRTY | 752 EXTENT_CLEAR_DIRTY |
728 EXTENT_SET_WRITEBACK | 753 EXTENT_SET_WRITEBACK |
729 EXTENT_END_WRITEBACK); 754 EXTENT_END_WRITEBACK);
755
730 *nr_written = *nr_written + 756 *nr_written = *nr_written +
731 (end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE; 757 (end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE;
732 *page_started = 1; 758 *page_started = 1;
@@ -743,8 +769,22 @@ static noinline int cow_file_range(struct inode *inode,
743 em = search_extent_mapping(&BTRFS_I(inode)->extent_tree, 769 em = search_extent_mapping(&BTRFS_I(inode)->extent_tree,
744 start, num_bytes); 770 start, num_bytes);
745 if (em) { 771 if (em) {
746 alloc_hint = em->block_start; 772 /*
747 free_extent_map(em); 773 * if block start isn't an actual block number then find the
774 * first block in this inode and use that as a hint. If that
775 * block is also bogus then just don't worry about it.
776 */
777 if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
778 free_extent_map(em);
779 em = search_extent_mapping(em_tree, 0, 0);
780 if (em && em->block_start < EXTENT_MAP_LAST_BYTE)
781 alloc_hint = em->block_start;
782 if (em)
783 free_extent_map(em);
784 } else {
785 alloc_hint = em->block_start;
786 free_extent_map(em);
787 }
748 } 788 }
749 read_unlock(&BTRFS_I(inode)->extent_tree.lock); 789 read_unlock(&BTRFS_I(inode)->extent_tree.lock);
750 btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0); 790 btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0);
@@ -1567,7 +1607,6 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
1567 struct inode *inode, u64 file_pos, 1607 struct inode *inode, u64 file_pos,
1568 u64 disk_bytenr, u64 disk_num_bytes, 1608 u64 disk_bytenr, u64 disk_num_bytes,
1569 u64 num_bytes, u64 ram_bytes, 1609 u64 num_bytes, u64 ram_bytes,
1570 u64 locked_end,
1571 u8 compression, u8 encryption, 1610 u8 compression, u8 encryption,
1572 u16 other_encoding, int extent_type) 1611 u16 other_encoding, int extent_type)
1573{ 1612{
@@ -1593,9 +1632,8 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
1593 * the caller is expected to unpin it and allow it to be merged 1632 * the caller is expected to unpin it and allow it to be merged
1594 * with the others. 1633 * with the others.
1595 */ 1634 */
1596 ret = btrfs_drop_extents(trans, root, inode, file_pos, 1635 ret = btrfs_drop_extents(trans, inode, file_pos, file_pos + num_bytes,
1597 file_pos + num_bytes, locked_end, 1636 &hint, 0);
1598 file_pos, &hint, 0);
1599 BUG_ON(ret); 1637 BUG_ON(ret);
1600 1638
1601 ins.objectid = inode->i_ino; 1639 ins.objectid = inode->i_ino;
@@ -1701,23 +1739,32 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1701 } 1739 }
1702 } 1740 }
1703 1741
1704 trans = btrfs_join_transaction(root, 1);
1705
1706 if (!ordered_extent) 1742 if (!ordered_extent)
1707 ordered_extent = btrfs_lookup_ordered_extent(inode, start); 1743 ordered_extent = btrfs_lookup_ordered_extent(inode, start);
1708 BUG_ON(!ordered_extent); 1744 BUG_ON(!ordered_extent);
1709 if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) 1745 if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
1710 goto nocow; 1746 BUG_ON(!list_empty(&ordered_extent->list));
1747 ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
1748 if (!ret) {
1749 trans = btrfs_join_transaction(root, 1);
1750 ret = btrfs_update_inode(trans, root, inode);
1751 BUG_ON(ret);
1752 btrfs_end_transaction(trans, root);
1753 }
1754 goto out;
1755 }
1711 1756
1712 lock_extent(io_tree, ordered_extent->file_offset, 1757 lock_extent(io_tree, ordered_extent->file_offset,
1713 ordered_extent->file_offset + ordered_extent->len - 1, 1758 ordered_extent->file_offset + ordered_extent->len - 1,
1714 GFP_NOFS); 1759 GFP_NOFS);
1715 1760
1761 trans = btrfs_join_transaction(root, 1);
1762
1716 if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags)) 1763 if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags))
1717 compressed = 1; 1764 compressed = 1;
1718 if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) { 1765 if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) {
1719 BUG_ON(compressed); 1766 BUG_ON(compressed);
1720 ret = btrfs_mark_extent_written(trans, root, inode, 1767 ret = btrfs_mark_extent_written(trans, inode,
1721 ordered_extent->file_offset, 1768 ordered_extent->file_offset,
1722 ordered_extent->file_offset + 1769 ordered_extent->file_offset +
1723 ordered_extent->len); 1770 ordered_extent->len);
@@ -1729,8 +1776,6 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1729 ordered_extent->disk_len, 1776 ordered_extent->disk_len,
1730 ordered_extent->len, 1777 ordered_extent->len,
1731 ordered_extent->len, 1778 ordered_extent->len,
1732 ordered_extent->file_offset +
1733 ordered_extent->len,
1734 compressed, 0, 0, 1779 compressed, 0, 0,
1735 BTRFS_FILE_EXTENT_REG); 1780 BTRFS_FILE_EXTENT_REG);
1736 unpin_extent_cache(&BTRFS_I(inode)->extent_tree, 1781 unpin_extent_cache(&BTRFS_I(inode)->extent_tree,
@@ -1741,22 +1786,20 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1741 unlock_extent(io_tree, ordered_extent->file_offset, 1786 unlock_extent(io_tree, ordered_extent->file_offset,
1742 ordered_extent->file_offset + ordered_extent->len - 1, 1787 ordered_extent->file_offset + ordered_extent->len - 1,
1743 GFP_NOFS); 1788 GFP_NOFS);
1744nocow:
1745 add_pending_csums(trans, inode, ordered_extent->file_offset, 1789 add_pending_csums(trans, inode, ordered_extent->file_offset,
1746 &ordered_extent->list); 1790 &ordered_extent->list);
1747 1791
1748 mutex_lock(&BTRFS_I(inode)->extent_mutex); 1792 /* this also removes the ordered extent from the tree */
1749 btrfs_ordered_update_i_size(inode, ordered_extent); 1793 btrfs_ordered_update_i_size(inode, 0, ordered_extent);
1750 btrfs_update_inode(trans, root, inode); 1794 ret = btrfs_update_inode(trans, root, inode);
1751 btrfs_remove_ordered_extent(inode, ordered_extent); 1795 BUG_ON(ret);
1752 mutex_unlock(&BTRFS_I(inode)->extent_mutex); 1796 btrfs_end_transaction(trans, root);
1753 1797out:
1754 /* once for us */ 1798 /* once for us */
1755 btrfs_put_ordered_extent(ordered_extent); 1799 btrfs_put_ordered_extent(ordered_extent);
1756 /* once for the tree */ 1800 /* once for the tree */
1757 btrfs_put_ordered_extent(ordered_extent); 1801 btrfs_put_ordered_extent(ordered_extent);
1758 1802
1759 btrfs_end_transaction(trans, root);
1760 return 0; 1803 return 0;
1761} 1804}
1762 1805
@@ -1979,6 +2022,54 @@ zeroit:
1979 return -EIO; 2022 return -EIO;
1980} 2023}
1981 2024
2025struct delayed_iput {
2026 struct list_head list;
2027 struct inode *inode;
2028};
2029
2030void btrfs_add_delayed_iput(struct inode *inode)
2031{
2032 struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
2033 struct delayed_iput *delayed;
2034
2035 if (atomic_add_unless(&inode->i_count, -1, 1))
2036 return;
2037
2038 delayed = kmalloc(sizeof(*delayed), GFP_NOFS | __GFP_NOFAIL);
2039 delayed->inode = inode;
2040
2041 spin_lock(&fs_info->delayed_iput_lock);
2042 list_add_tail(&delayed->list, &fs_info->delayed_iputs);
2043 spin_unlock(&fs_info->delayed_iput_lock);
2044}
2045
2046void btrfs_run_delayed_iputs(struct btrfs_root *root)
2047{
2048 LIST_HEAD(list);
2049 struct btrfs_fs_info *fs_info = root->fs_info;
2050 struct delayed_iput *delayed;
2051 int empty;
2052
2053 spin_lock(&fs_info->delayed_iput_lock);
2054 empty = list_empty(&fs_info->delayed_iputs);
2055 spin_unlock(&fs_info->delayed_iput_lock);
2056 if (empty)
2057 return;
2058
2059 down_read(&root->fs_info->cleanup_work_sem);
2060 spin_lock(&fs_info->delayed_iput_lock);
2061 list_splice_init(&fs_info->delayed_iputs, &list);
2062 spin_unlock(&fs_info->delayed_iput_lock);
2063
2064 while (!list_empty(&list)) {
2065 delayed = list_entry(list.next, struct delayed_iput, list);
2066 list_del(&delayed->list);
2067 iput(delayed->inode);
2068 kfree(delayed);
2069 }
2070 up_read(&root->fs_info->cleanup_work_sem);
2071}
2072
1982/* 2073/*
1983 * This creates an orphan entry for the given inode in case something goes 2074 * This creates an orphan entry for the given inode in case something goes
1984 * wrong in the middle of an unlink/truncate. 2075 * wrong in the middle of an unlink/truncate.
@@ -2051,16 +2142,17 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
2051 struct inode *inode; 2142 struct inode *inode;
2052 int ret = 0, nr_unlink = 0, nr_truncate = 0; 2143 int ret = 0, nr_unlink = 0, nr_truncate = 0;
2053 2144
2054 path = btrfs_alloc_path(); 2145 if (!xchg(&root->clean_orphans, 0))
2055 if (!path)
2056 return; 2146 return;
2147
2148 path = btrfs_alloc_path();
2149 BUG_ON(!path);
2057 path->reada = -1; 2150 path->reada = -1;
2058 2151
2059 key.objectid = BTRFS_ORPHAN_OBJECTID; 2152 key.objectid = BTRFS_ORPHAN_OBJECTID;
2060 btrfs_set_key_type(&key, BTRFS_ORPHAN_ITEM_KEY); 2153 btrfs_set_key_type(&key, BTRFS_ORPHAN_ITEM_KEY);
2061 key.offset = (u64)-1; 2154 key.offset = (u64)-1;
2062 2155
2063
2064 while (1) { 2156 while (1) {
2065 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 2157 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2066 if (ret < 0) { 2158 if (ret < 0) {
@@ -2474,7 +2566,19 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
2474 2566
2475 root = BTRFS_I(dir)->root; 2567 root = BTRFS_I(dir)->root;
2476 2568
2569 /*
2570 * 5 items for unlink inode
2571 * 1 for orphan
2572 */
2573 ret = btrfs_reserve_metadata_space(root, 6);
2574 if (ret)
2575 return ret;
2576
2477 trans = btrfs_start_transaction(root, 1); 2577 trans = btrfs_start_transaction(root, 1);
2578 if (IS_ERR(trans)) {
2579 btrfs_unreserve_metadata_space(root, 6);
2580 return PTR_ERR(trans);
2581 }
2478 2582
2479 btrfs_set_trans_block_group(trans, dir); 2583 btrfs_set_trans_block_group(trans, dir);
2480 2584
@@ -2489,6 +2593,7 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
2489 nr = trans->blocks_used; 2593 nr = trans->blocks_used;
2490 2594
2491 btrfs_end_transaction_throttle(trans, root); 2595 btrfs_end_transaction_throttle(trans, root);
2596 btrfs_unreserve_metadata_space(root, 6);
2492 btrfs_btree_balance_dirty(root, nr); 2597 btrfs_btree_balance_dirty(root, nr);
2493 return ret; 2598 return ret;
2494} 2599}
@@ -2569,7 +2674,16 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
2569 inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) 2674 inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)
2570 return -ENOTEMPTY; 2675 return -ENOTEMPTY;
2571 2676
2677 ret = btrfs_reserve_metadata_space(root, 5);
2678 if (ret)
2679 return ret;
2680
2572 trans = btrfs_start_transaction(root, 1); 2681 trans = btrfs_start_transaction(root, 1);
2682 if (IS_ERR(trans)) {
2683 btrfs_unreserve_metadata_space(root, 5);
2684 return PTR_ERR(trans);
2685 }
2686
2573 btrfs_set_trans_block_group(trans, dir); 2687 btrfs_set_trans_block_group(trans, dir);
2574 2688
2575 if (unlikely(inode->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { 2689 if (unlikely(inode->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
@@ -2592,6 +2706,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
2592out: 2706out:
2593 nr = trans->blocks_used; 2707 nr = trans->blocks_used;
2594 ret = btrfs_end_transaction_throttle(trans, root); 2708 ret = btrfs_end_transaction_throttle(trans, root);
2709 btrfs_unreserve_metadata_space(root, 5);
2595 btrfs_btree_balance_dirty(root, nr); 2710 btrfs_btree_balance_dirty(root, nr);
2596 2711
2597 if (ret && !err) 2712 if (ret && !err)
@@ -2782,37 +2897,40 @@ out:
2782 * min_type is the minimum key type to truncate down to. If set to 0, this 2897 * min_type is the minimum key type to truncate down to. If set to 0, this
2783 * will kill all the items on this inode, including the INODE_ITEM_KEY. 2898 * will kill all the items on this inode, including the INODE_ITEM_KEY.
2784 */ 2899 */
2785noinline int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, 2900int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
2786 struct btrfs_root *root, 2901 struct btrfs_root *root,
2787 struct inode *inode, 2902 struct inode *inode,
2788 u64 new_size, u32 min_type) 2903 u64 new_size, u32 min_type)
2789{ 2904{
2790 int ret;
2791 struct btrfs_path *path; 2905 struct btrfs_path *path;
2792 struct btrfs_key key;
2793 struct btrfs_key found_key;
2794 u32 found_type = (u8)-1;
2795 struct extent_buffer *leaf; 2906 struct extent_buffer *leaf;
2796 struct btrfs_file_extent_item *fi; 2907 struct btrfs_file_extent_item *fi;
2908 struct btrfs_key key;
2909 struct btrfs_key found_key;
2797 u64 extent_start = 0; 2910 u64 extent_start = 0;
2798 u64 extent_num_bytes = 0; 2911 u64 extent_num_bytes = 0;
2799 u64 extent_offset = 0; 2912 u64 extent_offset = 0;
2800 u64 item_end = 0; 2913 u64 item_end = 0;
2914 u64 mask = root->sectorsize - 1;
2915 u32 found_type = (u8)-1;
2801 int found_extent; 2916 int found_extent;
2802 int del_item; 2917 int del_item;
2803 int pending_del_nr = 0; 2918 int pending_del_nr = 0;
2804 int pending_del_slot = 0; 2919 int pending_del_slot = 0;
2805 int extent_type = -1; 2920 int extent_type = -1;
2806 int encoding; 2921 int encoding;
2807 u64 mask = root->sectorsize - 1; 2922 int ret;
2923 int err = 0;
2924
2925 BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY);
2808 2926
2809 if (root->ref_cows) 2927 if (root->ref_cows)
2810 btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0); 2928 btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0);
2929
2811 path = btrfs_alloc_path(); 2930 path = btrfs_alloc_path();
2812 BUG_ON(!path); 2931 BUG_ON(!path);
2813 path->reada = -1; 2932 path->reada = -1;
2814 2933
2815 /* FIXME, add redo link to tree so we don't leak on crash */
2816 key.objectid = inode->i_ino; 2934 key.objectid = inode->i_ino;
2817 key.offset = (u64)-1; 2935 key.offset = (u64)-1;
2818 key.type = (u8)-1; 2936 key.type = (u8)-1;
@@ -2820,17 +2938,17 @@ noinline int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
2820search_again: 2938search_again:
2821 path->leave_spinning = 1; 2939 path->leave_spinning = 1;
2822 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 2940 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2823 if (ret < 0) 2941 if (ret < 0) {
2824 goto error; 2942 err = ret;
2943 goto out;
2944 }
2825 2945
2826 if (ret > 0) { 2946 if (ret > 0) {
2827 /* there are no items in the tree for us to truncate, we're 2947 /* there are no items in the tree for us to truncate, we're
2828 * done 2948 * done
2829 */ 2949 */
2830 if (path->slots[0] == 0) { 2950 if (path->slots[0] == 0)
2831 ret = 0; 2951 goto out;
2832 goto error;
2833 }
2834 path->slots[0]--; 2952 path->slots[0]--;
2835 } 2953 }
2836 2954
@@ -2865,28 +2983,17 @@ search_again:
2865 } 2983 }
2866 item_end--; 2984 item_end--;
2867 } 2985 }
2868 if (item_end < new_size) { 2986 if (found_type > min_type) {
2869 if (found_type == BTRFS_DIR_ITEM_KEY) 2987 del_item = 1;
2870 found_type = BTRFS_INODE_ITEM_KEY; 2988 } else {
2871 else if (found_type == BTRFS_EXTENT_ITEM_KEY) 2989 if (item_end < new_size)
2872 found_type = BTRFS_EXTENT_DATA_KEY;
2873 else if (found_type == BTRFS_EXTENT_DATA_KEY)
2874 found_type = BTRFS_XATTR_ITEM_KEY;
2875 else if (found_type == BTRFS_XATTR_ITEM_KEY)
2876 found_type = BTRFS_INODE_REF_KEY;
2877 else if (found_type)
2878 found_type--;
2879 else
2880 break; 2990 break;
2881 btrfs_set_key_type(&key, found_type); 2991 if (found_key.offset >= new_size)
2882 goto next; 2992 del_item = 1;
2993 else
2994 del_item = 0;
2883 } 2995 }
2884 if (found_key.offset >= new_size)
2885 del_item = 1;
2886 else
2887 del_item = 0;
2888 found_extent = 0; 2996 found_extent = 0;
2889
2890 /* FIXME, shrink the extent if the ref count is only 1 */ 2997 /* FIXME, shrink the extent if the ref count is only 1 */
2891 if (found_type != BTRFS_EXTENT_DATA_KEY) 2998 if (found_type != BTRFS_EXTENT_DATA_KEY)
2892 goto delete; 2999 goto delete;
@@ -2973,42 +3080,36 @@ delete:
2973 inode->i_ino, extent_offset); 3080 inode->i_ino, extent_offset);
2974 BUG_ON(ret); 3081 BUG_ON(ret);
2975 } 3082 }
2976next:
2977 if (path->slots[0] == 0) {
2978 if (pending_del_nr)
2979 goto del_pending;
2980 btrfs_release_path(root, path);
2981 if (found_type == BTRFS_INODE_ITEM_KEY)
2982 break;
2983 goto search_again;
2984 }
2985 3083
2986 path->slots[0]--; 3084 if (found_type == BTRFS_INODE_ITEM_KEY)
2987 if (pending_del_nr && 3085 break;
2988 path->slots[0] + 1 != pending_del_slot) { 3086
2989 struct btrfs_key debug; 3087 if (path->slots[0] == 0 ||
2990del_pending: 3088 path->slots[0] != pending_del_slot) {
2991 btrfs_item_key_to_cpu(path->nodes[0], &debug, 3089 if (root->ref_cows) {
2992 pending_del_slot); 3090 err = -EAGAIN;
2993 ret = btrfs_del_items(trans, root, path, 3091 goto out;
2994 pending_del_slot, 3092 }
2995 pending_del_nr); 3093 if (pending_del_nr) {
2996 BUG_ON(ret); 3094 ret = btrfs_del_items(trans, root, path,
2997 pending_del_nr = 0; 3095 pending_del_slot,
3096 pending_del_nr);
3097 BUG_ON(ret);
3098 pending_del_nr = 0;
3099 }
2998 btrfs_release_path(root, path); 3100 btrfs_release_path(root, path);
2999 if (found_type == BTRFS_INODE_ITEM_KEY)
3000 break;
3001 goto search_again; 3101 goto search_again;
3102 } else {
3103 path->slots[0]--;
3002 } 3104 }
3003 } 3105 }
3004 ret = 0; 3106out:
3005error:
3006 if (pending_del_nr) { 3107 if (pending_del_nr) {
3007 ret = btrfs_del_items(trans, root, path, pending_del_slot, 3108 ret = btrfs_del_items(trans, root, path, pending_del_slot,
3008 pending_del_nr); 3109 pending_del_nr);
3009 } 3110 }
3010 btrfs_free_path(path); 3111 btrfs_free_path(path);
3011 return ret; 3112 return err;
3012} 3113}
3013 3114
3014/* 3115/*
@@ -3128,10 +3229,6 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
3128 if (size <= hole_start) 3229 if (size <= hole_start)
3129 return 0; 3230 return 0;
3130 3231
3131 err = btrfs_truncate_page(inode->i_mapping, inode->i_size);
3132 if (err)
3133 return err;
3134
3135 while (1) { 3232 while (1) {
3136 struct btrfs_ordered_extent *ordered; 3233 struct btrfs_ordered_extent *ordered;
3137 btrfs_wait_ordered_range(inode, hole_start, 3234 btrfs_wait_ordered_range(inode, hole_start,
@@ -3144,9 +3241,6 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
3144 btrfs_put_ordered_extent(ordered); 3241 btrfs_put_ordered_extent(ordered);
3145 } 3242 }
3146 3243
3147 trans = btrfs_start_transaction(root, 1);
3148 btrfs_set_trans_block_group(trans, inode);
3149
3150 cur_offset = hole_start; 3244 cur_offset = hole_start;
3151 while (1) { 3245 while (1) {
3152 em = btrfs_get_extent(inode, NULL, 0, cur_offset, 3246 em = btrfs_get_extent(inode, NULL, 0, cur_offset,
@@ -3154,40 +3248,120 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
3154 BUG_ON(IS_ERR(em) || !em); 3248 BUG_ON(IS_ERR(em) || !em);
3155 last_byte = min(extent_map_end(em), block_end); 3249 last_byte = min(extent_map_end(em), block_end);
3156 last_byte = (last_byte + mask) & ~mask; 3250 last_byte = (last_byte + mask) & ~mask;
3157 if (test_bit(EXTENT_FLAG_VACANCY, &em->flags)) { 3251 if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
3158 u64 hint_byte = 0; 3252 u64 hint_byte = 0;
3159 hole_size = last_byte - cur_offset; 3253 hole_size = last_byte - cur_offset;
3160 err = btrfs_drop_extents(trans, root, inode,
3161 cur_offset,
3162 cur_offset + hole_size,
3163 block_end,
3164 cur_offset, &hint_byte, 1);
3165 if (err)
3166 break;
3167 3254
3168 err = btrfs_reserve_metadata_space(root, 1); 3255 err = btrfs_reserve_metadata_space(root, 2);
3169 if (err) 3256 if (err)
3170 break; 3257 break;
3171 3258
3259 trans = btrfs_start_transaction(root, 1);
3260 btrfs_set_trans_block_group(trans, inode);
3261
3262 err = btrfs_drop_extents(trans, inode, cur_offset,
3263 cur_offset + hole_size,
3264 &hint_byte, 1);
3265 BUG_ON(err);
3266
3172 err = btrfs_insert_file_extent(trans, root, 3267 err = btrfs_insert_file_extent(trans, root,
3173 inode->i_ino, cur_offset, 0, 3268 inode->i_ino, cur_offset, 0,
3174 0, hole_size, 0, hole_size, 3269 0, hole_size, 0, hole_size,
3175 0, 0, 0); 3270 0, 0, 0);
3271 BUG_ON(err);
3272
3176 btrfs_drop_extent_cache(inode, hole_start, 3273 btrfs_drop_extent_cache(inode, hole_start,
3177 last_byte - 1, 0); 3274 last_byte - 1, 0);
3178 btrfs_unreserve_metadata_space(root, 1); 3275
3276 btrfs_end_transaction(trans, root);
3277 btrfs_unreserve_metadata_space(root, 2);
3179 } 3278 }
3180 free_extent_map(em); 3279 free_extent_map(em);
3181 cur_offset = last_byte; 3280 cur_offset = last_byte;
3182 if (err || cur_offset >= block_end) 3281 if (cur_offset >= block_end)
3183 break; 3282 break;
3184 } 3283 }
3185 3284
3186 btrfs_end_transaction(trans, root);
3187 unlock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS); 3285 unlock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS);
3188 return err; 3286 return err;
3189} 3287}
3190 3288
3289static int btrfs_setattr_size(struct inode *inode, struct iattr *attr)
3290{
3291 struct btrfs_root *root = BTRFS_I(inode)->root;
3292 struct btrfs_trans_handle *trans;
3293 unsigned long nr;
3294 int ret;
3295
3296 if (attr->ia_size == inode->i_size)
3297 return 0;
3298
3299 if (attr->ia_size > inode->i_size) {
3300 unsigned long limit;
3301 limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
3302 if (attr->ia_size > inode->i_sb->s_maxbytes)
3303 return -EFBIG;
3304 if (limit != RLIM_INFINITY && attr->ia_size > limit) {
3305 send_sig(SIGXFSZ, current, 0);
3306 return -EFBIG;
3307 }
3308 }
3309
3310 ret = btrfs_reserve_metadata_space(root, 1);
3311 if (ret)
3312 return ret;
3313
3314 trans = btrfs_start_transaction(root, 1);
3315 btrfs_set_trans_block_group(trans, inode);
3316
3317 ret = btrfs_orphan_add(trans, inode);
3318 BUG_ON(ret);
3319
3320 nr = trans->blocks_used;
3321 btrfs_end_transaction(trans, root);
3322 btrfs_unreserve_metadata_space(root, 1);
3323 btrfs_btree_balance_dirty(root, nr);
3324
3325 if (attr->ia_size > inode->i_size) {
3326 ret = btrfs_cont_expand(inode, attr->ia_size);
3327 if (ret) {
3328 btrfs_truncate(inode);
3329 return ret;
3330 }
3331
3332 i_size_write(inode, attr->ia_size);
3333 btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
3334
3335 trans = btrfs_start_transaction(root, 1);
3336 btrfs_set_trans_block_group(trans, inode);
3337
3338 ret = btrfs_update_inode(trans, root, inode);
3339 BUG_ON(ret);
3340 if (inode->i_nlink > 0) {
3341 ret = btrfs_orphan_del(trans, inode);
3342 BUG_ON(ret);
3343 }
3344 nr = trans->blocks_used;
3345 btrfs_end_transaction(trans, root);
3346 btrfs_btree_balance_dirty(root, nr);
3347 return 0;
3348 }
3349
3350 /*
3351 * We're truncating a file that used to have good data down to
3352 * zero. Make sure it gets into the ordered flush list so that
3353 * any new writes get down to disk quickly.
3354 */
3355 if (attr->ia_size == 0)
3356 BTRFS_I(inode)->ordered_data_close = 1;
3357
3358 /* we don't support swapfiles, so vmtruncate shouldn't fail */
3359 ret = vmtruncate(inode, attr->ia_size);
3360 BUG_ON(ret);
3361
3362 return 0;
3363}
3364
3191static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) 3365static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
3192{ 3366{
3193 struct inode *inode = dentry->d_inode; 3367 struct inode *inode = dentry->d_inode;
@@ -3198,23 +3372,14 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
3198 return err; 3372 return err;
3199 3373
3200 if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) { 3374 if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) {
3201 if (attr->ia_size > inode->i_size) { 3375 err = btrfs_setattr_size(inode, attr);
3202 err = btrfs_cont_expand(inode, attr->ia_size); 3376 if (err)
3203 if (err) 3377 return err;
3204 return err;
3205 } else if (inode->i_size > 0 &&
3206 attr->ia_size == 0) {
3207
3208 /* we're truncating a file that used to have good
3209 * data down to zero. Make sure it gets into
3210 * the ordered flush list so that any new writes
3211 * get down to disk quickly.
3212 */
3213 BTRFS_I(inode)->ordered_data_close = 1;
3214 }
3215 } 3378 }
3379 attr->ia_valid &= ~ATTR_SIZE;
3216 3380
3217 err = inode_setattr(inode, attr); 3381 if (attr->ia_valid)
3382 err = inode_setattr(inode, attr);
3218 3383
3219 if (!err && ((attr->ia_valid & ATTR_MODE))) 3384 if (!err && ((attr->ia_valid & ATTR_MODE)))
3220 err = btrfs_acl_chmod(inode); 3385 err = btrfs_acl_chmod(inode);
@@ -3235,36 +3400,43 @@ void btrfs_delete_inode(struct inode *inode)
3235 } 3400 }
3236 btrfs_wait_ordered_range(inode, 0, (u64)-1); 3401 btrfs_wait_ordered_range(inode, 0, (u64)-1);
3237 3402
3403 if (root->fs_info->log_root_recovering) {
3404 BUG_ON(!list_empty(&BTRFS_I(inode)->i_orphan));
3405 goto no_delete;
3406 }
3407
3238 if (inode->i_nlink > 0) { 3408 if (inode->i_nlink > 0) {
3239 BUG_ON(btrfs_root_refs(&root->root_item) != 0); 3409 BUG_ON(btrfs_root_refs(&root->root_item) != 0);
3240 goto no_delete; 3410 goto no_delete;
3241 } 3411 }
3242 3412
3243 btrfs_i_size_write(inode, 0); 3413 btrfs_i_size_write(inode, 0);
3244 trans = btrfs_join_transaction(root, 1);
3245 3414
3246 btrfs_set_trans_block_group(trans, inode); 3415 while (1) {
3247 ret = btrfs_truncate_inode_items(trans, root, inode, inode->i_size, 0); 3416 trans = btrfs_start_transaction(root, 1);
3248 if (ret) { 3417 btrfs_set_trans_block_group(trans, inode);
3249 btrfs_orphan_del(NULL, inode); 3418 ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0);
3250 goto no_delete_lock;
3251 }
3252 3419
3253 btrfs_orphan_del(trans, inode); 3420 if (ret != -EAGAIN)
3421 break;
3254 3422
3255 nr = trans->blocks_used; 3423 nr = trans->blocks_used;
3256 clear_inode(inode); 3424 btrfs_end_transaction(trans, root);
3425 trans = NULL;
3426 btrfs_btree_balance_dirty(root, nr);
3427 }
3257 3428
3258 btrfs_end_transaction(trans, root); 3429 if (ret == 0) {
3259 btrfs_btree_balance_dirty(root, nr); 3430 ret = btrfs_orphan_del(trans, inode);
3260 return; 3431 BUG_ON(ret);
3432 }
3261 3433
3262no_delete_lock:
3263 nr = trans->blocks_used; 3434 nr = trans->blocks_used;
3264 btrfs_end_transaction(trans, root); 3435 btrfs_end_transaction(trans, root);
3265 btrfs_btree_balance_dirty(root, nr); 3436 btrfs_btree_balance_dirty(root, nr);
3266no_delete: 3437no_delete:
3267 clear_inode(inode); 3438 clear_inode(inode);
3439 return;
3268} 3440}
3269 3441
3270/* 3442/*
@@ -3517,7 +3689,6 @@ static noinline void init_btrfs_i(struct inode *inode)
3517 INIT_LIST_HEAD(&BTRFS_I(inode)->ordered_operations); 3689 INIT_LIST_HEAD(&BTRFS_I(inode)->ordered_operations);
3518 RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node); 3690 RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node);
3519 btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree); 3691 btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree);
3520 mutex_init(&BTRFS_I(inode)->extent_mutex);
3521 mutex_init(&BTRFS_I(inode)->log_mutex); 3692 mutex_init(&BTRFS_I(inode)->log_mutex);
3522} 3693}
3523 3694
@@ -3643,6 +3814,13 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
3643 } 3814 }
3644 srcu_read_unlock(&root->fs_info->subvol_srcu, index); 3815 srcu_read_unlock(&root->fs_info->subvol_srcu, index);
3645 3816
3817 if (root != sub_root) {
3818 down_read(&root->fs_info->cleanup_work_sem);
3819 if (!(inode->i_sb->s_flags & MS_RDONLY))
3820 btrfs_orphan_cleanup(sub_root);
3821 up_read(&root->fs_info->cleanup_work_sem);
3822 }
3823
3646 return inode; 3824 return inode;
3647} 3825}
3648 3826
@@ -4167,7 +4345,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
4167 if (IS_ERR(inode)) 4345 if (IS_ERR(inode))
4168 goto out_unlock; 4346 goto out_unlock;
4169 4347
4170 err = btrfs_init_inode_security(inode, dir); 4348 err = btrfs_init_inode_security(trans, inode, dir);
4171 if (err) { 4349 if (err) {
4172 drop_inode = 1; 4350 drop_inode = 1;
4173 goto out_unlock; 4351 goto out_unlock;
@@ -4238,7 +4416,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
4238 if (IS_ERR(inode)) 4416 if (IS_ERR(inode))
4239 goto out_unlock; 4417 goto out_unlock;
4240 4418
4241 err = btrfs_init_inode_security(inode, dir); 4419 err = btrfs_init_inode_security(trans, inode, dir);
4242 if (err) { 4420 if (err) {
4243 drop_inode = 1; 4421 drop_inode = 1;
4244 goto out_unlock; 4422 goto out_unlock;
@@ -4284,6 +4462,10 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
4284 if (inode->i_nlink == 0) 4462 if (inode->i_nlink == 0)
4285 return -ENOENT; 4463 return -ENOENT;
4286 4464
4465 /* do not allow sys_link's with other subvols of the same device */
4466 if (root->objectid != BTRFS_I(inode)->root->objectid)
4467 return -EPERM;
4468
4287 /* 4469 /*
4288 * 1 item for inode ref 4470 * 1 item for inode ref
4289 * 2 items for dir items 4471 * 2 items for dir items
@@ -4371,7 +4553,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
4371 4553
4372 drop_on_err = 1; 4554 drop_on_err = 1;
4373 4555
4374 err = btrfs_init_inode_security(inode, dir); 4556 err = btrfs_init_inode_security(trans, inode, dir);
4375 if (err) 4557 if (err)
4376 goto out_fail; 4558 goto out_fail;
4377 4559
@@ -5022,17 +5204,20 @@ static void btrfs_truncate(struct inode *inode)
5022 unsigned long nr; 5204 unsigned long nr;
5023 u64 mask = root->sectorsize - 1; 5205 u64 mask = root->sectorsize - 1;
5024 5206
5025 if (!S_ISREG(inode->i_mode)) 5207 if (!S_ISREG(inode->i_mode)) {
5026 return; 5208 WARN_ON(1);
5027 if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
5028 return; 5209 return;
5210 }
5029 5211
5030 ret = btrfs_truncate_page(inode->i_mapping, inode->i_size); 5212 ret = btrfs_truncate_page(inode->i_mapping, inode->i_size);
5031 if (ret) 5213 if (ret)
5032 return; 5214 return;
5215
5033 btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); 5216 btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1);
5217 btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
5034 5218
5035 trans = btrfs_start_transaction(root, 1); 5219 trans = btrfs_start_transaction(root, 1);
5220 btrfs_set_trans_block_group(trans, inode);
5036 5221
5037 /* 5222 /*
5038 * setattr is responsible for setting the ordered_data_close flag, 5223 * setattr is responsible for setting the ordered_data_close flag,
@@ -5054,21 +5239,32 @@ static void btrfs_truncate(struct inode *inode)
5054 if (inode->i_size == 0 && BTRFS_I(inode)->ordered_data_close) 5239 if (inode->i_size == 0 && BTRFS_I(inode)->ordered_data_close)
5055 btrfs_add_ordered_operation(trans, root, inode); 5240 btrfs_add_ordered_operation(trans, root, inode);
5056 5241
5057 btrfs_set_trans_block_group(trans, inode); 5242 while (1) {
5058 btrfs_i_size_write(inode, inode->i_size); 5243 ret = btrfs_truncate_inode_items(trans, root, inode,
5244 inode->i_size,
5245 BTRFS_EXTENT_DATA_KEY);
5246 if (ret != -EAGAIN)
5247 break;
5059 5248
5060 ret = btrfs_orphan_add(trans, inode); 5249 ret = btrfs_update_inode(trans, root, inode);
5061 if (ret) 5250 BUG_ON(ret);
5062 goto out; 5251
5063 /* FIXME, add redo link to tree so we don't leak on crash */ 5252 nr = trans->blocks_used;
5064 ret = btrfs_truncate_inode_items(trans, root, inode, inode->i_size, 5253 btrfs_end_transaction(trans, root);
5065 BTRFS_EXTENT_DATA_KEY); 5254 btrfs_btree_balance_dirty(root, nr);
5066 btrfs_update_inode(trans, root, inode); 5255
5256 trans = btrfs_start_transaction(root, 1);
5257 btrfs_set_trans_block_group(trans, inode);
5258 }
5067 5259
5068 ret = btrfs_orphan_del(trans, inode); 5260 if (ret == 0 && inode->i_nlink > 0) {
5261 ret = btrfs_orphan_del(trans, inode);
5262 BUG_ON(ret);
5263 }
5264
5265 ret = btrfs_update_inode(trans, root, inode);
5069 BUG_ON(ret); 5266 BUG_ON(ret);
5070 5267
5071out:
5072 nr = trans->blocks_used; 5268 nr = trans->blocks_used;
5073 ret = btrfs_end_transaction_throttle(trans, root); 5269 ret = btrfs_end_transaction_throttle(trans, root);
5074 BUG_ON(ret); 5270 BUG_ON(ret);
@@ -5128,6 +5324,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
5128 ei->logged_trans = 0; 5324 ei->logged_trans = 0;
5129 ei->outstanding_extents = 0; 5325 ei->outstanding_extents = 0;
5130 ei->reserved_extents = 0; 5326 ei->reserved_extents = 0;
5327 ei->root = NULL;
5131 spin_lock_init(&ei->accounting_lock); 5328 spin_lock_init(&ei->accounting_lock);
5132 btrfs_ordered_inode_tree_init(&ei->ordered_tree); 5329 btrfs_ordered_inode_tree_init(&ei->ordered_tree);
5133 INIT_LIST_HEAD(&ei->i_orphan); 5330 INIT_LIST_HEAD(&ei->i_orphan);
@@ -5144,6 +5341,14 @@ void btrfs_destroy_inode(struct inode *inode)
5144 WARN_ON(inode->i_data.nrpages); 5341 WARN_ON(inode->i_data.nrpages);
5145 5342
5146 /* 5343 /*
5344 * This can happen where we create an inode, but somebody else also
5345 * created the same inode and we need to destroy the one we already
5346 * created.
5347 */
5348 if (!root)
5349 goto free;
5350
5351 /*
5147 * Make sure we're properly removed from the ordered operation 5352 * Make sure we're properly removed from the ordered operation
5148 * lists. 5353 * lists.
5149 */ 5354 */
@@ -5156,9 +5361,9 @@ void btrfs_destroy_inode(struct inode *inode)
5156 5361
5157 spin_lock(&root->list_lock); 5362 spin_lock(&root->list_lock);
5158 if (!list_empty(&BTRFS_I(inode)->i_orphan)) { 5363 if (!list_empty(&BTRFS_I(inode)->i_orphan)) {
5159 printk(KERN_ERR "BTRFS: inode %lu: inode still on the orphan" 5364 printk(KERN_INFO "BTRFS: inode %lu still on the orphan list\n",
5160 " list\n", inode->i_ino); 5365 inode->i_ino);
5161 dump_stack(); 5366 list_del_init(&BTRFS_I(inode)->i_orphan);
5162 } 5367 }
5163 spin_unlock(&root->list_lock); 5368 spin_unlock(&root->list_lock);
5164 5369
@@ -5178,6 +5383,7 @@ void btrfs_destroy_inode(struct inode *inode)
5178 } 5383 }
5179 inode_tree_del(inode); 5384 inode_tree_del(inode);
5180 btrfs_drop_extent_cache(inode, 0, (u64)-1, 0); 5385 btrfs_drop_extent_cache(inode, 0, (u64)-1, 0);
5386free:
5181 kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode)); 5387 kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
5182} 5388}
5183 5389
@@ -5283,11 +5489,14 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
5283 return -ENOTEMPTY; 5489 return -ENOTEMPTY;
5284 5490
5285 /* 5491 /*
5286 * 2 items for dir items 5492 * We want to reserve the absolute worst case amount of items. So if
5287 * 1 item for orphan entry 5493 * both inodes are subvols and we need to unlink them then that would
5288 * 1 item for ref 5494 * require 4 item modifications, but if they are both normal inodes it
5495 * would require 5 item modifications, so we'll assume their normal
5496 * inodes. So 5 * 2 is 10, plus 1 for the new link, so 11 total items
5497 * should cover the worst case number of items we'll modify.
5289 */ 5498 */
5290 ret = btrfs_reserve_metadata_space(root, 4); 5499 ret = btrfs_reserve_metadata_space(root, 11);
5291 if (ret) 5500 if (ret)
5292 return ret; 5501 return ret;
5293 5502
@@ -5403,7 +5612,7 @@ out_fail:
5403 if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) 5612 if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)
5404 up_read(&root->fs_info->subvol_sem); 5613 up_read(&root->fs_info->subvol_sem);
5405 5614
5406 btrfs_unreserve_metadata_space(root, 4); 5615 btrfs_unreserve_metadata_space(root, 11);
5407 return ret; 5616 return ret;
5408} 5617}
5409 5618
@@ -5411,7 +5620,7 @@ out_fail:
5411 * some fairly slow code that needs optimization. This walks the list 5620 * some fairly slow code that needs optimization. This walks the list
5412 * of all the inodes with pending delalloc and forces them to disk. 5621 * of all the inodes with pending delalloc and forces them to disk.
5413 */ 5622 */
5414int btrfs_start_delalloc_inodes(struct btrfs_root *root) 5623int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
5415{ 5624{
5416 struct list_head *head = &root->fs_info->delalloc_inodes; 5625 struct list_head *head = &root->fs_info->delalloc_inodes;
5417 struct btrfs_inode *binode; 5626 struct btrfs_inode *binode;
@@ -5430,7 +5639,10 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root)
5430 spin_unlock(&root->fs_info->delalloc_lock); 5639 spin_unlock(&root->fs_info->delalloc_lock);
5431 if (inode) { 5640 if (inode) {
5432 filemap_flush(inode->i_mapping); 5641 filemap_flush(inode->i_mapping);
5433 iput(inode); 5642 if (delay_iput)
5643 btrfs_add_delayed_iput(inode);
5644 else
5645 iput(inode);
5434 } 5646 }
5435 cond_resched(); 5647 cond_resched();
5436 spin_lock(&root->fs_info->delalloc_lock); 5648 spin_lock(&root->fs_info->delalloc_lock);
@@ -5504,7 +5716,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
5504 if (IS_ERR(inode)) 5716 if (IS_ERR(inode))
5505 goto out_unlock; 5717 goto out_unlock;
5506 5718
5507 err = btrfs_init_inode_security(inode, dir); 5719 err = btrfs_init_inode_security(trans, inode, dir);
5508 if (err) { 5720 if (err) {
5509 drop_inode = 1; 5721 drop_inode = 1;
5510 goto out_unlock; 5722 goto out_unlock;
@@ -5576,10 +5788,10 @@ out_fail:
5576 return err; 5788 return err;
5577} 5789}
5578 5790
5579static int prealloc_file_range(struct btrfs_trans_handle *trans, 5791static int prealloc_file_range(struct inode *inode, u64 start, u64 end,
5580 struct inode *inode, u64 start, u64 end, 5792 u64 alloc_hint, int mode)
5581 u64 locked_end, u64 alloc_hint, int mode)
5582{ 5793{
5794 struct btrfs_trans_handle *trans;
5583 struct btrfs_root *root = BTRFS_I(inode)->root; 5795 struct btrfs_root *root = BTRFS_I(inode)->root;
5584 struct btrfs_key ins; 5796 struct btrfs_key ins;
5585 u64 alloc_size; 5797 u64 alloc_size;
@@ -5590,43 +5802,56 @@ static int prealloc_file_range(struct btrfs_trans_handle *trans,
5590 while (num_bytes > 0) { 5802 while (num_bytes > 0) {
5591 alloc_size = min(num_bytes, root->fs_info->max_extent); 5803 alloc_size = min(num_bytes, root->fs_info->max_extent);
5592 5804
5593 ret = btrfs_reserve_metadata_space(root, 1); 5805 trans = btrfs_start_transaction(root, 1);
5594 if (ret)
5595 goto out;
5596 5806
5597 ret = btrfs_reserve_extent(trans, root, alloc_size, 5807 ret = btrfs_reserve_extent(trans, root, alloc_size,
5598 root->sectorsize, 0, alloc_hint, 5808 root->sectorsize, 0, alloc_hint,
5599 (u64)-1, &ins, 1); 5809 (u64)-1, &ins, 1);
5600 if (ret) { 5810 if (ret) {
5601 WARN_ON(1); 5811 WARN_ON(1);
5602 goto out; 5812 goto stop_trans;
5603 } 5813 }
5814
5815 ret = btrfs_reserve_metadata_space(root, 3);
5816 if (ret) {
5817 btrfs_free_reserved_extent(root, ins.objectid,
5818 ins.offset);
5819 goto stop_trans;
5820 }
5821
5604 ret = insert_reserved_file_extent(trans, inode, 5822 ret = insert_reserved_file_extent(trans, inode,
5605 cur_offset, ins.objectid, 5823 cur_offset, ins.objectid,
5606 ins.offset, ins.offset, 5824 ins.offset, ins.offset,
5607 ins.offset, locked_end, 5825 ins.offset, 0, 0, 0,
5608 0, 0, 0,
5609 BTRFS_FILE_EXTENT_PREALLOC); 5826 BTRFS_FILE_EXTENT_PREALLOC);
5610 BUG_ON(ret); 5827 BUG_ON(ret);
5611 btrfs_drop_extent_cache(inode, cur_offset, 5828 btrfs_drop_extent_cache(inode, cur_offset,
5612 cur_offset + ins.offset -1, 0); 5829 cur_offset + ins.offset -1, 0);
5830
5613 num_bytes -= ins.offset; 5831 num_bytes -= ins.offset;
5614 cur_offset += ins.offset; 5832 cur_offset += ins.offset;
5615 alloc_hint = ins.objectid + ins.offset; 5833 alloc_hint = ins.objectid + ins.offset;
5616 btrfs_unreserve_metadata_space(root, 1); 5834
5617 }
5618out:
5619 if (cur_offset > start) {
5620 inode->i_ctime = CURRENT_TIME; 5835 inode->i_ctime = CURRENT_TIME;
5621 BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC; 5836 BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC;
5622 if (!(mode & FALLOC_FL_KEEP_SIZE) && 5837 if (!(mode & FALLOC_FL_KEEP_SIZE) &&
5623 cur_offset > i_size_read(inode)) 5838 cur_offset > inode->i_size) {
5624 btrfs_i_size_write(inode, cur_offset); 5839 i_size_write(inode, cur_offset);
5840 btrfs_ordered_update_i_size(inode, cur_offset, NULL);
5841 }
5842
5625 ret = btrfs_update_inode(trans, root, inode); 5843 ret = btrfs_update_inode(trans, root, inode);
5626 BUG_ON(ret); 5844 BUG_ON(ret);
5845
5846 btrfs_end_transaction(trans, root);
5847 btrfs_unreserve_metadata_space(root, 3);
5627 } 5848 }
5849 return ret;
5628 5850
5851stop_trans:
5852 btrfs_end_transaction(trans, root);
5629 return ret; 5853 return ret;
5854
5630} 5855}
5631 5856
5632static long btrfs_fallocate(struct inode *inode, int mode, 5857static long btrfs_fallocate(struct inode *inode, int mode,
@@ -5640,8 +5865,6 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5640 u64 locked_end; 5865 u64 locked_end;
5641 u64 mask = BTRFS_I(inode)->root->sectorsize - 1; 5866 u64 mask = BTRFS_I(inode)->root->sectorsize - 1;
5642 struct extent_map *em; 5867 struct extent_map *em;
5643 struct btrfs_trans_handle *trans;
5644 struct btrfs_root *root;
5645 int ret; 5868 int ret;
5646 5869
5647 alloc_start = offset & ~mask; 5870 alloc_start = offset & ~mask;
@@ -5660,9 +5883,7 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5660 goto out; 5883 goto out;
5661 } 5884 }
5662 5885
5663 root = BTRFS_I(inode)->root; 5886 ret = btrfs_check_data_free_space(BTRFS_I(inode)->root, inode,
5664
5665 ret = btrfs_check_data_free_space(root, inode,
5666 alloc_end - alloc_start); 5887 alloc_end - alloc_start);
5667 if (ret) 5888 if (ret)
5668 goto out; 5889 goto out;
@@ -5671,12 +5892,6 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5671 while (1) { 5892 while (1) {
5672 struct btrfs_ordered_extent *ordered; 5893 struct btrfs_ordered_extent *ordered;
5673 5894
5674 trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1);
5675 if (!trans) {
5676 ret = -EIO;
5677 goto out_free;
5678 }
5679
5680 /* the extent lock is ordered inside the running 5895 /* the extent lock is ordered inside the running
5681 * transaction 5896 * transaction
5682 */ 5897 */
@@ -5690,8 +5905,6 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5690 btrfs_put_ordered_extent(ordered); 5905 btrfs_put_ordered_extent(ordered);
5691 unlock_extent(&BTRFS_I(inode)->io_tree, 5906 unlock_extent(&BTRFS_I(inode)->io_tree,
5692 alloc_start, locked_end, GFP_NOFS); 5907 alloc_start, locked_end, GFP_NOFS);
5693 btrfs_end_transaction(trans, BTRFS_I(inode)->root);
5694
5695 /* 5908 /*
5696 * we can't wait on the range with the transaction 5909 * we can't wait on the range with the transaction
5697 * running or with the extent lock held 5910 * running or with the extent lock held
@@ -5712,10 +5925,12 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5712 BUG_ON(IS_ERR(em) || !em); 5925 BUG_ON(IS_ERR(em) || !em);
5713 last_byte = min(extent_map_end(em), alloc_end); 5926 last_byte = min(extent_map_end(em), alloc_end);
5714 last_byte = (last_byte + mask) & ~mask; 5927 last_byte = (last_byte + mask) & ~mask;
5715 if (em->block_start == EXTENT_MAP_HOLE) { 5928 if (em->block_start == EXTENT_MAP_HOLE ||
5716 ret = prealloc_file_range(trans, inode, cur_offset, 5929 (cur_offset >= inode->i_size &&
5717 last_byte, locked_end + 1, 5930 !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
5718 alloc_hint, mode); 5931 ret = prealloc_file_range(inode,
5932 cur_offset, last_byte,
5933 alloc_hint, mode);
5719 if (ret < 0) { 5934 if (ret < 0) {
5720 free_extent_map(em); 5935 free_extent_map(em);
5721 break; 5936 break;
@@ -5734,9 +5949,8 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5734 unlock_extent(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, 5949 unlock_extent(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
5735 GFP_NOFS); 5950 GFP_NOFS);
5736 5951
5737 btrfs_end_transaction(trans, BTRFS_I(inode)->root); 5952 btrfs_free_reserved_data_space(BTRFS_I(inode)->root, inode,
5738out_free: 5953 alloc_end - alloc_start);
5739 btrfs_free_reserved_data_space(root, inode, alloc_end - alloc_start);
5740out: 5954out:
5741 mutex_unlock(&inode->i_mutex); 5955 mutex_unlock(&inode->i_mutex);
5742 return ret; 5956 return ret;
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index cdbb054102b9..645a17927a8f 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -237,7 +237,6 @@ static noinline int create_subvol(struct btrfs_root *root,
237 u64 objectid; 237 u64 objectid;
238 u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID; 238 u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID;
239 u64 index = 0; 239 u64 index = 0;
240 unsigned long nr = 1;
241 240
242 /* 241 /*
243 * 1 - inode item 242 * 1 - inode item
@@ -290,7 +289,7 @@ static noinline int create_subvol(struct btrfs_root *root,
290 btrfs_set_root_generation(&root_item, trans->transid); 289 btrfs_set_root_generation(&root_item, trans->transid);
291 btrfs_set_root_level(&root_item, 0); 290 btrfs_set_root_level(&root_item, 0);
292 btrfs_set_root_refs(&root_item, 1); 291 btrfs_set_root_refs(&root_item, 1);
293 btrfs_set_root_used(&root_item, 0); 292 btrfs_set_root_used(&root_item, leaf->len);
294 btrfs_set_root_last_snapshot(&root_item, 0); 293 btrfs_set_root_last_snapshot(&root_item, 0);
295 294
296 memset(&root_item.drop_progress, 0, sizeof(root_item.drop_progress)); 295 memset(&root_item.drop_progress, 0, sizeof(root_item.drop_progress));
@@ -342,24 +341,21 @@ static noinline int create_subvol(struct btrfs_root *root,
342 341
343 d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry)); 342 d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry));
344fail: 343fail:
345 nr = trans->blocks_used;
346 err = btrfs_commit_transaction(trans, root); 344 err = btrfs_commit_transaction(trans, root);
347 if (err && !ret) 345 if (err && !ret)
348 ret = err; 346 ret = err;
349 347
350 btrfs_unreserve_metadata_space(root, 6); 348 btrfs_unreserve_metadata_space(root, 6);
351 btrfs_btree_balance_dirty(root, nr);
352 return ret; 349 return ret;
353} 350}
354 351
355static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, 352static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
356 char *name, int namelen) 353 char *name, int namelen)
357{ 354{
355 struct inode *inode;
358 struct btrfs_pending_snapshot *pending_snapshot; 356 struct btrfs_pending_snapshot *pending_snapshot;
359 struct btrfs_trans_handle *trans; 357 struct btrfs_trans_handle *trans;
360 int ret = 0; 358 int ret;
361 int err;
362 unsigned long nr = 0;
363 359
364 if (!root->ref_cows) 360 if (!root->ref_cows)
365 return -EINVAL; 361 return -EINVAL;
@@ -372,20 +368,20 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
372 */ 368 */
373 ret = btrfs_reserve_metadata_space(root, 6); 369 ret = btrfs_reserve_metadata_space(root, 6);
374 if (ret) 370 if (ret)
375 goto fail_unlock; 371 goto fail;
376 372
377 pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS); 373 pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS);
378 if (!pending_snapshot) { 374 if (!pending_snapshot) {
379 ret = -ENOMEM; 375 ret = -ENOMEM;
380 btrfs_unreserve_metadata_space(root, 6); 376 btrfs_unreserve_metadata_space(root, 6);
381 goto fail_unlock; 377 goto fail;
382 } 378 }
383 pending_snapshot->name = kmalloc(namelen + 1, GFP_NOFS); 379 pending_snapshot->name = kmalloc(namelen + 1, GFP_NOFS);
384 if (!pending_snapshot->name) { 380 if (!pending_snapshot->name) {
385 ret = -ENOMEM; 381 ret = -ENOMEM;
386 kfree(pending_snapshot); 382 kfree(pending_snapshot);
387 btrfs_unreserve_metadata_space(root, 6); 383 btrfs_unreserve_metadata_space(root, 6);
388 goto fail_unlock; 384 goto fail;
389 } 385 }
390 memcpy(pending_snapshot->name, name, namelen); 386 memcpy(pending_snapshot->name, name, namelen);
391 pending_snapshot->name[namelen] = '\0'; 387 pending_snapshot->name[namelen] = '\0';
@@ -395,10 +391,19 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
395 pending_snapshot->root = root; 391 pending_snapshot->root = root;
396 list_add(&pending_snapshot->list, 392 list_add(&pending_snapshot->list,
397 &trans->transaction->pending_snapshots); 393 &trans->transaction->pending_snapshots);
398 err = btrfs_commit_transaction(trans, root); 394 ret = btrfs_commit_transaction(trans, root);
395 BUG_ON(ret);
396 btrfs_unreserve_metadata_space(root, 6);
399 397
400fail_unlock: 398 inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry);
401 btrfs_btree_balance_dirty(root, nr); 399 if (IS_ERR(inode)) {
400 ret = PTR_ERR(inode);
401 goto fail;
402 }
403 BUG_ON(!inode);
404 d_instantiate(dentry, inode);
405 ret = 0;
406fail:
402 return ret; 407 return ret;
403} 408}
404 409
@@ -1027,8 +1032,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
1027 BUG_ON(!trans); 1032 BUG_ON(!trans);
1028 1033
1029 /* punch hole in destination first */ 1034 /* punch hole in destination first */
1030 btrfs_drop_extents(trans, root, inode, off, off + len, 1035 btrfs_drop_extents(trans, inode, off, off + len, &hint_byte, 1);
1031 off + len, 0, &hint_byte, 1);
1032 1036
1033 /* clone data */ 1037 /* clone data */
1034 key.objectid = src->i_ino; 1038 key.objectid = src->i_ino;
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 5799bc46a309..b10a49d4bc6a 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -291,16 +291,16 @@ int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
291 291
292/* 292/*
293 * remove an ordered extent from the tree. No references are dropped 293 * remove an ordered extent from the tree. No references are dropped
294 * but, anyone waiting on this extent is woken up. 294 * and you must wake_up entry->wait. You must hold the tree mutex
295 * while you call this function.
295 */ 296 */
296int btrfs_remove_ordered_extent(struct inode *inode, 297static int __btrfs_remove_ordered_extent(struct inode *inode,
297 struct btrfs_ordered_extent *entry) 298 struct btrfs_ordered_extent *entry)
298{ 299{
299 struct btrfs_ordered_inode_tree *tree; 300 struct btrfs_ordered_inode_tree *tree;
300 struct rb_node *node; 301 struct rb_node *node;
301 302
302 tree = &BTRFS_I(inode)->ordered_tree; 303 tree = &BTRFS_I(inode)->ordered_tree;
303 mutex_lock(&tree->mutex);
304 node = &entry->rb_node; 304 node = &entry->rb_node;
305 rb_erase(node, &tree->tree); 305 rb_erase(node, &tree->tree);
306 tree->last = NULL; 306 tree->last = NULL;
@@ -326,16 +326,34 @@ int btrfs_remove_ordered_extent(struct inode *inode,
326 } 326 }
327 spin_unlock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); 327 spin_unlock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock);
328 328
329 return 0;
330}
331
332/*
333 * remove an ordered extent from the tree. No references are dropped
334 * but any waiters are woken.
335 */
336int btrfs_remove_ordered_extent(struct inode *inode,
337 struct btrfs_ordered_extent *entry)
338{
339 struct btrfs_ordered_inode_tree *tree;
340 int ret;
341
342 tree = &BTRFS_I(inode)->ordered_tree;
343 mutex_lock(&tree->mutex);
344 ret = __btrfs_remove_ordered_extent(inode, entry);
329 mutex_unlock(&tree->mutex); 345 mutex_unlock(&tree->mutex);
330 wake_up(&entry->wait); 346 wake_up(&entry->wait);
331 return 0; 347
348 return ret;
332} 349}
333 350
334/* 351/*
335 * wait for all the ordered extents in a root. This is done when balancing 352 * wait for all the ordered extents in a root. This is done when balancing
336 * space between drives. 353 * space between drives.
337 */ 354 */
338int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only) 355int btrfs_wait_ordered_extents(struct btrfs_root *root,
356 int nocow_only, int delay_iput)
339{ 357{
340 struct list_head splice; 358 struct list_head splice;
341 struct list_head *cur; 359 struct list_head *cur;
@@ -372,7 +390,10 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only)
372 if (inode) { 390 if (inode) {
373 btrfs_start_ordered_extent(inode, ordered, 1); 391 btrfs_start_ordered_extent(inode, ordered, 1);
374 btrfs_put_ordered_extent(ordered); 392 btrfs_put_ordered_extent(ordered);
375 iput(inode); 393 if (delay_iput)
394 btrfs_add_delayed_iput(inode);
395 else
396 iput(inode);
376 } else { 397 } else {
377 btrfs_put_ordered_extent(ordered); 398 btrfs_put_ordered_extent(ordered);
378 } 399 }
@@ -430,7 +451,7 @@ again:
430 btrfs_wait_ordered_range(inode, 0, (u64)-1); 451 btrfs_wait_ordered_range(inode, 0, (u64)-1);
431 else 452 else
432 filemap_flush(inode->i_mapping); 453 filemap_flush(inode->i_mapping);
433 iput(inode); 454 btrfs_add_delayed_iput(inode);
434 } 455 }
435 456
436 cond_resched(); 457 cond_resched();
@@ -589,7 +610,7 @@ out:
589 * After an extent is done, call this to conditionally update the on disk 610 * After an extent is done, call this to conditionally update the on disk
590 * i_size. i_size is updated to cover any fully written part of the file. 611 * i_size. i_size is updated to cover any fully written part of the file.
591 */ 612 */
592int btrfs_ordered_update_i_size(struct inode *inode, 613int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
593 struct btrfs_ordered_extent *ordered) 614 struct btrfs_ordered_extent *ordered)
594{ 615{
595 struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree; 616 struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree;
@@ -597,18 +618,30 @@ int btrfs_ordered_update_i_size(struct inode *inode,
597 u64 disk_i_size; 618 u64 disk_i_size;
598 u64 new_i_size; 619 u64 new_i_size;
599 u64 i_size_test; 620 u64 i_size_test;
621 u64 i_size = i_size_read(inode);
600 struct rb_node *node; 622 struct rb_node *node;
623 struct rb_node *prev = NULL;
601 struct btrfs_ordered_extent *test; 624 struct btrfs_ordered_extent *test;
625 int ret = 1;
626
627 if (ordered)
628 offset = entry_end(ordered);
602 629
603 mutex_lock(&tree->mutex); 630 mutex_lock(&tree->mutex);
604 disk_i_size = BTRFS_I(inode)->disk_i_size; 631 disk_i_size = BTRFS_I(inode)->disk_i_size;
605 632
633 /* truncate file */
634 if (disk_i_size > i_size) {
635 BTRFS_I(inode)->disk_i_size = i_size;
636 ret = 0;
637 goto out;
638 }
639
606 /* 640 /*
607 * if the disk i_size is already at the inode->i_size, or 641 * if the disk i_size is already at the inode->i_size, or
608 * this ordered extent is inside the disk i_size, we're done 642 * this ordered extent is inside the disk i_size, we're done
609 */ 643 */
610 if (disk_i_size >= inode->i_size || 644 if (disk_i_size == i_size || offset <= disk_i_size) {
611 ordered->file_offset + ordered->len <= disk_i_size) {
612 goto out; 645 goto out;
613 } 646 }
614 647
@@ -616,8 +649,7 @@ int btrfs_ordered_update_i_size(struct inode *inode,
616 * we can't update the disk_isize if there are delalloc bytes 649 * we can't update the disk_isize if there are delalloc bytes
617 * between disk_i_size and this ordered extent 650 * between disk_i_size and this ordered extent
618 */ 651 */
619 if (test_range_bit(io_tree, disk_i_size, 652 if (test_range_bit(io_tree, disk_i_size, offset - 1,
620 ordered->file_offset + ordered->len - 1,
621 EXTENT_DELALLOC, 0, NULL)) { 653 EXTENT_DELALLOC, 0, NULL)) {
622 goto out; 654 goto out;
623 } 655 }
@@ -626,20 +658,32 @@ int btrfs_ordered_update_i_size(struct inode *inode,
626 * if we find an ordered extent then we can't update disk i_size 658 * if we find an ordered extent then we can't update disk i_size
627 * yet 659 * yet
628 */ 660 */
629 node = &ordered->rb_node; 661 if (ordered) {
630 while (1) { 662 node = rb_prev(&ordered->rb_node);
631 node = rb_prev(node); 663 } else {
632 if (!node) 664 prev = tree_search(tree, offset);
633 break; 665 /*
666 * we insert file extents without involving ordered struct,
667 * so there should be no ordered struct cover this offset
668 */
669 if (prev) {
670 test = rb_entry(prev, struct btrfs_ordered_extent,
671 rb_node);
672 BUG_ON(offset_in_entry(test, offset));
673 }
674 node = prev;
675 }
676 while (node) {
634 test = rb_entry(node, struct btrfs_ordered_extent, rb_node); 677 test = rb_entry(node, struct btrfs_ordered_extent, rb_node);
635 if (test->file_offset + test->len <= disk_i_size) 678 if (test->file_offset + test->len <= disk_i_size)
636 break; 679 break;
637 if (test->file_offset >= inode->i_size) 680 if (test->file_offset >= i_size)
638 break; 681 break;
639 if (test->file_offset >= disk_i_size) 682 if (test->file_offset >= disk_i_size)
640 goto out; 683 goto out;
684 node = rb_prev(node);
641 } 685 }
642 new_i_size = min_t(u64, entry_end(ordered), i_size_read(inode)); 686 new_i_size = min_t(u64, offset, i_size);
643 687
644 /* 688 /*
645 * at this point, we know we can safely update i_size to at least 689 * at this point, we know we can safely update i_size to at least
@@ -647,7 +691,14 @@ int btrfs_ordered_update_i_size(struct inode *inode,
647 * walk forward and see if ios from higher up in the file have 691 * walk forward and see if ios from higher up in the file have
648 * finished. 692 * finished.
649 */ 693 */
650 node = rb_next(&ordered->rb_node); 694 if (ordered) {
695 node = rb_next(&ordered->rb_node);
696 } else {
697 if (prev)
698 node = rb_next(prev);
699 else
700 node = rb_first(&tree->tree);
701 }
651 i_size_test = 0; 702 i_size_test = 0;
652 if (node) { 703 if (node) {
653 /* 704 /*
@@ -655,10 +706,10 @@ int btrfs_ordered_update_i_size(struct inode *inode,
655 * between our ordered extent and the next one. 706 * between our ordered extent and the next one.
656 */ 707 */
657 test = rb_entry(node, struct btrfs_ordered_extent, rb_node); 708 test = rb_entry(node, struct btrfs_ordered_extent, rb_node);
658 if (test->file_offset > entry_end(ordered)) 709 if (test->file_offset > offset)
659 i_size_test = test->file_offset; 710 i_size_test = test->file_offset;
660 } else { 711 } else {
661 i_size_test = i_size_read(inode); 712 i_size_test = i_size;
662 } 713 }
663 714
664 /* 715 /*
@@ -667,15 +718,25 @@ int btrfs_ordered_update_i_size(struct inode *inode,
667 * are no delalloc bytes in this area, it is safe to update 718 * are no delalloc bytes in this area, it is safe to update
668 * disk_i_size to the end of the region. 719 * disk_i_size to the end of the region.
669 */ 720 */
670 if (i_size_test > entry_end(ordered) && 721 if (i_size_test > offset &&
671 !test_range_bit(io_tree, entry_end(ordered), i_size_test - 1, 722 !test_range_bit(io_tree, offset, i_size_test - 1,
672 EXTENT_DELALLOC, 0, NULL)) { 723 EXTENT_DELALLOC, 0, NULL)) {
673 new_i_size = min_t(u64, i_size_test, i_size_read(inode)); 724 new_i_size = min_t(u64, i_size_test, i_size);
674 } 725 }
675 BTRFS_I(inode)->disk_i_size = new_i_size; 726 BTRFS_I(inode)->disk_i_size = new_i_size;
727 ret = 0;
676out: 728out:
729 /*
730 * we need to remove the ordered extent with the tree lock held
731 * so that other people calling this function don't find our fully
732 * processed ordered entry and skip updating the i_size
733 */
734 if (ordered)
735 __btrfs_remove_ordered_extent(inode, ordered);
677 mutex_unlock(&tree->mutex); 736 mutex_unlock(&tree->mutex);
678 return 0; 737 if (ordered)
738 wake_up(&ordered->wait);
739 return ret;
679} 740}
680 741
681/* 742/*
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index f82e87488ca8..1fe1282ef47c 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -150,12 +150,13 @@ void btrfs_start_ordered_extent(struct inode *inode,
150int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len); 150int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len);
151struct btrfs_ordered_extent * 151struct btrfs_ordered_extent *
152btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset); 152btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset);
153int btrfs_ordered_update_i_size(struct inode *inode, 153int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
154 struct btrfs_ordered_extent *ordered); 154 struct btrfs_ordered_extent *ordered);
155int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum); 155int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum);
156int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only);
157int btrfs_run_ordered_operations(struct btrfs_root *root, int wait); 156int btrfs_run_ordered_operations(struct btrfs_root *root, int wait);
158int btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, 157int btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
159 struct btrfs_root *root, 158 struct btrfs_root *root,
160 struct inode *inode); 159 struct inode *inode);
160int btrfs_wait_ordered_extents(struct btrfs_root *root,
161 int nocow_only, int delay_iput);
161#endif 162#endif
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index cfcc93c93a7b..a9728680eca8 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -1561,6 +1561,20 @@ static int invalidate_extent_cache(struct btrfs_root *root,
1561 return 0; 1561 return 0;
1562} 1562}
1563 1563
1564static void put_inodes(struct list_head *list)
1565{
1566 struct inodevec *ivec;
1567 while (!list_empty(list)) {
1568 ivec = list_entry(list->next, struct inodevec, list);
1569 list_del(&ivec->list);
1570 while (ivec->nr > 0) {
1571 ivec->nr--;
1572 iput(ivec->inode[ivec->nr]);
1573 }
1574 kfree(ivec);
1575 }
1576}
1577
1564static int find_next_key(struct btrfs_path *path, int level, 1578static int find_next_key(struct btrfs_path *path, int level,
1565 struct btrfs_key *key) 1579 struct btrfs_key *key)
1566 1580
@@ -1723,6 +1737,11 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
1723 1737
1724 btrfs_btree_balance_dirty(root, nr); 1738 btrfs_btree_balance_dirty(root, nr);
1725 1739
1740 /*
1741 * put inodes outside transaction, otherwise we may deadlock.
1742 */
1743 put_inodes(&inode_list);
1744
1726 if (replaced && rc->stage == UPDATE_DATA_PTRS) 1745 if (replaced && rc->stage == UPDATE_DATA_PTRS)
1727 invalidate_extent_cache(root, &key, &next_key); 1746 invalidate_extent_cache(root, &key, &next_key);
1728 } 1747 }
@@ -1752,19 +1771,7 @@ out:
1752 1771
1753 btrfs_btree_balance_dirty(root, nr); 1772 btrfs_btree_balance_dirty(root, nr);
1754 1773
1755 /* 1774 put_inodes(&inode_list);
1756 * put inodes while we aren't holding the tree locks
1757 */
1758 while (!list_empty(&inode_list)) {
1759 struct inodevec *ivec;
1760 ivec = list_entry(inode_list.next, struct inodevec, list);
1761 list_del(&ivec->list);
1762 while (ivec->nr > 0) {
1763 ivec->nr--;
1764 iput(ivec->inode[ivec->nr]);
1765 }
1766 kfree(ivec);
1767 }
1768 1775
1769 if (replaced && rc->stage == UPDATE_DATA_PTRS) 1776 if (replaced && rc->stage == UPDATE_DATA_PTRS)
1770 invalidate_extent_cache(root, &key, &next_key); 1777 invalidate_extent_cache(root, &key, &next_key);
@@ -3534,8 +3541,8 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
3534 (unsigned long long)rc->block_group->key.objectid, 3541 (unsigned long long)rc->block_group->key.objectid,
3535 (unsigned long long)rc->block_group->flags); 3542 (unsigned long long)rc->block_group->flags);
3536 3543
3537 btrfs_start_delalloc_inodes(fs_info->tree_root); 3544 btrfs_start_delalloc_inodes(fs_info->tree_root, 0);
3538 btrfs_wait_ordered_extents(fs_info->tree_root, 0); 3545 btrfs_wait_ordered_extents(fs_info->tree_root, 0, 0);
3539 3546
3540 while (1) { 3547 while (1) {
3541 rc->extents_found = 0; 3548 rc->extents_found = 0;
@@ -3755,6 +3762,7 @@ out:
3755 BTRFS_DATA_RELOC_TREE_OBJECTID); 3762 BTRFS_DATA_RELOC_TREE_OBJECTID);
3756 if (IS_ERR(fs_root)) 3763 if (IS_ERR(fs_root))
3757 err = PTR_ERR(fs_root); 3764 err = PTR_ERR(fs_root);
3765 btrfs_orphan_cleanup(fs_root);
3758 } 3766 }
3759 return err; 3767 return err;
3760} 3768}
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index 9351428f30e2..67fa2d29d663 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -159,7 +159,6 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root
159 write_extent_buffer(l, item, ptr, sizeof(*item)); 159 write_extent_buffer(l, item, ptr, sizeof(*item));
160 btrfs_mark_buffer_dirty(path->nodes[0]); 160 btrfs_mark_buffer_dirty(path->nodes[0]);
161out: 161out:
162 btrfs_release_path(root, path);
163 btrfs_free_path(path); 162 btrfs_free_path(path);
164 return ret; 163 return ret;
165} 164}
@@ -332,7 +331,6 @@ int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root,
332 BUG_ON(refs != 0); 331 BUG_ON(refs != 0);
333 ret = btrfs_del_item(trans, root, path); 332 ret = btrfs_del_item(trans, root, path);
334out: 333out:
335 btrfs_release_path(root, path);
336 btrfs_free_path(path); 334 btrfs_free_path(path);
337 return ret; 335 return ret;
338} 336}
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 752a5463bf53..3f9b45704fcd 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -128,6 +128,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
128 substring_t args[MAX_OPT_ARGS]; 128 substring_t args[MAX_OPT_ARGS];
129 char *p, *num; 129 char *p, *num;
130 int intarg; 130 int intarg;
131 int ret = 0;
131 132
132 if (!options) 133 if (!options)
133 return 0; 134 return 0;
@@ -262,12 +263,18 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
262 case Opt_discard: 263 case Opt_discard:
263 btrfs_set_opt(info->mount_opt, DISCARD); 264 btrfs_set_opt(info->mount_opt, DISCARD);
264 break; 265 break;
266 case Opt_err:
267 printk(KERN_INFO "btrfs: unrecognized mount option "
268 "'%s'\n", p);
269 ret = -EINVAL;
270 goto out;
265 default: 271 default:
266 break; 272 break;
267 } 273 }
268 } 274 }
275out:
269 kfree(options); 276 kfree(options);
270 return 0; 277 return ret;
271} 278}
272 279
273/* 280/*
@@ -405,8 +412,8 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
405 return 0; 412 return 0;
406 } 413 }
407 414
408 btrfs_start_delalloc_inodes(root); 415 btrfs_start_delalloc_inodes(root, 0);
409 btrfs_wait_ordered_extents(root, 0); 416 btrfs_wait_ordered_extents(root, 0, 0);
410 417
411 trans = btrfs_start_transaction(root, 1); 418 trans = btrfs_start_transaction(root, 1);
412 ret = btrfs_commit_transaction(trans, root); 419 ret = btrfs_commit_transaction(trans, root);
@@ -450,6 +457,8 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
450 seq_puts(seq, ",notreelog"); 457 seq_puts(seq, ",notreelog");
451 if (btrfs_test_opt(root, FLUSHONCOMMIT)) 458 if (btrfs_test_opt(root, FLUSHONCOMMIT))
452 seq_puts(seq, ",flushoncommit"); 459 seq_puts(seq, ",flushoncommit");
460 if (btrfs_test_opt(root, DISCARD))
461 seq_puts(seq, ",discard");
453 if (!(root->fs_info->sb->s_flags & MS_POSIXACL)) 462 if (!(root->fs_info->sb->s_flags & MS_POSIXACL))
454 seq_puts(seq, ",noacl"); 463 seq_puts(seq, ",noacl");
455 return 0; 464 return 0;
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index bca82a4ca8e6..b2acc79f1b34 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -163,8 +163,14 @@ static void wait_current_trans(struct btrfs_root *root)
163 } 163 }
164} 164}
165 165
166enum btrfs_trans_type {
167 TRANS_START,
168 TRANS_JOIN,
169 TRANS_USERSPACE,
170};
171
166static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, 172static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
167 int num_blocks, int wait) 173 int num_blocks, int type)
168{ 174{
169 struct btrfs_trans_handle *h = 175 struct btrfs_trans_handle *h =
170 kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); 176 kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
@@ -172,7 +178,8 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
172 178
173 mutex_lock(&root->fs_info->trans_mutex); 179 mutex_lock(&root->fs_info->trans_mutex);
174 if (!root->fs_info->log_root_recovering && 180 if (!root->fs_info->log_root_recovering &&
175 ((wait == 1 && !root->fs_info->open_ioctl_trans) || wait == 2)) 181 ((type == TRANS_START && !root->fs_info->open_ioctl_trans) ||
182 type == TRANS_USERSPACE))
176 wait_current_trans(root); 183 wait_current_trans(root);
177 ret = join_transaction(root); 184 ret = join_transaction(root);
178 BUG_ON(ret); 185 BUG_ON(ret);
@@ -186,7 +193,7 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
186 h->alloc_exclude_start = 0; 193 h->alloc_exclude_start = 0;
187 h->delayed_ref_updates = 0; 194 h->delayed_ref_updates = 0;
188 195
189 if (!current->journal_info) 196 if (!current->journal_info && type != TRANS_USERSPACE)
190 current->journal_info = h; 197 current->journal_info = h;
191 198
192 root->fs_info->running_transaction->use_count++; 199 root->fs_info->running_transaction->use_count++;
@@ -198,18 +205,18 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
198struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, 205struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
199 int num_blocks) 206 int num_blocks)
200{ 207{
201 return start_transaction(root, num_blocks, 1); 208 return start_transaction(root, num_blocks, TRANS_START);
202} 209}
203struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root, 210struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root,
204 int num_blocks) 211 int num_blocks)
205{ 212{
206 return start_transaction(root, num_blocks, 0); 213 return start_transaction(root, num_blocks, TRANS_JOIN);
207} 214}
208 215
209struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r, 216struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r,
210 int num_blocks) 217 int num_blocks)
211{ 218{
212 return start_transaction(r, num_blocks, 2); 219 return start_transaction(r, num_blocks, TRANS_USERSPACE);
213} 220}
214 221
215/* wait for a transaction commit to be fully complete */ 222/* wait for a transaction commit to be fully complete */
@@ -326,6 +333,9 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
326 memset(trans, 0, sizeof(*trans)); 333 memset(trans, 0, sizeof(*trans));
327 kmem_cache_free(btrfs_trans_handle_cachep, trans); 334 kmem_cache_free(btrfs_trans_handle_cachep, trans);
328 335
336 if (throttle)
337 btrfs_run_delayed_iputs(root);
338
329 return 0; 339 return 0;
330} 340}
331 341
@@ -347,7 +357,7 @@ int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
347 * those extents are sent to disk but does not wait on them 357 * those extents are sent to disk but does not wait on them
348 */ 358 */
349int btrfs_write_marked_extents(struct btrfs_root *root, 359int btrfs_write_marked_extents(struct btrfs_root *root,
350 struct extent_io_tree *dirty_pages) 360 struct extent_io_tree *dirty_pages, int mark)
351{ 361{
352 int ret; 362 int ret;
353 int err = 0; 363 int err = 0;
@@ -360,7 +370,7 @@ int btrfs_write_marked_extents(struct btrfs_root *root,
360 370
361 while (1) { 371 while (1) {
362 ret = find_first_extent_bit(dirty_pages, start, &start, &end, 372 ret = find_first_extent_bit(dirty_pages, start, &start, &end,
363 EXTENT_DIRTY); 373 mark);
364 if (ret) 374 if (ret)
365 break; 375 break;
366 while (start <= end) { 376 while (start <= end) {
@@ -406,7 +416,7 @@ int btrfs_write_marked_extents(struct btrfs_root *root,
406 * on all the pages and clear them from the dirty pages state tree 416 * on all the pages and clear them from the dirty pages state tree
407 */ 417 */
408int btrfs_wait_marked_extents(struct btrfs_root *root, 418int btrfs_wait_marked_extents(struct btrfs_root *root,
409 struct extent_io_tree *dirty_pages) 419 struct extent_io_tree *dirty_pages, int mark)
410{ 420{
411 int ret; 421 int ret;
412 int err = 0; 422 int err = 0;
@@ -418,12 +428,12 @@ int btrfs_wait_marked_extents(struct btrfs_root *root,
418 unsigned long index; 428 unsigned long index;
419 429
420 while (1) { 430 while (1) {
421 ret = find_first_extent_bit(dirty_pages, 0, &start, &end, 431 ret = find_first_extent_bit(dirty_pages, start, &start, &end,
422 EXTENT_DIRTY); 432 mark);
423 if (ret) 433 if (ret)
424 break; 434 break;
425 435
426 clear_extent_dirty(dirty_pages, start, end, GFP_NOFS); 436 clear_extent_bits(dirty_pages, start, end, mark, GFP_NOFS);
427 while (start <= end) { 437 while (start <= end) {
428 index = start >> PAGE_CACHE_SHIFT; 438 index = start >> PAGE_CACHE_SHIFT;
429 start = (u64)(index + 1) << PAGE_CACHE_SHIFT; 439 start = (u64)(index + 1) << PAGE_CACHE_SHIFT;
@@ -453,13 +463,13 @@ int btrfs_wait_marked_extents(struct btrfs_root *root,
453 * those extents are on disk for transaction or log commit 463 * those extents are on disk for transaction or log commit
454 */ 464 */
455int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, 465int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
456 struct extent_io_tree *dirty_pages) 466 struct extent_io_tree *dirty_pages, int mark)
457{ 467{
458 int ret; 468 int ret;
459 int ret2; 469 int ret2;
460 470
461 ret = btrfs_write_marked_extents(root, dirty_pages); 471 ret = btrfs_write_marked_extents(root, dirty_pages, mark);
462 ret2 = btrfs_wait_marked_extents(root, dirty_pages); 472 ret2 = btrfs_wait_marked_extents(root, dirty_pages, mark);
463 return ret || ret2; 473 return ret || ret2;
464} 474}
465 475
@@ -472,7 +482,8 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
472 return filemap_write_and_wait(btree_inode->i_mapping); 482 return filemap_write_and_wait(btree_inode->i_mapping);
473 } 483 }
474 return btrfs_write_and_wait_marked_extents(root, 484 return btrfs_write_and_wait_marked_extents(root,
475 &trans->transaction->dirty_pages); 485 &trans->transaction->dirty_pages,
486 EXTENT_DIRTY);
476} 487}
477 488
478/* 489/*
@@ -490,13 +501,16 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
490{ 501{
491 int ret; 502 int ret;
492 u64 old_root_bytenr; 503 u64 old_root_bytenr;
504 u64 old_root_used;
493 struct btrfs_root *tree_root = root->fs_info->tree_root; 505 struct btrfs_root *tree_root = root->fs_info->tree_root;
494 506
507 old_root_used = btrfs_root_used(&root->root_item);
495 btrfs_write_dirty_block_groups(trans, root); 508 btrfs_write_dirty_block_groups(trans, root);
496 509
497 while (1) { 510 while (1) {
498 old_root_bytenr = btrfs_root_bytenr(&root->root_item); 511 old_root_bytenr = btrfs_root_bytenr(&root->root_item);
499 if (old_root_bytenr == root->node->start) 512 if (old_root_bytenr == root->node->start &&
513 old_root_used == btrfs_root_used(&root->root_item))
500 break; 514 break;
501 515
502 btrfs_set_root_node(&root->root_item, root->node); 516 btrfs_set_root_node(&root->root_item, root->node);
@@ -505,6 +519,7 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
505 &root->root_item); 519 &root->root_item);
506 BUG_ON(ret); 520 BUG_ON(ret);
507 521
522 old_root_used = btrfs_root_used(&root->root_item);
508 ret = btrfs_write_dirty_block_groups(trans, root); 523 ret = btrfs_write_dirty_block_groups(trans, root);
509 BUG_ON(ret); 524 BUG_ON(ret);
510 } 525 }
@@ -788,7 +803,6 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
788 memcpy(&pending->root_key, &key, sizeof(key)); 803 memcpy(&pending->root_key, &key, sizeof(key));
789fail: 804fail:
790 kfree(new_root_item); 805 kfree(new_root_item);
791 btrfs_unreserve_metadata_space(root, 6);
792 return ret; 806 return ret;
793} 807}
794 808
@@ -800,7 +814,6 @@ static noinline int finish_pending_snapshot(struct btrfs_fs_info *fs_info,
800 u64 index = 0; 814 u64 index = 0;
801 struct btrfs_trans_handle *trans; 815 struct btrfs_trans_handle *trans;
802 struct inode *parent_inode; 816 struct inode *parent_inode;
803 struct inode *inode;
804 struct btrfs_root *parent_root; 817 struct btrfs_root *parent_root;
805 818
806 parent_inode = pending->dentry->d_parent->d_inode; 819 parent_inode = pending->dentry->d_parent->d_inode;
@@ -832,8 +845,6 @@ static noinline int finish_pending_snapshot(struct btrfs_fs_info *fs_info,
832 845
833 BUG_ON(ret); 846 BUG_ON(ret);
834 847
835 inode = btrfs_lookup_dentry(parent_inode, pending->dentry);
836 d_instantiate(pending->dentry, inode);
837fail: 848fail:
838 btrfs_end_transaction(trans, fs_info->fs_root); 849 btrfs_end_transaction(trans, fs_info->fs_root);
839 return ret; 850 return ret;
@@ -987,11 +998,11 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
987 mutex_unlock(&root->fs_info->trans_mutex); 998 mutex_unlock(&root->fs_info->trans_mutex);
988 999
989 if (flush_on_commit) { 1000 if (flush_on_commit) {
990 btrfs_start_delalloc_inodes(root); 1001 btrfs_start_delalloc_inodes(root, 1);
991 ret = btrfs_wait_ordered_extents(root, 0); 1002 ret = btrfs_wait_ordered_extents(root, 0, 1);
992 BUG_ON(ret); 1003 BUG_ON(ret);
993 } else if (snap_pending) { 1004 } else if (snap_pending) {
994 ret = btrfs_wait_ordered_extents(root, 1); 1005 ret = btrfs_wait_ordered_extents(root, 0, 1);
995 BUG_ON(ret); 1006 BUG_ON(ret);
996 } 1007 }
997 1008
@@ -1109,6 +1120,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1109 current->journal_info = NULL; 1120 current->journal_info = NULL;
1110 1121
1111 kmem_cache_free(btrfs_trans_handle_cachep, trans); 1122 kmem_cache_free(btrfs_trans_handle_cachep, trans);
1123
1124 if (current != root->fs_info->transaction_kthread)
1125 btrfs_run_delayed_iputs(root);
1126
1112 return ret; 1127 return ret;
1113} 1128}
1114 1129
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index d4e3e7a6938c..93c7ccb33118 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -107,10 +107,10 @@ void btrfs_throttle(struct btrfs_root *root);
107int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans, 107int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
108 struct btrfs_root *root); 108 struct btrfs_root *root);
109int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, 109int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
110 struct extent_io_tree *dirty_pages); 110 struct extent_io_tree *dirty_pages, int mark);
111int btrfs_write_marked_extents(struct btrfs_root *root, 111int btrfs_write_marked_extents(struct btrfs_root *root,
112 struct extent_io_tree *dirty_pages); 112 struct extent_io_tree *dirty_pages, int mark);
113int btrfs_wait_marked_extents(struct btrfs_root *root, 113int btrfs_wait_marked_extents(struct btrfs_root *root,
114 struct extent_io_tree *dirty_pages); 114 struct extent_io_tree *dirty_pages, int mark);
115int btrfs_transaction_in_commit(struct btrfs_fs_info *info); 115int btrfs_transaction_in_commit(struct btrfs_fs_info *info);
116#endif 116#endif
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 741666a7676a..4a9434b622ec 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -542,8 +542,8 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
542 542
543 saved_nbytes = inode_get_bytes(inode); 543 saved_nbytes = inode_get_bytes(inode);
544 /* drop any overlapping extents */ 544 /* drop any overlapping extents */
545 ret = btrfs_drop_extents(trans, root, inode, 545 ret = btrfs_drop_extents(trans, inode, start, extent_end,
546 start, extent_end, extent_end, start, &alloc_hint, 1); 546 &alloc_hint, 1);
547 BUG_ON(ret); 547 BUG_ON(ret);
548 548
549 if (found_type == BTRFS_FILE_EXTENT_REG || 549 if (found_type == BTRFS_FILE_EXTENT_REG ||
@@ -930,6 +930,17 @@ out_nowrite:
930 return 0; 930 return 0;
931} 931}
932 932
933static int insert_orphan_item(struct btrfs_trans_handle *trans,
934 struct btrfs_root *root, u64 offset)
935{
936 int ret;
937 ret = btrfs_find_orphan_item(root, offset);
938 if (ret > 0)
939 ret = btrfs_insert_orphan_item(trans, root, offset);
940 return ret;
941}
942
943
933/* 944/*
934 * There are a few corners where the link count of the file can't 945 * There are a few corners where the link count of the file can't
935 * be properly maintained during replay. So, instead of adding 946 * be properly maintained during replay. So, instead of adding
@@ -997,9 +1008,13 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
997 } 1008 }
998 BTRFS_I(inode)->index_cnt = (u64)-1; 1009 BTRFS_I(inode)->index_cnt = (u64)-1;
999 1010
1000 if (inode->i_nlink == 0 && S_ISDIR(inode->i_mode)) { 1011 if (inode->i_nlink == 0) {
1001 ret = replay_dir_deletes(trans, root, NULL, path, 1012 if (S_ISDIR(inode->i_mode)) {
1002 inode->i_ino, 1); 1013 ret = replay_dir_deletes(trans, root, NULL, path,
1014 inode->i_ino, 1);
1015 BUG_ON(ret);
1016 }
1017 ret = insert_orphan_item(trans, root, inode->i_ino);
1003 BUG_ON(ret); 1018 BUG_ON(ret);
1004 } 1019 }
1005 btrfs_free_path(path); 1020 btrfs_free_path(path);
@@ -1587,7 +1602,6 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
1587 /* inode keys are done during the first stage */ 1602 /* inode keys are done during the first stage */
1588 if (key.type == BTRFS_INODE_ITEM_KEY && 1603 if (key.type == BTRFS_INODE_ITEM_KEY &&
1589 wc->stage == LOG_WALK_REPLAY_INODES) { 1604 wc->stage == LOG_WALK_REPLAY_INODES) {
1590 struct inode *inode;
1591 struct btrfs_inode_item *inode_item; 1605 struct btrfs_inode_item *inode_item;
1592 u32 mode; 1606 u32 mode;
1593 1607
@@ -1603,31 +1617,16 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
1603 eb, i, &key); 1617 eb, i, &key);
1604 BUG_ON(ret); 1618 BUG_ON(ret);
1605 1619
1606 /* for regular files, truncate away 1620 /* for regular files, make sure corresponding
1607 * extents past the new EOF 1621 * orhpan item exist. extents past the new EOF
1622 * will be truncated later by orphan cleanup.
1608 */ 1623 */
1609 if (S_ISREG(mode)) { 1624 if (S_ISREG(mode)) {
1610 inode = read_one_inode(root, 1625 ret = insert_orphan_item(wc->trans, root,
1611 key.objectid); 1626 key.objectid);
1612 BUG_ON(!inode);
1613
1614 ret = btrfs_truncate_inode_items(wc->trans,
1615 root, inode, inode->i_size,
1616 BTRFS_EXTENT_DATA_KEY);
1617 BUG_ON(ret); 1627 BUG_ON(ret);
1618
1619 /* if the nlink count is zero here, the iput
1620 * will free the inode. We bump it to make
1621 * sure it doesn't get freed until the link
1622 * count fixup is done
1623 */
1624 if (inode->i_nlink == 0) {
1625 btrfs_inc_nlink(inode);
1626 btrfs_update_inode(wc->trans,
1627 root, inode);
1628 }
1629 iput(inode);
1630 } 1628 }
1629
1631 ret = link_to_fixup_dir(wc->trans, root, 1630 ret = link_to_fixup_dir(wc->trans, root,
1632 path, key.objectid); 1631 path, key.objectid);
1633 BUG_ON(ret); 1632 BUG_ON(ret);
@@ -1977,10 +1976,11 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
1977{ 1976{
1978 int index1; 1977 int index1;
1979 int index2; 1978 int index2;
1979 int mark;
1980 int ret; 1980 int ret;
1981 struct btrfs_root *log = root->log_root; 1981 struct btrfs_root *log = root->log_root;
1982 struct btrfs_root *log_root_tree = root->fs_info->log_root_tree; 1982 struct btrfs_root *log_root_tree = root->fs_info->log_root_tree;
1983 u64 log_transid = 0; 1983 unsigned long log_transid = 0;
1984 1984
1985 mutex_lock(&root->log_mutex); 1985 mutex_lock(&root->log_mutex);
1986 index1 = root->log_transid % 2; 1986 index1 = root->log_transid % 2;
@@ -2014,24 +2014,29 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2014 goto out; 2014 goto out;
2015 } 2015 }
2016 2016
2017 log_transid = root->log_transid;
2018 if (log_transid % 2 == 0)
2019 mark = EXTENT_DIRTY;
2020 else
2021 mark = EXTENT_NEW;
2022
2017 /* we start IO on all the marked extents here, but we don't actually 2023 /* we start IO on all the marked extents here, but we don't actually
2018 * wait for them until later. 2024 * wait for them until later.
2019 */ 2025 */
2020 ret = btrfs_write_marked_extents(log, &log->dirty_log_pages); 2026 ret = btrfs_write_marked_extents(log, &log->dirty_log_pages, mark);
2021 BUG_ON(ret); 2027 BUG_ON(ret);
2022 2028
2023 btrfs_set_root_node(&log->root_item, log->node); 2029 btrfs_set_root_node(&log->root_item, log->node);
2024 2030
2025 root->log_batch = 0; 2031 root->log_batch = 0;
2026 log_transid = root->log_transid;
2027 root->log_transid++; 2032 root->log_transid++;
2028 log->log_transid = root->log_transid; 2033 log->log_transid = root->log_transid;
2029 root->log_start_pid = 0; 2034 root->log_start_pid = 0;
2030 smp_mb(); 2035 smp_mb();
2031 /* 2036 /*
2032 * log tree has been flushed to disk, new modifications of 2037 * IO has been started, blocks of the log tree have WRITTEN flag set
2033 * the log will be written to new positions. so it's safe to 2038 * in their headers. new modifications of the log will be written to
2034 * allow log writers to go in. 2039 * new positions. so it's safe to allow log writers to go in.
2035 */ 2040 */
2036 mutex_unlock(&root->log_mutex); 2041 mutex_unlock(&root->log_mutex);
2037 2042
@@ -2052,7 +2057,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2052 2057
2053 index2 = log_root_tree->log_transid % 2; 2058 index2 = log_root_tree->log_transid % 2;
2054 if (atomic_read(&log_root_tree->log_commit[index2])) { 2059 if (atomic_read(&log_root_tree->log_commit[index2])) {
2055 btrfs_wait_marked_extents(log, &log->dirty_log_pages); 2060 btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
2056 wait_log_commit(trans, log_root_tree, 2061 wait_log_commit(trans, log_root_tree,
2057 log_root_tree->log_transid); 2062 log_root_tree->log_transid);
2058 mutex_unlock(&log_root_tree->log_mutex); 2063 mutex_unlock(&log_root_tree->log_mutex);
@@ -2072,16 +2077,17 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2072 * check the full commit flag again 2077 * check the full commit flag again
2073 */ 2078 */
2074 if (root->fs_info->last_trans_log_full_commit == trans->transid) { 2079 if (root->fs_info->last_trans_log_full_commit == trans->transid) {
2075 btrfs_wait_marked_extents(log, &log->dirty_log_pages); 2080 btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
2076 mutex_unlock(&log_root_tree->log_mutex); 2081 mutex_unlock(&log_root_tree->log_mutex);
2077 ret = -EAGAIN; 2082 ret = -EAGAIN;
2078 goto out_wake_log_root; 2083 goto out_wake_log_root;
2079 } 2084 }
2080 2085
2081 ret = btrfs_write_and_wait_marked_extents(log_root_tree, 2086 ret = btrfs_write_and_wait_marked_extents(log_root_tree,
2082 &log_root_tree->dirty_log_pages); 2087 &log_root_tree->dirty_log_pages,
2088 EXTENT_DIRTY | EXTENT_NEW);
2083 BUG_ON(ret); 2089 BUG_ON(ret);
2084 btrfs_wait_marked_extents(log, &log->dirty_log_pages); 2090 btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
2085 2091
2086 btrfs_set_super_log_root(&root->fs_info->super_for_commit, 2092 btrfs_set_super_log_root(&root->fs_info->super_for_commit,
2087 log_root_tree->node->start); 2093 log_root_tree->node->start);
@@ -2147,12 +2153,12 @@ int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root)
2147 2153
2148 while (1) { 2154 while (1) {
2149 ret = find_first_extent_bit(&log->dirty_log_pages, 2155 ret = find_first_extent_bit(&log->dirty_log_pages,
2150 0, &start, &end, EXTENT_DIRTY); 2156 0, &start, &end, EXTENT_DIRTY | EXTENT_NEW);
2151 if (ret) 2157 if (ret)
2152 break; 2158 break;
2153 2159
2154 clear_extent_dirty(&log->dirty_log_pages, 2160 clear_extent_bits(&log->dirty_log_pages, start, end,
2155 start, end, GFP_NOFS); 2161 EXTENT_DIRTY | EXTENT_NEW, GFP_NOFS);
2156 } 2162 }
2157 2163
2158 if (log->log_transid > 0) { 2164 if (log->log_transid > 0) {
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 7eda483d7b5a..198cff28766d 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2209,7 +2209,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
2209 max_chunk_size = 10 * calc_size; 2209 max_chunk_size = 10 * calc_size;
2210 min_stripe_size = 64 * 1024 * 1024; 2210 min_stripe_size = 64 * 1024 * 1024;
2211 } else if (type & BTRFS_BLOCK_GROUP_METADATA) { 2211 } else if (type & BTRFS_BLOCK_GROUP_METADATA) {
2212 max_chunk_size = 4 * calc_size; 2212 max_chunk_size = 256 * 1024 * 1024;
2213 min_stripe_size = 32 * 1024 * 1024; 2213 min_stripe_size = 32 * 1024 * 1024;
2214 } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) { 2214 } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) {
2215 calc_size = 8 * 1024 * 1024; 2215 calc_size = 8 * 1024 * 1024;
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index b6dd5967c48a..193b58f7d3f3 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -85,22 +85,23 @@ out:
85 return ret; 85 return ret;
86} 86}
87 87
88int __btrfs_setxattr(struct inode *inode, const char *name, 88static int do_setxattr(struct btrfs_trans_handle *trans,
89 const void *value, size_t size, int flags) 89 struct inode *inode, const char *name,
90 const void *value, size_t size, int flags)
90{ 91{
91 struct btrfs_dir_item *di; 92 struct btrfs_dir_item *di;
92 struct btrfs_root *root = BTRFS_I(inode)->root; 93 struct btrfs_root *root = BTRFS_I(inode)->root;
93 struct btrfs_trans_handle *trans;
94 struct btrfs_path *path; 94 struct btrfs_path *path;
95 int ret = 0, mod = 0; 95 size_t name_len = strlen(name);
96 int ret = 0;
97
98 if (name_len + size > BTRFS_MAX_XATTR_SIZE(root))
99 return -ENOSPC;
96 100
97 path = btrfs_alloc_path(); 101 path = btrfs_alloc_path();
98 if (!path) 102 if (!path)
99 return -ENOMEM; 103 return -ENOMEM;
100 104
101 trans = btrfs_join_transaction(root, 1);
102 btrfs_set_trans_block_group(trans, inode);
103
104 /* first lets see if we already have this xattr */ 105 /* first lets see if we already have this xattr */
105 di = btrfs_lookup_xattr(trans, root, path, inode->i_ino, name, 106 di = btrfs_lookup_xattr(trans, root, path, inode->i_ino, name,
106 strlen(name), -1); 107 strlen(name), -1);
@@ -118,15 +119,12 @@ int __btrfs_setxattr(struct inode *inode, const char *name,
118 } 119 }
119 120
120 ret = btrfs_delete_one_dir_name(trans, root, path, di); 121 ret = btrfs_delete_one_dir_name(trans, root, path, di);
121 if (ret) 122 BUG_ON(ret);
122 goto out;
123 btrfs_release_path(root, path); 123 btrfs_release_path(root, path);
124 124
125 /* if we don't have a value then we are removing the xattr */ 125 /* if we don't have a value then we are removing the xattr */
126 if (!value) { 126 if (!value)
127 mod = 1;
128 goto out; 127 goto out;
129 }
130 } else { 128 } else {
131 btrfs_release_path(root, path); 129 btrfs_release_path(root, path);
132 130
@@ -138,20 +136,45 @@ int __btrfs_setxattr(struct inode *inode, const char *name,
138 } 136 }
139 137
140 /* ok we have to create a completely new xattr */ 138 /* ok we have to create a completely new xattr */
141 ret = btrfs_insert_xattr_item(trans, root, name, strlen(name), 139 ret = btrfs_insert_xattr_item(trans, root, path, inode->i_ino,
142 value, size, inode->i_ino); 140 name, name_len, value, size);
141 BUG_ON(ret);
142out:
143 btrfs_free_path(path);
144 return ret;
145}
146
147int __btrfs_setxattr(struct btrfs_trans_handle *trans,
148 struct inode *inode, const char *name,
149 const void *value, size_t size, int flags)
150{
151 struct btrfs_root *root = BTRFS_I(inode)->root;
152 int ret;
153
154 if (trans)
155 return do_setxattr(trans, inode, name, value, size, flags);
156
157 ret = btrfs_reserve_metadata_space(root, 2);
143 if (ret) 158 if (ret)
144 goto out; 159 return ret;
145 mod = 1;
146 160
147out: 161 trans = btrfs_start_transaction(root, 1);
148 if (mod) { 162 if (!trans) {
149 inode->i_ctime = CURRENT_TIME; 163 ret = -ENOMEM;
150 ret = btrfs_update_inode(trans, root, inode); 164 goto out;
151 } 165 }
166 btrfs_set_trans_block_group(trans, inode);
152 167
153 btrfs_end_transaction(trans, root); 168 ret = do_setxattr(trans, inode, name, value, size, flags);
154 btrfs_free_path(path); 169 if (ret)
170 goto out;
171
172 inode->i_ctime = CURRENT_TIME;
173 ret = btrfs_update_inode(trans, root, inode);
174 BUG_ON(ret);
175out:
176 btrfs_end_transaction_throttle(trans, root);
177 btrfs_unreserve_metadata_space(root, 2);
155 return ret; 178 return ret;
156} 179}
157 180
@@ -314,7 +337,9 @@ int btrfs_setxattr(struct dentry *dentry, const char *name, const void *value,
314 337
315 if (size == 0) 338 if (size == 0)
316 value = ""; /* empty EA, do not remove */ 339 value = ""; /* empty EA, do not remove */
317 return __btrfs_setxattr(dentry->d_inode, name, value, size, flags); 340
341 return __btrfs_setxattr(NULL, dentry->d_inode, name, value, size,
342 flags);
318} 343}
319 344
320int btrfs_removexattr(struct dentry *dentry, const char *name) 345int btrfs_removexattr(struct dentry *dentry, const char *name)
@@ -329,10 +354,13 @@ int btrfs_removexattr(struct dentry *dentry, const char *name)
329 354
330 if (!btrfs_is_valid_xattr(name)) 355 if (!btrfs_is_valid_xattr(name))
331 return -EOPNOTSUPP; 356 return -EOPNOTSUPP;
332 return __btrfs_setxattr(dentry->d_inode, name, NULL, 0, XATTR_REPLACE); 357
358 return __btrfs_setxattr(NULL, dentry->d_inode, name, NULL, 0,
359 XATTR_REPLACE);
333} 360}
334 361
335int btrfs_xattr_security_init(struct inode *inode, struct inode *dir) 362int btrfs_xattr_security_init(struct btrfs_trans_handle *trans,
363 struct inode *inode, struct inode *dir)
336{ 364{
337 int err; 365 int err;
338 size_t len; 366 size_t len;
@@ -354,7 +382,7 @@ int btrfs_xattr_security_init(struct inode *inode, struct inode *dir)
354 } else { 382 } else {
355 strcpy(name, XATTR_SECURITY_PREFIX); 383 strcpy(name, XATTR_SECURITY_PREFIX);
356 strcpy(name + XATTR_SECURITY_PREFIX_LEN, suffix); 384 strcpy(name + XATTR_SECURITY_PREFIX_LEN, suffix);
357 err = __btrfs_setxattr(inode, name, value, len, 0); 385 err = __btrfs_setxattr(trans, inode, name, value, len, 0);
358 kfree(name); 386 kfree(name);
359 } 387 }
360 388
diff --git a/fs/btrfs/xattr.h b/fs/btrfs/xattr.h
index c71e9c3cf3f7..721efa0346e0 100644
--- a/fs/btrfs/xattr.h
+++ b/fs/btrfs/xattr.h
@@ -27,15 +27,16 @@ extern struct xattr_handler *btrfs_xattr_handlers[];
27 27
28extern ssize_t __btrfs_getxattr(struct inode *inode, const char *name, 28extern ssize_t __btrfs_getxattr(struct inode *inode, const char *name,
29 void *buffer, size_t size); 29 void *buffer, size_t size);
30extern int __btrfs_setxattr(struct inode *inode, const char *name, 30extern int __btrfs_setxattr(struct btrfs_trans_handle *trans,
31 const void *value, size_t size, int flags); 31 struct inode *inode, const char *name,
32 32 const void *value, size_t size, int flags);
33extern ssize_t btrfs_getxattr(struct dentry *dentry, const char *name, 33extern ssize_t btrfs_getxattr(struct dentry *dentry, const char *name,
34 void *buffer, size_t size); 34 void *buffer, size_t size);
35extern int btrfs_setxattr(struct dentry *dentry, const char *name, 35extern int btrfs_setxattr(struct dentry *dentry, const char *name,
36 const void *value, size_t size, int flags); 36 const void *value, size_t size, int flags);
37extern int btrfs_removexattr(struct dentry *dentry, const char *name); 37extern int btrfs_removexattr(struct dentry *dentry, const char *name);
38 38
39extern int btrfs_xattr_security_init(struct inode *inode, struct inode *dir); 39extern int btrfs_xattr_security_init(struct btrfs_trans_handle *trans,
40 struct inode *inode, struct inode *dir);
40 41
41#endif /* __XATTR__ */ 42#endif /* __XATTR__ */