about summary refs log tree commit diff stats
path: root/fs/btrfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs')
-rw-r--r-- fs/btrfs/acl.c 87
-rw-r--r-- fs/btrfs/btrfs_inode.h 11
-rw-r--r-- fs/btrfs/ctree.c 229
-rw-r--r-- fs/btrfs/ctree.h 45
-rw-r--r-- fs/btrfs/dir-item.c 19
-rw-r--r-- fs/btrfs/disk-io.c 42
-rw-r--r-- fs/btrfs/extent-tree.c 242
-rw-r--r-- fs/btrfs/extent_io.c 3
-rw-r--r-- fs/btrfs/extent_map.c 18
-rw-r--r-- fs/btrfs/file.c 774
-rw-r--r-- fs/btrfs/free-space-cache.c 2
-rw-r--r-- fs/btrfs/inode.c 743
-rw-r--r-- fs/btrfs/ioctl.c 34
-rw-r--r-- fs/btrfs/ordered-data.c 117
-rw-r--r-- fs/btrfs/ordered-data.h 5
-rw-r--r-- fs/btrfs/relocation.c 43
-rw-r--r-- fs/btrfs/root-tree.c 2
-rw-r--r-- fs/btrfs/super.c 31
-rw-r--r-- fs/btrfs/transaction.c 98
-rw-r--r-- fs/btrfs/transaction.h 7
-rw-r--r-- fs/btrfs/tree-log.c 122
-rw-r--r-- fs/btrfs/tree-log.h 3
-rw-r--r-- fs/btrfs/volumes.c 19
-rw-r--r-- fs/btrfs/xattr.c 82
-rw-r--r-- fs/btrfs/xattr.h 9
25 files changed, 1674 insertions, 1113 deletions
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 69b355ae7f49..6df6d6ed74fd 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -27,7 +27,7 @@
27#include "btrfs_inode.h" 27#include "btrfs_inode.h"
28#include "xattr.h" 28#include "xattr.h"
29 29
30#ifdef CONFIG_BTRFS_POSIX_ACL 30#ifdef CONFIG_BTRFS_FS_POSIX_ACL
31 31
32static struct posix_acl *btrfs_get_acl(struct inode *inode, int type) 32static struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
33{ 33{
@@ -73,13 +73,13 @@ static struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
73 return acl; 73 return acl;
74} 74}
75 75
76static int btrfs_xattr_get_acl(struct inode *inode, int type, 76static int btrfs_xattr_acl_get(struct dentry *dentry, const char *name,
77 void *value, size_t size) 77 void *value, size_t size, int type)
78{ 78{
79 struct posix_acl *acl; 79 struct posix_acl *acl;
80 int ret = 0; 80 int ret = 0;
81 81
82 acl = btrfs_get_acl(inode, type); 82 acl = btrfs_get_acl(dentry->d_inode, type);
83 83
84 if (IS_ERR(acl)) 84 if (IS_ERR(acl))
85 return PTR_ERR(acl); 85 return PTR_ERR(acl);
@@ -94,7 +94,8 @@ static int btrfs_xattr_get_acl(struct inode *inode, int type,
94/* 94/*
95 * Needs to be called with fs_mutex held 95 * Needs to be called with fs_mutex held
96 */ 96 */
97static int btrfs_set_acl(struct inode *inode, struct posix_acl *acl, int type) 97static int btrfs_set_acl(struct btrfs_trans_handle *trans,
98 struct inode *inode, struct posix_acl *acl, int type)
98{ 99{
99 int ret, size = 0; 100 int ret, size = 0;
100 const char *name; 101 const char *name;
@@ -111,12 +112,14 @@ static int btrfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
111 switch (type) { 112 switch (type) {
112 case ACL_TYPE_ACCESS: 113 case ACL_TYPE_ACCESS:
113 mode = inode->i_mode; 114 mode = inode->i_mode;
114 ret = posix_acl_equiv_mode(acl, &mode);
115 if (ret < 0)
116 return ret;
117 ret = 0;
118 inode->i_mode = mode;
119 name = POSIX_ACL_XATTR_ACCESS; 115 name = POSIX_ACL_XATTR_ACCESS;
116 if (acl) {
117 ret = posix_acl_equiv_mode(acl, &mode);
118 if (ret < 0)
119 return ret;
120 inode->i_mode = mode;
121 }
122 ret = 0;
120 break; 123 break;
121 case ACL_TYPE_DEFAULT: 124 case ACL_TYPE_DEFAULT:
122 if (!S_ISDIR(inode->i_mode)) 125 if (!S_ISDIR(inode->i_mode))
@@ -140,8 +143,7 @@ static int btrfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
140 goto out; 143 goto out;
141 } 144 }
142 145
143 ret = __btrfs_setxattr(inode, name, value, size, 0); 146 ret = __btrfs_setxattr(trans, inode, name, value, size, 0);
144
145out: 147out:
146 kfree(value); 148 kfree(value);
147 149
@@ -151,10 +153,10 @@ out:
151 return ret; 153 return ret;
152} 154}
153 155
154static int btrfs_xattr_set_acl(struct inode *inode, int type, 156static int btrfs_xattr_acl_set(struct dentry *dentry, const char *name,
155 const void *value, size_t size) 157 const void *value, size_t size, int flags, int type)
156{ 158{
157 int ret = 0; 159 int ret;
158 struct posix_acl *acl = NULL; 160 struct posix_acl *acl = NULL;
159 161
160 if (value) { 162 if (value) {
@@ -167,38 +169,13 @@ static int btrfs_xattr_set_acl(struct inode *inode, int type,
167 } 169 }
168 } 170 }
169 171
170 ret = btrfs_set_acl(inode, acl, type); 172 ret = btrfs_set_acl(NULL, dentry->d_inode, acl, type);
171 173
172 posix_acl_release(acl); 174 posix_acl_release(acl);
173 175
174 return ret; 176 return ret;
175} 177}
176 178
177
178static int btrfs_xattr_acl_access_get(struct inode *inode, const char *name,
179 void *value, size_t size)
180{
181 return btrfs_xattr_get_acl(inode, ACL_TYPE_ACCESS, value, size);
182}
183
184static int btrfs_xattr_acl_access_set(struct inode *inode, const char *name,
185 const void *value, size_t size, int flags)
186{
187 return btrfs_xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size);
188}
189
190static int btrfs_xattr_acl_default_get(struct inode *inode, const char *name,
191 void *value, size_t size)
192{
193 return btrfs_xattr_get_acl(inode, ACL_TYPE_DEFAULT, value, size);
194}
195
196static int btrfs_xattr_acl_default_set(struct inode *inode, const char *name,
197 const void *value, size_t size, int flags)
198{
199 return btrfs_xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size);
200}
201
202int btrfs_check_acl(struct inode *inode, int mask) 179int btrfs_check_acl(struct inode *inode, int mask)
203{ 180{
204 struct posix_acl *acl; 181 struct posix_acl *acl;
@@ -221,7 +198,8 @@ int btrfs_check_acl(struct inode *inode, int mask)
221 * stuff has been fixed to work with that. If the locking stuff changes, we 198 * stuff has been fixed to work with that. If the locking stuff changes, we
222 * need to re-evaluate the acl locking stuff. 199 * need to re-evaluate the acl locking stuff.
223 */ 200 */
224int btrfs_init_acl(struct inode *inode, struct inode *dir) 201int btrfs_init_acl(struct btrfs_trans_handle *trans,
202 struct inode *inode, struct inode *dir)
225{ 203{
226 struct posix_acl *acl = NULL; 204 struct posix_acl *acl = NULL;
227 int ret = 0; 205 int ret = 0;
@@ -246,7 +224,8 @@ int btrfs_init_acl(struct inode *inode, struct inode *dir)
246 mode_t mode; 224 mode_t mode;
247 225
248 if (S_ISDIR(inode->i_mode)) { 226 if (S_ISDIR(inode->i_mode)) {
249 ret = btrfs_set_acl(inode, acl, ACL_TYPE_DEFAULT); 227 ret = btrfs_set_acl(trans, inode, acl,
228 ACL_TYPE_DEFAULT);
250 if (ret) 229 if (ret)
251 goto failed; 230 goto failed;
252 } 231 }
@@ -261,10 +240,11 @@ int btrfs_init_acl(struct inode *inode, struct inode *dir)
261 inode->i_mode = mode; 240 inode->i_mode = mode;
262 if (ret > 0) { 241 if (ret > 0) {
263 /* we need an acl */ 242 /* we need an acl */
264 ret = btrfs_set_acl(inode, clone, 243 ret = btrfs_set_acl(trans, inode, clone,
265 ACL_TYPE_ACCESS); 244 ACL_TYPE_ACCESS);
266 } 245 }
267 } 246 }
247 posix_acl_release(clone);
268 } 248 }
269failed: 249failed:
270 posix_acl_release(acl); 250 posix_acl_release(acl);
@@ -294,7 +274,7 @@ int btrfs_acl_chmod(struct inode *inode)
294 274
295 ret = posix_acl_chmod_masq(clone, inode->i_mode); 275 ret = posix_acl_chmod_masq(clone, inode->i_mode);
296 if (!ret) 276 if (!ret)
297 ret = btrfs_set_acl(inode, clone, ACL_TYPE_ACCESS); 277 ret = btrfs_set_acl(NULL, inode, clone, ACL_TYPE_ACCESS);
298 278
299 posix_acl_release(clone); 279 posix_acl_release(clone);
300 280
@@ -303,26 +283,29 @@ int btrfs_acl_chmod(struct inode *inode)
303 283
304struct xattr_handler btrfs_xattr_acl_default_handler = { 284struct xattr_handler btrfs_xattr_acl_default_handler = {
305 .prefix = POSIX_ACL_XATTR_DEFAULT, 285 .prefix = POSIX_ACL_XATTR_DEFAULT,
306 .get = btrfs_xattr_acl_default_get, 286 .flags = ACL_TYPE_DEFAULT,
307 .set = btrfs_xattr_acl_default_set, 287 .get = btrfs_xattr_acl_get,
288 .set = btrfs_xattr_acl_set,
308}; 289};
309 290
310struct xattr_handler btrfs_xattr_acl_access_handler = { 291struct xattr_handler btrfs_xattr_acl_access_handler = {
311 .prefix = POSIX_ACL_XATTR_ACCESS, 292 .prefix = POSIX_ACL_XATTR_ACCESS,
312 .get = btrfs_xattr_acl_access_get, 293 .flags = ACL_TYPE_ACCESS,
313 .set = btrfs_xattr_acl_access_set, 294 .get = btrfs_xattr_acl_get,
295 .set = btrfs_xattr_acl_set,
314}; 296};
315 297
316#else /* CONFIG_BTRFS_POSIX_ACL */ 298#else /* CONFIG_BTRFS_FS_POSIX_ACL */
317 299
318int btrfs_acl_chmod(struct inode *inode) 300int btrfs_acl_chmod(struct inode *inode)
319{ 301{
320 return 0; 302 return 0;
321} 303}
322 304
323int btrfs_init_acl(struct inode *inode, struct inode *dir) 305int btrfs_init_acl(struct btrfs_trans_handle *trans,
306 struct inode *inode, struct inode *dir)
324{ 307{
325 return 0; 308 return 0;
326} 309}
327 310
328#endif /* CONFIG_BTRFS_POSIX_ACL */ 311#endif /* CONFIG_BTRFS_FS_POSIX_ACL */
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index c71abec0ab90..3f1f50d9d916 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -44,9 +44,6 @@ struct btrfs_inode {
44 */ 44 */
45 struct extent_io_tree io_failure_tree; 45 struct extent_io_tree io_failure_tree;
46 46
47 /* held while inserting or deleting extents from files */
48 struct mutex extent_mutex;
49
50 /* held while logging the inode in tree-log.c */ 47 /* held while logging the inode in tree-log.c */
51 struct mutex log_mutex; 48 struct mutex log_mutex;
52 49
@@ -86,6 +83,12 @@ struct btrfs_inode {
86 * transid of the trans_handle that last modified this inode 83 * transid of the trans_handle that last modified this inode
87 */ 84 */
88 u64 last_trans; 85 u64 last_trans;
86
87 /*
88 * log transid when this inode was last modified
89 */
90 u64 last_sub_trans;
91
89 /* 92 /*
90 * transid that last logged this inode 93 * transid that last logged this inode
91 */ 94 */
@@ -160,7 +163,7 @@ static inline struct btrfs_inode *BTRFS_I(struct inode *inode)
160 163
161static inline void btrfs_i_size_write(struct inode *inode, u64 size) 164static inline void btrfs_i_size_write(struct inode *inode, u64 size)
162{ 165{
163 inode->i_size = size; 166 i_size_write(inode, size);
164 BTRFS_I(inode)->disk_i_size = size; 167 BTRFS_I(inode)->disk_i_size = size;
165} 168}
166 169
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index ec96f3a6d536..c4bc570a396e 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -37,6 +37,11 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
37 struct extent_buffer *src_buf); 37 struct extent_buffer *src_buf);
38static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, 38static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
39 struct btrfs_path *path, int level, int slot); 39 struct btrfs_path *path, int level, int slot);
40static int setup_items_for_insert(struct btrfs_trans_handle *trans,
41 struct btrfs_root *root, struct btrfs_path *path,
42 struct btrfs_key *cpu_key, u32 *data_size,
43 u32 total_data, u32 total_size, int nr);
44
40 45
41struct btrfs_path *btrfs_alloc_path(void) 46struct btrfs_path *btrfs_alloc_path(void)
42{ 47{
@@ -451,9 +456,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
451 extent_buffer_get(cow); 456 extent_buffer_get(cow);
452 spin_unlock(&root->node_lock); 457 spin_unlock(&root->node_lock);
453 458
454 btrfs_free_extent(trans, root, buf->start, buf->len, 459 btrfs_free_tree_block(trans, root, buf->start, buf->len,
455 parent_start, root->root_key.objectid, 460 parent_start, root->root_key.objectid, level);
456 level, 0);
457 free_extent_buffer(buf); 461 free_extent_buffer(buf);
458 add_root_to_dirty_list(root); 462 add_root_to_dirty_list(root);
459 } else { 463 } else {
@@ -468,9 +472,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
468 btrfs_set_node_ptr_generation(parent, parent_slot, 472 btrfs_set_node_ptr_generation(parent, parent_slot,
469 trans->transid); 473 trans->transid);
470 btrfs_mark_buffer_dirty(parent); 474 btrfs_mark_buffer_dirty(parent);
471 btrfs_free_extent(trans, root, buf->start, buf->len, 475 btrfs_free_tree_block(trans, root, buf->start, buf->len,
472 parent_start, root->root_key.objectid, 476 parent_start, root->root_key.objectid, level);
473 level, 0);
474 } 477 }
475 if (unlock_orig) 478 if (unlock_orig)
476 btrfs_tree_unlock(buf); 479 btrfs_tree_unlock(buf);
@@ -1030,8 +1033,8 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
1030 btrfs_tree_unlock(mid); 1033 btrfs_tree_unlock(mid);
1031 /* once for the path */ 1034 /* once for the path */
1032 free_extent_buffer(mid); 1035 free_extent_buffer(mid);
1033 ret = btrfs_free_extent(trans, root, mid->start, mid->len, 1036 ret = btrfs_free_tree_block(trans, root, mid->start, mid->len,
1034 0, root->root_key.objectid, level, 1); 1037 0, root->root_key.objectid, level);
1035 /* once for the root ptr */ 1038 /* once for the root ptr */
1036 free_extent_buffer(mid); 1039 free_extent_buffer(mid);
1037 return ret; 1040 return ret;
@@ -1095,10 +1098,10 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
1095 1); 1098 1);
1096 if (wret) 1099 if (wret)
1097 ret = wret; 1100 ret = wret;
1098 wret = btrfs_free_extent(trans, root, bytenr, 1101 wret = btrfs_free_tree_block(trans, root,
1099 blocksize, 0, 1102 bytenr, blocksize, 0,
1100 root->root_key.objectid, 1103 root->root_key.objectid,
1101 level, 0); 1104 level);
1102 if (wret) 1105 if (wret)
1103 ret = wret; 1106 ret = wret;
1104 } else { 1107 } else {
@@ -1143,9 +1146,8 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
1143 wret = del_ptr(trans, root, path, level + 1, pslot); 1146 wret = del_ptr(trans, root, path, level + 1, pslot);
1144 if (wret) 1147 if (wret)
1145 ret = wret; 1148 ret = wret;
1146 wret = btrfs_free_extent(trans, root, bytenr, blocksize, 1149 wret = btrfs_free_tree_block(trans, root, bytenr, blocksize,
1147 0, root->root_key.objectid, 1150 0, root->root_key.objectid, level);
1148 level, 0);
1149 if (wret) 1151 if (wret)
1150 ret = wret; 1152 ret = wret;
1151 } else { 1153 } else {
@@ -2997,75 +2999,85 @@ again:
2997 return ret; 2999 return ret;
2998} 3000}
2999 3001
3000/* 3002static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,
3001 * This function splits a single item into two items, 3003 struct btrfs_root *root,
3002 * giving 'new_key' to the new item and splitting the 3004 struct btrfs_path *path, int ins_len)
3003 * old one at split_offset (from the start of the item).
3004 *
3005 * The path may be released by this operation. After
3006 * the split, the path is pointing to the old item. The
3007 * new item is going to be in the same node as the old one.
3008 *
3009 * Note, the item being split must be small enough to live alone on
3010 * a tree block with room for one extra struct btrfs_item
3011 *
3012 * This allows us to split the item in place, keeping a lock on the
3013 * leaf the entire time.
3014 */
3015int btrfs_split_item(struct btrfs_trans_handle *trans,
3016 struct btrfs_root *root,
3017 struct btrfs_path *path,
3018 struct btrfs_key *new_key,
3019 unsigned long split_offset)
3020{ 3005{
3021 u32 item_size; 3006 struct btrfs_key key;
3022 struct extent_buffer *leaf; 3007 struct extent_buffer *leaf;
3023 struct btrfs_key orig_key; 3008 struct btrfs_file_extent_item *fi;
3024 struct btrfs_item *item; 3009 u64 extent_len = 0;
3025 struct btrfs_item *new_item; 3010 u32 item_size;
3026 int ret = 0; 3011 int ret;
3027 int slot;
3028 u32 nritems;
3029 u32 orig_offset;
3030 struct btrfs_disk_key disk_key;
3031 char *buf;
3032 3012
3033 leaf = path->nodes[0]; 3013 leaf = path->nodes[0];
3034 btrfs_item_key_to_cpu(leaf, &orig_key, path->slots[0]); 3014 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
3035 if (btrfs_leaf_free_space(root, leaf) >= sizeof(struct btrfs_item)) 3015
3036 goto split; 3016 BUG_ON(key.type != BTRFS_EXTENT_DATA_KEY &&
3017 key.type != BTRFS_EXTENT_CSUM_KEY);
3018
3019 if (btrfs_leaf_free_space(root, leaf) >= ins_len)
3020 return 0;
3037 3021
3038 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 3022 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
3023 if (key.type == BTRFS_EXTENT_DATA_KEY) {
3024 fi = btrfs_item_ptr(leaf, path->slots[0],
3025 struct btrfs_file_extent_item);
3026 extent_len = btrfs_file_extent_num_bytes(leaf, fi);
3027 }
3039 btrfs_release_path(root, path); 3028 btrfs_release_path(root, path);
3040 3029
3041 path->search_for_split = 1;
3042 path->keep_locks = 1; 3030 path->keep_locks = 1;
3043 3031 path->search_for_split = 1;
3044 ret = btrfs_search_slot(trans, root, &orig_key, path, 0, 1); 3032 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3045 path->search_for_split = 0; 3033 path->search_for_split = 0;
3034 if (ret < 0)
3035 goto err;
3046 3036
3037 ret = -EAGAIN;
3038 leaf = path->nodes[0];
3047 /* if our item isn't there or got smaller, return now */ 3039 /* if our item isn't there or got smaller, return now */
3048 if (ret != 0 || item_size != btrfs_item_size_nr(path->nodes[0], 3040 if (ret > 0 || item_size != btrfs_item_size_nr(leaf, path->slots[0]))
3049 path->slots[0])) { 3041 goto err;
3050 path->keep_locks = 0; 3042
3051 return -EAGAIN; 3043 if (key.type == BTRFS_EXTENT_DATA_KEY) {
3044 fi = btrfs_item_ptr(leaf, path->slots[0],
3045 struct btrfs_file_extent_item);
3046 if (extent_len != btrfs_file_extent_num_bytes(leaf, fi))
3047 goto err;
3052 } 3048 }
3053 3049
3054 btrfs_set_path_blocking(path); 3050 btrfs_set_path_blocking(path);
3055 ret = split_leaf(trans, root, &orig_key, path, 3051 ret = split_leaf(trans, root, &key, path, ins_len, 1);
3056 sizeof(struct btrfs_item), 1);
3057 path->keep_locks = 0;
3058 BUG_ON(ret); 3052 BUG_ON(ret);
3059 3053
3054 path->keep_locks = 0;
3060 btrfs_unlock_up_safe(path, 1); 3055 btrfs_unlock_up_safe(path, 1);
3056 return 0;
3057err:
3058 path->keep_locks = 0;
3059 return ret;
3060}
3061
3062static noinline int split_item(struct btrfs_trans_handle *trans,
3063 struct btrfs_root *root,
3064 struct btrfs_path *path,
3065 struct btrfs_key *new_key,
3066 unsigned long split_offset)
3067{
3068 struct extent_buffer *leaf;
3069 struct btrfs_item *item;
3070 struct btrfs_item *new_item;
3071 int slot;
3072 char *buf;
3073 u32 nritems;
3074 u32 item_size;
3075 u32 orig_offset;
3076 struct btrfs_disk_key disk_key;
3077
3061 leaf = path->nodes[0]; 3078 leaf = path->nodes[0];
3062 BUG_ON(btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item)); 3079 BUG_ON(btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item));
3063 3080
3064split:
3065 /*
3066 * make sure any changes to the path from split_leaf leave it
3067 * in a blocking state
3068 */
3069 btrfs_set_path_blocking(path); 3081 btrfs_set_path_blocking(path);
3070 3082
3071 item = btrfs_item_nr(leaf, path->slots[0]); 3083 item = btrfs_item_nr(leaf, path->slots[0]);
@@ -3073,19 +3085,19 @@ split:
3073 item_size = btrfs_item_size(leaf, item); 3085 item_size = btrfs_item_size(leaf, item);
3074 3086
3075 buf = kmalloc(item_size, GFP_NOFS); 3087 buf = kmalloc(item_size, GFP_NOFS);
3088 if (!buf)
3089 return -ENOMEM;
3090
3076 read_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf, 3091 read_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf,
3077 path->slots[0]), item_size); 3092 path->slots[0]), item_size);
3078 slot = path->slots[0] + 1;
3079 leaf = path->nodes[0];
3080 3093
3094 slot = path->slots[0] + 1;
3081 nritems = btrfs_header_nritems(leaf); 3095 nritems = btrfs_header_nritems(leaf);
3082
3083 if (slot != nritems) { 3096 if (slot != nritems) {
3084 /* shift the items */ 3097 /* shift the items */
3085 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1), 3098 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1),
3086 btrfs_item_nr_offset(slot), 3099 btrfs_item_nr_offset(slot),
3087 (nritems - slot) * sizeof(struct btrfs_item)); 3100 (nritems - slot) * sizeof(struct btrfs_item));
3088
3089 } 3101 }
3090 3102
3091 btrfs_cpu_key_to_disk(&disk_key, new_key); 3103 btrfs_cpu_key_to_disk(&disk_key, new_key);
@@ -3113,16 +3125,81 @@ split:
3113 item_size - split_offset); 3125 item_size - split_offset);
3114 btrfs_mark_buffer_dirty(leaf); 3126 btrfs_mark_buffer_dirty(leaf);
3115 3127
3116 ret = 0; 3128 BUG_ON(btrfs_leaf_free_space(root, leaf) < 0);
3117 if (btrfs_leaf_free_space(root, leaf) < 0) {
3118 btrfs_print_leaf(root, leaf);
3119 BUG();
3120 }
3121 kfree(buf); 3129 kfree(buf);
3130 return 0;
3131}
3132
3133/*
3134 * This function splits a single item into two items,
3135 * giving 'new_key' to the new item and splitting the
3136 * old one at split_offset (from the start of the item).
3137 *
3138 * The path may be released by this operation. After
3139 * the split, the path is pointing to the old item. The
3140 * new item is going to be in the same node as the old one.
3141 *
3142 * Note, the item being split must be small enough to live alone on
3143 * a tree block with room for one extra struct btrfs_item
3144 *
3145 * This allows us to split the item in place, keeping a lock on the
3146 * leaf the entire time.
3147 */
3148int btrfs_split_item(struct btrfs_trans_handle *trans,
3149 struct btrfs_root *root,
3150 struct btrfs_path *path,
3151 struct btrfs_key *new_key,
3152 unsigned long split_offset)
3153{
3154 int ret;
3155 ret = setup_leaf_for_split(trans, root, path,
3156 sizeof(struct btrfs_item));
3157 if (ret)
3158 return ret;
3159
3160 ret = split_item(trans, root, path, new_key, split_offset);
3122 return ret; 3161 return ret;
3123} 3162}
3124 3163
3125/* 3164/*
3165 * This function duplicates an item, giving 'new_key' to the new item.
3166 * It guarantees both items live in the same tree leaf and the new item
3167 * is contiguous with the original item.
3168 *
3169 * This allows us to split a file extent in place, keeping a lock on the
3170 * leaf the entire time.
3171 */
3172int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
3173 struct btrfs_root *root,
3174 struct btrfs_path *path,
3175 struct btrfs_key *new_key)
3176{
3177 struct extent_buffer *leaf;
3178 int ret;
3179 u32 item_size;
3180
3181 leaf = path->nodes[0];
3182 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
3183 ret = setup_leaf_for_split(trans, root, path,
3184 item_size + sizeof(struct btrfs_item));
3185 if (ret)
3186 return ret;
3187
3188 path->slots[0]++;
3189 ret = setup_items_for_insert(trans, root, path, new_key, &item_size,
3190 item_size, item_size +
3191 sizeof(struct btrfs_item), 1);
3192 BUG_ON(ret);
3193
3194 leaf = path->nodes[0];
3195 memcpy_extent_buffer(leaf,
3196 btrfs_item_ptr_offset(leaf, path->slots[0]),
3197 btrfs_item_ptr_offset(leaf, path->slots[0] - 1),
3198 item_size);
3199 return 0;
3200}
3201
3202/*
3126 * make the item pointed to by the path smaller. new_size indicates 3203 * make the item pointed to by the path smaller. new_size indicates
3127 * how small to make it, and from_end tells us if we just chop bytes 3204 * how small to make it, and from_end tells us if we just chop bytes
3128 * off the end of the item or if we shift the item to chop bytes off 3205 * off the end of the item or if we shift the item to chop bytes off
@@ -3714,8 +3791,8 @@ static noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans,
3714 */ 3791 */
3715 btrfs_unlock_up_safe(path, 0); 3792 btrfs_unlock_up_safe(path, 0);
3716 3793
3717 ret = btrfs_free_extent(trans, root, leaf->start, leaf->len, 3794 ret = btrfs_free_tree_block(trans, root, leaf->start, leaf->len,
3718 0, root->root_key.objectid, 0, 0); 3795 0, root->root_key.objectid, 0);
3719 return ret; 3796 return ret;
3720} 3797}
3721/* 3798/*
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 1bb897ecdeeb..2aa8ec6a0981 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -310,6 +310,9 @@ struct btrfs_header {
310#define BTRFS_MAX_INLINE_DATA_SIZE(r) (BTRFS_LEAF_DATA_SIZE(r) - \ 310#define BTRFS_MAX_INLINE_DATA_SIZE(r) (BTRFS_LEAF_DATA_SIZE(r) - \
311 sizeof(struct btrfs_item) - \ 311 sizeof(struct btrfs_item) - \
312 sizeof(struct btrfs_file_extent_item)) 312 sizeof(struct btrfs_file_extent_item))
313#define BTRFS_MAX_XATTR_SIZE(r) (BTRFS_LEAF_DATA_SIZE(r) - \
314 sizeof(struct btrfs_item) -\
315 sizeof(struct btrfs_dir_item))
313 316
314 317
315/* 318/*
@@ -859,8 +862,9 @@ struct btrfs_fs_info {
859 struct mutex ordered_operations_mutex; 862 struct mutex ordered_operations_mutex;
860 struct rw_semaphore extent_commit_sem; 863 struct rw_semaphore extent_commit_sem;
861 864
862 struct rw_semaphore subvol_sem; 865 struct rw_semaphore cleanup_work_sem;
863 866
867 struct rw_semaphore subvol_sem;
864 struct srcu_struct subvol_srcu; 868 struct srcu_struct subvol_srcu;
865 869
866 struct list_head trans_list; 870 struct list_head trans_list;
@@ -868,6 +872,9 @@ struct btrfs_fs_info {
868 struct list_head dead_roots; 872 struct list_head dead_roots;
869 struct list_head caching_block_groups; 873 struct list_head caching_block_groups;
870 874
875 spinlock_t delayed_iput_lock;
876 struct list_head delayed_iputs;
877
871 atomic_t nr_async_submits; 878 atomic_t nr_async_submits;
872 atomic_t async_submit_draining; 879 atomic_t async_submit_draining;
873 atomic_t nr_async_bios; 880 atomic_t nr_async_bios;
@@ -1009,6 +1016,7 @@ struct btrfs_root {
1009 atomic_t log_writers; 1016 atomic_t log_writers;
1010 atomic_t log_commit[2]; 1017 atomic_t log_commit[2];
1011 unsigned long log_transid; 1018 unsigned long log_transid;
1019 unsigned long last_log_commit;
1012 unsigned long log_batch; 1020 unsigned long log_batch;
1013 pid_t log_start_pid; 1021 pid_t log_start_pid;
1014 bool log_multiple_pids; 1022 bool log_multiple_pids;
@@ -1033,12 +1041,12 @@ struct btrfs_root {
1033 int ref_cows; 1041 int ref_cows;
1034 int track_dirty; 1042 int track_dirty;
1035 int in_radix; 1043 int in_radix;
1044 int clean_orphans;
1036 1045
1037 u64 defrag_trans_start; 1046 u64 defrag_trans_start;
1038 struct btrfs_key defrag_progress; 1047 struct btrfs_key defrag_progress;
1039 struct btrfs_key defrag_max; 1048 struct btrfs_key defrag_max;
1040 int defrag_running; 1049 int defrag_running;
1041 int defrag_level;
1042 char *name; 1050 char *name;
1043 int in_sysfs; 1051 int in_sysfs;
1044 1052
@@ -1152,6 +1160,8 @@ struct btrfs_root {
1152#define BTRFS_MOUNT_FLUSHONCOMMIT (1 << 7) 1160#define BTRFS_MOUNT_FLUSHONCOMMIT (1 << 7)
1153#define BTRFS_MOUNT_SSD_SPREAD (1 << 8) 1161#define BTRFS_MOUNT_SSD_SPREAD (1 << 8)
1154#define BTRFS_MOUNT_NOSSD (1 << 9) 1162#define BTRFS_MOUNT_NOSSD (1 << 9)
1163#define BTRFS_MOUNT_DISCARD (1 << 10)
1164#define BTRFS_MOUNT_FORCE_COMPRESS (1 << 11)
1155 1165
1156#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) 1166#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt)
1157#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) 1167#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt)
@@ -1973,6 +1983,10 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
1973 u64 parent, u64 root_objectid, 1983 u64 parent, u64 root_objectid,
1974 struct btrfs_disk_key *key, int level, 1984 struct btrfs_disk_key *key, int level,
1975 u64 hint, u64 empty_size); 1985 u64 hint, u64 empty_size);
1986int btrfs_free_tree_block(struct btrfs_trans_handle *trans,
1987 struct btrfs_root *root,
1988 u64 bytenr, u32 blocksize,
1989 u64 parent, u64 root_objectid, int level);
1976struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, 1990struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
1977 struct btrfs_root *root, 1991 struct btrfs_root *root,
1978 u64 bytenr, u32 blocksize, 1992 u64 bytenr, u32 blocksize,
@@ -2087,6 +2101,10 @@ int btrfs_split_item(struct btrfs_trans_handle *trans,
2087 struct btrfs_path *path, 2101 struct btrfs_path *path,
2088 struct btrfs_key *new_key, 2102 struct btrfs_key *new_key,
2089 unsigned long split_offset); 2103 unsigned long split_offset);
2104int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
2105 struct btrfs_root *root,
2106 struct btrfs_path *path,
2107 struct btrfs_key *new_key);
2090int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root 2108int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
2091 *root, struct btrfs_key *key, struct btrfs_path *p, int 2109 *root, struct btrfs_key *key, struct btrfs_path *p, int
2092 ins_len, int cow); 2110 ins_len, int cow);
@@ -2194,9 +2212,10 @@ int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans,
2194 struct btrfs_path *path, 2212 struct btrfs_path *path,
2195 struct btrfs_dir_item *di); 2213 struct btrfs_dir_item *di);
2196int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans, 2214int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
2197 struct btrfs_root *root, const char *name, 2215 struct btrfs_root *root,
2198 u16 name_len, const void *data, u16 data_len, 2216 struct btrfs_path *path, u64 objectid,
2199 u64 dir); 2217 const char *name, u16 name_len,
2218 const void *data, u16 data_len);
2200struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans, 2219struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans,
2201 struct btrfs_root *root, 2220 struct btrfs_root *root,
2202 struct btrfs_path *path, u64 dir, 2221 struct btrfs_path *path, u64 dir,
@@ -2290,7 +2309,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
2290 struct inode *inode, u64 new_size, 2309 struct inode *inode, u64 new_size,
2291 u32 min_type); 2310 u32 min_type);
2292 2311
2293int btrfs_start_delalloc_inodes(struct btrfs_root *root); 2312int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput);
2294int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end); 2313int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end);
2295int btrfs_writepages(struct address_space *mapping, 2314int btrfs_writepages(struct address_space *mapping,
2296 struct writeback_control *wbc); 2315 struct writeback_control *wbc);
@@ -2330,6 +2349,8 @@ int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode);
2330void btrfs_orphan_cleanup(struct btrfs_root *root); 2349void btrfs_orphan_cleanup(struct btrfs_root *root);
2331int btrfs_cont_expand(struct inode *inode, loff_t size); 2350int btrfs_cont_expand(struct inode *inode, loff_t size);
2332int btrfs_invalidate_inodes(struct btrfs_root *root); 2351int btrfs_invalidate_inodes(struct btrfs_root *root);
2352void btrfs_add_delayed_iput(struct inode *inode);
2353void btrfs_run_delayed_iputs(struct btrfs_root *root);
2333extern const struct dentry_operations btrfs_dentry_operations; 2354extern const struct dentry_operations btrfs_dentry_operations;
2334 2355
2335/* ioctl.c */ 2356/* ioctl.c */
@@ -2343,12 +2364,9 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
2343 int skip_pinned); 2364 int skip_pinned);
2344int btrfs_check_file(struct btrfs_root *root, struct inode *inode); 2365int btrfs_check_file(struct btrfs_root *root, struct inode *inode);
2345extern const struct file_operations btrfs_file_operations; 2366extern const struct file_operations btrfs_file_operations;
2346int btrfs_drop_extents(struct btrfs_trans_handle *trans, 2367int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode,
2347 struct btrfs_root *root, struct inode *inode, 2368 u64 start, u64 end, u64 *hint_byte, int drop_cache);
2348 u64 start, u64 end, u64 locked_end,
2349 u64 inline_limit, u64 *hint_block, int drop_cache);
2350int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, 2369int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
2351 struct btrfs_root *root,
2352 struct inode *inode, u64 start, u64 end); 2370 struct inode *inode, u64 start, u64 end);
2353int btrfs_release_file(struct inode *inode, struct file *file); 2371int btrfs_release_file(struct inode *inode, struct file *file);
2354 2372
@@ -2373,12 +2391,13 @@ int btrfs_parse_options(struct btrfs_root *root, char *options);
2373int btrfs_sync_fs(struct super_block *sb, int wait); 2391int btrfs_sync_fs(struct super_block *sb, int wait);
2374 2392
2375/* acl.c */ 2393/* acl.c */
2376#ifdef CONFIG_BTRFS_POSIX_ACL 2394#ifdef CONFIG_BTRFS_FS_POSIX_ACL
2377int btrfs_check_acl(struct inode *inode, int mask); 2395int btrfs_check_acl(struct inode *inode, int mask);
2378#else 2396#else
2379#define btrfs_check_acl NULL 2397#define btrfs_check_acl NULL
2380#endif 2398#endif
2381int btrfs_init_acl(struct inode *inode, struct inode *dir); 2399int btrfs_init_acl(struct btrfs_trans_handle *trans,
2400 struct inode *inode, struct inode *dir);
2382int btrfs_acl_chmod(struct inode *inode); 2401int btrfs_acl_chmod(struct inode *inode);
2383 2402
2384/* relocation.c */ 2403/* relocation.c */
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index f3a6075519cc..e9103b3baa49 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -68,12 +68,12 @@ static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle
68 * into the tree 68 * into the tree
69 */ 69 */
70int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans, 70int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
71 struct btrfs_root *root, const char *name, 71 struct btrfs_root *root,
72 u16 name_len, const void *data, u16 data_len, 72 struct btrfs_path *path, u64 objectid,
73 u64 dir) 73 const char *name, u16 name_len,
74 const void *data, u16 data_len)
74{ 75{
75 int ret = 0; 76 int ret = 0;
76 struct btrfs_path *path;
77 struct btrfs_dir_item *dir_item; 77 struct btrfs_dir_item *dir_item;
78 unsigned long name_ptr, data_ptr; 78 unsigned long name_ptr, data_ptr;
79 struct btrfs_key key, location; 79 struct btrfs_key key, location;
@@ -81,15 +81,11 @@ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
81 struct extent_buffer *leaf; 81 struct extent_buffer *leaf;
82 u32 data_size; 82 u32 data_size;
83 83
84 key.objectid = dir; 84 BUG_ON(name_len + data_len > BTRFS_MAX_XATTR_SIZE(root));
85
86 key.objectid = objectid;
85 btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY); 87 btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY);
86 key.offset = btrfs_name_hash(name, name_len); 88 key.offset = btrfs_name_hash(name, name_len);
87 path = btrfs_alloc_path();
88 if (!path)
89 return -ENOMEM;
90 if (name_len + data_len + sizeof(struct btrfs_dir_item) >
91 BTRFS_LEAF_DATA_SIZE(root) - sizeof(struct btrfs_item))
92 return -ENOSPC;
93 89
94 data_size = sizeof(*dir_item) + name_len + data_len; 90 data_size = sizeof(*dir_item) + name_len + data_len;
95 dir_item = insert_with_overflow(trans, root, path, &key, data_size, 91 dir_item = insert_with_overflow(trans, root, path, &key, data_size,
@@ -117,7 +113,6 @@ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
117 write_extent_buffer(leaf, data, data_ptr, data_len); 113 write_extent_buffer(leaf, data, data_ptr, data_len);
118 btrfs_mark_buffer_dirty(path->nodes[0]); 114 btrfs_mark_buffer_dirty(path->nodes[0]);
119 115
120 btrfs_free_path(path);
121 return ret; 116 return ret;
122} 117}
123 118
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 100551a66c46..2b59201b955c 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -892,6 +892,8 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
892 root->stripesize = stripesize; 892 root->stripesize = stripesize;
893 root->ref_cows = 0; 893 root->ref_cows = 0;
894 root->track_dirty = 0; 894 root->track_dirty = 0;
895 root->in_radix = 0;
896 root->clean_orphans = 0;
895 897
896 root->fs_info = fs_info; 898 root->fs_info = fs_info;
897 root->objectid = objectid; 899 root->objectid = objectid;
@@ -917,6 +919,7 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
917 atomic_set(&root->log_writers, 0); 919 atomic_set(&root->log_writers, 0);
918 root->log_batch = 0; 920 root->log_batch = 0;
919 root->log_transid = 0; 921 root->log_transid = 0;
922 root->last_log_commit = 0;
920 extent_io_tree_init(&root->dirty_log_pages, 923 extent_io_tree_init(&root->dirty_log_pages,
921 fs_info->btree_inode->i_mapping, GFP_NOFS); 924 fs_info->btree_inode->i_mapping, GFP_NOFS);
922 925
@@ -927,7 +930,6 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
927 root->defrag_trans_start = fs_info->generation; 930 root->defrag_trans_start = fs_info->generation;
928 init_completion(&root->kobj_unregister); 931 init_completion(&root->kobj_unregister);
929 root->defrag_running = 0; 932 root->defrag_running = 0;
930 root->defrag_level = 0;
931 root->root_key.objectid = objectid; 933 root->root_key.objectid = objectid;
932 root->anon_super.s_root = NULL; 934 root->anon_super.s_root = NULL;
933 root->anon_super.s_dev = 0; 935 root->anon_super.s_dev = 0;
@@ -979,12 +981,12 @@ int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans,
979 981
980 while (1) { 982 while (1) {
981 ret = find_first_extent_bit(&log_root_tree->dirty_log_pages, 983 ret = find_first_extent_bit(&log_root_tree->dirty_log_pages,
982 0, &start, &end, EXTENT_DIRTY); 984 0, &start, &end, EXTENT_DIRTY | EXTENT_NEW);
983 if (ret) 985 if (ret)
984 break; 986 break;
985 987
986 clear_extent_dirty(&log_root_tree->dirty_log_pages, 988 clear_extent_bits(&log_root_tree->dirty_log_pages, start, end,
987 start, end, GFP_NOFS); 989 EXTENT_DIRTY | EXTENT_NEW, GFP_NOFS);
988 } 990 }
989 eb = fs_info->log_root_tree->node; 991 eb = fs_info->log_root_tree->node;
990 992
@@ -1087,6 +1089,7 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
1087 WARN_ON(root->log_root); 1089 WARN_ON(root->log_root);
1088 root->log_root = log_root; 1090 root->log_root = log_root;
1089 root->log_transid = 0; 1091 root->log_transid = 0;
1092 root->last_log_commit = 0;
1090 return 0; 1093 return 0;
1091} 1094}
1092 1095
@@ -1208,8 +1211,10 @@ again:
1208 ret = radix_tree_insert(&fs_info->fs_roots_radix, 1211 ret = radix_tree_insert(&fs_info->fs_roots_radix,
1209 (unsigned long)root->root_key.objectid, 1212 (unsigned long)root->root_key.objectid,
1210 root); 1213 root);
1211 if (ret == 0) 1214 if (ret == 0) {
1212 root->in_radix = 1; 1215 root->in_radix = 1;
1216 root->clean_orphans = 1;
1217 }
1213 spin_unlock(&fs_info->fs_roots_radix_lock); 1218 spin_unlock(&fs_info->fs_roots_radix_lock);
1214 radix_tree_preload_end(); 1219 radix_tree_preload_end();
1215 if (ret) { 1220 if (ret) {
@@ -1223,10 +1228,6 @@ again:
1223 ret = btrfs_find_dead_roots(fs_info->tree_root, 1228 ret = btrfs_find_dead_roots(fs_info->tree_root,
1224 root->root_key.objectid); 1229 root->root_key.objectid);
1225 WARN_ON(ret); 1230 WARN_ON(ret);
1226
1227 if (!(fs_info->sb->s_flags & MS_RDONLY))
1228 btrfs_orphan_cleanup(root);
1229
1230 return root; 1231 return root;
1231fail: 1232fail:
1232 free_fs_root(root); 1233 free_fs_root(root);
@@ -1475,6 +1476,7 @@ static int cleaner_kthread(void *arg)
1475 1476
1476 if (!(root->fs_info->sb->s_flags & MS_RDONLY) && 1477 if (!(root->fs_info->sb->s_flags & MS_RDONLY) &&
1477 mutex_trylock(&root->fs_info->cleaner_mutex)) { 1478 mutex_trylock(&root->fs_info->cleaner_mutex)) {
1479 btrfs_run_delayed_iputs(root);
1478 btrfs_clean_old_snapshots(root); 1480 btrfs_clean_old_snapshots(root);
1479 mutex_unlock(&root->fs_info->cleaner_mutex); 1481 mutex_unlock(&root->fs_info->cleaner_mutex);
1480 } 1482 }
@@ -1604,6 +1606,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1604 INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC); 1606 INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC);
1605 INIT_LIST_HEAD(&fs_info->trans_list); 1607 INIT_LIST_HEAD(&fs_info->trans_list);
1606 INIT_LIST_HEAD(&fs_info->dead_roots); 1608 INIT_LIST_HEAD(&fs_info->dead_roots);
1609 INIT_LIST_HEAD(&fs_info->delayed_iputs);
1607 INIT_LIST_HEAD(&fs_info->hashers); 1610 INIT_LIST_HEAD(&fs_info->hashers);
1608 INIT_LIST_HEAD(&fs_info->delalloc_inodes); 1611 INIT_LIST_HEAD(&fs_info->delalloc_inodes);
1609 INIT_LIST_HEAD(&fs_info->ordered_operations); 1612 INIT_LIST_HEAD(&fs_info->ordered_operations);
@@ -1612,6 +1615,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1612 spin_lock_init(&fs_info->new_trans_lock); 1615 spin_lock_init(&fs_info->new_trans_lock);
1613 spin_lock_init(&fs_info->ref_cache_lock); 1616 spin_lock_init(&fs_info->ref_cache_lock);
1614 spin_lock_init(&fs_info->fs_roots_radix_lock); 1617 spin_lock_init(&fs_info->fs_roots_radix_lock);
1618 spin_lock_init(&fs_info->delayed_iput_lock);
1615 1619
1616 init_completion(&fs_info->kobj_unregister); 1620 init_completion(&fs_info->kobj_unregister);
1617 fs_info->tree_root = tree_root; 1621 fs_info->tree_root = tree_root;
@@ -1687,6 +1691,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1687 mutex_init(&fs_info->cleaner_mutex); 1691 mutex_init(&fs_info->cleaner_mutex);
1688 mutex_init(&fs_info->volume_mutex); 1692 mutex_init(&fs_info->volume_mutex);
1689 init_rwsem(&fs_info->extent_commit_sem); 1693 init_rwsem(&fs_info->extent_commit_sem);
1694 init_rwsem(&fs_info->cleanup_work_sem);
1690 init_rwsem(&fs_info->subvol_sem); 1695 init_rwsem(&fs_info->subvol_sem);
1691 1696
1692 btrfs_init_free_cluster(&fs_info->meta_alloc_cluster); 1697 btrfs_init_free_cluster(&fs_info->meta_alloc_cluster);
@@ -1977,7 +1982,12 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1977 1982
1978 if (!(sb->s_flags & MS_RDONLY)) { 1983 if (!(sb->s_flags & MS_RDONLY)) {
1979 ret = btrfs_recover_relocation(tree_root); 1984 ret = btrfs_recover_relocation(tree_root);
1980 BUG_ON(ret); 1985 if (ret < 0) {
1986 printk(KERN_WARNING
1987 "btrfs: failed to recover relocation\n");
1988 err = -EINVAL;
1989 goto fail_trans_kthread;
1990 }
1981 } 1991 }
1982 1992
1983 location.objectid = BTRFS_FS_TREE_OBJECTID; 1993 location.objectid = BTRFS_FS_TREE_OBJECTID;
@@ -1988,6 +1998,12 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1988 if (!fs_info->fs_root) 1998 if (!fs_info->fs_root)
1989 goto fail_trans_kthread; 1999 goto fail_trans_kthread;
1990 2000
2001 if (!(sb->s_flags & MS_RDONLY)) {
2002 down_read(&fs_info->cleanup_work_sem);
2003 btrfs_orphan_cleanup(fs_info->fs_root);
2004 up_read(&fs_info->cleanup_work_sem);
2005 }
2006
1991 return tree_root; 2007 return tree_root;
1992 2008
1993fail_trans_kthread: 2009fail_trans_kthread:
@@ -2384,8 +2400,14 @@ int btrfs_commit_super(struct btrfs_root *root)
2384 int ret; 2400 int ret;
2385 2401
2386 mutex_lock(&root->fs_info->cleaner_mutex); 2402 mutex_lock(&root->fs_info->cleaner_mutex);
2403 btrfs_run_delayed_iputs(root);
2387 btrfs_clean_old_snapshots(root); 2404 btrfs_clean_old_snapshots(root);
2388 mutex_unlock(&root->fs_info->cleaner_mutex); 2405 mutex_unlock(&root->fs_info->cleaner_mutex);
2406
2407 /* wait until ongoing cleanup work done */
2408 down_write(&root->fs_info->cleanup_work_sem);
2409 up_write(&root->fs_info->cleanup_work_sem);
2410
2389 trans = btrfs_start_transaction(root, 1); 2411 trans = btrfs_start_transaction(root, 1);
2390 ret = btrfs_commit_transaction(trans, root); 2412 ret = btrfs_commit_transaction(trans, root);
2391 BUG_ON(ret); 2413 BUG_ON(ret);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index d0c4d584efad..559f72489b3b 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -83,6 +83,17 @@ static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
83 return (cache->flags & bits) == bits; 83 return (cache->flags & bits) == bits;
84} 84}
85 85
86void btrfs_get_block_group(struct btrfs_block_group_cache *cache)
87{
88 atomic_inc(&cache->count);
89}
90
91void btrfs_put_block_group(struct btrfs_block_group_cache *cache)
92{
93 if (atomic_dec_and_test(&cache->count))
94 kfree(cache);
95}
96
86/* 97/*
87 * this adds the block group to the fs_info rb tree for the block group 98 * this adds the block group to the fs_info rb tree for the block group
88 * cache 99 * cache
@@ -156,7 +167,7 @@ block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr,
156 } 167 }
157 } 168 }
158 if (ret) 169 if (ret)
159 atomic_inc(&ret->count); 170 btrfs_get_block_group(ret);
160 spin_unlock(&info->block_group_cache_lock); 171 spin_unlock(&info->block_group_cache_lock);
161 172
162 return ret; 173 return ret;
@@ -195,6 +206,14 @@ static int exclude_super_stripes(struct btrfs_root *root,
195 int stripe_len; 206 int stripe_len;
196 int i, nr, ret; 207 int i, nr, ret;
197 208
209 if (cache->key.objectid < BTRFS_SUPER_INFO_OFFSET) {
210 stripe_len = BTRFS_SUPER_INFO_OFFSET - cache->key.objectid;
211 cache->bytes_super += stripe_len;
212 ret = add_excluded_extent(root, cache->key.objectid,
213 stripe_len);
214 BUG_ON(ret);
215 }
216
198 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { 217 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
199 bytenr = btrfs_sb_offset(i); 218 bytenr = btrfs_sb_offset(i);
200 ret = btrfs_rmap_block(&root->fs_info->mapping_tree, 219 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
@@ -255,7 +274,7 @@ static u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
255 if (ret) 274 if (ret)
256 break; 275 break;
257 276
258 if (extent_start == start) { 277 if (extent_start <= start) {
259 start = extent_end + 1; 278 start = extent_end + 1;
260 } else if (extent_start > start && extent_start < end) { 279 } else if (extent_start > start && extent_start < end) {
261 size = extent_start - start; 280 size = extent_start - start;
@@ -399,6 +418,8 @@ err:
399 418
400 put_caching_control(caching_ctl); 419 put_caching_control(caching_ctl);
401 atomic_dec(&block_group->space_info->caching_threads); 420 atomic_dec(&block_group->space_info->caching_threads);
421 btrfs_put_block_group(block_group);
422
402 return 0; 423 return 0;
403} 424}
404 425
@@ -439,6 +460,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache)
439 up_write(&fs_info->extent_commit_sem); 460 up_write(&fs_info->extent_commit_sem);
440 461
441 atomic_inc(&cache->space_info->caching_threads); 462 atomic_inc(&cache->space_info->caching_threads);
463 btrfs_get_block_group(cache);
442 464
443 tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu\n", 465 tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu\n",
444 cache->key.objectid); 466 cache->key.objectid);
@@ -478,12 +500,6 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(
478 return cache; 500 return cache;
479} 501}
480 502
481void btrfs_put_block_group(struct btrfs_block_group_cache *cache)
482{
483 if (atomic_dec_and_test(&cache->count))
484 kfree(cache);
485}
486
487static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info, 503static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
488 u64 flags) 504 u64 flags)
489{ 505{
@@ -1568,23 +1584,23 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans,
1568 return ret; 1584 return ret;
1569} 1585}
1570 1586
1571#ifdef BIO_RW_DISCARD
1572static void btrfs_issue_discard(struct block_device *bdev, 1587static void btrfs_issue_discard(struct block_device *bdev,
1573 u64 start, u64 len) 1588 u64 start, u64 len)
1574{ 1589{
1575 blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL, 1590 blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL,
1576 DISCARD_FL_BARRIER); 1591 DISCARD_FL_BARRIER);
1577} 1592}
1578#endif
1579 1593
1580static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, 1594static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
1581 u64 num_bytes) 1595 u64 num_bytes)
1582{ 1596{
1583#ifdef BIO_RW_DISCARD
1584 int ret; 1597 int ret;
1585 u64 map_length = num_bytes; 1598 u64 map_length = num_bytes;
1586 struct btrfs_multi_bio *multi = NULL; 1599 struct btrfs_multi_bio *multi = NULL;
1587 1600
1601 if (!btrfs_test_opt(root, DISCARD))
1602 return 0;
1603
1588 /* Tell the block device(s) that the sectors can be discarded */ 1604 /* Tell the block device(s) that the sectors can be discarded */
1589 ret = btrfs_map_block(&root->fs_info->mapping_tree, READ, 1605 ret = btrfs_map_block(&root->fs_info->mapping_tree, READ,
1590 bytenr, &map_length, &multi, 0); 1606 bytenr, &map_length, &multi, 0);
@@ -1604,9 +1620,6 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
1604 } 1620 }
1605 1621
1606 return ret; 1622 return ret;
1607#else
1608 return 0;
1609#endif
1610} 1623}
1611 1624
1612int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, 1625int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
@@ -2577,7 +2590,7 @@ next_block_group(struct btrfs_root *root,
2577 if (node) { 2590 if (node) {
2578 cache = rb_entry(node, struct btrfs_block_group_cache, 2591 cache = rb_entry(node, struct btrfs_block_group_cache,
2579 cache_node); 2592 cache_node);
2580 atomic_inc(&cache->count); 2593 btrfs_get_block_group(cache);
2581 } else 2594 } else
2582 cache = NULL; 2595 cache = NULL;
2583 spin_unlock(&root->fs_info->block_group_cache_lock); 2596 spin_unlock(&root->fs_info->block_group_cache_lock);
@@ -2883,9 +2896,9 @@ static noinline void flush_delalloc_async(struct btrfs_work *work)
2883 root = async->root; 2896 root = async->root;
2884 info = async->info; 2897 info = async->info;
2885 2898
2886 btrfs_start_delalloc_inodes(root); 2899 btrfs_start_delalloc_inodes(root, 0);
2887 wake_up(&info->flush_wait); 2900 wake_up(&info->flush_wait);
2888 btrfs_wait_ordered_extents(root, 0); 2901 btrfs_wait_ordered_extents(root, 0, 0);
2889 2902
2890 spin_lock(&info->lock); 2903 spin_lock(&info->lock);
2891 info->flushing = 0; 2904 info->flushing = 0;
@@ -2959,8 +2972,8 @@ static void flush_delalloc(struct btrfs_root *root,
2959 return; 2972 return;
2960 2973
2961flush: 2974flush:
2962 btrfs_start_delalloc_inodes(root); 2975 btrfs_start_delalloc_inodes(root, 0);
2963 btrfs_wait_ordered_extents(root, 0); 2976 btrfs_wait_ordered_extents(root, 0, 0);
2964 2977
2965 spin_lock(&info->lock); 2978 spin_lock(&info->lock);
2966 info->flushing = 0; 2979 info->flushing = 0;
@@ -2980,10 +2993,10 @@ static int maybe_allocate_chunk(struct btrfs_root *root,
2980 2993
2981 free_space = btrfs_super_total_bytes(disk_super); 2994 free_space = btrfs_super_total_bytes(disk_super);
2982 /* 2995 /*
2983 * we allow the metadata to grow to a max of either 5gb or 5% of the 2996 * we allow the metadata to grow to a max of either 10gb or 5% of the
2984 * space in the volume. 2997 * space in the volume.
2985 */ 2998 */
2986 min_metadata = min((u64)5 * 1024 * 1024 * 1024, 2999 min_metadata = min((u64)10 * 1024 * 1024 * 1024,
2987 div64_u64(free_space * 5, 100)); 3000 div64_u64(free_space * 5, 100));
2988 if (info->total_bytes >= min_metadata) { 3001 if (info->total_bytes >= min_metadata) {
2989 spin_unlock(&info->lock); 3002 spin_unlock(&info->lock);
@@ -3457,14 +3470,6 @@ static int update_block_group(struct btrfs_trans_handle *trans,
3457 else 3470 else
3458 old_val -= num_bytes; 3471 old_val -= num_bytes;
3459 btrfs_set_super_bytes_used(&info->super_copy, old_val); 3472 btrfs_set_super_bytes_used(&info->super_copy, old_val);
3460
3461 /* block accounting for root item */
3462 old_val = btrfs_root_used(&root->root_item);
3463 if (alloc)
3464 old_val += num_bytes;
3465 else
3466 old_val -= num_bytes;
3467 btrfs_set_root_used(&root->root_item, old_val);
3468 spin_unlock(&info->delalloc_lock); 3473 spin_unlock(&info->delalloc_lock);
3469 3474
3470 while (total) { 3475 while (total) {
@@ -3690,6 +3695,14 @@ static int pin_down_bytes(struct btrfs_trans_handle *trans,
3690 if (is_data) 3695 if (is_data)
3691 goto pinit; 3696 goto pinit;
3692 3697
3698 /*
3699 * discard is sloooow, and so triggering discards on
3700 * individual btree blocks isn't a good plan. Just
3701 * pin everything in discard mode.
3702 */
3703 if (btrfs_test_opt(root, DISCARD))
3704 goto pinit;
3705
3693 buf = btrfs_find_tree_block(root, bytenr, num_bytes); 3706 buf = btrfs_find_tree_block(root, bytenr, num_bytes);
3694 if (!buf) 3707 if (!buf)
3695 goto pinit; 3708 goto pinit;
@@ -4044,6 +4057,21 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,
4044 return ret; 4057 return ret;
4045} 4058}
4046 4059
4060int btrfs_free_tree_block(struct btrfs_trans_handle *trans,
4061 struct btrfs_root *root,
4062 u64 bytenr, u32 blocksize,
4063 u64 parent, u64 root_objectid, int level)
4064{
4065 u64 used;
4066 spin_lock(&root->node_lock);
4067 used = btrfs_root_used(&root->root_item) - blocksize;
4068 btrfs_set_root_used(&root->root_item, used);
4069 spin_unlock(&root->node_lock);
4070
4071 return btrfs_free_extent(trans, root, bytenr, blocksize,
4072 parent, root_objectid, level, 0);
4073}
4074
4047static u64 stripe_align(struct btrfs_root *root, u64 val) 4075static u64 stripe_align(struct btrfs_root *root, u64 val)
4048{ 4076{
4049 u64 mask = ((u64)root->stripesize - 1); 4077 u64 mask = ((u64)root->stripesize - 1);
@@ -4097,7 +4125,7 @@ wait_block_group_cache_done(struct btrfs_block_group_cache *cache)
4097} 4125}
4098 4126
4099enum btrfs_loop_type { 4127enum btrfs_loop_type {
4100 LOOP_CACHED_ONLY = 0, 4128 LOOP_FIND_IDEAL = 0,
4101 LOOP_CACHING_NOWAIT = 1, 4129 LOOP_CACHING_NOWAIT = 1,
4102 LOOP_CACHING_WAIT = 2, 4130 LOOP_CACHING_WAIT = 2,
4103 LOOP_ALLOC_CHUNK = 3, 4131 LOOP_ALLOC_CHUNK = 3,
@@ -4126,12 +4154,15 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
4126 struct btrfs_block_group_cache *block_group = NULL; 4154 struct btrfs_block_group_cache *block_group = NULL;
4127 int empty_cluster = 2 * 1024 * 1024; 4155 int empty_cluster = 2 * 1024 * 1024;
4128 int allowed_chunk_alloc = 0; 4156 int allowed_chunk_alloc = 0;
4157 int done_chunk_alloc = 0;
4129 struct btrfs_space_info *space_info; 4158 struct btrfs_space_info *space_info;
4130 int last_ptr_loop = 0; 4159 int last_ptr_loop = 0;
4131 int loop = 0; 4160 int loop = 0;
4132 bool found_uncached_bg = false; 4161 bool found_uncached_bg = false;
4133 bool failed_cluster_refill = false; 4162 bool failed_cluster_refill = false;
4134 bool failed_alloc = false; 4163 bool failed_alloc = false;
4164 u64 ideal_cache_percent = 0;
4165 u64 ideal_cache_offset = 0;
4135 4166
4136 WARN_ON(num_bytes < root->sectorsize); 4167 WARN_ON(num_bytes < root->sectorsize);
4137 btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); 4168 btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
@@ -4167,14 +4198,19 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
4167 empty_cluster = 0; 4198 empty_cluster = 0;
4168 4199
4169 if (search_start == hint_byte) { 4200 if (search_start == hint_byte) {
4201ideal_cache:
4170 block_group = btrfs_lookup_block_group(root->fs_info, 4202 block_group = btrfs_lookup_block_group(root->fs_info,
4171 search_start); 4203 search_start);
4172 /* 4204 /*
4173 * we don't want to use the block group if it doesn't match our 4205 * we don't want to use the block group if it doesn't match our
4174 * allocation bits, or if its not cached. 4206 * allocation bits, or if its not cached.
4207 *
4208 * However if we are re-searching with an ideal block group
4209 * picked out then we don't care that the block group is cached.
4175 */ 4210 */
4176 if (block_group && block_group_bits(block_group, data) && 4211 if (block_group && block_group_bits(block_group, data) &&
4177 block_group_cache_done(block_group)) { 4212 (block_group->cached != BTRFS_CACHE_NO ||
4213 search_start == ideal_cache_offset)) {
4178 down_read(&space_info->groups_sem); 4214 down_read(&space_info->groups_sem);
4179 if (list_empty(&block_group->list) || 4215 if (list_empty(&block_group->list) ||
4180 block_group->ro) { 4216 block_group->ro) {
@@ -4186,46 +4222,63 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
4186 */ 4222 */
4187 btrfs_put_block_group(block_group); 4223 btrfs_put_block_group(block_group);
4188 up_read(&space_info->groups_sem); 4224 up_read(&space_info->groups_sem);
4189 } else 4225 } else {
4190 goto have_block_group; 4226 goto have_block_group;
4227 }
4191 } else if (block_group) { 4228 } else if (block_group) {
4192 btrfs_put_block_group(block_group); 4229 btrfs_put_block_group(block_group);
4193 } 4230 }
4194 } 4231 }
4195
4196search: 4232search:
4197 down_read(&space_info->groups_sem); 4233 down_read(&space_info->groups_sem);
4198 list_for_each_entry(block_group, &space_info->block_groups, list) { 4234 list_for_each_entry(block_group, &space_info->block_groups, list) {
4199 u64 offset; 4235 u64 offset;
4200 int cached; 4236 int cached;
4201 4237
4202 atomic_inc(&block_group->count); 4238 btrfs_get_block_group(block_group);
4203 search_start = block_group->key.objectid; 4239 search_start = block_group->key.objectid;
4204 4240
4205have_block_group: 4241have_block_group:
4206 if (unlikely(block_group->cached == BTRFS_CACHE_NO)) { 4242 if (unlikely(block_group->cached == BTRFS_CACHE_NO)) {
4243 u64 free_percent;
4244
4245 free_percent = btrfs_block_group_used(&block_group->item);
4246 free_percent *= 100;
4247 free_percent = div64_u64(free_percent,
4248 block_group->key.offset);
4249 free_percent = 100 - free_percent;
4250 if (free_percent > ideal_cache_percent &&
4251 likely(!block_group->ro)) {
4252 ideal_cache_offset = block_group->key.objectid;
4253 ideal_cache_percent = free_percent;
4254 }
4255
4207 /* 4256 /*
4208 * we want to start caching kthreads, but not too many 4257 * We only want to start kthread caching if we are at
4209 * right off the bat so we don't overwhelm the system, 4258 * the point where we will wait for caching to make
4210 * so only start them if there are less than 2 and we're 4259 * progress, or if our ideal search is over and we've
4211 * in the initial allocation phase. 4260 * found somebody to start caching.
4212 */ 4261 */
4213 if (loop > LOOP_CACHING_NOWAIT || 4262 if (loop > LOOP_CACHING_NOWAIT ||
4214 atomic_read(&space_info->caching_threads) < 2) { 4263 (loop > LOOP_FIND_IDEAL &&
4264 atomic_read(&space_info->caching_threads) < 2)) {
4215 ret = cache_block_group(block_group); 4265 ret = cache_block_group(block_group);
4216 BUG_ON(ret); 4266 BUG_ON(ret);
4217 } 4267 }
4218 }
4219
4220 cached = block_group_cache_done(block_group);
4221 if (unlikely(!cached)) {
4222 found_uncached_bg = true; 4268 found_uncached_bg = true;
4223 4269
4224 /* if we only want cached bgs, loop */ 4270 /*
4225 if (loop == LOOP_CACHED_ONLY) 4271 * If loop is set for cached only, try the next block
4272 * group.
4273 */
4274 if (loop == LOOP_FIND_IDEAL)
4226 goto loop; 4275 goto loop;
4227 } 4276 }
4228 4277
4278 cached = block_group_cache_done(block_group);
4279 if (unlikely(!cached))
4280 found_uncached_bg = true;
4281
4229 if (unlikely(block_group->ro)) 4282 if (unlikely(block_group->ro))
4230 goto loop; 4283 goto loop;
4231 4284
@@ -4270,7 +4323,7 @@ have_block_group:
4270 4323
4271 btrfs_put_block_group(block_group); 4324 btrfs_put_block_group(block_group);
4272 block_group = last_ptr->block_group; 4325 block_group = last_ptr->block_group;
4273 atomic_inc(&block_group->count); 4326 btrfs_get_block_group(block_group);
4274 spin_unlock(&last_ptr->lock); 4327 spin_unlock(&last_ptr->lock);
4275 spin_unlock(&last_ptr->refill_lock); 4328 spin_unlock(&last_ptr->refill_lock);
4276 4329
@@ -4405,9 +4458,11 @@ loop:
4405 } 4458 }
4406 up_read(&space_info->groups_sem); 4459 up_read(&space_info->groups_sem);
4407 4460
4408 /* LOOP_CACHED_ONLY, only search fully cached block groups 4461 /* LOOP_FIND_IDEAL, only search caching/cached bg's, and don't wait for
4409 * LOOP_CACHING_NOWAIT, search partially cached block groups, but 4462 * for them to make caching progress. Also
4410 * dont wait foR them to finish caching 4463 * determine the best possible bg to cache
4464 * LOOP_CACHING_NOWAIT, search partially cached block groups, kicking
4465 * caching kthreads as we move along
4411 * LOOP_CACHING_WAIT, search everything, and wait if our bg is caching 4466 * LOOP_CACHING_WAIT, search everything, and wait if our bg is caching
4412 * LOOP_ALLOC_CHUNK, force a chunk allocation and try again 4467 * LOOP_ALLOC_CHUNK, force a chunk allocation and try again
4413 * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try 4468 * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try
@@ -4416,12 +4471,47 @@ loop:
4416 if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE && 4471 if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE &&
4417 (found_uncached_bg || empty_size || empty_cluster || 4472 (found_uncached_bg || empty_size || empty_cluster ||
4418 allowed_chunk_alloc)) { 4473 allowed_chunk_alloc)) {
4419 if (found_uncached_bg) { 4474 if (loop == LOOP_FIND_IDEAL && found_uncached_bg) {
4420 found_uncached_bg = false; 4475 found_uncached_bg = false;
4421 if (loop < LOOP_CACHING_WAIT) { 4476 loop++;
4422 loop++; 4477 if (!ideal_cache_percent &&
4478 atomic_read(&space_info->caching_threads))
4423 goto search; 4479 goto search;
4424 } 4480
4481 /*
4482 * 1 of the following 2 things have happened so far
4483 *
4484 * 1) We found an ideal block group for caching that
4485 * is mostly full and will cache quickly, so we might
4486 * as well wait for it.
4487 *
4488 * 2) We searched for cached only and we didn't find
4489 * anything, and we didn't start any caching kthreads
4490 * either, so chances are we will loop through and
4491 * start a couple caching kthreads, and then come back
4492 * around and just wait for them. This will be slower
4493 * because we will have 2 caching kthreads reading at
4494 * the same time when we could have just started one
4495 * and waited for it to get far enough to give us an
4496 * allocation, so go ahead and go to the wait caching
4497 * loop.
4498 */
4499 loop = LOOP_CACHING_WAIT;
4500 search_start = ideal_cache_offset;
4501 ideal_cache_percent = 0;
4502 goto ideal_cache;
4503 } else if (loop == LOOP_FIND_IDEAL) {
4504 /*
4505 * Didn't find a uncached bg, wait on anything we find
4506 * next.
4507 */
4508 loop = LOOP_CACHING_WAIT;
4509 goto search;
4510 }
4511
4512 if (loop < LOOP_CACHING_WAIT) {
4513 loop++;
4514 goto search;
4425 } 4515 }
4426 4516
4427 if (loop == LOOP_ALLOC_CHUNK) { 4517 if (loop == LOOP_ALLOC_CHUNK) {
@@ -4433,7 +4523,8 @@ loop:
4433 ret = do_chunk_alloc(trans, root, num_bytes + 4523 ret = do_chunk_alloc(trans, root, num_bytes +
4434 2 * 1024 * 1024, data, 1); 4524 2 * 1024 * 1024, data, 1);
4435 allowed_chunk_alloc = 0; 4525 allowed_chunk_alloc = 0;
4436 } else { 4526 done_chunk_alloc = 1;
4527 } else if (!done_chunk_alloc) {
4437 space_info->force_alloc = 1; 4528 space_info->force_alloc = 1;
4438 } 4529 }
4439 4530
@@ -4510,7 +4601,6 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
4510{ 4601{
4511 int ret; 4602 int ret;
4512 u64 search_start = 0; 4603 u64 search_start = 0;
4513 struct btrfs_fs_info *info = root->fs_info;
4514 4604
4515 data = btrfs_get_alloc_profile(root, data); 4605 data = btrfs_get_alloc_profile(root, data);
4516again: 4606again:
@@ -4518,17 +4608,9 @@ again:
4518 * the only place that sets empty_size is btrfs_realloc_node, which 4608 * the only place that sets empty_size is btrfs_realloc_node, which
4519 * is not called recursively on allocations 4609 * is not called recursively on allocations
4520 */ 4610 */
4521 if (empty_size || root->ref_cows) { 4611 if (empty_size || root->ref_cows)
4522 if (!(data & BTRFS_BLOCK_GROUP_METADATA)) {
4523 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
4524 2 * 1024 * 1024,
4525 BTRFS_BLOCK_GROUP_METADATA |
4526 (info->metadata_alloc_profile &
4527 info->avail_metadata_alloc_bits), 0);
4528 }
4529 ret = do_chunk_alloc(trans, root->fs_info->extent_root, 4612 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
4530 num_bytes + 2 * 1024 * 1024, data, 0); 4613 num_bytes + 2 * 1024 * 1024, data, 0);
4531 }
4532 4614
4533 WARN_ON(num_bytes < root->sectorsize); 4615 WARN_ON(num_bytes < root->sectorsize);
4534 ret = find_free_extent(trans, root, num_bytes, empty_size, 4616 ret = find_free_extent(trans, root, num_bytes, empty_size,
@@ -4829,6 +4911,14 @@ static int alloc_tree_block(struct btrfs_trans_handle *trans,
4829 extent_op); 4911 extent_op);
4830 BUG_ON(ret); 4912 BUG_ON(ret);
4831 } 4913 }
4914
4915 if (root_objectid == root->root_key.objectid) {
4916 u64 used;
4917 spin_lock(&root->node_lock);
4918 used = btrfs_root_used(&root->root_item) + num_bytes;
4919 btrfs_set_root_used(&root->root_item, used);
4920 spin_unlock(&root->node_lock);
4921 }
4832 return ret; 4922 return ret;
4833} 4923}
4834 4924
@@ -4851,8 +4941,16 @@ struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
4851 btrfs_set_buffer_uptodate(buf); 4941 btrfs_set_buffer_uptodate(buf);
4852 4942
4853 if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { 4943 if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
4854 set_extent_dirty(&root->dirty_log_pages, buf->start, 4944 /*
4855 buf->start + buf->len - 1, GFP_NOFS); 4945 * we allow two log transactions at a time, use different
4946 * EXENT bit to differentiate dirty pages.
4947 */
4948 if (root->log_transid % 2 == 0)
4949 set_extent_dirty(&root->dirty_log_pages, buf->start,
4950 buf->start + buf->len - 1, GFP_NOFS);
4951 else
4952 set_extent_new(&root->dirty_log_pages, buf->start,
4953 buf->start + buf->len - 1, GFP_NOFS);
4856 } else { 4954 } else {
4857 set_extent_dirty(&trans->transaction->dirty_pages, buf->start, 4955 set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
4858 buf->start + buf->len - 1, GFP_NOFS); 4956 buf->start + buf->len - 1, GFP_NOFS);
@@ -5304,10 +5402,6 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
5304 int ret; 5402 int ret;
5305 5403
5306 while (level >= 0) { 5404 while (level >= 0) {
5307 if (path->slots[level] >=
5308 btrfs_header_nritems(path->nodes[level]))
5309 break;
5310
5311 ret = walk_down_proc(trans, root, path, wc, lookup_info); 5405 ret = walk_down_proc(trans, root, path, wc, lookup_info);
5312 if (ret > 0) 5406 if (ret > 0)
5313 break; 5407 break;
@@ -5315,6 +5409,10 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
5315 if (level == 0) 5409 if (level == 0)
5316 break; 5410 break;
5317 5411
5412 if (path->slots[level] >=
5413 btrfs_header_nritems(path->nodes[level]))
5414 break;
5415
5318 ret = do_walk_down(trans, root, path, wc, &lookup_info); 5416 ret = do_walk_down(trans, root, path, wc, &lookup_info);
5319 if (ret > 0) { 5417 if (ret > 0) {
5320 path->slots[level]++; 5418 path->slots[level]++;
@@ -7305,9 +7403,7 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
7305 wait_block_group_cache_done(block_group); 7403 wait_block_group_cache_done(block_group);
7306 7404
7307 btrfs_remove_free_space_cache(block_group); 7405 btrfs_remove_free_space_cache(block_group);
7308 7406 btrfs_put_block_group(block_group);
7309 WARN_ON(atomic_read(&block_group->count) != 1);
7310 kfree(block_group);
7311 7407
7312 spin_lock(&info->block_group_cache_lock); 7408 spin_lock(&info->block_group_cache_lock);
7313 } 7409 }
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 96577e8bf9fd..b177ed319612 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3165,10 +3165,9 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
3165 spin_unlock(&tree->buffer_lock); 3165 spin_unlock(&tree->buffer_lock);
3166 goto free_eb; 3166 goto free_eb;
3167 } 3167 }
3168 spin_unlock(&tree->buffer_lock);
3169
3170 /* add one reference for the tree */ 3168 /* add one reference for the tree */
3171 atomic_inc(&eb->refs); 3169 atomic_inc(&eb->refs);
3170 spin_unlock(&tree->buffer_lock);
3172 return eb; 3171 return eb;
3173 3172
3174free_eb: 3173free_eb:
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 2c726b7b9faa..428fcac45f90 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -155,20 +155,6 @@ static struct rb_node *__tree_search(struct rb_root *root, u64 offset,
155 return NULL; 155 return NULL;
156} 156}
157 157
158/*
159 * look for an offset in the tree, and if it can't be found, return
160 * the first offset we can find smaller than 'offset'.
161 */
162static inline struct rb_node *tree_search(struct rb_root *root, u64 offset)
163{
164 struct rb_node *prev;
165 struct rb_node *ret;
166 ret = __tree_search(root, offset, &prev, NULL);
167 if (!ret)
168 return prev;
169 return ret;
170}
171
172/* check to see if two extent_map structs are adjacent and safe to merge */ 158/* check to see if two extent_map structs are adjacent and safe to merge */
173static int mergable_maps(struct extent_map *prev, struct extent_map *next) 159static int mergable_maps(struct extent_map *prev, struct extent_map *next)
174{ 160{
@@ -208,7 +194,7 @@ int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len)
208 write_lock(&tree->lock); 194 write_lock(&tree->lock);
209 em = lookup_extent_mapping(tree, start, len); 195 em = lookup_extent_mapping(tree, start, len);
210 196
211 WARN_ON(em->start != start || !em); 197 WARN_ON(!em || em->start != start);
212 198
213 if (!em) 199 if (!em)
214 goto out; 200 goto out;
@@ -256,7 +242,7 @@ out:
256 * Insert @em into @tree or perform a simple forward/backward merge with 242 * Insert @em into @tree or perform a simple forward/backward merge with
257 * existing mappings. The extent_map struct passed in will be inserted 243 * existing mappings. The extent_map struct passed in will be inserted
258 * into the tree directly, with an additional reference taken, or a 244 * into the tree directly, with an additional reference taken, or a
259 * reference dropped if the merge attempt was sucessfull. 245 * reference dropped if the merge attempt was successfull.
260 */ 246 */
261int add_extent_mapping(struct extent_map_tree *tree, 247int add_extent_mapping(struct extent_map_tree *tree,
262 struct extent_map *em) 248 struct extent_map *em)
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 2d623aa0625f..6ed434ac037f 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -179,18 +179,14 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
179 } 179 }
180 flags = em->flags; 180 flags = em->flags;
181 if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) { 181 if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) {
182 if (em->start <= start && 182 if (testend && em->start + em->len >= start + len) {
183 (!testend || em->start + em->len >= start + len)) {
184 free_extent_map(em); 183 free_extent_map(em);
185 write_unlock(&em_tree->lock); 184 write_unlock(&em_tree->lock);
186 break; 185 break;
187 } 186 }
188 if (start < em->start) { 187 start = em->start + em->len;
189 len = em->start - start; 188 if (testend)
190 } else {
191 len = start + len - (em->start + em->len); 189 len = start + len - (em->start + em->len);
192 start = em->start + em->len;
193 }
194 free_extent_map(em); 190 free_extent_map(em);
195 write_unlock(&em_tree->lock); 191 write_unlock(&em_tree->lock);
196 continue; 192 continue;
@@ -265,324 +261,253 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
265 * If an extent intersects the range but is not entirely inside the range 261 * If an extent intersects the range but is not entirely inside the range
266 * it is either truncated or split. Anything entirely inside the range 262 * it is either truncated or split. Anything entirely inside the range
267 * is deleted from the tree. 263 * is deleted from the tree.
268 *
269 * inline_limit is used to tell this code which offsets in the file to keep
270 * if they contain inline extents.
271 */ 264 */
272noinline int btrfs_drop_extents(struct btrfs_trans_handle *trans, 265int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode,
273 struct btrfs_root *root, struct inode *inode, 266 u64 start, u64 end, u64 *hint_byte, int drop_cache)
274 u64 start, u64 end, u64 locked_end,
275 u64 inline_limit, u64 *hint_byte, int drop_cache)
276{ 267{
277 u64 extent_end = 0; 268 struct btrfs_root *root = BTRFS_I(inode)->root;
278 u64 search_start = start;
279 u64 ram_bytes = 0;
280 u64 disk_bytenr = 0;
281 u64 orig_locked_end = locked_end;
282 u8 compression;
283 u8 encryption;
284 u16 other_encoding = 0;
285 struct extent_buffer *leaf; 269 struct extent_buffer *leaf;
286 struct btrfs_file_extent_item *extent; 270 struct btrfs_file_extent_item *fi;
287 struct btrfs_path *path; 271 struct btrfs_path *path;
288 struct btrfs_key key; 272 struct btrfs_key key;
289 struct btrfs_file_extent_item old; 273 struct btrfs_key new_key;
290 int keep; 274 u64 search_start = start;
291 int slot; 275 u64 disk_bytenr = 0;
292 int bookend; 276 u64 num_bytes = 0;
293 int found_type = 0; 277 u64 extent_offset = 0;
294 int found_extent; 278 u64 extent_end = 0;
295 int found_inline; 279 int del_nr = 0;
280 int del_slot = 0;
281 int extent_type;
296 int recow; 282 int recow;
297 int ret; 283 int ret;
298 284
299 inline_limit = 0;
300 if (drop_cache) 285 if (drop_cache)
301 btrfs_drop_extent_cache(inode, start, end - 1, 0); 286 btrfs_drop_extent_cache(inode, start, end - 1, 0);
302 287
303 path = btrfs_alloc_path(); 288 path = btrfs_alloc_path();
304 if (!path) 289 if (!path)
305 return -ENOMEM; 290 return -ENOMEM;
291
306 while (1) { 292 while (1) {
307 recow = 0; 293 recow = 0;
308 btrfs_release_path(root, path);
309 ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino, 294 ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino,
310 search_start, -1); 295 search_start, -1);
311 if (ret < 0) 296 if (ret < 0)
312 goto out; 297 break;
313 if (ret > 0) { 298 if (ret > 0 && path->slots[0] > 0 && search_start == start) {
314 if (path->slots[0] == 0) { 299 leaf = path->nodes[0];
315 ret = 0; 300 btrfs_item_key_to_cpu(leaf, &key, path->slots[0] - 1);
316 goto out; 301 if (key.objectid == inode->i_ino &&
317 } 302 key.type == BTRFS_EXTENT_DATA_KEY)
318 path->slots[0]--; 303 path->slots[0]--;
319 } 304 }
305 ret = 0;
320next_slot: 306next_slot:
321 keep = 0;
322 bookend = 0;
323 found_extent = 0;
324 found_inline = 0;
325 compression = 0;
326 encryption = 0;
327 extent = NULL;
328 leaf = path->nodes[0]; 307 leaf = path->nodes[0];
329 slot = path->slots[0]; 308 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
330 ret = 0; 309 BUG_ON(del_nr > 0);
331 btrfs_item_key_to_cpu(leaf, &key, slot); 310 ret = btrfs_next_leaf(root, path);
332 if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY && 311 if (ret < 0)
333 key.offset >= end) { 312 break;
334 goto out; 313 if (ret > 0) {
335 } 314 ret = 0;
336 if (btrfs_key_type(&key) > BTRFS_EXTENT_DATA_KEY || 315 break;
337 key.objectid != inode->i_ino) {
338 goto out;
339 }
340 if (recow) {
341 search_start = max(key.offset, start);
342 continue;
343 }
344 if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) {
345 extent = btrfs_item_ptr(leaf, slot,
346 struct btrfs_file_extent_item);
347 found_type = btrfs_file_extent_type(leaf, extent);
348 compression = btrfs_file_extent_compression(leaf,
349 extent);
350 encryption = btrfs_file_extent_encryption(leaf,
351 extent);
352 other_encoding = btrfs_file_extent_other_encoding(leaf,
353 extent);
354 if (found_type == BTRFS_FILE_EXTENT_REG ||
355 found_type == BTRFS_FILE_EXTENT_PREALLOC) {
356 extent_end =
357 btrfs_file_extent_disk_bytenr(leaf,
358 extent);
359 if (extent_end)
360 *hint_byte = extent_end;
361
362 extent_end = key.offset +
363 btrfs_file_extent_num_bytes(leaf, extent);
364 ram_bytes = btrfs_file_extent_ram_bytes(leaf,
365 extent);
366 found_extent = 1;
367 } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
368 found_inline = 1;
369 extent_end = key.offset +
370 btrfs_file_extent_inline_len(leaf, extent);
371 } 316 }
317 leaf = path->nodes[0];
318 recow = 1;
319 }
320
321 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
322 if (key.objectid > inode->i_ino ||
323 key.type > BTRFS_EXTENT_DATA_KEY || key.offset >= end)
324 break;
325
326 fi = btrfs_item_ptr(leaf, path->slots[0],
327 struct btrfs_file_extent_item);
328 extent_type = btrfs_file_extent_type(leaf, fi);
329
330 if (extent_type == BTRFS_FILE_EXTENT_REG ||
331 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
332 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
333 num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
334 extent_offset = btrfs_file_extent_offset(leaf, fi);
335 extent_end = key.offset +
336 btrfs_file_extent_num_bytes(leaf, fi);
337 } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
338 extent_end = key.offset +
339 btrfs_file_extent_inline_len(leaf, fi);
372 } else { 340 } else {
341 WARN_ON(1);
373 extent_end = search_start; 342 extent_end = search_start;
374 } 343 }
375 344
376 /* we found nothing we can drop */ 345 if (extent_end <= search_start) {
377 if ((!found_extent && !found_inline) || 346 path->slots[0]++;
378 search_start >= extent_end) {
379 int nextret;
380 u32 nritems;
381 nritems = btrfs_header_nritems(leaf);
382 if (slot >= nritems - 1) {
383 nextret = btrfs_next_leaf(root, path);
384 if (nextret)
385 goto out;
386 recow = 1;
387 } else {
388 path->slots[0]++;
389 }
390 goto next_slot; 347 goto next_slot;
391 } 348 }
392 349
393 if (end <= extent_end && start >= key.offset && found_inline) 350 search_start = max(key.offset, start);
394 *hint_byte = EXTENT_MAP_INLINE; 351 if (recow) {
395 352 btrfs_release_path(root, path);
396 if (found_extent) { 353 continue;
397 read_extent_buffer(leaf, &old, (unsigned long)extent,
398 sizeof(old));
399 }
400
401 if (end < extent_end && end >= key.offset) {
402 bookend = 1;
403 if (found_inline && start <= key.offset)
404 keep = 1;
405 } 354 }
406 355
407 if (bookend && found_extent) { 356 /*
408 if (locked_end < extent_end) { 357 * | - range to drop - |
409 ret = try_lock_extent(&BTRFS_I(inode)->io_tree, 358 * | -------- extent -------- |
410 locked_end, extent_end - 1, 359 */
411 GFP_NOFS); 360 if (start > key.offset && end < extent_end) {
412 if (!ret) { 361 BUG_ON(del_nr > 0);
413 btrfs_release_path(root, path); 362 BUG_ON(extent_type == BTRFS_FILE_EXTENT_INLINE);
414 lock_extent(&BTRFS_I(inode)->io_tree, 363
415 locked_end, extent_end - 1, 364 memcpy(&new_key, &key, sizeof(new_key));
416 GFP_NOFS); 365 new_key.offset = start;
417 locked_end = extent_end; 366 ret = btrfs_duplicate_item(trans, root, path,
418 continue; 367 &new_key);
419 } 368 if (ret == -EAGAIN) {
420 locked_end = extent_end; 369 btrfs_release_path(root, path);
370 continue;
421 } 371 }
422 disk_bytenr = le64_to_cpu(old.disk_bytenr); 372 if (ret < 0)
423 if (disk_bytenr != 0) { 373 break;
374
375 leaf = path->nodes[0];
376 fi = btrfs_item_ptr(leaf, path->slots[0] - 1,
377 struct btrfs_file_extent_item);
378 btrfs_set_file_extent_num_bytes(leaf, fi,
379 start - key.offset);
380
381 fi = btrfs_item_ptr(leaf, path->slots[0],
382 struct btrfs_file_extent_item);
383
384 extent_offset += start - key.offset;
385 btrfs_set_file_extent_offset(leaf, fi, extent_offset);
386 btrfs_set_file_extent_num_bytes(leaf, fi,
387 extent_end - start);
388 btrfs_mark_buffer_dirty(leaf);
389
390 if (disk_bytenr > 0) {
424 ret = btrfs_inc_extent_ref(trans, root, 391 ret = btrfs_inc_extent_ref(trans, root,
425 disk_bytenr, 392 disk_bytenr, num_bytes, 0,
426 le64_to_cpu(old.disk_num_bytes), 0, 393 root->root_key.objectid,
427 root->root_key.objectid, 394 new_key.objectid,
428 key.objectid, key.offset - 395 start - extent_offset);
429 le64_to_cpu(old.offset));
430 BUG_ON(ret); 396 BUG_ON(ret);
397 *hint_byte = disk_bytenr;
431 } 398 }
399 key.offset = start;
432 } 400 }
401 /*
402 * | ---- range to drop ----- |
403 * | -------- extent -------- |
404 */
405 if (start <= key.offset && end < extent_end) {
406 BUG_ON(extent_type == BTRFS_FILE_EXTENT_INLINE);
407
408 memcpy(&new_key, &key, sizeof(new_key));
409 new_key.offset = end;
410 btrfs_set_item_key_safe(trans, root, path, &new_key);
433 411
434 if (found_inline) { 412 extent_offset += end - key.offset;
435 u64 mask = root->sectorsize - 1; 413 btrfs_set_file_extent_offset(leaf, fi, extent_offset);
436 search_start = (extent_end + mask) & ~mask; 414 btrfs_set_file_extent_num_bytes(leaf, fi,
437 } else 415 extent_end - end);
438 search_start = extent_end; 416 btrfs_mark_buffer_dirty(leaf);
439 417 if (disk_bytenr > 0) {
440 /* truncate existing extent */ 418 inode_sub_bytes(inode, end - key.offset);
441 if (start > key.offset) { 419 *hint_byte = disk_bytenr;
442 u64 new_num;
443 u64 old_num;
444 keep = 1;
445 WARN_ON(start & (root->sectorsize - 1));
446 if (found_extent) {
447 new_num = start - key.offset;
448 old_num = btrfs_file_extent_num_bytes(leaf,
449 extent);
450 *hint_byte =
451 btrfs_file_extent_disk_bytenr(leaf,
452 extent);
453 if (btrfs_file_extent_disk_bytenr(leaf,
454 extent)) {
455 inode_sub_bytes(inode, old_num -
456 new_num);
457 }
458 btrfs_set_file_extent_num_bytes(leaf,
459 extent, new_num);
460 btrfs_mark_buffer_dirty(leaf);
461 } else if (key.offset < inline_limit &&
462 (end > extent_end) &&
463 (inline_limit < extent_end)) {
464 u32 new_size;
465 new_size = btrfs_file_extent_calc_inline_size(
466 inline_limit - key.offset);
467 inode_sub_bytes(inode, extent_end -
468 inline_limit);
469 btrfs_set_file_extent_ram_bytes(leaf, extent,
470 new_size);
471 if (!compression && !encryption) {
472 btrfs_truncate_item(trans, root, path,
473 new_size, 1);
474 }
475 } 420 }
421 break;
476 } 422 }
477 /* delete the entire extent */
478 if (!keep) {
479 if (found_inline)
480 inode_sub_bytes(inode, extent_end -
481 key.offset);
482 ret = btrfs_del_item(trans, root, path);
483 /* TODO update progress marker and return */
484 BUG_ON(ret);
485 extent = NULL;
486 btrfs_release_path(root, path);
487 /* the extent will be freed later */
488 }
489 if (bookend && found_inline && start <= key.offset) {
490 u32 new_size;
491 new_size = btrfs_file_extent_calc_inline_size(
492 extent_end - end);
493 inode_sub_bytes(inode, end - key.offset);
494 btrfs_set_file_extent_ram_bytes(leaf, extent,
495 new_size);
496 if (!compression && !encryption)
497 ret = btrfs_truncate_item(trans, root, path,
498 new_size, 0);
499 BUG_ON(ret);
500 }
501 /* create bookend, splitting the extent in two */
502 if (bookend && found_extent) {
503 struct btrfs_key ins;
504 ins.objectid = inode->i_ino;
505 ins.offset = end;
506 btrfs_set_key_type(&ins, BTRFS_EXTENT_DATA_KEY);
507 423
508 btrfs_release_path(root, path); 424 search_start = extent_end;
509 path->leave_spinning = 1; 425 /*
510 ret = btrfs_insert_empty_item(trans, root, path, &ins, 426 * | ---- range to drop ----- |
511 sizeof(*extent)); 427 * | -------- extent -------- |
512 BUG_ON(ret); 428 */
429 if (start > key.offset && end >= extent_end) {
430 BUG_ON(del_nr > 0);
431 BUG_ON(extent_type == BTRFS_FILE_EXTENT_INLINE);
513 432
514 leaf = path->nodes[0]; 433 btrfs_set_file_extent_num_bytes(leaf, fi,
515 extent = btrfs_item_ptr(leaf, path->slots[0], 434 start - key.offset);
516 struct btrfs_file_extent_item); 435 btrfs_mark_buffer_dirty(leaf);
517 write_extent_buffer(leaf, &old, 436 if (disk_bytenr > 0) {
518 (unsigned long)extent, sizeof(old)); 437 inode_sub_bytes(inode, extent_end - start);
519 438 *hint_byte = disk_bytenr;
520 btrfs_set_file_extent_compression(leaf, extent, 439 }
521 compression); 440 if (end == extent_end)
522 btrfs_set_file_extent_encryption(leaf, extent, 441 break;
523 encryption);
524 btrfs_set_file_extent_other_encoding(leaf, extent,
525 other_encoding);
526 btrfs_set_file_extent_offset(leaf, extent,
527 le64_to_cpu(old.offset) + end - key.offset);
528 WARN_ON(le64_to_cpu(old.num_bytes) <
529 (extent_end - end));
530 btrfs_set_file_extent_num_bytes(leaf, extent,
531 extent_end - end);
532 442
533 /* 443 path->slots[0]++;
534 * set the ram bytes to the size of the full extent 444 goto next_slot;
535 * before splitting. This is a worst case flag,
536 * but its the best we can do because we don't know
537 * how splitting affects compression
538 */
539 btrfs_set_file_extent_ram_bytes(leaf, extent,
540 ram_bytes);
541 btrfs_set_file_extent_type(leaf, extent, found_type);
542
543 btrfs_unlock_up_safe(path, 1);
544 btrfs_mark_buffer_dirty(path->nodes[0]);
545 btrfs_set_lock_blocking(path->nodes[0]);
546
547 path->leave_spinning = 0;
548 btrfs_release_path(root, path);
549 if (disk_bytenr != 0)
550 inode_add_bytes(inode, extent_end - end);
551 } 445 }
552 446
553 if (found_extent && !keep) { 447 /*
554 u64 old_disk_bytenr = le64_to_cpu(old.disk_bytenr); 448 * | ---- range to drop ----- |
449 * | ------ extent ------ |
450 */
451 if (start <= key.offset && end >= extent_end) {
452 if (del_nr == 0) {
453 del_slot = path->slots[0];
454 del_nr = 1;
455 } else {
456 BUG_ON(del_slot + del_nr != path->slots[0]);
457 del_nr++;
458 }
555 459
556 if (old_disk_bytenr != 0) { 460 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
557 inode_sub_bytes(inode, 461 inode_sub_bytes(inode,
558 le64_to_cpu(old.num_bytes)); 462 extent_end - key.offset);
463 extent_end = ALIGN(extent_end,
464 root->sectorsize);
465 } else if (disk_bytenr > 0) {
559 ret = btrfs_free_extent(trans, root, 466 ret = btrfs_free_extent(trans, root,
560 old_disk_bytenr, 467 disk_bytenr, num_bytes, 0,
561 le64_to_cpu(old.disk_num_bytes), 468 root->root_key.objectid,
562 0, root->root_key.objectid,
563 key.objectid, key.offset - 469 key.objectid, key.offset -
564 le64_to_cpu(old.offset)); 470 extent_offset);
565 BUG_ON(ret); 471 BUG_ON(ret);
566 *hint_byte = old_disk_bytenr; 472 inode_sub_bytes(inode,
473 extent_end - key.offset);
474 *hint_byte = disk_bytenr;
567 } 475 }
568 }
569 476
570 if (search_start >= end) { 477 if (end == extent_end)
571 ret = 0; 478 break;
572 goto out; 479
480 if (path->slots[0] + 1 < btrfs_header_nritems(leaf)) {
481 path->slots[0]++;
482 goto next_slot;
483 }
484
485 ret = btrfs_del_items(trans, root, path, del_slot,
486 del_nr);
487 BUG_ON(ret);
488
489 del_nr = 0;
490 del_slot = 0;
491
492 btrfs_release_path(root, path);
493 continue;
573 } 494 }
495
496 BUG_ON(1);
574 } 497 }
575out: 498
576 btrfs_free_path(path); 499 if (del_nr > 0) {
577 if (locked_end > orig_locked_end) { 500 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
578 unlock_extent(&BTRFS_I(inode)->io_tree, orig_locked_end, 501 BUG_ON(ret);
579 locked_end - 1, GFP_NOFS);
580 } 502 }
503
504 btrfs_free_path(path);
581 return ret; 505 return ret;
582} 506}
583 507
584static int extent_mergeable(struct extent_buffer *leaf, int slot, 508static int extent_mergeable(struct extent_buffer *leaf, int slot,
585 u64 objectid, u64 bytenr, u64 *start, u64 *end) 509 u64 objectid, u64 bytenr, u64 orig_offset,
510 u64 *start, u64 *end)
586{ 511{
587 struct btrfs_file_extent_item *fi; 512 struct btrfs_file_extent_item *fi;
588 struct btrfs_key key; 513 struct btrfs_key key;
@@ -598,6 +523,7 @@ static int extent_mergeable(struct extent_buffer *leaf, int slot,
598 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); 523 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
599 if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG || 524 if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG ||
600 btrfs_file_extent_disk_bytenr(leaf, fi) != bytenr || 525 btrfs_file_extent_disk_bytenr(leaf, fi) != bytenr ||
526 btrfs_file_extent_offset(leaf, fi) != key.offset - orig_offset ||
601 btrfs_file_extent_compression(leaf, fi) || 527 btrfs_file_extent_compression(leaf, fi) ||
602 btrfs_file_extent_encryption(leaf, fi) || 528 btrfs_file_extent_encryption(leaf, fi) ||
603 btrfs_file_extent_other_encoding(leaf, fi)) 529 btrfs_file_extent_other_encoding(leaf, fi))
@@ -620,23 +546,24 @@ static int extent_mergeable(struct extent_buffer *leaf, int slot,
620 * two or three. 546 * two or three.
621 */ 547 */
622int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, 548int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
623 struct btrfs_root *root,
624 struct inode *inode, u64 start, u64 end) 549 struct inode *inode, u64 start, u64 end)
625{ 550{
551 struct btrfs_root *root = BTRFS_I(inode)->root;
626 struct extent_buffer *leaf; 552 struct extent_buffer *leaf;
627 struct btrfs_path *path; 553 struct btrfs_path *path;
628 struct btrfs_file_extent_item *fi; 554 struct btrfs_file_extent_item *fi;
629 struct btrfs_key key; 555 struct btrfs_key key;
556 struct btrfs_key new_key;
630 u64 bytenr; 557 u64 bytenr;
631 u64 num_bytes; 558 u64 num_bytes;
632 u64 extent_end; 559 u64 extent_end;
633 u64 orig_offset; 560 u64 orig_offset;
634 u64 other_start; 561 u64 other_start;
635 u64 other_end; 562 u64 other_end;
636 u64 split = start; 563 u64 split;
637 u64 locked_end = end; 564 int del_nr = 0;
638 int extent_type; 565 int del_slot = 0;
639 int split_end = 1; 566 int recow;
640 int ret; 567 int ret;
641 568
642 btrfs_drop_extent_cache(inode, start, end - 1, 0); 569 btrfs_drop_extent_cache(inode, start, end - 1, 0);
@@ -644,12 +571,11 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
644 path = btrfs_alloc_path(); 571 path = btrfs_alloc_path();
645 BUG_ON(!path); 572 BUG_ON(!path);
646again: 573again:
574 recow = 0;
575 split = start;
647 key.objectid = inode->i_ino; 576 key.objectid = inode->i_ino;
648 key.type = BTRFS_EXTENT_DATA_KEY; 577 key.type = BTRFS_EXTENT_DATA_KEY;
649 if (split == start) 578 key.offset = split;
650 key.offset = split;
651 else
652 key.offset = split - 1;
653 579
654 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 580 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
655 if (ret > 0 && path->slots[0] > 0) 581 if (ret > 0 && path->slots[0] > 0)
@@ -661,159 +587,158 @@ again:
661 key.type != BTRFS_EXTENT_DATA_KEY); 587 key.type != BTRFS_EXTENT_DATA_KEY);
662 fi = btrfs_item_ptr(leaf, path->slots[0], 588 fi = btrfs_item_ptr(leaf, path->slots[0],
663 struct btrfs_file_extent_item); 589 struct btrfs_file_extent_item);
664 extent_type = btrfs_file_extent_type(leaf, fi); 590 BUG_ON(btrfs_file_extent_type(leaf, fi) !=
665 BUG_ON(extent_type != BTRFS_FILE_EXTENT_PREALLOC); 591 BTRFS_FILE_EXTENT_PREALLOC);
666 extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi); 592 extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
667 BUG_ON(key.offset > start || extent_end < end); 593 BUG_ON(key.offset > start || extent_end < end);
668 594
669 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); 595 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
670 num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi); 596 num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
671 orig_offset = key.offset - btrfs_file_extent_offset(leaf, fi); 597 orig_offset = key.offset - btrfs_file_extent_offset(leaf, fi);
598 memcpy(&new_key, &key, sizeof(new_key));
672 599
673 if (key.offset == start) 600 if (start == key.offset && end < extent_end) {
674 split = end;
675
676 if (key.offset == start && extent_end == end) {
677 int del_nr = 0;
678 int del_slot = 0;
679 other_start = end;
680 other_end = 0;
681 if (extent_mergeable(leaf, path->slots[0] + 1, inode->i_ino,
682 bytenr, &other_start, &other_end)) {
683 extent_end = other_end;
684 del_slot = path->slots[0] + 1;
685 del_nr++;
686 ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
687 0, root->root_key.objectid,
688 inode->i_ino, orig_offset);
689 BUG_ON(ret);
690 }
691 other_start = 0; 601 other_start = 0;
692 other_end = start; 602 other_end = start;
693 if (extent_mergeable(leaf, path->slots[0] - 1, inode->i_ino, 603 if (extent_mergeable(leaf, path->slots[0] - 1,
694 bytenr, &other_start, &other_end)) { 604 inode->i_ino, bytenr, orig_offset,
695 key.offset = other_start; 605 &other_start, &other_end)) {
696 del_slot = path->slots[0]; 606 new_key.offset = end;
697 del_nr++; 607 btrfs_set_item_key_safe(trans, root, path, &new_key);
698 ret = btrfs_free_extent(trans, root, bytenr, num_bytes, 608 fi = btrfs_item_ptr(leaf, path->slots[0],
699 0, root->root_key.objectid, 609 struct btrfs_file_extent_item);
700 inode->i_ino, orig_offset); 610 btrfs_set_file_extent_num_bytes(leaf, fi,
701 BUG_ON(ret); 611 extent_end - end);
702 } 612 btrfs_set_file_extent_offset(leaf, fi,
703 split_end = 0; 613 end - orig_offset);
704 if (del_nr == 0) { 614 fi = btrfs_item_ptr(leaf, path->slots[0] - 1,
705 btrfs_set_file_extent_type(leaf, fi, 615 struct btrfs_file_extent_item);
706 BTRFS_FILE_EXTENT_REG); 616 btrfs_set_file_extent_num_bytes(leaf, fi,
707 goto done; 617 end - other_start);
708 } 618 btrfs_mark_buffer_dirty(leaf);
709 619 goto out;
710 fi = btrfs_item_ptr(leaf, del_slot - 1,
711 struct btrfs_file_extent_item);
712 btrfs_set_file_extent_type(leaf, fi, BTRFS_FILE_EXTENT_REG);
713 btrfs_set_file_extent_num_bytes(leaf, fi,
714 extent_end - key.offset);
715 btrfs_mark_buffer_dirty(leaf);
716
717 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
718 BUG_ON(ret);
719 goto release;
720 } else if (split == start) {
721 if (locked_end < extent_end) {
722 ret = try_lock_extent(&BTRFS_I(inode)->io_tree,
723 locked_end, extent_end - 1, GFP_NOFS);
724 if (!ret) {
725 btrfs_release_path(root, path);
726 lock_extent(&BTRFS_I(inode)->io_tree,
727 locked_end, extent_end - 1, GFP_NOFS);
728 locked_end = extent_end;
729 goto again;
730 }
731 locked_end = extent_end;
732 } 620 }
733 btrfs_set_file_extent_num_bytes(leaf, fi, split - key.offset);
734 } else {
735 BUG_ON(key.offset != start);
736 key.offset = split;
737 btrfs_set_file_extent_offset(leaf, fi, key.offset -
738 orig_offset);
739 btrfs_set_file_extent_num_bytes(leaf, fi, extent_end - split);
740 btrfs_set_item_key_safe(trans, root, path, &key);
741 extent_end = split;
742 } 621 }
743 622
744 if (extent_end == end) { 623 if (start > key.offset && end == extent_end) {
745 split_end = 0;
746 extent_type = BTRFS_FILE_EXTENT_REG;
747 }
748 if (extent_end == end && split == start) {
749 other_start = end; 624 other_start = end;
750 other_end = 0; 625 other_end = 0;
751 if (extent_mergeable(leaf, path->slots[0] + 1, inode->i_ino, 626 if (extent_mergeable(leaf, path->slots[0] + 1,
752 bytenr, &other_start, &other_end)) { 627 inode->i_ino, bytenr, orig_offset,
753 path->slots[0]++; 628 &other_start, &other_end)) {
754 fi = btrfs_item_ptr(leaf, path->slots[0], 629 fi = btrfs_item_ptr(leaf, path->slots[0],
755 struct btrfs_file_extent_item); 630 struct btrfs_file_extent_item);
756 key.offset = split;
757 btrfs_set_item_key_safe(trans, root, path, &key);
758 btrfs_set_file_extent_offset(leaf, fi, key.offset -
759 orig_offset);
760 btrfs_set_file_extent_num_bytes(leaf, fi, 631 btrfs_set_file_extent_num_bytes(leaf, fi,
761 other_end - split); 632 start - key.offset);
762 goto done; 633 path->slots[0]++;
763 } 634 new_key.offset = start;
764 } 635 btrfs_set_item_key_safe(trans, root, path, &new_key);
765 if (extent_end == end && split == end) { 636
766 other_start = 0;
767 other_end = start;
768 if (extent_mergeable(leaf, path->slots[0] - 1 , inode->i_ino,
769 bytenr, &other_start, &other_end)) {
770 path->slots[0]--;
771 fi = btrfs_item_ptr(leaf, path->slots[0], 637 fi = btrfs_item_ptr(leaf, path->slots[0],
772 struct btrfs_file_extent_item); 638 struct btrfs_file_extent_item);
773 btrfs_set_file_extent_num_bytes(leaf, fi, extent_end - 639 btrfs_set_file_extent_num_bytes(leaf, fi,
774 other_start); 640 other_end - start);
775 goto done; 641 btrfs_set_file_extent_offset(leaf, fi,
642 start - orig_offset);
643 btrfs_mark_buffer_dirty(leaf);
644 goto out;
776 } 645 }
777 } 646 }
778 647
779 btrfs_mark_buffer_dirty(leaf); 648 while (start > key.offset || end < extent_end) {
649 if (key.offset == start)
650 split = end;
780 651
781 ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0, 652 new_key.offset = split;
782 root->root_key.objectid, 653 ret = btrfs_duplicate_item(trans, root, path, &new_key);
783 inode->i_ino, orig_offset); 654 if (ret == -EAGAIN) {
784 BUG_ON(ret); 655 btrfs_release_path(root, path);
785 btrfs_release_path(root, path); 656 goto again;
657 }
658 BUG_ON(ret < 0);
786 659
787 key.offset = start; 660 leaf = path->nodes[0];
788 ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*fi)); 661 fi = btrfs_item_ptr(leaf, path->slots[0] - 1,
789 BUG_ON(ret); 662 struct btrfs_file_extent_item);
663 btrfs_set_file_extent_num_bytes(leaf, fi,
664 split - key.offset);
790 665
791 leaf = path->nodes[0]; 666 fi = btrfs_item_ptr(leaf, path->slots[0],
792 fi = btrfs_item_ptr(leaf, path->slots[0], 667 struct btrfs_file_extent_item);
793 struct btrfs_file_extent_item); 668
794 btrfs_set_file_extent_generation(leaf, fi, trans->transid); 669 btrfs_set_file_extent_offset(leaf, fi, split - orig_offset);
795 btrfs_set_file_extent_type(leaf, fi, extent_type); 670 btrfs_set_file_extent_num_bytes(leaf, fi,
796 btrfs_set_file_extent_disk_bytenr(leaf, fi, bytenr); 671 extent_end - split);
797 btrfs_set_file_extent_disk_num_bytes(leaf, fi, num_bytes); 672 btrfs_mark_buffer_dirty(leaf);
798 btrfs_set_file_extent_offset(leaf, fi, key.offset - orig_offset); 673
799 btrfs_set_file_extent_num_bytes(leaf, fi, extent_end - key.offset); 674 ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0,
800 btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes); 675 root->root_key.objectid,
801 btrfs_set_file_extent_compression(leaf, fi, 0); 676 inode->i_ino, orig_offset);
802 btrfs_set_file_extent_encryption(leaf, fi, 0); 677 BUG_ON(ret);
803 btrfs_set_file_extent_other_encoding(leaf, fi, 0); 678
804done: 679 if (split == start) {
805 btrfs_mark_buffer_dirty(leaf); 680 key.offset = start;
806 681 } else {
807release: 682 BUG_ON(start != key.offset);
808 btrfs_release_path(root, path); 683 path->slots[0]--;
809 if (split_end && split == start) { 684 extent_end = end;
810 split = end; 685 }
811 goto again; 686 recow = 1;
812 } 687 }
813 if (locked_end > end) { 688
814 unlock_extent(&BTRFS_I(inode)->io_tree, end, locked_end - 1, 689 other_start = end;
815 GFP_NOFS); 690 other_end = 0;
691 if (extent_mergeable(leaf, path->slots[0] + 1,
692 inode->i_ino, bytenr, orig_offset,
693 &other_start, &other_end)) {
694 if (recow) {
695 btrfs_release_path(root, path);
696 goto again;
697 }
698 extent_end = other_end;
699 del_slot = path->slots[0] + 1;
700 del_nr++;
701 ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
702 0, root->root_key.objectid,
703 inode->i_ino, orig_offset);
704 BUG_ON(ret);
816 } 705 }
706 other_start = 0;
707 other_end = start;
708 if (extent_mergeable(leaf, path->slots[0] - 1,
709 inode->i_ino, bytenr, orig_offset,
710 &other_start, &other_end)) {
711 if (recow) {
712 btrfs_release_path(root, path);
713 goto again;
714 }
715 key.offset = other_start;
716 del_slot = path->slots[0];
717 del_nr++;
718 ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
719 0, root->root_key.objectid,
720 inode->i_ino, orig_offset);
721 BUG_ON(ret);
722 }
723 if (del_nr == 0) {
724 fi = btrfs_item_ptr(leaf, path->slots[0],
725 struct btrfs_file_extent_item);
726 btrfs_set_file_extent_type(leaf, fi,
727 BTRFS_FILE_EXTENT_REG);
728 btrfs_mark_buffer_dirty(leaf);
729 } else {
730 fi = btrfs_item_ptr(leaf, del_slot - 1,
731 struct btrfs_file_extent_item);
732 btrfs_set_file_extent_type(leaf, fi,
733 BTRFS_FILE_EXTENT_REG);
734 btrfs_set_file_extent_num_bytes(leaf, fi,
735 extent_end - key.offset);
736 btrfs_mark_buffer_dirty(leaf);
737
738 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
739 BUG_ON(ret);
740 }
741out:
817 btrfs_free_path(path); 742 btrfs_free_path(path);
818 return 0; 743 return 0;
819} 744}
@@ -909,7 +834,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
909 unsigned long last_index; 834 unsigned long last_index;
910 int will_write; 835 int will_write;
911 836
912 will_write = ((file->f_flags & O_SYNC) || IS_SYNC(inode) || 837 will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) ||
913 (file->f_flags & O_DIRECT)); 838 (file->f_flags & O_DIRECT));
914 839
915 nrptrs = min((count + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE, 840 nrptrs = min((count + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE,
@@ -1076,7 +1001,7 @@ out_nolock:
1076 if (err) 1001 if (err)
1077 num_written = err; 1002 num_written = err;
1078 1003
1079 if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) { 1004 if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) {
1080 trans = btrfs_start_transaction(root, 1); 1005 trans = btrfs_start_transaction(root, 1);
1081 ret = btrfs_log_dentry_safe(trans, root, 1006 ret = btrfs_log_dentry_safe(trans, root,
1082 file->f_dentry); 1007 file->f_dentry);
@@ -1086,8 +1011,10 @@ out_nolock:
1086 btrfs_end_transaction(trans, root); 1011 btrfs_end_transaction(trans, root);
1087 else 1012 else
1088 btrfs_commit_transaction(trans, root); 1013 btrfs_commit_transaction(trans, root);
1089 } else { 1014 } else if (ret != BTRFS_NO_LOG_SYNC) {
1090 btrfs_commit_transaction(trans, root); 1015 btrfs_commit_transaction(trans, root);
1016 } else {
1017 btrfs_end_transaction(trans, root);
1091 } 1018 }
1092 } 1019 }
1093 if (file->f_flags & O_DIRECT) { 1020 if (file->f_flags & O_DIRECT) {
@@ -1137,6 +1064,13 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync)
1137 int ret = 0; 1064 int ret = 0;
1138 struct btrfs_trans_handle *trans; 1065 struct btrfs_trans_handle *trans;
1139 1066
1067
1068 /* we wait first, since the writeback may change the inode */
1069 root->log_batch++;
1070 /* the VFS called filemap_fdatawrite for us */
1071 btrfs_wait_ordered_range(inode, 0, (u64)-1);
1072 root->log_batch++;
1073
1140 /* 1074 /*
1141 * check the transaction that last modified this inode 1075 * check the transaction that last modified this inode
1142 * and see if its already been committed 1076 * and see if its already been committed
@@ -1144,6 +1078,11 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync)
1144 if (!BTRFS_I(inode)->last_trans) 1078 if (!BTRFS_I(inode)->last_trans)
1145 goto out; 1079 goto out;
1146 1080
1081 /*
1082 * if the last transaction that changed this file was before
1083 * the current transaction, we can bail out now without any
1084 * syncing
1085 */
1147 mutex_lock(&root->fs_info->trans_mutex); 1086 mutex_lock(&root->fs_info->trans_mutex);
1148 if (BTRFS_I(inode)->last_trans <= 1087 if (BTRFS_I(inode)->last_trans <=
1149 root->fs_info->last_trans_committed) { 1088 root->fs_info->last_trans_committed) {
@@ -1153,13 +1092,6 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync)
1153 } 1092 }
1154 mutex_unlock(&root->fs_info->trans_mutex); 1093 mutex_unlock(&root->fs_info->trans_mutex);
1155 1094
1156 root->log_batch++;
1157 filemap_fdatawrite(inode->i_mapping);
1158 btrfs_wait_ordered_range(inode, 0, (u64)-1);
1159 root->log_batch++;
1160
1161 if (datasync && !(inode->i_state & I_DIRTY_PAGES))
1162 goto out;
1163 /* 1095 /*
1164 * ok we haven't committed the transaction yet, lets do a commit 1096 * ok we haven't committed the transaction yet, lets do a commit
1165 */ 1097 */
@@ -1188,18 +1120,22 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync)
1188 */ 1120 */
1189 mutex_unlock(&dentry->d_inode->i_mutex); 1121 mutex_unlock(&dentry->d_inode->i_mutex);
1190 1122
1191 if (ret > 0) { 1123 if (ret != BTRFS_NO_LOG_SYNC) {
1192 ret = btrfs_commit_transaction(trans, root); 1124 if (ret > 0) {
1193 } else {
1194 ret = btrfs_sync_log(trans, root);
1195 if (ret == 0)
1196 ret = btrfs_end_transaction(trans, root);
1197 else
1198 ret = btrfs_commit_transaction(trans, root); 1125 ret = btrfs_commit_transaction(trans, root);
1126 } else {
1127 ret = btrfs_sync_log(trans, root);
1128 if (ret == 0)
1129 ret = btrfs_end_transaction(trans, root);
1130 else
1131 ret = btrfs_commit_transaction(trans, root);
1132 }
1133 } else {
1134 ret = btrfs_end_transaction(trans, root);
1199 } 1135 }
1200 mutex_lock(&dentry->d_inode->i_mutex); 1136 mutex_lock(&dentry->d_inode->i_mutex);
1201out: 1137out:
1202 return ret > 0 ? EIO : ret; 1138 return ret > 0 ? -EIO : ret;
1203} 1139}
1204 1140
1205static const struct vm_operations_struct btrfs_file_vm_ops = { 1141static const struct vm_operations_struct btrfs_file_vm_ops = {
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 5c2caad76212..cb2849f03251 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -1296,7 +1296,7 @@ again:
1296 window_start = entry->offset; 1296 window_start = entry->offset;
1297 window_free = entry->bytes; 1297 window_free = entry->bytes;
1298 last = entry; 1298 last = entry;
1299 max_extent = 0; 1299 max_extent = entry->bytes;
1300 } else { 1300 } else {
1301 last = next; 1301 last = next;
1302 window_free += next->bytes; 1302 window_free += next->bytes;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 9e138b793dc7..4deb280f8969 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -88,13 +88,14 @@ static noinline int cow_file_range(struct inode *inode,
88 u64 start, u64 end, int *page_started, 88 u64 start, u64 end, int *page_started,
89 unsigned long *nr_written, int unlock); 89 unsigned long *nr_written, int unlock);
90 90
91static int btrfs_init_inode_security(struct inode *inode, struct inode *dir) 91static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
92 struct inode *inode, struct inode *dir)
92{ 93{
93 int err; 94 int err;
94 95
95 err = btrfs_init_acl(inode, dir); 96 err = btrfs_init_acl(trans, inode, dir);
96 if (!err) 97 if (!err)
97 err = btrfs_xattr_security_init(inode, dir); 98 err = btrfs_xattr_security_init(trans, inode, dir);
98 return err; 99 return err;
99} 100}
100 101
@@ -188,8 +189,18 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
188 btrfs_mark_buffer_dirty(leaf); 189 btrfs_mark_buffer_dirty(leaf);
189 btrfs_free_path(path); 190 btrfs_free_path(path);
190 191
192 /*
193 * we're an inline extent, so nobody can
194 * extend the file past i_size without locking
195 * a page we already have locked.
196 *
197 * We must do any isize and inode updates
198 * before we unlock the pages. Otherwise we
199 * could end up racing with unlink.
200 */
191 BTRFS_I(inode)->disk_i_size = inode->i_size; 201 BTRFS_I(inode)->disk_i_size = inode->i_size;
192 btrfs_update_inode(trans, root, inode); 202 btrfs_update_inode(trans, root, inode);
203
193 return 0; 204 return 0;
194fail: 205fail:
195 btrfs_free_path(path); 206 btrfs_free_path(path);
@@ -230,8 +241,7 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
230 return 1; 241 return 1;
231 } 242 }
232 243
233 ret = btrfs_drop_extents(trans, root, inode, start, 244 ret = btrfs_drop_extents(trans, inode, start, aligned_end,
234 aligned_end, aligned_end, start,
235 &hint_byte, 1); 245 &hint_byte, 1);
236 BUG_ON(ret); 246 BUG_ON(ret);
237 247
@@ -416,7 +426,6 @@ again:
416 start, end, 426 start, end,
417 total_compressed, pages); 427 total_compressed, pages);
418 } 428 }
419 btrfs_end_transaction(trans, root);
420 if (ret == 0) { 429 if (ret == 0) {
421 /* 430 /*
422 * inline extent creation worked, we don't need 431 * inline extent creation worked, we don't need
@@ -430,9 +439,11 @@ again:
430 EXTENT_CLEAR_DELALLOC | 439 EXTENT_CLEAR_DELALLOC |
431 EXTENT_CLEAR_ACCOUNTING | 440 EXTENT_CLEAR_ACCOUNTING |
432 EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK); 441 EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK);
433 ret = 0; 442
443 btrfs_end_transaction(trans, root);
434 goto free_pages_out; 444 goto free_pages_out;
435 } 445 }
446 btrfs_end_transaction(trans, root);
436 } 447 }
437 448
438 if (will_compress) { 449 if (will_compress) {
@@ -472,7 +483,8 @@ again:
472 nr_pages_ret = 0; 483 nr_pages_ret = 0;
473 484
474 /* flag the file so we don't compress in the future */ 485 /* flag the file so we don't compress in the future */
475 BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS; 486 if (!btrfs_test_opt(root, FORCE_COMPRESS))
487 BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS;
476 } 488 }
477 if (will_compress) { 489 if (will_compress) {
478 *num_added += 1; 490 *num_added += 1;
@@ -538,12 +550,11 @@ static noinline int submit_compressed_extents(struct inode *inode,
538 struct btrfs_root *root = BTRFS_I(inode)->root; 550 struct btrfs_root *root = BTRFS_I(inode)->root;
539 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; 551 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
540 struct extent_io_tree *io_tree; 552 struct extent_io_tree *io_tree;
541 int ret; 553 int ret = 0;
542 554
543 if (list_empty(&async_cow->extents)) 555 if (list_empty(&async_cow->extents))
544 return 0; 556 return 0;
545 557
546 trans = btrfs_join_transaction(root, 1);
547 558
548 while (!list_empty(&async_cow->extents)) { 559 while (!list_empty(&async_cow->extents)) {
549 async_extent = list_entry(async_cow->extents.next, 560 async_extent = list_entry(async_cow->extents.next,
@@ -552,6 +563,7 @@ static noinline int submit_compressed_extents(struct inode *inode,
552 563
553 io_tree = &BTRFS_I(inode)->io_tree; 564 io_tree = &BTRFS_I(inode)->io_tree;
554 565
566retry:
555 /* did the compression code fall back to uncompressed IO? */ 567 /* did the compression code fall back to uncompressed IO? */
556 if (!async_extent->pages) { 568 if (!async_extent->pages) {
557 int page_started = 0; 569 int page_started = 0;
@@ -562,11 +574,11 @@ static noinline int submit_compressed_extents(struct inode *inode,
562 async_extent->ram_size - 1, GFP_NOFS); 574 async_extent->ram_size - 1, GFP_NOFS);
563 575
564 /* allocate blocks */ 576 /* allocate blocks */
565 cow_file_range(inode, async_cow->locked_page, 577 ret = cow_file_range(inode, async_cow->locked_page,
566 async_extent->start, 578 async_extent->start,
567 async_extent->start + 579 async_extent->start +
568 async_extent->ram_size - 1, 580 async_extent->ram_size - 1,
569 &page_started, &nr_written, 0); 581 &page_started, &nr_written, 0);
570 582
571 /* 583 /*
572 * if page_started, cow_file_range inserted an 584 * if page_started, cow_file_range inserted an
@@ -574,7 +586,7 @@ static noinline int submit_compressed_extents(struct inode *inode,
574 * and IO for us. Otherwise, we need to submit 586 * and IO for us. Otherwise, we need to submit
575 * all those pages down to the drive. 587 * all those pages down to the drive.
576 */ 588 */
577 if (!page_started) 589 if (!page_started && !ret)
578 extent_write_locked_range(io_tree, 590 extent_write_locked_range(io_tree,
579 inode, async_extent->start, 591 inode, async_extent->start,
580 async_extent->start + 592 async_extent->start +
@@ -589,6 +601,30 @@ static noinline int submit_compressed_extents(struct inode *inode,
589 lock_extent(io_tree, async_extent->start, 601 lock_extent(io_tree, async_extent->start,
590 async_extent->start + async_extent->ram_size - 1, 602 async_extent->start + async_extent->ram_size - 1,
591 GFP_NOFS); 603 GFP_NOFS);
604
605 trans = btrfs_join_transaction(root, 1);
606 ret = btrfs_reserve_extent(trans, root,
607 async_extent->compressed_size,
608 async_extent->compressed_size,
609 0, alloc_hint,
610 (u64)-1, &ins, 1);
611 btrfs_end_transaction(trans, root);
612
613 if (ret) {
614 int i;
615 for (i = 0; i < async_extent->nr_pages; i++) {
616 WARN_ON(async_extent->pages[i]->mapping);
617 page_cache_release(async_extent->pages[i]);
618 }
619 kfree(async_extent->pages);
620 async_extent->nr_pages = 0;
621 async_extent->pages = NULL;
622 unlock_extent(io_tree, async_extent->start,
623 async_extent->start +
624 async_extent->ram_size - 1, GFP_NOFS);
625 goto retry;
626 }
627
592 /* 628 /*
593 * here we're doing allocation and writeback of the 629 * here we're doing allocation and writeback of the
594 * compressed pages 630 * compressed pages
@@ -597,12 +633,6 @@ static noinline int submit_compressed_extents(struct inode *inode,
597 async_extent->start + 633 async_extent->start +
598 async_extent->ram_size - 1, 0); 634 async_extent->ram_size - 1, 0);
599 635
600 ret = btrfs_reserve_extent(trans, root,
601 async_extent->compressed_size,
602 async_extent->compressed_size,
603 0, alloc_hint,
604 (u64)-1, &ins, 1);
605 BUG_ON(ret);
606 em = alloc_extent_map(GFP_NOFS); 636 em = alloc_extent_map(GFP_NOFS);
607 em->start = async_extent->start; 637 em->start = async_extent->start;
608 em->len = async_extent->ram_size; 638 em->len = async_extent->ram_size;
@@ -634,8 +664,6 @@ static noinline int submit_compressed_extents(struct inode *inode,
634 BTRFS_ORDERED_COMPRESSED); 664 BTRFS_ORDERED_COMPRESSED);
635 BUG_ON(ret); 665 BUG_ON(ret);
636 666
637 btrfs_end_transaction(trans, root);
638
639 /* 667 /*
640 * clear dirty, set writeback and unlock the pages. 668 * clear dirty, set writeback and unlock the pages.
641 */ 669 */
@@ -657,13 +685,11 @@ static noinline int submit_compressed_extents(struct inode *inode,
657 async_extent->nr_pages); 685 async_extent->nr_pages);
658 686
659 BUG_ON(ret); 687 BUG_ON(ret);
660 trans = btrfs_join_transaction(root, 1);
661 alloc_hint = ins.objectid + ins.offset; 688 alloc_hint = ins.objectid + ins.offset;
662 kfree(async_extent); 689 kfree(async_extent);
663 cond_resched(); 690 cond_resched();
664 } 691 }
665 692
666 btrfs_end_transaction(trans, root);
667 return 0; 693 return 0;
668} 694}
669 695
@@ -727,6 +753,7 @@ static noinline int cow_file_range(struct inode *inode,
727 EXTENT_CLEAR_DIRTY | 753 EXTENT_CLEAR_DIRTY |
728 EXTENT_SET_WRITEBACK | 754 EXTENT_SET_WRITEBACK |
729 EXTENT_END_WRITEBACK); 755 EXTENT_END_WRITEBACK);
756
730 *nr_written = *nr_written + 757 *nr_written = *nr_written +
731 (end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE; 758 (end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE;
732 *page_started = 1; 759 *page_started = 1;
@@ -743,8 +770,22 @@ static noinline int cow_file_range(struct inode *inode,
743 em = search_extent_mapping(&BTRFS_I(inode)->extent_tree, 770 em = search_extent_mapping(&BTRFS_I(inode)->extent_tree,
744 start, num_bytes); 771 start, num_bytes);
745 if (em) { 772 if (em) {
746 alloc_hint = em->block_start; 773 /*
747 free_extent_map(em); 774 * if block start isn't an actual block number then find the
775 * first block in this inode and use that as a hint. If that
776 * block is also bogus then just don't worry about it.
777 */
778 if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
779 free_extent_map(em);
780 em = search_extent_mapping(em_tree, 0, 0);
781 if (em && em->block_start < EXTENT_MAP_LAST_BYTE)
782 alloc_hint = em->block_start;
783 if (em)
784 free_extent_map(em);
785 } else {
786 alloc_hint = em->block_start;
787 free_extent_map(em);
788 }
748 } 789 }
749 read_unlock(&BTRFS_I(inode)->extent_tree.lock); 790 read_unlock(&BTRFS_I(inode)->extent_tree.lock);
750 btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0); 791 btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0);
@@ -1567,7 +1608,6 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
1567 struct inode *inode, u64 file_pos, 1608 struct inode *inode, u64 file_pos,
1568 u64 disk_bytenr, u64 disk_num_bytes, 1609 u64 disk_bytenr, u64 disk_num_bytes,
1569 u64 num_bytes, u64 ram_bytes, 1610 u64 num_bytes, u64 ram_bytes,
1570 u64 locked_end,
1571 u8 compression, u8 encryption, 1611 u8 compression, u8 encryption,
1572 u16 other_encoding, int extent_type) 1612 u16 other_encoding, int extent_type)
1573{ 1613{
@@ -1593,9 +1633,8 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
1593 * the caller is expected to unpin it and allow it to be merged 1633 * the caller is expected to unpin it and allow it to be merged
1594 * with the others. 1634 * with the others.
1595 */ 1635 */
1596 ret = btrfs_drop_extents(trans, root, inode, file_pos, 1636 ret = btrfs_drop_extents(trans, inode, file_pos, file_pos + num_bytes,
1597 file_pos + num_bytes, locked_end, 1637 &hint, 0);
1598 file_pos, &hint, 0);
1599 BUG_ON(ret); 1638 BUG_ON(ret);
1600 1639
1601 ins.objectid = inode->i_ino; 1640 ins.objectid = inode->i_ino;
@@ -1642,24 +1681,6 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
1642 * before we start the transaction. It limits the amount of btree 1681 * before we start the transaction. It limits the amount of btree
1643 * reads required while inside the transaction. 1682 * reads required while inside the transaction.
1644 */ 1683 */
1645static noinline void reada_csum(struct btrfs_root *root,
1646 struct btrfs_path *path,
1647 struct btrfs_ordered_extent *ordered_extent)
1648{
1649 struct btrfs_ordered_sum *sum;
1650 u64 bytenr;
1651
1652 sum = list_entry(ordered_extent->list.next, struct btrfs_ordered_sum,
1653 list);
1654 bytenr = sum->sums[0].bytenr;
1655
1656 /*
1657 * we don't care about the results, the point of this search is
1658 * just to get the btree leaves into ram
1659 */
1660 btrfs_lookup_csum(NULL, root->fs_info->csum_root, path, bytenr, 0);
1661}
1662
1663/* as ordered data IO finishes, this gets called so we can finish 1684/* as ordered data IO finishes, this gets called so we can finish
1664 * an ordered extent if the range of bytes in the file it covers are 1685 * an ordered extent if the range of bytes in the file it covers are
1665 * fully written. 1686 * fully written.
@@ -1670,7 +1691,6 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1670 struct btrfs_trans_handle *trans; 1691 struct btrfs_trans_handle *trans;
1671 struct btrfs_ordered_extent *ordered_extent = NULL; 1692 struct btrfs_ordered_extent *ordered_extent = NULL;
1672 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 1693 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
1673 struct btrfs_path *path;
1674 int compressed = 0; 1694 int compressed = 0;
1675 int ret; 1695 int ret;
1676 1696
@@ -1678,46 +1698,32 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1678 if (!ret) 1698 if (!ret)
1679 return 0; 1699 return 0;
1680 1700
1681 /* 1701 ordered_extent = btrfs_lookup_ordered_extent(inode, start);
1682 * before we join the transaction, try to do some of our IO. 1702 BUG_ON(!ordered_extent);
1683 * This will limit the amount of IO that we have to do with 1703
1684 * the transaction running. We're unlikely to need to do any 1704 if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
1685 * IO if the file extents are new, the disk_i_size checks 1705 BUG_ON(!list_empty(&ordered_extent->list));
1686 * covers the most common case. 1706 ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
1687 */ 1707 if (!ret) {
1688 if (start < BTRFS_I(inode)->disk_i_size) { 1708 trans = btrfs_join_transaction(root, 1);
1689 path = btrfs_alloc_path(); 1709 ret = btrfs_update_inode(trans, root, inode);
1690 if (path) { 1710 BUG_ON(ret);
1691 ret = btrfs_lookup_file_extent(NULL, root, path, 1711 btrfs_end_transaction(trans, root);
1692 inode->i_ino,
1693 start, 0);
1694 ordered_extent = btrfs_lookup_ordered_extent(inode,
1695 start);
1696 if (!list_empty(&ordered_extent->list)) {
1697 btrfs_release_path(root, path);
1698 reada_csum(root, path, ordered_extent);
1699 }
1700 btrfs_free_path(path);
1701 } 1712 }
1713 goto out;
1702 } 1714 }
1703 1715
1704 trans = btrfs_join_transaction(root, 1);
1705
1706 if (!ordered_extent)
1707 ordered_extent = btrfs_lookup_ordered_extent(inode, start);
1708 BUG_ON(!ordered_extent);
1709 if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags))
1710 goto nocow;
1711
1712 lock_extent(io_tree, ordered_extent->file_offset, 1716 lock_extent(io_tree, ordered_extent->file_offset,
1713 ordered_extent->file_offset + ordered_extent->len - 1, 1717 ordered_extent->file_offset + ordered_extent->len - 1,
1714 GFP_NOFS); 1718 GFP_NOFS);
1715 1719
1720 trans = btrfs_join_transaction(root, 1);
1721
1716 if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags)) 1722 if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags))
1717 compressed = 1; 1723 compressed = 1;
1718 if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) { 1724 if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) {
1719 BUG_ON(compressed); 1725 BUG_ON(compressed);
1720 ret = btrfs_mark_extent_written(trans, root, inode, 1726 ret = btrfs_mark_extent_written(trans, inode,
1721 ordered_extent->file_offset, 1727 ordered_extent->file_offset,
1722 ordered_extent->file_offset + 1728 ordered_extent->file_offset +
1723 ordered_extent->len); 1729 ordered_extent->len);
@@ -1729,8 +1735,6 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1729 ordered_extent->disk_len, 1735 ordered_extent->disk_len,
1730 ordered_extent->len, 1736 ordered_extent->len,
1731 ordered_extent->len, 1737 ordered_extent->len,
1732 ordered_extent->file_offset +
1733 ordered_extent->len,
1734 compressed, 0, 0, 1738 compressed, 0, 0,
1735 BTRFS_FILE_EXTENT_REG); 1739 BTRFS_FILE_EXTENT_REG);
1736 unpin_extent_cache(&BTRFS_I(inode)->extent_tree, 1740 unpin_extent_cache(&BTRFS_I(inode)->extent_tree,
@@ -1741,22 +1745,20 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1741 unlock_extent(io_tree, ordered_extent->file_offset, 1745 unlock_extent(io_tree, ordered_extent->file_offset,
1742 ordered_extent->file_offset + ordered_extent->len - 1, 1746 ordered_extent->file_offset + ordered_extent->len - 1,
1743 GFP_NOFS); 1747 GFP_NOFS);
1744nocow:
1745 add_pending_csums(trans, inode, ordered_extent->file_offset, 1748 add_pending_csums(trans, inode, ordered_extent->file_offset,
1746 &ordered_extent->list); 1749 &ordered_extent->list);
1747 1750
1748 mutex_lock(&BTRFS_I(inode)->extent_mutex); 1751 /* this also removes the ordered extent from the tree */
1749 btrfs_ordered_update_i_size(inode, ordered_extent); 1752 btrfs_ordered_update_i_size(inode, 0, ordered_extent);
1750 btrfs_update_inode(trans, root, inode); 1753 ret = btrfs_update_inode(trans, root, inode);
1751 btrfs_remove_ordered_extent(inode, ordered_extent); 1754 BUG_ON(ret);
1752 mutex_unlock(&BTRFS_I(inode)->extent_mutex); 1755 btrfs_end_transaction(trans, root);
1753 1756out:
1754 /* once for us */ 1757 /* once for us */
1755 btrfs_put_ordered_extent(ordered_extent); 1758 btrfs_put_ordered_extent(ordered_extent);
1756 /* once for the tree */ 1759 /* once for the tree */
1757 btrfs_put_ordered_extent(ordered_extent); 1760 btrfs_put_ordered_extent(ordered_extent);
1758 1761
1759 btrfs_end_transaction(trans, root);
1760 return 0; 1762 return 0;
1761} 1763}
1762 1764
@@ -1979,6 +1981,54 @@ zeroit:
1979 return -EIO; 1981 return -EIO;
1980} 1982}
1981 1983
1984struct delayed_iput {
1985 struct list_head list;
1986 struct inode *inode;
1987};
1988
1989void btrfs_add_delayed_iput(struct inode *inode)
1990{
1991 struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
1992 struct delayed_iput *delayed;
1993
1994 if (atomic_add_unless(&inode->i_count, -1, 1))
1995 return;
1996
1997 delayed = kmalloc(sizeof(*delayed), GFP_NOFS | __GFP_NOFAIL);
1998 delayed->inode = inode;
1999
2000 spin_lock(&fs_info->delayed_iput_lock);
2001 list_add_tail(&delayed->list, &fs_info->delayed_iputs);
2002 spin_unlock(&fs_info->delayed_iput_lock);
2003}
2004
2005void btrfs_run_delayed_iputs(struct btrfs_root *root)
2006{
2007 LIST_HEAD(list);
2008 struct btrfs_fs_info *fs_info = root->fs_info;
2009 struct delayed_iput *delayed;
2010 int empty;
2011
2012 spin_lock(&fs_info->delayed_iput_lock);
2013 empty = list_empty(&fs_info->delayed_iputs);
2014 spin_unlock(&fs_info->delayed_iput_lock);
2015 if (empty)
2016 return;
2017
2018 down_read(&root->fs_info->cleanup_work_sem);
2019 spin_lock(&fs_info->delayed_iput_lock);
2020 list_splice_init(&fs_info->delayed_iputs, &list);
2021 spin_unlock(&fs_info->delayed_iput_lock);
2022
2023 while (!list_empty(&list)) {
2024 delayed = list_entry(list.next, struct delayed_iput, list);
2025 list_del(&delayed->list);
2026 iput(delayed->inode);
2027 kfree(delayed);
2028 }
2029 up_read(&root->fs_info->cleanup_work_sem);
2030}
2031
1982/* 2032/*
1983 * This creates an orphan entry for the given inode in case something goes 2033 * This creates an orphan entry for the given inode in case something goes
1984 * wrong in the middle of an unlink/truncate. 2034 * wrong in the middle of an unlink/truncate.
@@ -2051,16 +2101,17 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
2051 struct inode *inode; 2101 struct inode *inode;
2052 int ret = 0, nr_unlink = 0, nr_truncate = 0; 2102 int ret = 0, nr_unlink = 0, nr_truncate = 0;
2053 2103
2054 path = btrfs_alloc_path(); 2104 if (!xchg(&root->clean_orphans, 0))
2055 if (!path)
2056 return; 2105 return;
2106
2107 path = btrfs_alloc_path();
2108 BUG_ON(!path);
2057 path->reada = -1; 2109 path->reada = -1;
2058 2110
2059 key.objectid = BTRFS_ORPHAN_OBJECTID; 2111 key.objectid = BTRFS_ORPHAN_OBJECTID;
2060 btrfs_set_key_type(&key, BTRFS_ORPHAN_ITEM_KEY); 2112 btrfs_set_key_type(&key, BTRFS_ORPHAN_ITEM_KEY);
2061 key.offset = (u64)-1; 2113 key.offset = (u64)-1;
2062 2114
2063
2064 while (1) { 2115 while (1) {
2065 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 2116 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2066 if (ret < 0) { 2117 if (ret < 0) {
@@ -2474,7 +2525,19 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
2474 2525
2475 root = BTRFS_I(dir)->root; 2526 root = BTRFS_I(dir)->root;
2476 2527
2528 /*
2529 * 5 items for unlink inode
2530 * 1 for orphan
2531 */
2532 ret = btrfs_reserve_metadata_space(root, 6);
2533 if (ret)
2534 return ret;
2535
2477 trans = btrfs_start_transaction(root, 1); 2536 trans = btrfs_start_transaction(root, 1);
2537 if (IS_ERR(trans)) {
2538 btrfs_unreserve_metadata_space(root, 6);
2539 return PTR_ERR(trans);
2540 }
2478 2541
2479 btrfs_set_trans_block_group(trans, dir); 2542 btrfs_set_trans_block_group(trans, dir);
2480 2543
@@ -2489,6 +2552,7 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
2489 nr = trans->blocks_used; 2552 nr = trans->blocks_used;
2490 2553
2491 btrfs_end_transaction_throttle(trans, root); 2554 btrfs_end_transaction_throttle(trans, root);
2555 btrfs_unreserve_metadata_space(root, 6);
2492 btrfs_btree_balance_dirty(root, nr); 2556 btrfs_btree_balance_dirty(root, nr);
2493 return ret; 2557 return ret;
2494} 2558}
@@ -2569,7 +2633,16 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
2569 inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) 2633 inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)
2570 return -ENOTEMPTY; 2634 return -ENOTEMPTY;
2571 2635
2636 ret = btrfs_reserve_metadata_space(root, 5);
2637 if (ret)
2638 return ret;
2639
2572 trans = btrfs_start_transaction(root, 1); 2640 trans = btrfs_start_transaction(root, 1);
2641 if (IS_ERR(trans)) {
2642 btrfs_unreserve_metadata_space(root, 5);
2643 return PTR_ERR(trans);
2644 }
2645
2573 btrfs_set_trans_block_group(trans, dir); 2646 btrfs_set_trans_block_group(trans, dir);
2574 2647
2575 if (unlikely(inode->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { 2648 if (unlikely(inode->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
@@ -2592,6 +2665,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
2592out: 2665out:
2593 nr = trans->blocks_used; 2666 nr = trans->blocks_used;
2594 ret = btrfs_end_transaction_throttle(trans, root); 2667 ret = btrfs_end_transaction_throttle(trans, root);
2668 btrfs_unreserve_metadata_space(root, 5);
2595 btrfs_btree_balance_dirty(root, nr); 2669 btrfs_btree_balance_dirty(root, nr);
2596 2670
2597 if (ret && !err) 2671 if (ret && !err)
@@ -2782,37 +2856,40 @@ out:
2782 * min_type is the minimum key type to truncate down to. If set to 0, this 2856 * min_type is the minimum key type to truncate down to. If set to 0, this
2783 * will kill all the items on this inode, including the INODE_ITEM_KEY. 2857 * will kill all the items on this inode, including the INODE_ITEM_KEY.
2784 */ 2858 */
2785noinline int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, 2859int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
2786 struct btrfs_root *root, 2860 struct btrfs_root *root,
2787 struct inode *inode, 2861 struct inode *inode,
2788 u64 new_size, u32 min_type) 2862 u64 new_size, u32 min_type)
2789{ 2863{
2790 int ret;
2791 struct btrfs_path *path; 2864 struct btrfs_path *path;
2792 struct btrfs_key key;
2793 struct btrfs_key found_key;
2794 u32 found_type = (u8)-1;
2795 struct extent_buffer *leaf; 2865 struct extent_buffer *leaf;
2796 struct btrfs_file_extent_item *fi; 2866 struct btrfs_file_extent_item *fi;
2867 struct btrfs_key key;
2868 struct btrfs_key found_key;
2797 u64 extent_start = 0; 2869 u64 extent_start = 0;
2798 u64 extent_num_bytes = 0; 2870 u64 extent_num_bytes = 0;
2799 u64 extent_offset = 0; 2871 u64 extent_offset = 0;
2800 u64 item_end = 0; 2872 u64 item_end = 0;
2873 u64 mask = root->sectorsize - 1;
2874 u32 found_type = (u8)-1;
2801 int found_extent; 2875 int found_extent;
2802 int del_item; 2876 int del_item;
2803 int pending_del_nr = 0; 2877 int pending_del_nr = 0;
2804 int pending_del_slot = 0; 2878 int pending_del_slot = 0;
2805 int extent_type = -1; 2879 int extent_type = -1;
2806 int encoding; 2880 int encoding;
2807 u64 mask = root->sectorsize - 1; 2881 int ret;
2882 int err = 0;
2883
2884 BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY);
2808 2885
2809 if (root->ref_cows) 2886 if (root->ref_cows)
2810 btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0); 2887 btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0);
2888
2811 path = btrfs_alloc_path(); 2889 path = btrfs_alloc_path();
2812 BUG_ON(!path); 2890 BUG_ON(!path);
2813 path->reada = -1; 2891 path->reada = -1;
2814 2892
2815 /* FIXME, add redo link to tree so we don't leak on crash */
2816 key.objectid = inode->i_ino; 2893 key.objectid = inode->i_ino;
2817 key.offset = (u64)-1; 2894 key.offset = (u64)-1;
2818 key.type = (u8)-1; 2895 key.type = (u8)-1;
@@ -2820,17 +2897,17 @@ noinline int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
2820search_again: 2897search_again:
2821 path->leave_spinning = 1; 2898 path->leave_spinning = 1;
2822 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 2899 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2823 if (ret < 0) 2900 if (ret < 0) {
2824 goto error; 2901 err = ret;
2902 goto out;
2903 }
2825 2904
2826 if (ret > 0) { 2905 if (ret > 0) {
2827 /* there are no items in the tree for us to truncate, we're 2906 /* there are no items in the tree for us to truncate, we're
2828 * done 2907 * done
2829 */ 2908 */
2830 if (path->slots[0] == 0) { 2909 if (path->slots[0] == 0)
2831 ret = 0; 2910 goto out;
2832 goto error;
2833 }
2834 path->slots[0]--; 2911 path->slots[0]--;
2835 } 2912 }
2836 2913
@@ -2865,28 +2942,17 @@ search_again:
2865 } 2942 }
2866 item_end--; 2943 item_end--;
2867 } 2944 }
2868 if (item_end < new_size) { 2945 if (found_type > min_type) {
2869 if (found_type == BTRFS_DIR_ITEM_KEY) 2946 del_item = 1;
2870 found_type = BTRFS_INODE_ITEM_KEY; 2947 } else {
2871 else if (found_type == BTRFS_EXTENT_ITEM_KEY) 2948 if (item_end < new_size)
2872 found_type = BTRFS_EXTENT_DATA_KEY;
2873 else if (found_type == BTRFS_EXTENT_DATA_KEY)
2874 found_type = BTRFS_XATTR_ITEM_KEY;
2875 else if (found_type == BTRFS_XATTR_ITEM_KEY)
2876 found_type = BTRFS_INODE_REF_KEY;
2877 else if (found_type)
2878 found_type--;
2879 else
2880 break; 2949 break;
2881 btrfs_set_key_type(&key, found_type); 2950 if (found_key.offset >= new_size)
2882 goto next; 2951 del_item = 1;
2952 else
2953 del_item = 0;
2883 } 2954 }
2884 if (found_key.offset >= new_size)
2885 del_item = 1;
2886 else
2887 del_item = 0;
2888 found_extent = 0; 2955 found_extent = 0;
2889
2890 /* FIXME, shrink the extent if the ref count is only 1 */ 2956 /* FIXME, shrink the extent if the ref count is only 1 */
2891 if (found_type != BTRFS_EXTENT_DATA_KEY) 2957 if (found_type != BTRFS_EXTENT_DATA_KEY)
2892 goto delete; 2958 goto delete;
@@ -2973,42 +3039,36 @@ delete:
2973 inode->i_ino, extent_offset); 3039 inode->i_ino, extent_offset);
2974 BUG_ON(ret); 3040 BUG_ON(ret);
2975 } 3041 }
2976next:
2977 if (path->slots[0] == 0) {
2978 if (pending_del_nr)
2979 goto del_pending;
2980 btrfs_release_path(root, path);
2981 if (found_type == BTRFS_INODE_ITEM_KEY)
2982 break;
2983 goto search_again;
2984 }
2985 3042
2986 path->slots[0]--; 3043 if (found_type == BTRFS_INODE_ITEM_KEY)
2987 if (pending_del_nr && 3044 break;
2988 path->slots[0] + 1 != pending_del_slot) { 3045
2989 struct btrfs_key debug; 3046 if (path->slots[0] == 0 ||
2990del_pending: 3047 path->slots[0] != pending_del_slot) {
2991 btrfs_item_key_to_cpu(path->nodes[0], &debug, 3048 if (root->ref_cows) {
2992 pending_del_slot); 3049 err = -EAGAIN;
2993 ret = btrfs_del_items(trans, root, path, 3050 goto out;
2994 pending_del_slot, 3051 }
2995 pending_del_nr); 3052 if (pending_del_nr) {
2996 BUG_ON(ret); 3053 ret = btrfs_del_items(trans, root, path,
2997 pending_del_nr = 0; 3054 pending_del_slot,
3055 pending_del_nr);
3056 BUG_ON(ret);
3057 pending_del_nr = 0;
3058 }
2998 btrfs_release_path(root, path); 3059 btrfs_release_path(root, path);
2999 if (found_type == BTRFS_INODE_ITEM_KEY)
3000 break;
3001 goto search_again; 3060 goto search_again;
3061 } else {
3062 path->slots[0]--;
3002 } 3063 }
3003 } 3064 }
3004 ret = 0; 3065out:
3005error:
3006 if (pending_del_nr) { 3066 if (pending_del_nr) {
3007 ret = btrfs_del_items(trans, root, path, pending_del_slot, 3067 ret = btrfs_del_items(trans, root, path, pending_del_slot,
3008 pending_del_nr); 3068 pending_del_nr);
3009 } 3069 }
3010 btrfs_free_path(path); 3070 btrfs_free_path(path);
3011 return ret; 3071 return err;
3012} 3072}
3013 3073
3014/* 3074/*
@@ -3032,12 +3092,22 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
3032 3092
3033 if ((offset & (blocksize - 1)) == 0) 3093 if ((offset & (blocksize - 1)) == 0)
3034 goto out; 3094 goto out;
3095 ret = btrfs_check_data_free_space(root, inode, PAGE_CACHE_SIZE);
3096 if (ret)
3097 goto out;
3098
3099 ret = btrfs_reserve_metadata_for_delalloc(root, inode, 1);
3100 if (ret)
3101 goto out;
3035 3102
3036 ret = -ENOMEM; 3103 ret = -ENOMEM;
3037again: 3104again:
3038 page = grab_cache_page(mapping, index); 3105 page = grab_cache_page(mapping, index);
3039 if (!page) 3106 if (!page) {
3107 btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE);
3108 btrfs_unreserve_metadata_for_delalloc(root, inode, 1);
3040 goto out; 3109 goto out;
3110 }
3041 3111
3042 page_start = page_offset(page); 3112 page_start = page_offset(page);
3043 page_end = page_start + PAGE_CACHE_SIZE - 1; 3113 page_end = page_start + PAGE_CACHE_SIZE - 1;
@@ -3070,6 +3140,10 @@ again:
3070 goto again; 3140 goto again;
3071 } 3141 }
3072 3142
3143 clear_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end,
3144 EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING,
3145 GFP_NOFS);
3146
3073 ret = btrfs_set_extent_delalloc(inode, page_start, page_end); 3147 ret = btrfs_set_extent_delalloc(inode, page_start, page_end);
3074 if (ret) { 3148 if (ret) {
3075 unlock_extent(io_tree, page_start, page_end, GFP_NOFS); 3149 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
@@ -3088,6 +3162,9 @@ again:
3088 unlock_extent(io_tree, page_start, page_end, GFP_NOFS); 3162 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
3089 3163
3090out_unlock: 3164out_unlock:
3165 if (ret)
3166 btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE);
3167 btrfs_unreserve_metadata_for_delalloc(root, inode, 1);
3091 unlock_page(page); 3168 unlock_page(page);
3092 page_cache_release(page); 3169 page_cache_release(page);
3093out: 3170out:
@@ -3111,8 +3188,6 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
3111 if (size <= hole_start) 3188 if (size <= hole_start)
3112 return 0; 3189 return 0;
3113 3190
3114 btrfs_truncate_page(inode->i_mapping, inode->i_size);
3115
3116 while (1) { 3191 while (1) {
3117 struct btrfs_ordered_extent *ordered; 3192 struct btrfs_ordered_extent *ordered;
3118 btrfs_wait_ordered_range(inode, hole_start, 3193 btrfs_wait_ordered_range(inode, hole_start,
@@ -3125,9 +3200,6 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
3125 btrfs_put_ordered_extent(ordered); 3200 btrfs_put_ordered_extent(ordered);
3126 } 3201 }
3127 3202
3128 trans = btrfs_start_transaction(root, 1);
3129 btrfs_set_trans_block_group(trans, inode);
3130
3131 cur_offset = hole_start; 3203 cur_offset = hole_start;
3132 while (1) { 3204 while (1) {
3133 em = btrfs_get_extent(inode, NULL, 0, cur_offset, 3205 em = btrfs_get_extent(inode, NULL, 0, cur_offset,
@@ -3135,40 +3207,120 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
3135 BUG_ON(IS_ERR(em) || !em); 3207 BUG_ON(IS_ERR(em) || !em);
3136 last_byte = min(extent_map_end(em), block_end); 3208 last_byte = min(extent_map_end(em), block_end);
3137 last_byte = (last_byte + mask) & ~mask; 3209 last_byte = (last_byte + mask) & ~mask;
3138 if (test_bit(EXTENT_FLAG_VACANCY, &em->flags)) { 3210 if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
3139 u64 hint_byte = 0; 3211 u64 hint_byte = 0;
3140 hole_size = last_byte - cur_offset; 3212 hole_size = last_byte - cur_offset;
3141 err = btrfs_drop_extents(trans, root, inode,
3142 cur_offset,
3143 cur_offset + hole_size,
3144 block_end,
3145 cur_offset, &hint_byte, 1);
3146 if (err)
3147 break;
3148 3213
3149 err = btrfs_reserve_metadata_space(root, 1); 3214 err = btrfs_reserve_metadata_space(root, 2);
3150 if (err) 3215 if (err)
3151 break; 3216 break;
3152 3217
3218 trans = btrfs_start_transaction(root, 1);
3219 btrfs_set_trans_block_group(trans, inode);
3220
3221 err = btrfs_drop_extents(trans, inode, cur_offset,
3222 cur_offset + hole_size,
3223 &hint_byte, 1);
3224 BUG_ON(err);
3225
3153 err = btrfs_insert_file_extent(trans, root, 3226 err = btrfs_insert_file_extent(trans, root,
3154 inode->i_ino, cur_offset, 0, 3227 inode->i_ino, cur_offset, 0,
3155 0, hole_size, 0, hole_size, 3228 0, hole_size, 0, hole_size,
3156 0, 0, 0); 3229 0, 0, 0);
3230 BUG_ON(err);
3231
3157 btrfs_drop_extent_cache(inode, hole_start, 3232 btrfs_drop_extent_cache(inode, hole_start,
3158 last_byte - 1, 0); 3233 last_byte - 1, 0);
3159 btrfs_unreserve_metadata_space(root, 1); 3234
3235 btrfs_end_transaction(trans, root);
3236 btrfs_unreserve_metadata_space(root, 2);
3160 } 3237 }
3161 free_extent_map(em); 3238 free_extent_map(em);
3162 cur_offset = last_byte; 3239 cur_offset = last_byte;
3163 if (err || cur_offset >= block_end) 3240 if (cur_offset >= block_end)
3164 break; 3241 break;
3165 } 3242 }
3166 3243
3167 btrfs_end_transaction(trans, root);
3168 unlock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS); 3244 unlock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS);
3169 return err; 3245 return err;
3170} 3246}
3171 3247
3248static int btrfs_setattr_size(struct inode *inode, struct iattr *attr)
3249{
3250 struct btrfs_root *root = BTRFS_I(inode)->root;
3251 struct btrfs_trans_handle *trans;
3252 unsigned long nr;
3253 int ret;
3254
3255 if (attr->ia_size == inode->i_size)
3256 return 0;
3257
3258 if (attr->ia_size > inode->i_size) {
3259 unsigned long limit;
3260 limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
3261 if (attr->ia_size > inode->i_sb->s_maxbytes)
3262 return -EFBIG;
3263 if (limit != RLIM_INFINITY && attr->ia_size > limit) {
3264 send_sig(SIGXFSZ, current, 0);
3265 return -EFBIG;
3266 }
3267 }
3268
3269 ret = btrfs_reserve_metadata_space(root, 1);
3270 if (ret)
3271 return ret;
3272
3273 trans = btrfs_start_transaction(root, 1);
3274 btrfs_set_trans_block_group(trans, inode);
3275
3276 ret = btrfs_orphan_add(trans, inode);
3277 BUG_ON(ret);
3278
3279 nr = trans->blocks_used;
3280 btrfs_end_transaction(trans, root);
3281 btrfs_unreserve_metadata_space(root, 1);
3282 btrfs_btree_balance_dirty(root, nr);
3283
3284 if (attr->ia_size > inode->i_size) {
3285 ret = btrfs_cont_expand(inode, attr->ia_size);
3286 if (ret) {
3287 btrfs_truncate(inode);
3288 return ret;
3289 }
3290
3291 i_size_write(inode, attr->ia_size);
3292 btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
3293
3294 trans = btrfs_start_transaction(root, 1);
3295 btrfs_set_trans_block_group(trans, inode);
3296
3297 ret = btrfs_update_inode(trans, root, inode);
3298 BUG_ON(ret);
3299 if (inode->i_nlink > 0) {
3300 ret = btrfs_orphan_del(trans, inode);
3301 BUG_ON(ret);
3302 }
3303 nr = trans->blocks_used;
3304 btrfs_end_transaction(trans, root);
3305 btrfs_btree_balance_dirty(root, nr);
3306 return 0;
3307 }
3308
3309 /*
3310 * We're truncating a file that used to have good data down to
3311 * zero. Make sure it gets into the ordered flush list so that
3312 * any new writes get down to disk quickly.
3313 */
3314 if (attr->ia_size == 0)
3315 BTRFS_I(inode)->ordered_data_close = 1;
3316
3317 /* we don't support swapfiles, so vmtruncate shouldn't fail */
3318 ret = vmtruncate(inode, attr->ia_size);
3319 BUG_ON(ret);
3320
3321 return 0;
3322}
3323
3172static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) 3324static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
3173{ 3325{
3174 struct inode *inode = dentry->d_inode; 3326 struct inode *inode = dentry->d_inode;
@@ -3179,23 +3331,14 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
3179 return err; 3331 return err;
3180 3332
3181 if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) { 3333 if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) {
3182 if (attr->ia_size > inode->i_size) { 3334 err = btrfs_setattr_size(inode, attr);
3183 err = btrfs_cont_expand(inode, attr->ia_size); 3335 if (err)
3184 if (err) 3336 return err;
3185 return err;
3186 } else if (inode->i_size > 0 &&
3187 attr->ia_size == 0) {
3188
3189 /* we're truncating a file that used to have good
3190 * data down to zero. Make sure it gets into
3191 * the ordered flush list so that any new writes
3192 * get down to disk quickly.
3193 */
3194 BTRFS_I(inode)->ordered_data_close = 1;
3195 }
3196 } 3337 }
3338 attr->ia_valid &= ~ATTR_SIZE;
3197 3339
3198 err = inode_setattr(inode, attr); 3340 if (attr->ia_valid)
3341 err = inode_setattr(inode, attr);
3199 3342
3200 if (!err && ((attr->ia_valid & ATTR_MODE))) 3343 if (!err && ((attr->ia_valid & ATTR_MODE)))
3201 err = btrfs_acl_chmod(inode); 3344 err = btrfs_acl_chmod(inode);
@@ -3216,36 +3359,43 @@ void btrfs_delete_inode(struct inode *inode)
3216 } 3359 }
3217 btrfs_wait_ordered_range(inode, 0, (u64)-1); 3360 btrfs_wait_ordered_range(inode, 0, (u64)-1);
3218 3361
3362 if (root->fs_info->log_root_recovering) {
3363 BUG_ON(!list_empty(&BTRFS_I(inode)->i_orphan));
3364 goto no_delete;
3365 }
3366
3219 if (inode->i_nlink > 0) { 3367 if (inode->i_nlink > 0) {
3220 BUG_ON(btrfs_root_refs(&root->root_item) != 0); 3368 BUG_ON(btrfs_root_refs(&root->root_item) != 0);
3221 goto no_delete; 3369 goto no_delete;
3222 } 3370 }
3223 3371
3224 btrfs_i_size_write(inode, 0); 3372 btrfs_i_size_write(inode, 0);
3225 trans = btrfs_join_transaction(root, 1);
3226 3373
3227 btrfs_set_trans_block_group(trans, inode); 3374 while (1) {
3228 ret = btrfs_truncate_inode_items(trans, root, inode, inode->i_size, 0); 3375 trans = btrfs_start_transaction(root, 1);
3229 if (ret) { 3376 btrfs_set_trans_block_group(trans, inode);
3230 btrfs_orphan_del(NULL, inode); 3377 ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0);
3231 goto no_delete_lock;
3232 }
3233 3378
3234 btrfs_orphan_del(trans, inode); 3379 if (ret != -EAGAIN)
3380 break;
3235 3381
3236 nr = trans->blocks_used; 3382 nr = trans->blocks_used;
3237 clear_inode(inode); 3383 btrfs_end_transaction(trans, root);
3384 trans = NULL;
3385 btrfs_btree_balance_dirty(root, nr);
3386 }
3238 3387
3239 btrfs_end_transaction(trans, root); 3388 if (ret == 0) {
3240 btrfs_btree_balance_dirty(root, nr); 3389 ret = btrfs_orphan_del(trans, inode);
3241 return; 3390 BUG_ON(ret);
3391 }
3242 3392
3243no_delete_lock:
3244 nr = trans->blocks_used; 3393 nr = trans->blocks_used;
3245 btrfs_end_transaction(trans, root); 3394 btrfs_end_transaction(trans, root);
3246 btrfs_btree_balance_dirty(root, nr); 3395 btrfs_btree_balance_dirty(root, nr);
3247no_delete: 3396no_delete:
3248 clear_inode(inode); 3397 clear_inode(inode);
3398 return;
3249} 3399}
3250 3400
3251/* 3401/*
@@ -3480,6 +3630,7 @@ static noinline void init_btrfs_i(struct inode *inode)
3480 bi->generation = 0; 3630 bi->generation = 0;
3481 bi->sequence = 0; 3631 bi->sequence = 0;
3482 bi->last_trans = 0; 3632 bi->last_trans = 0;
3633 bi->last_sub_trans = 0;
3483 bi->logged_trans = 0; 3634 bi->logged_trans = 0;
3484 bi->delalloc_bytes = 0; 3635 bi->delalloc_bytes = 0;
3485 bi->reserved_bytes = 0; 3636 bi->reserved_bytes = 0;
@@ -3497,7 +3648,6 @@ static noinline void init_btrfs_i(struct inode *inode)
3497 INIT_LIST_HEAD(&BTRFS_I(inode)->ordered_operations); 3648 INIT_LIST_HEAD(&BTRFS_I(inode)->ordered_operations);
3498 RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node); 3649 RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node);
3499 btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree); 3650 btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree);
3500 mutex_init(&BTRFS_I(inode)->extent_mutex);
3501 mutex_init(&BTRFS_I(inode)->log_mutex); 3651 mutex_init(&BTRFS_I(inode)->log_mutex);
3502} 3652}
3503 3653
@@ -3623,6 +3773,13 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
3623 } 3773 }
3624 srcu_read_unlock(&root->fs_info->subvol_srcu, index); 3774 srcu_read_unlock(&root->fs_info->subvol_srcu, index);
3625 3775
3776 if (root != sub_root) {
3777 down_read(&root->fs_info->cleanup_work_sem);
3778 if (!(inode->i_sb->s_flags & MS_RDONLY))
3779 btrfs_orphan_cleanup(sub_root);
3780 up_read(&root->fs_info->cleanup_work_sem);
3781 }
3782
3626 return inode; 3783 return inode;
3627} 3784}
3628 3785
@@ -3797,7 +3954,11 @@ skip:
3797 3954
3798 /* Reached end of directory/root. Bump pos past the last item. */ 3955 /* Reached end of directory/root. Bump pos past the last item. */
3799 if (key_type == BTRFS_DIR_INDEX_KEY) 3956 if (key_type == BTRFS_DIR_INDEX_KEY)
3800 filp->f_pos = INT_LIMIT(off_t); 3957 /*
3958 * 32-bit glibc will use getdents64, but then strtol -
3959 * so the last number we can serve is this.
3960 */
3961 filp->f_pos = 0x7fffffff;
3801 else 3962 else
3802 filp->f_pos++; 3963 filp->f_pos++;
3803nopos: 3964nopos:
@@ -4147,7 +4308,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
4147 if (IS_ERR(inode)) 4308 if (IS_ERR(inode))
4148 goto out_unlock; 4309 goto out_unlock;
4149 4310
4150 err = btrfs_init_inode_security(inode, dir); 4311 err = btrfs_init_inode_security(trans, inode, dir);
4151 if (err) { 4312 if (err) {
4152 drop_inode = 1; 4313 drop_inode = 1;
4153 goto out_unlock; 4314 goto out_unlock;
@@ -4218,7 +4379,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
4218 if (IS_ERR(inode)) 4379 if (IS_ERR(inode))
4219 goto out_unlock; 4380 goto out_unlock;
4220 4381
4221 err = btrfs_init_inode_security(inode, dir); 4382 err = btrfs_init_inode_security(trans, inode, dir);
4222 if (err) { 4383 if (err) {
4223 drop_inode = 1; 4384 drop_inode = 1;
4224 goto out_unlock; 4385 goto out_unlock;
@@ -4264,6 +4425,10 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
4264 if (inode->i_nlink == 0) 4425 if (inode->i_nlink == 0)
4265 return -ENOENT; 4426 return -ENOENT;
4266 4427
4428 /* do not allow sys_link's with other subvols of the same device */
4429 if (root->objectid != BTRFS_I(inode)->root->objectid)
4430 return -EPERM;
4431
4267 /* 4432 /*
4268 * 1 item for inode ref 4433 * 1 item for inode ref
4269 * 2 items for dir items 4434 * 2 items for dir items
@@ -4351,7 +4516,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
4351 4516
4352 drop_on_err = 1; 4517 drop_on_err = 1;
4353 4518
4354 err = btrfs_init_inode_security(inode, dir); 4519 err = btrfs_init_inode_security(trans, inode, dir);
4355 if (err) 4520 if (err)
4356 goto out_fail; 4521 goto out_fail;
4357 4522
@@ -4980,7 +5145,9 @@ again:
4980 set_page_dirty(page); 5145 set_page_dirty(page);
4981 SetPageUptodate(page); 5146 SetPageUptodate(page);
4982 5147
4983 BTRFS_I(inode)->last_trans = root->fs_info->generation + 1; 5148 BTRFS_I(inode)->last_trans = root->fs_info->generation;
5149 BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid;
5150
4984 unlock_extent(io_tree, page_start, page_end, GFP_NOFS); 5151 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
4985 5152
4986out_unlock: 5153out_unlock:
@@ -5000,15 +5167,20 @@ static void btrfs_truncate(struct inode *inode)
5000 unsigned long nr; 5167 unsigned long nr;
5001 u64 mask = root->sectorsize - 1; 5168 u64 mask = root->sectorsize - 1;
5002 5169
5003 if (!S_ISREG(inode->i_mode)) 5170 if (!S_ISREG(inode->i_mode)) {
5171 WARN_ON(1);
5004 return; 5172 return;
5005 if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) 5173 }
5174
5175 ret = btrfs_truncate_page(inode->i_mapping, inode->i_size);
5176 if (ret)
5006 return; 5177 return;
5007 5178
5008 btrfs_truncate_page(inode->i_mapping, inode->i_size);
5009 btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); 5179 btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1);
5180 btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
5010 5181
5011 trans = btrfs_start_transaction(root, 1); 5182 trans = btrfs_start_transaction(root, 1);
5183 btrfs_set_trans_block_group(trans, inode);
5012 5184
5013 /* 5185 /*
5014 * setattr is responsible for setting the ordered_data_close flag, 5186 * setattr is responsible for setting the ordered_data_close flag,
@@ -5030,21 +5202,32 @@ static void btrfs_truncate(struct inode *inode)
5030 if (inode->i_size == 0 && BTRFS_I(inode)->ordered_data_close) 5202 if (inode->i_size == 0 && BTRFS_I(inode)->ordered_data_close)
5031 btrfs_add_ordered_operation(trans, root, inode); 5203 btrfs_add_ordered_operation(trans, root, inode);
5032 5204
5033 btrfs_set_trans_block_group(trans, inode); 5205 while (1) {
5034 btrfs_i_size_write(inode, inode->i_size); 5206 ret = btrfs_truncate_inode_items(trans, root, inode,
5207 inode->i_size,
5208 BTRFS_EXTENT_DATA_KEY);
5209 if (ret != -EAGAIN)
5210 break;
5035 5211
5036 ret = btrfs_orphan_add(trans, inode); 5212 ret = btrfs_update_inode(trans, root, inode);
5037 if (ret) 5213 BUG_ON(ret);
5038 goto out; 5214
5039 /* FIXME, add redo link to tree so we don't leak on crash */ 5215 nr = trans->blocks_used;
5040 ret = btrfs_truncate_inode_items(trans, root, inode, inode->i_size, 5216 btrfs_end_transaction(trans, root);
5041 BTRFS_EXTENT_DATA_KEY); 5217 btrfs_btree_balance_dirty(root, nr);
5042 btrfs_update_inode(trans, root, inode);
5043 5218
5044 ret = btrfs_orphan_del(trans, inode); 5219 trans = btrfs_start_transaction(root, 1);
5220 btrfs_set_trans_block_group(trans, inode);
5221 }
5222
5223 if (ret == 0 && inode->i_nlink > 0) {
5224 ret = btrfs_orphan_del(trans, inode);
5225 BUG_ON(ret);
5226 }
5227
5228 ret = btrfs_update_inode(trans, root, inode);
5045 BUG_ON(ret); 5229 BUG_ON(ret);
5046 5230
5047out:
5048 nr = trans->blocks_used; 5231 nr = trans->blocks_used;
5049 ret = btrfs_end_transaction_throttle(trans, root); 5232 ret = btrfs_end_transaction_throttle(trans, root);
5050 BUG_ON(ret); 5233 BUG_ON(ret);
@@ -5100,9 +5283,11 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
5100 if (!ei) 5283 if (!ei)
5101 return NULL; 5284 return NULL;
5102 ei->last_trans = 0; 5285 ei->last_trans = 0;
5286 ei->last_sub_trans = 0;
5103 ei->logged_trans = 0; 5287 ei->logged_trans = 0;
5104 ei->outstanding_extents = 0; 5288 ei->outstanding_extents = 0;
5105 ei->reserved_extents = 0; 5289 ei->reserved_extents = 0;
5290 ei->root = NULL;
5106 spin_lock_init(&ei->accounting_lock); 5291 spin_lock_init(&ei->accounting_lock);
5107 btrfs_ordered_inode_tree_init(&ei->ordered_tree); 5292 btrfs_ordered_inode_tree_init(&ei->ordered_tree);
5108 INIT_LIST_HEAD(&ei->i_orphan); 5293 INIT_LIST_HEAD(&ei->i_orphan);
@@ -5119,6 +5304,14 @@ void btrfs_destroy_inode(struct inode *inode)
5119 WARN_ON(inode->i_data.nrpages); 5304 WARN_ON(inode->i_data.nrpages);
5120 5305
5121 /* 5306 /*
5307 * This can happen where we create an inode, but somebody else also
5308 * created the same inode and we need to destroy the one we already
5309 * created.
5310 */
5311 if (!root)
5312 goto free;
5313
5314 /*
5122 * Make sure we're properly removed from the ordered operation 5315 * Make sure we're properly removed from the ordered operation
5123 * lists. 5316 * lists.
5124 */ 5317 */
@@ -5131,9 +5324,9 @@ void btrfs_destroy_inode(struct inode *inode)
5131 5324
5132 spin_lock(&root->list_lock); 5325 spin_lock(&root->list_lock);
5133 if (!list_empty(&BTRFS_I(inode)->i_orphan)) { 5326 if (!list_empty(&BTRFS_I(inode)->i_orphan)) {
5134 printk(KERN_ERR "BTRFS: inode %lu: inode still on the orphan" 5327 printk(KERN_INFO "BTRFS: inode %lu still on the orphan list\n",
5135 " list\n", inode->i_ino); 5328 inode->i_ino);
5136 dump_stack(); 5329 list_del_init(&BTRFS_I(inode)->i_orphan);
5137 } 5330 }
5138 spin_unlock(&root->list_lock); 5331 spin_unlock(&root->list_lock);
5139 5332
@@ -5153,6 +5346,7 @@ void btrfs_destroy_inode(struct inode *inode)
5153 } 5346 }
5154 inode_tree_del(inode); 5347 inode_tree_del(inode);
5155 btrfs_drop_extent_cache(inode, 0, (u64)-1, 0); 5348 btrfs_drop_extent_cache(inode, 0, (u64)-1, 0);
5349free:
5156 kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode)); 5350 kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
5157} 5351}
5158 5352
@@ -5258,11 +5452,14 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
5258 return -ENOTEMPTY; 5452 return -ENOTEMPTY;
5259 5453
5260 /* 5454 /*
5261 * 2 items for dir items 5455 * We want to reserve the absolute worst case amount of items. So if
5262 * 1 item for orphan entry 5456 * both inodes are subvols and we need to unlink them then that would
5263 * 1 item for ref 5457 * require 4 item modifications, but if they are both normal inodes it
5458 * would require 5 item modifications, so we'll assume they're normal
5459 * inodes. So 5 * 2 is 10, plus 1 for the new link, so 11 total items
5460 * should cover the worst case number of items we'll modify.
5264 */ 5461 */
5265 ret = btrfs_reserve_metadata_space(root, 4); 5462 ret = btrfs_reserve_metadata_space(root, 11);
5266 if (ret) 5463 if (ret)
5267 return ret; 5464 return ret;
5268 5465
@@ -5378,7 +5575,7 @@ out_fail:
5378 if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) 5575 if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)
5379 up_read(&root->fs_info->subvol_sem); 5576 up_read(&root->fs_info->subvol_sem);
5380 5577
5381 btrfs_unreserve_metadata_space(root, 4); 5578 btrfs_unreserve_metadata_space(root, 11);
5382 return ret; 5579 return ret;
5383} 5580}
5384 5581
@@ -5386,7 +5583,7 @@ out_fail:
5386 * some fairly slow code that needs optimization. This walks the list 5583 * some fairly slow code that needs optimization. This walks the list
5387 * of all the inodes with pending delalloc and forces them to disk. 5584 * of all the inodes with pending delalloc and forces them to disk.
5388 */ 5585 */
5389int btrfs_start_delalloc_inodes(struct btrfs_root *root) 5586int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
5390{ 5587{
5391 struct list_head *head = &root->fs_info->delalloc_inodes; 5588 struct list_head *head = &root->fs_info->delalloc_inodes;
5392 struct btrfs_inode *binode; 5589 struct btrfs_inode *binode;
@@ -5405,7 +5602,10 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root)
5405 spin_unlock(&root->fs_info->delalloc_lock); 5602 spin_unlock(&root->fs_info->delalloc_lock);
5406 if (inode) { 5603 if (inode) {
5407 filemap_flush(inode->i_mapping); 5604 filemap_flush(inode->i_mapping);
5408 iput(inode); 5605 if (delay_iput)
5606 btrfs_add_delayed_iput(inode);
5607 else
5608 iput(inode);
5409 } 5609 }
5410 cond_resched(); 5610 cond_resched();
5411 spin_lock(&root->fs_info->delalloc_lock); 5611 spin_lock(&root->fs_info->delalloc_lock);
@@ -5479,7 +5679,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
5479 if (IS_ERR(inode)) 5679 if (IS_ERR(inode))
5480 goto out_unlock; 5680 goto out_unlock;
5481 5681
5482 err = btrfs_init_inode_security(inode, dir); 5682 err = btrfs_init_inode_security(trans, inode, dir);
5483 if (err) { 5683 if (err) {
5484 drop_inode = 1; 5684 drop_inode = 1;
5485 goto out_unlock; 5685 goto out_unlock;
@@ -5551,57 +5751,77 @@ out_fail:
5551 return err; 5751 return err;
5552} 5752}
5553 5753
5554static int prealloc_file_range(struct btrfs_trans_handle *trans, 5754static int prealloc_file_range(struct inode *inode, u64 start, u64 end,
5555 struct inode *inode, u64 start, u64 end, 5755 u64 alloc_hint, int mode, loff_t actual_len)
5556 u64 locked_end, u64 alloc_hint, int mode)
5557{ 5756{
5757 struct btrfs_trans_handle *trans;
5558 struct btrfs_root *root = BTRFS_I(inode)->root; 5758 struct btrfs_root *root = BTRFS_I(inode)->root;
5559 struct btrfs_key ins; 5759 struct btrfs_key ins;
5560 u64 alloc_size; 5760 u64 alloc_size;
5561 u64 cur_offset = start; 5761 u64 cur_offset = start;
5562 u64 num_bytes = end - start; 5762 u64 num_bytes = end - start;
5563 int ret = 0; 5763 int ret = 0;
5764 u64 i_size;
5564 5765
5565 while (num_bytes > 0) { 5766 while (num_bytes > 0) {
5566 alloc_size = min(num_bytes, root->fs_info->max_extent); 5767 alloc_size = min(num_bytes, root->fs_info->max_extent);
5567 5768
5568 ret = btrfs_reserve_metadata_space(root, 1); 5769 trans = btrfs_start_transaction(root, 1);
5569 if (ret)
5570 goto out;
5571 5770
5572 ret = btrfs_reserve_extent(trans, root, alloc_size, 5771 ret = btrfs_reserve_extent(trans, root, alloc_size,
5573 root->sectorsize, 0, alloc_hint, 5772 root->sectorsize, 0, alloc_hint,
5574 (u64)-1, &ins, 1); 5773 (u64)-1, &ins, 1);
5575 if (ret) { 5774 if (ret) {
5576 WARN_ON(1); 5775 WARN_ON(1);
5577 goto out; 5776 goto stop_trans;
5777 }
5778
5779 ret = btrfs_reserve_metadata_space(root, 3);
5780 if (ret) {
5781 btrfs_free_reserved_extent(root, ins.objectid,
5782 ins.offset);
5783 goto stop_trans;
5578 } 5784 }
5785
5579 ret = insert_reserved_file_extent(trans, inode, 5786 ret = insert_reserved_file_extent(trans, inode,
5580 cur_offset, ins.objectid, 5787 cur_offset, ins.objectid,
5581 ins.offset, ins.offset, 5788 ins.offset, ins.offset,
5582 ins.offset, locked_end, 5789 ins.offset, 0, 0, 0,
5583 0, 0, 0,
5584 BTRFS_FILE_EXTENT_PREALLOC); 5790 BTRFS_FILE_EXTENT_PREALLOC);
5585 BUG_ON(ret); 5791 BUG_ON(ret);
5586 btrfs_drop_extent_cache(inode, cur_offset, 5792 btrfs_drop_extent_cache(inode, cur_offset,
5587 cur_offset + ins.offset -1, 0); 5793 cur_offset + ins.offset -1, 0);
5794
5588 num_bytes -= ins.offset; 5795 num_bytes -= ins.offset;
5589 cur_offset += ins.offset; 5796 cur_offset += ins.offset;
5590 alloc_hint = ins.objectid + ins.offset; 5797 alloc_hint = ins.objectid + ins.offset;
5591 btrfs_unreserve_metadata_space(root, 1); 5798
5592 }
5593out:
5594 if (cur_offset > start) {
5595 inode->i_ctime = CURRENT_TIME; 5799 inode->i_ctime = CURRENT_TIME;
5596 BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC; 5800 BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC;
5597 if (!(mode & FALLOC_FL_KEEP_SIZE) && 5801 if (!(mode & FALLOC_FL_KEEP_SIZE) &&
5598 cur_offset > i_size_read(inode)) 5802 (actual_len > inode->i_size) &&
5599 btrfs_i_size_write(inode, cur_offset); 5803 (cur_offset > inode->i_size)) {
5804
5805 if (cur_offset > actual_len)
5806 i_size = actual_len;
5807 else
5808 i_size = cur_offset;
5809 i_size_write(inode, i_size);
5810 btrfs_ordered_update_i_size(inode, i_size, NULL);
5811 }
5812
5600 ret = btrfs_update_inode(trans, root, inode); 5813 ret = btrfs_update_inode(trans, root, inode);
5601 BUG_ON(ret); 5814 BUG_ON(ret);
5815
5816 btrfs_end_transaction(trans, root);
5817 btrfs_unreserve_metadata_space(root, 3);
5602 } 5818 }
5819 return ret;
5603 5820
5821stop_trans:
5822 btrfs_end_transaction(trans, root);
5604 return ret; 5823 return ret;
5824
5605} 5825}
5606 5826
5607static long btrfs_fallocate(struct inode *inode, int mode, 5827static long btrfs_fallocate(struct inode *inode, int mode,
@@ -5615,8 +5835,6 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5615 u64 locked_end; 5835 u64 locked_end;
5616 u64 mask = BTRFS_I(inode)->root->sectorsize - 1; 5836 u64 mask = BTRFS_I(inode)->root->sectorsize - 1;
5617 struct extent_map *em; 5837 struct extent_map *em;
5618 struct btrfs_trans_handle *trans;
5619 struct btrfs_root *root;
5620 int ret; 5838 int ret;
5621 5839
5622 alloc_start = offset & ~mask; 5840 alloc_start = offset & ~mask;
@@ -5635,9 +5853,7 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5635 goto out; 5853 goto out;
5636 } 5854 }
5637 5855
5638 root = BTRFS_I(inode)->root; 5856 ret = btrfs_check_data_free_space(BTRFS_I(inode)->root, inode,
5639
5640 ret = btrfs_check_data_free_space(root, inode,
5641 alloc_end - alloc_start); 5857 alloc_end - alloc_start);
5642 if (ret) 5858 if (ret)
5643 goto out; 5859 goto out;
@@ -5646,12 +5862,6 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5646 while (1) { 5862 while (1) {
5647 struct btrfs_ordered_extent *ordered; 5863 struct btrfs_ordered_extent *ordered;
5648 5864
5649 trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1);
5650 if (!trans) {
5651 ret = -EIO;
5652 goto out_free;
5653 }
5654
5655 /* the extent lock is ordered inside the running 5865 /* the extent lock is ordered inside the running
5656 * transaction 5866 * transaction
5657 */ 5867 */
@@ -5665,8 +5875,6 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5665 btrfs_put_ordered_extent(ordered); 5875 btrfs_put_ordered_extent(ordered);
5666 unlock_extent(&BTRFS_I(inode)->io_tree, 5876 unlock_extent(&BTRFS_I(inode)->io_tree,
5667 alloc_start, locked_end, GFP_NOFS); 5877 alloc_start, locked_end, GFP_NOFS);
5668 btrfs_end_transaction(trans, BTRFS_I(inode)->root);
5669
5670 /* 5878 /*
5671 * we can't wait on the range with the transaction 5879 * we can't wait on the range with the transaction
5672 * running or with the extent lock held 5880 * running or with the extent lock held
@@ -5687,10 +5895,12 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5687 BUG_ON(IS_ERR(em) || !em); 5895 BUG_ON(IS_ERR(em) || !em);
5688 last_byte = min(extent_map_end(em), alloc_end); 5896 last_byte = min(extent_map_end(em), alloc_end);
5689 last_byte = (last_byte + mask) & ~mask; 5897 last_byte = (last_byte + mask) & ~mask;
5690 if (em->block_start == EXTENT_MAP_HOLE) { 5898 if (em->block_start == EXTENT_MAP_HOLE ||
5691 ret = prealloc_file_range(trans, inode, cur_offset, 5899 (cur_offset >= inode->i_size &&
5692 last_byte, locked_end + 1, 5900 !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
5693 alloc_hint, mode); 5901 ret = prealloc_file_range(inode,
5902 cur_offset, last_byte,
5903 alloc_hint, mode, offset+len);
5694 if (ret < 0) { 5904 if (ret < 0) {
5695 free_extent_map(em); 5905 free_extent_map(em);
5696 break; 5906 break;
@@ -5709,9 +5919,8 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5709 unlock_extent(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, 5919 unlock_extent(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
5710 GFP_NOFS); 5920 GFP_NOFS);
5711 5921
5712 btrfs_end_transaction(trans, BTRFS_I(inode)->root); 5922 btrfs_free_reserved_data_space(BTRFS_I(inode)->root, inode,
5713out_free: 5923 alloc_end - alloc_start);
5714 btrfs_free_reserved_data_space(root, inode, alloc_end - alloc_start);
5715out: 5924out:
5716 mutex_unlock(&inode->i_mutex); 5925 mutex_unlock(&inode->i_mutex);
5717 return ret; 5926 return ret;
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index cdbb054102b9..645a17927a8f 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -237,7 +237,6 @@ static noinline int create_subvol(struct btrfs_root *root,
237 u64 objectid; 237 u64 objectid;
238 u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID; 238 u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID;
239 u64 index = 0; 239 u64 index = 0;
240 unsigned long nr = 1;
241 240
242 /* 241 /*
243 * 1 - inode item 242 * 1 - inode item
@@ -290,7 +289,7 @@ static noinline int create_subvol(struct btrfs_root *root,
290 btrfs_set_root_generation(&root_item, trans->transid); 289 btrfs_set_root_generation(&root_item, trans->transid);
291 btrfs_set_root_level(&root_item, 0); 290 btrfs_set_root_level(&root_item, 0);
292 btrfs_set_root_refs(&root_item, 1); 291 btrfs_set_root_refs(&root_item, 1);
293 btrfs_set_root_used(&root_item, 0); 292 btrfs_set_root_used(&root_item, leaf->len);
294 btrfs_set_root_last_snapshot(&root_item, 0); 293 btrfs_set_root_last_snapshot(&root_item, 0);
295 294
296 memset(&root_item.drop_progress, 0, sizeof(root_item.drop_progress)); 295 memset(&root_item.drop_progress, 0, sizeof(root_item.drop_progress));
@@ -342,24 +341,21 @@ static noinline int create_subvol(struct btrfs_root *root,
342 341
343 d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry)); 342 d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry));
344fail: 343fail:
345 nr = trans->blocks_used;
346 err = btrfs_commit_transaction(trans, root); 344 err = btrfs_commit_transaction(trans, root);
347 if (err && !ret) 345 if (err && !ret)
348 ret = err; 346 ret = err;
349 347
350 btrfs_unreserve_metadata_space(root, 6); 348 btrfs_unreserve_metadata_space(root, 6);
351 btrfs_btree_balance_dirty(root, nr);
352 return ret; 349 return ret;
353} 350}
354 351
355static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, 352static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
356 char *name, int namelen) 353 char *name, int namelen)
357{ 354{
355 struct inode *inode;
358 struct btrfs_pending_snapshot *pending_snapshot; 356 struct btrfs_pending_snapshot *pending_snapshot;
359 struct btrfs_trans_handle *trans; 357 struct btrfs_trans_handle *trans;
360 int ret = 0; 358 int ret;
361 int err;
362 unsigned long nr = 0;
363 359
364 if (!root->ref_cows) 360 if (!root->ref_cows)
365 return -EINVAL; 361 return -EINVAL;
@@ -372,20 +368,20 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
372 */ 368 */
373 ret = btrfs_reserve_metadata_space(root, 6); 369 ret = btrfs_reserve_metadata_space(root, 6);
374 if (ret) 370 if (ret)
375 goto fail_unlock; 371 goto fail;
376 372
377 pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS); 373 pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS);
378 if (!pending_snapshot) { 374 if (!pending_snapshot) {
379 ret = -ENOMEM; 375 ret = -ENOMEM;
380 btrfs_unreserve_metadata_space(root, 6); 376 btrfs_unreserve_metadata_space(root, 6);
381 goto fail_unlock; 377 goto fail;
382 } 378 }
383 pending_snapshot->name = kmalloc(namelen + 1, GFP_NOFS); 379 pending_snapshot->name = kmalloc(namelen + 1, GFP_NOFS);
384 if (!pending_snapshot->name) { 380 if (!pending_snapshot->name) {
385 ret = -ENOMEM; 381 ret = -ENOMEM;
386 kfree(pending_snapshot); 382 kfree(pending_snapshot);
387 btrfs_unreserve_metadata_space(root, 6); 383 btrfs_unreserve_metadata_space(root, 6);
388 goto fail_unlock; 384 goto fail;
389 } 385 }
390 memcpy(pending_snapshot->name, name, namelen); 386 memcpy(pending_snapshot->name, name, namelen);
391 pending_snapshot->name[namelen] = '\0'; 387 pending_snapshot->name[namelen] = '\0';
@@ -395,10 +391,19 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
395 pending_snapshot->root = root; 391 pending_snapshot->root = root;
396 list_add(&pending_snapshot->list, 392 list_add(&pending_snapshot->list,
397 &trans->transaction->pending_snapshots); 393 &trans->transaction->pending_snapshots);
398 err = btrfs_commit_transaction(trans, root); 394 ret = btrfs_commit_transaction(trans, root);
395 BUG_ON(ret);
396 btrfs_unreserve_metadata_space(root, 6);
399 397
400fail_unlock: 398 inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry);
401 btrfs_btree_balance_dirty(root, nr); 399 if (IS_ERR(inode)) {
400 ret = PTR_ERR(inode);
401 goto fail;
402 }
403 BUG_ON(!inode);
404 d_instantiate(dentry, inode);
405 ret = 0;
406fail:
402 return ret; 407 return ret;
403} 408}
404 409
@@ -1027,8 +1032,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
1027 BUG_ON(!trans); 1032 BUG_ON(!trans);
1028 1033
1029 /* punch hole in destination first */ 1034 /* punch hole in destination first */
1030 btrfs_drop_extents(trans, root, inode, off, off + len, 1035 btrfs_drop_extents(trans, inode, off, off + len, &hint_byte, 1);
1031 off + len, 0, &hint_byte, 1);
1032 1036
1033 /* clone data */ 1037 /* clone data */
1034 key.objectid = src->i_ino; 1038 key.objectid = src->i_ino;
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 5799bc46a309..5c2a9e78a949 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -291,16 +291,16 @@ int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
291 291
292/* 292/*
293 * remove an ordered extent from the tree. No references are dropped 293 * remove an ordered extent from the tree. No references are dropped
294 * but, anyone waiting on this extent is woken up. 294 * and you must wake_up entry->wait. You must hold the tree mutex
295 * while you call this function.
295 */ 296 */
296int btrfs_remove_ordered_extent(struct inode *inode, 297static int __btrfs_remove_ordered_extent(struct inode *inode,
297 struct btrfs_ordered_extent *entry) 298 struct btrfs_ordered_extent *entry)
298{ 299{
299 struct btrfs_ordered_inode_tree *tree; 300 struct btrfs_ordered_inode_tree *tree;
300 struct rb_node *node; 301 struct rb_node *node;
301 302
302 tree = &BTRFS_I(inode)->ordered_tree; 303 tree = &BTRFS_I(inode)->ordered_tree;
303 mutex_lock(&tree->mutex);
304 node = &entry->rb_node; 304 node = &entry->rb_node;
305 rb_erase(node, &tree->tree); 305 rb_erase(node, &tree->tree);
306 tree->last = NULL; 306 tree->last = NULL;
@@ -326,16 +326,34 @@ int btrfs_remove_ordered_extent(struct inode *inode,
326 } 326 }
327 spin_unlock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); 327 spin_unlock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock);
328 328
329 return 0;
330}
331
332/*
333 * remove an ordered extent from the tree. No references are dropped
334 * but any waiters are woken.
335 */
336int btrfs_remove_ordered_extent(struct inode *inode,
337 struct btrfs_ordered_extent *entry)
338{
339 struct btrfs_ordered_inode_tree *tree;
340 int ret;
341
342 tree = &BTRFS_I(inode)->ordered_tree;
343 mutex_lock(&tree->mutex);
344 ret = __btrfs_remove_ordered_extent(inode, entry);
329 mutex_unlock(&tree->mutex); 345 mutex_unlock(&tree->mutex);
330 wake_up(&entry->wait); 346 wake_up(&entry->wait);
331 return 0; 347
348 return ret;
332} 349}
333 350
334/* 351/*
335 * wait for all the ordered extents in a root. This is done when balancing 352 * wait for all the ordered extents in a root. This is done when balancing
336 * space between drives. 353 * space between drives.
337 */ 354 */
338int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only) 355int btrfs_wait_ordered_extents(struct btrfs_root *root,
356 int nocow_only, int delay_iput)
339{ 357{
340 struct list_head splice; 358 struct list_head splice;
341 struct list_head *cur; 359 struct list_head *cur;
@@ -372,7 +390,10 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only)
372 if (inode) { 390 if (inode) {
373 btrfs_start_ordered_extent(inode, ordered, 1); 391 btrfs_start_ordered_extent(inode, ordered, 1);
374 btrfs_put_ordered_extent(ordered); 392 btrfs_put_ordered_extent(ordered);
375 iput(inode); 393 if (delay_iput)
394 btrfs_add_delayed_iput(inode);
395 else
396 iput(inode);
376 } else { 397 } else {
377 btrfs_put_ordered_extent(ordered); 398 btrfs_put_ordered_extent(ordered);
378 } 399 }
@@ -430,7 +451,7 @@ again:
430 btrfs_wait_ordered_range(inode, 0, (u64)-1); 451 btrfs_wait_ordered_range(inode, 0, (u64)-1);
431 else 452 else
432 filemap_flush(inode->i_mapping); 453 filemap_flush(inode->i_mapping);
433 iput(inode); 454 btrfs_add_delayed_iput(inode);
434 } 455 }
435 456
436 cond_resched(); 457 cond_resched();
@@ -589,7 +610,7 @@ out:
589 * After an extent is done, call this to conditionally update the on disk 610 * After an extent is done, call this to conditionally update the on disk
590 * i_size. i_size is updated to cover any fully written part of the file. 611 * i_size. i_size is updated to cover any fully written part of the file.
591 */ 612 */
592int btrfs_ordered_update_i_size(struct inode *inode, 613int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
593 struct btrfs_ordered_extent *ordered) 614 struct btrfs_ordered_extent *ordered)
594{ 615{
595 struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree; 616 struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree;
@@ -597,18 +618,32 @@ int btrfs_ordered_update_i_size(struct inode *inode,
597 u64 disk_i_size; 618 u64 disk_i_size;
598 u64 new_i_size; 619 u64 new_i_size;
599 u64 i_size_test; 620 u64 i_size_test;
621 u64 i_size = i_size_read(inode);
600 struct rb_node *node; 622 struct rb_node *node;
623 struct rb_node *prev = NULL;
601 struct btrfs_ordered_extent *test; 624 struct btrfs_ordered_extent *test;
625 int ret = 1;
626
627 if (ordered)
628 offset = entry_end(ordered);
629 else
630 offset = ALIGN(offset, BTRFS_I(inode)->root->sectorsize);
602 631
603 mutex_lock(&tree->mutex); 632 mutex_lock(&tree->mutex);
604 disk_i_size = BTRFS_I(inode)->disk_i_size; 633 disk_i_size = BTRFS_I(inode)->disk_i_size;
605 634
635 /* truncate file */
636 if (disk_i_size > i_size) {
637 BTRFS_I(inode)->disk_i_size = i_size;
638 ret = 0;
639 goto out;
640 }
641
606 /* 642 /*
607 * if the disk i_size is already at the inode->i_size, or 643 * if the disk i_size is already at the inode->i_size, or
608 * this ordered extent is inside the disk i_size, we're done 644 * this ordered extent is inside the disk i_size, we're done
609 */ 645 */
610 if (disk_i_size >= inode->i_size || 646 if (disk_i_size == i_size || offset <= disk_i_size) {
611 ordered->file_offset + ordered->len <= disk_i_size) {
612 goto out; 647 goto out;
613 } 648 }
614 649
@@ -616,8 +651,7 @@ int btrfs_ordered_update_i_size(struct inode *inode,
616 * we can't update the disk_isize if there are delalloc bytes 651 * we can't update the disk_isize if there are delalloc bytes
617 * between disk_i_size and this ordered extent 652 * between disk_i_size and this ordered extent
618 */ 653 */
619 if (test_range_bit(io_tree, disk_i_size, 654 if (test_range_bit(io_tree, disk_i_size, offset - 1,
620 ordered->file_offset + ordered->len - 1,
621 EXTENT_DELALLOC, 0, NULL)) { 655 EXTENT_DELALLOC, 0, NULL)) {
622 goto out; 656 goto out;
623 } 657 }
@@ -626,20 +660,32 @@ int btrfs_ordered_update_i_size(struct inode *inode,
626 * if we find an ordered extent then we can't update disk i_size 660 * if we find an ordered extent then we can't update disk i_size
627 * yet 661 * yet
628 */ 662 */
629 node = &ordered->rb_node; 663 if (ordered) {
630 while (1) { 664 node = rb_prev(&ordered->rb_node);
631 node = rb_prev(node); 665 } else {
632 if (!node) 666 prev = tree_search(tree, offset);
633 break; 667 /*
668 * we insert file extents without involving ordered struct,
669 * so there should be no ordered struct cover this offset
670 */
671 if (prev) {
672 test = rb_entry(prev, struct btrfs_ordered_extent,
673 rb_node);
674 BUG_ON(offset_in_entry(test, offset));
675 }
676 node = prev;
677 }
678 while (node) {
634 test = rb_entry(node, struct btrfs_ordered_extent, rb_node); 679 test = rb_entry(node, struct btrfs_ordered_extent, rb_node);
635 if (test->file_offset + test->len <= disk_i_size) 680 if (test->file_offset + test->len <= disk_i_size)
636 break; 681 break;
637 if (test->file_offset >= inode->i_size) 682 if (test->file_offset >= i_size)
638 break; 683 break;
639 if (test->file_offset >= disk_i_size) 684 if (test->file_offset >= disk_i_size)
640 goto out; 685 goto out;
686 node = rb_prev(node);
641 } 687 }
642 new_i_size = min_t(u64, entry_end(ordered), i_size_read(inode)); 688 new_i_size = min_t(u64, offset, i_size);
643 689
644 /* 690 /*
645 * at this point, we know we can safely update i_size to at least 691 * at this point, we know we can safely update i_size to at least
@@ -647,7 +693,14 @@ int btrfs_ordered_update_i_size(struct inode *inode,
647 * walk forward and see if ios from higher up in the file have 693 * walk forward and see if ios from higher up in the file have
648 * finished. 694 * finished.
649 */ 695 */
650 node = rb_next(&ordered->rb_node); 696 if (ordered) {
697 node = rb_next(&ordered->rb_node);
698 } else {
699 if (prev)
700 node = rb_next(prev);
701 else
702 node = rb_first(&tree->tree);
703 }
651 i_size_test = 0; 704 i_size_test = 0;
652 if (node) { 705 if (node) {
653 /* 706 /*
@@ -655,10 +708,10 @@ int btrfs_ordered_update_i_size(struct inode *inode,
655 * between our ordered extent and the next one. 708 * between our ordered extent and the next one.
656 */ 709 */
657 test = rb_entry(node, struct btrfs_ordered_extent, rb_node); 710 test = rb_entry(node, struct btrfs_ordered_extent, rb_node);
658 if (test->file_offset > entry_end(ordered)) 711 if (test->file_offset > offset)
659 i_size_test = test->file_offset; 712 i_size_test = test->file_offset;
660 } else { 713 } else {
661 i_size_test = i_size_read(inode); 714 i_size_test = i_size;
662 } 715 }
663 716
664 /* 717 /*
@@ -667,15 +720,25 @@ int btrfs_ordered_update_i_size(struct inode *inode,
667 * are no delalloc bytes in this area, it is safe to update 720 * are no delalloc bytes in this area, it is safe to update
668 * disk_i_size to the end of the region. 721 * disk_i_size to the end of the region.
669 */ 722 */
670 if (i_size_test > entry_end(ordered) && 723 if (i_size_test > offset &&
671 !test_range_bit(io_tree, entry_end(ordered), i_size_test - 1, 724 !test_range_bit(io_tree, offset, i_size_test - 1,
672 EXTENT_DELALLOC, 0, NULL)) { 725 EXTENT_DELALLOC, 0, NULL)) {
673 new_i_size = min_t(u64, i_size_test, i_size_read(inode)); 726 new_i_size = min_t(u64, i_size_test, i_size);
674 } 727 }
675 BTRFS_I(inode)->disk_i_size = new_i_size; 728 BTRFS_I(inode)->disk_i_size = new_i_size;
729 ret = 0;
676out: 730out:
731 /*
732 * we need to remove the ordered extent with the tree lock held
733 * so that other people calling this function don't find our fully
734 * processed ordered entry and skip updating the i_size
735 */
736 if (ordered)
737 __btrfs_remove_ordered_extent(inode, ordered);
677 mutex_unlock(&tree->mutex); 738 mutex_unlock(&tree->mutex);
678 return 0; 739 if (ordered)
740 wake_up(&ordered->wait);
741 return ret;
679} 742}
680 743
681/* 744/*
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index f82e87488ca8..1fe1282ef47c 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -150,12 +150,13 @@ void btrfs_start_ordered_extent(struct inode *inode,
150int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len); 150int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len);
151struct btrfs_ordered_extent * 151struct btrfs_ordered_extent *
152btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset); 152btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset);
153int btrfs_ordered_update_i_size(struct inode *inode, 153int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
154 struct btrfs_ordered_extent *ordered); 154 struct btrfs_ordered_extent *ordered);
155int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum); 155int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum);
156int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only);
157int btrfs_run_ordered_operations(struct btrfs_root *root, int wait); 156int btrfs_run_ordered_operations(struct btrfs_root *root, int wait);
158int btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, 157int btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
159 struct btrfs_root *root, 158 struct btrfs_root *root,
160 struct inode *inode); 159 struct inode *inode);
160int btrfs_wait_ordered_extents(struct btrfs_root *root,
161 int nocow_only, int delay_iput);
161#endif 162#endif
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index cfcc93c93a7b..ab7ab5318745 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -1561,6 +1561,20 @@ static int invalidate_extent_cache(struct btrfs_root *root,
1561 return 0; 1561 return 0;
1562} 1562}
1563 1563
1564static void put_inodes(struct list_head *list)
1565{
1566 struct inodevec *ivec;
1567 while (!list_empty(list)) {
1568 ivec = list_entry(list->next, struct inodevec, list);
1569 list_del(&ivec->list);
1570 while (ivec->nr > 0) {
1571 ivec->nr--;
1572 iput(ivec->inode[ivec->nr]);
1573 }
1574 kfree(ivec);
1575 }
1576}
1577
1564static int find_next_key(struct btrfs_path *path, int level, 1578static int find_next_key(struct btrfs_path *path, int level,
1565 struct btrfs_key *key) 1579 struct btrfs_key *key)
1566 1580
@@ -1723,6 +1737,11 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
1723 1737
1724 btrfs_btree_balance_dirty(root, nr); 1738 btrfs_btree_balance_dirty(root, nr);
1725 1739
1740 /*
1741 * put inodes outside transaction, otherwise we may deadlock.
1742 */
1743 put_inodes(&inode_list);
1744
1726 if (replaced && rc->stage == UPDATE_DATA_PTRS) 1745 if (replaced && rc->stage == UPDATE_DATA_PTRS)
1727 invalidate_extent_cache(root, &key, &next_key); 1746 invalidate_extent_cache(root, &key, &next_key);
1728 } 1747 }
@@ -1752,19 +1771,7 @@ out:
1752 1771
1753 btrfs_btree_balance_dirty(root, nr); 1772 btrfs_btree_balance_dirty(root, nr);
1754 1773
1755 /* 1774 put_inodes(&inode_list);
1756 * put inodes while we aren't holding the tree locks
1757 */
1758 while (!list_empty(&inode_list)) {
1759 struct inodevec *ivec;
1760 ivec = list_entry(inode_list.next, struct inodevec, list);
1761 list_del(&ivec->list);
1762 while (ivec->nr > 0) {
1763 ivec->nr--;
1764 iput(ivec->inode[ivec->nr]);
1765 }
1766 kfree(ivec);
1767 }
1768 1775
1769 if (replaced && rc->stage == UPDATE_DATA_PTRS) 1776 if (replaced && rc->stage == UPDATE_DATA_PTRS)
1770 invalidate_extent_cache(root, &key, &next_key); 1777 invalidate_extent_cache(root, &key, &next_key);
@@ -3274,8 +3281,10 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
3274 return -ENOMEM; 3281 return -ENOMEM;
3275 3282
3276 path = btrfs_alloc_path(); 3283 path = btrfs_alloc_path();
3277 if (!path) 3284 if (!path) {
3285 kfree(cluster);
3278 return -ENOMEM; 3286 return -ENOMEM;
3287 }
3279 3288
3280 rc->extents_found = 0; 3289 rc->extents_found = 0;
3281 rc->extents_skipped = 0; 3290 rc->extents_skipped = 0;
@@ -3534,8 +3543,8 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
3534 (unsigned long long)rc->block_group->key.objectid, 3543 (unsigned long long)rc->block_group->key.objectid,
3535 (unsigned long long)rc->block_group->flags); 3544 (unsigned long long)rc->block_group->flags);
3536 3545
3537 btrfs_start_delalloc_inodes(fs_info->tree_root); 3546 btrfs_start_delalloc_inodes(fs_info->tree_root, 0);
3538 btrfs_wait_ordered_extents(fs_info->tree_root, 0); 3547 btrfs_wait_ordered_extents(fs_info->tree_root, 0, 0);
3539 3548
3540 while (1) { 3549 while (1) {
3541 rc->extents_found = 0; 3550 rc->extents_found = 0;
@@ -3755,6 +3764,8 @@ out:
3755 BTRFS_DATA_RELOC_TREE_OBJECTID); 3764 BTRFS_DATA_RELOC_TREE_OBJECTID);
3756 if (IS_ERR(fs_root)) 3765 if (IS_ERR(fs_root))
3757 err = PTR_ERR(fs_root); 3766 err = PTR_ERR(fs_root);
3767 else
3768 btrfs_orphan_cleanup(fs_root);
3758 } 3769 }
3759 return err; 3770 return err;
3760} 3771}
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index 9351428f30e2..67fa2d29d663 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -159,7 +159,6 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root
159 write_extent_buffer(l, item, ptr, sizeof(*item)); 159 write_extent_buffer(l, item, ptr, sizeof(*item));
160 btrfs_mark_buffer_dirty(path->nodes[0]); 160 btrfs_mark_buffer_dirty(path->nodes[0]);
161out: 161out:
162 btrfs_release_path(root, path);
163 btrfs_free_path(path); 162 btrfs_free_path(path);
164 return ret; 163 return ret;
165} 164}
@@ -332,7 +331,6 @@ int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root,
332 BUG_ON(refs != 0); 331 BUG_ON(refs != 0);
333 ret = btrfs_del_item(trans, root, path); 332 ret = btrfs_del_item(trans, root, path);
334out: 333out:
335 btrfs_release_path(root, path);
336 btrfs_free_path(path); 334 btrfs_free_path(path);
337 return ret; 335 return ret;
338} 336}
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 9de9b2236419..8a1ea6e64575 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -66,7 +66,9 @@ enum {
66 Opt_degraded, Opt_subvol, Opt_device, Opt_nodatasum, Opt_nodatacow, 66 Opt_degraded, Opt_subvol, Opt_device, Opt_nodatasum, Opt_nodatacow,
67 Opt_max_extent, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, 67 Opt_max_extent, Opt_max_inline, Opt_alloc_start, Opt_nobarrier,
68 Opt_ssd, Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, 68 Opt_ssd, Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl,
69 Opt_compress, Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_err, 69 Opt_compress, Opt_compress_force, Opt_notreelog, Opt_ratio,
70 Opt_flushoncommit,
71 Opt_discard, Opt_err,
70}; 72};
71 73
72static match_table_t tokens = { 74static match_table_t tokens = {
@@ -81,6 +83,7 @@ static match_table_t tokens = {
81 {Opt_alloc_start, "alloc_start=%s"}, 83 {Opt_alloc_start, "alloc_start=%s"},
82 {Opt_thread_pool, "thread_pool=%d"}, 84 {Opt_thread_pool, "thread_pool=%d"},
83 {Opt_compress, "compress"}, 85 {Opt_compress, "compress"},
86 {Opt_compress_force, "compress-force"},
84 {Opt_ssd, "ssd"}, 87 {Opt_ssd, "ssd"},
85 {Opt_ssd_spread, "ssd_spread"}, 88 {Opt_ssd_spread, "ssd_spread"},
86 {Opt_nossd, "nossd"}, 89 {Opt_nossd, "nossd"},
@@ -88,6 +91,7 @@ static match_table_t tokens = {
88 {Opt_notreelog, "notreelog"}, 91 {Opt_notreelog, "notreelog"},
89 {Opt_flushoncommit, "flushoncommit"}, 92 {Opt_flushoncommit, "flushoncommit"},
90 {Opt_ratio, "metadata_ratio=%d"}, 93 {Opt_ratio, "metadata_ratio=%d"},
94 {Opt_discard, "discard"},
91 {Opt_err, NULL}, 95 {Opt_err, NULL},
92}; 96};
93 97
@@ -126,6 +130,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
126 substring_t args[MAX_OPT_ARGS]; 130 substring_t args[MAX_OPT_ARGS];
127 char *p, *num; 131 char *p, *num;
128 int intarg; 132 int intarg;
133 int ret = 0;
129 134
130 if (!options) 135 if (!options)
131 return 0; 136 return 0;
@@ -170,6 +175,11 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
170 printk(KERN_INFO "btrfs: use compression\n"); 175 printk(KERN_INFO "btrfs: use compression\n");
171 btrfs_set_opt(info->mount_opt, COMPRESS); 176 btrfs_set_opt(info->mount_opt, COMPRESS);
172 break; 177 break;
178 case Opt_compress_force:
179 printk(KERN_INFO "btrfs: forcing compression\n");
180 btrfs_set_opt(info->mount_opt, FORCE_COMPRESS);
181 btrfs_set_opt(info->mount_opt, COMPRESS);
182 break;
173 case Opt_ssd: 183 case Opt_ssd:
174 printk(KERN_INFO "btrfs: use ssd allocation scheme\n"); 184 printk(KERN_INFO "btrfs: use ssd allocation scheme\n");
175 btrfs_set_opt(info->mount_opt, SSD); 185 btrfs_set_opt(info->mount_opt, SSD);
@@ -257,12 +267,21 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
257 info->metadata_ratio); 267 info->metadata_ratio);
258 } 268 }
259 break; 269 break;
270 case Opt_discard:
271 btrfs_set_opt(info->mount_opt, DISCARD);
272 break;
273 case Opt_err:
274 printk(KERN_INFO "btrfs: unrecognized mount option "
275 "'%s'\n", p);
276 ret = -EINVAL;
277 goto out;
260 default: 278 default:
261 break; 279 break;
262 } 280 }
263 } 281 }
282out:
264 kfree(options); 283 kfree(options);
265 return 0; 284 return ret;
266} 285}
267 286
268/* 287/*
@@ -344,7 +363,7 @@ static int btrfs_fill_super(struct super_block *sb,
344 sb->s_export_op = &btrfs_export_ops; 363 sb->s_export_op = &btrfs_export_ops;
345 sb->s_xattr = btrfs_xattr_handlers; 364 sb->s_xattr = btrfs_xattr_handlers;
346 sb->s_time_gran = 1; 365 sb->s_time_gran = 1;
347#ifdef CONFIG_BTRFS_POSIX_ACL 366#ifdef CONFIG_BTRFS_FS_POSIX_ACL
348 sb->s_flags |= MS_POSIXACL; 367 sb->s_flags |= MS_POSIXACL;
349#endif 368#endif
350 369
@@ -400,8 +419,8 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
400 return 0; 419 return 0;
401 } 420 }
402 421
403 btrfs_start_delalloc_inodes(root); 422 btrfs_start_delalloc_inodes(root, 0);
404 btrfs_wait_ordered_extents(root, 0); 423 btrfs_wait_ordered_extents(root, 0, 0);
405 424
406 trans = btrfs_start_transaction(root, 1); 425 trans = btrfs_start_transaction(root, 1);
407 ret = btrfs_commit_transaction(trans, root); 426 ret = btrfs_commit_transaction(trans, root);
@@ -445,6 +464,8 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
445 seq_puts(seq, ",notreelog"); 464 seq_puts(seq, ",notreelog");
446 if (btrfs_test_opt(root, FLUSHONCOMMIT)) 465 if (btrfs_test_opt(root, FLUSHONCOMMIT))
447 seq_puts(seq, ",flushoncommit"); 466 seq_puts(seq, ",flushoncommit");
467 if (btrfs_test_opt(root, DISCARD))
468 seq_puts(seq, ",discard");
448 if (!(root->fs_info->sb->s_flags & MS_POSIXACL)) 469 if (!(root->fs_info->sb->s_flags & MS_POSIXACL))
449 seq_puts(seq, ",noacl"); 470 seq_puts(seq, ",noacl");
450 return 0; 471 return 0;
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 0b8f36d4400a..b2acc79f1b34 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -163,8 +163,14 @@ static void wait_current_trans(struct btrfs_root *root)
163 } 163 }
164} 164}
165 165
166enum btrfs_trans_type {
167 TRANS_START,
168 TRANS_JOIN,
169 TRANS_USERSPACE,
170};
171
166static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, 172static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
167 int num_blocks, int wait) 173 int num_blocks, int type)
168{ 174{
169 struct btrfs_trans_handle *h = 175 struct btrfs_trans_handle *h =
170 kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); 176 kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
@@ -172,7 +178,8 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
172 178
173 mutex_lock(&root->fs_info->trans_mutex); 179 mutex_lock(&root->fs_info->trans_mutex);
174 if (!root->fs_info->log_root_recovering && 180 if (!root->fs_info->log_root_recovering &&
175 ((wait == 1 && !root->fs_info->open_ioctl_trans) || wait == 2)) 181 ((type == TRANS_START && !root->fs_info->open_ioctl_trans) ||
182 type == TRANS_USERSPACE))
176 wait_current_trans(root); 183 wait_current_trans(root);
177 ret = join_transaction(root); 184 ret = join_transaction(root);
178 BUG_ON(ret); 185 BUG_ON(ret);
@@ -186,7 +193,7 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
186 h->alloc_exclude_start = 0; 193 h->alloc_exclude_start = 0;
187 h->delayed_ref_updates = 0; 194 h->delayed_ref_updates = 0;
188 195
189 if (!current->journal_info) 196 if (!current->journal_info && type != TRANS_USERSPACE)
190 current->journal_info = h; 197 current->journal_info = h;
191 198
192 root->fs_info->running_transaction->use_count++; 199 root->fs_info->running_transaction->use_count++;
@@ -198,18 +205,18 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
198struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, 205struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
199 int num_blocks) 206 int num_blocks)
200{ 207{
201 return start_transaction(root, num_blocks, 1); 208 return start_transaction(root, num_blocks, TRANS_START);
202} 209}
203struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root, 210struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root,
204 int num_blocks) 211 int num_blocks)
205{ 212{
206 return start_transaction(root, num_blocks, 0); 213 return start_transaction(root, num_blocks, TRANS_JOIN);
207} 214}
208 215
209struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r, 216struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r,
210 int num_blocks) 217 int num_blocks)
211{ 218{
212 return start_transaction(r, num_blocks, 2); 219 return start_transaction(r, num_blocks, TRANS_USERSPACE);
213} 220}
214 221
215/* wait for a transaction commit to be fully complete */ 222/* wait for a transaction commit to be fully complete */
@@ -326,6 +333,9 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
326 memset(trans, 0, sizeof(*trans)); 333 memset(trans, 0, sizeof(*trans));
327 kmem_cache_free(btrfs_trans_handle_cachep, trans); 334 kmem_cache_free(btrfs_trans_handle_cachep, trans);
328 335
336 if (throttle)
337 btrfs_run_delayed_iputs(root);
338
329 return 0; 339 return 0;
330} 340}
331 341
@@ -344,10 +354,10 @@ int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
344/* 354/*
345 * when btree blocks are allocated, they have some corresponding bits set for 355 * when btree blocks are allocated, they have some corresponding bits set for
346 * them in one of two extent_io trees. This is used to make sure all of 356 * them in one of two extent_io trees. This is used to make sure all of
347 * those extents are on disk for transaction or log commit 357 * those extents are sent to disk but does not wait on them
348 */ 358 */
349int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, 359int btrfs_write_marked_extents(struct btrfs_root *root,
350 struct extent_io_tree *dirty_pages) 360 struct extent_io_tree *dirty_pages, int mark)
351{ 361{
352 int ret; 362 int ret;
353 int err = 0; 363 int err = 0;
@@ -360,7 +370,7 @@ int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
360 370
361 while (1) { 371 while (1) {
362 ret = find_first_extent_bit(dirty_pages, start, &start, &end, 372 ret = find_first_extent_bit(dirty_pages, start, &start, &end,
363 EXTENT_DIRTY); 373 mark);
364 if (ret) 374 if (ret)
365 break; 375 break;
366 while (start <= end) { 376 while (start <= end) {
@@ -394,13 +404,36 @@ int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
394 page_cache_release(page); 404 page_cache_release(page);
395 } 405 }
396 } 406 }
407 if (err)
408 werr = err;
409 return werr;
410}
411
412/*
413 * when btree blocks are allocated, they have some corresponding bits set for
414 * them in one of two extent_io trees. This is used to make sure all of
415 * those extents are on disk for transaction or log commit. We wait
416 * on all the pages and clear them from the dirty pages state tree
417 */
418int btrfs_wait_marked_extents(struct btrfs_root *root,
419 struct extent_io_tree *dirty_pages, int mark)
420{
421 int ret;
422 int err = 0;
423 int werr = 0;
424 struct page *page;
425 struct inode *btree_inode = root->fs_info->btree_inode;
426 u64 start = 0;
427 u64 end;
428 unsigned long index;
429
397 while (1) { 430 while (1) {
398 ret = find_first_extent_bit(dirty_pages, 0, &start, &end, 431 ret = find_first_extent_bit(dirty_pages, start, &start, &end,
399 EXTENT_DIRTY); 432 mark);
400 if (ret) 433 if (ret)
401 break; 434 break;
402 435
403 clear_extent_dirty(dirty_pages, start, end, GFP_NOFS); 436 clear_extent_bits(dirty_pages, start, end, mark, GFP_NOFS);
404 while (start <= end) { 437 while (start <= end) {
405 index = start >> PAGE_CACHE_SHIFT; 438 index = start >> PAGE_CACHE_SHIFT;
406 start = (u64)(index + 1) << PAGE_CACHE_SHIFT; 439 start = (u64)(index + 1) << PAGE_CACHE_SHIFT;
@@ -424,6 +457,22 @@ int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
424 return werr; 457 return werr;
425} 458}
426 459
460/*
461 * when btree blocks are allocated, they have some corresponding bits set for
462 * them in one of two extent_io trees. This is used to make sure all of
463 * those extents are on disk for transaction or log commit
464 */
465int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
466 struct extent_io_tree *dirty_pages, int mark)
467{
468 int ret;
469 int ret2;
470
471 ret = btrfs_write_marked_extents(root, dirty_pages, mark);
472 ret2 = btrfs_wait_marked_extents(root, dirty_pages, mark);
473 return ret || ret2;
474}
475
427int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, 476int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
428 struct btrfs_root *root) 477 struct btrfs_root *root)
429{ 478{
@@ -433,7 +482,8 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
433 return filemap_write_and_wait(btree_inode->i_mapping); 482 return filemap_write_and_wait(btree_inode->i_mapping);
434 } 483 }
435 return btrfs_write_and_wait_marked_extents(root, 484 return btrfs_write_and_wait_marked_extents(root,
436 &trans->transaction->dirty_pages); 485 &trans->transaction->dirty_pages,
486 EXTENT_DIRTY);
437} 487}
438 488
439/* 489/*
@@ -451,13 +501,16 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
451{ 501{
452 int ret; 502 int ret;
453 u64 old_root_bytenr; 503 u64 old_root_bytenr;
504 u64 old_root_used;
454 struct btrfs_root *tree_root = root->fs_info->tree_root; 505 struct btrfs_root *tree_root = root->fs_info->tree_root;
455 506
507 old_root_used = btrfs_root_used(&root->root_item);
456 btrfs_write_dirty_block_groups(trans, root); 508 btrfs_write_dirty_block_groups(trans, root);
457 509
458 while (1) { 510 while (1) {
459 old_root_bytenr = btrfs_root_bytenr(&root->root_item); 511 old_root_bytenr = btrfs_root_bytenr(&root->root_item);
460 if (old_root_bytenr == root->node->start) 512 if (old_root_bytenr == root->node->start &&
513 old_root_used == btrfs_root_used(&root->root_item))
461 break; 514 break;
462 515
463 btrfs_set_root_node(&root->root_item, root->node); 516 btrfs_set_root_node(&root->root_item, root->node);
@@ -466,6 +519,7 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
466 &root->root_item); 519 &root->root_item);
467 BUG_ON(ret); 520 BUG_ON(ret);
468 521
522 old_root_used = btrfs_root_used(&root->root_item);
469 ret = btrfs_write_dirty_block_groups(trans, root); 523 ret = btrfs_write_dirty_block_groups(trans, root);
470 BUG_ON(ret); 524 BUG_ON(ret);
471 } 525 }
@@ -749,7 +803,6 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
749 memcpy(&pending->root_key, &key, sizeof(key)); 803 memcpy(&pending->root_key, &key, sizeof(key));
750fail: 804fail:
751 kfree(new_root_item); 805 kfree(new_root_item);
752 btrfs_unreserve_metadata_space(root, 6);
753 return ret; 806 return ret;
754} 807}
755 808
@@ -761,7 +814,6 @@ static noinline int finish_pending_snapshot(struct btrfs_fs_info *fs_info,
761 u64 index = 0; 814 u64 index = 0;
762 struct btrfs_trans_handle *trans; 815 struct btrfs_trans_handle *trans;
763 struct inode *parent_inode; 816 struct inode *parent_inode;
764 struct inode *inode;
765 struct btrfs_root *parent_root; 817 struct btrfs_root *parent_root;
766 818
767 parent_inode = pending->dentry->d_parent->d_inode; 819 parent_inode = pending->dentry->d_parent->d_inode;
@@ -793,8 +845,6 @@ static noinline int finish_pending_snapshot(struct btrfs_fs_info *fs_info,
793 845
794 BUG_ON(ret); 846 BUG_ON(ret);
795 847
796 inode = btrfs_lookup_dentry(parent_inode, pending->dentry);
797 d_instantiate(pending->dentry, inode);
798fail: 848fail:
799 btrfs_end_transaction(trans, fs_info->fs_root); 849 btrfs_end_transaction(trans, fs_info->fs_root);
800 return ret; 850 return ret;
@@ -948,11 +998,11 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
948 mutex_unlock(&root->fs_info->trans_mutex); 998 mutex_unlock(&root->fs_info->trans_mutex);
949 999
950 if (flush_on_commit) { 1000 if (flush_on_commit) {
951 btrfs_start_delalloc_inodes(root); 1001 btrfs_start_delalloc_inodes(root, 1);
952 ret = btrfs_wait_ordered_extents(root, 0); 1002 ret = btrfs_wait_ordered_extents(root, 0, 1);
953 BUG_ON(ret); 1003 BUG_ON(ret);
954 } else if (snap_pending) { 1004 } else if (snap_pending) {
955 ret = btrfs_wait_ordered_extents(root, 1); 1005 ret = btrfs_wait_ordered_extents(root, 0, 1);
956 BUG_ON(ret); 1006 BUG_ON(ret);
957 } 1007 }
958 1008
@@ -1070,6 +1120,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1070 current->journal_info = NULL; 1120 current->journal_info = NULL;
1071 1121
1072 kmem_cache_free(btrfs_trans_handle_cachep, trans); 1122 kmem_cache_free(btrfs_trans_handle_cachep, trans);
1123
1124 if (current != root->fs_info->transaction_kthread)
1125 btrfs_run_delayed_iputs(root);
1126
1073 return ret; 1127 return ret;
1074} 1128}
1075 1129
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 663c67404918..93c7ccb33118 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -79,6 +79,7 @@ static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans,
79 struct inode *inode) 79 struct inode *inode)
80{ 80{
81 BTRFS_I(inode)->last_trans = trans->transaction->transid; 81 BTRFS_I(inode)->last_trans = trans->transaction->transid;
82 BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid;
82} 83}
83 84
84int btrfs_end_transaction(struct btrfs_trans_handle *trans, 85int btrfs_end_transaction(struct btrfs_trans_handle *trans,
@@ -106,6 +107,10 @@ void btrfs_throttle(struct btrfs_root *root);
106int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans, 107int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
107 struct btrfs_root *root); 108 struct btrfs_root *root);
108int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, 109int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
109 struct extent_io_tree *dirty_pages); 110 struct extent_io_tree *dirty_pages, int mark);
111int btrfs_write_marked_extents(struct btrfs_root *root,
112 struct extent_io_tree *dirty_pages, int mark);
113int btrfs_wait_marked_extents(struct btrfs_root *root,
114 struct extent_io_tree *dirty_pages, int mark);
110int btrfs_transaction_in_commit(struct btrfs_fs_info *info); 115int btrfs_transaction_in_commit(struct btrfs_fs_info *info);
111#endif 116#endif
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 4edfdc2acc5f..4a9434b622ec 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -542,8 +542,8 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
542 542
543 saved_nbytes = inode_get_bytes(inode); 543 saved_nbytes = inode_get_bytes(inode);
544 /* drop any overlapping extents */ 544 /* drop any overlapping extents */
545 ret = btrfs_drop_extents(trans, root, inode, 545 ret = btrfs_drop_extents(trans, inode, start, extent_end,
546 start, extent_end, extent_end, start, &alloc_hint, 1); 546 &alloc_hint, 1);
547 BUG_ON(ret); 547 BUG_ON(ret);
548 548
549 if (found_type == BTRFS_FILE_EXTENT_REG || 549 if (found_type == BTRFS_FILE_EXTENT_REG ||
@@ -930,6 +930,17 @@ out_nowrite:
930 return 0; 930 return 0;
931} 931}
932 932
933static int insert_orphan_item(struct btrfs_trans_handle *trans,
934 struct btrfs_root *root, u64 offset)
935{
936 int ret;
937 ret = btrfs_find_orphan_item(root, offset);
938 if (ret > 0)
939 ret = btrfs_insert_orphan_item(trans, root, offset);
940 return ret;
941}
942
943
933/* 944/*
934 * There are a few corners where the link count of the file can't 945 * There are a few corners where the link count of the file can't
935 * be properly maintained during replay. So, instead of adding 946 * be properly maintained during replay. So, instead of adding
@@ -997,9 +1008,13 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
997 } 1008 }
998 BTRFS_I(inode)->index_cnt = (u64)-1; 1009 BTRFS_I(inode)->index_cnt = (u64)-1;
999 1010
1000 if (inode->i_nlink == 0 && S_ISDIR(inode->i_mode)) { 1011 if (inode->i_nlink == 0) {
1001 ret = replay_dir_deletes(trans, root, NULL, path, 1012 if (S_ISDIR(inode->i_mode)) {
1002 inode->i_ino, 1); 1013 ret = replay_dir_deletes(trans, root, NULL, path,
1014 inode->i_ino, 1);
1015 BUG_ON(ret);
1016 }
1017 ret = insert_orphan_item(trans, root, inode->i_ino);
1003 BUG_ON(ret); 1018 BUG_ON(ret);
1004 } 1019 }
1005 btrfs_free_path(path); 1020 btrfs_free_path(path);
@@ -1587,7 +1602,6 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
1587 /* inode keys are done during the first stage */ 1602 /* inode keys are done during the first stage */
1588 if (key.type == BTRFS_INODE_ITEM_KEY && 1603 if (key.type == BTRFS_INODE_ITEM_KEY &&
1589 wc->stage == LOG_WALK_REPLAY_INODES) { 1604 wc->stage == LOG_WALK_REPLAY_INODES) {
1590 struct inode *inode;
1591 struct btrfs_inode_item *inode_item; 1605 struct btrfs_inode_item *inode_item;
1592 u32 mode; 1606 u32 mode;
1593 1607
@@ -1603,31 +1617,16 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
1603 eb, i, &key); 1617 eb, i, &key);
1604 BUG_ON(ret); 1618 BUG_ON(ret);
1605 1619
1606 /* for regular files, truncate away 1620 /* for regular files, make sure corresponding
1607 * extents past the new EOF 1621 * orphan item exists. extents past the new EOF
1622 * will be truncated later by orphan cleanup.
1608 */ 1623 */
1609 if (S_ISREG(mode)) { 1624 if (S_ISREG(mode)) {
1610 inode = read_one_inode(root, 1625 ret = insert_orphan_item(wc->trans, root,
1611 key.objectid); 1626 key.objectid);
1612 BUG_ON(!inode);
1613
1614 ret = btrfs_truncate_inode_items(wc->trans,
1615 root, inode, inode->i_size,
1616 BTRFS_EXTENT_DATA_KEY);
1617 BUG_ON(ret); 1627 BUG_ON(ret);
1618
1619 /* if the nlink count is zero here, the iput
1620 * will free the inode. We bump it to make
1621 * sure it doesn't get freed until the link
1622 * count fixup is done
1623 */
1624 if (inode->i_nlink == 0) {
1625 btrfs_inc_nlink(inode);
1626 btrfs_update_inode(wc->trans,
1627 root, inode);
1628 }
1629 iput(inode);
1630 } 1628 }
1629
1631 ret = link_to_fixup_dir(wc->trans, root, 1630 ret = link_to_fixup_dir(wc->trans, root,
1632 path, key.objectid); 1631 path, key.objectid);
1633 BUG_ON(ret); 1632 BUG_ON(ret);
@@ -1977,9 +1976,11 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
1977{ 1976{
1978 int index1; 1977 int index1;
1979 int index2; 1978 int index2;
1979 int mark;
1980 int ret; 1980 int ret;
1981 struct btrfs_root *log = root->log_root; 1981 struct btrfs_root *log = root->log_root;
1982 struct btrfs_root *log_root_tree = root->fs_info->log_root_tree; 1982 struct btrfs_root *log_root_tree = root->fs_info->log_root_tree;
1983 unsigned long log_transid = 0;
1983 1984
1984 mutex_lock(&root->log_mutex); 1985 mutex_lock(&root->log_mutex);
1985 index1 = root->log_transid % 2; 1986 index1 = root->log_transid % 2;
@@ -1994,12 +1995,13 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
1994 if (atomic_read(&root->log_commit[(index1 + 1) % 2])) 1995 if (atomic_read(&root->log_commit[(index1 + 1) % 2]))
1995 wait_log_commit(trans, root, root->log_transid - 1); 1996 wait_log_commit(trans, root, root->log_transid - 1);
1996 1997
1997 while (root->log_multiple_pids) { 1998 while (1) {
1998 unsigned long batch = root->log_batch; 1999 unsigned long batch = root->log_batch;
1999 mutex_unlock(&root->log_mutex); 2000 if (root->log_multiple_pids) {
2000 schedule_timeout_uninterruptible(1); 2001 mutex_unlock(&root->log_mutex);
2001 mutex_lock(&root->log_mutex); 2002 schedule_timeout_uninterruptible(1);
2002 2003 mutex_lock(&root->log_mutex);
2004 }
2003 wait_for_writer(trans, root); 2005 wait_for_writer(trans, root);
2004 if (batch == root->log_batch) 2006 if (batch == root->log_batch)
2005 break; 2007 break;
@@ -2012,7 +2014,16 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2012 goto out; 2014 goto out;
2013 } 2015 }
2014 2016
2015 ret = btrfs_write_and_wait_marked_extents(log, &log->dirty_log_pages); 2017 log_transid = root->log_transid;
2018 if (log_transid % 2 == 0)
2019 mark = EXTENT_DIRTY;
2020 else
2021 mark = EXTENT_NEW;
2022
2023 /* we start IO on all the marked extents here, but we don't actually
2024 * wait for them until later.
2025 */
2026 ret = btrfs_write_marked_extents(log, &log->dirty_log_pages, mark);
2016 BUG_ON(ret); 2027 BUG_ON(ret);
2017 2028
2018 btrfs_set_root_node(&log->root_item, log->node); 2029 btrfs_set_root_node(&log->root_item, log->node);
@@ -2023,9 +2034,9 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2023 root->log_start_pid = 0; 2034 root->log_start_pid = 0;
2024 smp_mb(); 2035 smp_mb();
2025 /* 2036 /*
2026 * log tree has been flushed to disk, new modifications of 2037 * IO has been started, blocks of the log tree have WRITTEN flag set
2027 * the log will be written to new positions. so it's safe to 2038 * in their headers. new modifications of the log will be written to
2028 * allow log writers to go in. 2039 * new positions. so it's safe to allow log writers to go in.
2029 */ 2040 */
2030 mutex_unlock(&root->log_mutex); 2041 mutex_unlock(&root->log_mutex);
2031 2042
@@ -2046,6 +2057,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2046 2057
2047 index2 = log_root_tree->log_transid % 2; 2058 index2 = log_root_tree->log_transid % 2;
2048 if (atomic_read(&log_root_tree->log_commit[index2])) { 2059 if (atomic_read(&log_root_tree->log_commit[index2])) {
2060 btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
2049 wait_log_commit(trans, log_root_tree, 2061 wait_log_commit(trans, log_root_tree,
2050 log_root_tree->log_transid); 2062 log_root_tree->log_transid);
2051 mutex_unlock(&log_root_tree->log_mutex); 2063 mutex_unlock(&log_root_tree->log_mutex);
@@ -2065,14 +2077,17 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2065 * check the full commit flag again 2077 * check the full commit flag again
2066 */ 2078 */
2067 if (root->fs_info->last_trans_log_full_commit == trans->transid) { 2079 if (root->fs_info->last_trans_log_full_commit == trans->transid) {
2080 btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
2068 mutex_unlock(&log_root_tree->log_mutex); 2081 mutex_unlock(&log_root_tree->log_mutex);
2069 ret = -EAGAIN; 2082 ret = -EAGAIN;
2070 goto out_wake_log_root; 2083 goto out_wake_log_root;
2071 } 2084 }
2072 2085
2073 ret = btrfs_write_and_wait_marked_extents(log_root_tree, 2086 ret = btrfs_write_and_wait_marked_extents(log_root_tree,
2074 &log_root_tree->dirty_log_pages); 2087 &log_root_tree->dirty_log_pages,
2088 EXTENT_DIRTY | EXTENT_NEW);
2075 BUG_ON(ret); 2089 BUG_ON(ret);
2090 btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
2076 2091
2077 btrfs_set_super_log_root(&root->fs_info->super_for_commit, 2092 btrfs_set_super_log_root(&root->fs_info->super_for_commit,
2078 log_root_tree->node->start); 2093 log_root_tree->node->start);
@@ -2092,9 +2107,14 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2092 * the running transaction open, so a full commit can't hop 2107 * the running transaction open, so a full commit can't hop
2093 * in and cause problems either. 2108 * in and cause problems either.
2094 */ 2109 */
2095 write_ctree_super(trans, root->fs_info->tree_root, 2); 2110 write_ctree_super(trans, root->fs_info->tree_root, 1);
2096 ret = 0; 2111 ret = 0;
2097 2112
2113 mutex_lock(&root->log_mutex);
2114 if (root->last_log_commit < log_transid)
2115 root->last_log_commit = log_transid;
2116 mutex_unlock(&root->log_mutex);
2117
2098out_wake_log_root: 2118out_wake_log_root:
2099 atomic_set(&log_root_tree->log_commit[index2], 0); 2119 atomic_set(&log_root_tree->log_commit[index2], 0);
2100 smp_mb(); 2120 smp_mb();
@@ -2133,12 +2153,12 @@ int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root)
2133 2153
2134 while (1) { 2154 while (1) {
2135 ret = find_first_extent_bit(&log->dirty_log_pages, 2155 ret = find_first_extent_bit(&log->dirty_log_pages,
2136 0, &start, &end, EXTENT_DIRTY); 2156 0, &start, &end, EXTENT_DIRTY | EXTENT_NEW);
2137 if (ret) 2157 if (ret)
2138 break; 2158 break;
2139 2159
2140 clear_extent_dirty(&log->dirty_log_pages, 2160 clear_extent_bits(&log->dirty_log_pages, start, end,
2141 start, end, GFP_NOFS); 2161 EXTENT_DIRTY | EXTENT_NEW, GFP_NOFS);
2142 } 2162 }
2143 2163
2144 if (log->log_transid > 0) { 2164 if (log->log_transid > 0) {
@@ -2862,6 +2882,21 @@ out:
2862 return ret; 2882 return ret;
2863} 2883}
2864 2884
2885static int inode_in_log(struct btrfs_trans_handle *trans,
2886 struct inode *inode)
2887{
2888 struct btrfs_root *root = BTRFS_I(inode)->root;
2889 int ret = 0;
2890
2891 mutex_lock(&root->log_mutex);
2892 if (BTRFS_I(inode)->logged_trans == trans->transid &&
2893 BTRFS_I(inode)->last_sub_trans <= root->last_log_commit)
2894 ret = 1;
2895 mutex_unlock(&root->log_mutex);
2896 return ret;
2897}
2898
2899
2865/* 2900/*
2866 * helper function around btrfs_log_inode to make sure newly created 2901 * helper function around btrfs_log_inode to make sure newly created
2867 * parent directories also end up in the log. A minimal inode and backref 2902 * parent directories also end up in the log. A minimal inode and backref
@@ -2901,6 +2936,11 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
2901 if (ret) 2936 if (ret)
2902 goto end_no_trans; 2937 goto end_no_trans;
2903 2938
2939 if (inode_in_log(trans, inode)) {
2940 ret = BTRFS_NO_LOG_SYNC;
2941 goto end_no_trans;
2942 }
2943
2904 start_log_trans(trans, root); 2944 start_log_trans(trans, root);
2905 2945
2906 ret = btrfs_log_inode(trans, root, inode, inode_only); 2946 ret = btrfs_log_inode(trans, root, inode, inode_only);
diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h
index d09c7609e16b..0776eacb5083 100644
--- a/fs/btrfs/tree-log.h
+++ b/fs/btrfs/tree-log.h
@@ -19,6 +19,9 @@
19#ifndef __TREE_LOG_ 19#ifndef __TREE_LOG_
20#define __TREE_LOG_ 20#define __TREE_LOG_
21 21
22/* return value for btrfs_log_dentry_safe that means we don't need to log it at all */
23#define BTRFS_NO_LOG_SYNC 256
24
22int btrfs_sync_log(struct btrfs_trans_handle *trans, 25int btrfs_sync_log(struct btrfs_trans_handle *trans,
23 struct btrfs_root *root); 26 struct btrfs_root *root);
24int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root); 27int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 7eda483d7b5a..41ecbb2347f2 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1135,7 +1135,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
1135 root->fs_info->avail_metadata_alloc_bits; 1135 root->fs_info->avail_metadata_alloc_bits;
1136 1136
1137 if ((all_avail & BTRFS_BLOCK_GROUP_RAID10) && 1137 if ((all_avail & BTRFS_BLOCK_GROUP_RAID10) &&
1138 root->fs_info->fs_devices->rw_devices <= 4) { 1138 root->fs_info->fs_devices->num_devices <= 4) {
1139 printk(KERN_ERR "btrfs: unable to go below four devices " 1139 printk(KERN_ERR "btrfs: unable to go below four devices "
1140 "on raid10\n"); 1140 "on raid10\n");
1141 ret = -EINVAL; 1141 ret = -EINVAL;
@@ -1143,7 +1143,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
1143 } 1143 }
1144 1144
1145 if ((all_avail & BTRFS_BLOCK_GROUP_RAID1) && 1145 if ((all_avail & BTRFS_BLOCK_GROUP_RAID1) &&
1146 root->fs_info->fs_devices->rw_devices <= 2) { 1146 root->fs_info->fs_devices->num_devices <= 2) {
1147 printk(KERN_ERR "btrfs: unable to go below two " 1147 printk(KERN_ERR "btrfs: unable to go below two "
1148 "devices on raid1\n"); 1148 "devices on raid1\n");
1149 ret = -EINVAL; 1149 ret = -EINVAL;
@@ -1434,8 +1434,8 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
1434 return -EINVAL; 1434 return -EINVAL;
1435 1435
1436 bdev = open_bdev_exclusive(device_path, 0, root->fs_info->bdev_holder); 1436 bdev = open_bdev_exclusive(device_path, 0, root->fs_info->bdev_holder);
1437 if (!bdev) 1437 if (IS_ERR(bdev))
1438 return -EIO; 1438 return PTR_ERR(bdev);
1439 1439
1440 if (root->fs_info->fs_devices->seeding) { 1440 if (root->fs_info->fs_devices->seeding) {
1441 seeding_dev = 1; 1441 seeding_dev = 1;
@@ -2209,7 +2209,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
2209 max_chunk_size = 10 * calc_size; 2209 max_chunk_size = 10 * calc_size;
2210 min_stripe_size = 64 * 1024 * 1024; 2210 min_stripe_size = 64 * 1024 * 1024;
2211 } else if (type & BTRFS_BLOCK_GROUP_METADATA) { 2211 } else if (type & BTRFS_BLOCK_GROUP_METADATA) {
2212 max_chunk_size = 4 * calc_size; 2212 max_chunk_size = 256 * 1024 * 1024;
2213 min_stripe_size = 32 * 1024 * 1024; 2213 min_stripe_size = 32 * 1024 * 1024;
2214 } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) { 2214 } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) {
2215 calc_size = 8 * 1024 * 1024; 2215 calc_size = 8 * 1024 * 1024;
@@ -2538,6 +2538,11 @@ int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset)
2538 if (!em) 2538 if (!em)
2539 return 1; 2539 return 1;
2540 2540
2541 if (btrfs_test_opt(root, DEGRADED)) {
2542 free_extent_map(em);
2543 return 0;
2544 }
2545
2541 map = (struct map_lookup *)em->bdev; 2546 map = (struct map_lookup *)em->bdev;
2542 for (i = 0; i < map->num_stripes; i++) { 2547 for (i = 0; i < map->num_stripes; i++) {
2543 if (!map->stripes[i].dev->writeable) { 2548 if (!map->stripes[i].dev->writeable) {
@@ -2649,8 +2654,10 @@ again:
2649 em = lookup_extent_mapping(em_tree, logical, *length); 2654 em = lookup_extent_mapping(em_tree, logical, *length);
2650 read_unlock(&em_tree->lock); 2655 read_unlock(&em_tree->lock);
2651 2656
2652 if (!em && unplug_page) 2657 if (!em && unplug_page) {
2658 kfree(multi);
2653 return 0; 2659 return 0;
2660 }
2654 2661
2655 if (!em) { 2662 if (!em) {
2656 printk(KERN_CRIT "unable to find logical %llu len %llu\n", 2663 printk(KERN_CRIT "unable to find logical %llu len %llu\n",
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index b0fc93f95fd0..193b58f7d3f3 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -85,22 +85,23 @@ out:
85 return ret; 85 return ret;
86} 86}
87 87
88int __btrfs_setxattr(struct inode *inode, const char *name, 88static int do_setxattr(struct btrfs_trans_handle *trans,
89 const void *value, size_t size, int flags) 89 struct inode *inode, const char *name,
90 const void *value, size_t size, int flags)
90{ 91{
91 struct btrfs_dir_item *di; 92 struct btrfs_dir_item *di;
92 struct btrfs_root *root = BTRFS_I(inode)->root; 93 struct btrfs_root *root = BTRFS_I(inode)->root;
93 struct btrfs_trans_handle *trans;
94 struct btrfs_path *path; 94 struct btrfs_path *path;
95 int ret = 0, mod = 0; 95 size_t name_len = strlen(name);
96 int ret = 0;
97
98 if (name_len + size > BTRFS_MAX_XATTR_SIZE(root))
99 return -ENOSPC;
96 100
97 path = btrfs_alloc_path(); 101 path = btrfs_alloc_path();
98 if (!path) 102 if (!path)
99 return -ENOMEM; 103 return -ENOMEM;
100 104
101 trans = btrfs_join_transaction(root, 1);
102 btrfs_set_trans_block_group(trans, inode);
103
104 /* first lets see if we already have this xattr */ 105 /* first lets see if we already have this xattr */
105 di = btrfs_lookup_xattr(trans, root, path, inode->i_ino, name, 106 di = btrfs_lookup_xattr(trans, root, path, inode->i_ino, name,
106 strlen(name), -1); 107 strlen(name), -1);
@@ -118,15 +119,12 @@ int __btrfs_setxattr(struct inode *inode, const char *name,
118 } 119 }
119 120
120 ret = btrfs_delete_one_dir_name(trans, root, path, di); 121 ret = btrfs_delete_one_dir_name(trans, root, path, di);
121 if (ret) 122 BUG_ON(ret);
122 goto out;
123 btrfs_release_path(root, path); 123 btrfs_release_path(root, path);
124 124
125 /* if we don't have a value then we are removing the xattr */ 125 /* if we don't have a value then we are removing the xattr */
126 if (!value) { 126 if (!value)
127 mod = 1;
128 goto out; 127 goto out;
129 }
130 } else { 128 } else {
131 btrfs_release_path(root, path); 129 btrfs_release_path(root, path);
132 130
@@ -138,20 +136,45 @@ int __btrfs_setxattr(struct inode *inode, const char *name,
138 } 136 }
139 137
140 /* ok we have to create a completely new xattr */ 138 /* ok we have to create a completely new xattr */
141 ret = btrfs_insert_xattr_item(trans, root, name, strlen(name), 139 ret = btrfs_insert_xattr_item(trans, root, path, inode->i_ino,
142 value, size, inode->i_ino); 140 name, name_len, value, size);
141 BUG_ON(ret);
142out:
143 btrfs_free_path(path);
144 return ret;
145}
146
147int __btrfs_setxattr(struct btrfs_trans_handle *trans,
148 struct inode *inode, const char *name,
149 const void *value, size_t size, int flags)
150{
151 struct btrfs_root *root = BTRFS_I(inode)->root;
152 int ret;
153
154 if (trans)
155 return do_setxattr(trans, inode, name, value, size, flags);
156
157 ret = btrfs_reserve_metadata_space(root, 2);
143 if (ret) 158 if (ret)
144 goto out; 159 return ret;
145 mod = 1;
146 160
147out: 161 trans = btrfs_start_transaction(root, 1);
148 if (mod) { 162 if (!trans) {
149 inode->i_ctime = CURRENT_TIME; 163 ret = -ENOMEM;
150 ret = btrfs_update_inode(trans, root, inode); 164 goto out;
151 } 165 }
166 btrfs_set_trans_block_group(trans, inode);
152 167
153 btrfs_end_transaction(trans, root); 168 ret = do_setxattr(trans, inode, name, value, size, flags);
154 btrfs_free_path(path); 169 if (ret)
170 goto out;
171
172 inode->i_ctime = CURRENT_TIME;
173 ret = btrfs_update_inode(trans, root, inode);
174 BUG_ON(ret);
175out:
176 btrfs_end_transaction_throttle(trans, root);
177 btrfs_unreserve_metadata_space(root, 2);
155 return ret; 178 return ret;
156} 179}
157 180
@@ -260,7 +283,7 @@ err:
260 * attributes are handled directly. 283 * attributes are handled directly.
261 */ 284 */
262struct xattr_handler *btrfs_xattr_handlers[] = { 285struct xattr_handler *btrfs_xattr_handlers[] = {
263#ifdef CONFIG_BTRFS_POSIX_ACL 286#ifdef CONFIG_BTRFS_FS_POSIX_ACL
264 &btrfs_xattr_acl_access_handler, 287 &btrfs_xattr_acl_access_handler,
265 &btrfs_xattr_acl_default_handler, 288 &btrfs_xattr_acl_default_handler,
266#endif 289#endif
@@ -314,7 +337,9 @@ int btrfs_setxattr(struct dentry *dentry, const char *name, const void *value,
314 337
315 if (size == 0) 338 if (size == 0)
316 value = ""; /* empty EA, do not remove */ 339 value = ""; /* empty EA, do not remove */
317 return __btrfs_setxattr(dentry->d_inode, name, value, size, flags); 340
341 return __btrfs_setxattr(NULL, dentry->d_inode, name, value, size,
342 flags);
318} 343}
319 344
320int btrfs_removexattr(struct dentry *dentry, const char *name) 345int btrfs_removexattr(struct dentry *dentry, const char *name)
@@ -329,10 +354,13 @@ int btrfs_removexattr(struct dentry *dentry, const char *name)
329 354
330 if (!btrfs_is_valid_xattr(name)) 355 if (!btrfs_is_valid_xattr(name))
331 return -EOPNOTSUPP; 356 return -EOPNOTSUPP;
332 return __btrfs_setxattr(dentry->d_inode, name, NULL, 0, XATTR_REPLACE); 357
358 return __btrfs_setxattr(NULL, dentry->d_inode, name, NULL, 0,
359 XATTR_REPLACE);
333} 360}
334 361
335int btrfs_xattr_security_init(struct inode *inode, struct inode *dir) 362int btrfs_xattr_security_init(struct btrfs_trans_handle *trans,
363 struct inode *inode, struct inode *dir)
336{ 364{
337 int err; 365 int err;
338 size_t len; 366 size_t len;
@@ -354,7 +382,7 @@ int btrfs_xattr_security_init(struct inode *inode, struct inode *dir)
354 } else { 382 } else {
355 strcpy(name, XATTR_SECURITY_PREFIX); 383 strcpy(name, XATTR_SECURITY_PREFIX);
356 strcpy(name + XATTR_SECURITY_PREFIX_LEN, suffix); 384 strcpy(name + XATTR_SECURITY_PREFIX_LEN, suffix);
357 err = __btrfs_setxattr(inode, name, value, len, 0); 385 err = __btrfs_setxattr(trans, inode, name, value, len, 0);
358 kfree(name); 386 kfree(name);
359 } 387 }
360 388
diff --git a/fs/btrfs/xattr.h b/fs/btrfs/xattr.h
index c71e9c3cf3f7..721efa0346e0 100644
--- a/fs/btrfs/xattr.h
+++ b/fs/btrfs/xattr.h
@@ -27,15 +27,16 @@ extern struct xattr_handler *btrfs_xattr_handlers[];
27 27
28extern ssize_t __btrfs_getxattr(struct inode *inode, const char *name, 28extern ssize_t __btrfs_getxattr(struct inode *inode, const char *name,
29 void *buffer, size_t size); 29 void *buffer, size_t size);
30extern int __btrfs_setxattr(struct inode *inode, const char *name, 30extern int __btrfs_setxattr(struct btrfs_trans_handle *trans,
31 const void *value, size_t size, int flags); 31 struct inode *inode, const char *name,
32 32 const void *value, size_t size, int flags);
33extern ssize_t btrfs_getxattr(struct dentry *dentry, const char *name, 33extern ssize_t btrfs_getxattr(struct dentry *dentry, const char *name,
34 void *buffer, size_t size); 34 void *buffer, size_t size);
35extern int btrfs_setxattr(struct dentry *dentry, const char *name, 35extern int btrfs_setxattr(struct dentry *dentry, const char *name,
36 const void *value, size_t size, int flags); 36 const void *value, size_t size, int flags);
37extern int btrfs_removexattr(struct dentry *dentry, const char *name); 37extern int btrfs_removexattr(struct dentry *dentry, const char *name);
38 38
39extern int btrfs_xattr_security_init(struct inode *inode, struct inode *dir); 39extern int btrfs_xattr_security_init(struct btrfs_trans_handle *trans,
40 struct inode *inode, struct inode *dir);
40 41
41#endif /* __XATTR__ */ 42#endif /* __XATTR__ */