aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs')
-rw-r--r--fs/btrfs/acl.c82
-rw-r--r--fs/btrfs/async-thread.c1
-rw-r--r--fs/btrfs/btrfs_inode.h10
-rw-r--r--fs/btrfs/compression.c23
-rw-r--r--fs/btrfs/ctree.c234
-rw-r--r--fs/btrfs/ctree.h58
-rw-r--r--fs/btrfs/delayed-ref.c1
-rw-r--r--fs/btrfs/dir-item.c19
-rw-r--r--fs/btrfs/disk-io.c84
-rw-r--r--fs/btrfs/export.c4
-rw-r--r--fs/btrfs/extent-tree.c167
-rw-r--r--fs/btrfs/extent_io.c102
-rw-r--r--fs/btrfs/extent_io.h10
-rw-r--r--fs/btrfs/extent_map.c19
-rw-r--r--fs/btrfs/file-item.c1
-rw-r--r--fs/btrfs/file.c757
-rw-r--r--fs/btrfs/free-space-cache.c5
-rw-r--r--fs/btrfs/inode.c822
-rw-r--r--fs/btrfs/ioctl.c749
-rw-r--r--fs/btrfs/ioctl.h111
-rw-r--r--fs/btrfs/locking.c1
-rw-r--r--fs/btrfs/ordered-data.c161
-rw-r--r--fs/btrfs/ordered-data.h14
-rw-r--r--fs/btrfs/ref-cache.c1
-rw-r--r--fs/btrfs/ref-cache.h2
-rw-r--r--fs/btrfs/relocation.c52
-rw-r--r--fs/btrfs/super.c279
-rw-r--r--fs/btrfs/sysfs.c4
-rw-r--r--fs/btrfs/transaction.c162
-rw-r--r--fs/btrfs/transaction.h6
-rw-r--r--fs/btrfs/tree-log.c89
-rw-r--r--fs/btrfs/volumes.c75
-rw-r--r--fs/btrfs/xattr.c80
-rw-r--r--fs/btrfs/xattr.h9
34 files changed, 2697 insertions, 1497 deletions
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 361604244271..6ef7b26724ec 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -22,6 +22,7 @@
22#include <linux/posix_acl_xattr.h> 22#include <linux/posix_acl_xattr.h>
23#include <linux/posix_acl.h> 23#include <linux/posix_acl.h>
24#include <linux/sched.h> 24#include <linux/sched.h>
25#include <linux/slab.h>
25 26
26#include "ctree.h" 27#include "ctree.h"
27#include "btrfs_inode.h" 28#include "btrfs_inode.h"
@@ -73,13 +74,13 @@ static struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
73 return acl; 74 return acl;
74} 75}
75 76
76static int btrfs_xattr_get_acl(struct inode *inode, int type, 77static int btrfs_xattr_acl_get(struct dentry *dentry, const char *name,
77 void *value, size_t size) 78 void *value, size_t size, int type)
78{ 79{
79 struct posix_acl *acl; 80 struct posix_acl *acl;
80 int ret = 0; 81 int ret = 0;
81 82
82 acl = btrfs_get_acl(inode, type); 83 acl = btrfs_get_acl(dentry->d_inode, type);
83 84
84 if (IS_ERR(acl)) 85 if (IS_ERR(acl))
85 return PTR_ERR(acl); 86 return PTR_ERR(acl);
@@ -94,7 +95,8 @@ static int btrfs_xattr_get_acl(struct inode *inode, int type,
94/* 95/*
95 * Needs to be called with fs_mutex held 96 * Needs to be called with fs_mutex held
96 */ 97 */
97static int btrfs_set_acl(struct inode *inode, struct posix_acl *acl, int type) 98static int btrfs_set_acl(struct btrfs_trans_handle *trans,
99 struct inode *inode, struct posix_acl *acl, int type)
98{ 100{
99 int ret, size = 0; 101 int ret, size = 0;
100 const char *name; 102 const char *name;
@@ -111,12 +113,14 @@ static int btrfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
111 switch (type) { 113 switch (type) {
112 case ACL_TYPE_ACCESS: 114 case ACL_TYPE_ACCESS:
113 mode = inode->i_mode; 115 mode = inode->i_mode;
114 ret = posix_acl_equiv_mode(acl, &mode);
115 if (ret < 0)
116 return ret;
117 ret = 0;
118 inode->i_mode = mode;
119 name = POSIX_ACL_XATTR_ACCESS; 116 name = POSIX_ACL_XATTR_ACCESS;
117 if (acl) {
118 ret = posix_acl_equiv_mode(acl, &mode);
119 if (ret < 0)
120 return ret;
121 inode->i_mode = mode;
122 }
123 ret = 0;
120 break; 124 break;
121 case ACL_TYPE_DEFAULT: 125 case ACL_TYPE_DEFAULT:
122 if (!S_ISDIR(inode->i_mode)) 126 if (!S_ISDIR(inode->i_mode))
@@ -140,8 +144,7 @@ static int btrfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
140 goto out; 144 goto out;
141 } 145 }
142 146
143 ret = __btrfs_setxattr(inode, name, value, size, 0); 147 ret = __btrfs_setxattr(trans, inode, name, value, size, 0);
144
145out: 148out:
146 kfree(value); 149 kfree(value);
147 150
@@ -151,10 +154,10 @@ out:
151 return ret; 154 return ret;
152} 155}
153 156
154static int btrfs_xattr_set_acl(struct inode *inode, int type, 157static int btrfs_xattr_acl_set(struct dentry *dentry, const char *name,
155 const void *value, size_t size) 158 const void *value, size_t size, int flags, int type)
156{ 159{
157 int ret = 0; 160 int ret;
158 struct posix_acl *acl = NULL; 161 struct posix_acl *acl = NULL;
159 162
160 if (value) { 163 if (value) {
@@ -167,38 +170,13 @@ static int btrfs_xattr_set_acl(struct inode *inode, int type,
167 } 170 }
168 } 171 }
169 172
170 ret = btrfs_set_acl(inode, acl, type); 173 ret = btrfs_set_acl(NULL, dentry->d_inode, acl, type);
171 174
172 posix_acl_release(acl); 175 posix_acl_release(acl);
173 176
174 return ret; 177 return ret;
175} 178}
176 179
177
178static int btrfs_xattr_acl_access_get(struct inode *inode, const char *name,
179 void *value, size_t size)
180{
181 return btrfs_xattr_get_acl(inode, ACL_TYPE_ACCESS, value, size);
182}
183
184static int btrfs_xattr_acl_access_set(struct inode *inode, const char *name,
185 const void *value, size_t size, int flags)
186{
187 return btrfs_xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size);
188}
189
190static int btrfs_xattr_acl_default_get(struct inode *inode, const char *name,
191 void *value, size_t size)
192{
193 return btrfs_xattr_get_acl(inode, ACL_TYPE_DEFAULT, value, size);
194}
195
196static int btrfs_xattr_acl_default_set(struct inode *inode, const char *name,
197 const void *value, size_t size, int flags)
198{
199 return btrfs_xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size);
200}
201
202int btrfs_check_acl(struct inode *inode, int mask) 180int btrfs_check_acl(struct inode *inode, int mask)
203{ 181{
204 struct posix_acl *acl; 182 struct posix_acl *acl;
@@ -221,7 +199,8 @@ int btrfs_check_acl(struct inode *inode, int mask)
221 * stuff has been fixed to work with that. If the locking stuff changes, we 199 * stuff has been fixed to work with that. If the locking stuff changes, we
222 * need to re-evaluate the acl locking stuff. 200 * need to re-evaluate the acl locking stuff.
223 */ 201 */
224int btrfs_init_acl(struct inode *inode, struct inode *dir) 202int btrfs_init_acl(struct btrfs_trans_handle *trans,
203 struct inode *inode, struct inode *dir)
225{ 204{
226 struct posix_acl *acl = NULL; 205 struct posix_acl *acl = NULL;
227 int ret = 0; 206 int ret = 0;
@@ -246,7 +225,8 @@ int btrfs_init_acl(struct inode *inode, struct inode *dir)
246 mode_t mode; 225 mode_t mode;
247 226
248 if (S_ISDIR(inode->i_mode)) { 227 if (S_ISDIR(inode->i_mode)) {
249 ret = btrfs_set_acl(inode, acl, ACL_TYPE_DEFAULT); 228 ret = btrfs_set_acl(trans, inode, acl,
229 ACL_TYPE_DEFAULT);
250 if (ret) 230 if (ret)
251 goto failed; 231 goto failed;
252 } 232 }
@@ -261,10 +241,11 @@ int btrfs_init_acl(struct inode *inode, struct inode *dir)
261 inode->i_mode = mode; 241 inode->i_mode = mode;
262 if (ret > 0) { 242 if (ret > 0) {
263 /* we need an acl */ 243 /* we need an acl */
264 ret = btrfs_set_acl(inode, clone, 244 ret = btrfs_set_acl(trans, inode, clone,
265 ACL_TYPE_ACCESS); 245 ACL_TYPE_ACCESS);
266 } 246 }
267 } 247 }
248 posix_acl_release(clone);
268 } 249 }
269failed: 250failed:
270 posix_acl_release(acl); 251 posix_acl_release(acl);
@@ -294,7 +275,7 @@ int btrfs_acl_chmod(struct inode *inode)
294 275
295 ret = posix_acl_chmod_masq(clone, inode->i_mode); 276 ret = posix_acl_chmod_masq(clone, inode->i_mode);
296 if (!ret) 277 if (!ret)
297 ret = btrfs_set_acl(inode, clone, ACL_TYPE_ACCESS); 278 ret = btrfs_set_acl(NULL, inode, clone, ACL_TYPE_ACCESS);
298 279
299 posix_acl_release(clone); 280 posix_acl_release(clone);
300 281
@@ -303,14 +284,16 @@ int btrfs_acl_chmod(struct inode *inode)
303 284
304struct xattr_handler btrfs_xattr_acl_default_handler = { 285struct xattr_handler btrfs_xattr_acl_default_handler = {
305 .prefix = POSIX_ACL_XATTR_DEFAULT, 286 .prefix = POSIX_ACL_XATTR_DEFAULT,
306 .get = btrfs_xattr_acl_default_get, 287 .flags = ACL_TYPE_DEFAULT,
307 .set = btrfs_xattr_acl_default_set, 288 .get = btrfs_xattr_acl_get,
289 .set = btrfs_xattr_acl_set,
308}; 290};
309 291
310struct xattr_handler btrfs_xattr_acl_access_handler = { 292struct xattr_handler btrfs_xattr_acl_access_handler = {
311 .prefix = POSIX_ACL_XATTR_ACCESS, 293 .prefix = POSIX_ACL_XATTR_ACCESS,
312 .get = btrfs_xattr_acl_access_get, 294 .flags = ACL_TYPE_ACCESS,
313 .set = btrfs_xattr_acl_access_set, 295 .get = btrfs_xattr_acl_get,
296 .set = btrfs_xattr_acl_set,
314}; 297};
315 298
316#else /* CONFIG_BTRFS_FS_POSIX_ACL */ 299#else /* CONFIG_BTRFS_FS_POSIX_ACL */
@@ -320,7 +303,8 @@ int btrfs_acl_chmod(struct inode *inode)
320 return 0; 303 return 0;
321} 304}
322 305
323int btrfs_init_acl(struct inode *inode, struct inode *dir) 306int btrfs_init_acl(struct btrfs_trans_handle *trans,
307 struct inode *inode, struct inode *dir)
324{ 308{
325 return 0; 309 return 0;
326} 310}
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index c0861e781cdb..462859a30141 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -17,6 +17,7 @@
17 */ 17 */
18 18
19#include <linux/kthread.h> 19#include <linux/kthread.h>
20#include <linux/slab.h>
20#include <linux/list.h> 21#include <linux/list.h>
21#include <linux/spinlock.h> 22#include <linux/spinlock.h>
22#include <linux/freezer.h> 23#include <linux/freezer.h>
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index f6783a42f010..7a4dee199832 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -44,9 +44,6 @@ struct btrfs_inode {
44 */ 44 */
45 struct extent_io_tree io_failure_tree; 45 struct extent_io_tree io_failure_tree;
46 46
47 /* held while inesrting or deleting extents from files */
48 struct mutex extent_mutex;
49
50 /* held while logging the inode in tree-log.c */ 47 /* held while logging the inode in tree-log.c */
51 struct mutex log_mutex; 48 struct mutex log_mutex;
52 49
@@ -156,6 +153,11 @@ struct btrfs_inode {
156 unsigned ordered_data_close:1; 153 unsigned ordered_data_close:1;
157 unsigned dummy_inode:1; 154 unsigned dummy_inode:1;
158 155
156 /*
157 * always compress this one file
158 */
159 unsigned force_compress:1;
160
159 struct inode vfs_inode; 161 struct inode vfs_inode;
160}; 162};
161 163
@@ -166,7 +168,7 @@ static inline struct btrfs_inode *BTRFS_I(struct inode *inode)
166 168
167static inline void btrfs_i_size_write(struct inode *inode, u64 size) 169static inline void btrfs_i_size_write(struct inode *inode, u64 size)
168{ 170{
169 inode->i_size = size; 171 i_size_write(inode, size);
170 BTRFS_I(inode)->disk_i_size = size; 172 BTRFS_I(inode)->disk_i_size = size;
171} 173}
172 174
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index a11a32058b50..396039b3a8a2 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -31,7 +31,7 @@
31#include <linux/swap.h> 31#include <linux/swap.h>
32#include <linux/writeback.h> 32#include <linux/writeback.h>
33#include <linux/bit_spinlock.h> 33#include <linux/bit_spinlock.h>
34#include <linux/pagevec.h> 34#include <linux/slab.h>
35#include "compat.h" 35#include "compat.h"
36#include "ctree.h" 36#include "ctree.h"
37#include "disk-io.h" 37#include "disk-io.h"
@@ -445,7 +445,6 @@ static noinline int add_ra_bio_pages(struct inode *inode,
445 unsigned long nr_pages = 0; 445 unsigned long nr_pages = 0;
446 struct extent_map *em; 446 struct extent_map *em;
447 struct address_space *mapping = inode->i_mapping; 447 struct address_space *mapping = inode->i_mapping;
448 struct pagevec pvec;
449 struct extent_map_tree *em_tree; 448 struct extent_map_tree *em_tree;
450 struct extent_io_tree *tree; 449 struct extent_io_tree *tree;
451 u64 end; 450 u64 end;
@@ -461,7 +460,6 @@ static noinline int add_ra_bio_pages(struct inode *inode,
461 460
462 end_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT; 461 end_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
463 462
464 pagevec_init(&pvec, 0);
465 while (last_offset < compressed_end) { 463 while (last_offset < compressed_end) {
466 page_index = last_offset >> PAGE_CACHE_SHIFT; 464 page_index = last_offset >> PAGE_CACHE_SHIFT;
467 465
@@ -478,26 +476,17 @@ static noinline int add_ra_bio_pages(struct inode *inode,
478 goto next; 476 goto next;
479 } 477 }
480 478
481 page = alloc_page(mapping_gfp_mask(mapping) | GFP_NOFS); 479 page = __page_cache_alloc(mapping_gfp_mask(mapping) &
480 ~__GFP_FS);
482 if (!page) 481 if (!page)
483 break; 482 break;
484 483
485 page->index = page_index; 484 if (add_to_page_cache_lru(page, mapping, page_index,
486 /* 485 GFP_NOFS)) {
487 * what we want to do here is call add_to_page_cache_lru,
488 * but that isn't exported, so we reproduce it here
489 */
490 if (add_to_page_cache(page, mapping,
491 page->index, GFP_NOFS)) {
492 page_cache_release(page); 486 page_cache_release(page);
493 goto next; 487 goto next;
494 } 488 }
495 489
496 /* open coding of lru_cache_add, also not exported */
497 page_cache_get(page);
498 if (!pagevec_add(&pvec, page))
499 __pagevec_lru_add_file(&pvec);
500
501 end = last_offset + PAGE_CACHE_SIZE - 1; 490 end = last_offset + PAGE_CACHE_SIZE - 1;
502 /* 491 /*
503 * at this point, we have a locked page in the page cache 492 * at this point, we have a locked page in the page cache
@@ -551,8 +540,6 @@ static noinline int add_ra_bio_pages(struct inode *inode,
551next: 540next:
552 last_offset += PAGE_CACHE_SIZE; 541 last_offset += PAGE_CACHE_SIZE;
553 } 542 }
554 if (pagevec_count(&pvec))
555 __pagevec_lru_add_file(&pvec);
556 return 0; 543 return 0;
557} 544}
558 545
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index ec96f3a6d536..6795a713b205 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -17,6 +17,7 @@
17 */ 17 */
18 18
19#include <linux/sched.h> 19#include <linux/sched.h>
20#include <linux/slab.h>
20#include "ctree.h" 21#include "ctree.h"
21#include "disk-io.h" 22#include "disk-io.h"
22#include "transaction.h" 23#include "transaction.h"
@@ -37,6 +38,11 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
37 struct extent_buffer *src_buf); 38 struct extent_buffer *src_buf);
38static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, 39static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
39 struct btrfs_path *path, int level, int slot); 40 struct btrfs_path *path, int level, int slot);
41static int setup_items_for_insert(struct btrfs_trans_handle *trans,
42 struct btrfs_root *root, struct btrfs_path *path,
43 struct btrfs_key *cpu_key, u32 *data_size,
44 u32 total_data, u32 total_size, int nr);
45
40 46
41struct btrfs_path *btrfs_alloc_path(void) 47struct btrfs_path *btrfs_alloc_path(void)
42{ 48{
@@ -451,9 +457,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
451 extent_buffer_get(cow); 457 extent_buffer_get(cow);
452 spin_unlock(&root->node_lock); 458 spin_unlock(&root->node_lock);
453 459
454 btrfs_free_extent(trans, root, buf->start, buf->len, 460 btrfs_free_tree_block(trans, root, buf->start, buf->len,
455 parent_start, root->root_key.objectid, 461 parent_start, root->root_key.objectid, level);
456 level, 0);
457 free_extent_buffer(buf); 462 free_extent_buffer(buf);
458 add_root_to_dirty_list(root); 463 add_root_to_dirty_list(root);
459 } else { 464 } else {
@@ -468,9 +473,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
468 btrfs_set_node_ptr_generation(parent, parent_slot, 473 btrfs_set_node_ptr_generation(parent, parent_slot,
469 trans->transid); 474 trans->transid);
470 btrfs_mark_buffer_dirty(parent); 475 btrfs_mark_buffer_dirty(parent);
471 btrfs_free_extent(trans, root, buf->start, buf->len, 476 btrfs_free_tree_block(trans, root, buf->start, buf->len,
472 parent_start, root->root_key.objectid, 477 parent_start, root->root_key.objectid, level);
473 level, 0);
474 } 478 }
475 if (unlock_orig) 479 if (unlock_orig)
476 btrfs_tree_unlock(buf); 480 btrfs_tree_unlock(buf);
@@ -1030,8 +1034,8 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
1030 btrfs_tree_unlock(mid); 1034 btrfs_tree_unlock(mid);
1031 /* once for the path */ 1035 /* once for the path */
1032 free_extent_buffer(mid); 1036 free_extent_buffer(mid);
1033 ret = btrfs_free_extent(trans, root, mid->start, mid->len, 1037 ret = btrfs_free_tree_block(trans, root, mid->start, mid->len,
1034 0, root->root_key.objectid, level, 1); 1038 0, root->root_key.objectid, level);
1035 /* once for the root ptr */ 1039 /* once for the root ptr */
1036 free_extent_buffer(mid); 1040 free_extent_buffer(mid);
1037 return ret; 1041 return ret;
@@ -1095,10 +1099,10 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
1095 1); 1099 1);
1096 if (wret) 1100 if (wret)
1097 ret = wret; 1101 ret = wret;
1098 wret = btrfs_free_extent(trans, root, bytenr, 1102 wret = btrfs_free_tree_block(trans, root,
1099 blocksize, 0, 1103 bytenr, blocksize, 0,
1100 root->root_key.objectid, 1104 root->root_key.objectid,
1101 level, 0); 1105 level);
1102 if (wret) 1106 if (wret)
1103 ret = wret; 1107 ret = wret;
1104 } else { 1108 } else {
@@ -1143,9 +1147,8 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
1143 wret = del_ptr(trans, root, path, level + 1, pslot); 1147 wret = del_ptr(trans, root, path, level + 1, pslot);
1144 if (wret) 1148 if (wret)
1145 ret = wret; 1149 ret = wret;
1146 wret = btrfs_free_extent(trans, root, bytenr, blocksize, 1150 wret = btrfs_free_tree_block(trans, root, bytenr, blocksize,
1147 0, root->root_key.objectid, 1151 0, root->root_key.objectid, level);
1148 level, 0);
1149 if (wret) 1152 if (wret)
1150 ret = wret; 1153 ret = wret;
1151 } else { 1154 } else {
@@ -2997,75 +3000,89 @@ again:
2997 return ret; 3000 return ret;
2998} 3001}
2999 3002
3000/* 3003static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,
3001 * This function splits a single item into two items, 3004 struct btrfs_root *root,
3002 * giving 'new_key' to the new item and splitting the 3005 struct btrfs_path *path, int ins_len)
3003 * old one at split_offset (from the start of the item).
3004 *
3005 * The path may be released by this operation. After
3006 * the split, the path is pointing to the old item. The
3007 * new item is going to be in the same node as the old one.
3008 *
3009 * Note, the item being split must be smaller enough to live alone on
3010 * a tree block with room for one extra struct btrfs_item
3011 *
3012 * This allows us to split the item in place, keeping a lock on the
3013 * leaf the entire time.
3014 */
3015int btrfs_split_item(struct btrfs_trans_handle *trans,
3016 struct btrfs_root *root,
3017 struct btrfs_path *path,
3018 struct btrfs_key *new_key,
3019 unsigned long split_offset)
3020{ 3006{
3021 u32 item_size; 3007 struct btrfs_key key;
3022 struct extent_buffer *leaf; 3008 struct extent_buffer *leaf;
3023 struct btrfs_key orig_key; 3009 struct btrfs_file_extent_item *fi;
3024 struct btrfs_item *item; 3010 u64 extent_len = 0;
3025 struct btrfs_item *new_item; 3011 u32 item_size;
3026 int ret = 0; 3012 int ret;
3027 int slot;
3028 u32 nritems;
3029 u32 orig_offset;
3030 struct btrfs_disk_key disk_key;
3031 char *buf;
3032 3013
3033 leaf = path->nodes[0]; 3014 leaf = path->nodes[0];
3034 btrfs_item_key_to_cpu(leaf, &orig_key, path->slots[0]); 3015 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
3035 if (btrfs_leaf_free_space(root, leaf) >= sizeof(struct btrfs_item)) 3016
3036 goto split; 3017 BUG_ON(key.type != BTRFS_EXTENT_DATA_KEY &&
3018 key.type != BTRFS_EXTENT_CSUM_KEY);
3019
3020 if (btrfs_leaf_free_space(root, leaf) >= ins_len)
3021 return 0;
3037 3022
3038 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 3023 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
3024 if (key.type == BTRFS_EXTENT_DATA_KEY) {
3025 fi = btrfs_item_ptr(leaf, path->slots[0],
3026 struct btrfs_file_extent_item);
3027 extent_len = btrfs_file_extent_num_bytes(leaf, fi);
3028 }
3039 btrfs_release_path(root, path); 3029 btrfs_release_path(root, path);
3040 3030
3041 path->search_for_split = 1;
3042 path->keep_locks = 1; 3031 path->keep_locks = 1;
3043 3032 path->search_for_split = 1;
3044 ret = btrfs_search_slot(trans, root, &orig_key, path, 0, 1); 3033 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3045 path->search_for_split = 0; 3034 path->search_for_split = 0;
3035 if (ret < 0)
3036 goto err;
3046 3037
3038 ret = -EAGAIN;
3039 leaf = path->nodes[0];
3047 /* if our item isn't there or got smaller, return now */ 3040 /* if our item isn't there or got smaller, return now */
3048 if (ret != 0 || item_size != btrfs_item_size_nr(path->nodes[0], 3041 if (ret > 0 || item_size != btrfs_item_size_nr(leaf, path->slots[0]))
3049 path->slots[0])) { 3042 goto err;
3050 path->keep_locks = 0; 3043
3051 return -EAGAIN; 3044 /* the leaf has changed, it now has room. return now */
3045 if (btrfs_leaf_free_space(root, path->nodes[0]) >= ins_len)
3046 goto err;
3047
3048 if (key.type == BTRFS_EXTENT_DATA_KEY) {
3049 fi = btrfs_item_ptr(leaf, path->slots[0],
3050 struct btrfs_file_extent_item);
3051 if (extent_len != btrfs_file_extent_num_bytes(leaf, fi))
3052 goto err;
3052 } 3053 }
3053 3054
3054 btrfs_set_path_blocking(path); 3055 btrfs_set_path_blocking(path);
3055 ret = split_leaf(trans, root, &orig_key, path, 3056 ret = split_leaf(trans, root, &key, path, ins_len, 1);
3056 sizeof(struct btrfs_item), 1);
3057 path->keep_locks = 0;
3058 BUG_ON(ret); 3057 BUG_ON(ret);
3059 3058
3059 path->keep_locks = 0;
3060 btrfs_unlock_up_safe(path, 1); 3060 btrfs_unlock_up_safe(path, 1);
3061 return 0;
3062err:
3063 path->keep_locks = 0;
3064 return ret;
3065}
3066
3067static noinline int split_item(struct btrfs_trans_handle *trans,
3068 struct btrfs_root *root,
3069 struct btrfs_path *path,
3070 struct btrfs_key *new_key,
3071 unsigned long split_offset)
3072{
3073 struct extent_buffer *leaf;
3074 struct btrfs_item *item;
3075 struct btrfs_item *new_item;
3076 int slot;
3077 char *buf;
3078 u32 nritems;
3079 u32 item_size;
3080 u32 orig_offset;
3081 struct btrfs_disk_key disk_key;
3082
3061 leaf = path->nodes[0]; 3083 leaf = path->nodes[0];
3062 BUG_ON(btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item)); 3084 BUG_ON(btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item));
3063 3085
3064split:
3065 /*
3066 * make sure any changes to the path from split_leaf leave it
3067 * in a blocking state
3068 */
3069 btrfs_set_path_blocking(path); 3086 btrfs_set_path_blocking(path);
3070 3087
3071 item = btrfs_item_nr(leaf, path->slots[0]); 3088 item = btrfs_item_nr(leaf, path->slots[0]);
@@ -3073,19 +3090,19 @@ split:
3073 item_size = btrfs_item_size(leaf, item); 3090 item_size = btrfs_item_size(leaf, item);
3074 3091
3075 buf = kmalloc(item_size, GFP_NOFS); 3092 buf = kmalloc(item_size, GFP_NOFS);
3093 if (!buf)
3094 return -ENOMEM;
3095
3076 read_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf, 3096 read_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf,
3077 path->slots[0]), item_size); 3097 path->slots[0]), item_size);
3078 slot = path->slots[0] + 1;
3079 leaf = path->nodes[0];
3080 3098
3099 slot = path->slots[0] + 1;
3081 nritems = btrfs_header_nritems(leaf); 3100 nritems = btrfs_header_nritems(leaf);
3082
3083 if (slot != nritems) { 3101 if (slot != nritems) {
3084 /* shift the items */ 3102 /* shift the items */
3085 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1), 3103 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1),
3086 btrfs_item_nr_offset(slot), 3104 btrfs_item_nr_offset(slot),
3087 (nritems - slot) * sizeof(struct btrfs_item)); 3105 (nritems - slot) * sizeof(struct btrfs_item));
3088
3089 } 3106 }
3090 3107
3091 btrfs_cpu_key_to_disk(&disk_key, new_key); 3108 btrfs_cpu_key_to_disk(&disk_key, new_key);
@@ -3113,16 +3130,81 @@ split:
3113 item_size - split_offset); 3130 item_size - split_offset);
3114 btrfs_mark_buffer_dirty(leaf); 3131 btrfs_mark_buffer_dirty(leaf);
3115 3132
3116 ret = 0; 3133 BUG_ON(btrfs_leaf_free_space(root, leaf) < 0);
3117 if (btrfs_leaf_free_space(root, leaf) < 0) {
3118 btrfs_print_leaf(root, leaf);
3119 BUG();
3120 }
3121 kfree(buf); 3134 kfree(buf);
3135 return 0;
3136}
3137
3138/*
3139 * This function splits a single item into two items,
3140 * giving 'new_key' to the new item and splitting the
3141 * old one at split_offset (from the start of the item).
3142 *
3143 * The path may be released by this operation. After
3144 * the split, the path is pointing to the old item. The
3145 * new item is going to be in the same node as the old one.
3146 *
3147 * Note, the item being split must be smaller enough to live alone on
3148 * a tree block with room for one extra struct btrfs_item
3149 *
3150 * This allows us to split the item in place, keeping a lock on the
3151 * leaf the entire time.
3152 */
3153int btrfs_split_item(struct btrfs_trans_handle *trans,
3154 struct btrfs_root *root,
3155 struct btrfs_path *path,
3156 struct btrfs_key *new_key,
3157 unsigned long split_offset)
3158{
3159 int ret;
3160 ret = setup_leaf_for_split(trans, root, path,
3161 sizeof(struct btrfs_item));
3162 if (ret)
3163 return ret;
3164
3165 ret = split_item(trans, root, path, new_key, split_offset);
3122 return ret; 3166 return ret;
3123} 3167}
3124 3168
3125/* 3169/*
3170 * This function duplicate a item, giving 'new_key' to the new item.
3171 * It guarantees both items live in the same tree leaf and the new item
3172 * is contiguous with the original item.
3173 *
3174 * This allows us to split file extent in place, keeping a lock on the
3175 * leaf the entire time.
3176 */
3177int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
3178 struct btrfs_root *root,
3179 struct btrfs_path *path,
3180 struct btrfs_key *new_key)
3181{
3182 struct extent_buffer *leaf;
3183 int ret;
3184 u32 item_size;
3185
3186 leaf = path->nodes[0];
3187 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
3188 ret = setup_leaf_for_split(trans, root, path,
3189 item_size + sizeof(struct btrfs_item));
3190 if (ret)
3191 return ret;
3192
3193 path->slots[0]++;
3194 ret = setup_items_for_insert(trans, root, path, new_key, &item_size,
3195 item_size, item_size +
3196 sizeof(struct btrfs_item), 1);
3197 BUG_ON(ret);
3198
3199 leaf = path->nodes[0];
3200 memcpy_extent_buffer(leaf,
3201 btrfs_item_ptr_offset(leaf, path->slots[0]),
3202 btrfs_item_ptr_offset(leaf, path->slots[0] - 1),
3203 item_size);
3204 return 0;
3205}
3206
3207/*
3126 * make the item pointed to by the path smaller. new_size indicates 3208 * make the item pointed to by the path smaller. new_size indicates
3127 * how small to make it, and from_end tells us if we just chop bytes 3209 * how small to make it, and from_end tells us if we just chop bytes
3128 * off the end of the item or if we shift the item to chop bytes off 3210 * off the end of the item or if we shift the item to chop bytes off
@@ -3714,8 +3796,8 @@ static noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans,
3714 */ 3796 */
3715 btrfs_unlock_up_safe(path, 0); 3797 btrfs_unlock_up_safe(path, 0);
3716 3798
3717 ret = btrfs_free_extent(trans, root, leaf->start, leaf->len, 3799 ret = btrfs_free_tree_block(trans, root, leaf->start, leaf->len,
3718 0, root->root_key.objectid, 0, 0); 3800 0, root->root_key.objectid, 0);
3719 return ret; 3801 return ret;
3720} 3802}
3721/* 3803/*
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 444b3e9b92a4..746a7248678e 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -26,6 +26,7 @@
26#include <linux/completion.h> 26#include <linux/completion.h>
27#include <linux/backing-dev.h> 27#include <linux/backing-dev.h>
28#include <linux/wait.h> 28#include <linux/wait.h>
29#include <linux/slab.h>
29#include <asm/kmap_types.h> 30#include <asm/kmap_types.h>
30#include "extent_io.h" 31#include "extent_io.h"
31#include "extent_map.h" 32#include "extent_map.h"
@@ -310,6 +311,9 @@ struct btrfs_header {
310#define BTRFS_MAX_INLINE_DATA_SIZE(r) (BTRFS_LEAF_DATA_SIZE(r) - \ 311#define BTRFS_MAX_INLINE_DATA_SIZE(r) (BTRFS_LEAF_DATA_SIZE(r) - \
311 sizeof(struct btrfs_item) - \ 312 sizeof(struct btrfs_item) - \
312 sizeof(struct btrfs_file_extent_item)) 313 sizeof(struct btrfs_file_extent_item))
314#define BTRFS_MAX_XATTR_SIZE(r) (BTRFS_LEAF_DATA_SIZE(r) - \
315 sizeof(struct btrfs_item) -\
316 sizeof(struct btrfs_dir_item))
313 317
314 318
315/* 319/*
@@ -370,11 +374,13 @@ struct btrfs_super_block {
370 * ones specified below then we will fail to mount 374 * ones specified below then we will fail to mount
371 */ 375 */
372#define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF (1ULL << 0) 376#define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF (1ULL << 0)
377#define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (2ULL << 0)
373 378
374#define BTRFS_FEATURE_COMPAT_SUPP 0ULL 379#define BTRFS_FEATURE_COMPAT_SUPP 0ULL
375#define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL 380#define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL
376#define BTRFS_FEATURE_INCOMPAT_SUPP \ 381#define BTRFS_FEATURE_INCOMPAT_SUPP \
377 BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF 382 (BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF | \
383 BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL)
378 384
379/* 385/*
380 * A leaf is full of items. offset and size tell us where to find 386 * A leaf is full of items. offset and size tell us where to find
@@ -829,7 +835,6 @@ struct btrfs_fs_info {
829 u64 last_trans_log_full_commit; 835 u64 last_trans_log_full_commit;
830 u64 open_ioctl_trans; 836 u64 open_ioctl_trans;
831 unsigned long mount_opt; 837 unsigned long mount_opt;
832 u64 max_extent;
833 u64 max_inline; 838 u64 max_inline;
834 u64 alloc_start; 839 u64 alloc_start;
835 struct btrfs_transaction *running_transaction; 840 struct btrfs_transaction *running_transaction;
@@ -859,8 +864,9 @@ struct btrfs_fs_info {
859 struct mutex ordered_operations_mutex; 864 struct mutex ordered_operations_mutex;
860 struct rw_semaphore extent_commit_sem; 865 struct rw_semaphore extent_commit_sem;
861 866
862 struct rw_semaphore subvol_sem; 867 struct rw_semaphore cleanup_work_sem;
863 868
869 struct rw_semaphore subvol_sem;
864 struct srcu_struct subvol_srcu; 870 struct srcu_struct subvol_srcu;
865 871
866 struct list_head trans_list; 872 struct list_head trans_list;
@@ -868,6 +874,9 @@ struct btrfs_fs_info {
868 struct list_head dead_roots; 874 struct list_head dead_roots;
869 struct list_head caching_block_groups; 875 struct list_head caching_block_groups;
870 876
877 spinlock_t delayed_iput_lock;
878 struct list_head delayed_iputs;
879
871 atomic_t nr_async_submits; 880 atomic_t nr_async_submits;
872 atomic_t async_submit_draining; 881 atomic_t async_submit_draining;
873 atomic_t nr_async_bios; 882 atomic_t nr_async_bios;
@@ -1034,12 +1043,12 @@ struct btrfs_root {
1034 int ref_cows; 1043 int ref_cows;
1035 int track_dirty; 1044 int track_dirty;
1036 int in_radix; 1045 int in_radix;
1046 int clean_orphans;
1037 1047
1038 u64 defrag_trans_start; 1048 u64 defrag_trans_start;
1039 struct btrfs_key defrag_progress; 1049 struct btrfs_key defrag_progress;
1040 struct btrfs_key defrag_max; 1050 struct btrfs_key defrag_max;
1041 int defrag_running; 1051 int defrag_running;
1042 int defrag_level;
1043 char *name; 1052 char *name;
1044 int in_sysfs; 1053 int in_sysfs;
1045 1054
@@ -1154,6 +1163,7 @@ struct btrfs_root {
1154#define BTRFS_MOUNT_SSD_SPREAD (1 << 8) 1163#define BTRFS_MOUNT_SSD_SPREAD (1 << 8)
1155#define BTRFS_MOUNT_NOSSD (1 << 9) 1164#define BTRFS_MOUNT_NOSSD (1 << 9)
1156#define BTRFS_MOUNT_DISCARD (1 << 10) 1165#define BTRFS_MOUNT_DISCARD (1 << 10)
1166#define BTRFS_MOUNT_FORCE_COMPRESS (1 << 11)
1157 1167
1158#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) 1168#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt)
1159#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) 1169#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt)
@@ -1174,7 +1184,6 @@ struct btrfs_root {
1174#define BTRFS_INODE_NOATIME (1 << 9) 1184#define BTRFS_INODE_NOATIME (1 << 9)
1175#define BTRFS_INODE_DIRSYNC (1 << 10) 1185#define BTRFS_INODE_DIRSYNC (1 << 10)
1176 1186
1177
1178/* some macros to generate set/get funcs for the struct fields. This 1187/* some macros to generate set/get funcs for the struct fields. This
1179 * assumes there is a lefoo_to_cpu for every type, so lets make a simple 1188 * assumes there is a lefoo_to_cpu for every type, so lets make a simple
1180 * one for u8: 1189 * one for u8:
@@ -1834,7 +1843,7 @@ BTRFS_SETGET_STACK_FUNCS(super_num_devices, struct btrfs_super_block,
1834BTRFS_SETGET_STACK_FUNCS(super_compat_flags, struct btrfs_super_block, 1843BTRFS_SETGET_STACK_FUNCS(super_compat_flags, struct btrfs_super_block,
1835 compat_flags, 64); 1844 compat_flags, 64);
1836BTRFS_SETGET_STACK_FUNCS(super_compat_ro_flags, struct btrfs_super_block, 1845BTRFS_SETGET_STACK_FUNCS(super_compat_ro_flags, struct btrfs_super_block,
1837 compat_flags, 64); 1846 compat_ro_flags, 64);
1838BTRFS_SETGET_STACK_FUNCS(super_incompat_flags, struct btrfs_super_block, 1847BTRFS_SETGET_STACK_FUNCS(super_incompat_flags, struct btrfs_super_block,
1839 incompat_flags, 64); 1848 incompat_flags, 64);
1840BTRFS_SETGET_STACK_FUNCS(super_csum_type, struct btrfs_super_block, 1849BTRFS_SETGET_STACK_FUNCS(super_csum_type, struct btrfs_super_block,
@@ -1975,6 +1984,10 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
1975 u64 parent, u64 root_objectid, 1984 u64 parent, u64 root_objectid,
1976 struct btrfs_disk_key *key, int level, 1985 struct btrfs_disk_key *key, int level,
1977 u64 hint, u64 empty_size); 1986 u64 hint, u64 empty_size);
1987int btrfs_free_tree_block(struct btrfs_trans_handle *trans,
1988 struct btrfs_root *root,
1989 u64 bytenr, u32 blocksize,
1990 u64 parent, u64 root_objectid, int level);
1978struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, 1991struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
1979 struct btrfs_root *root, 1992 struct btrfs_root *root,
1980 u64 bytenr, u32 blocksize, 1993 u64 bytenr, u32 blocksize,
@@ -2089,6 +2102,10 @@ int btrfs_split_item(struct btrfs_trans_handle *trans,
2089 struct btrfs_path *path, 2102 struct btrfs_path *path,
2090 struct btrfs_key *new_key, 2103 struct btrfs_key *new_key,
2091 unsigned long split_offset); 2104 unsigned long split_offset);
2105int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
2106 struct btrfs_root *root,
2107 struct btrfs_path *path,
2108 struct btrfs_key *new_key);
2092int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root 2109int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
2093 *root, struct btrfs_key *key, struct btrfs_path *p, int 2110 *root, struct btrfs_key *key, struct btrfs_path *p, int
2094 ins_len, int cow); 2111 ins_len, int cow);
@@ -2196,9 +2213,10 @@ int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans,
2196 struct btrfs_path *path, 2213 struct btrfs_path *path,
2197 struct btrfs_dir_item *di); 2214 struct btrfs_dir_item *di);
2198int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans, 2215int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
2199 struct btrfs_root *root, const char *name, 2216 struct btrfs_root *root,
2200 u16 name_len, const void *data, u16 data_len, 2217 struct btrfs_path *path, u64 objectid,
2201 u64 dir); 2218 const char *name, u16 name_len,
2219 const void *data, u16 data_len);
2202struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans, 2220struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans,
2203 struct btrfs_root *root, 2221 struct btrfs_root *root,
2204 struct btrfs_path *path, u64 dir, 2222 struct btrfs_path *path, u64 dir,
@@ -2292,8 +2310,9 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
2292 struct inode *inode, u64 new_size, 2310 struct inode *inode, u64 new_size,
2293 u32 min_type); 2311 u32 min_type);
2294 2312
2295int btrfs_start_delalloc_inodes(struct btrfs_root *root); 2313int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput);
2296int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end); 2314int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
2315 struct extent_state **cached_state);
2297int btrfs_writepages(struct address_space *mapping, 2316int btrfs_writepages(struct address_space *mapping,
2298 struct writeback_control *wbc); 2317 struct writeback_control *wbc);
2299int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, 2318int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
@@ -2309,7 +2328,7 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
2309int btrfs_readpage(struct file *file, struct page *page); 2328int btrfs_readpage(struct file *file, struct page *page);
2310void btrfs_delete_inode(struct inode *inode); 2329void btrfs_delete_inode(struct inode *inode);
2311void btrfs_put_inode(struct inode *inode); 2330void btrfs_put_inode(struct inode *inode);
2312int btrfs_write_inode(struct inode *inode, int wait); 2331int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc);
2313void btrfs_dirty_inode(struct inode *inode); 2332void btrfs_dirty_inode(struct inode *inode);
2314struct inode *btrfs_alloc_inode(struct super_block *sb); 2333struct inode *btrfs_alloc_inode(struct super_block *sb);
2315void btrfs_destroy_inode(struct inode *inode); 2334void btrfs_destroy_inode(struct inode *inode);
@@ -2318,7 +2337,7 @@ int btrfs_init_cachep(void);
2318void btrfs_destroy_cachep(void); 2337void btrfs_destroy_cachep(void);
2319long btrfs_ioctl_trans_end(struct file *file); 2338long btrfs_ioctl_trans_end(struct file *file);
2320struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, 2339struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
2321 struct btrfs_root *root); 2340 struct btrfs_root *root, int *was_new);
2322int btrfs_commit_write(struct file *file, struct page *page, 2341int btrfs_commit_write(struct file *file, struct page *page,
2323 unsigned from, unsigned to); 2342 unsigned from, unsigned to);
2324struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, 2343struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
@@ -2332,6 +2351,8 @@ int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode);
2332void btrfs_orphan_cleanup(struct btrfs_root *root); 2351void btrfs_orphan_cleanup(struct btrfs_root *root);
2333int btrfs_cont_expand(struct inode *inode, loff_t size); 2352int btrfs_cont_expand(struct inode *inode, loff_t size);
2334int btrfs_invalidate_inodes(struct btrfs_root *root); 2353int btrfs_invalidate_inodes(struct btrfs_root *root);
2354void btrfs_add_delayed_iput(struct inode *inode);
2355void btrfs_run_delayed_iputs(struct btrfs_root *root);
2335extern const struct dentry_operations btrfs_dentry_operations; 2356extern const struct dentry_operations btrfs_dentry_operations;
2336 2357
2337/* ioctl.c */ 2358/* ioctl.c */
@@ -2345,12 +2366,9 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
2345 int skip_pinned); 2366 int skip_pinned);
2346int btrfs_check_file(struct btrfs_root *root, struct inode *inode); 2367int btrfs_check_file(struct btrfs_root *root, struct inode *inode);
2347extern const struct file_operations btrfs_file_operations; 2368extern const struct file_operations btrfs_file_operations;
2348int btrfs_drop_extents(struct btrfs_trans_handle *trans, 2369int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode,
2349 struct btrfs_root *root, struct inode *inode, 2370 u64 start, u64 end, u64 *hint_byte, int drop_cache);
2350 u64 start, u64 end, u64 locked_end,
2351 u64 inline_limit, u64 *hint_block, int drop_cache);
2352int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, 2371int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
2353 struct btrfs_root *root,
2354 struct inode *inode, u64 start, u64 end); 2372 struct inode *inode, u64 start, u64 end);
2355int btrfs_release_file(struct inode *inode, struct file *file); 2373int btrfs_release_file(struct inode *inode, struct file *file);
2356 2374
@@ -2370,7 +2388,6 @@ void btrfs_sysfs_del_super(struct btrfs_fs_info *root);
2370ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size); 2388ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size);
2371 2389
2372/* super.c */ 2390/* super.c */
2373u64 btrfs_parse_size(char *str);
2374int btrfs_parse_options(struct btrfs_root *root, char *options); 2391int btrfs_parse_options(struct btrfs_root *root, char *options);
2375int btrfs_sync_fs(struct super_block *sb, int wait); 2392int btrfs_sync_fs(struct super_block *sb, int wait);
2376 2393
@@ -2380,7 +2397,8 @@ int btrfs_check_acl(struct inode *inode, int mask);
2380#else 2397#else
2381#define btrfs_check_acl NULL 2398#define btrfs_check_acl NULL
2382#endif 2399#endif
2383int btrfs_init_acl(struct inode *inode, struct inode *dir); 2400int btrfs_init_acl(struct btrfs_trans_handle *trans,
2401 struct inode *inode, struct inode *dir);
2384int btrfs_acl_chmod(struct inode *inode); 2402int btrfs_acl_chmod(struct inode *inode);
2385 2403
2386/* relocation.c */ 2404/* relocation.c */
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index 84e6781413b1..902ce507c4e3 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -17,6 +17,7 @@
17 */ 17 */
18 18
19#include <linux/sched.h> 19#include <linux/sched.h>
20#include <linux/slab.h>
20#include <linux/sort.h> 21#include <linux/sort.h>
21#include "ctree.h" 22#include "ctree.h"
22#include "delayed-ref.h" 23#include "delayed-ref.h"
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index f3a6075519cc..e9103b3baa49 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -68,12 +68,12 @@ static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle
68 * into the tree 68 * into the tree
69 */ 69 */
70int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans, 70int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
71 struct btrfs_root *root, const char *name, 71 struct btrfs_root *root,
72 u16 name_len, const void *data, u16 data_len, 72 struct btrfs_path *path, u64 objectid,
73 u64 dir) 73 const char *name, u16 name_len,
74 const void *data, u16 data_len)
74{ 75{
75 int ret = 0; 76 int ret = 0;
76 struct btrfs_path *path;
77 struct btrfs_dir_item *dir_item; 77 struct btrfs_dir_item *dir_item;
78 unsigned long name_ptr, data_ptr; 78 unsigned long name_ptr, data_ptr;
79 struct btrfs_key key, location; 79 struct btrfs_key key, location;
@@ -81,15 +81,11 @@ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
81 struct extent_buffer *leaf; 81 struct extent_buffer *leaf;
82 u32 data_size; 82 u32 data_size;
83 83
84 key.objectid = dir; 84 BUG_ON(name_len + data_len > BTRFS_MAX_XATTR_SIZE(root));
85
86 key.objectid = objectid;
85 btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY); 87 btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY);
86 key.offset = btrfs_name_hash(name, name_len); 88 key.offset = btrfs_name_hash(name, name_len);
87 path = btrfs_alloc_path();
88 if (!path)
89 return -ENOMEM;
90 if (name_len + data_len + sizeof(struct btrfs_dir_item) >
91 BTRFS_LEAF_DATA_SIZE(root) - sizeof(struct btrfs_item))
92 return -ENOSPC;
93 89
94 data_size = sizeof(*dir_item) + name_len + data_len; 90 data_size = sizeof(*dir_item) + name_len + data_len;
95 dir_item = insert_with_overflow(trans, root, path, &key, data_size, 91 dir_item = insert_with_overflow(trans, root, path, &key, data_size,
@@ -117,7 +113,6 @@ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
117 write_extent_buffer(leaf, data, data_ptr, data_len); 113 write_extent_buffer(leaf, data, data_ptr, data_len);
118 btrfs_mark_buffer_dirty(path->nodes[0]); 114 btrfs_mark_buffer_dirty(path->nodes[0]);
119 115
120 btrfs_free_path(path);
121 return ret; 116 return ret;
122} 117}
123 118
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 02b6afbd7450..feca04197d02 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -27,6 +27,7 @@
27#include <linux/kthread.h> 27#include <linux/kthread.h>
28#include <linux/freezer.h> 28#include <linux/freezer.h>
29#include <linux/crc32c.h> 29#include <linux/crc32c.h>
30#include <linux/slab.h>
30#include "compat.h" 31#include "compat.h"
31#include "ctree.h" 32#include "ctree.h"
32#include "disk-io.h" 33#include "disk-io.h"
@@ -43,8 +44,6 @@ static struct extent_io_ops btree_extent_io_ops;
43static void end_workqueue_fn(struct btrfs_work *work); 44static void end_workqueue_fn(struct btrfs_work *work);
44static void free_fs_root(struct btrfs_root *root); 45static void free_fs_root(struct btrfs_root *root);
45 46
46static atomic_t btrfs_bdi_num = ATOMIC_INIT(0);
47
48/* 47/*
49 * end_io_wq structs are used to do processing in task context when an IO is 48 * end_io_wq structs are used to do processing in task context when an IO is
50 * complete. This is used during reads to verify checksums, and it is used 49 * complete. This is used during reads to verify checksums, and it is used
@@ -263,13 +262,15 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
263static int verify_parent_transid(struct extent_io_tree *io_tree, 262static int verify_parent_transid(struct extent_io_tree *io_tree,
264 struct extent_buffer *eb, u64 parent_transid) 263 struct extent_buffer *eb, u64 parent_transid)
265{ 264{
265 struct extent_state *cached_state = NULL;
266 int ret; 266 int ret;
267 267
268 if (!parent_transid || btrfs_header_generation(eb) == parent_transid) 268 if (!parent_transid || btrfs_header_generation(eb) == parent_transid)
269 return 0; 269 return 0;
270 270
271 lock_extent(io_tree, eb->start, eb->start + eb->len - 1, GFP_NOFS); 271 lock_extent_bits(io_tree, eb->start, eb->start + eb->len - 1,
272 if (extent_buffer_uptodate(io_tree, eb) && 272 0, &cached_state, GFP_NOFS);
273 if (extent_buffer_uptodate(io_tree, eb, cached_state) &&
273 btrfs_header_generation(eb) == parent_transid) { 274 btrfs_header_generation(eb) == parent_transid) {
274 ret = 0; 275 ret = 0;
275 goto out; 276 goto out;
@@ -282,10 +283,10 @@ static int verify_parent_transid(struct extent_io_tree *io_tree,
282 (unsigned long long)btrfs_header_generation(eb)); 283 (unsigned long long)btrfs_header_generation(eb));
283 } 284 }
284 ret = 1; 285 ret = 1;
285 clear_extent_buffer_uptodate(io_tree, eb); 286 clear_extent_buffer_uptodate(io_tree, eb, &cached_state);
286out: 287out:
287 unlock_extent(io_tree, eb->start, eb->start + eb->len - 1, 288 unlock_extent_cached(io_tree, eb->start, eb->start + eb->len - 1,
288 GFP_NOFS); 289 &cached_state, GFP_NOFS);
289 return ret; 290 return ret;
290} 291}
291 292
@@ -892,6 +893,8 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
892 root->stripesize = stripesize; 893 root->stripesize = stripesize;
893 root->ref_cows = 0; 894 root->ref_cows = 0;
894 root->track_dirty = 0; 895 root->track_dirty = 0;
896 root->in_radix = 0;
897 root->clean_orphans = 0;
895 898
896 root->fs_info = fs_info; 899 root->fs_info = fs_info;
897 root->objectid = objectid; 900 root->objectid = objectid;
@@ -899,7 +902,7 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
899 root->highest_objectid = 0; 902 root->highest_objectid = 0;
900 root->name = NULL; 903 root->name = NULL;
901 root->in_sysfs = 0; 904 root->in_sysfs = 0;
902 root->inode_tree.rb_node = NULL; 905 root->inode_tree = RB_ROOT;
903 906
904 INIT_LIST_HEAD(&root->dirty_list); 907 INIT_LIST_HEAD(&root->dirty_list);
905 INIT_LIST_HEAD(&root->orphan_list); 908 INIT_LIST_HEAD(&root->orphan_list);
@@ -928,7 +931,6 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
928 root->defrag_trans_start = fs_info->generation; 931 root->defrag_trans_start = fs_info->generation;
929 init_completion(&root->kobj_unregister); 932 init_completion(&root->kobj_unregister);
930 root->defrag_running = 0; 933 root->defrag_running = 0;
931 root->defrag_level = 0;
932 root->root_key.objectid = objectid; 934 root->root_key.objectid = objectid;
933 root->anon_super.s_root = NULL; 935 root->anon_super.s_root = NULL;
934 root->anon_super.s_dev = 0; 936 root->anon_super.s_dev = 0;
@@ -980,12 +982,12 @@ int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans,
980 982
981 while (1) { 983 while (1) {
982 ret = find_first_extent_bit(&log_root_tree->dirty_log_pages, 984 ret = find_first_extent_bit(&log_root_tree->dirty_log_pages,
983 0, &start, &end, EXTENT_DIRTY); 985 0, &start, &end, EXTENT_DIRTY | EXTENT_NEW);
984 if (ret) 986 if (ret)
985 break; 987 break;
986 988
987 clear_extent_dirty(&log_root_tree->dirty_log_pages, 989 clear_extent_bits(&log_root_tree->dirty_log_pages, start, end,
988 start, end, GFP_NOFS); 990 EXTENT_DIRTY | EXTENT_NEW, GFP_NOFS);
989 } 991 }
990 eb = fs_info->log_root_tree->node; 992 eb = fs_info->log_root_tree->node;
991 993
@@ -1210,8 +1212,10 @@ again:
1210 ret = radix_tree_insert(&fs_info->fs_roots_radix, 1212 ret = radix_tree_insert(&fs_info->fs_roots_radix,
1211 (unsigned long)root->root_key.objectid, 1213 (unsigned long)root->root_key.objectid,
1212 root); 1214 root);
1213 if (ret == 0) 1215 if (ret == 0) {
1214 root->in_radix = 1; 1216 root->in_radix = 1;
1217 root->clean_orphans = 1;
1218 }
1215 spin_unlock(&fs_info->fs_roots_radix_lock); 1219 spin_unlock(&fs_info->fs_roots_radix_lock);
1216 radix_tree_preload_end(); 1220 radix_tree_preload_end();
1217 if (ret) { 1221 if (ret) {
@@ -1225,10 +1229,6 @@ again:
1225 ret = btrfs_find_dead_roots(fs_info->tree_root, 1229 ret = btrfs_find_dead_roots(fs_info->tree_root,
1226 root->root_key.objectid); 1230 root->root_key.objectid);
1227 WARN_ON(ret); 1231 WARN_ON(ret);
1228
1229 if (!(fs_info->sb->s_flags & MS_RDONLY))
1230 btrfs_orphan_cleanup(root);
1231
1232 return root; 1232 return root;
1233fail: 1233fail:
1234 free_fs_root(root); 1234 free_fs_root(root);
@@ -1373,19 +1373,11 @@ static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi)
1373{ 1373{
1374 int err; 1374 int err;
1375 1375
1376 bdi->name = "btrfs";
1377 bdi->capabilities = BDI_CAP_MAP_COPY; 1376 bdi->capabilities = BDI_CAP_MAP_COPY;
1378 err = bdi_init(bdi); 1377 err = bdi_setup_and_register(bdi, "btrfs", BDI_CAP_MAP_COPY);
1379 if (err) 1378 if (err)
1380 return err; 1379 return err;
1381 1380
1382 err = bdi_register(bdi, NULL, "btrfs-%d",
1383 atomic_inc_return(&btrfs_bdi_num));
1384 if (err) {
1385 bdi_destroy(bdi);
1386 return err;
1387 }
1388
1389 bdi->ra_pages = default_backing_dev_info.ra_pages; 1381 bdi->ra_pages = default_backing_dev_info.ra_pages;
1390 bdi->unplug_io_fn = btrfs_unplug_io_fn; 1382 bdi->unplug_io_fn = btrfs_unplug_io_fn;
1391 bdi->unplug_io_data = info; 1383 bdi->unplug_io_data = info;
@@ -1477,6 +1469,7 @@ static int cleaner_kthread(void *arg)
1477 1469
1478 if (!(root->fs_info->sb->s_flags & MS_RDONLY) && 1470 if (!(root->fs_info->sb->s_flags & MS_RDONLY) &&
1479 mutex_trylock(&root->fs_info->cleaner_mutex)) { 1471 mutex_trylock(&root->fs_info->cleaner_mutex)) {
1472 btrfs_run_delayed_iputs(root);
1480 btrfs_clean_old_snapshots(root); 1473 btrfs_clean_old_snapshots(root);
1481 mutex_unlock(&root->fs_info->cleaner_mutex); 1474 mutex_unlock(&root->fs_info->cleaner_mutex);
1482 } 1475 }
@@ -1606,6 +1599,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1606 INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC); 1599 INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC);
1607 INIT_LIST_HEAD(&fs_info->trans_list); 1600 INIT_LIST_HEAD(&fs_info->trans_list);
1608 INIT_LIST_HEAD(&fs_info->dead_roots); 1601 INIT_LIST_HEAD(&fs_info->dead_roots);
1602 INIT_LIST_HEAD(&fs_info->delayed_iputs);
1609 INIT_LIST_HEAD(&fs_info->hashers); 1603 INIT_LIST_HEAD(&fs_info->hashers);
1610 INIT_LIST_HEAD(&fs_info->delalloc_inodes); 1604 INIT_LIST_HEAD(&fs_info->delalloc_inodes);
1611 INIT_LIST_HEAD(&fs_info->ordered_operations); 1605 INIT_LIST_HEAD(&fs_info->ordered_operations);
@@ -1614,6 +1608,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1614 spin_lock_init(&fs_info->new_trans_lock); 1608 spin_lock_init(&fs_info->new_trans_lock);
1615 spin_lock_init(&fs_info->ref_cache_lock); 1609 spin_lock_init(&fs_info->ref_cache_lock);
1616 spin_lock_init(&fs_info->fs_roots_radix_lock); 1610 spin_lock_init(&fs_info->fs_roots_radix_lock);
1611 spin_lock_init(&fs_info->delayed_iput_lock);
1617 1612
1618 init_completion(&fs_info->kobj_unregister); 1613 init_completion(&fs_info->kobj_unregister);
1619 fs_info->tree_root = tree_root; 1614 fs_info->tree_root = tree_root;
@@ -1630,7 +1625,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1630 atomic_set(&fs_info->async_submit_draining, 0); 1625 atomic_set(&fs_info->async_submit_draining, 0);
1631 atomic_set(&fs_info->nr_async_bios, 0); 1626 atomic_set(&fs_info->nr_async_bios, 0);
1632 fs_info->sb = sb; 1627 fs_info->sb = sb;
1633 fs_info->max_extent = (u64)-1;
1634 fs_info->max_inline = 8192 * 1024; 1628 fs_info->max_inline = 8192 * 1024;
1635 fs_info->metadata_ratio = 0; 1629 fs_info->metadata_ratio = 0;
1636 1630
@@ -1671,7 +1665,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1671 insert_inode_hash(fs_info->btree_inode); 1665 insert_inode_hash(fs_info->btree_inode);
1672 1666
1673 spin_lock_init(&fs_info->block_group_cache_lock); 1667 spin_lock_init(&fs_info->block_group_cache_lock);
1674 fs_info->block_group_cache_tree.rb_node = NULL; 1668 fs_info->block_group_cache_tree = RB_ROOT;
1675 1669
1676 extent_io_tree_init(&fs_info->freed_extents[0], 1670 extent_io_tree_init(&fs_info->freed_extents[0],
1677 fs_info->btree_inode->i_mapping, GFP_NOFS); 1671 fs_info->btree_inode->i_mapping, GFP_NOFS);
@@ -1689,6 +1683,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1689 mutex_init(&fs_info->cleaner_mutex); 1683 mutex_init(&fs_info->cleaner_mutex);
1690 mutex_init(&fs_info->volume_mutex); 1684 mutex_init(&fs_info->volume_mutex);
1691 init_rwsem(&fs_info->extent_commit_sem); 1685 init_rwsem(&fs_info->extent_commit_sem);
1686 init_rwsem(&fs_info->cleanup_work_sem);
1692 init_rwsem(&fs_info->subvol_sem); 1687 init_rwsem(&fs_info->subvol_sem);
1693 1688
1694 btrfs_init_free_cluster(&fs_info->meta_alloc_cluster); 1689 btrfs_init_free_cluster(&fs_info->meta_alloc_cluster);
@@ -1917,7 +1912,11 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1917 1912
1918 csum_root->track_dirty = 1; 1913 csum_root->track_dirty = 1;
1919 1914
1920 btrfs_read_block_groups(extent_root); 1915 ret = btrfs_read_block_groups(extent_root);
1916 if (ret) {
1917 printk(KERN_ERR "Failed to read block groups: %d\n", ret);
1918 goto fail_block_groups;
1919 }
1921 1920
1922 fs_info->generation = generation; 1921 fs_info->generation = generation;
1923 fs_info->last_trans_committed = generation; 1922 fs_info->last_trans_committed = generation;
@@ -1927,7 +1926,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1927 fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root, 1926 fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root,
1928 "btrfs-cleaner"); 1927 "btrfs-cleaner");
1929 if (IS_ERR(fs_info->cleaner_kthread)) 1928 if (IS_ERR(fs_info->cleaner_kthread))
1930 goto fail_csum_root; 1929 goto fail_block_groups;
1931 1930
1932 fs_info->transaction_kthread = kthread_run(transaction_kthread, 1931 fs_info->transaction_kthread = kthread_run(transaction_kthread,
1933 tree_root, 1932 tree_root,
@@ -1979,7 +1978,12 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1979 1978
1980 if (!(sb->s_flags & MS_RDONLY)) { 1979 if (!(sb->s_flags & MS_RDONLY)) {
1981 ret = btrfs_recover_relocation(tree_root); 1980 ret = btrfs_recover_relocation(tree_root);
1982 BUG_ON(ret); 1981 if (ret < 0) {
1982 printk(KERN_WARNING
1983 "btrfs: failed to recover relocation\n");
1984 err = -EINVAL;
1985 goto fail_trans_kthread;
1986 }
1983 } 1987 }
1984 1988
1985 location.objectid = BTRFS_FS_TREE_OBJECTID; 1989 location.objectid = BTRFS_FS_TREE_OBJECTID;
@@ -1990,6 +1994,12 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1990 if (!fs_info->fs_root) 1994 if (!fs_info->fs_root)
1991 goto fail_trans_kthread; 1995 goto fail_trans_kthread;
1992 1996
1997 if (!(sb->s_flags & MS_RDONLY)) {
1998 down_read(&fs_info->cleanup_work_sem);
1999 btrfs_orphan_cleanup(fs_info->fs_root);
2000 up_read(&fs_info->cleanup_work_sem);
2001 }
2002
1993 return tree_root; 2003 return tree_root;
1994 2004
1995fail_trans_kthread: 2005fail_trans_kthread:
@@ -2004,7 +2014,8 @@ fail_cleaner:
2004 filemap_write_and_wait(fs_info->btree_inode->i_mapping); 2014 filemap_write_and_wait(fs_info->btree_inode->i_mapping);
2005 invalidate_inode_pages2(fs_info->btree_inode->i_mapping); 2015 invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
2006 2016
2007fail_csum_root: 2017fail_block_groups:
2018 btrfs_free_block_groups(fs_info);
2008 free_extent_buffer(csum_root->node); 2019 free_extent_buffer(csum_root->node);
2009 free_extent_buffer(csum_root->commit_root); 2020 free_extent_buffer(csum_root->commit_root);
2010fail_dev_root: 2021fail_dev_root:
@@ -2386,8 +2397,14 @@ int btrfs_commit_super(struct btrfs_root *root)
2386 int ret; 2397 int ret;
2387 2398
2388 mutex_lock(&root->fs_info->cleaner_mutex); 2399 mutex_lock(&root->fs_info->cleaner_mutex);
2400 btrfs_run_delayed_iputs(root);
2389 btrfs_clean_old_snapshots(root); 2401 btrfs_clean_old_snapshots(root);
2390 mutex_unlock(&root->fs_info->cleaner_mutex); 2402 mutex_unlock(&root->fs_info->cleaner_mutex);
2403
2404 /* wait until ongoing cleanup work done */
2405 down_write(&root->fs_info->cleanup_work_sem);
2406 up_write(&root->fs_info->cleanup_work_sem);
2407
2391 trans = btrfs_start_transaction(root, 1); 2408 trans = btrfs_start_transaction(root, 1);
2392 ret = btrfs_commit_transaction(trans, root); 2409 ret = btrfs_commit_transaction(trans, root);
2393 BUG_ON(ret); 2410 BUG_ON(ret);
@@ -2477,7 +2494,8 @@ int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid)
2477 int ret; 2494 int ret;
2478 struct inode *btree_inode = buf->first_page->mapping->host; 2495 struct inode *btree_inode = buf->first_page->mapping->host;
2479 2496
2480 ret = extent_buffer_uptodate(&BTRFS_I(btree_inode)->io_tree, buf); 2497 ret = extent_buffer_uptodate(&BTRFS_I(btree_inode)->io_tree, buf,
2498 NULL);
2481 if (!ret) 2499 if (!ret)
2482 return ret; 2500 return ret;
2483 2501
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
index ba5c3fd5ab8c..951ef09b82f4 100644
--- a/fs/btrfs/export.c
+++ b/fs/btrfs/export.c
@@ -95,7 +95,7 @@ static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid,
95 btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); 95 btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
96 key.offset = 0; 96 key.offset = 0;
97 97
98 inode = btrfs_iget(sb, &key, root); 98 inode = btrfs_iget(sb, &key, root, NULL);
99 if (IS_ERR(inode)) { 99 if (IS_ERR(inode)) {
100 err = PTR_ERR(inode); 100 err = PTR_ERR(inode);
101 goto fail; 101 goto fail;
@@ -223,7 +223,7 @@ static struct dentry *btrfs_get_parent(struct dentry *child)
223 223
224 key.type = BTRFS_INODE_ITEM_KEY; 224 key.type = BTRFS_INODE_ITEM_KEY;
225 key.offset = 0; 225 key.offset = 0;
226 dentry = d_obtain_alias(btrfs_iget(root->fs_info->sb, &key, root)); 226 dentry = d_obtain_alias(btrfs_iget(root->fs_info->sb, &key, root, NULL));
227 if (!IS_ERR(dentry)) 227 if (!IS_ERR(dentry))
228 dentry->d_op = &btrfs_dentry_operations; 228 dentry->d_op = &btrfs_dentry_operations;
229 return dentry; 229 return dentry;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 94627c4cc193..b34d32fdaaec 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -22,6 +22,7 @@
22#include <linux/sort.h> 22#include <linux/sort.h>
23#include <linux/rcupdate.h> 23#include <linux/rcupdate.h>
24#include <linux/kthread.h> 24#include <linux/kthread.h>
25#include <linux/slab.h>
25#include "compat.h" 26#include "compat.h"
26#include "hash.h" 27#include "hash.h"
27#include "ctree.h" 28#include "ctree.h"
@@ -83,6 +84,17 @@ static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
83 return (cache->flags & bits) == bits; 84 return (cache->flags & bits) == bits;
84} 85}
85 86
87void btrfs_get_block_group(struct btrfs_block_group_cache *cache)
88{
89 atomic_inc(&cache->count);
90}
91
92void btrfs_put_block_group(struct btrfs_block_group_cache *cache)
93{
94 if (atomic_dec_and_test(&cache->count))
95 kfree(cache);
96}
97
86/* 98/*
87 * this adds the block group to the fs_info rb tree for the block group 99 * this adds the block group to the fs_info rb tree for the block group
88 * cache 100 * cache
@@ -156,7 +168,7 @@ block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr,
156 } 168 }
157 } 169 }
158 if (ret) 170 if (ret)
159 atomic_inc(&ret->count); 171 btrfs_get_block_group(ret);
160 spin_unlock(&info->block_group_cache_lock); 172 spin_unlock(&info->block_group_cache_lock);
161 173
162 return ret; 174 return ret;
@@ -195,6 +207,14 @@ static int exclude_super_stripes(struct btrfs_root *root,
195 int stripe_len; 207 int stripe_len;
196 int i, nr, ret; 208 int i, nr, ret;
197 209
210 if (cache->key.objectid < BTRFS_SUPER_INFO_OFFSET) {
211 stripe_len = BTRFS_SUPER_INFO_OFFSET - cache->key.objectid;
212 cache->bytes_super += stripe_len;
213 ret = add_excluded_extent(root, cache->key.objectid,
214 stripe_len);
215 BUG_ON(ret);
216 }
217
198 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { 218 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
199 bytenr = btrfs_sb_offset(i); 219 bytenr = btrfs_sb_offset(i);
200 ret = btrfs_rmap_block(&root->fs_info->mapping_tree, 220 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
@@ -255,7 +275,7 @@ static u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
255 if (ret) 275 if (ret)
256 break; 276 break;
257 277
258 if (extent_start == start) { 278 if (extent_start <= start) {
259 start = extent_end + 1; 279 start = extent_end + 1;
260 } else if (extent_start > start && extent_start < end) { 280 } else if (extent_start > start && extent_start < end) {
261 size = extent_start - start; 281 size = extent_start - start;
@@ -399,6 +419,8 @@ err:
399 419
400 put_caching_control(caching_ctl); 420 put_caching_control(caching_ctl);
401 atomic_dec(&block_group->space_info->caching_threads); 421 atomic_dec(&block_group->space_info->caching_threads);
422 btrfs_put_block_group(block_group);
423
402 return 0; 424 return 0;
403} 425}
404 426
@@ -439,6 +461,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache)
439 up_write(&fs_info->extent_commit_sem); 461 up_write(&fs_info->extent_commit_sem);
440 462
441 atomic_inc(&cache->space_info->caching_threads); 463 atomic_inc(&cache->space_info->caching_threads);
464 btrfs_get_block_group(cache);
442 465
443 tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu\n", 466 tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu\n",
444 cache->key.objectid); 467 cache->key.objectid);
@@ -478,12 +501,6 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(
478 return cache; 501 return cache;
479} 502}
480 503
481void btrfs_put_block_group(struct btrfs_block_group_cache *cache)
482{
483 if (atomic_dec_and_test(&cache->count))
484 kfree(cache);
485}
486
487static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info, 504static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
488 u64 flags) 505 u64 flags)
489{ 506{
@@ -2574,7 +2591,7 @@ next_block_group(struct btrfs_root *root,
2574 if (node) { 2591 if (node) {
2575 cache = rb_entry(node, struct btrfs_block_group_cache, 2592 cache = rb_entry(node, struct btrfs_block_group_cache,
2576 cache_node); 2593 cache_node);
2577 atomic_inc(&cache->count); 2594 btrfs_get_block_group(cache);
2578 } else 2595 } else
2579 cache = NULL; 2596 cache = NULL;
2580 spin_unlock(&root->fs_info->block_group_cache_lock); 2597 spin_unlock(&root->fs_info->block_group_cache_lock);
@@ -2660,6 +2677,8 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
2660 2677
2661 INIT_LIST_HEAD(&found->block_groups); 2678 INIT_LIST_HEAD(&found->block_groups);
2662 init_rwsem(&found->groups_sem); 2679 init_rwsem(&found->groups_sem);
2680 init_waitqueue_head(&found->flush_wait);
2681 init_waitqueue_head(&found->allocate_wait);
2663 spin_lock_init(&found->lock); 2682 spin_lock_init(&found->lock);
2664 found->flags = flags; 2683 found->flags = flags;
2665 found->total_bytes = total_bytes; 2684 found->total_bytes = total_bytes;
@@ -2830,7 +2849,7 @@ int btrfs_unreserve_metadata_for_delalloc(struct btrfs_root *root,
2830 } 2849 }
2831 spin_unlock(&BTRFS_I(inode)->accounting_lock); 2850 spin_unlock(&BTRFS_I(inode)->accounting_lock);
2832 2851
2833 BTRFS_I(inode)->reserved_extents--; 2852 BTRFS_I(inode)->reserved_extents -= num_items;
2834 BUG_ON(BTRFS_I(inode)->reserved_extents < 0); 2853 BUG_ON(BTRFS_I(inode)->reserved_extents < 0);
2835 2854
2836 if (meta_sinfo->bytes_delalloc < num_bytes) { 2855 if (meta_sinfo->bytes_delalloc < num_bytes) {
@@ -2880,9 +2899,9 @@ static noinline void flush_delalloc_async(struct btrfs_work *work)
2880 root = async->root; 2899 root = async->root;
2881 info = async->info; 2900 info = async->info;
2882 2901
2883 btrfs_start_delalloc_inodes(root); 2902 btrfs_start_delalloc_inodes(root, 0);
2884 wake_up(&info->flush_wait); 2903 wake_up(&info->flush_wait);
2885 btrfs_wait_ordered_extents(root, 0); 2904 btrfs_wait_ordered_extents(root, 0, 0);
2886 2905
2887 spin_lock(&info->lock); 2906 spin_lock(&info->lock);
2888 info->flushing = 0; 2907 info->flushing = 0;
@@ -2928,12 +2947,10 @@ static void flush_delalloc(struct btrfs_root *root,
2928 2947
2929 spin_lock(&info->lock); 2948 spin_lock(&info->lock);
2930 2949
2931 if (!info->flushing) { 2950 if (!info->flushing)
2932 info->flushing = 1; 2951 info->flushing = 1;
2933 init_waitqueue_head(&info->flush_wait); 2952 else
2934 } else {
2935 wait = true; 2953 wait = true;
2936 }
2937 2954
2938 spin_unlock(&info->lock); 2955 spin_unlock(&info->lock);
2939 2956
@@ -2956,8 +2973,8 @@ static void flush_delalloc(struct btrfs_root *root,
2956 return; 2973 return;
2957 2974
2958flush: 2975flush:
2959 btrfs_start_delalloc_inodes(root); 2976 btrfs_start_delalloc_inodes(root, 0);
2960 btrfs_wait_ordered_extents(root, 0); 2977 btrfs_wait_ordered_extents(root, 0, 0);
2961 2978
2962 spin_lock(&info->lock); 2979 spin_lock(&info->lock);
2963 info->flushing = 0; 2980 info->flushing = 0;
@@ -2995,7 +3012,6 @@ static int maybe_allocate_chunk(struct btrfs_root *root,
2995 if (!info->allocating_chunk) { 3012 if (!info->allocating_chunk) {
2996 info->force_alloc = 1; 3013 info->force_alloc = 1;
2997 info->allocating_chunk = 1; 3014 info->allocating_chunk = 1;
2998 init_waitqueue_head(&info->allocate_wait);
2999 } else { 3015 } else {
3000 wait = true; 3016 wait = true;
3001 } 3017 }
@@ -3095,7 +3111,7 @@ again:
3095 return -ENOSPC; 3111 return -ENOSPC;
3096 } 3112 }
3097 3113
3098 BTRFS_I(inode)->reserved_extents++; 3114 BTRFS_I(inode)->reserved_extents += num_items;
3099 check_force_delalloc(meta_sinfo); 3115 check_force_delalloc(meta_sinfo);
3100 spin_unlock(&meta_sinfo->lock); 3116 spin_unlock(&meta_sinfo->lock);
3101 3117
@@ -3219,7 +3235,8 @@ int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode,
3219 u64 bytes) 3235 u64 bytes)
3220{ 3236{
3221 struct btrfs_space_info *data_sinfo; 3237 struct btrfs_space_info *data_sinfo;
3222 int ret = 0, committed = 0; 3238 u64 used;
3239 int ret = 0, committed = 0, flushed = 0;
3223 3240
3224 /* make sure bytes are sectorsize aligned */ 3241 /* make sure bytes are sectorsize aligned */
3225 bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); 3242 bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
@@ -3231,12 +3248,21 @@ int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode,
3231again: 3248again:
3232 /* make sure we have enough space to handle the data first */ 3249 /* make sure we have enough space to handle the data first */
3233 spin_lock(&data_sinfo->lock); 3250 spin_lock(&data_sinfo->lock);
3234 if (data_sinfo->total_bytes - data_sinfo->bytes_used - 3251 used = data_sinfo->bytes_used + data_sinfo->bytes_delalloc +
3235 data_sinfo->bytes_delalloc - data_sinfo->bytes_reserved - 3252 data_sinfo->bytes_reserved + data_sinfo->bytes_pinned +
3236 data_sinfo->bytes_pinned - data_sinfo->bytes_readonly - 3253 data_sinfo->bytes_readonly + data_sinfo->bytes_may_use +
3237 data_sinfo->bytes_may_use - data_sinfo->bytes_super < bytes) { 3254 data_sinfo->bytes_super;
3255
3256 if (used + bytes > data_sinfo->total_bytes) {
3238 struct btrfs_trans_handle *trans; 3257 struct btrfs_trans_handle *trans;
3239 3258
3259 if (!flushed) {
3260 spin_unlock(&data_sinfo->lock);
3261 flush_delalloc(root, data_sinfo);
3262 flushed = 1;
3263 goto again;
3264 }
3265
3240 /* 3266 /*
3241 * if we don't have enough free bytes in this space then we need 3267 * if we don't have enough free bytes in this space then we need
3242 * to alloc a new chunk. 3268 * to alloc a new chunk.
@@ -3454,14 +3480,6 @@ static int update_block_group(struct btrfs_trans_handle *trans,
3454 else 3480 else
3455 old_val -= num_bytes; 3481 old_val -= num_bytes;
3456 btrfs_set_super_bytes_used(&info->super_copy, old_val); 3482 btrfs_set_super_bytes_used(&info->super_copy, old_val);
3457
3458 /* block accounting for root item */
3459 old_val = btrfs_root_used(&root->root_item);
3460 if (alloc)
3461 old_val += num_bytes;
3462 else
3463 old_val -= num_bytes;
3464 btrfs_set_root_used(&root->root_item, old_val);
3465 spin_unlock(&info->delalloc_lock); 3483 spin_unlock(&info->delalloc_lock);
3466 3484
3467 while (total) { 3485 while (total) {
@@ -4049,6 +4067,21 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,
4049 return ret; 4067 return ret;
4050} 4068}
4051 4069
4070int btrfs_free_tree_block(struct btrfs_trans_handle *trans,
4071 struct btrfs_root *root,
4072 u64 bytenr, u32 blocksize,
4073 u64 parent, u64 root_objectid, int level)
4074{
4075 u64 used;
4076 spin_lock(&root->node_lock);
4077 used = btrfs_root_used(&root->root_item) - blocksize;
4078 btrfs_set_root_used(&root->root_item, used);
4079 spin_unlock(&root->node_lock);
4080
4081 return btrfs_free_extent(trans, root, bytenr, blocksize,
4082 parent, root_objectid, level, 0);
4083}
4084
4052static u64 stripe_align(struct btrfs_root *root, u64 val) 4085static u64 stripe_align(struct btrfs_root *root, u64 val)
4053{ 4086{
4054 u64 mask = ((u64)root->stripesize - 1); 4087 u64 mask = ((u64)root->stripesize - 1);
@@ -4147,6 +4180,10 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
4147 ins->offset = 0; 4180 ins->offset = 0;
4148 4181
4149 space_info = __find_space_info(root->fs_info, data); 4182 space_info = __find_space_info(root->fs_info, data);
4183 if (!space_info) {
4184 printk(KERN_ERR "No space info for %d\n", data);
4185 return -ENOSPC;
4186 }
4150 4187
4151 if (orig_root->ref_cows || empty_size) 4188 if (orig_root->ref_cows || empty_size)
4152 allowed_chunk_alloc = 1; 4189 allowed_chunk_alloc = 1;
@@ -4212,7 +4249,7 @@ search:
4212 u64 offset; 4249 u64 offset;
4213 int cached; 4250 int cached;
4214 4251
4215 atomic_inc(&block_group->count); 4252 btrfs_get_block_group(block_group);
4216 search_start = block_group->key.objectid; 4253 search_start = block_group->key.objectid;
4217 4254
4218have_block_group: 4255have_block_group:
@@ -4300,7 +4337,7 @@ have_block_group:
4300 4337
4301 btrfs_put_block_group(block_group); 4338 btrfs_put_block_group(block_group);
4302 block_group = last_ptr->block_group; 4339 block_group = last_ptr->block_group;
4303 atomic_inc(&block_group->count); 4340 btrfs_get_block_group(block_group);
4304 spin_unlock(&last_ptr->lock); 4341 spin_unlock(&last_ptr->lock);
4305 spin_unlock(&last_ptr->refill_lock); 4342 spin_unlock(&last_ptr->refill_lock);
4306 4343
@@ -4578,7 +4615,6 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
4578{ 4615{
4579 int ret; 4616 int ret;
4580 u64 search_start = 0; 4617 u64 search_start = 0;
4581 struct btrfs_fs_info *info = root->fs_info;
4582 4618
4583 data = btrfs_get_alloc_profile(root, data); 4619 data = btrfs_get_alloc_profile(root, data);
4584again: 4620again:
@@ -4586,17 +4622,9 @@ again:
4586 * the only place that sets empty_size is btrfs_realloc_node, which 4622 * the only place that sets empty_size is btrfs_realloc_node, which
4587 * is not called recursively on allocations 4623 * is not called recursively on allocations
4588 */ 4624 */
4589 if (empty_size || root->ref_cows) { 4625 if (empty_size || root->ref_cows)
4590 if (!(data & BTRFS_BLOCK_GROUP_METADATA)) {
4591 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
4592 2 * 1024 * 1024,
4593 BTRFS_BLOCK_GROUP_METADATA |
4594 (info->metadata_alloc_profile &
4595 info->avail_metadata_alloc_bits), 0);
4596 }
4597 ret = do_chunk_alloc(trans, root->fs_info->extent_root, 4626 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
4598 num_bytes + 2 * 1024 * 1024, data, 0); 4627 num_bytes + 2 * 1024 * 1024, data, 0);
4599 }
4600 4628
4601 WARN_ON(num_bytes < root->sectorsize); 4629 WARN_ON(num_bytes < root->sectorsize);
4602 ret = find_free_extent(trans, root, num_bytes, empty_size, 4630 ret = find_free_extent(trans, root, num_bytes, empty_size,
@@ -4897,6 +4925,14 @@ static int alloc_tree_block(struct btrfs_trans_handle *trans,
4897 extent_op); 4925 extent_op);
4898 BUG_ON(ret); 4926 BUG_ON(ret);
4899 } 4927 }
4928
4929 if (root_objectid == root->root_key.objectid) {
4930 u64 used;
4931 spin_lock(&root->node_lock);
4932 used = btrfs_root_used(&root->root_item) + num_bytes;
4933 btrfs_set_root_used(&root->root_item, used);
4934 spin_unlock(&root->node_lock);
4935 }
4900 return ret; 4936 return ret;
4901} 4937}
4902 4938
@@ -4919,8 +4955,16 @@ struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
4919 btrfs_set_buffer_uptodate(buf); 4955 btrfs_set_buffer_uptodate(buf);
4920 4956
4921 if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { 4957 if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
4922 set_extent_dirty(&root->dirty_log_pages, buf->start, 4958 /*
4923 buf->start + buf->len - 1, GFP_NOFS); 4959 * we allow two log transactions at a time, use different
 4960 * EXTENT bit to differentiate dirty pages.
4961 */
4962 if (root->log_transid % 2 == 0)
4963 set_extent_dirty(&root->dirty_log_pages, buf->start,
4964 buf->start + buf->len - 1, GFP_NOFS);
4965 else
4966 set_extent_new(&root->dirty_log_pages, buf->start,
4967 buf->start + buf->len - 1, GFP_NOFS);
4924 } else { 4968 } else {
4925 set_extent_dirty(&trans->transaction->dirty_pages, buf->start, 4969 set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
4926 buf->start + buf->len - 1, GFP_NOFS); 4970 buf->start + buf->len - 1, GFP_NOFS);
@@ -5175,6 +5219,8 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
5175 next = btrfs_find_tree_block(root, bytenr, blocksize); 5219 next = btrfs_find_tree_block(root, bytenr, blocksize);
5176 if (!next) { 5220 if (!next) {
5177 next = btrfs_find_create_tree_block(root, bytenr, blocksize); 5221 next = btrfs_find_create_tree_block(root, bytenr, blocksize);
5222 if (!next)
5223 return -ENOMEM;
5178 reada = 1; 5224 reada = 1;
5179 } 5225 }
5180 btrfs_tree_lock(next); 5226 btrfs_tree_lock(next);
@@ -5372,10 +5418,6 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
5372 int ret; 5418 int ret;
5373 5419
5374 while (level >= 0) { 5420 while (level >= 0) {
5375 if (path->slots[level] >=
5376 btrfs_header_nritems(path->nodes[level]))
5377 break;
5378
5379 ret = walk_down_proc(trans, root, path, wc, lookup_info); 5421 ret = walk_down_proc(trans, root, path, wc, lookup_info);
5380 if (ret > 0) 5422 if (ret > 0)
5381 break; 5423 break;
@@ -5383,11 +5425,16 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
5383 if (level == 0) 5425 if (level == 0)
5384 break; 5426 break;
5385 5427
5428 if (path->slots[level] >=
5429 btrfs_header_nritems(path->nodes[level]))
5430 break;
5431
5386 ret = do_walk_down(trans, root, path, wc, &lookup_info); 5432 ret = do_walk_down(trans, root, path, wc, &lookup_info);
5387 if (ret > 0) { 5433 if (ret > 0) {
5388 path->slots[level]++; 5434 path->slots[level]++;
5389 continue; 5435 continue;
5390 } 5436 } else if (ret < 0)
5437 return ret;
5391 level = wc->level; 5438 level = wc->level;
5392 } 5439 }
5393 return 0; 5440 return 0;
@@ -6531,6 +6578,7 @@ static noinline int invalidate_extent_cache(struct btrfs_root *root,
6531 struct btrfs_key key; 6578 struct btrfs_key key;
6532 struct inode *inode = NULL; 6579 struct inode *inode = NULL;
6533 struct btrfs_file_extent_item *fi; 6580 struct btrfs_file_extent_item *fi;
6581 struct extent_state *cached_state = NULL;
6534 u64 num_bytes; 6582 u64 num_bytes;
6535 u64 skip_objectid = 0; 6583 u64 skip_objectid = 0;
6536 u32 nritems; 6584 u32 nritems;
@@ -6559,12 +6607,14 @@ static noinline int invalidate_extent_cache(struct btrfs_root *root,
6559 } 6607 }
6560 num_bytes = btrfs_file_extent_num_bytes(leaf, fi); 6608 num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
6561 6609
6562 lock_extent(&BTRFS_I(inode)->io_tree, key.offset, 6610 lock_extent_bits(&BTRFS_I(inode)->io_tree, key.offset,
6563 key.offset + num_bytes - 1, GFP_NOFS); 6611 key.offset + num_bytes - 1, 0, &cached_state,
6612 GFP_NOFS);
6564 btrfs_drop_extent_cache(inode, key.offset, 6613 btrfs_drop_extent_cache(inode, key.offset,
6565 key.offset + num_bytes - 1, 1); 6614 key.offset + num_bytes - 1, 1);
6566 unlock_extent(&BTRFS_I(inode)->io_tree, key.offset, 6615 unlock_extent_cached(&BTRFS_I(inode)->io_tree, key.offset,
6567 key.offset + num_bytes - 1, GFP_NOFS); 6616 key.offset + num_bytes - 1, &cached_state,
6617 GFP_NOFS);
6568 cond_resched(); 6618 cond_resched();
6569 } 6619 }
6570 iput(inode); 6620 iput(inode);
@@ -7336,7 +7386,6 @@ static int find_first_block_group(struct btrfs_root *root,
7336 } 7386 }
7337 path->slots[0]++; 7387 path->slots[0]++;
7338 } 7388 }
7339 ret = -ENOENT;
7340out: 7389out:
7341 return ret; 7390 return ret;
7342} 7391}
@@ -7373,9 +7422,7 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
7373 wait_block_group_cache_done(block_group); 7422 wait_block_group_cache_done(block_group);
7374 7423
7375 btrfs_remove_free_space_cache(block_group); 7424 btrfs_remove_free_space_cache(block_group);
7376 7425 btrfs_put_block_group(block_group);
7377 WARN_ON(atomic_read(&block_group->count) != 1);
7378 kfree(block_group);
7379 7426
7380 spin_lock(&info->block_group_cache_lock); 7427 spin_lock(&info->block_group_cache_lock);
7381 } 7428 }
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 96577e8bf9fd..d2d03684fab2 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2,7 +2,6 @@
2#include <linux/slab.h> 2#include <linux/slab.h>
3#include <linux/bio.h> 3#include <linux/bio.h>
4#include <linux/mm.h> 4#include <linux/mm.h>
5#include <linux/gfp.h>
6#include <linux/pagemap.h> 5#include <linux/pagemap.h>
7#include <linux/page-flags.h> 6#include <linux/page-flags.h>
8#include <linux/module.h> 7#include <linux/module.h>
@@ -104,8 +103,8 @@ void extent_io_exit(void)
104void extent_io_tree_init(struct extent_io_tree *tree, 103void extent_io_tree_init(struct extent_io_tree *tree,
105 struct address_space *mapping, gfp_t mask) 104 struct address_space *mapping, gfp_t mask)
106{ 105{
107 tree->state.rb_node = NULL; 106 tree->state = RB_ROOT;
108 tree->buffer.rb_node = NULL; 107 tree->buffer = RB_ROOT;
109 tree->ops = NULL; 108 tree->ops = NULL;
110 tree->dirty_bytes = 0; 109 tree->dirty_bytes = 0;
111 spin_lock_init(&tree->lock); 110 spin_lock_init(&tree->lock);
@@ -513,7 +512,10 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
513 u64 last_end; 512 u64 last_end;
514 int err; 513 int err;
515 int set = 0; 514 int set = 0;
515 int clear = 0;
516 516
517 if (bits & (EXTENT_IOBITS | EXTENT_BOUNDARY))
518 clear = 1;
517again: 519again:
518 if (!prealloc && (mask & __GFP_WAIT)) { 520 if (!prealloc && (mask & __GFP_WAIT)) {
519 prealloc = alloc_extent_state(mask); 521 prealloc = alloc_extent_state(mask);
@@ -524,14 +526,20 @@ again:
524 spin_lock(&tree->lock); 526 spin_lock(&tree->lock);
525 if (cached_state) { 527 if (cached_state) {
526 cached = *cached_state; 528 cached = *cached_state;
527 *cached_state = NULL; 529
528 cached_state = NULL; 530 if (clear) {
531 *cached_state = NULL;
532 cached_state = NULL;
533 }
534
529 if (cached && cached->tree && cached->start == start) { 535 if (cached && cached->tree && cached->start == start) {
530 atomic_dec(&cached->refs); 536 if (clear)
537 atomic_dec(&cached->refs);
531 state = cached; 538 state = cached;
532 goto hit_next; 539 goto hit_next;
533 } 540 }
534 free_extent_state(cached); 541 if (clear)
542 free_extent_state(cached);
535 } 543 }
536 /* 544 /*
537 * this search will find the extents that end after 545 * this search will find the extents that end after
@@ -946,11 +954,11 @@ int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
946} 954}
947 955
948int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, 956int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end,
949 gfp_t mask) 957 struct extent_state **cached_state, gfp_t mask)
950{ 958{
951 return set_extent_bit(tree, start, end, 959 return set_extent_bit(tree, start, end,
952 EXTENT_DELALLOC | EXTENT_DIRTY | EXTENT_UPTODATE, 960 EXTENT_DELALLOC | EXTENT_DIRTY | EXTENT_UPTODATE,
953 0, NULL, NULL, mask); 961 0, NULL, cached_state, mask);
954} 962}
955 963
956int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, 964int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
@@ -984,10 +992,11 @@ int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
984} 992}
985 993
986static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, 994static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start,
987 u64 end, gfp_t mask) 995 u64 end, struct extent_state **cached_state,
996 gfp_t mask)
988{ 997{
989 return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, 998 return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0,
990 NULL, mask); 999 cached_state, mask);
991} 1000}
992 1001
993int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end) 1002int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end)
@@ -1171,7 +1180,8 @@ out:
1171 * 1 is returned if we find something, 0 if nothing was in the tree 1180 * 1 is returned if we find something, 0 if nothing was in the tree
1172 */ 1181 */
1173static noinline u64 find_delalloc_range(struct extent_io_tree *tree, 1182static noinline u64 find_delalloc_range(struct extent_io_tree *tree,
1174 u64 *start, u64 *end, u64 max_bytes) 1183 u64 *start, u64 *end, u64 max_bytes,
1184 struct extent_state **cached_state)
1175{ 1185{
1176 struct rb_node *node; 1186 struct rb_node *node;
1177 struct extent_state *state; 1187 struct extent_state *state;
@@ -1203,8 +1213,11 @@ static noinline u64 find_delalloc_range(struct extent_io_tree *tree,
1203 *end = state->end; 1213 *end = state->end;
1204 goto out; 1214 goto out;
1205 } 1215 }
1206 if (!found) 1216 if (!found) {
1207 *start = state->start; 1217 *start = state->start;
1218 *cached_state = state;
1219 atomic_inc(&state->refs);
1220 }
1208 found++; 1221 found++;
1209 *end = state->end; 1222 *end = state->end;
1210 cur_start = state->end + 1; 1223 cur_start = state->end + 1;
@@ -1336,10 +1349,11 @@ again:
1336 delalloc_start = *start; 1349 delalloc_start = *start;
1337 delalloc_end = 0; 1350 delalloc_end = 0;
1338 found = find_delalloc_range(tree, &delalloc_start, &delalloc_end, 1351 found = find_delalloc_range(tree, &delalloc_start, &delalloc_end,
1339 max_bytes); 1352 max_bytes, &cached_state);
1340 if (!found || delalloc_end <= *start) { 1353 if (!found || delalloc_end <= *start) {
1341 *start = delalloc_start; 1354 *start = delalloc_start;
1342 *end = delalloc_end; 1355 *end = delalloc_end;
1356 free_extent_state(cached_state);
1343 return found; 1357 return found;
1344 } 1358 }
1345 1359
@@ -1722,7 +1736,7 @@ static void end_bio_extent_writepage(struct bio *bio, int err)
1722 } 1736 }
1723 1737
1724 if (!uptodate) { 1738 if (!uptodate) {
1725 clear_extent_uptodate(tree, start, end, GFP_NOFS); 1739 clear_extent_uptodate(tree, start, end, NULL, GFP_NOFS);
1726 ClearPageUptodate(page); 1740 ClearPageUptodate(page);
1727 SetPageError(page); 1741 SetPageError(page);
1728 } 1742 }
@@ -1750,7 +1764,8 @@ static void end_bio_extent_writepage(struct bio *bio, int err)
1750static void end_bio_extent_readpage(struct bio *bio, int err) 1764static void end_bio_extent_readpage(struct bio *bio, int err)
1751{ 1765{
1752 int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); 1766 int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
1753 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; 1767 struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1;
1768 struct bio_vec *bvec = bio->bi_io_vec;
1754 struct extent_io_tree *tree; 1769 struct extent_io_tree *tree;
1755 u64 start; 1770 u64 start;
1756 u64 end; 1771 u64 end;
@@ -1773,7 +1788,7 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
1773 else 1788 else
1774 whole_page = 0; 1789 whole_page = 0;
1775 1790
1776 if (--bvec >= bio->bi_io_vec) 1791 if (++bvec <= bvec_end)
1777 prefetchw(&bvec->bv_page->flags); 1792 prefetchw(&bvec->bv_page->flags);
1778 1793
1779 if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) { 1794 if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
@@ -1818,7 +1833,7 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
1818 } 1833 }
1819 check_page_locked(tree, page); 1834 check_page_locked(tree, page);
1820 } 1835 }
1821 } while (bvec >= bio->bi_io_vec); 1836 } while (bvec <= bvec_end);
1822 1837
1823 bio_put(bio); 1838 bio_put(bio);
1824} 1839}
@@ -2663,33 +2678,20 @@ int extent_readpages(struct extent_io_tree *tree,
2663{ 2678{
2664 struct bio *bio = NULL; 2679 struct bio *bio = NULL;
2665 unsigned page_idx; 2680 unsigned page_idx;
2666 struct pagevec pvec;
2667 unsigned long bio_flags = 0; 2681 unsigned long bio_flags = 0;
2668 2682
2669 pagevec_init(&pvec, 0);
2670 for (page_idx = 0; page_idx < nr_pages; page_idx++) { 2683 for (page_idx = 0; page_idx < nr_pages; page_idx++) {
2671 struct page *page = list_entry(pages->prev, struct page, lru); 2684 struct page *page = list_entry(pages->prev, struct page, lru);
2672 2685
2673 prefetchw(&page->flags); 2686 prefetchw(&page->flags);
2674 list_del(&page->lru); 2687 list_del(&page->lru);
2675 /* 2688 if (!add_to_page_cache_lru(page, mapping,
2676 * what we want to do here is call add_to_page_cache_lru,
2677 * but that isn't exported, so we reproduce it here
2678 */
2679 if (!add_to_page_cache(page, mapping,
2680 page->index, GFP_KERNEL)) { 2689 page->index, GFP_KERNEL)) {
2681
2682 /* open coding of lru_cache_add, also not exported */
2683 page_cache_get(page);
2684 if (!pagevec_add(&pvec, page))
2685 __pagevec_lru_add_file(&pvec);
2686 __extent_read_full_page(tree, page, get_extent, 2690 __extent_read_full_page(tree, page, get_extent,
2687 &bio, 0, &bio_flags); 2691 &bio, 0, &bio_flags);
2688 } 2692 }
2689 page_cache_release(page); 2693 page_cache_release(page);
2690 } 2694 }
2691 if (pagevec_count(&pvec))
2692 __pagevec_lru_add_file(&pvec);
2693 BUG_ON(!list_empty(pages)); 2695 BUG_ON(!list_empty(pages));
2694 if (bio) 2696 if (bio)
2695 submit_one_bio(READ, bio, 0, bio_flags); 2697 submit_one_bio(READ, bio, 0, bio_flags);
@@ -2704,6 +2706,7 @@ int extent_readpages(struct extent_io_tree *tree,
2704int extent_invalidatepage(struct extent_io_tree *tree, 2706int extent_invalidatepage(struct extent_io_tree *tree,
2705 struct page *page, unsigned long offset) 2707 struct page *page, unsigned long offset)
2706{ 2708{
2709 struct extent_state *cached_state = NULL;
2707 u64 start = ((u64)page->index << PAGE_CACHE_SHIFT); 2710 u64 start = ((u64)page->index << PAGE_CACHE_SHIFT);
2708 u64 end = start + PAGE_CACHE_SIZE - 1; 2711 u64 end = start + PAGE_CACHE_SIZE - 1;
2709 size_t blocksize = page->mapping->host->i_sb->s_blocksize; 2712 size_t blocksize = page->mapping->host->i_sb->s_blocksize;
@@ -2712,12 +2715,12 @@ int extent_invalidatepage(struct extent_io_tree *tree,
2712 if (start > end) 2715 if (start > end)
2713 return 0; 2716 return 0;
2714 2717
2715 lock_extent(tree, start, end, GFP_NOFS); 2718 lock_extent_bits(tree, start, end, 0, &cached_state, GFP_NOFS);
2716 wait_on_page_writeback(page); 2719 wait_on_page_writeback(page);
2717 clear_extent_bit(tree, start, end, 2720 clear_extent_bit(tree, start, end,
2718 EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC | 2721 EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC |
2719 EXTENT_DO_ACCOUNTING, 2722 EXTENT_DO_ACCOUNTING,
2720 1, 1, NULL, GFP_NOFS); 2723 1, 1, &cached_state, GFP_NOFS);
2721 return 0; 2724 return 0;
2722} 2725}
2723 2726
@@ -2920,16 +2923,17 @@ sector_t extent_bmap(struct address_space *mapping, sector_t iblock,
2920 get_extent_t *get_extent) 2923 get_extent_t *get_extent)
2921{ 2924{
2922 struct inode *inode = mapping->host; 2925 struct inode *inode = mapping->host;
2926 struct extent_state *cached_state = NULL;
2923 u64 start = iblock << inode->i_blkbits; 2927 u64 start = iblock << inode->i_blkbits;
2924 sector_t sector = 0; 2928 sector_t sector = 0;
2925 size_t blksize = (1 << inode->i_blkbits); 2929 size_t blksize = (1 << inode->i_blkbits);
2926 struct extent_map *em; 2930 struct extent_map *em;
2927 2931
2928 lock_extent(&BTRFS_I(inode)->io_tree, start, start + blksize - 1, 2932 lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + blksize - 1,
2929 GFP_NOFS); 2933 0, &cached_state, GFP_NOFS);
2930 em = get_extent(inode, NULL, 0, start, blksize, 0); 2934 em = get_extent(inode, NULL, 0, start, blksize, 0);
2931 unlock_extent(&BTRFS_I(inode)->io_tree, start, start + blksize - 1, 2935 unlock_extent_cached(&BTRFS_I(inode)->io_tree, start,
2932 GFP_NOFS); 2936 start + blksize - 1, &cached_state, GFP_NOFS);
2933 if (!em || IS_ERR(em)) 2937 if (!em || IS_ERR(em))
2934 return 0; 2938 return 0;
2935 2939
@@ -2951,6 +2955,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2951 u32 flags = 0; 2955 u32 flags = 0;
2952 u64 disko = 0; 2956 u64 disko = 0;
2953 struct extent_map *em = NULL; 2957 struct extent_map *em = NULL;
2958 struct extent_state *cached_state = NULL;
2954 int end = 0; 2959 int end = 0;
2955 u64 em_start = 0, em_len = 0; 2960 u64 em_start = 0, em_len = 0;
2956 unsigned long emflags; 2961 unsigned long emflags;
@@ -2959,8 +2964,8 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2959 if (len == 0) 2964 if (len == 0)
2960 return -EINVAL; 2965 return -EINVAL;
2961 2966
2962 lock_extent(&BTRFS_I(inode)->io_tree, start, start + len, 2967 lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0,
2963 GFP_NOFS); 2968 &cached_state, GFP_NOFS);
2964 em = get_extent(inode, NULL, 0, off, max - off, 0); 2969 em = get_extent(inode, NULL, 0, off, max - off, 0);
2965 if (!em) 2970 if (!em)
2966 goto out; 2971 goto out;
@@ -3023,8 +3028,8 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
3023out_free: 3028out_free:
3024 free_extent_map(em); 3029 free_extent_map(em);
3025out: 3030out:
3026 unlock_extent(&BTRFS_I(inode)->io_tree, start, start + len, 3031 unlock_extent_cached(&BTRFS_I(inode)->io_tree, start, start + len,
3027 GFP_NOFS); 3032 &cached_state, GFP_NOFS);
3028 return ret; 3033 return ret;
3029} 3034}
3030 3035
@@ -3165,10 +3170,9 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
3165 spin_unlock(&tree->buffer_lock); 3170 spin_unlock(&tree->buffer_lock);
3166 goto free_eb; 3171 goto free_eb;
3167 } 3172 }
3168 spin_unlock(&tree->buffer_lock);
3169
3170 /* add one reference for the tree */ 3173 /* add one reference for the tree */
3171 atomic_inc(&eb->refs); 3174 atomic_inc(&eb->refs);
3175 spin_unlock(&tree->buffer_lock);
3172 return eb; 3176 return eb;
3173 3177
3174free_eb: 3178free_eb:
@@ -3265,7 +3269,8 @@ int set_extent_buffer_dirty(struct extent_io_tree *tree,
3265} 3269}
3266 3270
3267int clear_extent_buffer_uptodate(struct extent_io_tree *tree, 3271int clear_extent_buffer_uptodate(struct extent_io_tree *tree,
3268 struct extent_buffer *eb) 3272 struct extent_buffer *eb,
3273 struct extent_state **cached_state)
3269{ 3274{
3270 unsigned long i; 3275 unsigned long i;
3271 struct page *page; 3276 struct page *page;
@@ -3275,7 +3280,7 @@ int clear_extent_buffer_uptodate(struct extent_io_tree *tree,
3275 clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); 3280 clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
3276 3281
3277 clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, 3282 clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
3278 GFP_NOFS); 3283 cached_state, GFP_NOFS);
3279 for (i = 0; i < num_pages; i++) { 3284 for (i = 0; i < num_pages; i++) {
3280 page = extent_buffer_page(eb, i); 3285 page = extent_buffer_page(eb, i);
3281 if (page) 3286 if (page)
@@ -3335,7 +3340,8 @@ int extent_range_uptodate(struct extent_io_tree *tree,
3335} 3340}
3336 3341
3337int extent_buffer_uptodate(struct extent_io_tree *tree, 3342int extent_buffer_uptodate(struct extent_io_tree *tree,
3338 struct extent_buffer *eb) 3343 struct extent_buffer *eb,
3344 struct extent_state *cached_state)
3339{ 3345{
3340 int ret = 0; 3346 int ret = 0;
3341 unsigned long num_pages; 3347 unsigned long num_pages;
@@ -3347,7 +3353,7 @@ int extent_buffer_uptodate(struct extent_io_tree *tree,
3347 return 1; 3353 return 1;
3348 3354
3349 ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1, 3355 ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1,
3350 EXTENT_UPTODATE, 1, NULL); 3356 EXTENT_UPTODATE, 1, cached_state);
3351 if (ret) 3357 if (ret)
3352 return ret; 3358 return ret;
3353 3359
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 36de250a7b2b..bbab4813646f 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -163,6 +163,8 @@ int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask);
163int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, 163int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
164 int bits, struct extent_state **cached, gfp_t mask); 164 int bits, struct extent_state **cached, gfp_t mask);
165int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); 165int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask);
166int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end,
167 struct extent_state **cached, gfp_t mask);
166int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end, 168int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
167 gfp_t mask); 169 gfp_t mask);
168int extent_read_full_page(struct extent_io_tree *tree, struct page *page, 170int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
@@ -196,7 +198,7 @@ int clear_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end,
196int clear_extent_ordered_metadata(struct extent_io_tree *tree, u64 start, 198int clear_extent_ordered_metadata(struct extent_io_tree *tree, u64 start,
197 u64 end, gfp_t mask); 199 u64 end, gfp_t mask);
198int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, 200int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end,
199 gfp_t mask); 201 struct extent_state **cached_state, gfp_t mask);
200int set_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end, 202int set_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end,
201 gfp_t mask); 203 gfp_t mask);
202int find_first_extent_bit(struct extent_io_tree *tree, u64 start, 204int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
@@ -281,9 +283,11 @@ int test_extent_buffer_dirty(struct extent_io_tree *tree,
281int set_extent_buffer_uptodate(struct extent_io_tree *tree, 283int set_extent_buffer_uptodate(struct extent_io_tree *tree,
282 struct extent_buffer *eb); 284 struct extent_buffer *eb);
283int clear_extent_buffer_uptodate(struct extent_io_tree *tree, 285int clear_extent_buffer_uptodate(struct extent_io_tree *tree,
284 struct extent_buffer *eb); 286 struct extent_buffer *eb,
287 struct extent_state **cached_state);
285int extent_buffer_uptodate(struct extent_io_tree *tree, 288int extent_buffer_uptodate(struct extent_io_tree *tree,
286 struct extent_buffer *eb); 289 struct extent_buffer *eb,
290 struct extent_state *cached_state);
287int map_extent_buffer(struct extent_buffer *eb, unsigned long offset, 291int map_extent_buffer(struct extent_buffer *eb, unsigned long offset,
288 unsigned long min_len, char **token, char **map, 292 unsigned long min_len, char **token, char **map,
289 unsigned long *map_start, 293 unsigned long *map_start,
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index ccbdcb54ec5d..454ca52d6451 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -1,5 +1,4 @@
1#include <linux/err.h> 1#include <linux/err.h>
2#include <linux/gfp.h>
3#include <linux/slab.h> 2#include <linux/slab.h>
4#include <linux/module.h> 3#include <linux/module.h>
5#include <linux/spinlock.h> 4#include <linux/spinlock.h>
@@ -35,7 +34,7 @@ void extent_map_exit(void)
35 */ 34 */
36void extent_map_tree_init(struct extent_map_tree *tree, gfp_t mask) 35void extent_map_tree_init(struct extent_map_tree *tree, gfp_t mask)
37{ 36{
38 tree->map.rb_node = NULL; 37 tree->map = RB_ROOT;
39 rwlock_init(&tree->lock); 38 rwlock_init(&tree->lock);
40} 39}
41 40
@@ -155,20 +154,6 @@ static struct rb_node *__tree_search(struct rb_root *root, u64 offset,
155 return NULL; 154 return NULL;
156} 155}
157 156
158/*
159 * look for an offset in the tree, and if it can't be found, return
160 * the first offset we can find smaller than 'offset'.
161 */
162static inline struct rb_node *tree_search(struct rb_root *root, u64 offset)
163{
164 struct rb_node *prev;
165 struct rb_node *ret;
166 ret = __tree_search(root, offset, &prev, NULL);
167 if (!ret)
168 return prev;
169 return ret;
170}
171
172/* check to see if two extent_map structs are adjacent and safe to merge */ 157/* check to see if two extent_map structs are adjacent and safe to merge */
173static int mergable_maps(struct extent_map *prev, struct extent_map *next) 158static int mergable_maps(struct extent_map *prev, struct extent_map *next)
174{ 159{
@@ -256,7 +241,7 @@ out:
256 * Insert @em into @tree or perform a simple forward/backward merge with 241 * Insert @em into @tree or perform a simple forward/backward merge with
257 * existing mappings. The extent_map struct passed in will be inserted 242 * existing mappings. The extent_map struct passed in will be inserted
258 * into the tree directly, with an additional reference taken, or a 243 * into the tree directly, with an additional reference taken, or a
259 * reference dropped if the merge attempt was sucessfull. 244 * reference dropped if the merge attempt was successfull.
260 */ 245 */
261int add_extent_mapping(struct extent_map_tree *tree, 246int add_extent_mapping(struct extent_map_tree *tree,
262 struct extent_map *em) 247 struct extent_map *em)
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 9b99886562d0..54a255065aa3 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -17,6 +17,7 @@
17 */ 17 */
18 18
19#include <linux/bio.h> 19#include <linux/bio.h>
20#include <linux/slab.h>
20#include <linux/pagemap.h> 21#include <linux/pagemap.h>
21#include <linux/highmem.h> 22#include <linux/highmem.h>
22#include "ctree.h" 23#include "ctree.h"
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 06550affbd27..29ff749ff4ca 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -28,6 +28,7 @@
28#include <linux/writeback.h> 28#include <linux/writeback.h>
29#include <linux/statfs.h> 29#include <linux/statfs.h>
30#include <linux/compat.h> 30#include <linux/compat.h>
31#include <linux/slab.h>
31#include "ctree.h" 32#include "ctree.h"
32#include "disk-io.h" 33#include "disk-io.h"
33#include "transaction.h" 34#include "transaction.h"
@@ -123,7 +124,8 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans,
123 root->sectorsize - 1) & ~((u64)root->sectorsize - 1); 124 root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
124 125
125 end_of_last_block = start_pos + num_bytes - 1; 126 end_of_last_block = start_pos + num_bytes - 1;
126 err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block); 127 err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block,
128 NULL);
127 if (err) 129 if (err)
128 return err; 130 return err;
129 131
@@ -179,18 +181,14 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
179 } 181 }
180 flags = em->flags; 182 flags = em->flags;
181 if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) { 183 if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) {
182 if (em->start <= start && 184 if (testend && em->start + em->len >= start + len) {
183 (!testend || em->start + em->len >= start + len)) {
184 free_extent_map(em); 185 free_extent_map(em);
185 write_unlock(&em_tree->lock); 186 write_unlock(&em_tree->lock);
186 break; 187 break;
187 } 188 }
188 if (start < em->start) { 189 start = em->start + em->len;
189 len = em->start - start; 190 if (testend)
190 } else {
191 len = start + len - (em->start + em->len); 191 len = start + len - (em->start + em->len);
192 start = em->start + em->len;
193 }
194 free_extent_map(em); 192 free_extent_map(em);
195 write_unlock(&em_tree->lock); 193 write_unlock(&em_tree->lock);
196 continue; 194 continue;
@@ -265,324 +263,253 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
265 * If an extent intersects the range but is not entirely inside the range 263 * If an extent intersects the range but is not entirely inside the range
266 * it is either truncated or split. Anything entirely inside the range 264 * it is either truncated or split. Anything entirely inside the range
267 * is deleted from the tree. 265 * is deleted from the tree.
268 *
269 * inline_limit is used to tell this code which offsets in the file to keep
270 * if they contain inline extents.
271 */ 266 */
272noinline int btrfs_drop_extents(struct btrfs_trans_handle *trans, 267int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode,
273 struct btrfs_root *root, struct inode *inode, 268 u64 start, u64 end, u64 *hint_byte, int drop_cache)
274 u64 start, u64 end, u64 locked_end,
275 u64 inline_limit, u64 *hint_byte, int drop_cache)
276{ 269{
277 u64 extent_end = 0; 270 struct btrfs_root *root = BTRFS_I(inode)->root;
278 u64 search_start = start;
279 u64 ram_bytes = 0;
280 u64 disk_bytenr = 0;
281 u64 orig_locked_end = locked_end;
282 u8 compression;
283 u8 encryption;
284 u16 other_encoding = 0;
285 struct extent_buffer *leaf; 271 struct extent_buffer *leaf;
286 struct btrfs_file_extent_item *extent; 272 struct btrfs_file_extent_item *fi;
287 struct btrfs_path *path; 273 struct btrfs_path *path;
288 struct btrfs_key key; 274 struct btrfs_key key;
289 struct btrfs_file_extent_item old; 275 struct btrfs_key new_key;
290 int keep; 276 u64 search_start = start;
291 int slot; 277 u64 disk_bytenr = 0;
292 int bookend; 278 u64 num_bytes = 0;
293 int found_type = 0; 279 u64 extent_offset = 0;
294 int found_extent; 280 u64 extent_end = 0;
295 int found_inline; 281 int del_nr = 0;
282 int del_slot = 0;
283 int extent_type;
296 int recow; 284 int recow;
297 int ret; 285 int ret;
298 286
299 inline_limit = 0;
300 if (drop_cache) 287 if (drop_cache)
301 btrfs_drop_extent_cache(inode, start, end - 1, 0); 288 btrfs_drop_extent_cache(inode, start, end - 1, 0);
302 289
303 path = btrfs_alloc_path(); 290 path = btrfs_alloc_path();
304 if (!path) 291 if (!path)
305 return -ENOMEM; 292 return -ENOMEM;
293
306 while (1) { 294 while (1) {
307 recow = 0; 295 recow = 0;
308 btrfs_release_path(root, path);
309 ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino, 296 ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino,
310 search_start, -1); 297 search_start, -1);
311 if (ret < 0) 298 if (ret < 0)
312 goto out; 299 break;
313 if (ret > 0) { 300 if (ret > 0 && path->slots[0] > 0 && search_start == start) {
314 if (path->slots[0] == 0) { 301 leaf = path->nodes[0];
315 ret = 0; 302 btrfs_item_key_to_cpu(leaf, &key, path->slots[0] - 1);
316 goto out; 303 if (key.objectid == inode->i_ino &&
317 } 304 key.type == BTRFS_EXTENT_DATA_KEY)
318 path->slots[0]--; 305 path->slots[0]--;
319 } 306 }
307 ret = 0;
320next_slot: 308next_slot:
321 keep = 0;
322 bookend = 0;
323 found_extent = 0;
324 found_inline = 0;
325 compression = 0;
326 encryption = 0;
327 extent = NULL;
328 leaf = path->nodes[0]; 309 leaf = path->nodes[0];
329 slot = path->slots[0]; 310 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
330 ret = 0; 311 BUG_ON(del_nr > 0);
331 btrfs_item_key_to_cpu(leaf, &key, slot); 312 ret = btrfs_next_leaf(root, path);
332 if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY && 313 if (ret < 0)
333 key.offset >= end) { 314 break;
334 goto out; 315 if (ret > 0) {
335 } 316 ret = 0;
336 if (btrfs_key_type(&key) > BTRFS_EXTENT_DATA_KEY || 317 break;
337 key.objectid != inode->i_ino) {
338 goto out;
339 }
340 if (recow) {
341 search_start = max(key.offset, start);
342 continue;
343 }
344 if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) {
345 extent = btrfs_item_ptr(leaf, slot,
346 struct btrfs_file_extent_item);
347 found_type = btrfs_file_extent_type(leaf, extent);
348 compression = btrfs_file_extent_compression(leaf,
349 extent);
350 encryption = btrfs_file_extent_encryption(leaf,
351 extent);
352 other_encoding = btrfs_file_extent_other_encoding(leaf,
353 extent);
354 if (found_type == BTRFS_FILE_EXTENT_REG ||
355 found_type == BTRFS_FILE_EXTENT_PREALLOC) {
356 extent_end =
357 btrfs_file_extent_disk_bytenr(leaf,
358 extent);
359 if (extent_end)
360 *hint_byte = extent_end;
361
362 extent_end = key.offset +
363 btrfs_file_extent_num_bytes(leaf, extent);
364 ram_bytes = btrfs_file_extent_ram_bytes(leaf,
365 extent);
366 found_extent = 1;
367 } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
368 found_inline = 1;
369 extent_end = key.offset +
370 btrfs_file_extent_inline_len(leaf, extent);
371 } 318 }
319 leaf = path->nodes[0];
320 recow = 1;
321 }
322
323 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
324 if (key.objectid > inode->i_ino ||
325 key.type > BTRFS_EXTENT_DATA_KEY || key.offset >= end)
326 break;
327
328 fi = btrfs_item_ptr(leaf, path->slots[0],
329 struct btrfs_file_extent_item);
330 extent_type = btrfs_file_extent_type(leaf, fi);
331
332 if (extent_type == BTRFS_FILE_EXTENT_REG ||
333 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
334 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
335 num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
336 extent_offset = btrfs_file_extent_offset(leaf, fi);
337 extent_end = key.offset +
338 btrfs_file_extent_num_bytes(leaf, fi);
339 } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
340 extent_end = key.offset +
341 btrfs_file_extent_inline_len(leaf, fi);
372 } else { 342 } else {
343 WARN_ON(1);
373 extent_end = search_start; 344 extent_end = search_start;
374 } 345 }
375 346
376 /* we found nothing we can drop */ 347 if (extent_end <= search_start) {
377 if ((!found_extent && !found_inline) || 348 path->slots[0]++;
378 search_start >= extent_end) {
379 int nextret;
380 u32 nritems;
381 nritems = btrfs_header_nritems(leaf);
382 if (slot >= nritems - 1) {
383 nextret = btrfs_next_leaf(root, path);
384 if (nextret)
385 goto out;
386 recow = 1;
387 } else {
388 path->slots[0]++;
389 }
390 goto next_slot; 349 goto next_slot;
391 } 350 }
392 351
393 if (end <= extent_end && start >= key.offset && found_inline) 352 search_start = max(key.offset, start);
394 *hint_byte = EXTENT_MAP_INLINE; 353 if (recow) {
395 354 btrfs_release_path(root, path);
396 if (found_extent) { 355 continue;
397 read_extent_buffer(leaf, &old, (unsigned long)extent,
398 sizeof(old));
399 }
400
401 if (end < extent_end && end >= key.offset) {
402 bookend = 1;
403 if (found_inline && start <= key.offset)
404 keep = 1;
405 } 356 }
406 357
407 if (bookend && found_extent) { 358 /*
408 if (locked_end < extent_end) { 359 * | - range to drop - |
409 ret = try_lock_extent(&BTRFS_I(inode)->io_tree, 360 * | -------- extent -------- |
410 locked_end, extent_end - 1, 361 */
411 GFP_NOFS); 362 if (start > key.offset && end < extent_end) {
412 if (!ret) { 363 BUG_ON(del_nr > 0);
413 btrfs_release_path(root, path); 364 BUG_ON(extent_type == BTRFS_FILE_EXTENT_INLINE);
414 lock_extent(&BTRFS_I(inode)->io_tree, 365
415 locked_end, extent_end - 1, 366 memcpy(&new_key, &key, sizeof(new_key));
416 GFP_NOFS); 367 new_key.offset = start;
417 locked_end = extent_end; 368 ret = btrfs_duplicate_item(trans, root, path,
418 continue; 369 &new_key);
419 } 370 if (ret == -EAGAIN) {
420 locked_end = extent_end; 371 btrfs_release_path(root, path);
372 continue;
421 } 373 }
422 disk_bytenr = le64_to_cpu(old.disk_bytenr); 374 if (ret < 0)
423 if (disk_bytenr != 0) { 375 break;
376
377 leaf = path->nodes[0];
378 fi = btrfs_item_ptr(leaf, path->slots[0] - 1,
379 struct btrfs_file_extent_item);
380 btrfs_set_file_extent_num_bytes(leaf, fi,
381 start - key.offset);
382
383 fi = btrfs_item_ptr(leaf, path->slots[0],
384 struct btrfs_file_extent_item);
385
386 extent_offset += start - key.offset;
387 btrfs_set_file_extent_offset(leaf, fi, extent_offset);
388 btrfs_set_file_extent_num_bytes(leaf, fi,
389 extent_end - start);
390 btrfs_mark_buffer_dirty(leaf);
391
392 if (disk_bytenr > 0) {
424 ret = btrfs_inc_extent_ref(trans, root, 393 ret = btrfs_inc_extent_ref(trans, root,
425 disk_bytenr, 394 disk_bytenr, num_bytes, 0,
426 le64_to_cpu(old.disk_num_bytes), 0, 395 root->root_key.objectid,
427 root->root_key.objectid, 396 new_key.objectid,
428 key.objectid, key.offset - 397 start - extent_offset);
429 le64_to_cpu(old.offset));
430 BUG_ON(ret); 398 BUG_ON(ret);
399 *hint_byte = disk_bytenr;
431 } 400 }
401 key.offset = start;
432 } 402 }
403 /*
404 * | ---- range to drop ----- |
405 * | -------- extent -------- |
406 */
407 if (start <= key.offset && end < extent_end) {
408 BUG_ON(extent_type == BTRFS_FILE_EXTENT_INLINE);
433 409
434 if (found_inline) { 410 memcpy(&new_key, &key, sizeof(new_key));
435 u64 mask = root->sectorsize - 1; 411 new_key.offset = end;
436 search_start = (extent_end + mask) & ~mask; 412 btrfs_set_item_key_safe(trans, root, path, &new_key);
437 } else 413
438 search_start = extent_end; 414 extent_offset += end - key.offset;
439 415 btrfs_set_file_extent_offset(leaf, fi, extent_offset);
440 /* truncate existing extent */ 416 btrfs_set_file_extent_num_bytes(leaf, fi,
441 if (start > key.offset) { 417 extent_end - end);
442 u64 new_num; 418 btrfs_mark_buffer_dirty(leaf);
443 u64 old_num; 419 if (disk_bytenr > 0) {
444 keep = 1; 420 inode_sub_bytes(inode, end - key.offset);
445 WARN_ON(start & (root->sectorsize - 1)); 421 *hint_byte = disk_bytenr;
446 if (found_extent) {
447 new_num = start - key.offset;
448 old_num = btrfs_file_extent_num_bytes(leaf,
449 extent);
450 *hint_byte =
451 btrfs_file_extent_disk_bytenr(leaf,
452 extent);
453 if (btrfs_file_extent_disk_bytenr(leaf,
454 extent)) {
455 inode_sub_bytes(inode, old_num -
456 new_num);
457 }
458 btrfs_set_file_extent_num_bytes(leaf,
459 extent, new_num);
460 btrfs_mark_buffer_dirty(leaf);
461 } else if (key.offset < inline_limit &&
462 (end > extent_end) &&
463 (inline_limit < extent_end)) {
464 u32 new_size;
465 new_size = btrfs_file_extent_calc_inline_size(
466 inline_limit - key.offset);
467 inode_sub_bytes(inode, extent_end -
468 inline_limit);
469 btrfs_set_file_extent_ram_bytes(leaf, extent,
470 new_size);
471 if (!compression && !encryption) {
472 btrfs_truncate_item(trans, root, path,
473 new_size, 1);
474 }
475 } 422 }
423 break;
476 } 424 }
477 /* delete the entire extent */
478 if (!keep) {
479 if (found_inline)
480 inode_sub_bytes(inode, extent_end -
481 key.offset);
482 ret = btrfs_del_item(trans, root, path);
483 /* TODO update progress marker and return */
484 BUG_ON(ret);
485 extent = NULL;
486 btrfs_release_path(root, path);
487 /* the extent will be freed later */
488 }
489 if (bookend && found_inline && start <= key.offset) {
490 u32 new_size;
491 new_size = btrfs_file_extent_calc_inline_size(
492 extent_end - end);
493 inode_sub_bytes(inode, end - key.offset);
494 btrfs_set_file_extent_ram_bytes(leaf, extent,
495 new_size);
496 if (!compression && !encryption)
497 ret = btrfs_truncate_item(trans, root, path,
498 new_size, 0);
499 BUG_ON(ret);
500 }
501 /* create bookend, splitting the extent in two */
502 if (bookend && found_extent) {
503 struct btrfs_key ins;
504 ins.objectid = inode->i_ino;
505 ins.offset = end;
506 btrfs_set_key_type(&ins, BTRFS_EXTENT_DATA_KEY);
507 425
508 btrfs_release_path(root, path); 426 search_start = extent_end;
509 path->leave_spinning = 1; 427 /*
510 ret = btrfs_insert_empty_item(trans, root, path, &ins, 428 * | ---- range to drop ----- |
511 sizeof(*extent)); 429 * | -------- extent -------- |
512 BUG_ON(ret); 430 */
431 if (start > key.offset && end >= extent_end) {
432 BUG_ON(del_nr > 0);
433 BUG_ON(extent_type == BTRFS_FILE_EXTENT_INLINE);
513 434
514 leaf = path->nodes[0]; 435 btrfs_set_file_extent_num_bytes(leaf, fi,
515 extent = btrfs_item_ptr(leaf, path->slots[0], 436 start - key.offset);
516 struct btrfs_file_extent_item); 437 btrfs_mark_buffer_dirty(leaf);
517 write_extent_buffer(leaf, &old, 438 if (disk_bytenr > 0) {
518 (unsigned long)extent, sizeof(old)); 439 inode_sub_bytes(inode, extent_end - start);
519 440 *hint_byte = disk_bytenr;
520 btrfs_set_file_extent_compression(leaf, extent, 441 }
521 compression); 442 if (end == extent_end)
522 btrfs_set_file_extent_encryption(leaf, extent, 443 break;
523 encryption);
524 btrfs_set_file_extent_other_encoding(leaf, extent,
525 other_encoding);
526 btrfs_set_file_extent_offset(leaf, extent,
527 le64_to_cpu(old.offset) + end - key.offset);
528 WARN_ON(le64_to_cpu(old.num_bytes) <
529 (extent_end - end));
530 btrfs_set_file_extent_num_bytes(leaf, extent,
531 extent_end - end);
532 444
533 /* 445 path->slots[0]++;
534 * set the ram bytes to the size of the full extent 446 goto next_slot;
535 * before splitting. This is a worst case flag,
536 * but its the best we can do because we don't know
537 * how splitting affects compression
538 */
539 btrfs_set_file_extent_ram_bytes(leaf, extent,
540 ram_bytes);
541 btrfs_set_file_extent_type(leaf, extent, found_type);
542
543 btrfs_unlock_up_safe(path, 1);
544 btrfs_mark_buffer_dirty(path->nodes[0]);
545 btrfs_set_lock_blocking(path->nodes[0]);
546
547 path->leave_spinning = 0;
548 btrfs_release_path(root, path);
549 if (disk_bytenr != 0)
550 inode_add_bytes(inode, extent_end - end);
551 } 447 }
552 448
553 if (found_extent && !keep) { 449 /*
554 u64 old_disk_bytenr = le64_to_cpu(old.disk_bytenr); 450 * | ---- range to drop ----- |
451 * | ------ extent ------ |
452 */
453 if (start <= key.offset && end >= extent_end) {
454 if (del_nr == 0) {
455 del_slot = path->slots[0];
456 del_nr = 1;
457 } else {
458 BUG_ON(del_slot + del_nr != path->slots[0]);
459 del_nr++;
460 }
555 461
556 if (old_disk_bytenr != 0) { 462 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
557 inode_sub_bytes(inode, 463 inode_sub_bytes(inode,
558 le64_to_cpu(old.num_bytes)); 464 extent_end - key.offset);
465 extent_end = ALIGN(extent_end,
466 root->sectorsize);
467 } else if (disk_bytenr > 0) {
559 ret = btrfs_free_extent(trans, root, 468 ret = btrfs_free_extent(trans, root,
560 old_disk_bytenr, 469 disk_bytenr, num_bytes, 0,
561 le64_to_cpu(old.disk_num_bytes), 470 root->root_key.objectid,
562 0, root->root_key.objectid,
563 key.objectid, key.offset - 471 key.objectid, key.offset -
564 le64_to_cpu(old.offset)); 472 extent_offset);
565 BUG_ON(ret); 473 BUG_ON(ret);
566 *hint_byte = old_disk_bytenr; 474 inode_sub_bytes(inode,
475 extent_end - key.offset);
476 *hint_byte = disk_bytenr;
567 } 477 }
568 }
569 478
570 if (search_start >= end) { 479 if (end == extent_end)
571 ret = 0; 480 break;
572 goto out; 481
482 if (path->slots[0] + 1 < btrfs_header_nritems(leaf)) {
483 path->slots[0]++;
484 goto next_slot;
485 }
486
487 ret = btrfs_del_items(trans, root, path, del_slot,
488 del_nr);
489 BUG_ON(ret);
490
491 del_nr = 0;
492 del_slot = 0;
493
494 btrfs_release_path(root, path);
495 continue;
573 } 496 }
497
498 BUG_ON(1);
574 } 499 }
575out: 500
576 btrfs_free_path(path); 501 if (del_nr > 0) {
577 if (locked_end > orig_locked_end) { 502 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
578 unlock_extent(&BTRFS_I(inode)->io_tree, orig_locked_end, 503 BUG_ON(ret);
579 locked_end - 1, GFP_NOFS);
580 } 504 }
505
506 btrfs_free_path(path);
581 return ret; 507 return ret;
582} 508}
583 509
584static int extent_mergeable(struct extent_buffer *leaf, int slot, 510static int extent_mergeable(struct extent_buffer *leaf, int slot,
585 u64 objectid, u64 bytenr, u64 *start, u64 *end) 511 u64 objectid, u64 bytenr, u64 orig_offset,
512 u64 *start, u64 *end)
586{ 513{
587 struct btrfs_file_extent_item *fi; 514 struct btrfs_file_extent_item *fi;
588 struct btrfs_key key; 515 struct btrfs_key key;
@@ -598,6 +525,7 @@ static int extent_mergeable(struct extent_buffer *leaf, int slot,
598 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); 525 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
599 if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG || 526 if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG ||
600 btrfs_file_extent_disk_bytenr(leaf, fi) != bytenr || 527 btrfs_file_extent_disk_bytenr(leaf, fi) != bytenr ||
528 btrfs_file_extent_offset(leaf, fi) != key.offset - orig_offset ||
601 btrfs_file_extent_compression(leaf, fi) || 529 btrfs_file_extent_compression(leaf, fi) ||
602 btrfs_file_extent_encryption(leaf, fi) || 530 btrfs_file_extent_encryption(leaf, fi) ||
603 btrfs_file_extent_other_encoding(leaf, fi)) 531 btrfs_file_extent_other_encoding(leaf, fi))
@@ -620,23 +548,24 @@ static int extent_mergeable(struct extent_buffer *leaf, int slot,
620 * two or three. 548 * two or three.
621 */ 549 */
622int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, 550int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
623 struct btrfs_root *root,
624 struct inode *inode, u64 start, u64 end) 551 struct inode *inode, u64 start, u64 end)
625{ 552{
553 struct btrfs_root *root = BTRFS_I(inode)->root;
626 struct extent_buffer *leaf; 554 struct extent_buffer *leaf;
627 struct btrfs_path *path; 555 struct btrfs_path *path;
628 struct btrfs_file_extent_item *fi; 556 struct btrfs_file_extent_item *fi;
629 struct btrfs_key key; 557 struct btrfs_key key;
558 struct btrfs_key new_key;
630 u64 bytenr; 559 u64 bytenr;
631 u64 num_bytes; 560 u64 num_bytes;
632 u64 extent_end; 561 u64 extent_end;
633 u64 orig_offset; 562 u64 orig_offset;
634 u64 other_start; 563 u64 other_start;
635 u64 other_end; 564 u64 other_end;
636 u64 split = start; 565 u64 split;
637 u64 locked_end = end; 566 int del_nr = 0;
638 int extent_type; 567 int del_slot = 0;
639 int split_end = 1; 568 int recow;
640 int ret; 569 int ret;
641 570
642 btrfs_drop_extent_cache(inode, start, end - 1, 0); 571 btrfs_drop_extent_cache(inode, start, end - 1, 0);
@@ -644,12 +573,11 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
644 path = btrfs_alloc_path(); 573 path = btrfs_alloc_path();
645 BUG_ON(!path); 574 BUG_ON(!path);
646again: 575again:
576 recow = 0;
577 split = start;
647 key.objectid = inode->i_ino; 578 key.objectid = inode->i_ino;
648 key.type = BTRFS_EXTENT_DATA_KEY; 579 key.type = BTRFS_EXTENT_DATA_KEY;
649 if (split == start) 580 key.offset = split;
650 key.offset = split;
651 else
652 key.offset = split - 1;
653 581
654 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 582 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
655 if (ret > 0 && path->slots[0] > 0) 583 if (ret > 0 && path->slots[0] > 0)
@@ -661,159 +589,158 @@ again:
661 key.type != BTRFS_EXTENT_DATA_KEY); 589 key.type != BTRFS_EXTENT_DATA_KEY);
662 fi = btrfs_item_ptr(leaf, path->slots[0], 590 fi = btrfs_item_ptr(leaf, path->slots[0],
663 struct btrfs_file_extent_item); 591 struct btrfs_file_extent_item);
664 extent_type = btrfs_file_extent_type(leaf, fi); 592 BUG_ON(btrfs_file_extent_type(leaf, fi) !=
665 BUG_ON(extent_type != BTRFS_FILE_EXTENT_PREALLOC); 593 BTRFS_FILE_EXTENT_PREALLOC);
666 extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi); 594 extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
667 BUG_ON(key.offset > start || extent_end < end); 595 BUG_ON(key.offset > start || extent_end < end);
668 596
669 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); 597 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
670 num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi); 598 num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
671 orig_offset = key.offset - btrfs_file_extent_offset(leaf, fi); 599 orig_offset = key.offset - btrfs_file_extent_offset(leaf, fi);
600 memcpy(&new_key, &key, sizeof(new_key));
672 601
673 if (key.offset == start) 602 if (start == key.offset && end < extent_end) {
674 split = end;
675
676 if (key.offset == start && extent_end == end) {
677 int del_nr = 0;
678 int del_slot = 0;
679 other_start = end;
680 other_end = 0;
681 if (extent_mergeable(leaf, path->slots[0] + 1, inode->i_ino,
682 bytenr, &other_start, &other_end)) {
683 extent_end = other_end;
684 del_slot = path->slots[0] + 1;
685 del_nr++;
686 ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
687 0, root->root_key.objectid,
688 inode->i_ino, orig_offset);
689 BUG_ON(ret);
690 }
691 other_start = 0; 603 other_start = 0;
692 other_end = start; 604 other_end = start;
693 if (extent_mergeable(leaf, path->slots[0] - 1, inode->i_ino, 605 if (extent_mergeable(leaf, path->slots[0] - 1,
694 bytenr, &other_start, &other_end)) { 606 inode->i_ino, bytenr, orig_offset,
695 key.offset = other_start; 607 &other_start, &other_end)) {
696 del_slot = path->slots[0]; 608 new_key.offset = end;
697 del_nr++; 609 btrfs_set_item_key_safe(trans, root, path, &new_key);
698 ret = btrfs_free_extent(trans, root, bytenr, num_bytes, 610 fi = btrfs_item_ptr(leaf, path->slots[0],
699 0, root->root_key.objectid, 611 struct btrfs_file_extent_item);
700 inode->i_ino, orig_offset); 612 btrfs_set_file_extent_num_bytes(leaf, fi,
701 BUG_ON(ret); 613 extent_end - end);
702 } 614 btrfs_set_file_extent_offset(leaf, fi,
703 split_end = 0; 615 end - orig_offset);
704 if (del_nr == 0) { 616 fi = btrfs_item_ptr(leaf, path->slots[0] - 1,
705 btrfs_set_file_extent_type(leaf, fi, 617 struct btrfs_file_extent_item);
706 BTRFS_FILE_EXTENT_REG); 618 btrfs_set_file_extent_num_bytes(leaf, fi,
707 goto done; 619 end - other_start);
708 } 620 btrfs_mark_buffer_dirty(leaf);
709 621 goto out;
710 fi = btrfs_item_ptr(leaf, del_slot - 1,
711 struct btrfs_file_extent_item);
712 btrfs_set_file_extent_type(leaf, fi, BTRFS_FILE_EXTENT_REG);
713 btrfs_set_file_extent_num_bytes(leaf, fi,
714 extent_end - key.offset);
715 btrfs_mark_buffer_dirty(leaf);
716
717 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
718 BUG_ON(ret);
719 goto release;
720 } else if (split == start) {
721 if (locked_end < extent_end) {
722 ret = try_lock_extent(&BTRFS_I(inode)->io_tree,
723 locked_end, extent_end - 1, GFP_NOFS);
724 if (!ret) {
725 btrfs_release_path(root, path);
726 lock_extent(&BTRFS_I(inode)->io_tree,
727 locked_end, extent_end - 1, GFP_NOFS);
728 locked_end = extent_end;
729 goto again;
730 }
731 locked_end = extent_end;
732 } 622 }
733 btrfs_set_file_extent_num_bytes(leaf, fi, split - key.offset);
734 } else {
735 BUG_ON(key.offset != start);
736 key.offset = split;
737 btrfs_set_file_extent_offset(leaf, fi, key.offset -
738 orig_offset);
739 btrfs_set_file_extent_num_bytes(leaf, fi, extent_end - split);
740 btrfs_set_item_key_safe(trans, root, path, &key);
741 extent_end = split;
742 } 623 }
743 624
744 if (extent_end == end) { 625 if (start > key.offset && end == extent_end) {
745 split_end = 0;
746 extent_type = BTRFS_FILE_EXTENT_REG;
747 }
748 if (extent_end == end && split == start) {
749 other_start = end; 626 other_start = end;
750 other_end = 0; 627 other_end = 0;
751 if (extent_mergeable(leaf, path->slots[0] + 1, inode->i_ino, 628 if (extent_mergeable(leaf, path->slots[0] + 1,
752 bytenr, &other_start, &other_end)) { 629 inode->i_ino, bytenr, orig_offset,
753 path->slots[0]++; 630 &other_start, &other_end)) {
754 fi = btrfs_item_ptr(leaf, path->slots[0], 631 fi = btrfs_item_ptr(leaf, path->slots[0],
755 struct btrfs_file_extent_item); 632 struct btrfs_file_extent_item);
756 key.offset = split;
757 btrfs_set_item_key_safe(trans, root, path, &key);
758 btrfs_set_file_extent_offset(leaf, fi, key.offset -
759 orig_offset);
760 btrfs_set_file_extent_num_bytes(leaf, fi, 633 btrfs_set_file_extent_num_bytes(leaf, fi,
761 other_end - split); 634 start - key.offset);
762 goto done; 635 path->slots[0]++;
763 } 636 new_key.offset = start;
764 } 637 btrfs_set_item_key_safe(trans, root, path, &new_key);
765 if (extent_end == end && split == end) { 638
766 other_start = 0;
767 other_end = start;
768 if (extent_mergeable(leaf, path->slots[0] - 1 , inode->i_ino,
769 bytenr, &other_start, &other_end)) {
770 path->slots[0]--;
771 fi = btrfs_item_ptr(leaf, path->slots[0], 639 fi = btrfs_item_ptr(leaf, path->slots[0],
772 struct btrfs_file_extent_item); 640 struct btrfs_file_extent_item);
773 btrfs_set_file_extent_num_bytes(leaf, fi, extent_end - 641 btrfs_set_file_extent_num_bytes(leaf, fi,
774 other_start); 642 other_end - start);
775 goto done; 643 btrfs_set_file_extent_offset(leaf, fi,
644 start - orig_offset);
645 btrfs_mark_buffer_dirty(leaf);
646 goto out;
776 } 647 }
777 } 648 }
778 649
779 btrfs_mark_buffer_dirty(leaf); 650 while (start > key.offset || end < extent_end) {
651 if (key.offset == start)
652 split = end;
780 653
781 ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0, 654 new_key.offset = split;
782 root->root_key.objectid, 655 ret = btrfs_duplicate_item(trans, root, path, &new_key);
783 inode->i_ino, orig_offset); 656 if (ret == -EAGAIN) {
784 BUG_ON(ret); 657 btrfs_release_path(root, path);
785 btrfs_release_path(root, path); 658 goto again;
659 }
660 BUG_ON(ret < 0);
786 661
787 key.offset = start; 662 leaf = path->nodes[0];
788 ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*fi)); 663 fi = btrfs_item_ptr(leaf, path->slots[0] - 1,
789 BUG_ON(ret); 664 struct btrfs_file_extent_item);
665 btrfs_set_file_extent_num_bytes(leaf, fi,
666 split - key.offset);
790 667
791 leaf = path->nodes[0]; 668 fi = btrfs_item_ptr(leaf, path->slots[0],
792 fi = btrfs_item_ptr(leaf, path->slots[0], 669 struct btrfs_file_extent_item);
793 struct btrfs_file_extent_item); 670
794 btrfs_set_file_extent_generation(leaf, fi, trans->transid); 671 btrfs_set_file_extent_offset(leaf, fi, split - orig_offset);
795 btrfs_set_file_extent_type(leaf, fi, extent_type); 672 btrfs_set_file_extent_num_bytes(leaf, fi,
796 btrfs_set_file_extent_disk_bytenr(leaf, fi, bytenr); 673 extent_end - split);
797 btrfs_set_file_extent_disk_num_bytes(leaf, fi, num_bytes); 674 btrfs_mark_buffer_dirty(leaf);
798 btrfs_set_file_extent_offset(leaf, fi, key.offset - orig_offset); 675
799 btrfs_set_file_extent_num_bytes(leaf, fi, extent_end - key.offset); 676 ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0,
800 btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes); 677 root->root_key.objectid,
801 btrfs_set_file_extent_compression(leaf, fi, 0); 678 inode->i_ino, orig_offset);
802 btrfs_set_file_extent_encryption(leaf, fi, 0); 679 BUG_ON(ret);
803 btrfs_set_file_extent_other_encoding(leaf, fi, 0); 680
804done: 681 if (split == start) {
805 btrfs_mark_buffer_dirty(leaf); 682 key.offset = start;
806 683 } else {
807release: 684 BUG_ON(start != key.offset);
808 btrfs_release_path(root, path); 685 path->slots[0]--;
809 if (split_end && split == start) { 686 extent_end = end;
810 split = end; 687 }
811 goto again; 688 recow = 1;
689 }
690
691 other_start = end;
692 other_end = 0;
693 if (extent_mergeable(leaf, path->slots[0] + 1,
694 inode->i_ino, bytenr, orig_offset,
695 &other_start, &other_end)) {
696 if (recow) {
697 btrfs_release_path(root, path);
698 goto again;
699 }
700 extent_end = other_end;
701 del_slot = path->slots[0] + 1;
702 del_nr++;
703 ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
704 0, root->root_key.objectid,
705 inode->i_ino, orig_offset);
706 BUG_ON(ret);
707 }
708 other_start = 0;
709 other_end = start;
710 if (extent_mergeable(leaf, path->slots[0] - 1,
711 inode->i_ino, bytenr, orig_offset,
712 &other_start, &other_end)) {
713 if (recow) {
714 btrfs_release_path(root, path);
715 goto again;
716 }
717 key.offset = other_start;
718 del_slot = path->slots[0];
719 del_nr++;
720 ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
721 0, root->root_key.objectid,
722 inode->i_ino, orig_offset);
723 BUG_ON(ret);
812 } 724 }
813 if (locked_end > end) { 725 if (del_nr == 0) {
814 unlock_extent(&BTRFS_I(inode)->io_tree, end, locked_end - 1, 726 fi = btrfs_item_ptr(leaf, path->slots[0],
815 GFP_NOFS); 727 struct btrfs_file_extent_item);
728 btrfs_set_file_extent_type(leaf, fi,
729 BTRFS_FILE_EXTENT_REG);
730 btrfs_mark_buffer_dirty(leaf);
731 } else {
732 fi = btrfs_item_ptr(leaf, del_slot - 1,
733 struct btrfs_file_extent_item);
734 btrfs_set_file_extent_type(leaf, fi,
735 BTRFS_FILE_EXTENT_REG);
736 btrfs_set_file_extent_num_bytes(leaf, fi,
737 extent_end - key.offset);
738 btrfs_mark_buffer_dirty(leaf);
739
740 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
741 BUG_ON(ret);
816 } 742 }
743out:
817 btrfs_free_path(path); 744 btrfs_free_path(path);
818 return 0; 745 return 0;
819} 746}
@@ -828,6 +755,7 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file,
828 loff_t pos, unsigned long first_index, 755 loff_t pos, unsigned long first_index,
829 unsigned long last_index, size_t write_bytes) 756 unsigned long last_index, size_t write_bytes)
830{ 757{
758 struct extent_state *cached_state = NULL;
831 int i; 759 int i;
832 unsigned long index = pos >> PAGE_CACHE_SHIFT; 760 unsigned long index = pos >> PAGE_CACHE_SHIFT;
833 struct inode *inode = fdentry(file)->d_inode; 761 struct inode *inode = fdentry(file)->d_inode;
@@ -856,16 +784,18 @@ again:
856 } 784 }
857 if (start_pos < inode->i_size) { 785 if (start_pos < inode->i_size) {
858 struct btrfs_ordered_extent *ordered; 786 struct btrfs_ordered_extent *ordered;
859 lock_extent(&BTRFS_I(inode)->io_tree, 787 lock_extent_bits(&BTRFS_I(inode)->io_tree,
860 start_pos, last_pos - 1, GFP_NOFS); 788 start_pos, last_pos - 1, 0, &cached_state,
789 GFP_NOFS);
861 ordered = btrfs_lookup_first_ordered_extent(inode, 790 ordered = btrfs_lookup_first_ordered_extent(inode,
862 last_pos - 1); 791 last_pos - 1);
863 if (ordered && 792 if (ordered &&
864 ordered->file_offset + ordered->len > start_pos && 793 ordered->file_offset + ordered->len > start_pos &&
865 ordered->file_offset < last_pos) { 794 ordered->file_offset < last_pos) {
866 btrfs_put_ordered_extent(ordered); 795 btrfs_put_ordered_extent(ordered);
867 unlock_extent(&BTRFS_I(inode)->io_tree, 796 unlock_extent_cached(&BTRFS_I(inode)->io_tree,
868 start_pos, last_pos - 1, GFP_NOFS); 797 start_pos, last_pos - 1,
798 &cached_state, GFP_NOFS);
869 for (i = 0; i < num_pages; i++) { 799 for (i = 0; i < num_pages; i++) {
870 unlock_page(pages[i]); 800 unlock_page(pages[i]);
871 page_cache_release(pages[i]); 801 page_cache_release(pages[i]);
@@ -877,12 +807,13 @@ again:
877 if (ordered) 807 if (ordered)
878 btrfs_put_ordered_extent(ordered); 808 btrfs_put_ordered_extent(ordered);
879 809
880 clear_extent_bits(&BTRFS_I(inode)->io_tree, start_pos, 810 clear_extent_bit(&BTRFS_I(inode)->io_tree, start_pos,
881 last_pos - 1, EXTENT_DIRTY | EXTENT_DELALLOC | 811 last_pos - 1, EXTENT_DIRTY | EXTENT_DELALLOC |
882 EXTENT_DO_ACCOUNTING, 812 EXTENT_DO_ACCOUNTING, 0, 0, &cached_state,
883 GFP_NOFS); 813 GFP_NOFS);
884 unlock_extent(&BTRFS_I(inode)->io_tree, 814 unlock_extent_cached(&BTRFS_I(inode)->io_tree,
885 start_pos, last_pos - 1, GFP_NOFS); 815 start_pos, last_pos - 1, &cached_state,
816 GFP_NOFS);
886 } 817 }
887 for (i = 0; i < num_pages; i++) { 818 for (i = 0; i < num_pages; i++) {
888 clear_page_dirty_for_io(pages[i]); 819 clear_page_dirty_for_io(pages[i]);
@@ -909,7 +840,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
909 unsigned long last_index; 840 unsigned long last_index;
910 int will_write; 841 int will_write;
911 842
912 will_write = ((file->f_flags & O_SYNC) || IS_SYNC(inode) || 843 will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) ||
913 (file->f_flags & O_DIRECT)); 844 (file->f_flags & O_DIRECT));
914 845
915 nrptrs = min((count + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE, 846 nrptrs = min((count + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE,
@@ -1076,7 +1007,7 @@ out_nolock:
1076 if (err) 1007 if (err)
1077 num_written = err; 1008 num_written = err;
1078 1009
1079 if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) { 1010 if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) {
1080 trans = btrfs_start_transaction(root, 1); 1011 trans = btrfs_start_transaction(root, 1);
1081 ret = btrfs_log_dentry_safe(trans, root, 1012 ret = btrfs_log_dentry_safe(trans, root,
1082 file->f_dentry); 1013 file->f_dentry);
@@ -1210,7 +1141,7 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync)
1210 } 1141 }
1211 mutex_lock(&dentry->d_inode->i_mutex); 1142 mutex_lock(&dentry->d_inode->i_mutex);
1212out: 1143out:
1213 return ret > 0 ? EIO : ret; 1144 return ret > 0 ? -EIO : ret;
1214} 1145}
1215 1146
1216static const struct vm_operations_struct btrfs_file_vm_ops = { 1147static const struct vm_operations_struct btrfs_file_vm_ops = {
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index cb2849f03251..f488fac04d99 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -18,6 +18,7 @@
18 18
19#include <linux/pagemap.h> 19#include <linux/pagemap.h>
20#include <linux/sched.h> 20#include <linux/sched.h>
21#include <linux/slab.h>
21#include <linux/math64.h> 22#include <linux/math64.h>
22#include "ctree.h" 23#include "ctree.h"
23#include "free-space-cache.h" 24#include "free-space-cache.h"
@@ -870,7 +871,7 @@ __btrfs_return_cluster_to_free_space(
870 tree_insert_offset(&block_group->free_space_offset, 871 tree_insert_offset(&block_group->free_space_offset,
871 entry->offset, &entry->offset_index, 0); 872 entry->offset, &entry->offset_index, 0);
872 } 873 }
873 cluster->root.rb_node = NULL; 874 cluster->root = RB_ROOT;
874 875
875out: 876out:
876 spin_unlock(&cluster->lock); 877 spin_unlock(&cluster->lock);
@@ -1355,7 +1356,7 @@ void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
1355{ 1356{
1356 spin_lock_init(&cluster->lock); 1357 spin_lock_init(&cluster->lock);
1357 spin_lock_init(&cluster->refill_lock); 1358 spin_lock_init(&cluster->refill_lock);
1358 cluster->root.rb_node = NULL; 1359 cluster->root = RB_ROOT;
1359 cluster->max_size = 0; 1360 cluster->max_size = 0;
1360 cluster->points_to_bitmap = false; 1361 cluster->points_to_bitmap = false;
1361 INIT_LIST_HEAD(&cluster->block_group_list); 1362 INIT_LIST_HEAD(&cluster->block_group_list);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index b3ad168a0bfc..2bfdc641d4e3 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -36,6 +36,7 @@
36#include <linux/xattr.h> 36#include <linux/xattr.h>
37#include <linux/posix_acl.h> 37#include <linux/posix_acl.h>
38#include <linux/falloc.h> 38#include <linux/falloc.h>
39#include <linux/slab.h>
39#include "compat.h" 40#include "compat.h"
40#include "ctree.h" 41#include "ctree.h"
41#include "disk-io.h" 42#include "disk-io.h"
@@ -88,13 +89,14 @@ static noinline int cow_file_range(struct inode *inode,
88 u64 start, u64 end, int *page_started, 89 u64 start, u64 end, int *page_started,
89 unsigned long *nr_written, int unlock); 90 unsigned long *nr_written, int unlock);
90 91
91static int btrfs_init_inode_security(struct inode *inode, struct inode *dir) 92static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
93 struct inode *inode, struct inode *dir)
92{ 94{
93 int err; 95 int err;
94 96
95 err = btrfs_init_acl(inode, dir); 97 err = btrfs_init_acl(trans, inode, dir);
96 if (!err) 98 if (!err)
97 err = btrfs_xattr_security_init(inode, dir); 99 err = btrfs_xattr_security_init(trans, inode, dir);
98 return err; 100 return err;
99} 101}
100 102
@@ -188,8 +190,18 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
188 btrfs_mark_buffer_dirty(leaf); 190 btrfs_mark_buffer_dirty(leaf);
189 btrfs_free_path(path); 191 btrfs_free_path(path);
190 192
193 /*
194 * we're an inline extent, so nobody can
195 * extend the file past i_size without locking
196 * a page we already have locked.
197 *
198 * We must do any isize and inode updates
199 * before we unlock the pages. Otherwise we
200 * could end up racing with unlink.
201 */
191 BTRFS_I(inode)->disk_i_size = inode->i_size; 202 BTRFS_I(inode)->disk_i_size = inode->i_size;
192 btrfs_update_inode(trans, root, inode); 203 btrfs_update_inode(trans, root, inode);
204
193 return 0; 205 return 0;
194fail: 206fail:
195 btrfs_free_path(path); 207 btrfs_free_path(path);
@@ -230,8 +242,7 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
230 return 1; 242 return 1;
231 } 243 }
232 244
233 ret = btrfs_drop_extents(trans, root, inode, start, 245 ret = btrfs_drop_extents(trans, inode, start, aligned_end,
234 aligned_end, aligned_end, start,
235 &hint_byte, 1); 246 &hint_byte, 1);
236 BUG_ON(ret); 247 BUG_ON(ret);
237 248
@@ -369,7 +380,8 @@ again:
369 * change at any time if we discover bad compression ratios. 380 * change at any time if we discover bad compression ratios.
370 */ 381 */
371 if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS) && 382 if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS) &&
372 btrfs_test_opt(root, COMPRESS)) { 383 (btrfs_test_opt(root, COMPRESS) ||
384 (BTRFS_I(inode)->force_compress))) {
373 WARN_ON(pages); 385 WARN_ON(pages);
374 pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS); 386 pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS);
375 387
@@ -416,7 +428,6 @@ again:
416 start, end, 428 start, end,
417 total_compressed, pages); 429 total_compressed, pages);
418 } 430 }
419 btrfs_end_transaction(trans, root);
420 if (ret == 0) { 431 if (ret == 0) {
421 /* 432 /*
422 * inline extent creation worked, we don't need 433 * inline extent creation worked, we don't need
@@ -430,9 +441,11 @@ again:
430 EXTENT_CLEAR_DELALLOC | 441 EXTENT_CLEAR_DELALLOC |
431 EXTENT_CLEAR_ACCOUNTING | 442 EXTENT_CLEAR_ACCOUNTING |
432 EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK); 443 EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK);
433 ret = 0; 444
445 btrfs_end_transaction(trans, root);
434 goto free_pages_out; 446 goto free_pages_out;
435 } 447 }
448 btrfs_end_transaction(trans, root);
436 } 449 }
437 450
438 if (will_compress) { 451 if (will_compress) {
@@ -472,7 +485,10 @@ again:
472 nr_pages_ret = 0; 485 nr_pages_ret = 0;
473 486
474 /* flag the file so we don't compress in the future */ 487 /* flag the file so we don't compress in the future */
475 BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS; 488 if (!btrfs_test_opt(root, FORCE_COMPRESS) &&
489 !(BTRFS_I(inode)->force_compress)) {
490 BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS;
491 }
476 } 492 }
477 if (will_compress) { 493 if (will_compress) {
478 *num_added += 1; 494 *num_added += 1;
@@ -543,7 +559,6 @@ static noinline int submit_compressed_extents(struct inode *inode,
543 if (list_empty(&async_cow->extents)) 559 if (list_empty(&async_cow->extents))
544 return 0; 560 return 0;
545 561
546 trans = btrfs_join_transaction(root, 1);
547 562
548 while (!list_empty(&async_cow->extents)) { 563 while (!list_empty(&async_cow->extents)) {
549 async_extent = list_entry(async_cow->extents.next, 564 async_extent = list_entry(async_cow->extents.next,
@@ -559,8 +574,8 @@ retry:
559 unsigned long nr_written = 0; 574 unsigned long nr_written = 0;
560 575
561 lock_extent(io_tree, async_extent->start, 576 lock_extent(io_tree, async_extent->start,
562 async_extent->start + 577 async_extent->start +
563 async_extent->ram_size - 1, GFP_NOFS); 578 async_extent->ram_size - 1, GFP_NOFS);
564 579
565 /* allocate blocks */ 580 /* allocate blocks */
566 ret = cow_file_range(inode, async_cow->locked_page, 581 ret = cow_file_range(inode, async_cow->locked_page,
@@ -590,19 +605,15 @@ retry:
590 lock_extent(io_tree, async_extent->start, 605 lock_extent(io_tree, async_extent->start,
591 async_extent->start + async_extent->ram_size - 1, 606 async_extent->start + async_extent->ram_size - 1,
592 GFP_NOFS); 607 GFP_NOFS);
593 /*
594 * here we're doing allocation and writeback of the
595 * compressed pages
596 */
597 btrfs_drop_extent_cache(inode, async_extent->start,
598 async_extent->start +
599 async_extent->ram_size - 1, 0);
600 608
609 trans = btrfs_join_transaction(root, 1);
601 ret = btrfs_reserve_extent(trans, root, 610 ret = btrfs_reserve_extent(trans, root,
602 async_extent->compressed_size, 611 async_extent->compressed_size,
603 async_extent->compressed_size, 612 async_extent->compressed_size,
604 0, alloc_hint, 613 0, alloc_hint,
605 (u64)-1, &ins, 1); 614 (u64)-1, &ins, 1);
615 btrfs_end_transaction(trans, root);
616
606 if (ret) { 617 if (ret) {
607 int i; 618 int i;
608 for (i = 0; i < async_extent->nr_pages; i++) { 619 for (i = 0; i < async_extent->nr_pages; i++) {
@@ -618,6 +629,14 @@ retry:
618 goto retry; 629 goto retry;
619 } 630 }
620 631
632 /*
633 * here we're doing allocation and writeback of the
634 * compressed pages
635 */
636 btrfs_drop_extent_cache(inode, async_extent->start,
637 async_extent->start +
638 async_extent->ram_size - 1, 0);
639
621 em = alloc_extent_map(GFP_NOFS); 640 em = alloc_extent_map(GFP_NOFS);
622 em->start = async_extent->start; 641 em->start = async_extent->start;
623 em->len = async_extent->ram_size; 642 em->len = async_extent->ram_size;
@@ -649,8 +668,6 @@ retry:
649 BTRFS_ORDERED_COMPRESSED); 668 BTRFS_ORDERED_COMPRESSED);
650 BUG_ON(ret); 669 BUG_ON(ret);
651 670
652 btrfs_end_transaction(trans, root);
653
654 /* 671 /*
655 * clear dirty, set writeback and unlock the pages. 672 * clear dirty, set writeback and unlock the pages.
656 */ 673 */
@@ -672,13 +689,11 @@ retry:
672 async_extent->nr_pages); 689 async_extent->nr_pages);
673 690
674 BUG_ON(ret); 691 BUG_ON(ret);
675 trans = btrfs_join_transaction(root, 1);
676 alloc_hint = ins.objectid + ins.offset; 692 alloc_hint = ins.objectid + ins.offset;
677 kfree(async_extent); 693 kfree(async_extent);
678 cond_resched(); 694 cond_resched();
679 } 695 }
680 696
681 btrfs_end_transaction(trans, root);
682 return 0; 697 return 0;
683} 698}
684 699
@@ -742,6 +757,7 @@ static noinline int cow_file_range(struct inode *inode,
742 EXTENT_CLEAR_DIRTY | 757 EXTENT_CLEAR_DIRTY |
743 EXTENT_SET_WRITEBACK | 758 EXTENT_SET_WRITEBACK |
744 EXTENT_END_WRITEBACK); 759 EXTENT_END_WRITEBACK);
760
745 *nr_written = *nr_written + 761 *nr_written = *nr_written +
746 (end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE; 762 (end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE;
747 *page_started = 1; 763 *page_started = 1;
@@ -781,7 +797,7 @@ static noinline int cow_file_range(struct inode *inode,
781 while (disk_num_bytes > 0) { 797 while (disk_num_bytes > 0) {
782 unsigned long op; 798 unsigned long op;
783 799
784 cur_alloc_size = min(disk_num_bytes, root->fs_info->max_extent); 800 cur_alloc_size = disk_num_bytes;
785 ret = btrfs_reserve_extent(trans, root, cur_alloc_size, 801 ret = btrfs_reserve_extent(trans, root, cur_alloc_size,
786 root->sectorsize, 0, alloc_hint, 802 root->sectorsize, 0, alloc_hint,
787 (u64)-1, &ins, 1); 803 (u64)-1, &ins, 1);
@@ -1199,7 +1215,8 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page,
1199 else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC) 1215 else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC)
1200 ret = run_delalloc_nocow(inode, locked_page, start, end, 1216 ret = run_delalloc_nocow(inode, locked_page, start, end,
1201 page_started, 0, nr_written); 1217 page_started, 0, nr_written);
1202 else if (!btrfs_test_opt(root, COMPRESS)) 1218 else if (!btrfs_test_opt(root, COMPRESS) &&
1219 !(BTRFS_I(inode)->force_compress))
1203 ret = cow_file_range(inode, locked_page, start, end, 1220 ret = cow_file_range(inode, locked_page, start, end,
1204 page_started, nr_written, 1); 1221 page_started, nr_written, 1);
1205 else 1222 else
@@ -1211,30 +1228,9 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page,
1211static int btrfs_split_extent_hook(struct inode *inode, 1228static int btrfs_split_extent_hook(struct inode *inode,
1212 struct extent_state *orig, u64 split) 1229 struct extent_state *orig, u64 split)
1213{ 1230{
1214 struct btrfs_root *root = BTRFS_I(inode)->root;
1215 u64 size;
1216
1217 if (!(orig->state & EXTENT_DELALLOC)) 1231 if (!(orig->state & EXTENT_DELALLOC))
1218 return 0; 1232 return 0;
1219 1233
1220 size = orig->end - orig->start + 1;
1221 if (size > root->fs_info->max_extent) {
1222 u64 num_extents;
1223 u64 new_size;
1224
1225 new_size = orig->end - split + 1;
1226 num_extents = div64_u64(size + root->fs_info->max_extent - 1,
1227 root->fs_info->max_extent);
1228
1229 /*
1230 * if we break a large extent up then leave oustanding_extents
1231 * be, since we've already accounted for the large extent.
1232 */
1233 if (div64_u64(new_size + root->fs_info->max_extent - 1,
1234 root->fs_info->max_extent) < num_extents)
1235 return 0;
1236 }
1237
1238 spin_lock(&BTRFS_I(inode)->accounting_lock); 1234 spin_lock(&BTRFS_I(inode)->accounting_lock);
1239 BTRFS_I(inode)->outstanding_extents++; 1235 BTRFS_I(inode)->outstanding_extents++;
1240 spin_unlock(&BTRFS_I(inode)->accounting_lock); 1236 spin_unlock(&BTRFS_I(inode)->accounting_lock);
@@ -1252,38 +1248,10 @@ static int btrfs_merge_extent_hook(struct inode *inode,
1252 struct extent_state *new, 1248 struct extent_state *new,
1253 struct extent_state *other) 1249 struct extent_state *other)
1254{ 1250{
1255 struct btrfs_root *root = BTRFS_I(inode)->root;
1256 u64 new_size, old_size;
1257 u64 num_extents;
1258
1259 /* not delalloc, ignore it */ 1251 /* not delalloc, ignore it */
1260 if (!(other->state & EXTENT_DELALLOC)) 1252 if (!(other->state & EXTENT_DELALLOC))
1261 return 0; 1253 return 0;
1262 1254
1263 old_size = other->end - other->start + 1;
1264 if (new->start < other->start)
1265 new_size = other->end - new->start + 1;
1266 else
1267 new_size = new->end - other->start + 1;
1268
1269 /* we're not bigger than the max, unreserve the space and go */
1270 if (new_size <= root->fs_info->max_extent) {
1271 spin_lock(&BTRFS_I(inode)->accounting_lock);
1272 BTRFS_I(inode)->outstanding_extents--;
1273 spin_unlock(&BTRFS_I(inode)->accounting_lock);
1274 return 0;
1275 }
1276
1277 /*
1278 * If we grew by another max_extent, just return, we want to keep that
1279 * reserved amount.
1280 */
1281 num_extents = div64_u64(old_size + root->fs_info->max_extent - 1,
1282 root->fs_info->max_extent);
1283 if (div64_u64(new_size + root->fs_info->max_extent - 1,
1284 root->fs_info->max_extent) > num_extents)
1285 return 0;
1286
1287 spin_lock(&BTRFS_I(inode)->accounting_lock); 1255 spin_lock(&BTRFS_I(inode)->accounting_lock);
1288 BTRFS_I(inode)->outstanding_extents--; 1256 BTRFS_I(inode)->outstanding_extents--;
1289 spin_unlock(&BTRFS_I(inode)->accounting_lock); 1257 spin_unlock(&BTRFS_I(inode)->accounting_lock);
@@ -1312,6 +1280,7 @@ static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end,
1312 BTRFS_I(inode)->outstanding_extents++; 1280 BTRFS_I(inode)->outstanding_extents++;
1313 spin_unlock(&BTRFS_I(inode)->accounting_lock); 1281 spin_unlock(&BTRFS_I(inode)->accounting_lock);
1314 btrfs_delalloc_reserve_space(root, inode, end - start + 1); 1282 btrfs_delalloc_reserve_space(root, inode, end - start + 1);
1283
1315 spin_lock(&root->fs_info->delalloc_lock); 1284 spin_lock(&root->fs_info->delalloc_lock);
1316 BTRFS_I(inode)->delalloc_bytes += end - start + 1; 1285 BTRFS_I(inode)->delalloc_bytes += end - start + 1;
1317 root->fs_info->delalloc_bytes += end - start + 1; 1286 root->fs_info->delalloc_bytes += end - start + 1;
@@ -1340,6 +1309,7 @@ static int btrfs_clear_bit_hook(struct inode *inode,
1340 1309
1341 if (bits & EXTENT_DO_ACCOUNTING) { 1310 if (bits & EXTENT_DO_ACCOUNTING) {
1342 spin_lock(&BTRFS_I(inode)->accounting_lock); 1311 spin_lock(&BTRFS_I(inode)->accounting_lock);
1312 WARN_ON(!BTRFS_I(inode)->outstanding_extents);
1343 BTRFS_I(inode)->outstanding_extents--; 1313 BTRFS_I(inode)->outstanding_extents--;
1344 spin_unlock(&BTRFS_I(inode)->accounting_lock); 1314 spin_unlock(&BTRFS_I(inode)->accounting_lock);
1345 btrfs_unreserve_metadata_for_delalloc(root, inode, 1); 1315 btrfs_unreserve_metadata_for_delalloc(root, inode, 1);
@@ -1496,12 +1466,13 @@ static noinline int add_pending_csums(struct btrfs_trans_handle *trans,
1496 return 0; 1466 return 0;
1497} 1467}
1498 1468
1499int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end) 1469int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
1470 struct extent_state **cached_state)
1500{ 1471{
1501 if ((end & (PAGE_CACHE_SIZE - 1)) == 0) 1472 if ((end & (PAGE_CACHE_SIZE - 1)) == 0)
1502 WARN_ON(1); 1473 WARN_ON(1);
1503 return set_extent_delalloc(&BTRFS_I(inode)->io_tree, start, end, 1474 return set_extent_delalloc(&BTRFS_I(inode)->io_tree, start, end,
1504 GFP_NOFS); 1475 cached_state, GFP_NOFS);
1505} 1476}
1506 1477
1507/* see btrfs_writepage_start_hook for details on why this is required */ 1478/* see btrfs_writepage_start_hook for details on why this is required */
@@ -1514,6 +1485,7 @@ static void btrfs_writepage_fixup_worker(struct btrfs_work *work)
1514{ 1485{
1515 struct btrfs_writepage_fixup *fixup; 1486 struct btrfs_writepage_fixup *fixup;
1516 struct btrfs_ordered_extent *ordered; 1487 struct btrfs_ordered_extent *ordered;
1488 struct extent_state *cached_state = NULL;
1517 struct page *page; 1489 struct page *page;
1518 struct inode *inode; 1490 struct inode *inode;
1519 u64 page_start; 1491 u64 page_start;
@@ -1532,7 +1504,8 @@ again:
1532 page_start = page_offset(page); 1504 page_start = page_offset(page);
1533 page_end = page_offset(page) + PAGE_CACHE_SIZE - 1; 1505 page_end = page_offset(page) + PAGE_CACHE_SIZE - 1;
1534 1506
1535 lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS); 1507 lock_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end, 0,
1508 &cached_state, GFP_NOFS);
1536 1509
1537 /* already ordered? We're done */ 1510 /* already ordered? We're done */
1538 if (PagePrivate2(page)) 1511 if (PagePrivate2(page))
@@ -1540,17 +1513,18 @@ again:
1540 1513
1541 ordered = btrfs_lookup_ordered_extent(inode, page_start); 1514 ordered = btrfs_lookup_ordered_extent(inode, page_start);
1542 if (ordered) { 1515 if (ordered) {
1543 unlock_extent(&BTRFS_I(inode)->io_tree, page_start, 1516 unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start,
1544 page_end, GFP_NOFS); 1517 page_end, &cached_state, GFP_NOFS);
1545 unlock_page(page); 1518 unlock_page(page);
1546 btrfs_start_ordered_extent(inode, ordered, 1); 1519 btrfs_start_ordered_extent(inode, ordered, 1);
1547 goto again; 1520 goto again;
1548 } 1521 }
1549 1522
1550 btrfs_set_extent_delalloc(inode, page_start, page_end); 1523 btrfs_set_extent_delalloc(inode, page_start, page_end, &cached_state);
1551 ClearPageChecked(page); 1524 ClearPageChecked(page);
1552out: 1525out:
1553 unlock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS); 1526 unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start, page_end,
1527 &cached_state, GFP_NOFS);
1554out_page: 1528out_page:
1555 unlock_page(page); 1529 unlock_page(page);
1556 page_cache_release(page); 1530 page_cache_release(page);
@@ -1596,7 +1570,6 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
1596 struct inode *inode, u64 file_pos, 1570 struct inode *inode, u64 file_pos,
1597 u64 disk_bytenr, u64 disk_num_bytes, 1571 u64 disk_bytenr, u64 disk_num_bytes,
1598 u64 num_bytes, u64 ram_bytes, 1572 u64 num_bytes, u64 ram_bytes,
1599 u64 locked_end,
1600 u8 compression, u8 encryption, 1573 u8 compression, u8 encryption,
1601 u16 other_encoding, int extent_type) 1574 u16 other_encoding, int extent_type)
1602{ 1575{
@@ -1622,9 +1595,8 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
1622 * the caller is expected to unpin it and allow it to be merged 1595 * the caller is expected to unpin it and allow it to be merged
1623 * with the others. 1596 * with the others.
1624 */ 1597 */
1625 ret = btrfs_drop_extents(trans, root, inode, file_pos, 1598 ret = btrfs_drop_extents(trans, inode, file_pos, file_pos + num_bytes,
1626 file_pos + num_bytes, locked_end, 1599 &hint, 0);
1627 file_pos, &hint, 0);
1628 BUG_ON(ret); 1600 BUG_ON(ret);
1629 1601
1630 ins.objectid = inode->i_ino; 1602 ins.objectid = inode->i_ino;
@@ -1671,24 +1643,6 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
1671 * before we start the transaction. It limits the amount of btree 1643 * before we start the transaction. It limits the amount of btree
1672 * reads required while inside the transaction. 1644 * reads required while inside the transaction.
1673 */ 1645 */
1674static noinline void reada_csum(struct btrfs_root *root,
1675 struct btrfs_path *path,
1676 struct btrfs_ordered_extent *ordered_extent)
1677{
1678 struct btrfs_ordered_sum *sum;
1679 u64 bytenr;
1680
1681 sum = list_entry(ordered_extent->list.next, struct btrfs_ordered_sum,
1682 list);
1683 bytenr = sum->sums[0].bytenr;
1684
1685 /*
1686 * we don't care about the results, the point of this search is
1687 * just to get the btree leaves into ram
1688 */
1689 btrfs_lookup_csum(NULL, root->fs_info->csum_root, path, bytenr, 0);
1690}
1691
1692/* as ordered data IO finishes, this gets called so we can finish 1646/* as ordered data IO finishes, this gets called so we can finish
1693 * an ordered extent if the range of bytes in the file it covers are 1647 * an ordered extent if the range of bytes in the file it covers are
1694 * fully written. 1648 * fully written.
@@ -1699,54 +1653,39 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1699 struct btrfs_trans_handle *trans; 1653 struct btrfs_trans_handle *trans;
1700 struct btrfs_ordered_extent *ordered_extent = NULL; 1654 struct btrfs_ordered_extent *ordered_extent = NULL;
1701 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 1655 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
1702 struct btrfs_path *path; 1656 struct extent_state *cached_state = NULL;
1703 int compressed = 0; 1657 int compressed = 0;
1704 int ret; 1658 int ret;
1705 1659
1706 ret = btrfs_dec_test_ordered_pending(inode, start, end - start + 1); 1660 ret = btrfs_dec_test_ordered_pending(inode, &ordered_extent, start,
1661 end - start + 1);
1707 if (!ret) 1662 if (!ret)
1708 return 0; 1663 return 0;
1664 BUG_ON(!ordered_extent);
1709 1665
1710 /* 1666 if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
1711 * before we join the transaction, try to do some of our IO. 1667 BUG_ON(!list_empty(&ordered_extent->list));
1712 * This will limit the amount of IO that we have to do with 1668 ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
1713 * the transaction running. We're unlikely to need to do any 1669 if (!ret) {
1714 * IO if the file extents are new, the disk_i_size checks 1670 trans = btrfs_join_transaction(root, 1);
1715 * covers the most common case. 1671 ret = btrfs_update_inode(trans, root, inode);
1716 */ 1672 BUG_ON(ret);
1717 if (start < BTRFS_I(inode)->disk_i_size) { 1673 btrfs_end_transaction(trans, root);
1718 path = btrfs_alloc_path();
1719 if (path) {
1720 ret = btrfs_lookup_file_extent(NULL, root, path,
1721 inode->i_ino,
1722 start, 0);
1723 ordered_extent = btrfs_lookup_ordered_extent(inode,
1724 start);
1725 if (!list_empty(&ordered_extent->list)) {
1726 btrfs_release_path(root, path);
1727 reada_csum(root, path, ordered_extent);
1728 }
1729 btrfs_free_path(path);
1730 } 1674 }
1675 goto out;
1731 } 1676 }
1732 1677
1733 trans = btrfs_join_transaction(root, 1); 1678 lock_extent_bits(io_tree, ordered_extent->file_offset,
1734 1679 ordered_extent->file_offset + ordered_extent->len - 1,
1735 if (!ordered_extent) 1680 0, &cached_state, GFP_NOFS);
1736 ordered_extent = btrfs_lookup_ordered_extent(inode, start);
1737 BUG_ON(!ordered_extent);
1738 if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags))
1739 goto nocow;
1740 1681
1741 lock_extent(io_tree, ordered_extent->file_offset, 1682 trans = btrfs_join_transaction(root, 1);
1742 ordered_extent->file_offset + ordered_extent->len - 1,
1743 GFP_NOFS);
1744 1683
1745 if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags)) 1684 if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags))
1746 compressed = 1; 1685 compressed = 1;
1747 if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) { 1686 if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) {
1748 BUG_ON(compressed); 1687 BUG_ON(compressed);
1749 ret = btrfs_mark_extent_written(trans, root, inode, 1688 ret = btrfs_mark_extent_written(trans, inode,
1750 ordered_extent->file_offset, 1689 ordered_extent->file_offset,
1751 ordered_extent->file_offset + 1690 ordered_extent->file_offset +
1752 ordered_extent->len); 1691 ordered_extent->len);
@@ -1758,8 +1697,6 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1758 ordered_extent->disk_len, 1697 ordered_extent->disk_len,
1759 ordered_extent->len, 1698 ordered_extent->len,
1760 ordered_extent->len, 1699 ordered_extent->len,
1761 ordered_extent->file_offset +
1762 ordered_extent->len,
1763 compressed, 0, 0, 1700 compressed, 0, 0,
1764 BTRFS_FILE_EXTENT_REG); 1701 BTRFS_FILE_EXTENT_REG);
1765 unpin_extent_cache(&BTRFS_I(inode)->extent_tree, 1702 unpin_extent_cache(&BTRFS_I(inode)->extent_tree,
@@ -1767,25 +1704,24 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1767 ordered_extent->len); 1704 ordered_extent->len);
1768 BUG_ON(ret); 1705 BUG_ON(ret);
1769 } 1706 }
1770 unlock_extent(io_tree, ordered_extent->file_offset, 1707 unlock_extent_cached(io_tree, ordered_extent->file_offset,
1771 ordered_extent->file_offset + ordered_extent->len - 1, 1708 ordered_extent->file_offset +
1772 GFP_NOFS); 1709 ordered_extent->len - 1, &cached_state, GFP_NOFS);
1773nocow: 1710
1774 add_pending_csums(trans, inode, ordered_extent->file_offset, 1711 add_pending_csums(trans, inode, ordered_extent->file_offset,
1775 &ordered_extent->list); 1712 &ordered_extent->list);
1776 1713
1777 mutex_lock(&BTRFS_I(inode)->extent_mutex); 1714 /* this also removes the ordered extent from the tree */
1778 btrfs_ordered_update_i_size(inode, ordered_extent); 1715 btrfs_ordered_update_i_size(inode, 0, ordered_extent);
1779 btrfs_update_inode(trans, root, inode); 1716 ret = btrfs_update_inode(trans, root, inode);
1780 btrfs_remove_ordered_extent(inode, ordered_extent); 1717 BUG_ON(ret);
1781 mutex_unlock(&BTRFS_I(inode)->extent_mutex); 1718 btrfs_end_transaction(trans, root);
1782 1719out:
1783 /* once for us */ 1720 /* once for us */
1784 btrfs_put_ordered_extent(ordered_extent); 1721 btrfs_put_ordered_extent(ordered_extent);
1785 /* once for the tree */ 1722 /* once for the tree */
1786 btrfs_put_ordered_extent(ordered_extent); 1723 btrfs_put_ordered_extent(ordered_extent);
1787 1724
1788 btrfs_end_transaction(trans, root);
1789 return 0; 1725 return 0;
1790} 1726}
1791 1727
@@ -2008,6 +1944,54 @@ zeroit:
2008 return -EIO; 1944 return -EIO;
2009} 1945}
2010 1946
1947struct delayed_iput {
1948 struct list_head list;
1949 struct inode *inode;
1950};
1951
1952void btrfs_add_delayed_iput(struct inode *inode)
1953{
1954 struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
1955 struct delayed_iput *delayed;
1956
1957 if (atomic_add_unless(&inode->i_count, -1, 1))
1958 return;
1959
1960 delayed = kmalloc(sizeof(*delayed), GFP_NOFS | __GFP_NOFAIL);
1961 delayed->inode = inode;
1962
1963 spin_lock(&fs_info->delayed_iput_lock);
1964 list_add_tail(&delayed->list, &fs_info->delayed_iputs);
1965 spin_unlock(&fs_info->delayed_iput_lock);
1966}
1967
1968void btrfs_run_delayed_iputs(struct btrfs_root *root)
1969{
1970 LIST_HEAD(list);
1971 struct btrfs_fs_info *fs_info = root->fs_info;
1972 struct delayed_iput *delayed;
1973 int empty;
1974
1975 spin_lock(&fs_info->delayed_iput_lock);
1976 empty = list_empty(&fs_info->delayed_iputs);
1977 spin_unlock(&fs_info->delayed_iput_lock);
1978 if (empty)
1979 return;
1980
1981 down_read(&root->fs_info->cleanup_work_sem);
1982 spin_lock(&fs_info->delayed_iput_lock);
1983 list_splice_init(&fs_info->delayed_iputs, &list);
1984 spin_unlock(&fs_info->delayed_iput_lock);
1985
1986 while (!list_empty(&list)) {
1987 delayed = list_entry(list.next, struct delayed_iput, list);
1988 list_del(&delayed->list);
1989 iput(delayed->inode);
1990 kfree(delayed);
1991 }
1992 up_read(&root->fs_info->cleanup_work_sem);
1993}
1994
2011/* 1995/*
2012 * This creates an orphan entry for the given inode in case something goes 1996 * This creates an orphan entry for the given inode in case something goes
2013 * wrong in the middle of an unlink/truncate. 1997 * wrong in the middle of an unlink/truncate.
@@ -2080,16 +2064,17 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
2080 struct inode *inode; 2064 struct inode *inode;
2081 int ret = 0, nr_unlink = 0, nr_truncate = 0; 2065 int ret = 0, nr_unlink = 0, nr_truncate = 0;
2082 2066
2083 path = btrfs_alloc_path(); 2067 if (!xchg(&root->clean_orphans, 0))
2084 if (!path)
2085 return; 2068 return;
2069
2070 path = btrfs_alloc_path();
2071 BUG_ON(!path);
2086 path->reada = -1; 2072 path->reada = -1;
2087 2073
2088 key.objectid = BTRFS_ORPHAN_OBJECTID; 2074 key.objectid = BTRFS_ORPHAN_OBJECTID;
2089 btrfs_set_key_type(&key, BTRFS_ORPHAN_ITEM_KEY); 2075 btrfs_set_key_type(&key, BTRFS_ORPHAN_ITEM_KEY);
2090 key.offset = (u64)-1; 2076 key.offset = (u64)-1;
2091 2077
2092
2093 while (1) { 2078 while (1) {
2094 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 2079 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2095 if (ret < 0) { 2080 if (ret < 0) {
@@ -2131,7 +2116,7 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
2131 found_key.objectid = found_key.offset; 2116 found_key.objectid = found_key.offset;
2132 found_key.type = BTRFS_INODE_ITEM_KEY; 2117 found_key.type = BTRFS_INODE_ITEM_KEY;
2133 found_key.offset = 0; 2118 found_key.offset = 0;
2134 inode = btrfs_iget(root->fs_info->sb, &found_key, root); 2119 inode = btrfs_iget(root->fs_info->sb, &found_key, root, NULL);
2135 if (IS_ERR(inode)) 2120 if (IS_ERR(inode))
2136 break; 2121 break;
2137 2122
@@ -2834,37 +2819,40 @@ out:
2834 * min_type is the minimum key type to truncate down to. If set to 0, this 2819 * min_type is the minimum key type to truncate down to. If set to 0, this
2835 * will kill all the items on this inode, including the INODE_ITEM_KEY. 2820 * will kill all the items on this inode, including the INODE_ITEM_KEY.
2836 */ 2821 */
2837noinline int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, 2822int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
2838 struct btrfs_root *root, 2823 struct btrfs_root *root,
2839 struct inode *inode, 2824 struct inode *inode,
2840 u64 new_size, u32 min_type) 2825 u64 new_size, u32 min_type)
2841{ 2826{
2842 int ret;
2843 struct btrfs_path *path; 2827 struct btrfs_path *path;
2844 struct btrfs_key key;
2845 struct btrfs_key found_key;
2846 u32 found_type = (u8)-1;
2847 struct extent_buffer *leaf; 2828 struct extent_buffer *leaf;
2848 struct btrfs_file_extent_item *fi; 2829 struct btrfs_file_extent_item *fi;
2830 struct btrfs_key key;
2831 struct btrfs_key found_key;
2849 u64 extent_start = 0; 2832 u64 extent_start = 0;
2850 u64 extent_num_bytes = 0; 2833 u64 extent_num_bytes = 0;
2851 u64 extent_offset = 0; 2834 u64 extent_offset = 0;
2852 u64 item_end = 0; 2835 u64 item_end = 0;
2836 u64 mask = root->sectorsize - 1;
2837 u32 found_type = (u8)-1;
2853 int found_extent; 2838 int found_extent;
2854 int del_item; 2839 int del_item;
2855 int pending_del_nr = 0; 2840 int pending_del_nr = 0;
2856 int pending_del_slot = 0; 2841 int pending_del_slot = 0;
2857 int extent_type = -1; 2842 int extent_type = -1;
2858 int encoding; 2843 int encoding;
2859 u64 mask = root->sectorsize - 1; 2844 int ret;
2845 int err = 0;
2846
2847 BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY);
2860 2848
2861 if (root->ref_cows) 2849 if (root->ref_cows)
2862 btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0); 2850 btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0);
2851
2863 path = btrfs_alloc_path(); 2852 path = btrfs_alloc_path();
2864 BUG_ON(!path); 2853 BUG_ON(!path);
2865 path->reada = -1; 2854 path->reada = -1;
2866 2855
2867 /* FIXME, add redo link to tree so we don't leak on crash */
2868 key.objectid = inode->i_ino; 2856 key.objectid = inode->i_ino;
2869 key.offset = (u64)-1; 2857 key.offset = (u64)-1;
2870 key.type = (u8)-1; 2858 key.type = (u8)-1;
@@ -2872,17 +2860,17 @@ noinline int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
2872search_again: 2860search_again:
2873 path->leave_spinning = 1; 2861 path->leave_spinning = 1;
2874 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 2862 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2875 if (ret < 0) 2863 if (ret < 0) {
2876 goto error; 2864 err = ret;
2865 goto out;
2866 }
2877 2867
2878 if (ret > 0) { 2868 if (ret > 0) {
2879 /* there are no items in the tree for us to truncate, we're 2869 /* there are no items in the tree for us to truncate, we're
2880 * done 2870 * done
2881 */ 2871 */
2882 if (path->slots[0] == 0) { 2872 if (path->slots[0] == 0)
2883 ret = 0; 2873 goto out;
2884 goto error;
2885 }
2886 path->slots[0]--; 2874 path->slots[0]--;
2887 } 2875 }
2888 2876
@@ -2917,28 +2905,17 @@ search_again:
2917 } 2905 }
2918 item_end--; 2906 item_end--;
2919 } 2907 }
2920 if (item_end < new_size) { 2908 if (found_type > min_type) {
2921 if (found_type == BTRFS_DIR_ITEM_KEY) 2909 del_item = 1;
2922 found_type = BTRFS_INODE_ITEM_KEY; 2910 } else {
2923 else if (found_type == BTRFS_EXTENT_ITEM_KEY) 2911 if (item_end < new_size)
2924 found_type = BTRFS_EXTENT_DATA_KEY;
2925 else if (found_type == BTRFS_EXTENT_DATA_KEY)
2926 found_type = BTRFS_XATTR_ITEM_KEY;
2927 else if (found_type == BTRFS_XATTR_ITEM_KEY)
2928 found_type = BTRFS_INODE_REF_KEY;
2929 else if (found_type)
2930 found_type--;
2931 else
2932 break; 2912 break;
2933 btrfs_set_key_type(&key, found_type); 2913 if (found_key.offset >= new_size)
2934 goto next; 2914 del_item = 1;
2915 else
2916 del_item = 0;
2935 } 2917 }
2936 if (found_key.offset >= new_size)
2937 del_item = 1;
2938 else
2939 del_item = 0;
2940 found_extent = 0; 2918 found_extent = 0;
2941
2942 /* FIXME, shrink the extent if the ref count is only 1 */ 2919 /* FIXME, shrink the extent if the ref count is only 1 */
2943 if (found_type != BTRFS_EXTENT_DATA_KEY) 2920 if (found_type != BTRFS_EXTENT_DATA_KEY)
2944 goto delete; 2921 goto delete;
@@ -3025,42 +3002,36 @@ delete:
3025 inode->i_ino, extent_offset); 3002 inode->i_ino, extent_offset);
3026 BUG_ON(ret); 3003 BUG_ON(ret);
3027 } 3004 }
3028next:
3029 if (path->slots[0] == 0) {
3030 if (pending_del_nr)
3031 goto del_pending;
3032 btrfs_release_path(root, path);
3033 if (found_type == BTRFS_INODE_ITEM_KEY)
3034 break;
3035 goto search_again;
3036 }
3037 3005
3038 path->slots[0]--; 3006 if (found_type == BTRFS_INODE_ITEM_KEY)
3039 if (pending_del_nr && 3007 break;
3040 path->slots[0] + 1 != pending_del_slot) { 3008
3041 struct btrfs_key debug; 3009 if (path->slots[0] == 0 ||
3042del_pending: 3010 path->slots[0] != pending_del_slot) {
3043 btrfs_item_key_to_cpu(path->nodes[0], &debug, 3011 if (root->ref_cows) {
3044 pending_del_slot); 3012 err = -EAGAIN;
3045 ret = btrfs_del_items(trans, root, path, 3013 goto out;
3046 pending_del_slot, 3014 }
3047 pending_del_nr); 3015 if (pending_del_nr) {
3048 BUG_ON(ret); 3016 ret = btrfs_del_items(trans, root, path,
3049 pending_del_nr = 0; 3017 pending_del_slot,
3018 pending_del_nr);
3019 BUG_ON(ret);
3020 pending_del_nr = 0;
3021 }
3050 btrfs_release_path(root, path); 3022 btrfs_release_path(root, path);
3051 if (found_type == BTRFS_INODE_ITEM_KEY)
3052 break;
3053 goto search_again; 3023 goto search_again;
3024 } else {
3025 path->slots[0]--;
3054 } 3026 }
3055 } 3027 }
3056 ret = 0; 3028out:
3057error:
3058 if (pending_del_nr) { 3029 if (pending_del_nr) {
3059 ret = btrfs_del_items(trans, root, path, pending_del_slot, 3030 ret = btrfs_del_items(trans, root, path, pending_del_slot,
3060 pending_del_nr); 3031 pending_del_nr);
3061 } 3032 }
3062 btrfs_free_path(path); 3033 btrfs_free_path(path);
3063 return ret; 3034 return err;
3064} 3035}
3065 3036
3066/* 3037/*
@@ -3073,6 +3044,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
3073 struct btrfs_root *root = BTRFS_I(inode)->root; 3044 struct btrfs_root *root = BTRFS_I(inode)->root;
3074 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 3045 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
3075 struct btrfs_ordered_extent *ordered; 3046 struct btrfs_ordered_extent *ordered;
3047 struct extent_state *cached_state = NULL;
3076 char *kaddr; 3048 char *kaddr;
3077 u32 blocksize = root->sectorsize; 3049 u32 blocksize = root->sectorsize;
3078 pgoff_t index = from >> PAGE_CACHE_SHIFT; 3050 pgoff_t index = from >> PAGE_CACHE_SHIFT;
@@ -3119,12 +3091,14 @@ again:
3119 } 3091 }
3120 wait_on_page_writeback(page); 3092 wait_on_page_writeback(page);
3121 3093
3122 lock_extent(io_tree, page_start, page_end, GFP_NOFS); 3094 lock_extent_bits(io_tree, page_start, page_end, 0, &cached_state,
3095 GFP_NOFS);
3123 set_page_extent_mapped(page); 3096 set_page_extent_mapped(page);
3124 3097
3125 ordered = btrfs_lookup_ordered_extent(inode, page_start); 3098 ordered = btrfs_lookup_ordered_extent(inode, page_start);
3126 if (ordered) { 3099 if (ordered) {
3127 unlock_extent(io_tree, page_start, page_end, GFP_NOFS); 3100 unlock_extent_cached(io_tree, page_start, page_end,
3101 &cached_state, GFP_NOFS);
3128 unlock_page(page); 3102 unlock_page(page);
3129 page_cache_release(page); 3103 page_cache_release(page);
3130 btrfs_start_ordered_extent(inode, ordered, 1); 3104 btrfs_start_ordered_extent(inode, ordered, 1);
@@ -3132,13 +3106,15 @@ again:
3132 goto again; 3106 goto again;
3133 } 3107 }
3134 3108
3135 clear_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end, 3109 clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, page_end,
3136 EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING, 3110 EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING,
3137 GFP_NOFS); 3111 0, 0, &cached_state, GFP_NOFS);
3138 3112
3139 ret = btrfs_set_extent_delalloc(inode, page_start, page_end); 3113 ret = btrfs_set_extent_delalloc(inode, page_start, page_end,
3114 &cached_state);
3140 if (ret) { 3115 if (ret) {
3141 unlock_extent(io_tree, page_start, page_end, GFP_NOFS); 3116 unlock_extent_cached(io_tree, page_start, page_end,
3117 &cached_state, GFP_NOFS);
3142 goto out_unlock; 3118 goto out_unlock;
3143 } 3119 }
3144 3120
@@ -3151,7 +3127,8 @@ again:
3151 } 3127 }
3152 ClearPageChecked(page); 3128 ClearPageChecked(page);
3153 set_page_dirty(page); 3129 set_page_dirty(page);
3154 unlock_extent(io_tree, page_start, page_end, GFP_NOFS); 3130 unlock_extent_cached(io_tree, page_start, page_end, &cached_state,
3131 GFP_NOFS);
3155 3132
3156out_unlock: 3133out_unlock:
3157 if (ret) 3134 if (ret)
@@ -3169,6 +3146,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
3169 struct btrfs_root *root = BTRFS_I(inode)->root; 3146 struct btrfs_root *root = BTRFS_I(inode)->root;
3170 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 3147 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
3171 struct extent_map *em; 3148 struct extent_map *em;
3149 struct extent_state *cached_state = NULL;
3172 u64 mask = root->sectorsize - 1; 3150 u64 mask = root->sectorsize - 1;
3173 u64 hole_start = (inode->i_size + mask) & ~mask; 3151 u64 hole_start = (inode->i_size + mask) & ~mask;
3174 u64 block_end = (size + mask) & ~mask; 3152 u64 block_end = (size + mask) & ~mask;
@@ -3180,25 +3158,20 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
3180 if (size <= hole_start) 3158 if (size <= hole_start)
3181 return 0; 3159 return 0;
3182 3160
3183 err = btrfs_truncate_page(inode->i_mapping, inode->i_size);
3184 if (err)
3185 return err;
3186
3187 while (1) { 3161 while (1) {
3188 struct btrfs_ordered_extent *ordered; 3162 struct btrfs_ordered_extent *ordered;
3189 btrfs_wait_ordered_range(inode, hole_start, 3163 btrfs_wait_ordered_range(inode, hole_start,
3190 block_end - hole_start); 3164 block_end - hole_start);
3191 lock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS); 3165 lock_extent_bits(io_tree, hole_start, block_end - 1, 0,
3166 &cached_state, GFP_NOFS);
3192 ordered = btrfs_lookup_ordered_extent(inode, hole_start); 3167 ordered = btrfs_lookup_ordered_extent(inode, hole_start);
3193 if (!ordered) 3168 if (!ordered)
3194 break; 3169 break;
3195 unlock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS); 3170 unlock_extent_cached(io_tree, hole_start, block_end - 1,
3171 &cached_state, GFP_NOFS);
3196 btrfs_put_ordered_extent(ordered); 3172 btrfs_put_ordered_extent(ordered);
3197 } 3173 }
3198 3174
3199 trans = btrfs_start_transaction(root, 1);
3200 btrfs_set_trans_block_group(trans, inode);
3201
3202 cur_offset = hole_start; 3175 cur_offset = hole_start;
3203 while (1) { 3176 while (1) {
3204 em = btrfs_get_extent(inode, NULL, 0, cur_offset, 3177 em = btrfs_get_extent(inode, NULL, 0, cur_offset,
@@ -3206,40 +3179,121 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
3206 BUG_ON(IS_ERR(em) || !em); 3179 BUG_ON(IS_ERR(em) || !em);
3207 last_byte = min(extent_map_end(em), block_end); 3180 last_byte = min(extent_map_end(em), block_end);
3208 last_byte = (last_byte + mask) & ~mask; 3181 last_byte = (last_byte + mask) & ~mask;
3209 if (test_bit(EXTENT_FLAG_VACANCY, &em->flags)) { 3182 if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
3210 u64 hint_byte = 0; 3183 u64 hint_byte = 0;
3211 hole_size = last_byte - cur_offset; 3184 hole_size = last_byte - cur_offset;
3212 err = btrfs_drop_extents(trans, root, inode,
3213 cur_offset,
3214 cur_offset + hole_size,
3215 block_end,
3216 cur_offset, &hint_byte, 1);
3217 if (err)
3218 break;
3219 3185
3220 err = btrfs_reserve_metadata_space(root, 1); 3186 err = btrfs_reserve_metadata_space(root, 2);
3221 if (err) 3187 if (err)
3222 break; 3188 break;
3223 3189
3190 trans = btrfs_start_transaction(root, 1);
3191 btrfs_set_trans_block_group(trans, inode);
3192
3193 err = btrfs_drop_extents(trans, inode, cur_offset,
3194 cur_offset + hole_size,
3195 &hint_byte, 1);
3196 BUG_ON(err);
3197
3224 err = btrfs_insert_file_extent(trans, root, 3198 err = btrfs_insert_file_extent(trans, root,
3225 inode->i_ino, cur_offset, 0, 3199 inode->i_ino, cur_offset, 0,
3226 0, hole_size, 0, hole_size, 3200 0, hole_size, 0, hole_size,
3227 0, 0, 0); 3201 0, 0, 0);
3202 BUG_ON(err);
3203
3228 btrfs_drop_extent_cache(inode, hole_start, 3204 btrfs_drop_extent_cache(inode, hole_start,
3229 last_byte - 1, 0); 3205 last_byte - 1, 0);
3230 btrfs_unreserve_metadata_space(root, 1); 3206
3207 btrfs_end_transaction(trans, root);
3208 btrfs_unreserve_metadata_space(root, 2);
3231 } 3209 }
3232 free_extent_map(em); 3210 free_extent_map(em);
3233 cur_offset = last_byte; 3211 cur_offset = last_byte;
3234 if (err || cur_offset >= block_end) 3212 if (cur_offset >= block_end)
3235 break; 3213 break;
3236 } 3214 }
3237 3215
3238 btrfs_end_transaction(trans, root); 3216 unlock_extent_cached(io_tree, hole_start, block_end - 1, &cached_state,
3239 unlock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS); 3217 GFP_NOFS);
3240 return err; 3218 return err;
3241} 3219}
3242 3220
3221static int btrfs_setattr_size(struct inode *inode, struct iattr *attr)
3222{
3223 struct btrfs_root *root = BTRFS_I(inode)->root;
3224 struct btrfs_trans_handle *trans;
3225 unsigned long nr;
3226 int ret;
3227
3228 if (attr->ia_size == inode->i_size)
3229 return 0;
3230
3231 if (attr->ia_size > inode->i_size) {
3232 unsigned long limit;
3233 limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
3234 if (attr->ia_size > inode->i_sb->s_maxbytes)
3235 return -EFBIG;
3236 if (limit != RLIM_INFINITY && attr->ia_size > limit) {
3237 send_sig(SIGXFSZ, current, 0);
3238 return -EFBIG;
3239 }
3240 }
3241
3242 ret = btrfs_reserve_metadata_space(root, 1);
3243 if (ret)
3244 return ret;
3245
3246 trans = btrfs_start_transaction(root, 1);
3247 btrfs_set_trans_block_group(trans, inode);
3248
3249 ret = btrfs_orphan_add(trans, inode);
3250 BUG_ON(ret);
3251
3252 nr = trans->blocks_used;
3253 btrfs_end_transaction(trans, root);
3254 btrfs_unreserve_metadata_space(root, 1);
3255 btrfs_btree_balance_dirty(root, nr);
3256
3257 if (attr->ia_size > inode->i_size) {
3258 ret = btrfs_cont_expand(inode, attr->ia_size);
3259 if (ret) {
3260 btrfs_truncate(inode);
3261 return ret;
3262 }
3263
3264 i_size_write(inode, attr->ia_size);
3265 btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
3266
3267 trans = btrfs_start_transaction(root, 1);
3268 btrfs_set_trans_block_group(trans, inode);
3269
3270 ret = btrfs_update_inode(trans, root, inode);
3271 BUG_ON(ret);
3272 if (inode->i_nlink > 0) {
3273 ret = btrfs_orphan_del(trans, inode);
3274 BUG_ON(ret);
3275 }
3276 nr = trans->blocks_used;
3277 btrfs_end_transaction(trans, root);
3278 btrfs_btree_balance_dirty(root, nr);
3279 return 0;
3280 }
3281
3282 /*
3283 * We're truncating a file that used to have good data down to
3284 * zero. Make sure it gets into the ordered flush list so that
3285 * any new writes get down to disk quickly.
3286 */
3287 if (attr->ia_size == 0)
3288 BTRFS_I(inode)->ordered_data_close = 1;
3289
3290 /* we don't support swapfiles, so vmtruncate shouldn't fail */
3291 ret = vmtruncate(inode, attr->ia_size);
3292 BUG_ON(ret);
3293
3294 return 0;
3295}
3296
3243static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) 3297static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
3244{ 3298{
3245 struct inode *inode = dentry->d_inode; 3299 struct inode *inode = dentry->d_inode;
@@ -3250,23 +3304,14 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
3250 return err; 3304 return err;
3251 3305
3252 if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) { 3306 if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) {
3253 if (attr->ia_size > inode->i_size) { 3307 err = btrfs_setattr_size(inode, attr);
3254 err = btrfs_cont_expand(inode, attr->ia_size); 3308 if (err)
3255 if (err) 3309 return err;
3256 return err;
3257 } else if (inode->i_size > 0 &&
3258 attr->ia_size == 0) {
3259
3260 /* we're truncating a file that used to have good
3261 * data down to zero. Make sure it gets into
3262 * the ordered flush list so that any new writes
3263 * get down to disk quickly.
3264 */
3265 BTRFS_I(inode)->ordered_data_close = 1;
3266 }
3267 } 3310 }
3311 attr->ia_valid &= ~ATTR_SIZE;
3268 3312
3269 err = inode_setattr(inode, attr); 3313 if (attr->ia_valid)
3314 err = inode_setattr(inode, attr);
3270 3315
3271 if (!err && ((attr->ia_valid & ATTR_MODE))) 3316 if (!err && ((attr->ia_valid & ATTR_MODE)))
3272 err = btrfs_acl_chmod(inode); 3317 err = btrfs_acl_chmod(inode);
@@ -3287,36 +3332,43 @@ void btrfs_delete_inode(struct inode *inode)
3287 } 3332 }
3288 btrfs_wait_ordered_range(inode, 0, (u64)-1); 3333 btrfs_wait_ordered_range(inode, 0, (u64)-1);
3289 3334
3335 if (root->fs_info->log_root_recovering) {
3336 BUG_ON(!list_empty(&BTRFS_I(inode)->i_orphan));
3337 goto no_delete;
3338 }
3339
3290 if (inode->i_nlink > 0) { 3340 if (inode->i_nlink > 0) {
3291 BUG_ON(btrfs_root_refs(&root->root_item) != 0); 3341 BUG_ON(btrfs_root_refs(&root->root_item) != 0);
3292 goto no_delete; 3342 goto no_delete;
3293 } 3343 }
3294 3344
3295 btrfs_i_size_write(inode, 0); 3345 btrfs_i_size_write(inode, 0);
3296 trans = btrfs_join_transaction(root, 1);
3297 3346
3298 btrfs_set_trans_block_group(trans, inode); 3347 while (1) {
3299 ret = btrfs_truncate_inode_items(trans, root, inode, inode->i_size, 0); 3348 trans = btrfs_start_transaction(root, 1);
3300 if (ret) { 3349 btrfs_set_trans_block_group(trans, inode);
3301 btrfs_orphan_del(NULL, inode); 3350 ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0);
3302 goto no_delete_lock;
3303 }
3304 3351
3305 btrfs_orphan_del(trans, inode); 3352 if (ret != -EAGAIN)
3353 break;
3306 3354
3307 nr = trans->blocks_used; 3355 nr = trans->blocks_used;
3308 clear_inode(inode); 3356 btrfs_end_transaction(trans, root);
3357 trans = NULL;
3358 btrfs_btree_balance_dirty(root, nr);
3359 }
3309 3360
3310 btrfs_end_transaction(trans, root); 3361 if (ret == 0) {
3311 btrfs_btree_balance_dirty(root, nr); 3362 ret = btrfs_orphan_del(trans, inode);
3312 return; 3363 BUG_ON(ret);
3364 }
3313 3365
3314no_delete_lock:
3315 nr = trans->blocks_used; 3366 nr = trans->blocks_used;
3316 btrfs_end_transaction(trans, root); 3367 btrfs_end_transaction(trans, root);
3317 btrfs_btree_balance_dirty(root, nr); 3368 btrfs_btree_balance_dirty(root, nr);
3318no_delete: 3369no_delete:
3319 clear_inode(inode); 3370 clear_inode(inode);
3371 return;
3320} 3372}
3321 3373
3322/* 3374/*
@@ -3560,6 +3612,7 @@ static noinline void init_btrfs_i(struct inode *inode)
3560 bi->index_cnt = (u64)-1; 3612 bi->index_cnt = (u64)-1;
3561 bi->last_unlink_trans = 0; 3613 bi->last_unlink_trans = 0;
3562 bi->ordered_data_close = 0; 3614 bi->ordered_data_close = 0;
3615 bi->force_compress = 0;
3563 extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS); 3616 extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS);
3564 extent_io_tree_init(&BTRFS_I(inode)->io_tree, 3617 extent_io_tree_init(&BTRFS_I(inode)->io_tree,
3565 inode->i_mapping, GFP_NOFS); 3618 inode->i_mapping, GFP_NOFS);
@@ -3569,7 +3622,6 @@ static noinline void init_btrfs_i(struct inode *inode)
3569 INIT_LIST_HEAD(&BTRFS_I(inode)->ordered_operations); 3622 INIT_LIST_HEAD(&BTRFS_I(inode)->ordered_operations);
3570 RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node); 3623 RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node);
3571 btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree); 3624 btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree);
3572 mutex_init(&BTRFS_I(inode)->extent_mutex);
3573 mutex_init(&BTRFS_I(inode)->log_mutex); 3625 mutex_init(&BTRFS_I(inode)->log_mutex);
3574} 3626}
3575 3627
@@ -3609,7 +3661,7 @@ static struct inode *btrfs_iget_locked(struct super_block *s,
3609 * Returns in *is_new if the inode was read from disk 3661 * Returns in *is_new if the inode was read from disk
3610 */ 3662 */
3611struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, 3663struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
3612 struct btrfs_root *root) 3664 struct btrfs_root *root, int *new)
3613{ 3665{
3614 struct inode *inode; 3666 struct inode *inode;
3615 3667
@@ -3624,6 +3676,8 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
3624 3676
3625 inode_tree_add(inode); 3677 inode_tree_add(inode);
3626 unlock_new_inode(inode); 3678 unlock_new_inode(inode);
3679 if (new)
3680 *new = 1;
3627 } 3681 }
3628 3682
3629 return inode; 3683 return inode;
@@ -3676,7 +3730,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
3676 return NULL; 3730 return NULL;
3677 3731
3678 if (location.type == BTRFS_INODE_ITEM_KEY) { 3732 if (location.type == BTRFS_INODE_ITEM_KEY) {
3679 inode = btrfs_iget(dir->i_sb, &location, root); 3733 inode = btrfs_iget(dir->i_sb, &location, root, NULL);
3680 return inode; 3734 return inode;
3681 } 3735 }
3682 3736
@@ -3691,10 +3745,17 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
3691 else 3745 else
3692 inode = new_simple_dir(dir->i_sb, &location, sub_root); 3746 inode = new_simple_dir(dir->i_sb, &location, sub_root);
3693 } else { 3747 } else {
3694 inode = btrfs_iget(dir->i_sb, &location, sub_root); 3748 inode = btrfs_iget(dir->i_sb, &location, sub_root, NULL);
3695 } 3749 }
3696 srcu_read_unlock(&root->fs_info->subvol_srcu, index); 3750 srcu_read_unlock(&root->fs_info->subvol_srcu, index);
3697 3751
3752 if (root != sub_root) {
3753 down_read(&root->fs_info->cleanup_work_sem);
3754 if (!(inode->i_sb->s_flags & MS_RDONLY))
3755 btrfs_orphan_cleanup(sub_root);
3756 up_read(&root->fs_info->cleanup_work_sem);
3757 }
3758
3698 return inode; 3759 return inode;
3699} 3760}
3700 3761
@@ -3869,7 +3930,11 @@ skip:
3869 3930
3870 /* Reached end of directory/root. Bump pos past the last item. */ 3931 /* Reached end of directory/root. Bump pos past the last item. */
3871 if (key_type == BTRFS_DIR_INDEX_KEY) 3932 if (key_type == BTRFS_DIR_INDEX_KEY)
3872 filp->f_pos = INT_LIMIT(off_t); 3933 /*
3934 * 32-bit glibc will use getdents64, but then strtol -
3935 * so the last number we can serve is this.
3936 */
3937 filp->f_pos = 0x7fffffff;
3873 else 3938 else
3874 filp->f_pos++; 3939 filp->f_pos++;
3875nopos: 3940nopos:
@@ -3879,7 +3944,7 @@ err:
3879 return ret; 3944 return ret;
3880} 3945}
3881 3946
3882int btrfs_write_inode(struct inode *inode, int wait) 3947int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc)
3883{ 3948{
3884 struct btrfs_root *root = BTRFS_I(inode)->root; 3949 struct btrfs_root *root = BTRFS_I(inode)->root;
3885 struct btrfs_trans_handle *trans; 3950 struct btrfs_trans_handle *trans;
@@ -3888,7 +3953,7 @@ int btrfs_write_inode(struct inode *inode, int wait)
3888 if (root->fs_info->btree_inode == inode) 3953 if (root->fs_info->btree_inode == inode)
3889 return 0; 3954 return 0;
3890 3955
3891 if (wait) { 3956 if (wbc->sync_mode == WB_SYNC_ALL) {
3892 trans = btrfs_join_transaction(root, 1); 3957 trans = btrfs_join_transaction(root, 1);
3893 btrfs_set_trans_block_group(trans, inode); 3958 btrfs_set_trans_block_group(trans, inode);
3894 ret = btrfs_commit_transaction(trans, root); 3959 ret = btrfs_commit_transaction(trans, root);
@@ -4219,7 +4284,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
4219 if (IS_ERR(inode)) 4284 if (IS_ERR(inode))
4220 goto out_unlock; 4285 goto out_unlock;
4221 4286
4222 err = btrfs_init_inode_security(inode, dir); 4287 err = btrfs_init_inode_security(trans, inode, dir);
4223 if (err) { 4288 if (err) {
4224 drop_inode = 1; 4289 drop_inode = 1;
4225 goto out_unlock; 4290 goto out_unlock;
@@ -4290,7 +4355,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
4290 if (IS_ERR(inode)) 4355 if (IS_ERR(inode))
4291 goto out_unlock; 4356 goto out_unlock;
4292 4357
4293 err = btrfs_init_inode_security(inode, dir); 4358 err = btrfs_init_inode_security(trans, inode, dir);
4294 if (err) { 4359 if (err) {
4295 drop_inode = 1; 4360 drop_inode = 1;
4296 goto out_unlock; 4361 goto out_unlock;
@@ -4336,6 +4401,10 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
4336 if (inode->i_nlink == 0) 4401 if (inode->i_nlink == 0)
4337 return -ENOENT; 4402 return -ENOENT;
4338 4403
4404 /* do not allow sys_link's with other subvols of the same device */
4405 if (root->objectid != BTRFS_I(inode)->root->objectid)
4406 return -EPERM;
4407
4339 /* 4408 /*
4340 * 1 item for inode ref 4409 * 1 item for inode ref
4341 * 2 items for dir items 4410 * 2 items for dir items
@@ -4408,7 +4477,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
4408 err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); 4477 err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
4409 if (err) { 4478 if (err) {
4410 err = -ENOSPC; 4479 err = -ENOSPC;
4411 goto out_unlock; 4480 goto out_fail;
4412 } 4481 }
4413 4482
4414 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, 4483 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
@@ -4423,7 +4492,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
4423 4492
4424 drop_on_err = 1; 4493 drop_on_err = 1;
4425 4494
4426 err = btrfs_init_inode_security(inode, dir); 4495 err = btrfs_init_inode_security(trans, inode, dir);
4427 if (err) 4496 if (err)
4428 goto out_fail; 4497 goto out_fail;
4429 4498
@@ -4886,6 +4955,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset)
4886{ 4955{
4887 struct extent_io_tree *tree; 4956 struct extent_io_tree *tree;
4888 struct btrfs_ordered_extent *ordered; 4957 struct btrfs_ordered_extent *ordered;
4958 struct extent_state *cached_state = NULL;
4889 u64 page_start = page_offset(page); 4959 u64 page_start = page_offset(page);
4890 u64 page_end = page_start + PAGE_CACHE_SIZE - 1; 4960 u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
4891 4961
@@ -4904,7 +4974,8 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset)
4904 btrfs_releasepage(page, GFP_NOFS); 4974 btrfs_releasepage(page, GFP_NOFS);
4905 return; 4975 return;
4906 } 4976 }
4907 lock_extent(tree, page_start, page_end, GFP_NOFS); 4977 lock_extent_bits(tree, page_start, page_end, 0, &cached_state,
4978 GFP_NOFS);
4908 ordered = btrfs_lookup_ordered_extent(page->mapping->host, 4979 ordered = btrfs_lookup_ordered_extent(page->mapping->host,
4909 page_offset(page)); 4980 page_offset(page));
4910 if (ordered) { 4981 if (ordered) {
@@ -4915,7 +4986,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset)
4915 clear_extent_bit(tree, page_start, page_end, 4986 clear_extent_bit(tree, page_start, page_end,
4916 EXTENT_DIRTY | EXTENT_DELALLOC | 4987 EXTENT_DIRTY | EXTENT_DELALLOC |
4917 EXTENT_LOCKED | EXTENT_DO_ACCOUNTING, 1, 0, 4988 EXTENT_LOCKED | EXTENT_DO_ACCOUNTING, 1, 0,
4918 NULL, GFP_NOFS); 4989 &cached_state, GFP_NOFS);
4919 /* 4990 /*
4920 * whoever cleared the private bit is responsible 4991 * whoever cleared the private bit is responsible
4921 * for the finish_ordered_io 4992 * for the finish_ordered_io
@@ -4925,11 +4996,13 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset)
4925 page_start, page_end); 4996 page_start, page_end);
4926 } 4997 }
4927 btrfs_put_ordered_extent(ordered); 4998 btrfs_put_ordered_extent(ordered);
4928 lock_extent(tree, page_start, page_end, GFP_NOFS); 4999 cached_state = NULL;
5000 lock_extent_bits(tree, page_start, page_end, 0, &cached_state,
5001 GFP_NOFS);
4929 } 5002 }
4930 clear_extent_bit(tree, page_start, page_end, 5003 clear_extent_bit(tree, page_start, page_end,
4931 EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC | 5004 EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC |
4932 EXTENT_DO_ACCOUNTING, 1, 1, NULL, GFP_NOFS); 5005 EXTENT_DO_ACCOUNTING, 1, 1, &cached_state, GFP_NOFS);
4933 __btrfs_releasepage(page, GFP_NOFS); 5006 __btrfs_releasepage(page, GFP_NOFS);
4934 5007
4935 ClearPageChecked(page); 5008 ClearPageChecked(page);
@@ -4962,6 +5035,7 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
4962 struct btrfs_root *root = BTRFS_I(inode)->root; 5035 struct btrfs_root *root = BTRFS_I(inode)->root;
4963 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 5036 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
4964 struct btrfs_ordered_extent *ordered; 5037 struct btrfs_ordered_extent *ordered;
5038 struct extent_state *cached_state = NULL;
4965 char *kaddr; 5039 char *kaddr;
4966 unsigned long zero_start; 5040 unsigned long zero_start;
4967 loff_t size; 5041 loff_t size;
@@ -5000,7 +5074,8 @@ again:
5000 } 5074 }
5001 wait_on_page_writeback(page); 5075 wait_on_page_writeback(page);
5002 5076
5003 lock_extent(io_tree, page_start, page_end, GFP_NOFS); 5077 lock_extent_bits(io_tree, page_start, page_end, 0, &cached_state,
5078 GFP_NOFS);
5004 set_page_extent_mapped(page); 5079 set_page_extent_mapped(page);
5005 5080
5006 /* 5081 /*
@@ -5009,7 +5084,8 @@ again:
5009 */ 5084 */
5010 ordered = btrfs_lookup_ordered_extent(inode, page_start); 5085 ordered = btrfs_lookup_ordered_extent(inode, page_start);
5011 if (ordered) { 5086 if (ordered) {
5012 unlock_extent(io_tree, page_start, page_end, GFP_NOFS); 5087 unlock_extent_cached(io_tree, page_start, page_end,
5088 &cached_state, GFP_NOFS);
5013 unlock_page(page); 5089 unlock_page(page);
5014 btrfs_start_ordered_extent(inode, ordered, 1); 5090 btrfs_start_ordered_extent(inode, ordered, 1);
5015 btrfs_put_ordered_extent(ordered); 5091 btrfs_put_ordered_extent(ordered);
@@ -5023,13 +5099,15 @@ again:
5023 * is probably a better way to do this, but for now keep consistent with 5099 * is probably a better way to do this, but for now keep consistent with
5024 * prepare_pages in the normal write path. 5100 * prepare_pages in the normal write path.
5025 */ 5101 */
5026 clear_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end, 5102 clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, page_end,
5027 EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING, 5103 EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING,
5028 GFP_NOFS); 5104 0, 0, &cached_state, GFP_NOFS);
5029 5105
5030 ret = btrfs_set_extent_delalloc(inode, page_start, page_end); 5106 ret = btrfs_set_extent_delalloc(inode, page_start, page_end,
5107 &cached_state);
5031 if (ret) { 5108 if (ret) {
5032 unlock_extent(io_tree, page_start, page_end, GFP_NOFS); 5109 unlock_extent_cached(io_tree, page_start, page_end,
5110 &cached_state, GFP_NOFS);
5033 ret = VM_FAULT_SIGBUS; 5111 ret = VM_FAULT_SIGBUS;
5034 btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE); 5112 btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE);
5035 goto out_unlock; 5113 goto out_unlock;
@@ -5055,7 +5133,7 @@ again:
5055 BTRFS_I(inode)->last_trans = root->fs_info->generation; 5133 BTRFS_I(inode)->last_trans = root->fs_info->generation;
5056 BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid; 5134 BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid;
5057 5135
5058 unlock_extent(io_tree, page_start, page_end, GFP_NOFS); 5136 unlock_extent_cached(io_tree, page_start, page_end, &cached_state, GFP_NOFS);
5059 5137
5060out_unlock: 5138out_unlock:
5061 btrfs_unreserve_metadata_for_delalloc(root, inode, 1); 5139 btrfs_unreserve_metadata_for_delalloc(root, inode, 1);
@@ -5074,17 +5152,20 @@ static void btrfs_truncate(struct inode *inode)
5074 unsigned long nr; 5152 unsigned long nr;
5075 u64 mask = root->sectorsize - 1; 5153 u64 mask = root->sectorsize - 1;
5076 5154
5077 if (!S_ISREG(inode->i_mode)) 5155 if (!S_ISREG(inode->i_mode)) {
5078 return; 5156 WARN_ON(1);
5079 if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
5080 return; 5157 return;
5158 }
5081 5159
5082 ret = btrfs_truncate_page(inode->i_mapping, inode->i_size); 5160 ret = btrfs_truncate_page(inode->i_mapping, inode->i_size);
5083 if (ret) 5161 if (ret)
5084 return; 5162 return;
5163
5085 btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); 5164 btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1);
5165 btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
5086 5166
5087 trans = btrfs_start_transaction(root, 1); 5167 trans = btrfs_start_transaction(root, 1);
5168 btrfs_set_trans_block_group(trans, inode);
5088 5169
5089 /* 5170 /*
5090 * setattr is responsible for setting the ordered_data_close flag, 5171 * setattr is responsible for setting the ordered_data_close flag,
@@ -5106,21 +5187,32 @@ static void btrfs_truncate(struct inode *inode)
5106 if (inode->i_size == 0 && BTRFS_I(inode)->ordered_data_close) 5187 if (inode->i_size == 0 && BTRFS_I(inode)->ordered_data_close)
5107 btrfs_add_ordered_operation(trans, root, inode); 5188 btrfs_add_ordered_operation(trans, root, inode);
5108 5189
5109 btrfs_set_trans_block_group(trans, inode); 5190 while (1) {
5110 btrfs_i_size_write(inode, inode->i_size); 5191 ret = btrfs_truncate_inode_items(trans, root, inode,
5192 inode->i_size,
5193 BTRFS_EXTENT_DATA_KEY);
5194 if (ret != -EAGAIN)
5195 break;
5111 5196
5112 ret = btrfs_orphan_add(trans, inode); 5197 ret = btrfs_update_inode(trans, root, inode);
5113 if (ret) 5198 BUG_ON(ret);
5114 goto out; 5199
5115 /* FIXME, add redo link to tree so we don't leak on crash */ 5200 nr = trans->blocks_used;
5116 ret = btrfs_truncate_inode_items(trans, root, inode, inode->i_size, 5201 btrfs_end_transaction(trans, root);
5117 BTRFS_EXTENT_DATA_KEY); 5202 btrfs_btree_balance_dirty(root, nr);
5118 btrfs_update_inode(trans, root, inode); 5203
5204 trans = btrfs_start_transaction(root, 1);
5205 btrfs_set_trans_block_group(trans, inode);
5206 }
5207
5208 if (ret == 0 && inode->i_nlink > 0) {
5209 ret = btrfs_orphan_del(trans, inode);
5210 BUG_ON(ret);
5211 }
5119 5212
5120 ret = btrfs_orphan_del(trans, inode); 5213 ret = btrfs_update_inode(trans, root, inode);
5121 BUG_ON(ret); 5214 BUG_ON(ret);
5122 5215
5123out:
5124 nr = trans->blocks_used; 5216 nr = trans->blocks_used;
5125 ret = btrfs_end_transaction_throttle(trans, root); 5217 ret = btrfs_end_transaction_throttle(trans, root);
5126 BUG_ON(ret); 5218 BUG_ON(ret);
@@ -5217,9 +5309,9 @@ void btrfs_destroy_inode(struct inode *inode)
5217 5309
5218 spin_lock(&root->list_lock); 5310 spin_lock(&root->list_lock);
5219 if (!list_empty(&BTRFS_I(inode)->i_orphan)) { 5311 if (!list_empty(&BTRFS_I(inode)->i_orphan)) {
5220 printk(KERN_ERR "BTRFS: inode %lu: inode still on the orphan" 5312 printk(KERN_INFO "BTRFS: inode %lu still on the orphan list\n",
5221 " list\n", inode->i_ino); 5313 inode->i_ino);
5222 dump_stack(); 5314 list_del_init(&BTRFS_I(inode)->i_orphan);
5223 } 5315 }
5224 spin_unlock(&root->list_lock); 5316 spin_unlock(&root->list_lock);
5225 5317
@@ -5246,7 +5338,6 @@ free:
5246void btrfs_drop_inode(struct inode *inode) 5338void btrfs_drop_inode(struct inode *inode)
5247{ 5339{
5248 struct btrfs_root *root = BTRFS_I(inode)->root; 5340 struct btrfs_root *root = BTRFS_I(inode)->root;
5249
5250 if (inode->i_nlink > 0 && btrfs_root_refs(&root->root_item) == 0) 5341 if (inode->i_nlink > 0 && btrfs_root_refs(&root->root_item) == 0)
5251 generic_delete_inode(inode); 5342 generic_delete_inode(inode);
5252 else 5343 else
@@ -5476,7 +5567,7 @@ out_fail:
5476 * some fairly slow code that needs optimization. This walks the list 5567 * some fairly slow code that needs optimization. This walks the list
5477 * of all the inodes with pending delalloc and forces them to disk. 5568 * of all the inodes with pending delalloc and forces them to disk.
5478 */ 5569 */
5479int btrfs_start_delalloc_inodes(struct btrfs_root *root) 5570int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
5480{ 5571{
5481 struct list_head *head = &root->fs_info->delalloc_inodes; 5572 struct list_head *head = &root->fs_info->delalloc_inodes;
5482 struct btrfs_inode *binode; 5573 struct btrfs_inode *binode;
@@ -5495,7 +5586,10 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root)
5495 spin_unlock(&root->fs_info->delalloc_lock); 5586 spin_unlock(&root->fs_info->delalloc_lock);
5496 if (inode) { 5587 if (inode) {
5497 filemap_flush(inode->i_mapping); 5588 filemap_flush(inode->i_mapping);
5498 iput(inode); 5589 if (delay_iput)
5590 btrfs_add_delayed_iput(inode);
5591 else
5592 iput(inode);
5499 } 5593 }
5500 cond_resched(); 5594 cond_resched();
5501 spin_lock(&root->fs_info->delalloc_lock); 5595 spin_lock(&root->fs_info->delalloc_lock);
@@ -5569,7 +5663,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
5569 if (IS_ERR(inode)) 5663 if (IS_ERR(inode))
5570 goto out_unlock; 5664 goto out_unlock;
5571 5665
5572 err = btrfs_init_inode_security(inode, dir); 5666 err = btrfs_init_inode_security(trans, inode, dir);
5573 if (err) { 5667 if (err) {
5574 drop_inode = 1; 5668 drop_inode = 1;
5575 goto out_unlock; 5669 goto out_unlock;
@@ -5641,62 +5735,80 @@ out_fail:
5641 return err; 5735 return err;
5642} 5736}
5643 5737
5644static int prealloc_file_range(struct btrfs_trans_handle *trans, 5738static int prealloc_file_range(struct inode *inode, u64 start, u64 end,
5645 struct inode *inode, u64 start, u64 end, 5739 u64 alloc_hint, int mode, loff_t actual_len)
5646 u64 locked_end, u64 alloc_hint, int mode)
5647{ 5740{
5741 struct btrfs_trans_handle *trans;
5648 struct btrfs_root *root = BTRFS_I(inode)->root; 5742 struct btrfs_root *root = BTRFS_I(inode)->root;
5649 struct btrfs_key ins; 5743 struct btrfs_key ins;
5650 u64 alloc_size;
5651 u64 cur_offset = start; 5744 u64 cur_offset = start;
5652 u64 num_bytes = end - start; 5745 u64 num_bytes = end - start;
5653 int ret = 0; 5746 int ret = 0;
5747 u64 i_size;
5654 5748
5655 while (num_bytes > 0) { 5749 while (num_bytes > 0) {
5656 alloc_size = min(num_bytes, root->fs_info->max_extent); 5750 trans = btrfs_start_transaction(root, 1);
5657 5751
5658 ret = btrfs_reserve_metadata_space(root, 1); 5752 ret = btrfs_reserve_extent(trans, root, num_bytes,
5659 if (ret)
5660 goto out;
5661
5662 ret = btrfs_reserve_extent(trans, root, alloc_size,
5663 root->sectorsize, 0, alloc_hint, 5753 root->sectorsize, 0, alloc_hint,
5664 (u64)-1, &ins, 1); 5754 (u64)-1, &ins, 1);
5665 if (ret) { 5755 if (ret) {
5666 WARN_ON(1); 5756 WARN_ON(1);
5667 goto out; 5757 goto stop_trans;
5668 } 5758 }
5759
5760 ret = btrfs_reserve_metadata_space(root, 3);
5761 if (ret) {
5762 btrfs_free_reserved_extent(root, ins.objectid,
5763 ins.offset);
5764 goto stop_trans;
5765 }
5766
5669 ret = insert_reserved_file_extent(trans, inode, 5767 ret = insert_reserved_file_extent(trans, inode,
5670 cur_offset, ins.objectid, 5768 cur_offset, ins.objectid,
5671 ins.offset, ins.offset, 5769 ins.offset, ins.offset,
5672 ins.offset, locked_end, 5770 ins.offset, 0, 0, 0,
5673 0, 0, 0,
5674 BTRFS_FILE_EXTENT_PREALLOC); 5771 BTRFS_FILE_EXTENT_PREALLOC);
5675 BUG_ON(ret); 5772 BUG_ON(ret);
5676 btrfs_drop_extent_cache(inode, cur_offset, 5773 btrfs_drop_extent_cache(inode, cur_offset,
5677 cur_offset + ins.offset -1, 0); 5774 cur_offset + ins.offset -1, 0);
5775
5678 num_bytes -= ins.offset; 5776 num_bytes -= ins.offset;
5679 cur_offset += ins.offset; 5777 cur_offset += ins.offset;
5680 alloc_hint = ins.objectid + ins.offset; 5778 alloc_hint = ins.objectid + ins.offset;
5681 btrfs_unreserve_metadata_space(root, 1); 5779
5682 }
5683out:
5684 if (cur_offset > start) {
5685 inode->i_ctime = CURRENT_TIME; 5780 inode->i_ctime = CURRENT_TIME;
5686 BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC; 5781 BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC;
5687 if (!(mode & FALLOC_FL_KEEP_SIZE) && 5782 if (!(mode & FALLOC_FL_KEEP_SIZE) &&
5688 cur_offset > i_size_read(inode)) 5783 (actual_len > inode->i_size) &&
5689 btrfs_i_size_write(inode, cur_offset); 5784 (cur_offset > inode->i_size)) {
5785
5786 if (cur_offset > actual_len)
5787 i_size = actual_len;
5788 else
5789 i_size = cur_offset;
5790 i_size_write(inode, i_size);
5791 btrfs_ordered_update_i_size(inode, i_size, NULL);
5792 }
5793
5690 ret = btrfs_update_inode(trans, root, inode); 5794 ret = btrfs_update_inode(trans, root, inode);
5691 BUG_ON(ret); 5795 BUG_ON(ret);
5796
5797 btrfs_end_transaction(trans, root);
5798 btrfs_unreserve_metadata_space(root, 3);
5692 } 5799 }
5800 return ret;
5693 5801
5802stop_trans:
5803 btrfs_end_transaction(trans, root);
5694 return ret; 5804 return ret;
5805
5695} 5806}
5696 5807
5697static long btrfs_fallocate(struct inode *inode, int mode, 5808static long btrfs_fallocate(struct inode *inode, int mode,
5698 loff_t offset, loff_t len) 5809 loff_t offset, loff_t len)
5699{ 5810{
5811 struct extent_state *cached_state = NULL;
5700 u64 cur_offset; 5812 u64 cur_offset;
5701 u64 last_byte; 5813 u64 last_byte;
5702 u64 alloc_start; 5814 u64 alloc_start;
@@ -5705,8 +5817,6 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5705 u64 locked_end; 5817 u64 locked_end;
5706 u64 mask = BTRFS_I(inode)->root->sectorsize - 1; 5818 u64 mask = BTRFS_I(inode)->root->sectorsize - 1;
5707 struct extent_map *em; 5819 struct extent_map *em;
5708 struct btrfs_trans_handle *trans;
5709 struct btrfs_root *root;
5710 int ret; 5820 int ret;
5711 5821
5712 alloc_start = offset & ~mask; 5822 alloc_start = offset & ~mask;
@@ -5725,9 +5835,7 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5725 goto out; 5835 goto out;
5726 } 5836 }
5727 5837
5728 root = BTRFS_I(inode)->root; 5838 ret = btrfs_check_data_free_space(BTRFS_I(inode)->root, inode,
5729
5730 ret = btrfs_check_data_free_space(root, inode,
5731 alloc_end - alloc_start); 5839 alloc_end - alloc_start);
5732 if (ret) 5840 if (ret)
5733 goto out; 5841 goto out;
@@ -5736,27 +5844,20 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5736 while (1) { 5844 while (1) {
5737 struct btrfs_ordered_extent *ordered; 5845 struct btrfs_ordered_extent *ordered;
5738 5846
5739 trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1);
5740 if (!trans) {
5741 ret = -EIO;
5742 goto out_free;
5743 }
5744
5745 /* the extent lock is ordered inside the running 5847 /* the extent lock is ordered inside the running
5746 * transaction 5848 * transaction
5747 */ 5849 */
5748 lock_extent(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, 5850 lock_extent_bits(&BTRFS_I(inode)->io_tree, alloc_start,
5749 GFP_NOFS); 5851 locked_end, 0, &cached_state, GFP_NOFS);
5750 ordered = btrfs_lookup_first_ordered_extent(inode, 5852 ordered = btrfs_lookup_first_ordered_extent(inode,
5751 alloc_end - 1); 5853 alloc_end - 1);
5752 if (ordered && 5854 if (ordered &&
5753 ordered->file_offset + ordered->len > alloc_start && 5855 ordered->file_offset + ordered->len > alloc_start &&
5754 ordered->file_offset < alloc_end) { 5856 ordered->file_offset < alloc_end) {
5755 btrfs_put_ordered_extent(ordered); 5857 btrfs_put_ordered_extent(ordered);
5756 unlock_extent(&BTRFS_I(inode)->io_tree, 5858 unlock_extent_cached(&BTRFS_I(inode)->io_tree,
5757 alloc_start, locked_end, GFP_NOFS); 5859 alloc_start, locked_end,
5758 btrfs_end_transaction(trans, BTRFS_I(inode)->root); 5860 &cached_state, GFP_NOFS);
5759
5760 /* 5861 /*
5761 * we can't wait on the range with the transaction 5862 * we can't wait on the range with the transaction
5762 * running or with the extent lock held 5863 * running or with the extent lock held
@@ -5777,10 +5878,12 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5777 BUG_ON(IS_ERR(em) || !em); 5878 BUG_ON(IS_ERR(em) || !em);
5778 last_byte = min(extent_map_end(em), alloc_end); 5879 last_byte = min(extent_map_end(em), alloc_end);
5779 last_byte = (last_byte + mask) & ~mask; 5880 last_byte = (last_byte + mask) & ~mask;
5780 if (em->block_start == EXTENT_MAP_HOLE) { 5881 if (em->block_start == EXTENT_MAP_HOLE ||
5781 ret = prealloc_file_range(trans, inode, cur_offset, 5882 (cur_offset >= inode->i_size &&
5782 last_byte, locked_end + 1, 5883 !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
5783 alloc_hint, mode); 5884 ret = prealloc_file_range(inode,
5885 cur_offset, last_byte,
5886 alloc_hint, mode, offset+len);
5784 if (ret < 0) { 5887 if (ret < 0) {
5785 free_extent_map(em); 5888 free_extent_map(em);
5786 break; 5889 break;
@@ -5796,12 +5899,11 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5796 break; 5899 break;
5797 } 5900 }
5798 } 5901 }
5799 unlock_extent(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, 5902 unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
5800 GFP_NOFS); 5903 &cached_state, GFP_NOFS);
5801 5904
5802 btrfs_end_transaction(trans, BTRFS_I(inode)->root); 5905 btrfs_free_reserved_data_space(BTRFS_I(inode)->root, inode,
5803out_free: 5906 alloc_end - alloc_start);
5804 btrfs_free_reserved_data_space(root, inode, alloc_end - alloc_start);
5805out: 5907out:
5806 mutex_unlock(&inode->i_mutex); 5908 mutex_unlock(&inode->i_mutex);
5807 return ret; 5909 return ret;
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index cdbb054102b9..97a97839a867 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -39,6 +39,7 @@
39#include <linux/security.h> 39#include <linux/security.h>
40#include <linux/xattr.h> 40#include <linux/xattr.h>
41#include <linux/vmalloc.h> 41#include <linux/vmalloc.h>
42#include <linux/slab.h>
42#include "compat.h" 43#include "compat.h"
43#include "ctree.h" 44#include "ctree.h"
44#include "disk-io.h" 45#include "disk-io.h"
@@ -237,7 +238,6 @@ static noinline int create_subvol(struct btrfs_root *root,
237 u64 objectid; 238 u64 objectid;
238 u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID; 239 u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID;
239 u64 index = 0; 240 u64 index = 0;
240 unsigned long nr = 1;
241 241
242 /* 242 /*
243 * 1 - inode item 243 * 1 - inode item
@@ -290,7 +290,7 @@ static noinline int create_subvol(struct btrfs_root *root,
290 btrfs_set_root_generation(&root_item, trans->transid); 290 btrfs_set_root_generation(&root_item, trans->transid);
291 btrfs_set_root_level(&root_item, 0); 291 btrfs_set_root_level(&root_item, 0);
292 btrfs_set_root_refs(&root_item, 1); 292 btrfs_set_root_refs(&root_item, 1);
293 btrfs_set_root_used(&root_item, 0); 293 btrfs_set_root_used(&root_item, leaf->len);
294 btrfs_set_root_last_snapshot(&root_item, 0); 294 btrfs_set_root_last_snapshot(&root_item, 0);
295 295
296 memset(&root_item.drop_progress, 0, sizeof(root_item.drop_progress)); 296 memset(&root_item.drop_progress, 0, sizeof(root_item.drop_progress));
@@ -342,24 +342,21 @@ static noinline int create_subvol(struct btrfs_root *root,
342 342
343 d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry)); 343 d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry));
344fail: 344fail:
345 nr = trans->blocks_used;
346 err = btrfs_commit_transaction(trans, root); 345 err = btrfs_commit_transaction(trans, root);
347 if (err && !ret) 346 if (err && !ret)
348 ret = err; 347 ret = err;
349 348
350 btrfs_unreserve_metadata_space(root, 6); 349 btrfs_unreserve_metadata_space(root, 6);
351 btrfs_btree_balance_dirty(root, nr);
352 return ret; 350 return ret;
353} 351}
354 352
355static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, 353static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
356 char *name, int namelen) 354 char *name, int namelen)
357{ 355{
356 struct inode *inode;
358 struct btrfs_pending_snapshot *pending_snapshot; 357 struct btrfs_pending_snapshot *pending_snapshot;
359 struct btrfs_trans_handle *trans; 358 struct btrfs_trans_handle *trans;
360 int ret = 0; 359 int ret;
361 int err;
362 unsigned long nr = 0;
363 360
364 if (!root->ref_cows) 361 if (!root->ref_cows)
365 return -EINVAL; 362 return -EINVAL;
@@ -372,20 +369,20 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
372 */ 369 */
373 ret = btrfs_reserve_metadata_space(root, 6); 370 ret = btrfs_reserve_metadata_space(root, 6);
374 if (ret) 371 if (ret)
375 goto fail_unlock; 372 goto fail;
376 373
377 pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS); 374 pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS);
378 if (!pending_snapshot) { 375 if (!pending_snapshot) {
379 ret = -ENOMEM; 376 ret = -ENOMEM;
380 btrfs_unreserve_metadata_space(root, 6); 377 btrfs_unreserve_metadata_space(root, 6);
381 goto fail_unlock; 378 goto fail;
382 } 379 }
383 pending_snapshot->name = kmalloc(namelen + 1, GFP_NOFS); 380 pending_snapshot->name = kmalloc(namelen + 1, GFP_NOFS);
384 if (!pending_snapshot->name) { 381 if (!pending_snapshot->name) {
385 ret = -ENOMEM; 382 ret = -ENOMEM;
386 kfree(pending_snapshot); 383 kfree(pending_snapshot);
387 btrfs_unreserve_metadata_space(root, 6); 384 btrfs_unreserve_metadata_space(root, 6);
388 goto fail_unlock; 385 goto fail;
389 } 386 }
390 memcpy(pending_snapshot->name, name, namelen); 387 memcpy(pending_snapshot->name, name, namelen);
391 pending_snapshot->name[namelen] = '\0'; 388 pending_snapshot->name[namelen] = '\0';
@@ -395,10 +392,19 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
395 pending_snapshot->root = root; 392 pending_snapshot->root = root;
396 list_add(&pending_snapshot->list, 393 list_add(&pending_snapshot->list,
397 &trans->transaction->pending_snapshots); 394 &trans->transaction->pending_snapshots);
398 err = btrfs_commit_transaction(trans, root); 395 ret = btrfs_commit_transaction(trans, root);
396 BUG_ON(ret);
397 btrfs_unreserve_metadata_space(root, 6);
399 398
400fail_unlock: 399 inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry);
401 btrfs_btree_balance_dirty(root, nr); 400 if (IS_ERR(inode)) {
401 ret = PTR_ERR(inode);
402 goto fail;
403 }
404 BUG_ON(!inode);
405 d_instantiate(dentry, inode);
406 ret = 0;
407fail:
402 return ret; 408 return ret;
403} 409}
404 410
@@ -469,7 +475,79 @@ out_unlock:
469 return error; 475 return error;
470} 476}
471 477
472static int btrfs_defrag_file(struct file *file) 478static int should_defrag_range(struct inode *inode, u64 start, u64 len,
479 int thresh, u64 *last_len, u64 *skip,
480 u64 *defrag_end)
481{
482 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
483 struct extent_map *em = NULL;
484 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
485 int ret = 1;
486
487
488 if (thresh == 0)
489 thresh = 256 * 1024;
490
491 /*
492 * make sure that once we start defragging and extent, we keep on
493 * defragging it
494 */
495 if (start < *defrag_end)
496 return 1;
497
498 *skip = 0;
499
500 /*
501 * hopefully we have this extent in the tree already, try without
502 * the full extent lock
503 */
504 read_lock(&em_tree->lock);
505 em = lookup_extent_mapping(em_tree, start, len);
506 read_unlock(&em_tree->lock);
507
508 if (!em) {
509 /* get the big lock and read metadata off disk */
510 lock_extent(io_tree, start, start + len - 1, GFP_NOFS);
511 em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
512 unlock_extent(io_tree, start, start + len - 1, GFP_NOFS);
513
514 if (IS_ERR(em))
515 return 0;
516 }
517
518 /* this will cover holes, and inline extents */
519 if (em->block_start >= EXTENT_MAP_LAST_BYTE)
520 ret = 0;
521
522 /*
523 * we hit a real extent, if it is big don't bother defragging it again
524 */
525 if ((*last_len == 0 || *last_len >= thresh) && em->len >= thresh)
526 ret = 0;
527
528 /*
529 * last_len ends up being a counter of how many bytes we've defragged.
530 * every time we choose not to defrag an extent, we reset *last_len
531 * so that the next tiny extent will force a defrag.
532 *
533 * The end result of this is that tiny extents before a single big
534 * extent will force at least part of that big extent to be defragged.
535 */
536 if (ret) {
537 *last_len += len;
538 *defrag_end = extent_map_end(em);
539 } else {
540 *last_len = 0;
541 *skip = extent_map_end(em);
542 *defrag_end = 0;
543 }
544
545 free_extent_map(em);
546 return ret;
547}
548
549static int btrfs_defrag_file(struct file *file,
550 struct btrfs_ioctl_defrag_range_args *range)
473{ 551{
474 struct inode *inode = fdentry(file)->d_inode; 552 struct inode *inode = fdentry(file)->d_inode;
475 struct btrfs_root *root = BTRFS_I(inode)->root; 553 struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -481,37 +559,96 @@ static int btrfs_defrag_file(struct file *file)
481 unsigned long total_read = 0; 559 unsigned long total_read = 0;
482 u64 page_start; 560 u64 page_start;
483 u64 page_end; 561 u64 page_end;
562 u64 last_len = 0;
563 u64 skip = 0;
564 u64 defrag_end = 0;
484 unsigned long i; 565 unsigned long i;
485 int ret; 566 int ret;
486 567
487 ret = btrfs_check_data_free_space(root, inode, inode->i_size); 568 if (inode->i_size == 0)
488 if (ret) 569 return 0;
489 return -ENOSPC; 570
571 if (range->start + range->len > range->start) {
572 last_index = min_t(u64, inode->i_size - 1,
573 range->start + range->len - 1) >> PAGE_CACHE_SHIFT;
574 } else {
575 last_index = (inode->i_size - 1) >> PAGE_CACHE_SHIFT;
576 }
577
578 i = range->start >> PAGE_CACHE_SHIFT;
579 while (i <= last_index) {
580 if (!should_defrag_range(inode, (u64)i << PAGE_CACHE_SHIFT,
581 PAGE_CACHE_SIZE,
582 range->extent_thresh,
583 &last_len, &skip,
584 &defrag_end)) {
585 unsigned long next;
586 /*
587 * the should_defrag function tells us how much to skip
588 * bump our counter by the suggested amount
589 */
590 next = (skip + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
591 i = max(i + 1, next);
592 continue;
593 }
490 594
491 mutex_lock(&inode->i_mutex);
492 last_index = inode->i_size >> PAGE_CACHE_SHIFT;
493 for (i = 0; i <= last_index; i++) {
494 if (total_read % ra_pages == 0) { 595 if (total_read % ra_pages == 0) {
495 btrfs_force_ra(inode->i_mapping, &file->f_ra, file, i, 596 btrfs_force_ra(inode->i_mapping, &file->f_ra, file, i,
496 min(last_index, i + ra_pages - 1)); 597 min(last_index, i + ra_pages - 1));
497 } 598 }
498 total_read++; 599 total_read++;
600 mutex_lock(&inode->i_mutex);
601 if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)
602 BTRFS_I(inode)->force_compress = 1;
603
604 ret = btrfs_check_data_free_space(root, inode, PAGE_CACHE_SIZE);
605 if (ret) {
606 ret = -ENOSPC;
607 break;
608 }
609
610 ret = btrfs_reserve_metadata_for_delalloc(root, inode, 1);
611 if (ret) {
612 btrfs_free_reserved_data_space(root, inode,
613 PAGE_CACHE_SIZE);
614 ret = -ENOSPC;
615 break;
616 }
499again: 617again:
618 if (inode->i_size == 0 ||
619 i > ((inode->i_size - 1) >> PAGE_CACHE_SHIFT)) {
620 ret = 0;
621 goto err_reservations;
622 }
623
500 page = grab_cache_page(inode->i_mapping, i); 624 page = grab_cache_page(inode->i_mapping, i);
501 if (!page) 625 if (!page)
502 goto out_unlock; 626 goto err_reservations;
627
503 if (!PageUptodate(page)) { 628 if (!PageUptodate(page)) {
504 btrfs_readpage(NULL, page); 629 btrfs_readpage(NULL, page);
505 lock_page(page); 630 lock_page(page);
506 if (!PageUptodate(page)) { 631 if (!PageUptodate(page)) {
507 unlock_page(page); 632 unlock_page(page);
508 page_cache_release(page); 633 page_cache_release(page);
509 goto out_unlock; 634 goto err_reservations;
510 } 635 }
511 } 636 }
512 637
638 if (page->mapping != inode->i_mapping) {
639 unlock_page(page);
640 page_cache_release(page);
641 goto again;
642 }
643
513 wait_on_page_writeback(page); 644 wait_on_page_writeback(page);
514 645
646 if (PageDirty(page)) {
647 btrfs_free_reserved_data_space(root, inode,
648 PAGE_CACHE_SIZE);
649 goto loop_unlock;
650 }
651
515 page_start = (u64)page->index << PAGE_CACHE_SHIFT; 652 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
516 page_end = page_start + PAGE_CACHE_SIZE - 1; 653 page_end = page_start + PAGE_CACHE_SIZE - 1;
517 lock_extent(io_tree, page_start, page_end, GFP_NOFS); 654 lock_extent(io_tree, page_start, page_end, GFP_NOFS);
@@ -532,18 +669,54 @@ again:
532 * page if it is dirtied again later 669 * page if it is dirtied again later
533 */ 670 */
534 clear_page_dirty_for_io(page); 671 clear_page_dirty_for_io(page);
672 clear_extent_bits(&BTRFS_I(inode)->io_tree, page_start,
673 page_end, EXTENT_DIRTY | EXTENT_DELALLOC |
674 EXTENT_DO_ACCOUNTING, GFP_NOFS);
535 675
536 btrfs_set_extent_delalloc(inode, page_start, page_end); 676 btrfs_set_extent_delalloc(inode, page_start, page_end, NULL);
677 ClearPageChecked(page);
537 set_page_dirty(page); 678 set_page_dirty(page);
538 unlock_extent(io_tree, page_start, page_end, GFP_NOFS); 679 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
680
681loop_unlock:
539 unlock_page(page); 682 unlock_page(page);
540 page_cache_release(page); 683 page_cache_release(page);
684 mutex_unlock(&inode->i_mutex);
685
686 btrfs_unreserve_metadata_for_delalloc(root, inode, 1);
541 balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1); 687 balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1);
688 i++;
689 }
690
691 if ((range->flags & BTRFS_DEFRAG_RANGE_START_IO))
692 filemap_flush(inode->i_mapping);
693
694 if ((range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)) {
695 /* the filemap_flush will queue IO into the worker threads, but
696 * we have to make sure the IO is actually started and that
697 * ordered extents get created before we return
698 */
699 atomic_inc(&root->fs_info->async_submit_draining);
700 while (atomic_read(&root->fs_info->nr_async_submits) ||
701 atomic_read(&root->fs_info->async_delalloc_pages)) {
702 wait_event(root->fs_info->async_submit_wait,
703 (atomic_read(&root->fs_info->nr_async_submits) == 0 &&
704 atomic_read(&root->fs_info->async_delalloc_pages) == 0));
705 }
706 atomic_dec(&root->fs_info->async_submit_draining);
707
708 mutex_lock(&inode->i_mutex);
709 BTRFS_I(inode)->force_compress = 0;
710 mutex_unlock(&inode->i_mutex);
542 } 711 }
543 712
544out_unlock:
545 mutex_unlock(&inode->i_mutex);
546 return 0; 713 return 0;
714
715err_reservations:
716 mutex_unlock(&inode->i_mutex);
717 btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE);
718 btrfs_unreserve_metadata_for_delalloc(root, inode, 1);
719 return ret;
547} 720}
548 721
549static noinline int btrfs_ioctl_resize(struct btrfs_root *root, 722static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
@@ -603,7 +776,7 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
603 mod = 1; 776 mod = 1;
604 sizestr++; 777 sizestr++;
605 } 778 }
606 new_size = btrfs_parse_size(sizestr); 779 new_size = memparse(sizestr, NULL);
607 if (new_size == 0) { 780 if (new_size == 0) {
608 ret = -EINVAL; 781 ret = -EINVAL;
609 goto out_unlock; 782 goto out_unlock;
@@ -738,6 +911,330 @@ out:
738 return ret; 911 return ret;
739} 912}
740 913
914static noinline int key_in_sk(struct btrfs_key *key,
915 struct btrfs_ioctl_search_key *sk)
916{
917 struct btrfs_key test;
918 int ret;
919
920 test.objectid = sk->min_objectid;
921 test.type = sk->min_type;
922 test.offset = sk->min_offset;
923
924 ret = btrfs_comp_cpu_keys(key, &test);
925 if (ret < 0)
926 return 0;
927
928 test.objectid = sk->max_objectid;
929 test.type = sk->max_type;
930 test.offset = sk->max_offset;
931
932 ret = btrfs_comp_cpu_keys(key, &test);
933 if (ret > 0)
934 return 0;
935 return 1;
936}
937
938static noinline int copy_to_sk(struct btrfs_root *root,
939 struct btrfs_path *path,
940 struct btrfs_key *key,
941 struct btrfs_ioctl_search_key *sk,
942 char *buf,
943 unsigned long *sk_offset,
944 int *num_found)
945{
946 u64 found_transid;
947 struct extent_buffer *leaf;
948 struct btrfs_ioctl_search_header sh;
949 unsigned long item_off;
950 unsigned long item_len;
951 int nritems;
952 int i;
953 int slot;
954 int found = 0;
955 int ret = 0;
956
957 leaf = path->nodes[0];
958 slot = path->slots[0];
959 nritems = btrfs_header_nritems(leaf);
960
961 if (btrfs_header_generation(leaf) > sk->max_transid) {
962 i = nritems;
963 goto advance_key;
964 }
965 found_transid = btrfs_header_generation(leaf);
966
967 for (i = slot; i < nritems; i++) {
968 item_off = btrfs_item_ptr_offset(leaf, i);
969 item_len = btrfs_item_size_nr(leaf, i);
970
971 if (item_len > BTRFS_SEARCH_ARGS_BUFSIZE)
972 item_len = 0;
973
974 if (sizeof(sh) + item_len + *sk_offset >
975 BTRFS_SEARCH_ARGS_BUFSIZE) {
976 ret = 1;
977 goto overflow;
978 }
979
980 btrfs_item_key_to_cpu(leaf, key, i);
981 if (!key_in_sk(key, sk))
982 continue;
983
984 sh.objectid = key->objectid;
985 sh.offset = key->offset;
986 sh.type = key->type;
987 sh.len = item_len;
988 sh.transid = found_transid;
989
990 /* copy search result header */
991 memcpy(buf + *sk_offset, &sh, sizeof(sh));
992 *sk_offset += sizeof(sh);
993
994 if (item_len) {
995 char *p = buf + *sk_offset;
996 /* copy the item */
997 read_extent_buffer(leaf, p,
998 item_off, item_len);
999 *sk_offset += item_len;
1000 }
1001 found++;
1002
1003 if (*num_found >= sk->nr_items)
1004 break;
1005 }
1006advance_key:
1007 ret = 0;
1008 if (key->offset < (u64)-1 && key->offset < sk->max_offset)
1009 key->offset++;
1010 else if (key->type < (u8)-1 && key->type < sk->max_type) {
1011 key->offset = 0;
1012 key->type++;
1013 } else if (key->objectid < (u64)-1 && key->objectid < sk->max_objectid) {
1014 key->offset = 0;
1015 key->type = 0;
1016 key->objectid++;
1017 } else
1018 ret = 1;
1019overflow:
1020 *num_found += found;
1021 return ret;
1022}
1023
1024static noinline int search_ioctl(struct inode *inode,
1025 struct btrfs_ioctl_search_args *args)
1026{
1027 struct btrfs_root *root;
1028 struct btrfs_key key;
1029 struct btrfs_key max_key;
1030 struct btrfs_path *path;
1031 struct btrfs_ioctl_search_key *sk = &args->key;
1032 struct btrfs_fs_info *info = BTRFS_I(inode)->root->fs_info;
1033 int ret;
1034 int num_found = 0;
1035 unsigned long sk_offset = 0;
1036
1037 path = btrfs_alloc_path();
1038 if (!path)
1039 return -ENOMEM;
1040
1041 if (sk->tree_id == 0) {
1042 /* search the root of the inode that was passed */
1043 root = BTRFS_I(inode)->root;
1044 } else {
1045 key.objectid = sk->tree_id;
1046 key.type = BTRFS_ROOT_ITEM_KEY;
1047 key.offset = (u64)-1;
1048 root = btrfs_read_fs_root_no_name(info, &key);
1049 if (IS_ERR(root)) {
1050 printk(KERN_ERR "could not find root %llu\n",
1051 sk->tree_id);
1052 btrfs_free_path(path);
1053 return -ENOENT;
1054 }
1055 }
1056
1057 key.objectid = sk->min_objectid;
1058 key.type = sk->min_type;
1059 key.offset = sk->min_offset;
1060
1061 max_key.objectid = sk->max_objectid;
1062 max_key.type = sk->max_type;
1063 max_key.offset = sk->max_offset;
1064
1065 path->keep_locks = 1;
1066
1067 while(1) {
1068 ret = btrfs_search_forward(root, &key, &max_key, path, 0,
1069 sk->min_transid);
1070 if (ret != 0) {
1071 if (ret > 0)
1072 ret = 0;
1073 goto err;
1074 }
1075 ret = copy_to_sk(root, path, &key, sk, args->buf,
1076 &sk_offset, &num_found);
1077 btrfs_release_path(root, path);
1078 if (ret || num_found >= sk->nr_items)
1079 break;
1080
1081 }
1082 ret = 0;
1083err:
1084 sk->nr_items = num_found;
1085 btrfs_free_path(path);
1086 return ret;
1087}
1088
1089static noinline int btrfs_ioctl_tree_search(struct file *file,
1090 void __user *argp)
1091{
1092 struct btrfs_ioctl_search_args *args;
1093 struct inode *inode;
1094 int ret;
1095
1096 if (!capable(CAP_SYS_ADMIN))
1097 return -EPERM;
1098
1099 args = kmalloc(sizeof(*args), GFP_KERNEL);
1100 if (!args)
1101 return -ENOMEM;
1102
1103 if (copy_from_user(args, argp, sizeof(*args))) {
1104 kfree(args);
1105 return -EFAULT;
1106 }
1107 inode = fdentry(file)->d_inode;
1108 ret = search_ioctl(inode, args);
1109 if (ret == 0 && copy_to_user(argp, args, sizeof(*args)))
1110 ret = -EFAULT;
1111 kfree(args);
1112 return ret;
1113}
1114
1115/*
1116 * Search INODE_REFs to identify path name of 'dirid' directory
1117 * in a 'tree_id' tree. and sets path name to 'name'.
1118 */
1119static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info,
1120 u64 tree_id, u64 dirid, char *name)
1121{
1122 struct btrfs_root *root;
1123 struct btrfs_key key;
1124 char *ptr;
1125 int ret = -1;
1126 int slot;
1127 int len;
1128 int total_len = 0;
1129 struct btrfs_inode_ref *iref;
1130 struct extent_buffer *l;
1131 struct btrfs_path *path;
1132
1133 if (dirid == BTRFS_FIRST_FREE_OBJECTID) {
1134 name[0]='\0';
1135 return 0;
1136 }
1137
1138 path = btrfs_alloc_path();
1139 if (!path)
1140 return -ENOMEM;
1141
1142 ptr = &name[BTRFS_INO_LOOKUP_PATH_MAX];
1143
1144 key.objectid = tree_id;
1145 key.type = BTRFS_ROOT_ITEM_KEY;
1146 key.offset = (u64)-1;
1147 root = btrfs_read_fs_root_no_name(info, &key);
1148 if (IS_ERR(root)) {
1149 printk(KERN_ERR "could not find root %llu\n", tree_id);
1150 ret = -ENOENT;
1151 goto out;
1152 }
1153
1154 key.objectid = dirid;
1155 key.type = BTRFS_INODE_REF_KEY;
1156 key.offset = (u64)-1;
1157
1158 while(1) {
1159 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1160 if (ret < 0)
1161 goto out;
1162
1163 l = path->nodes[0];
1164 slot = path->slots[0];
1165 if (ret > 0 && slot > 0)
1166 slot--;
1167 btrfs_item_key_to_cpu(l, &key, slot);
1168
1169 if (ret > 0 && (key.objectid != dirid ||
1170 key.type != BTRFS_INODE_REF_KEY)) {
1171 ret = -ENOENT;
1172 goto out;
1173 }
1174
1175 iref = btrfs_item_ptr(l, slot, struct btrfs_inode_ref);
1176 len = btrfs_inode_ref_name_len(l, iref);
1177 ptr -= len + 1;
1178 total_len += len + 1;
1179 if (ptr < name)
1180 goto out;
1181
1182 *(ptr + len) = '/';
1183 read_extent_buffer(l, ptr,(unsigned long)(iref + 1), len);
1184
1185 if (key.offset == BTRFS_FIRST_FREE_OBJECTID)
1186 break;
1187
1188 btrfs_release_path(root, path);
1189 key.objectid = key.offset;
1190 key.offset = (u64)-1;
1191 dirid = key.objectid;
1192
1193 }
1194 if (ptr < name)
1195 goto out;
1196 memcpy(name, ptr, total_len);
1197 name[total_len]='\0';
1198 ret = 0;
1199out:
1200 btrfs_free_path(path);
1201 return ret;
1202}
1203
1204static noinline int btrfs_ioctl_ino_lookup(struct file *file,
1205 void __user *argp)
1206{
1207 struct btrfs_ioctl_ino_lookup_args *args;
1208 struct inode *inode;
1209 int ret;
1210
1211 if (!capable(CAP_SYS_ADMIN))
1212 return -EPERM;
1213
1214 args = kmalloc(sizeof(*args), GFP_KERNEL);
1215 if (!args)
1216 return -ENOMEM;
1217
1218 if (copy_from_user(args, argp, sizeof(*args))) {
1219 kfree(args);
1220 return -EFAULT;
1221 }
1222 inode = fdentry(file)->d_inode;
1223
1224 if (args->treeid == 0)
1225 args->treeid = BTRFS_I(inode)->root->root_key.objectid;
1226
1227 ret = btrfs_search_path_in_tree(BTRFS_I(inode)->root->fs_info,
1228 args->treeid, args->objectid,
1229 args->name);
1230
1231 if (ret == 0 && copy_to_user(argp, args, sizeof(*args)))
1232 ret = -EFAULT;
1233
1234 kfree(args);
1235 return ret;
1236}
1237
741static noinline int btrfs_ioctl_snap_destroy(struct file *file, 1238static noinline int btrfs_ioctl_snap_destroy(struct file *file,
742 void __user *arg) 1239 void __user *arg)
743{ 1240{
@@ -844,10 +1341,11 @@ out:
844 return err; 1341 return err;
845} 1342}
846 1343
847static int btrfs_ioctl_defrag(struct file *file) 1344static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
848{ 1345{
849 struct inode *inode = fdentry(file)->d_inode; 1346 struct inode *inode = fdentry(file)->d_inode;
850 struct btrfs_root *root = BTRFS_I(inode)->root; 1347 struct btrfs_root *root = BTRFS_I(inode)->root;
1348 struct btrfs_ioctl_defrag_range_args *range;
851 int ret; 1349 int ret;
852 1350
853 ret = mnt_want_write(file->f_path.mnt); 1351 ret = mnt_want_write(file->f_path.mnt);
@@ -868,7 +1366,31 @@ static int btrfs_ioctl_defrag(struct file *file)
868 ret = -EINVAL; 1366 ret = -EINVAL;
869 goto out; 1367 goto out;
870 } 1368 }
871 btrfs_defrag_file(file); 1369
1370 range = kzalloc(sizeof(*range), GFP_KERNEL);
1371 if (!range) {
1372 ret = -ENOMEM;
1373 goto out;
1374 }
1375
1376 if (argp) {
1377 if (copy_from_user(range, argp,
1378 sizeof(*range))) {
1379 ret = -EFAULT;
1380 kfree(range);
1381 goto out;
1382 }
1383 /* compression requires us to start the IO */
1384 if ((range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)) {
1385 range->flags |= BTRFS_DEFRAG_RANGE_START_IO;
1386 range->extent_thresh = (u32)-1;
1387 }
1388 } else {
1389 /* the rest are all set to zero by kzalloc */
1390 range->len = (u64)-1;
1391 }
1392 btrfs_defrag_file(file, range);
1393 kfree(range);
872 break; 1394 break;
873 } 1395 }
874out: 1396out:
@@ -959,12 +1481,17 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
959 ret = -EBADF; 1481 ret = -EBADF;
960 goto out_drop_write; 1482 goto out_drop_write;
961 } 1483 }
1484
962 src = src_file->f_dentry->d_inode; 1485 src = src_file->f_dentry->d_inode;
963 1486
964 ret = -EINVAL; 1487 ret = -EINVAL;
965 if (src == inode) 1488 if (src == inode)
966 goto out_fput; 1489 goto out_fput;
967 1490
1491 /* the src must be open for reading */
1492 if (!(src_file->f_mode & FMODE_READ))
1493 goto out_fput;
1494
968 ret = -EISDIR; 1495 ret = -EISDIR;
969 if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode)) 1496 if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode))
970 goto out_fput; 1497 goto out_fput;
@@ -1027,8 +1554,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
1027 BUG_ON(!trans); 1554 BUG_ON(!trans);
1028 1555
1029 /* punch hole in destination first */ 1556 /* punch hole in destination first */
1030 btrfs_drop_extents(trans, root, inode, off, off + len, 1557 btrfs_drop_extents(trans, inode, off, off + len, &hint_byte, 1);
1031 off + len, 0, &hint_byte, 1);
1032 1558
1033 /* clone data */ 1559 /* clone data */
1034 key.objectid = src->i_ino; 1560 key.objectid = src->i_ino;
@@ -1270,6 +1796,157 @@ out:
1270 return ret; 1796 return ret;
1271} 1797}
1272 1798
/*
 * Ioctl entry point for BTRFS_IOC_DEFAULT_SUBVOL.
 *
 * Makes the subvolume named by the u64 objectid at *argp the default
 * subvolume mounted when no subvol= option is given, by re-pointing the
 * "default" dir item in the tree of tree roots at the chosen root and
 * setting the DEFAULT_SUBVOL incompat feature bit in the superblock.
 * An objectid of 0 selects the subvolume containing the ioctl'd file.
 * CAP_SYS_ADMIN only.  Returns 0 on success or a negative errno.
 */
static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
{
	struct inode *inode = fdentry(file)->d_inode;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_root *new_root;
	struct btrfs_dir_item *di;
	struct btrfs_trans_handle *trans;
	struct btrfs_path *path;
	struct btrfs_key location;
	struct btrfs_disk_key disk_key;
	struct btrfs_super_block *disk_super;
	u64 features;
	u64 objectid = 0;
	u64 dir_id;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (copy_from_user(&objectid, argp, sizeof(objectid)))
		return -EFAULT;

	/* 0 means "the subvolume this file lives in" */
	if (!objectid)
		objectid = root->root_key.objectid;

	/* look up the root being made default and make sure it's alive */
	location.objectid = objectid;
	location.type = BTRFS_ROOT_ITEM_KEY;
	location.offset = (u64)-1;

	new_root = btrfs_read_fs_root_no_name(root->fs_info, &location);
	if (IS_ERR(new_root))
		return PTR_ERR(new_root);

	/* zero refs means the subvolume is being/has been deleted */
	if (btrfs_root_refs(&new_root->root_item) == 0)
		return -ENOENT;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
	path->leave_spinning = 1;

	/*
	 * NOTE(review): this treats a NULL return as the only failure mode
	 * of btrfs_start_transaction — confirm it cannot return ERR_PTR in
	 * this tree, otherwise the IS_ERR case would be missed here.
	 */
	trans = btrfs_start_transaction(root, 1);
	if (!trans) {
		btrfs_free_path(path);
		return -ENOMEM;
	}

	/* find the "default" dir item in the tree of tree roots */
	dir_id = btrfs_super_root_dir(&root->fs_info->super_copy);
	di = btrfs_lookup_dir_item(trans, root->fs_info->tree_root, path,
				   dir_id, "default", 7, 1);
	if (!di) {
		btrfs_free_path(path);
		btrfs_end_transaction(trans, root);
		printk(KERN_ERR "Umm, you don't have the default dir item, "
		       "this isn't going to work\n");
		return -ENOENT;
	}

	/* re-point the dir item at the new default root and persist it */
	btrfs_cpu_key_to_disk(&disk_key, &new_root->root_key);
	btrfs_set_dir_item_key(path->nodes[0], di, &disk_key);
	btrfs_mark_buffer_dirty(path->nodes[0]);
	btrfs_free_path(path);

	/*
	 * flag the fs as using a non-standard default subvolume so older
	 * kernels that don't understand it refuse to mount
	 */
	disk_super = &root->fs_info->super_copy;
	features = btrfs_super_incompat_flags(disk_super);
	if (!(features & BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL)) {
		features |= BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL;
		btrfs_set_super_incompat_flags(disk_super, features);
	}
	btrfs_end_transaction(trans, root);

	return 0;
}
1871
1872long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
1873{
1874 struct btrfs_ioctl_space_args space_args;
1875 struct btrfs_ioctl_space_info space;
1876 struct btrfs_ioctl_space_info *dest;
1877 struct btrfs_ioctl_space_info *dest_orig;
1878 struct btrfs_ioctl_space_info *user_dest;
1879 struct btrfs_space_info *info;
1880 int alloc_size;
1881 int ret = 0;
1882 int slot_count = 0;
1883
1884 if (copy_from_user(&space_args,
1885 (struct btrfs_ioctl_space_args __user *)arg,
1886 sizeof(space_args)))
1887 return -EFAULT;
1888
1889 /* first we count slots */
1890 rcu_read_lock();
1891 list_for_each_entry_rcu(info, &root->fs_info->space_info, list)
1892 slot_count++;
1893 rcu_read_unlock();
1894
1895 /* space_slots == 0 means they are asking for a count */
1896 if (space_args.space_slots == 0) {
1897 space_args.total_spaces = slot_count;
1898 goto out;
1899 }
1900 alloc_size = sizeof(*dest) * slot_count;
1901 /* we generally have at most 6 or so space infos, one for each raid
1902 * level. So, a whole page should be more than enough for everyone
1903 */
1904 if (alloc_size > PAGE_CACHE_SIZE)
1905 return -ENOMEM;
1906
1907 space_args.total_spaces = 0;
1908 dest = kmalloc(alloc_size, GFP_NOFS);
1909 if (!dest)
1910 return -ENOMEM;
1911 dest_orig = dest;
1912
1913 /* now we have a buffer to copy into */
1914 rcu_read_lock();
1915 list_for_each_entry_rcu(info, &root->fs_info->space_info, list) {
1916 /* make sure we don't copy more than we allocated
1917 * in our buffer
1918 */
1919 if (slot_count == 0)
1920 break;
1921 slot_count--;
1922
1923 /* make sure userland has enough room in their buffer */
1924 if (space_args.total_spaces >= space_args.space_slots)
1925 break;
1926
1927 space.flags = info->flags;
1928 space.total_bytes = info->total_bytes;
1929 space.used_bytes = info->bytes_used;
1930 memcpy(dest, &space, sizeof(space));
1931 dest++;
1932 space_args.total_spaces++;
1933 }
1934 rcu_read_unlock();
1935
1936 user_dest = (struct btrfs_ioctl_space_info *)
1937 (arg + sizeof(struct btrfs_ioctl_space_args));
1938
1939 if (copy_to_user(user_dest, dest_orig, alloc_size))
1940 ret = -EFAULT;
1941
1942 kfree(dest_orig);
1943out:
1944 if (ret == 0 && copy_to_user(arg, &space_args, sizeof(space_args)))
1945 ret = -EFAULT;
1946
1947 return ret;
1948}
1949
1273/* 1950/*
1274 * there are many ways the trans_start and trans_end ioctls can lead 1951 * there are many ways the trans_start and trans_end ioctls can lead
1275 * to deadlocks. They should only be used by applications that 1952 * to deadlocks. They should only be used by applications that
@@ -1316,8 +1993,12 @@ long btrfs_ioctl(struct file *file, unsigned int
1316 return btrfs_ioctl_snap_create(file, argp, 1); 1993 return btrfs_ioctl_snap_create(file, argp, 1);
1317 case BTRFS_IOC_SNAP_DESTROY: 1994 case BTRFS_IOC_SNAP_DESTROY:
1318 return btrfs_ioctl_snap_destroy(file, argp); 1995 return btrfs_ioctl_snap_destroy(file, argp);
1996 case BTRFS_IOC_DEFAULT_SUBVOL:
1997 return btrfs_ioctl_default_subvol(file, argp);
1319 case BTRFS_IOC_DEFRAG: 1998 case BTRFS_IOC_DEFRAG:
1320 return btrfs_ioctl_defrag(file); 1999 return btrfs_ioctl_defrag(file, NULL);
2000 case BTRFS_IOC_DEFRAG_RANGE:
2001 return btrfs_ioctl_defrag(file, argp);
1321 case BTRFS_IOC_RESIZE: 2002 case BTRFS_IOC_RESIZE:
1322 return btrfs_ioctl_resize(root, argp); 2003 return btrfs_ioctl_resize(root, argp);
1323 case BTRFS_IOC_ADD_DEV: 2004 case BTRFS_IOC_ADD_DEV:
@@ -1334,6 +2015,12 @@ long btrfs_ioctl(struct file *file, unsigned int
1334 return btrfs_ioctl_trans_start(file); 2015 return btrfs_ioctl_trans_start(file);
1335 case BTRFS_IOC_TRANS_END: 2016 case BTRFS_IOC_TRANS_END:
1336 return btrfs_ioctl_trans_end(file); 2017 return btrfs_ioctl_trans_end(file);
2018 case BTRFS_IOC_TREE_SEARCH:
2019 return btrfs_ioctl_tree_search(file, argp);
2020 case BTRFS_IOC_INO_LOOKUP:
2021 return btrfs_ioctl_ino_lookup(file, argp);
2022 case BTRFS_IOC_SPACE_INFO:
2023 return btrfs_ioctl_space_info(root, argp);
1337 case BTRFS_IOC_SYNC: 2024 case BTRFS_IOC_SYNC:
1338 btrfs_sync_fs(file->f_dentry->d_sb, 1); 2025 btrfs_sync_fs(file->f_dentry->d_sb, 1);
1339 return 0; 2026 return 0;
diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h
index bc49914475eb..424694aa517f 100644
--- a/fs/btrfs/ioctl.h
+++ b/fs/btrfs/ioctl.h
@@ -30,12 +30,114 @@ struct btrfs_ioctl_vol_args {
30 char name[BTRFS_PATH_NAME_MAX + 1]; 30 char name[BTRFS_PATH_NAME_MAX + 1];
31}; 31};
32 32
33#define BTRFS_INO_LOOKUP_PATH_MAX 4080
34struct btrfs_ioctl_ino_lookup_args {
35 __u64 treeid;
36 __u64 objectid;
37 char name[BTRFS_INO_LOOKUP_PATH_MAX];
38};
39
40struct btrfs_ioctl_search_key {
41 /* which root are we searching. 0 is the tree of tree roots */
42 __u64 tree_id;
43
44 /* keys returned will be >= min and <= max */
45 __u64 min_objectid;
46 __u64 max_objectid;
47
48 /* keys returned will be >= min and <= max */
49 __u64 min_offset;
50 __u64 max_offset;
51
52 /* max and min transids to search for */
53 __u64 min_transid;
54 __u64 max_transid;
55
56 /* keys returned will be >= min and <= max */
57 __u32 min_type;
58 __u32 max_type;
59
60 /*
61 * how many items did userland ask for, and how many are we
62 * returning
63 */
64 __u32 nr_items;
65
66 /* align to 64 bits */
67 __u32 unused;
68
69 /* some extra for later */
70 __u64 unused1;
71 __u64 unused2;
72 __u64 unused3;
73 __u64 unused4;
74};
75
76struct btrfs_ioctl_search_header {
77 __u64 transid;
78 __u64 objectid;
79 __u64 offset;
80 __u32 type;
81 __u32 len;
82};
83
84#define BTRFS_SEARCH_ARGS_BUFSIZE (4096 - sizeof(struct btrfs_ioctl_search_key))
85/*
86 * the buf is an array of search headers where
87 * each header is followed by the actual item
88 * the type field is expanded to 32 bits for alignment
89 */
90struct btrfs_ioctl_search_args {
91 struct btrfs_ioctl_search_key key;
92 char buf[BTRFS_SEARCH_ARGS_BUFSIZE];
93};
94
33struct btrfs_ioctl_clone_range_args { 95struct btrfs_ioctl_clone_range_args {
34 __s64 src_fd; 96 __s64 src_fd;
35 __u64 src_offset, src_length; 97 __u64 src_offset, src_length;
36 __u64 dest_offset; 98 __u64 dest_offset;
37}; 99};
38 100
101/* flags for the defrag range ioctl */
102#define BTRFS_DEFRAG_RANGE_COMPRESS 1
103#define BTRFS_DEFRAG_RANGE_START_IO 2
104
105struct btrfs_ioctl_defrag_range_args {
106 /* start of the defrag operation */
107 __u64 start;
108
109 /* number of bytes to defrag, use (u64)-1 to say all */
110 __u64 len;
111
112 /*
113 * flags for the operation, which can include turning
114 * on compression for this one defrag
115 */
116 __u64 flags;
117
118 /*
119 * any extent bigger than this will be considered
120 * already defragged. Use 0 to take the kernel default
121 * Use 1 to say every single extent must be rewritten
122 */
123 __u32 extent_thresh;
124
125 /* spare for later */
126 __u32 unused[5];
127};
128
129struct btrfs_ioctl_space_info {
130 __u64 flags;
131 __u64 total_bytes;
132 __u64 used_bytes;
133};
134
135struct btrfs_ioctl_space_args {
136 __u64 space_slots;
137 __u64 total_spaces;
138 struct btrfs_ioctl_space_info spaces[0];
139};
140
39#define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \ 141#define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \
40 struct btrfs_ioctl_vol_args) 142 struct btrfs_ioctl_vol_args)
41#define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \ 143#define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \
@@ -67,4 +169,13 @@ struct btrfs_ioctl_clone_range_args {
67 struct btrfs_ioctl_vol_args) 169 struct btrfs_ioctl_vol_args)
68#define BTRFS_IOC_SNAP_DESTROY _IOW(BTRFS_IOCTL_MAGIC, 15, \ 170#define BTRFS_IOC_SNAP_DESTROY _IOW(BTRFS_IOCTL_MAGIC, 15, \
69 struct btrfs_ioctl_vol_args) 171 struct btrfs_ioctl_vol_args)
172#define BTRFS_IOC_DEFRAG_RANGE _IOW(BTRFS_IOCTL_MAGIC, 16, \
173 struct btrfs_ioctl_defrag_range_args)
174#define BTRFS_IOC_TREE_SEARCH _IOWR(BTRFS_IOCTL_MAGIC, 17, \
175 struct btrfs_ioctl_search_args)
176#define BTRFS_IOC_INO_LOOKUP _IOWR(BTRFS_IOCTL_MAGIC, 18, \
177 struct btrfs_ioctl_ino_lookup_args)
178#define BTRFS_IOC_DEFAULT_SUBVOL _IOW(BTRFS_IOCTL_MAGIC, 19, u64)
179#define BTRFS_IOC_SPACE_INFO _IOWR(BTRFS_IOCTL_MAGIC, 20, \
180 struct btrfs_ioctl_space_args)
70#endif 181#endif
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c
index 1c36e5cd8f55..6151f2ea38bb 100644
--- a/fs/btrfs/locking.c
+++ b/fs/btrfs/locking.c
@@ -16,7 +16,6 @@
16 * Boston, MA 021110-1307, USA. 16 * Boston, MA 021110-1307, USA.
17 */ 17 */
18#include <linux/sched.h> 18#include <linux/sched.h>
19#include <linux/gfp.h>
20#include <linux/pagemap.h> 19#include <linux/pagemap.h>
21#include <linux/spinlock.h> 20#include <linux/spinlock.h>
22#include <linux/page-flags.h> 21#include <linux/page-flags.h>
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 5799bc46a309..a127c0ebb2dc 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -16,7 +16,6 @@
16 * Boston, MA 021110-1307, USA. 16 * Boston, MA 021110-1307, USA.
17 */ 17 */
18 18
19#include <linux/gfp.h>
20#include <linux/slab.h> 19#include <linux/slab.h>
21#include <linux/blkdev.h> 20#include <linux/blkdev.h>
22#include <linux/writeback.h> 21#include <linux/writeback.h>
@@ -174,7 +173,6 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
174 if (!entry) 173 if (!entry)
175 return -ENOMEM; 174 return -ENOMEM;
176 175
177 mutex_lock(&tree->mutex);
178 entry->file_offset = file_offset; 176 entry->file_offset = file_offset;
179 entry->start = start; 177 entry->start = start;
180 entry->len = len; 178 entry->len = len;
@@ -190,16 +188,17 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
190 INIT_LIST_HEAD(&entry->list); 188 INIT_LIST_HEAD(&entry->list);
191 INIT_LIST_HEAD(&entry->root_extent_list); 189 INIT_LIST_HEAD(&entry->root_extent_list);
192 190
191 spin_lock(&tree->lock);
193 node = tree_insert(&tree->tree, file_offset, 192 node = tree_insert(&tree->tree, file_offset,
194 &entry->rb_node); 193 &entry->rb_node);
195 BUG_ON(node); 194 BUG_ON(node);
195 spin_unlock(&tree->lock);
196 196
197 spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); 197 spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock);
198 list_add_tail(&entry->root_extent_list, 198 list_add_tail(&entry->root_extent_list,
199 &BTRFS_I(inode)->root->fs_info->ordered_extents); 199 &BTRFS_I(inode)->root->fs_info->ordered_extents);
200 spin_unlock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); 200 spin_unlock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock);
201 201
202 mutex_unlock(&tree->mutex);
203 BUG_ON(node); 202 BUG_ON(node);
204 return 0; 203 return 0;
205} 204}
@@ -216,9 +215,9 @@ int btrfs_add_ordered_sum(struct inode *inode,
216 struct btrfs_ordered_inode_tree *tree; 215 struct btrfs_ordered_inode_tree *tree;
217 216
218 tree = &BTRFS_I(inode)->ordered_tree; 217 tree = &BTRFS_I(inode)->ordered_tree;
219 mutex_lock(&tree->mutex); 218 spin_lock(&tree->lock);
220 list_add_tail(&sum->list, &entry->list); 219 list_add_tail(&sum->list, &entry->list);
221 mutex_unlock(&tree->mutex); 220 spin_unlock(&tree->lock);
222 return 0; 221 return 0;
223} 222}
224 223
@@ -232,15 +231,16 @@ int btrfs_add_ordered_sum(struct inode *inode,
232 * to make sure this function only returns 1 once for a given ordered extent. 231 * to make sure this function only returns 1 once for a given ordered extent.
233 */ 232 */
234int btrfs_dec_test_ordered_pending(struct inode *inode, 233int btrfs_dec_test_ordered_pending(struct inode *inode,
234 struct btrfs_ordered_extent **cached,
235 u64 file_offset, u64 io_size) 235 u64 file_offset, u64 io_size)
236{ 236{
237 struct btrfs_ordered_inode_tree *tree; 237 struct btrfs_ordered_inode_tree *tree;
238 struct rb_node *node; 238 struct rb_node *node;
239 struct btrfs_ordered_extent *entry; 239 struct btrfs_ordered_extent *entry = NULL;
240 int ret; 240 int ret;
241 241
242 tree = &BTRFS_I(inode)->ordered_tree; 242 tree = &BTRFS_I(inode)->ordered_tree;
243 mutex_lock(&tree->mutex); 243 spin_lock(&tree->lock);
244 node = tree_search(tree, file_offset); 244 node = tree_search(tree, file_offset);
245 if (!node) { 245 if (!node) {
246 ret = 1; 246 ret = 1;
@@ -264,7 +264,11 @@ int btrfs_dec_test_ordered_pending(struct inode *inode,
264 else 264 else
265 ret = 1; 265 ret = 1;
266out: 266out:
267 mutex_unlock(&tree->mutex); 267 if (!ret && cached && entry) {
268 *cached = entry;
269 atomic_inc(&entry->refs);
270 }
271 spin_unlock(&tree->lock);
268 return ret == 0; 272 return ret == 0;
269} 273}
270 274
@@ -291,28 +295,30 @@ int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
291 295
292/* 296/*
293 * remove an ordered extent from the tree. No references are dropped 297 * remove an ordered extent from the tree. No references are dropped
294 * but, anyone waiting on this extent is woken up. 298 * and you must wake_up entry->wait. You must hold the tree lock
299 * while you call this function.
295 */ 300 */
296int btrfs_remove_ordered_extent(struct inode *inode, 301static int __btrfs_remove_ordered_extent(struct inode *inode,
297 struct btrfs_ordered_extent *entry) 302 struct btrfs_ordered_extent *entry)
298{ 303{
299 struct btrfs_ordered_inode_tree *tree; 304 struct btrfs_ordered_inode_tree *tree;
305 struct btrfs_root *root = BTRFS_I(inode)->root;
300 struct rb_node *node; 306 struct rb_node *node;
301 307
302 tree = &BTRFS_I(inode)->ordered_tree; 308 tree = &BTRFS_I(inode)->ordered_tree;
303 mutex_lock(&tree->mutex);
304 node = &entry->rb_node; 309 node = &entry->rb_node;
305 rb_erase(node, &tree->tree); 310 rb_erase(node, &tree->tree);
306 tree->last = NULL; 311 tree->last = NULL;
307 set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags); 312 set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags);
308 313
309 spin_lock(&BTRFS_I(inode)->accounting_lock); 314 spin_lock(&BTRFS_I(inode)->accounting_lock);
315 WARN_ON(!BTRFS_I(inode)->outstanding_extents);
310 BTRFS_I(inode)->outstanding_extents--; 316 BTRFS_I(inode)->outstanding_extents--;
311 spin_unlock(&BTRFS_I(inode)->accounting_lock); 317 spin_unlock(&BTRFS_I(inode)->accounting_lock);
312 btrfs_unreserve_metadata_for_delalloc(BTRFS_I(inode)->root, 318 btrfs_unreserve_metadata_for_delalloc(BTRFS_I(inode)->root,
313 inode, 1); 319 inode, 1);
314 320
315 spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); 321 spin_lock(&root->fs_info->ordered_extent_lock);
316 list_del_init(&entry->root_extent_list); 322 list_del_init(&entry->root_extent_list);
317 323
318 /* 324 /*
@@ -324,18 +330,36 @@ int btrfs_remove_ordered_extent(struct inode *inode,
324 !mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY)) { 330 !mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY)) {
325 list_del_init(&BTRFS_I(inode)->ordered_operations); 331 list_del_init(&BTRFS_I(inode)->ordered_operations);
326 } 332 }
327 spin_unlock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); 333 spin_unlock(&root->fs_info->ordered_extent_lock);
328 334
329 mutex_unlock(&tree->mutex);
330 wake_up(&entry->wait);
331 return 0; 335 return 0;
332} 336}
333 337
334/* 338/*
339 * remove an ordered extent from the tree. No references are dropped
340 * but any waiters are woken.
341 */
342int btrfs_remove_ordered_extent(struct inode *inode,
343 struct btrfs_ordered_extent *entry)
344{
345 struct btrfs_ordered_inode_tree *tree;
346 int ret;
347
348 tree = &BTRFS_I(inode)->ordered_tree;
349 spin_lock(&tree->lock);
350 ret = __btrfs_remove_ordered_extent(inode, entry);
351 spin_unlock(&tree->lock);
352 wake_up(&entry->wait);
353
354 return ret;
355}
356
357/*
335 * wait for all the ordered extents in a root. This is done when balancing 358 * wait for all the ordered extents in a root. This is done when balancing
336 * space between drives. 359 * space between drives.
337 */ 360 */
338int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only) 361int btrfs_wait_ordered_extents(struct btrfs_root *root,
362 int nocow_only, int delay_iput)
339{ 363{
340 struct list_head splice; 364 struct list_head splice;
341 struct list_head *cur; 365 struct list_head *cur;
@@ -372,7 +396,10 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only)
372 if (inode) { 396 if (inode) {
373 btrfs_start_ordered_extent(inode, ordered, 1); 397 btrfs_start_ordered_extent(inode, ordered, 1);
374 btrfs_put_ordered_extent(ordered); 398 btrfs_put_ordered_extent(ordered);
375 iput(inode); 399 if (delay_iput)
400 btrfs_add_delayed_iput(inode);
401 else
402 iput(inode);
376 } else { 403 } else {
377 btrfs_put_ordered_extent(ordered); 404 btrfs_put_ordered_extent(ordered);
378 } 405 }
@@ -430,7 +457,7 @@ again:
430 btrfs_wait_ordered_range(inode, 0, (u64)-1); 457 btrfs_wait_ordered_range(inode, 0, (u64)-1);
431 else 458 else
432 filemap_flush(inode->i_mapping); 459 filemap_flush(inode->i_mapping);
433 iput(inode); 460 btrfs_add_delayed_iput(inode);
434 } 461 }
435 462
436 cond_resched(); 463 cond_resched();
@@ -546,7 +573,7 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode,
546 struct btrfs_ordered_extent *entry = NULL; 573 struct btrfs_ordered_extent *entry = NULL;
547 574
548 tree = &BTRFS_I(inode)->ordered_tree; 575 tree = &BTRFS_I(inode)->ordered_tree;
549 mutex_lock(&tree->mutex); 576 spin_lock(&tree->lock);
550 node = tree_search(tree, file_offset); 577 node = tree_search(tree, file_offset);
551 if (!node) 578 if (!node)
552 goto out; 579 goto out;
@@ -557,7 +584,7 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode,
557 if (entry) 584 if (entry)
558 atomic_inc(&entry->refs); 585 atomic_inc(&entry->refs);
559out: 586out:
560 mutex_unlock(&tree->mutex); 587 spin_unlock(&tree->lock);
561 return entry; 588 return entry;
562} 589}
563 590
@@ -573,7 +600,7 @@ btrfs_lookup_first_ordered_extent(struct inode *inode, u64 file_offset)
573 struct btrfs_ordered_extent *entry = NULL; 600 struct btrfs_ordered_extent *entry = NULL;
574 601
575 tree = &BTRFS_I(inode)->ordered_tree; 602 tree = &BTRFS_I(inode)->ordered_tree;
576 mutex_lock(&tree->mutex); 603 spin_lock(&tree->lock);
577 node = tree_search(tree, file_offset); 604 node = tree_search(tree, file_offset);
578 if (!node) 605 if (!node)
579 goto out; 606 goto out;
@@ -581,7 +608,7 @@ btrfs_lookup_first_ordered_extent(struct inode *inode, u64 file_offset)
581 entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); 608 entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
582 atomic_inc(&entry->refs); 609 atomic_inc(&entry->refs);
583out: 610out:
584 mutex_unlock(&tree->mutex); 611 spin_unlock(&tree->lock);
585 return entry; 612 return entry;
586} 613}
587 614
@@ -589,7 +616,7 @@ out:
589 * After an extent is done, call this to conditionally update the on disk 616 * After an extent is done, call this to conditionally update the on disk
590 * i_size. i_size is updated to cover any fully written part of the file. 617 * i_size. i_size is updated to cover any fully written part of the file.
591 */ 618 */
592int btrfs_ordered_update_i_size(struct inode *inode, 619int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
593 struct btrfs_ordered_extent *ordered) 620 struct btrfs_ordered_extent *ordered)
594{ 621{
595 struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree; 622 struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree;
@@ -597,18 +624,32 @@ int btrfs_ordered_update_i_size(struct inode *inode,
597 u64 disk_i_size; 624 u64 disk_i_size;
598 u64 new_i_size; 625 u64 new_i_size;
599 u64 i_size_test; 626 u64 i_size_test;
627 u64 i_size = i_size_read(inode);
600 struct rb_node *node; 628 struct rb_node *node;
629 struct rb_node *prev = NULL;
601 struct btrfs_ordered_extent *test; 630 struct btrfs_ordered_extent *test;
631 int ret = 1;
602 632
603 mutex_lock(&tree->mutex); 633 if (ordered)
634 offset = entry_end(ordered);
635 else
636 offset = ALIGN(offset, BTRFS_I(inode)->root->sectorsize);
637
638 spin_lock(&tree->lock);
604 disk_i_size = BTRFS_I(inode)->disk_i_size; 639 disk_i_size = BTRFS_I(inode)->disk_i_size;
605 640
641 /* truncate file */
642 if (disk_i_size > i_size) {
643 BTRFS_I(inode)->disk_i_size = i_size;
644 ret = 0;
645 goto out;
646 }
647
606 /* 648 /*
607 * if the disk i_size is already at the inode->i_size, or 649 * if the disk i_size is already at the inode->i_size, or
608 * this ordered extent is inside the disk i_size, we're done 650 * this ordered extent is inside the disk i_size, we're done
609 */ 651 */
610 if (disk_i_size >= inode->i_size || 652 if (disk_i_size == i_size || offset <= disk_i_size) {
611 ordered->file_offset + ordered->len <= disk_i_size) {
612 goto out; 653 goto out;
613 } 654 }
614 655
@@ -616,8 +657,7 @@ int btrfs_ordered_update_i_size(struct inode *inode,
616 * we can't update the disk_isize if there are delalloc bytes 657 * we can't update the disk_isize if there are delalloc bytes
617 * between disk_i_size and this ordered extent 658 * between disk_i_size and this ordered extent
618 */ 659 */
619 if (test_range_bit(io_tree, disk_i_size, 660 if (test_range_bit(io_tree, disk_i_size, offset - 1,
620 ordered->file_offset + ordered->len - 1,
621 EXTENT_DELALLOC, 0, NULL)) { 661 EXTENT_DELALLOC, 0, NULL)) {
622 goto out; 662 goto out;
623 } 663 }
@@ -626,20 +666,32 @@ int btrfs_ordered_update_i_size(struct inode *inode,
626 * if we find an ordered extent then we can't update disk i_size 666 * if we find an ordered extent then we can't update disk i_size
627 * yet 667 * yet
628 */ 668 */
629 node = &ordered->rb_node; 669 if (ordered) {
630 while (1) { 670 node = rb_prev(&ordered->rb_node);
631 node = rb_prev(node); 671 } else {
632 if (!node) 672 prev = tree_search(tree, offset);
633 break; 673 /*
674 * we insert file extents without involving ordered struct,
675 * so there should be no ordered struct cover this offset
676 */
677 if (prev) {
678 test = rb_entry(prev, struct btrfs_ordered_extent,
679 rb_node);
680 BUG_ON(offset_in_entry(test, offset));
681 }
682 node = prev;
683 }
684 while (node) {
634 test = rb_entry(node, struct btrfs_ordered_extent, rb_node); 685 test = rb_entry(node, struct btrfs_ordered_extent, rb_node);
635 if (test->file_offset + test->len <= disk_i_size) 686 if (test->file_offset + test->len <= disk_i_size)
636 break; 687 break;
637 if (test->file_offset >= inode->i_size) 688 if (test->file_offset >= i_size)
638 break; 689 break;
639 if (test->file_offset >= disk_i_size) 690 if (test->file_offset >= disk_i_size)
640 goto out; 691 goto out;
692 node = rb_prev(node);
641 } 693 }
642 new_i_size = min_t(u64, entry_end(ordered), i_size_read(inode)); 694 new_i_size = min_t(u64, offset, i_size);
643 695
644 /* 696 /*
645 * at this point, we know we can safely update i_size to at least 697 * at this point, we know we can safely update i_size to at least
@@ -647,7 +699,14 @@ int btrfs_ordered_update_i_size(struct inode *inode,
647 * walk forward and see if ios from higher up in the file have 699 * walk forward and see if ios from higher up in the file have
648 * finished. 700 * finished.
649 */ 701 */
650 node = rb_next(&ordered->rb_node); 702 if (ordered) {
703 node = rb_next(&ordered->rb_node);
704 } else {
705 if (prev)
706 node = rb_next(prev);
707 else
708 node = rb_first(&tree->tree);
709 }
651 i_size_test = 0; 710 i_size_test = 0;
652 if (node) { 711 if (node) {
653 /* 712 /*
@@ -655,10 +714,10 @@ int btrfs_ordered_update_i_size(struct inode *inode,
655 * between our ordered extent and the next one. 714 * between our ordered extent and the next one.
656 */ 715 */
657 test = rb_entry(node, struct btrfs_ordered_extent, rb_node); 716 test = rb_entry(node, struct btrfs_ordered_extent, rb_node);
658 if (test->file_offset > entry_end(ordered)) 717 if (test->file_offset > offset)
659 i_size_test = test->file_offset; 718 i_size_test = test->file_offset;
660 } else { 719 } else {
661 i_size_test = i_size_read(inode); 720 i_size_test = i_size;
662 } 721 }
663 722
664 /* 723 /*
@@ -667,15 +726,25 @@ int btrfs_ordered_update_i_size(struct inode *inode,
667 * are no delalloc bytes in this area, it is safe to update 726 * are no delalloc bytes in this area, it is safe to update
668 * disk_i_size to the end of the region. 727 * disk_i_size to the end of the region.
669 */ 728 */
670 if (i_size_test > entry_end(ordered) && 729 if (i_size_test > offset &&
671 !test_range_bit(io_tree, entry_end(ordered), i_size_test - 1, 730 !test_range_bit(io_tree, offset, i_size_test - 1,
672 EXTENT_DELALLOC, 0, NULL)) { 731 EXTENT_DELALLOC, 0, NULL)) {
673 new_i_size = min_t(u64, i_size_test, i_size_read(inode)); 732 new_i_size = min_t(u64, i_size_test, i_size);
674 } 733 }
675 BTRFS_I(inode)->disk_i_size = new_i_size; 734 BTRFS_I(inode)->disk_i_size = new_i_size;
735 ret = 0;
676out: 736out:
677 mutex_unlock(&tree->mutex); 737 /*
678 return 0; 738 * we need to remove the ordered extent with the tree lock held
739 * so that other people calling this function don't find our fully
740 * processed ordered entry and skip updating the i_size
741 */
742 if (ordered)
743 __btrfs_remove_ordered_extent(inode, ordered);
744 spin_unlock(&tree->lock);
745 if (ordered)
746 wake_up(&ordered->wait);
747 return ret;
679} 748}
680 749
681/* 750/*
@@ -699,7 +768,7 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
699 if (!ordered) 768 if (!ordered)
700 return 1; 769 return 1;
701 770
702 mutex_lock(&tree->mutex); 771 spin_lock(&tree->lock);
703 list_for_each_entry_reverse(ordered_sum, &ordered->list, list) { 772 list_for_each_entry_reverse(ordered_sum, &ordered->list, list) {
704 if (disk_bytenr >= ordered_sum->bytenr) { 773 if (disk_bytenr >= ordered_sum->bytenr) {
705 num_sectors = ordered_sum->len / sectorsize; 774 num_sectors = ordered_sum->len / sectorsize;
@@ -714,7 +783,7 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
714 } 783 }
715 } 784 }
716out: 785out:
717 mutex_unlock(&tree->mutex); 786 spin_unlock(&tree->lock);
718 btrfs_put_ordered_extent(ordered); 787 btrfs_put_ordered_extent(ordered);
719 return ret; 788 return ret;
720} 789}
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index f82e87488ca8..c82f76a9f040 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -21,7 +21,7 @@
21 21
22/* one of these per inode */ 22/* one of these per inode */
23struct btrfs_ordered_inode_tree { 23struct btrfs_ordered_inode_tree {
24 struct mutex mutex; 24 spinlock_t lock;
25 struct rb_root tree; 25 struct rb_root tree;
26 struct rb_node *last; 26 struct rb_node *last;
27}; 27};
@@ -128,8 +128,8 @@ static inline int btrfs_ordered_sum_size(struct btrfs_root *root,
128static inline void 128static inline void
129btrfs_ordered_inode_tree_init(struct btrfs_ordered_inode_tree *t) 129btrfs_ordered_inode_tree_init(struct btrfs_ordered_inode_tree *t)
130{ 130{
131 mutex_init(&t->mutex); 131 spin_lock_init(&t->lock);
132 t->tree.rb_node = NULL; 132 t->tree = RB_ROOT;
133 t->last = NULL; 133 t->last = NULL;
134} 134}
135 135
@@ -137,7 +137,8 @@ int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry);
137int btrfs_remove_ordered_extent(struct inode *inode, 137int btrfs_remove_ordered_extent(struct inode *inode,
138 struct btrfs_ordered_extent *entry); 138 struct btrfs_ordered_extent *entry);
139int btrfs_dec_test_ordered_pending(struct inode *inode, 139int btrfs_dec_test_ordered_pending(struct inode *inode,
140 u64 file_offset, u64 io_size); 140 struct btrfs_ordered_extent **cached,
141 u64 file_offset, u64 io_size);
141int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, 142int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
142 u64 start, u64 len, u64 disk_len, int tyep); 143 u64 start, u64 len, u64 disk_len, int tyep);
143int btrfs_add_ordered_sum(struct inode *inode, 144int btrfs_add_ordered_sum(struct inode *inode,
@@ -150,12 +151,13 @@ void btrfs_start_ordered_extent(struct inode *inode,
150int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len); 151int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len);
151struct btrfs_ordered_extent * 152struct btrfs_ordered_extent *
152btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset); 153btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset);
153int btrfs_ordered_update_i_size(struct inode *inode, 154int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
154 struct btrfs_ordered_extent *ordered); 155 struct btrfs_ordered_extent *ordered);
155int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum); 156int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum);
156int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only);
157int btrfs_run_ordered_operations(struct btrfs_root *root, int wait); 157int btrfs_run_ordered_operations(struct btrfs_root *root, int wait);
158int btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, 158int btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
159 struct btrfs_root *root, 159 struct btrfs_root *root,
160 struct inode *inode); 160 struct inode *inode);
161int btrfs_wait_ordered_extents(struct btrfs_root *root,
162 int nocow_only, int delay_iput);
161#endif 163#endif
diff --git a/fs/btrfs/ref-cache.c b/fs/btrfs/ref-cache.c
index d0cc62bccb94..a97314cf6bd6 100644
--- a/fs/btrfs/ref-cache.c
+++ b/fs/btrfs/ref-cache.c
@@ -17,6 +17,7 @@
17 */ 17 */
18 18
19#include <linux/sched.h> 19#include <linux/sched.h>
20#include <linux/slab.h>
20#include <linux/sort.h> 21#include <linux/sort.h>
21#include "ctree.h" 22#include "ctree.h"
22#include "ref-cache.h" 23#include "ref-cache.h"
diff --git a/fs/btrfs/ref-cache.h b/fs/btrfs/ref-cache.h
index bc283ad2db73..e2a55cb2072b 100644
--- a/fs/btrfs/ref-cache.h
+++ b/fs/btrfs/ref-cache.h
@@ -52,7 +52,7 @@ static inline size_t btrfs_leaf_ref_size(int nr_extents)
52 52
53static inline void btrfs_leaf_ref_tree_init(struct btrfs_leaf_ref_tree *tree) 53static inline void btrfs_leaf_ref_tree_init(struct btrfs_leaf_ref_tree *tree)
54{ 54{
55 tree->root.rb_node = NULL; 55 tree->root = RB_ROOT;
56 INIT_LIST_HEAD(&tree->list); 56 INIT_LIST_HEAD(&tree->list);
57 spin_lock_init(&tree->lock); 57 spin_lock_init(&tree->lock);
58} 58}
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index cfcc93c93a7b..e558dd941ded 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -21,6 +21,7 @@
21#include <linux/writeback.h> 21#include <linux/writeback.h>
22#include <linux/blkdev.h> 22#include <linux/blkdev.h>
23#include <linux/rbtree.h> 23#include <linux/rbtree.h>
24#include <linux/slab.h>
24#include "ctree.h" 25#include "ctree.h"
25#include "disk-io.h" 26#include "disk-io.h"
26#include "transaction.h" 27#include "transaction.h"
@@ -170,14 +171,14 @@ struct async_merge {
170 171
171static void mapping_tree_init(struct mapping_tree *tree) 172static void mapping_tree_init(struct mapping_tree *tree)
172{ 173{
173 tree->rb_root.rb_node = NULL; 174 tree->rb_root = RB_ROOT;
174 spin_lock_init(&tree->lock); 175 spin_lock_init(&tree->lock);
175} 176}
176 177
177static void backref_cache_init(struct backref_cache *cache) 178static void backref_cache_init(struct backref_cache *cache)
178{ 179{
179 int i; 180 int i;
180 cache->rb_root.rb_node = NULL; 181 cache->rb_root = RB_ROOT;
181 for (i = 0; i < BTRFS_MAX_LEVEL; i++) 182 for (i = 0; i < BTRFS_MAX_LEVEL; i++)
182 INIT_LIST_HEAD(&cache->pending[i]); 183 INIT_LIST_HEAD(&cache->pending[i]);
183 spin_lock_init(&cache->lock); 184 spin_lock_init(&cache->lock);
@@ -1561,6 +1562,20 @@ static int invalidate_extent_cache(struct btrfs_root *root,
1561 return 0; 1562 return 0;
1562} 1563}
1563 1564
1565static void put_inodes(struct list_head *list)
1566{
1567 struct inodevec *ivec;
1568 while (!list_empty(list)) {
1569 ivec = list_entry(list->next, struct inodevec, list);
1570 list_del(&ivec->list);
1571 while (ivec->nr > 0) {
1572 ivec->nr--;
1573 iput(ivec->inode[ivec->nr]);
1574 }
1575 kfree(ivec);
1576 }
1577}
1578
1564static int find_next_key(struct btrfs_path *path, int level, 1579static int find_next_key(struct btrfs_path *path, int level,
1565 struct btrfs_key *key) 1580 struct btrfs_key *key)
1566 1581
@@ -1723,6 +1738,11 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
1723 1738
1724 btrfs_btree_balance_dirty(root, nr); 1739 btrfs_btree_balance_dirty(root, nr);
1725 1740
1741 /*
1742 * put inodes outside transaction, otherwise we may deadlock.
1743 */
1744 put_inodes(&inode_list);
1745
1726 if (replaced && rc->stage == UPDATE_DATA_PTRS) 1746 if (replaced && rc->stage == UPDATE_DATA_PTRS)
1727 invalidate_extent_cache(root, &key, &next_key); 1747 invalidate_extent_cache(root, &key, &next_key);
1728 } 1748 }
@@ -1752,19 +1772,7 @@ out:
1752 1772
1753 btrfs_btree_balance_dirty(root, nr); 1773 btrfs_btree_balance_dirty(root, nr);
1754 1774
1755 /* 1775 put_inodes(&inode_list);
1756 * put inodes while we aren't holding the tree locks
1757 */
1758 while (!list_empty(&inode_list)) {
1759 struct inodevec *ivec;
1760 ivec = list_entry(inode_list.next, struct inodevec, list);
1761 list_del(&ivec->list);
1762 while (ivec->nr > 0) {
1763 ivec->nr--;
1764 iput(ivec->inode[ivec->nr]);
1765 }
1766 kfree(ivec);
1767 }
1768 1776
1769 if (replaced && rc->stage == UPDATE_DATA_PTRS) 1777 if (replaced && rc->stage == UPDATE_DATA_PTRS)
1770 invalidate_extent_cache(root, &key, &next_key); 1778 invalidate_extent_cache(root, &key, &next_key);
@@ -2652,7 +2660,7 @@ static int relocate_file_extent_cluster(struct inode *inode,
2652 EXTENT_BOUNDARY, GFP_NOFS); 2660 EXTENT_BOUNDARY, GFP_NOFS);
2653 nr++; 2661 nr++;
2654 } 2662 }
2655 btrfs_set_extent_delalloc(inode, page_start, page_end); 2663 btrfs_set_extent_delalloc(inode, page_start, page_end, NULL);
2656 2664
2657 set_page_dirty(page); 2665 set_page_dirty(page);
2658 dirty_page++; 2666 dirty_page++;
@@ -3274,8 +3282,10 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
3274 return -ENOMEM; 3282 return -ENOMEM;
3275 3283
3276 path = btrfs_alloc_path(); 3284 path = btrfs_alloc_path();
3277 if (!path) 3285 if (!path) {
3286 kfree(cluster);
3278 return -ENOMEM; 3287 return -ENOMEM;
3288 }
3279 3289
3280 rc->extents_found = 0; 3290 rc->extents_found = 0;
3281 rc->extents_skipped = 0; 3291 rc->extents_skipped = 0;
@@ -3478,7 +3488,7 @@ static struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info,
3478 key.objectid = objectid; 3488 key.objectid = objectid;
3479 key.type = BTRFS_INODE_ITEM_KEY; 3489 key.type = BTRFS_INODE_ITEM_KEY;
3480 key.offset = 0; 3490 key.offset = 0;
3481 inode = btrfs_iget(root->fs_info->sb, &key, root); 3491 inode = btrfs_iget(root->fs_info->sb, &key, root, NULL);
3482 BUG_ON(IS_ERR(inode) || is_bad_inode(inode)); 3492 BUG_ON(IS_ERR(inode) || is_bad_inode(inode));
3483 BTRFS_I(inode)->index_cnt = group->key.objectid; 3493 BTRFS_I(inode)->index_cnt = group->key.objectid;
3484 3494
@@ -3534,8 +3544,8 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
3534 (unsigned long long)rc->block_group->key.objectid, 3544 (unsigned long long)rc->block_group->key.objectid,
3535 (unsigned long long)rc->block_group->flags); 3545 (unsigned long long)rc->block_group->flags);
3536 3546
3537 btrfs_start_delalloc_inodes(fs_info->tree_root); 3547 btrfs_start_delalloc_inodes(fs_info->tree_root, 0);
3538 btrfs_wait_ordered_extents(fs_info->tree_root, 0); 3548 btrfs_wait_ordered_extents(fs_info->tree_root, 0, 0);
3539 3549
3540 while (1) { 3550 while (1) {
3541 rc->extents_found = 0; 3551 rc->extents_found = 0;
@@ -3755,6 +3765,8 @@ out:
3755 BTRFS_DATA_RELOC_TREE_OBJECTID); 3765 BTRFS_DATA_RELOC_TREE_OBJECTID);
3756 if (IS_ERR(fs_root)) 3766 if (IS_ERR(fs_root))
3757 err = PTR_ERR(fs_root); 3767 err = PTR_ERR(fs_root);
3768 else
3769 btrfs_orphan_cleanup(fs_root);
3758 } 3770 }
3759 return err; 3771 return err;
3760} 3772}
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 752a5463bf53..1866dff0538e 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -38,6 +38,7 @@
38#include <linux/namei.h> 38#include <linux/namei.h>
39#include <linux/miscdevice.h> 39#include <linux/miscdevice.h>
40#include <linux/magic.h> 40#include <linux/magic.h>
41#include <linux/slab.h>
41#include "compat.h" 42#include "compat.h"
42#include "ctree.h" 43#include "ctree.h"
43#include "disk-io.h" 44#include "disk-io.h"
@@ -63,25 +64,26 @@ static void btrfs_put_super(struct super_block *sb)
63} 64}
64 65
65enum { 66enum {
66 Opt_degraded, Opt_subvol, Opt_device, Opt_nodatasum, Opt_nodatacow, 67 Opt_degraded, Opt_subvol, Opt_subvolid, Opt_device, Opt_nodatasum,
67 Opt_max_extent, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, 68 Opt_nodatacow, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, Opt_ssd,
68 Opt_ssd, Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, 69 Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress,
69 Opt_compress, Opt_notreelog, Opt_ratio, Opt_flushoncommit, 70 Opt_compress_force, Opt_notreelog, Opt_ratio, Opt_flushoncommit,
70 Opt_discard, Opt_err, 71 Opt_discard, Opt_err,
71}; 72};
72 73
73static match_table_t tokens = { 74static match_table_t tokens = {
74 {Opt_degraded, "degraded"}, 75 {Opt_degraded, "degraded"},
75 {Opt_subvol, "subvol=%s"}, 76 {Opt_subvol, "subvol=%s"},
77 {Opt_subvolid, "subvolid=%d"},
76 {Opt_device, "device=%s"}, 78 {Opt_device, "device=%s"},
77 {Opt_nodatasum, "nodatasum"}, 79 {Opt_nodatasum, "nodatasum"},
78 {Opt_nodatacow, "nodatacow"}, 80 {Opt_nodatacow, "nodatacow"},
79 {Opt_nobarrier, "nobarrier"}, 81 {Opt_nobarrier, "nobarrier"},
80 {Opt_max_extent, "max_extent=%s"},
81 {Opt_max_inline, "max_inline=%s"}, 82 {Opt_max_inline, "max_inline=%s"},
82 {Opt_alloc_start, "alloc_start=%s"}, 83 {Opt_alloc_start, "alloc_start=%s"},
83 {Opt_thread_pool, "thread_pool=%d"}, 84 {Opt_thread_pool, "thread_pool=%d"},
84 {Opt_compress, "compress"}, 85 {Opt_compress, "compress"},
86 {Opt_compress_force, "compress-force"},
85 {Opt_ssd, "ssd"}, 87 {Opt_ssd, "ssd"},
86 {Opt_ssd_spread, "ssd_spread"}, 88 {Opt_ssd_spread, "ssd_spread"},
87 {Opt_nossd, "nossd"}, 89 {Opt_nossd, "nossd"},
@@ -93,31 +95,6 @@ static match_table_t tokens = {
93 {Opt_err, NULL}, 95 {Opt_err, NULL},
94}; 96};
95 97
96u64 btrfs_parse_size(char *str)
97{
98 u64 res;
99 int mult = 1;
100 char *end;
101 char last;
102
103 res = simple_strtoul(str, &end, 10);
104
105 last = end[0];
106 if (isalpha(last)) {
107 last = tolower(last);
108 switch (last) {
109 case 'g':
110 mult *= 1024;
111 case 'm':
112 mult *= 1024;
113 case 'k':
114 mult *= 1024;
115 }
116 res = res * mult;
117 }
118 return res;
119}
120
121/* 98/*
122 * Regular mount options parser. Everything that is needed only when 99 * Regular mount options parser. Everything that is needed only when
123 * reading in a new superblock is parsed here. 100 * reading in a new superblock is parsed here.
@@ -126,8 +103,9 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
126{ 103{
127 struct btrfs_fs_info *info = root->fs_info; 104 struct btrfs_fs_info *info = root->fs_info;
128 substring_t args[MAX_OPT_ARGS]; 105 substring_t args[MAX_OPT_ARGS];
129 char *p, *num; 106 char *p, *num, *orig;
130 int intarg; 107 int intarg;
108 int ret = 0;
131 109
132 if (!options) 110 if (!options)
133 return 0; 111 return 0;
@@ -140,6 +118,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
140 if (!options) 118 if (!options)
141 return -ENOMEM; 119 return -ENOMEM;
142 120
121 orig = options;
143 122
144 while ((p = strsep(&options, ",")) != NULL) { 123 while ((p = strsep(&options, ",")) != NULL) {
145 int token; 124 int token;
@@ -153,6 +132,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
153 btrfs_set_opt(info->mount_opt, DEGRADED); 132 btrfs_set_opt(info->mount_opt, DEGRADED);
154 break; 133 break;
155 case Opt_subvol: 134 case Opt_subvol:
135 case Opt_subvolid:
156 case Opt_device: 136 case Opt_device:
157 /* 137 /*
158 * These are parsed by btrfs_parse_early_options 138 * These are parsed by btrfs_parse_early_options
@@ -172,6 +152,11 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
172 printk(KERN_INFO "btrfs: use compression\n"); 152 printk(KERN_INFO "btrfs: use compression\n");
173 btrfs_set_opt(info->mount_opt, COMPRESS); 153 btrfs_set_opt(info->mount_opt, COMPRESS);
174 break; 154 break;
155 case Opt_compress_force:
156 printk(KERN_INFO "btrfs: forcing compression\n");
157 btrfs_set_opt(info->mount_opt, FORCE_COMPRESS);
158 btrfs_set_opt(info->mount_opt, COMPRESS);
159 break;
175 case Opt_ssd: 160 case Opt_ssd:
176 printk(KERN_INFO "btrfs: use ssd allocation scheme\n"); 161 printk(KERN_INFO "btrfs: use ssd allocation scheme\n");
177 btrfs_set_opt(info->mount_opt, SSD); 162 btrfs_set_opt(info->mount_opt, SSD);
@@ -202,22 +187,10 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
202 info->thread_pool_size); 187 info->thread_pool_size);
203 } 188 }
204 break; 189 break;
205 case Opt_max_extent:
206 num = match_strdup(&args[0]);
207 if (num) {
208 info->max_extent = btrfs_parse_size(num);
209 kfree(num);
210
211 info->max_extent = max_t(u64,
212 info->max_extent, root->sectorsize);
213 printk(KERN_INFO "btrfs: max_extent at %llu\n",
214 (unsigned long long)info->max_extent);
215 }
216 break;
217 case Opt_max_inline: 190 case Opt_max_inline:
218 num = match_strdup(&args[0]); 191 num = match_strdup(&args[0]);
219 if (num) { 192 if (num) {
220 info->max_inline = btrfs_parse_size(num); 193 info->max_inline = memparse(num, NULL);
221 kfree(num); 194 kfree(num);
222 195
223 if (info->max_inline) { 196 if (info->max_inline) {
@@ -232,7 +205,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
232 case Opt_alloc_start: 205 case Opt_alloc_start:
233 num = match_strdup(&args[0]); 206 num = match_strdup(&args[0]);
234 if (num) { 207 if (num) {
235 info->alloc_start = btrfs_parse_size(num); 208 info->alloc_start = memparse(num, NULL);
236 kfree(num); 209 kfree(num);
237 printk(KERN_INFO 210 printk(KERN_INFO
238 "btrfs: allocations start at %llu\n", 211 "btrfs: allocations start at %llu\n",
@@ -262,12 +235,18 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
262 case Opt_discard: 235 case Opt_discard:
263 btrfs_set_opt(info->mount_opt, DISCARD); 236 btrfs_set_opt(info->mount_opt, DISCARD);
264 break; 237 break;
238 case Opt_err:
239 printk(KERN_INFO "btrfs: unrecognized mount option "
240 "'%s'\n", p);
241 ret = -EINVAL;
242 goto out;
265 default: 243 default:
266 break; 244 break;
267 } 245 }
268 } 246 }
269 kfree(options); 247out:
270 return 0; 248 kfree(orig);
249 return ret;
271} 250}
272 251
273/* 252/*
@@ -277,12 +256,13 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
277 * only when we need to allocate a new super block. 256 * only when we need to allocate a new super block.
278 */ 257 */
279static int btrfs_parse_early_options(const char *options, fmode_t flags, 258static int btrfs_parse_early_options(const char *options, fmode_t flags,
280 void *holder, char **subvol_name, 259 void *holder, char **subvol_name, u64 *subvol_objectid,
281 struct btrfs_fs_devices **fs_devices) 260 struct btrfs_fs_devices **fs_devices)
282{ 261{
283 substring_t args[MAX_OPT_ARGS]; 262 substring_t args[MAX_OPT_ARGS];
284 char *opts, *p; 263 char *opts, *p;
285 int error = 0; 264 int error = 0;
265 int intarg;
286 266
287 if (!options) 267 if (!options)
288 goto out; 268 goto out;
@@ -305,6 +285,18 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags,
305 case Opt_subvol: 285 case Opt_subvol:
306 *subvol_name = match_strdup(&args[0]); 286 *subvol_name = match_strdup(&args[0]);
307 break; 287 break;
288 case Opt_subvolid:
289 intarg = 0;
290 error = match_int(&args[0], &intarg);
291 if (!error) {
292 /* we want the original fs_tree */
293 if (!intarg)
294 *subvol_objectid =
295 BTRFS_FS_TREE_OBJECTID;
296 else
297 *subvol_objectid = intarg;
298 }
299 break;
308 case Opt_device: 300 case Opt_device:
309 error = btrfs_scan_one_device(match_strdup(&args[0]), 301 error = btrfs_scan_one_device(match_strdup(&args[0]),
310 flags, holder, fs_devices); 302 flags, holder, fs_devices);
@@ -332,6 +324,110 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags,
332 return error; 324 return error;
333} 325}
334 326
327static struct dentry *get_default_root(struct super_block *sb,
328 u64 subvol_objectid)
329{
330 struct btrfs_root *root = sb->s_fs_info;
331 struct btrfs_root *new_root;
332 struct btrfs_dir_item *di;
333 struct btrfs_path *path;
334 struct btrfs_key location;
335 struct inode *inode;
336 struct dentry *dentry;
337 u64 dir_id;
338 int new = 0;
339
340 /*
341 * We have a specific subvol we want to mount, just setup location and
342 * go look up the root.
343 */
344 if (subvol_objectid) {
345 location.objectid = subvol_objectid;
346 location.type = BTRFS_ROOT_ITEM_KEY;
347 location.offset = (u64)-1;
348 goto find_root;
349 }
350
351 path = btrfs_alloc_path();
352 if (!path)
353 return ERR_PTR(-ENOMEM);
354 path->leave_spinning = 1;
355
356 /*
357 * Find the "default" dir item which points to the root item that we
358 * will mount by default if we haven't been given a specific subvolume
359 * to mount.
360 */
361 dir_id = btrfs_super_root_dir(&root->fs_info->super_copy);
362 di = btrfs_lookup_dir_item(NULL, root, path, dir_id, "default", 7, 0);
363 if (!di) {
364 /*
365 * Ok the default dir item isn't there. This is weird since
366 * it's always been there, but don't freak out, just try and
367 * mount to root most subvolume.
368 */
369 btrfs_free_path(path);
370 dir_id = BTRFS_FIRST_FREE_OBJECTID;
371 new_root = root->fs_info->fs_root;
372 goto setup_root;
373 }
374
375 btrfs_dir_item_key_to_cpu(path->nodes[0], di, &location);
376 btrfs_free_path(path);
377
378find_root:
379 new_root = btrfs_read_fs_root_no_name(root->fs_info, &location);
380 if (IS_ERR(new_root))
381 return ERR_PTR(PTR_ERR(new_root));
382
383 if (btrfs_root_refs(&new_root->root_item) == 0)
384 return ERR_PTR(-ENOENT);
385
386 dir_id = btrfs_root_dirid(&new_root->root_item);
387setup_root:
388 location.objectid = dir_id;
389 location.type = BTRFS_INODE_ITEM_KEY;
390 location.offset = 0;
391
392 inode = btrfs_iget(sb, &location, new_root, &new);
393 if (!inode)
394 return ERR_PTR(-ENOMEM);
395
396 /*
397 * If we're just mounting the root most subvol put the inode and return
398 * a reference to the dentry. We will have already gotten a reference
399 * to the inode in btrfs_fill_super so we're good to go.
400 */
401 if (!new && sb->s_root->d_inode == inode) {
402 iput(inode);
403 return dget(sb->s_root);
404 }
405
406 if (new) {
407 const struct qstr name = { .name = "/", .len = 1 };
408
409 /*
410 * New inode, we need to make the dentry a sibling of s_root so
411 * everything gets cleaned up properly on unmount.
412 */
413 dentry = d_alloc(sb->s_root, &name);
414 if (!dentry) {
415 iput(inode);
416 return ERR_PTR(-ENOMEM);
417 }
418 d_splice_alias(inode, dentry);
419 } else {
420 /*
421 * We found the inode in cache, just find a dentry for it and
422 * put the reference to the inode we just got.
423 */
424 dentry = d_find_alias(inode);
425 iput(inode);
426 }
427
428 return dentry;
429}
430
335static int btrfs_fill_super(struct super_block *sb, 431static int btrfs_fill_super(struct super_block *sb,
336 struct btrfs_fs_devices *fs_devices, 432 struct btrfs_fs_devices *fs_devices,
337 void *data, int silent) 433 void *data, int silent)
@@ -365,7 +461,7 @@ static int btrfs_fill_super(struct super_block *sb,
365 key.objectid = BTRFS_FIRST_FREE_OBJECTID; 461 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
366 key.type = BTRFS_INODE_ITEM_KEY; 462 key.type = BTRFS_INODE_ITEM_KEY;
367 key.offset = 0; 463 key.offset = 0;
368 inode = btrfs_iget(sb, &key, tree_root->fs_info->fs_root); 464 inode = btrfs_iget(sb, &key, tree_root->fs_info->fs_root, NULL);
369 if (IS_ERR(inode)) { 465 if (IS_ERR(inode)) {
370 err = PTR_ERR(inode); 466 err = PTR_ERR(inode);
371 goto fail_close; 467 goto fail_close;
@@ -377,12 +473,6 @@ static int btrfs_fill_super(struct super_block *sb,
377 err = -ENOMEM; 473 err = -ENOMEM;
378 goto fail_close; 474 goto fail_close;
379 } 475 }
380#if 0
381 /* this does the super kobj at the same time */
382 err = btrfs_sysfs_add_super(tree_root->fs_info);
383 if (err)
384 goto fail_close;
385#endif
386 476
387 sb->s_root = root_dentry; 477 sb->s_root = root_dentry;
388 478
@@ -405,8 +495,8 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
405 return 0; 495 return 0;
406 } 496 }
407 497
408 btrfs_start_delalloc_inodes(root); 498 btrfs_start_delalloc_inodes(root, 0);
409 btrfs_wait_ordered_extents(root, 0); 499 btrfs_wait_ordered_extents(root, 0, 0);
410 500
411 trans = btrfs_start_transaction(root, 1); 501 trans = btrfs_start_transaction(root, 1);
412 ret = btrfs_commit_transaction(trans, root); 502 ret = btrfs_commit_transaction(trans, root);
@@ -426,9 +516,6 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
426 seq_puts(seq, ",nodatacow"); 516 seq_puts(seq, ",nodatacow");
427 if (btrfs_test_opt(root, NOBARRIER)) 517 if (btrfs_test_opt(root, NOBARRIER))
428 seq_puts(seq, ",nobarrier"); 518 seq_puts(seq, ",nobarrier");
429 if (info->max_extent != (u64)-1)
430 seq_printf(seq, ",max_extent=%llu",
431 (unsigned long long)info->max_extent);
432 if (info->max_inline != 8192 * 1024) 519 if (info->max_inline != 8192 * 1024)
433 seq_printf(seq, ",max_inline=%llu", 520 seq_printf(seq, ",max_inline=%llu",
434 (unsigned long long)info->max_inline); 521 (unsigned long long)info->max_inline);
@@ -450,6 +537,8 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
450 seq_puts(seq, ",notreelog"); 537 seq_puts(seq, ",notreelog");
451 if (btrfs_test_opt(root, FLUSHONCOMMIT)) 538 if (btrfs_test_opt(root, FLUSHONCOMMIT))
452 seq_puts(seq, ",flushoncommit"); 539 seq_puts(seq, ",flushoncommit");
540 if (btrfs_test_opt(root, DISCARD))
541 seq_puts(seq, ",discard");
453 if (!(root->fs_info->sb->s_flags & MS_POSIXACL)) 542 if (!(root->fs_info->sb->s_flags & MS_POSIXACL))
454 seq_puts(seq, ",noacl"); 543 seq_puts(seq, ",noacl");
455 return 0; 544 return 0;
@@ -472,19 +561,22 @@ static int btrfs_test_super(struct super_block *s, void *data)
472static int btrfs_get_sb(struct file_system_type *fs_type, int flags, 561static int btrfs_get_sb(struct file_system_type *fs_type, int flags,
473 const char *dev_name, void *data, struct vfsmount *mnt) 562 const char *dev_name, void *data, struct vfsmount *mnt)
474{ 563{
475 char *subvol_name = NULL;
476 struct block_device *bdev = NULL; 564 struct block_device *bdev = NULL;
477 struct super_block *s; 565 struct super_block *s;
478 struct dentry *root; 566 struct dentry *root;
479 struct btrfs_fs_devices *fs_devices = NULL; 567 struct btrfs_fs_devices *fs_devices = NULL;
480 fmode_t mode = FMODE_READ; 568 fmode_t mode = FMODE_READ;
569 char *subvol_name = NULL;
570 u64 subvol_objectid = 0;
481 int error = 0; 571 int error = 0;
572 int found = 0;
482 573
483 if (!(flags & MS_RDONLY)) 574 if (!(flags & MS_RDONLY))
484 mode |= FMODE_WRITE; 575 mode |= FMODE_WRITE;
485 576
486 error = btrfs_parse_early_options(data, mode, fs_type, 577 error = btrfs_parse_early_options(data, mode, fs_type,
487 &subvol_name, &fs_devices); 578 &subvol_name, &subvol_objectid,
579 &fs_devices);
488 if (error) 580 if (error)
489 return error; 581 return error;
490 582
@@ -513,6 +605,7 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags,
513 goto error_close_devices; 605 goto error_close_devices;
514 } 606 }
515 607
608 found = 1;
516 btrfs_close_devices(fs_devices); 609 btrfs_close_devices(fs_devices);
517 } else { 610 } else {
518 char b[BDEVNAME_SIZE]; 611 char b[BDEVNAME_SIZE];
@@ -530,25 +623,35 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags,
530 s->s_flags |= MS_ACTIVE; 623 s->s_flags |= MS_ACTIVE;
531 } 624 }
532 625
533 if (!strcmp(subvol_name, ".")) 626 root = get_default_root(s, subvol_objectid);
534 root = dget(s->s_root); 627 if (IS_ERR(root)) {
535 else { 628 error = PTR_ERR(root);
536 mutex_lock(&s->s_root->d_inode->i_mutex); 629 deactivate_locked_super(s);
537 root = lookup_one_len(subvol_name, s->s_root, 630 goto error;
631 }
632 /* if they gave us a subvolume name bind mount into that */
633 if (strcmp(subvol_name, ".")) {
634 struct dentry *new_root;
635 mutex_lock(&root->d_inode->i_mutex);
636 new_root = lookup_one_len(subvol_name, root,
538 strlen(subvol_name)); 637 strlen(subvol_name));
539 mutex_unlock(&s->s_root->d_inode->i_mutex); 638 mutex_unlock(&root->d_inode->i_mutex);
540 639
541 if (IS_ERR(root)) { 640 if (IS_ERR(new_root)) {
542 deactivate_locked_super(s); 641 deactivate_locked_super(s);
543 error = PTR_ERR(root); 642 error = PTR_ERR(new_root);
544 goto error_free_subvol_name; 643 dput(root);
644 goto error_close_devices;
545 } 645 }
546 if (!root->d_inode) { 646 if (!new_root->d_inode) {
547 dput(root); 647 dput(root);
648 dput(new_root);
548 deactivate_locked_super(s); 649 deactivate_locked_super(s);
549 error = -ENXIO; 650 error = -ENXIO;
550 goto error_free_subvol_name; 651 goto error_close_devices;
551 } 652 }
653 dput(root);
654 root = new_root;
552 } 655 }
553 656
554 mnt->mnt_sb = s; 657 mnt->mnt_sb = s;
@@ -563,6 +666,7 @@ error_close_devices:
563 btrfs_close_devices(fs_devices); 666 btrfs_close_devices(fs_devices);
564error_free_subvol_name: 667error_free_subvol_name:
565 kfree(subvol_name); 668 kfree(subvol_name);
669error:
566 return error; 670 return error;
567} 671}
568 672
@@ -607,14 +711,37 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
607{ 711{
608 struct btrfs_root *root = btrfs_sb(dentry->d_sb); 712 struct btrfs_root *root = btrfs_sb(dentry->d_sb);
609 struct btrfs_super_block *disk_super = &root->fs_info->super_copy; 713 struct btrfs_super_block *disk_super = &root->fs_info->super_copy;
714 struct list_head *head = &root->fs_info->space_info;
715 struct btrfs_space_info *found;
716 u64 total_used = 0;
717 u64 data_used = 0;
610 int bits = dentry->d_sb->s_blocksize_bits; 718 int bits = dentry->d_sb->s_blocksize_bits;
611 __be32 *fsid = (__be32 *)root->fs_info->fsid; 719 __be32 *fsid = (__be32 *)root->fs_info->fsid;
612 720
721 rcu_read_lock();
722 list_for_each_entry_rcu(found, head, list) {
723 if (found->flags & (BTRFS_BLOCK_GROUP_DUP|
724 BTRFS_BLOCK_GROUP_RAID10|
725 BTRFS_BLOCK_GROUP_RAID1)) {
726 total_used += found->bytes_used;
727 if (found->flags & BTRFS_BLOCK_GROUP_DATA)
728 data_used += found->bytes_used;
729 else
730 data_used += found->total_bytes;
731 }
732
733 total_used += found->bytes_used;
734 if (found->flags & BTRFS_BLOCK_GROUP_DATA)
735 data_used += found->bytes_used;
736 else
737 data_used += found->total_bytes;
738 }
739 rcu_read_unlock();
740
613 buf->f_namelen = BTRFS_NAME_LEN; 741 buf->f_namelen = BTRFS_NAME_LEN;
614 buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits; 742 buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits;
615 buf->f_bfree = buf->f_blocks - 743 buf->f_bfree = buf->f_blocks - (total_used >> bits);
616 (btrfs_super_bytes_used(disk_super) >> bits); 744 buf->f_bavail = buf->f_blocks - (data_used >> bits);
617 buf->f_bavail = buf->f_bfree;
618 buf->f_bsize = dentry->d_sb->s_blocksize; 745 buf->f_bsize = dentry->d_sb->s_blocksize;
619 buf->f_type = BTRFS_SUPER_MAGIC; 746 buf->f_type = BTRFS_SUPER_MAGIC;
620 747
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index a240b6fa81df..4ce16ef702a3 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -164,12 +164,12 @@ static void btrfs_root_release(struct kobject *kobj)
164 complete(&root->kobj_unregister); 164 complete(&root->kobj_unregister);
165} 165}
166 166
167static struct sysfs_ops btrfs_super_attr_ops = { 167static const struct sysfs_ops btrfs_super_attr_ops = {
168 .show = btrfs_super_attr_show, 168 .show = btrfs_super_attr_show,
169 .store = btrfs_super_attr_store, 169 .store = btrfs_super_attr_store,
170}; 170};
171 171
172static struct sysfs_ops btrfs_root_attr_ops = { 172static const struct sysfs_ops btrfs_root_attr_ops = {
173 .show = btrfs_root_attr_show, 173 .show = btrfs_root_attr_show,
174 .store = btrfs_root_attr_store, 174 .store = btrfs_root_attr_store,
175}; 175};
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index c207e8c32c9b..2cb116099b90 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -17,6 +17,7 @@
17 */ 17 */
18 18
19#include <linux/fs.h> 19#include <linux/fs.h>
20#include <linux/slab.h>
20#include <linux/sched.h> 21#include <linux/sched.h>
21#include <linux/writeback.h> 22#include <linux/writeback.h>
22#include <linux/pagemap.h> 23#include <linux/pagemap.h>
@@ -69,7 +70,7 @@ static noinline int join_transaction(struct btrfs_root *root)
69 cur_trans->commit_done = 0; 70 cur_trans->commit_done = 0;
70 cur_trans->start_time = get_seconds(); 71 cur_trans->start_time = get_seconds();
71 72
72 cur_trans->delayed_refs.root.rb_node = NULL; 73 cur_trans->delayed_refs.root = RB_ROOT;
73 cur_trans->delayed_refs.num_entries = 0; 74 cur_trans->delayed_refs.num_entries = 0;
74 cur_trans->delayed_refs.num_heads_ready = 0; 75 cur_trans->delayed_refs.num_heads_ready = 0;
75 cur_trans->delayed_refs.num_heads = 0; 76 cur_trans->delayed_refs.num_heads = 0;
@@ -147,18 +148,13 @@ static void wait_current_trans(struct btrfs_root *root)
147 while (1) { 148 while (1) {
148 prepare_to_wait(&root->fs_info->transaction_wait, &wait, 149 prepare_to_wait(&root->fs_info->transaction_wait, &wait,
149 TASK_UNINTERRUPTIBLE); 150 TASK_UNINTERRUPTIBLE);
150 if (cur_trans->blocked) { 151 if (!cur_trans->blocked)
151 mutex_unlock(&root->fs_info->trans_mutex);
152 schedule();
153 mutex_lock(&root->fs_info->trans_mutex);
154 finish_wait(&root->fs_info->transaction_wait,
155 &wait);
156 } else {
157 finish_wait(&root->fs_info->transaction_wait,
158 &wait);
159 break; 152 break;
160 } 153 mutex_unlock(&root->fs_info->trans_mutex);
154 schedule();
155 mutex_lock(&root->fs_info->trans_mutex);
161 } 156 }
157 finish_wait(&root->fs_info->transaction_wait, &wait);
162 put_transaction(cur_trans); 158 put_transaction(cur_trans);
163 } 159 }
164} 160}
@@ -333,6 +329,9 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
333 memset(trans, 0, sizeof(*trans)); 329 memset(trans, 0, sizeof(*trans));
334 kmem_cache_free(btrfs_trans_handle_cachep, trans); 330 kmem_cache_free(btrfs_trans_handle_cachep, trans);
335 331
332 if (throttle)
333 btrfs_run_delayed_iputs(root);
334
336 return 0; 335 return 0;
337} 336}
338 337
@@ -354,7 +353,7 @@ int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
354 * those extents are sent to disk but does not wait on them 353 * those extents are sent to disk but does not wait on them
355 */ 354 */
356int btrfs_write_marked_extents(struct btrfs_root *root, 355int btrfs_write_marked_extents(struct btrfs_root *root,
357 struct extent_io_tree *dirty_pages) 356 struct extent_io_tree *dirty_pages, int mark)
358{ 357{
359 int ret; 358 int ret;
360 int err = 0; 359 int err = 0;
@@ -367,7 +366,7 @@ int btrfs_write_marked_extents(struct btrfs_root *root,
367 366
368 while (1) { 367 while (1) {
369 ret = find_first_extent_bit(dirty_pages, start, &start, &end, 368 ret = find_first_extent_bit(dirty_pages, start, &start, &end,
370 EXTENT_DIRTY); 369 mark);
371 if (ret) 370 if (ret)
372 break; 371 break;
373 while (start <= end) { 372 while (start <= end) {
@@ -413,7 +412,7 @@ int btrfs_write_marked_extents(struct btrfs_root *root,
413 * on all the pages and clear them from the dirty pages state tree 412 * on all the pages and clear them from the dirty pages state tree
414 */ 413 */
415int btrfs_wait_marked_extents(struct btrfs_root *root, 414int btrfs_wait_marked_extents(struct btrfs_root *root,
416 struct extent_io_tree *dirty_pages) 415 struct extent_io_tree *dirty_pages, int mark)
417{ 416{
418 int ret; 417 int ret;
419 int err = 0; 418 int err = 0;
@@ -425,12 +424,12 @@ int btrfs_wait_marked_extents(struct btrfs_root *root,
425 unsigned long index; 424 unsigned long index;
426 425
427 while (1) { 426 while (1) {
428 ret = find_first_extent_bit(dirty_pages, 0, &start, &end, 427 ret = find_first_extent_bit(dirty_pages, start, &start, &end,
429 EXTENT_DIRTY); 428 mark);
430 if (ret) 429 if (ret)
431 break; 430 break;
432 431
433 clear_extent_dirty(dirty_pages, start, end, GFP_NOFS); 432 clear_extent_bits(dirty_pages, start, end, mark, GFP_NOFS);
434 while (start <= end) { 433 while (start <= end) {
435 index = start >> PAGE_CACHE_SHIFT; 434 index = start >> PAGE_CACHE_SHIFT;
436 start = (u64)(index + 1) << PAGE_CACHE_SHIFT; 435 start = (u64)(index + 1) << PAGE_CACHE_SHIFT;
@@ -460,13 +459,13 @@ int btrfs_wait_marked_extents(struct btrfs_root *root,
460 * those extents are on disk for transaction or log commit 459 * those extents are on disk for transaction or log commit
461 */ 460 */
462int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, 461int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
463 struct extent_io_tree *dirty_pages) 462 struct extent_io_tree *dirty_pages, int mark)
464{ 463{
465 int ret; 464 int ret;
466 int ret2; 465 int ret2;
467 466
468 ret = btrfs_write_marked_extents(root, dirty_pages); 467 ret = btrfs_write_marked_extents(root, dirty_pages, mark);
469 ret2 = btrfs_wait_marked_extents(root, dirty_pages); 468 ret2 = btrfs_wait_marked_extents(root, dirty_pages, mark);
470 return ret || ret2; 469 return ret || ret2;
471} 470}
472 471
@@ -479,7 +478,8 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
479 return filemap_write_and_wait(btree_inode->i_mapping); 478 return filemap_write_and_wait(btree_inode->i_mapping);
480 } 479 }
481 return btrfs_write_and_wait_marked_extents(root, 480 return btrfs_write_and_wait_marked_extents(root,
482 &trans->transaction->dirty_pages); 481 &trans->transaction->dirty_pages,
482 EXTENT_DIRTY);
483} 483}
484 484
485/* 485/*
@@ -497,13 +497,16 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
497{ 497{
498 int ret; 498 int ret;
499 u64 old_root_bytenr; 499 u64 old_root_bytenr;
500 u64 old_root_used;
500 struct btrfs_root *tree_root = root->fs_info->tree_root; 501 struct btrfs_root *tree_root = root->fs_info->tree_root;
501 502
503 old_root_used = btrfs_root_used(&root->root_item);
502 btrfs_write_dirty_block_groups(trans, root); 504 btrfs_write_dirty_block_groups(trans, root);
503 505
504 while (1) { 506 while (1) {
505 old_root_bytenr = btrfs_root_bytenr(&root->root_item); 507 old_root_bytenr = btrfs_root_bytenr(&root->root_item);
506 if (old_root_bytenr == root->node->start) 508 if (old_root_bytenr == root->node->start &&
509 old_root_used == btrfs_root_used(&root->root_item))
507 break; 510 break;
508 511
509 btrfs_set_root_node(&root->root_item, root->node); 512 btrfs_set_root_node(&root->root_item, root->node);
@@ -512,6 +515,7 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
512 &root->root_item); 515 &root->root_item);
513 BUG_ON(ret); 516 BUG_ON(ret);
514 517
518 old_root_used = btrfs_root_used(&root->root_item);
515 ret = btrfs_write_dirty_block_groups(trans, root); 519 ret = btrfs_write_dirty_block_groups(trans, root);
516 BUG_ON(ret); 520 BUG_ON(ret);
517 } 521 }
@@ -752,10 +756,17 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
752 struct btrfs_root_item *new_root_item; 756 struct btrfs_root_item *new_root_item;
753 struct btrfs_root *tree_root = fs_info->tree_root; 757 struct btrfs_root *tree_root = fs_info->tree_root;
754 struct btrfs_root *root = pending->root; 758 struct btrfs_root *root = pending->root;
759 struct btrfs_root *parent_root;
760 struct inode *parent_inode;
755 struct extent_buffer *tmp; 761 struct extent_buffer *tmp;
756 struct extent_buffer *old; 762 struct extent_buffer *old;
757 int ret; 763 int ret;
758 u64 objectid; 764 u64 objectid;
765 int namelen;
766 u64 index = 0;
767
768 parent_inode = pending->dentry->d_parent->d_inode;
769 parent_root = BTRFS_I(parent_inode)->root;
759 770
760 new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS); 771 new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS);
761 if (!new_root_item) { 772 if (!new_root_item) {
@@ -766,83 +777,59 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
766 if (ret) 777 if (ret)
767 goto fail; 778 goto fail;
768 779
769 record_root_in_trans(trans, root);
770 btrfs_set_root_last_snapshot(&root->root_item, trans->transid);
771 memcpy(new_root_item, &root->root_item, sizeof(*new_root_item));
772
773 key.objectid = objectid; 780 key.objectid = objectid;
774 /* record when the snapshot was created in key.offset */ 781 /* record when the snapshot was created in key.offset */
775 key.offset = trans->transid; 782 key.offset = trans->transid;
776 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); 783 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
777 784
778 old = btrfs_lock_root_node(root);
779 btrfs_cow_block(trans, root, old, NULL, 0, &old);
780 btrfs_set_lock_blocking(old);
781
782 btrfs_copy_root(trans, root, old, &tmp, objectid);
783 btrfs_tree_unlock(old);
784 free_extent_buffer(old);
785
786 btrfs_set_root_node(new_root_item, tmp);
787 ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
788 new_root_item);
789 btrfs_tree_unlock(tmp);
790 free_extent_buffer(tmp);
791 if (ret)
792 goto fail;
793
794 key.offset = (u64)-1;
795 memcpy(&pending->root_key, &key, sizeof(key)); 785 memcpy(&pending->root_key, &key, sizeof(key));
796fail: 786 pending->root_key.offset = (u64)-1;
797 kfree(new_root_item);
798 btrfs_unreserve_metadata_space(root, 6);
799 return ret;
800}
801
802static noinline int finish_pending_snapshot(struct btrfs_fs_info *fs_info,
803 struct btrfs_pending_snapshot *pending)
804{
805 int ret;
806 int namelen;
807 u64 index = 0;
808 struct btrfs_trans_handle *trans;
809 struct inode *parent_inode;
810 struct inode *inode;
811 struct btrfs_root *parent_root;
812
813 parent_inode = pending->dentry->d_parent->d_inode;
814 parent_root = BTRFS_I(parent_inode)->root;
815 trans = btrfs_join_transaction(parent_root, 1);
816 787
788 record_root_in_trans(trans, parent_root);
817 /* 789 /*
818 * insert the directory item 790 * insert the directory item
819 */ 791 */
820 namelen = strlen(pending->name); 792 namelen = strlen(pending->name);
821 ret = btrfs_set_inode_index(parent_inode, &index); 793 ret = btrfs_set_inode_index(parent_inode, &index);
794 BUG_ON(ret);
822 ret = btrfs_insert_dir_item(trans, parent_root, 795 ret = btrfs_insert_dir_item(trans, parent_root,
823 pending->name, namelen, 796 pending->name, namelen,
824 parent_inode->i_ino, 797 parent_inode->i_ino,
825 &pending->root_key, BTRFS_FT_DIR, index); 798 &pending->root_key, BTRFS_FT_DIR, index);
826 799 BUG_ON(ret);
827 if (ret)
828 goto fail;
829 800
830 btrfs_i_size_write(parent_inode, parent_inode->i_size + namelen * 2); 801 btrfs_i_size_write(parent_inode, parent_inode->i_size + namelen * 2);
831 ret = btrfs_update_inode(trans, parent_root, parent_inode); 802 ret = btrfs_update_inode(trans, parent_root, parent_inode);
832 BUG_ON(ret); 803 BUG_ON(ret);
833 804
805 record_root_in_trans(trans, root);
806 btrfs_set_root_last_snapshot(&root->root_item, trans->transid);
807 memcpy(new_root_item, &root->root_item, sizeof(*new_root_item));
808
809 old = btrfs_lock_root_node(root);
810 btrfs_cow_block(trans, root, old, NULL, 0, &old);
811 btrfs_set_lock_blocking(old);
812
813 btrfs_copy_root(trans, root, old, &tmp, objectid);
814 btrfs_tree_unlock(old);
815 free_extent_buffer(old);
816
817 btrfs_set_root_node(new_root_item, tmp);
818 ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
819 new_root_item);
820 BUG_ON(ret);
821 btrfs_tree_unlock(tmp);
822 free_extent_buffer(tmp);
823
834 ret = btrfs_add_root_ref(trans, parent_root->fs_info->tree_root, 824 ret = btrfs_add_root_ref(trans, parent_root->fs_info->tree_root,
835 pending->root_key.objectid, 825 pending->root_key.objectid,
836 parent_root->root_key.objectid, 826 parent_root->root_key.objectid,
837 parent_inode->i_ino, index, pending->name, 827 parent_inode->i_ino, index, pending->name,
838 namelen); 828 namelen);
839
840 BUG_ON(ret); 829 BUG_ON(ret);
841 830
842 inode = btrfs_lookup_dentry(parent_inode, pending->dentry);
843 d_instantiate(pending->dentry, inode);
844fail: 831fail:
845 btrfs_end_transaction(trans, fs_info->fs_root); 832 kfree(new_root_item);
846 return ret; 833 return ret;
847} 834}
848 835
@@ -863,25 +850,6 @@ static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans,
863 return 0; 850 return 0;
864} 851}
865 852
866static noinline int finish_pending_snapshots(struct btrfs_trans_handle *trans,
867 struct btrfs_fs_info *fs_info)
868{
869 struct btrfs_pending_snapshot *pending;
870 struct list_head *head = &trans->transaction->pending_snapshots;
871 int ret;
872
873 while (!list_empty(head)) {
874 pending = list_entry(head->next,
875 struct btrfs_pending_snapshot, list);
876 ret = finish_pending_snapshot(fs_info, pending);
877 BUG_ON(ret);
878 list_del(&pending->list);
879 kfree(pending->name);
880 kfree(pending);
881 }
882 return 0;
883}
884
885static void update_super_roots(struct btrfs_root *root) 853static void update_super_roots(struct btrfs_root *root)
886{ 854{
887 struct btrfs_root_item *root_item; 855 struct btrfs_root_item *root_item;
@@ -993,12 +961,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
993 961
994 mutex_unlock(&root->fs_info->trans_mutex); 962 mutex_unlock(&root->fs_info->trans_mutex);
995 963
996 if (flush_on_commit) { 964 if (flush_on_commit || snap_pending) {
997 btrfs_start_delalloc_inodes(root); 965 btrfs_start_delalloc_inodes(root, 1);
998 ret = btrfs_wait_ordered_extents(root, 0); 966 ret = btrfs_wait_ordered_extents(root, 0, 1);
999 BUG_ON(ret);
1000 } else if (snap_pending) {
1001 ret = btrfs_wait_ordered_extents(root, 1);
1002 BUG_ON(ret); 967 BUG_ON(ret);
1003 } 968 }
1004 969
@@ -1096,9 +1061,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1096 1061
1097 btrfs_finish_extent_commit(trans, root); 1062 btrfs_finish_extent_commit(trans, root);
1098 1063
1099 /* do the directory inserts of any pending snapshot creations */
1100 finish_pending_snapshots(trans, root->fs_info);
1101
1102 mutex_lock(&root->fs_info->trans_mutex); 1064 mutex_lock(&root->fs_info->trans_mutex);
1103 1065
1104 cur_trans->commit_done = 1; 1066 cur_trans->commit_done = 1;
@@ -1116,6 +1078,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1116 current->journal_info = NULL; 1078 current->journal_info = NULL;
1117 1079
1118 kmem_cache_free(btrfs_trans_handle_cachep, trans); 1080 kmem_cache_free(btrfs_trans_handle_cachep, trans);
1081
1082 if (current != root->fs_info->transaction_kthread)
1083 btrfs_run_delayed_iputs(root);
1084
1119 return ret; 1085 return ret;
1120} 1086}
1121 1087
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index d4e3e7a6938c..93c7ccb33118 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -107,10 +107,10 @@ void btrfs_throttle(struct btrfs_root *root);
107int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans, 107int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
108 struct btrfs_root *root); 108 struct btrfs_root *root);
109int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, 109int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
110 struct extent_io_tree *dirty_pages); 110 struct extent_io_tree *dirty_pages, int mark);
111int btrfs_write_marked_extents(struct btrfs_root *root, 111int btrfs_write_marked_extents(struct btrfs_root *root,
112 struct extent_io_tree *dirty_pages); 112 struct extent_io_tree *dirty_pages, int mark);
113int btrfs_wait_marked_extents(struct btrfs_root *root, 113int btrfs_wait_marked_extents(struct btrfs_root *root,
114 struct extent_io_tree *dirty_pages); 114 struct extent_io_tree *dirty_pages, int mark);
115int btrfs_transaction_in_commit(struct btrfs_fs_info *info); 115int btrfs_transaction_in_commit(struct btrfs_fs_info *info);
116#endif 116#endif
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 741666a7676a..af57dd2b43d4 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -17,6 +17,7 @@
17 */ 17 */
18 18
19#include <linux/sched.h> 19#include <linux/sched.h>
20#include <linux/slab.h>
20#include "ctree.h" 21#include "ctree.h"
21#include "transaction.h" 22#include "transaction.h"
22#include "disk-io.h" 23#include "disk-io.h"
@@ -445,7 +446,7 @@ static noinline struct inode *read_one_inode(struct btrfs_root *root,
445 key.objectid = objectid; 446 key.objectid = objectid;
446 key.type = BTRFS_INODE_ITEM_KEY; 447 key.type = BTRFS_INODE_ITEM_KEY;
447 key.offset = 0; 448 key.offset = 0;
448 inode = btrfs_iget(root->fs_info->sb, &key, root); 449 inode = btrfs_iget(root->fs_info->sb, &key, root, NULL);
449 if (IS_ERR(inode)) { 450 if (IS_ERR(inode)) {
450 inode = NULL; 451 inode = NULL;
451 } else if (is_bad_inode(inode)) { 452 } else if (is_bad_inode(inode)) {
@@ -542,8 +543,8 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
542 543
543 saved_nbytes = inode_get_bytes(inode); 544 saved_nbytes = inode_get_bytes(inode);
544 /* drop any overlapping extents */ 545 /* drop any overlapping extents */
545 ret = btrfs_drop_extents(trans, root, inode, 546 ret = btrfs_drop_extents(trans, inode, start, extent_end,
546 start, extent_end, extent_end, start, &alloc_hint, 1); 547 &alloc_hint, 1);
547 BUG_ON(ret); 548 BUG_ON(ret);
548 549
549 if (found_type == BTRFS_FILE_EXTENT_REG || 550 if (found_type == BTRFS_FILE_EXTENT_REG ||
@@ -930,6 +931,17 @@ out_nowrite:
930 return 0; 931 return 0;
931} 932}
932 933
934static int insert_orphan_item(struct btrfs_trans_handle *trans,
935 struct btrfs_root *root, u64 offset)
936{
937 int ret;
938 ret = btrfs_find_orphan_item(root, offset);
939 if (ret > 0)
940 ret = btrfs_insert_orphan_item(trans, root, offset);
941 return ret;
942}
943
944
933/* 945/*
934 * There are a few corners where the link count of the file can't 946 * There are a few corners where the link count of the file can't
935 * be properly maintained during replay. So, instead of adding 947 * be properly maintained during replay. So, instead of adding
@@ -997,9 +1009,13 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
997 } 1009 }
998 BTRFS_I(inode)->index_cnt = (u64)-1; 1010 BTRFS_I(inode)->index_cnt = (u64)-1;
999 1011
1000 if (inode->i_nlink == 0 && S_ISDIR(inode->i_mode)) { 1012 if (inode->i_nlink == 0) {
1001 ret = replay_dir_deletes(trans, root, NULL, path, 1013 if (S_ISDIR(inode->i_mode)) {
1002 inode->i_ino, 1); 1014 ret = replay_dir_deletes(trans, root, NULL, path,
1015 inode->i_ino, 1);
1016 BUG_ON(ret);
1017 }
1018 ret = insert_orphan_item(trans, root, inode->i_ino);
1003 BUG_ON(ret); 1019 BUG_ON(ret);
1004 } 1020 }
1005 btrfs_free_path(path); 1021 btrfs_free_path(path);
@@ -1587,7 +1603,6 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
1587 /* inode keys are done during the first stage */ 1603 /* inode keys are done during the first stage */
1588 if (key.type == BTRFS_INODE_ITEM_KEY && 1604 if (key.type == BTRFS_INODE_ITEM_KEY &&
1589 wc->stage == LOG_WALK_REPLAY_INODES) { 1605 wc->stage == LOG_WALK_REPLAY_INODES) {
1590 struct inode *inode;
1591 struct btrfs_inode_item *inode_item; 1606 struct btrfs_inode_item *inode_item;
1592 u32 mode; 1607 u32 mode;
1593 1608
@@ -1603,31 +1618,16 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
1603 eb, i, &key); 1618 eb, i, &key);
1604 BUG_ON(ret); 1619 BUG_ON(ret);
1605 1620
1606 /* for regular files, truncate away 1621 /* for regular files, make sure corresponding
1607 * extents past the new EOF 1622 * orhpan item exist. extents past the new EOF
1623 * will be truncated later by orphan cleanup.
1608 */ 1624 */
1609 if (S_ISREG(mode)) { 1625 if (S_ISREG(mode)) {
1610 inode = read_one_inode(root, 1626 ret = insert_orphan_item(wc->trans, root,
1611 key.objectid); 1627 key.objectid);
1612 BUG_ON(!inode);
1613
1614 ret = btrfs_truncate_inode_items(wc->trans,
1615 root, inode, inode->i_size,
1616 BTRFS_EXTENT_DATA_KEY);
1617 BUG_ON(ret); 1628 BUG_ON(ret);
1618
1619 /* if the nlink count is zero here, the iput
1620 * will free the inode. We bump it to make
1621 * sure it doesn't get freed until the link
1622 * count fixup is done
1623 */
1624 if (inode->i_nlink == 0) {
1625 btrfs_inc_nlink(inode);
1626 btrfs_update_inode(wc->trans,
1627 root, inode);
1628 }
1629 iput(inode);
1630 } 1629 }
1630
1631 ret = link_to_fixup_dir(wc->trans, root, 1631 ret = link_to_fixup_dir(wc->trans, root,
1632 path, key.objectid); 1632 path, key.objectid);
1633 BUG_ON(ret); 1633 BUG_ON(ret);
@@ -1977,10 +1977,11 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
1977{ 1977{
1978 int index1; 1978 int index1;
1979 int index2; 1979 int index2;
1980 int mark;
1980 int ret; 1981 int ret;
1981 struct btrfs_root *log = root->log_root; 1982 struct btrfs_root *log = root->log_root;
1982 struct btrfs_root *log_root_tree = root->fs_info->log_root_tree; 1983 struct btrfs_root *log_root_tree = root->fs_info->log_root_tree;
1983 u64 log_transid = 0; 1984 unsigned long log_transid = 0;
1984 1985
1985 mutex_lock(&root->log_mutex); 1986 mutex_lock(&root->log_mutex);
1986 index1 = root->log_transid % 2; 1987 index1 = root->log_transid % 2;
@@ -2014,24 +2015,29 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2014 goto out; 2015 goto out;
2015 } 2016 }
2016 2017
2018 log_transid = root->log_transid;
2019 if (log_transid % 2 == 0)
2020 mark = EXTENT_DIRTY;
2021 else
2022 mark = EXTENT_NEW;
2023
2017 /* we start IO on all the marked extents here, but we don't actually 2024 /* we start IO on all the marked extents here, but we don't actually
2018 * wait for them until later. 2025 * wait for them until later.
2019 */ 2026 */
2020 ret = btrfs_write_marked_extents(log, &log->dirty_log_pages); 2027 ret = btrfs_write_marked_extents(log, &log->dirty_log_pages, mark);
2021 BUG_ON(ret); 2028 BUG_ON(ret);
2022 2029
2023 btrfs_set_root_node(&log->root_item, log->node); 2030 btrfs_set_root_node(&log->root_item, log->node);
2024 2031
2025 root->log_batch = 0; 2032 root->log_batch = 0;
2026 log_transid = root->log_transid;
2027 root->log_transid++; 2033 root->log_transid++;
2028 log->log_transid = root->log_transid; 2034 log->log_transid = root->log_transid;
2029 root->log_start_pid = 0; 2035 root->log_start_pid = 0;
2030 smp_mb(); 2036 smp_mb();
2031 /* 2037 /*
2032 * log tree has been flushed to disk, new modifications of 2038 * IO has been started, blocks of the log tree have WRITTEN flag set
2033 * the log will be written to new positions. so it's safe to 2039 * in their headers. new modifications of the log will be written to
2034 * allow log writers to go in. 2040 * new positions. so it's safe to allow log writers to go in.
2035 */ 2041 */
2036 mutex_unlock(&root->log_mutex); 2042 mutex_unlock(&root->log_mutex);
2037 2043
@@ -2052,7 +2058,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2052 2058
2053 index2 = log_root_tree->log_transid % 2; 2059 index2 = log_root_tree->log_transid % 2;
2054 if (atomic_read(&log_root_tree->log_commit[index2])) { 2060 if (atomic_read(&log_root_tree->log_commit[index2])) {
2055 btrfs_wait_marked_extents(log, &log->dirty_log_pages); 2061 btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
2056 wait_log_commit(trans, log_root_tree, 2062 wait_log_commit(trans, log_root_tree,
2057 log_root_tree->log_transid); 2063 log_root_tree->log_transid);
2058 mutex_unlock(&log_root_tree->log_mutex); 2064 mutex_unlock(&log_root_tree->log_mutex);
@@ -2072,16 +2078,17 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2072 * check the full commit flag again 2078 * check the full commit flag again
2073 */ 2079 */
2074 if (root->fs_info->last_trans_log_full_commit == trans->transid) { 2080 if (root->fs_info->last_trans_log_full_commit == trans->transid) {
2075 btrfs_wait_marked_extents(log, &log->dirty_log_pages); 2081 btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
2076 mutex_unlock(&log_root_tree->log_mutex); 2082 mutex_unlock(&log_root_tree->log_mutex);
2077 ret = -EAGAIN; 2083 ret = -EAGAIN;
2078 goto out_wake_log_root; 2084 goto out_wake_log_root;
2079 } 2085 }
2080 2086
2081 ret = btrfs_write_and_wait_marked_extents(log_root_tree, 2087 ret = btrfs_write_and_wait_marked_extents(log_root_tree,
2082 &log_root_tree->dirty_log_pages); 2088 &log_root_tree->dirty_log_pages,
2089 EXTENT_DIRTY | EXTENT_NEW);
2083 BUG_ON(ret); 2090 BUG_ON(ret);
2084 btrfs_wait_marked_extents(log, &log->dirty_log_pages); 2091 btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
2085 2092
2086 btrfs_set_super_log_root(&root->fs_info->super_for_commit, 2093 btrfs_set_super_log_root(&root->fs_info->super_for_commit,
2087 log_root_tree->node->start); 2094 log_root_tree->node->start);
@@ -2147,12 +2154,12 @@ int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root)
2147 2154
2148 while (1) { 2155 while (1) {
2149 ret = find_first_extent_bit(&log->dirty_log_pages, 2156 ret = find_first_extent_bit(&log->dirty_log_pages,
2150 0, &start, &end, EXTENT_DIRTY); 2157 0, &start, &end, EXTENT_DIRTY | EXTENT_NEW);
2151 if (ret) 2158 if (ret)
2152 break; 2159 break;
2153 2160
2154 clear_extent_dirty(&log->dirty_log_pages, 2161 clear_extent_bits(&log->dirty_log_pages, start, end,
2155 start, end, GFP_NOFS); 2162 EXTENT_DIRTY | EXTENT_NEW, GFP_NOFS);
2156 } 2163 }
2157 2164
2158 if (log->log_transid > 0) { 2165 if (log->log_transid > 0) {
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 7eda483d7b5a..8db7b14bbae8 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -17,6 +17,7 @@
17 */ 17 */
18#include <linux/sched.h> 18#include <linux/sched.h>
19#include <linux/bio.h> 19#include <linux/bio.h>
20#include <linux/slab.h>
20#include <linux/buffer_head.h> 21#include <linux/buffer_head.h>
21#include <linux/blkdev.h> 22#include <linux/blkdev.h>
22#include <linux/random.h> 23#include <linux/random.h>
@@ -256,13 +257,13 @@ loop_lock:
256 wake_up(&fs_info->async_submit_wait); 257 wake_up(&fs_info->async_submit_wait);
257 258
258 BUG_ON(atomic_read(&cur->bi_cnt) == 0); 259 BUG_ON(atomic_read(&cur->bi_cnt) == 0);
259 submit_bio(cur->bi_rw, cur);
260 num_run++;
261 batch_run++;
262 260
263 if (bio_rw_flagged(cur, BIO_RW_SYNCIO)) 261 if (bio_rw_flagged(cur, BIO_RW_SYNCIO))
264 num_sync_run++; 262 num_sync_run++;
265 263
264 submit_bio(cur->bi_rw, cur);
265 num_run++;
266 batch_run++;
266 if (need_resched()) { 267 if (need_resched()) {
267 if (num_sync_run) { 268 if (num_sync_run) {
268 blk_run_backing_dev(bdi, NULL); 269 blk_run_backing_dev(bdi, NULL);
@@ -325,16 +326,6 @@ loop_lock:
325 num_sync_run = 0; 326 num_sync_run = 0;
326 blk_run_backing_dev(bdi, NULL); 327 blk_run_backing_dev(bdi, NULL);
327 } 328 }
328
329 cond_resched();
330 if (again)
331 goto loop;
332
333 spin_lock(&device->io_lock);
334 if (device->pending_bios.head || device->pending_sync_bios.head)
335 goto loop_lock;
336 spin_unlock(&device->io_lock);
337
338 /* 329 /*
339 * IO has already been through a long path to get here. Checksumming, 330 * IO has already been through a long path to get here. Checksumming,
340 * async helper threads, perhaps compression. We've done a pretty 331 * async helper threads, perhaps compression. We've done a pretty
@@ -346,6 +337,16 @@ loop_lock:
346 * cared about found its way down here. 337 * cared about found its way down here.
347 */ 338 */
348 blk_run_backing_dev(bdi, NULL); 339 blk_run_backing_dev(bdi, NULL);
340
341 cond_resched();
342 if (again)
343 goto loop;
344
345 spin_lock(&device->io_lock);
346 if (device->pending_bios.head || device->pending_sync_bios.head)
347 goto loop_lock;
348 spin_unlock(&device->io_lock);
349
349done: 350done:
350 return 0; 351 return 0;
351} 352}
@@ -365,6 +366,7 @@ static noinline int device_list_add(const char *path,
365 struct btrfs_device *device; 366 struct btrfs_device *device;
366 struct btrfs_fs_devices *fs_devices; 367 struct btrfs_fs_devices *fs_devices;
367 u64 found_transid = btrfs_super_generation(disk_super); 368 u64 found_transid = btrfs_super_generation(disk_super);
369 char *name;
368 370
369 fs_devices = find_fsid(disk_super->fsid); 371 fs_devices = find_fsid(disk_super->fsid);
370 if (!fs_devices) { 372 if (!fs_devices) {
@@ -411,6 +413,12 @@ static noinline int device_list_add(const char *path,
411 413
412 device->fs_devices = fs_devices; 414 device->fs_devices = fs_devices;
413 fs_devices->num_devices++; 415 fs_devices->num_devices++;
416 } else if (strcmp(device->name, path)) {
417 name = kstrdup(path, GFP_NOFS);
418 if (!name)
419 return -ENOMEM;
420 kfree(device->name);
421 device->name = name;
414 } 422 }
415 423
416 if (found_transid > fs_devices->latest_trans) { 424 if (found_transid > fs_devices->latest_trans) {
@@ -592,7 +600,7 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
592 goto error_close; 600 goto error_close;
593 601
594 disk_super = (struct btrfs_super_block *)bh->b_data; 602 disk_super = (struct btrfs_super_block *)bh->b_data;
595 devid = le64_to_cpu(disk_super->dev_item.devid); 603 devid = btrfs_stack_device_id(&disk_super->dev_item);
596 if (devid != device->devid) 604 if (devid != device->devid)
597 goto error_brelse; 605 goto error_brelse;
598 606
@@ -694,7 +702,7 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
694 goto error_close; 702 goto error_close;
695 } 703 }
696 disk_super = (struct btrfs_super_block *)bh->b_data; 704 disk_super = (struct btrfs_super_block *)bh->b_data;
697 devid = le64_to_cpu(disk_super->dev_item.devid); 705 devid = btrfs_stack_device_id(&disk_super->dev_item);
698 transid = btrfs_super_generation(disk_super); 706 transid = btrfs_super_generation(disk_super);
699 if (disk_super->label[0]) 707 if (disk_super->label[0])
700 printk(KERN_INFO "device label %s ", disk_super->label); 708 printk(KERN_INFO "device label %s ", disk_super->label);
@@ -1135,7 +1143,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
1135 root->fs_info->avail_metadata_alloc_bits; 1143 root->fs_info->avail_metadata_alloc_bits;
1136 1144
1137 if ((all_avail & BTRFS_BLOCK_GROUP_RAID10) && 1145 if ((all_avail & BTRFS_BLOCK_GROUP_RAID10) &&
1138 root->fs_info->fs_devices->rw_devices <= 4) { 1146 root->fs_info->fs_devices->num_devices <= 4) {
1139 printk(KERN_ERR "btrfs: unable to go below four devices " 1147 printk(KERN_ERR "btrfs: unable to go below four devices "
1140 "on raid10\n"); 1148 "on raid10\n");
1141 ret = -EINVAL; 1149 ret = -EINVAL;
@@ -1143,7 +1151,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
1143 } 1151 }
1144 1152
1145 if ((all_avail & BTRFS_BLOCK_GROUP_RAID1) && 1153 if ((all_avail & BTRFS_BLOCK_GROUP_RAID1) &&
1146 root->fs_info->fs_devices->rw_devices <= 2) { 1154 root->fs_info->fs_devices->num_devices <= 2) {
1147 printk(KERN_ERR "btrfs: unable to go below two " 1155 printk(KERN_ERR "btrfs: unable to go below two "
1148 "devices on raid1\n"); 1156 "devices on raid1\n");
1149 ret = -EINVAL; 1157 ret = -EINVAL;
@@ -1187,7 +1195,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
1187 goto error_close; 1195 goto error_close;
1188 } 1196 }
1189 disk_super = (struct btrfs_super_block *)bh->b_data; 1197 disk_super = (struct btrfs_super_block *)bh->b_data;
1190 devid = le64_to_cpu(disk_super->dev_item.devid); 1198 devid = btrfs_stack_device_id(&disk_super->dev_item);
1191 dev_uuid = disk_super->dev_item.uuid; 1199 dev_uuid = disk_super->dev_item.uuid;
1192 device = btrfs_find_device(root, devid, dev_uuid, 1200 device = btrfs_find_device(root, devid, dev_uuid,
1193 disk_super->fsid); 1201 disk_super->fsid);
@@ -1434,8 +1442,8 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
1434 return -EINVAL; 1442 return -EINVAL;
1435 1443
1436 bdev = open_bdev_exclusive(device_path, 0, root->fs_info->bdev_holder); 1444 bdev = open_bdev_exclusive(device_path, 0, root->fs_info->bdev_holder);
1437 if (!bdev) 1445 if (IS_ERR(bdev))
1438 return -EIO; 1446 return PTR_ERR(bdev);
1439 1447
1440 if (root->fs_info->fs_devices->seeding) { 1448 if (root->fs_info->fs_devices->seeding) {
1441 seeding_dev = 1; 1449 seeding_dev = 1;
@@ -2191,9 +2199,9 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
2191 min_stripes = 2; 2199 min_stripes = 2;
2192 } 2200 }
2193 if (type & (BTRFS_BLOCK_GROUP_RAID1)) { 2201 if (type & (BTRFS_BLOCK_GROUP_RAID1)) {
2194 num_stripes = min_t(u64, 2, fs_devices->rw_devices); 2202 if (fs_devices->rw_devices < 2)
2195 if (num_stripes < 2)
2196 return -ENOSPC; 2203 return -ENOSPC;
2204 num_stripes = 2;
2197 min_stripes = 2; 2205 min_stripes = 2;
2198 } 2206 }
2199 if (type & (BTRFS_BLOCK_GROUP_RAID10)) { 2207 if (type & (BTRFS_BLOCK_GROUP_RAID10)) {
@@ -2209,7 +2217,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
2209 max_chunk_size = 10 * calc_size; 2217 max_chunk_size = 10 * calc_size;
2210 min_stripe_size = 64 * 1024 * 1024; 2218 min_stripe_size = 64 * 1024 * 1024;
2211 } else if (type & BTRFS_BLOCK_GROUP_METADATA) { 2219 } else if (type & BTRFS_BLOCK_GROUP_METADATA) {
2212 max_chunk_size = 4 * calc_size; 2220 max_chunk_size = 256 * 1024 * 1024;
2213 min_stripe_size = 32 * 1024 * 1024; 2221 min_stripe_size = 32 * 1024 * 1024;
2214 } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) { 2222 } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) {
2215 calc_size = 8 * 1024 * 1024; 2223 calc_size = 8 * 1024 * 1024;
@@ -2237,8 +2245,16 @@ again:
2237 do_div(calc_size, stripe_len); 2245 do_div(calc_size, stripe_len);
2238 calc_size *= stripe_len; 2246 calc_size *= stripe_len;
2239 } 2247 }
2248
2240 /* we don't want tiny stripes */ 2249 /* we don't want tiny stripes */
2241 calc_size = max_t(u64, min_stripe_size, calc_size); 2250 if (!looped)
2251 calc_size = max_t(u64, min_stripe_size, calc_size);
2252
2253 /*
2254 * we're about to do_div by the stripe_len so lets make sure
2255 * we end up with something bigger than a stripe
2256 */
2257 calc_size = max_t(u64, calc_size, stripe_len * 4);
2242 2258
2243 do_div(calc_size, stripe_len); 2259 do_div(calc_size, stripe_len);
2244 calc_size *= stripe_len; 2260 calc_size *= stripe_len;
@@ -2538,6 +2554,11 @@ int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset)
2538 if (!em) 2554 if (!em)
2539 return 1; 2555 return 1;
2540 2556
2557 if (btrfs_test_opt(root, DEGRADED)) {
2558 free_extent_map(em);
2559 return 0;
2560 }
2561
2541 map = (struct map_lookup *)em->bdev; 2562 map = (struct map_lookup *)em->bdev;
2542 for (i = 0; i < map->num_stripes; i++) { 2563 for (i = 0; i < map->num_stripes; i++) {
2543 if (!map->stripes[i].dev->writeable) { 2564 if (!map->stripes[i].dev->writeable) {
@@ -2649,8 +2670,10 @@ again:
2649 em = lookup_extent_mapping(em_tree, logical, *length); 2670 em = lookup_extent_mapping(em_tree, logical, *length);
2650 read_unlock(&em_tree->lock); 2671 read_unlock(&em_tree->lock);
2651 2672
2652 if (!em && unplug_page) 2673 if (!em && unplug_page) {
2674 kfree(multi);
2653 return 0; 2675 return 0;
2676 }
2654 2677
2655 if (!em) { 2678 if (!em) {
2656 printk(KERN_CRIT "unable to find logical %llu len %llu\n", 2679 printk(KERN_CRIT "unable to find logical %llu len %llu\n",
@@ -3375,6 +3398,8 @@ int btrfs_read_chunk_tree(struct btrfs_root *root)
3375 key.type = 0; 3398 key.type = 0;
3376again: 3399again:
3377 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 3400 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3401 if (ret < 0)
3402 goto error;
3378 while (1) { 3403 while (1) {
3379 leaf = path->nodes[0]; 3404 leaf = path->nodes[0];
3380 slot = path->slots[0]; 3405 slot = path->slots[0];
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index b6dd5967c48a..193b58f7d3f3 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -85,22 +85,23 @@ out:
85 return ret; 85 return ret;
86} 86}
87 87
88int __btrfs_setxattr(struct inode *inode, const char *name, 88static int do_setxattr(struct btrfs_trans_handle *trans,
89 const void *value, size_t size, int flags) 89 struct inode *inode, const char *name,
90 const void *value, size_t size, int flags)
90{ 91{
91 struct btrfs_dir_item *di; 92 struct btrfs_dir_item *di;
92 struct btrfs_root *root = BTRFS_I(inode)->root; 93 struct btrfs_root *root = BTRFS_I(inode)->root;
93 struct btrfs_trans_handle *trans;
94 struct btrfs_path *path; 94 struct btrfs_path *path;
95 int ret = 0, mod = 0; 95 size_t name_len = strlen(name);
96 int ret = 0;
97
98 if (name_len + size > BTRFS_MAX_XATTR_SIZE(root))
99 return -ENOSPC;
96 100
97 path = btrfs_alloc_path(); 101 path = btrfs_alloc_path();
98 if (!path) 102 if (!path)
99 return -ENOMEM; 103 return -ENOMEM;
100 104
101 trans = btrfs_join_transaction(root, 1);
102 btrfs_set_trans_block_group(trans, inode);
103
104 /* first lets see if we already have this xattr */ 105 /* first lets see if we already have this xattr */
105 di = btrfs_lookup_xattr(trans, root, path, inode->i_ino, name, 106 di = btrfs_lookup_xattr(trans, root, path, inode->i_ino, name,
106 strlen(name), -1); 107 strlen(name), -1);
@@ -118,15 +119,12 @@ int __btrfs_setxattr(struct inode *inode, const char *name,
118 } 119 }
119 120
120 ret = btrfs_delete_one_dir_name(trans, root, path, di); 121 ret = btrfs_delete_one_dir_name(trans, root, path, di);
121 if (ret) 122 BUG_ON(ret);
122 goto out;
123 btrfs_release_path(root, path); 123 btrfs_release_path(root, path);
124 124
125 /* if we don't have a value then we are removing the xattr */ 125 /* if we don't have a value then we are removing the xattr */
126 if (!value) { 126 if (!value)
127 mod = 1;
128 goto out; 127 goto out;
129 }
130 } else { 128 } else {
131 btrfs_release_path(root, path); 129 btrfs_release_path(root, path);
132 130
@@ -138,20 +136,45 @@ int __btrfs_setxattr(struct inode *inode, const char *name,
138 } 136 }
139 137
140 /* ok we have to create a completely new xattr */ 138 /* ok we have to create a completely new xattr */
141 ret = btrfs_insert_xattr_item(trans, root, name, strlen(name), 139 ret = btrfs_insert_xattr_item(trans, root, path, inode->i_ino,
142 value, size, inode->i_ino); 140 name, name_len, value, size);
141 BUG_ON(ret);
142out:
143 btrfs_free_path(path);
144 return ret;
145}
146
147int __btrfs_setxattr(struct btrfs_trans_handle *trans,
148 struct inode *inode, const char *name,
149 const void *value, size_t size, int flags)
150{
151 struct btrfs_root *root = BTRFS_I(inode)->root;
152 int ret;
153
154 if (trans)
155 return do_setxattr(trans, inode, name, value, size, flags);
156
157 ret = btrfs_reserve_metadata_space(root, 2);
143 if (ret) 158 if (ret)
144 goto out; 159 return ret;
145 mod = 1;
146 160
147out: 161 trans = btrfs_start_transaction(root, 1);
148 if (mod) { 162 if (!trans) {
149 inode->i_ctime = CURRENT_TIME; 163 ret = -ENOMEM;
150 ret = btrfs_update_inode(trans, root, inode); 164 goto out;
151 } 165 }
166 btrfs_set_trans_block_group(trans, inode);
152 167
153 btrfs_end_transaction(trans, root); 168 ret = do_setxattr(trans, inode, name, value, size, flags);
154 btrfs_free_path(path); 169 if (ret)
170 goto out;
171
172 inode->i_ctime = CURRENT_TIME;
173 ret = btrfs_update_inode(trans, root, inode);
174 BUG_ON(ret);
175out:
176 btrfs_end_transaction_throttle(trans, root);
177 btrfs_unreserve_metadata_space(root, 2);
155 return ret; 178 return ret;
156} 179}
157 180
@@ -314,7 +337,9 @@ int btrfs_setxattr(struct dentry *dentry, const char *name, const void *value,
314 337
315 if (size == 0) 338 if (size == 0)
316 value = ""; /* empty EA, do not remove */ 339 value = ""; /* empty EA, do not remove */
317 return __btrfs_setxattr(dentry->d_inode, name, value, size, flags); 340
341 return __btrfs_setxattr(NULL, dentry->d_inode, name, value, size,
342 flags);
318} 343}
319 344
320int btrfs_removexattr(struct dentry *dentry, const char *name) 345int btrfs_removexattr(struct dentry *dentry, const char *name)
@@ -329,10 +354,13 @@ int btrfs_removexattr(struct dentry *dentry, const char *name)
329 354
330 if (!btrfs_is_valid_xattr(name)) 355 if (!btrfs_is_valid_xattr(name))
331 return -EOPNOTSUPP; 356 return -EOPNOTSUPP;
332 return __btrfs_setxattr(dentry->d_inode, name, NULL, 0, XATTR_REPLACE); 357
358 return __btrfs_setxattr(NULL, dentry->d_inode, name, NULL, 0,
359 XATTR_REPLACE);
333} 360}
334 361
335int btrfs_xattr_security_init(struct inode *inode, struct inode *dir) 362int btrfs_xattr_security_init(struct btrfs_trans_handle *trans,
363 struct inode *inode, struct inode *dir)
336{ 364{
337 int err; 365 int err;
338 size_t len; 366 size_t len;
@@ -354,7 +382,7 @@ int btrfs_xattr_security_init(struct inode *inode, struct inode *dir)
354 } else { 382 } else {
355 strcpy(name, XATTR_SECURITY_PREFIX); 383 strcpy(name, XATTR_SECURITY_PREFIX);
356 strcpy(name + XATTR_SECURITY_PREFIX_LEN, suffix); 384 strcpy(name + XATTR_SECURITY_PREFIX_LEN, suffix);
357 err = __btrfs_setxattr(inode, name, value, len, 0); 385 err = __btrfs_setxattr(trans, inode, name, value, len, 0);
358 kfree(name); 386 kfree(name);
359 } 387 }
360 388
diff --git a/fs/btrfs/xattr.h b/fs/btrfs/xattr.h
index c71e9c3cf3f7..721efa0346e0 100644
--- a/fs/btrfs/xattr.h
+++ b/fs/btrfs/xattr.h
@@ -27,15 +27,16 @@ extern struct xattr_handler *btrfs_xattr_handlers[];
27 27
28extern ssize_t __btrfs_getxattr(struct inode *inode, const char *name, 28extern ssize_t __btrfs_getxattr(struct inode *inode, const char *name,
29 void *buffer, size_t size); 29 void *buffer, size_t size);
30extern int __btrfs_setxattr(struct inode *inode, const char *name, 30extern int __btrfs_setxattr(struct btrfs_trans_handle *trans,
31 const void *value, size_t size, int flags); 31 struct inode *inode, const char *name,
32 32 const void *value, size_t size, int flags);
33extern ssize_t btrfs_getxattr(struct dentry *dentry, const char *name, 33extern ssize_t btrfs_getxattr(struct dentry *dentry, const char *name,
34 void *buffer, size_t size); 34 void *buffer, size_t size);
35extern int btrfs_setxattr(struct dentry *dentry, const char *name, 35extern int btrfs_setxattr(struct dentry *dentry, const char *name,
36 const void *value, size_t size, int flags); 36 const void *value, size_t size, int flags);
37extern int btrfs_removexattr(struct dentry *dentry, const char *name); 37extern int btrfs_removexattr(struct dentry *dentry, const char *name);
38 38
39extern int btrfs_xattr_security_init(struct inode *inode, struct inode *dir); 39extern int btrfs_xattr_security_init(struct btrfs_trans_handle *trans,
40 struct inode *inode, struct inode *dir);
40 41
41#endif /* __XATTR__ */ 42#endif /* __XATTR__ */