Diffstat (limited to 'fs/btrfs')
-rw-r--r--  fs/btrfs/Makefile            |    4
-rw-r--r--  fs/btrfs/acl.c               |   87
-rw-r--r--  fs/btrfs/btrfs_inode.h       |   16
-rw-r--r--  fs/btrfs/compression.c       |   14
-rw-r--r--  fs/btrfs/ctree.c             |  457
-rw-r--r--  fs/btrfs/ctree.h             |   58
-rw-r--r--  fs/btrfs/delayed-inode.c     |    2
-rw-r--r--  fs/btrfs/delayed-inode.h     |    2
-rw-r--r--  fs/btrfs/dir-item.c          |   39
-rw-r--r--  fs/btrfs/disk-io.c           |  131
-rw-r--r--  fs/btrfs/disk-io.h           |   10
-rw-r--r--  fs/btrfs/extent-tree.c       |  330
-rw-r--r--  fs/btrfs/extent_io.c         |  309
-rw-r--r--  fs/btrfs/extent_io.h         |   55
-rw-r--r--  fs/btrfs/extent_map.c        |  155
-rw-r--r--  fs/btrfs/file-item.c         |   48
-rw-r--r--  fs/btrfs/file.c              |  201
-rw-r--r--  fs/btrfs/free-space-cache.c  |  173
-rw-r--r--  fs/btrfs/inode.c             |  274
-rw-r--r--  fs/btrfs/ioctl.c             |   27
-rw-r--r--  fs/btrfs/locking.c           |  280
-rw-r--r--  fs/btrfs/locking.h           |   36
-rw-r--r--  fs/btrfs/ref-cache.c         |   68
-rw-r--r--  fs/btrfs/ref-cache.h         |   52
-rw-r--r--  fs/btrfs/relocation.c        |    3
-rw-r--r--  fs/btrfs/root-tree.c         |    5
-rw-r--r--  fs/btrfs/struct-funcs.c      |  100
-rw-r--r--  fs/btrfs/super.c             |    6
-rw-r--r--  fs/btrfs/transaction.c       |  112
-rw-r--r--  fs/btrfs/tree-log.c          |   18
-rw-r--r--  fs/btrfs/volumes.c           |   17
-rw-r--r--  fs/btrfs/xattr.c             |   66
32 files changed, 1502 insertions, 1653 deletions
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index 9b72dcf1cd25..40e6ac08c21f 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -6,5 +6,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
 	   transaction.o inode.o file.o tree-defrag.o \
 	   extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \
 	   extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
-	   export.o tree-log.o acl.o free-space-cache.o zlib.o lzo.o \
+	   export.o tree-log.o free-space-cache.o zlib.o lzo.o \
 	   compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o
+
+btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index f66fc9959733..eb159aaa5a11 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -28,9 +28,7 @@
 #include "btrfs_inode.h"
 #include "xattr.h"
 
-#ifdef CONFIG_BTRFS_FS_POSIX_ACL
-
-static struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
+struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
 {
 	int size;
 	const char *name;
@@ -111,7 +109,6 @@ static int btrfs_set_acl(struct btrfs_trans_handle *trans,
 	int ret, size = 0;
 	const char *name;
 	char *value = NULL;
-	mode_t mode;
 
 	if (acl) {
 		ret = posix_acl_valid(acl);
@@ -122,13 +119,11 @@ static int btrfs_set_acl(struct btrfs_trans_handle *trans,
 
 	switch (type) {
 	case ACL_TYPE_ACCESS:
-		mode = inode->i_mode;
 		name = POSIX_ACL_XATTR_ACCESS;
 		if (acl) {
-			ret = posix_acl_equiv_mode(acl, &mode);
+			ret = posix_acl_equiv_mode(acl, &inode->i_mode);
 			if (ret < 0)
 				return ret;
-			inode->i_mode = mode;
 		}
 		ret = 0;
 		break;
@@ -195,28 +190,6 @@ out:
 	return ret;
 }
 
-int btrfs_check_acl(struct inode *inode, int mask, unsigned int flags)
-{
-	int error = -EAGAIN;
-
-	if (flags & IPERM_FLAG_RCU) {
-		if (!negative_cached_acl(inode, ACL_TYPE_ACCESS))
-			error = -ECHILD;
-
-	} else {
-		struct posix_acl *acl;
-		acl = btrfs_get_acl(inode, ACL_TYPE_ACCESS);
-		if (IS_ERR(acl))
-			return PTR_ERR(acl);
-		if (acl) {
-			error = posix_acl_permission(inode, acl, mask);
-			posix_acl_release(acl);
-		}
-	}
-
-	return error;
-}
-
 /*
  * btrfs_init_acl is already generally called under fs_mutex, so the locking
  * stuff has been fixed to work with that. If the locking stuff changes, we
@@ -244,31 +217,20 @@ int btrfs_init_acl(struct btrfs_trans_handle *trans,
 	}
 
 	if (IS_POSIXACL(dir) && acl) {
-		struct posix_acl *clone;
-		mode_t mode;
-
 		if (S_ISDIR(inode->i_mode)) {
 			ret = btrfs_set_acl(trans, inode, acl,
 					    ACL_TYPE_DEFAULT);
 			if (ret)
 				goto failed;
 		}
-		clone = posix_acl_clone(acl, GFP_NOFS);
-		ret = -ENOMEM;
-		if (!clone)
-			goto failed;
-
-		mode = inode->i_mode;
-		ret = posix_acl_create_masq(clone, &mode);
-		if (ret >= 0) {
-			inode->i_mode = mode;
-			if (ret > 0) {
-				/* we need an acl */
-				ret = btrfs_set_acl(trans, inode, clone,
-						    ACL_TYPE_ACCESS);
-			}
+		ret = posix_acl_create(&acl, GFP_NOFS, &inode->i_mode);
+		if (ret < 0)
+			return ret;
+
+		if (ret > 0) {
+			/* we need an acl */
+			ret = btrfs_set_acl(trans, inode, acl, ACL_TYPE_ACCESS);
 		}
-		posix_acl_release(clone);
 	}
 failed:
 	posix_acl_release(acl);
@@ -278,7 +240,7 @@ failed:
 
 int btrfs_acl_chmod(struct inode *inode)
 {
-	struct posix_acl *acl, *clone;
+	struct posix_acl *acl;
 	int ret = 0;
 
 	if (S_ISLNK(inode->i_mode))
@@ -291,17 +253,11 @@ int btrfs_acl_chmod(struct inode *inode)
 	if (IS_ERR_OR_NULL(acl))
 		return PTR_ERR(acl);
 
-	clone = posix_acl_clone(acl, GFP_KERNEL);
+	ret = posix_acl_chmod(&acl, GFP_KERNEL, inode->i_mode);
+	if (ret)
+		return ret;
+	ret = btrfs_set_acl(NULL, inode, acl, ACL_TYPE_ACCESS);
 	posix_acl_release(acl);
-	if (!clone)
-		return -ENOMEM;
-
-	ret = posix_acl_chmod_masq(clone, inode->i_mode);
-	if (!ret)
-		ret = btrfs_set_acl(NULL, inode, clone, ACL_TYPE_ACCESS);
-
-	posix_acl_release(clone);
-
 	return ret;
 }
 
307 263
@@ -318,18 +274,3 @@ const struct xattr_handler btrfs_xattr_acl_access_handler = {
 	.get = btrfs_xattr_acl_get,
 	.set = btrfs_xattr_acl_set,
 };
-
-#else /* CONFIG_BTRFS_FS_POSIX_ACL */
-
-int btrfs_acl_chmod(struct inode *inode)
-{
-	return 0;
-}
-
-int btrfs_init_acl(struct btrfs_trans_handle *trans,
-		   struct inode *inode, struct inode *dir)
-{
-	return 0;
-}
-
-#endif /* CONFIG_BTRFS_FS_POSIX_ACL */
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 52d7eca8c7bf..502b9e988679 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -34,6 +34,9 @@ struct btrfs_inode {
 	 */
 	struct btrfs_key location;
 
+	/* Lock for counters */
+	spinlock_t lock;
+
 	/* the extent_tree has caches of all the extent mappings to disk */
 	struct extent_map_tree extent_tree;
 
@@ -134,8 +137,8 @@ struct btrfs_inode {
 	 * items we think we'll end up using, and reserved_extents is the number
 	 * of extent items we've reserved metadata for.
 	 */
-	atomic_t outstanding_extents;
-	atomic_t reserved_extents;
+	unsigned outstanding_extents;
+	unsigned reserved_extents;
 
 	/*
 	 * ordered_data_close is set by truncate when a file that used
@@ -184,4 +187,13 @@ static inline void btrfs_i_size_write(struct inode *inode, u64 size)
 	BTRFS_I(inode)->disk_i_size = size;
 }
 
+static inline bool btrfs_is_free_space_inode(struct btrfs_root *root,
+					     struct inode *inode)
+{
+	if (root == root->fs_info->tree_root ||
+	    BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID)
+		return true;
+	return false;
+}
+
 #endif
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index bfe42b03eaf9..8ec5d86f1734 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -338,6 +338,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
 	u64 first_byte = disk_start;
 	struct block_device *bdev;
 	int ret;
+	int skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
 
 	WARN_ON(start & ((u64)PAGE_CACHE_SIZE - 1));
 	cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS);
@@ -392,8 +393,11 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
 		ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
 		BUG_ON(ret);
 
-		ret = btrfs_csum_one_bio(root, inode, bio, start, 1);
-		BUG_ON(ret);
+		if (!skip_sum) {
+			ret = btrfs_csum_one_bio(root, inode, bio,
+						 start, 1);
+			BUG_ON(ret);
+		}
 
 		ret = btrfs_map_bio(root, WRITE, bio, 0, 1);
 		BUG_ON(ret);
@@ -418,8 +422,10 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
 	ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
 	BUG_ON(ret);
 
-	ret = btrfs_csum_one_bio(root, inode, bio, start, 1);
-	BUG_ON(ret);
+	if (!skip_sum) {
+		ret = btrfs_csum_one_bio(root, inode, bio, start, 1);
+		BUG_ON(ret);
+	}
 
 	ret = btrfs_map_bio(root, WRITE, bio, 0, 1);
 	BUG_ON(ret);
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 2e667868e0d2..011cab3aca8d 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -54,8 +54,13 @@ noinline void btrfs_set_path_blocking(struct btrfs_path *p)
 {
 	int i;
 	for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
-		if (p->nodes[i] && p->locks[i])
-			btrfs_set_lock_blocking(p->nodes[i]);
+		if (!p->nodes[i] || !p->locks[i])
+			continue;
+		btrfs_set_lock_blocking_rw(p->nodes[i], p->locks[i]);
+		if (p->locks[i] == BTRFS_READ_LOCK)
+			p->locks[i] = BTRFS_READ_LOCK_BLOCKING;
+		else if (p->locks[i] == BTRFS_WRITE_LOCK)
+			p->locks[i] = BTRFS_WRITE_LOCK_BLOCKING;
 	}
 }
61 66
@@ -68,7 +73,7 @@ noinline void btrfs_set_path_blocking(struct btrfs_path *p)
  * for held
  */
 noinline void btrfs_clear_path_blocking(struct btrfs_path *p,
-					struct extent_buffer *held)
+					struct extent_buffer *held, int held_rw)
 {
 	int i;
 
@@ -79,19 +84,29 @@ noinline void btrfs_clear_path_blocking(struct btrfs_path *p,
 	 * really sure by forcing the path to blocking before we clear
 	 * the path blocking.
 	 */
-	if (held)
-		btrfs_set_lock_blocking(held);
+	if (held) {
+		btrfs_set_lock_blocking_rw(held, held_rw);
+		if (held_rw == BTRFS_WRITE_LOCK)
+			held_rw = BTRFS_WRITE_LOCK_BLOCKING;
+		else if (held_rw == BTRFS_READ_LOCK)
+			held_rw = BTRFS_READ_LOCK_BLOCKING;
+	}
 	btrfs_set_path_blocking(p);
 #endif
 
 	for (i = BTRFS_MAX_LEVEL - 1; i >= 0; i--) {
-		if (p->nodes[i] && p->locks[i])
-			btrfs_clear_lock_blocking(p->nodes[i]);
+		if (p->nodes[i] && p->locks[i]) {
+			btrfs_clear_lock_blocking_rw(p->nodes[i], p->locks[i]);
+			if (p->locks[i] == BTRFS_WRITE_LOCK_BLOCKING)
+				p->locks[i] = BTRFS_WRITE_LOCK;
+			else if (p->locks[i] == BTRFS_READ_LOCK_BLOCKING)
+				p->locks[i] = BTRFS_READ_LOCK;
+		}
 	}
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 	if (held)
-		btrfs_clear_lock_blocking(held);
+		btrfs_clear_lock_blocking_rw(held, held_rw);
 #endif
 }
97 112
@@ -119,7 +134,7 @@ noinline void btrfs_release_path(struct btrfs_path *p)
 		if (!p->nodes[i])
 			continue;
 		if (p->locks[i]) {
-			btrfs_tree_unlock(p->nodes[i]);
+			btrfs_tree_unlock_rw(p->nodes[i], p->locks[i]);
 			p->locks[i] = 0;
 		}
 		free_extent_buffer(p->nodes[i]);
@@ -167,6 +182,25 @@ struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
 	return eb;
 }
 
+/* loop around taking references on and locking the root node of the
+ * tree until you end up with a lock on the root.  A locked buffer
+ * is returned, with a reference held.
+ */
+struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root)
+{
+	struct extent_buffer *eb;
+
+	while (1) {
+		eb = btrfs_root_node(root);
+		btrfs_tree_read_lock(eb);
+		if (eb == root->node)
+			break;
+		btrfs_tree_read_unlock(eb);
+		free_extent_buffer(eb);
+	}
+	return eb;
+}
+
 /* cowonly root (everything not a reference counted cow subvolume), just get
  * put onto a simple dirty list.  transaction.c walks this to make sure they
  * get properly updated on disk.
@@ -626,14 +660,6 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
 	for (i = start_slot; i < end_slot; i++) {
 		int close = 1;
 
-		if (!parent->map_token) {
-			map_extent_buffer(parent,
-					btrfs_node_key_ptr_offset(i),
-					sizeof(struct btrfs_key_ptr),
-					&parent->map_token, &parent->kaddr,
-					&parent->map_start, &parent->map_len,
-					KM_USER1);
-		}
 		btrfs_node_key(parent, &disk_key, i);
 		if (!progress_passed && comp_keys(&disk_key, progress) < 0)
 			continue;
@@ -656,11 +682,6 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
 			last_block = blocknr;
 			continue;
 		}
-		if (parent->map_token) {
-			unmap_extent_buffer(parent, parent->map_token,
-					    KM_USER1);
-			parent->map_token = NULL;
-		}
 
 		cur = btrfs_find_tree_block(root, blocknr, blocksize);
 		if (cur)
@@ -701,11 +722,6 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
 		btrfs_tree_unlock(cur);
 		free_extent_buffer(cur);
 	}
-	if (parent->map_token) {
-		unmap_extent_buffer(parent, parent->map_token,
-				    KM_USER1);
-		parent->map_token = NULL;
-	}
 	return err;
 }
711 727
@@ -746,7 +762,6 @@ static noinline int generic_bin_search(struct extent_buffer *eb,
 	struct btrfs_disk_key *tmp = NULL;
 	struct btrfs_disk_key unaligned;
 	unsigned long offset;
-	char *map_token = NULL;
 	char *kaddr = NULL;
 	unsigned long map_start = 0;
 	unsigned long map_len = 0;
@@ -756,18 +771,13 @@ static noinline int generic_bin_search(struct extent_buffer *eb,
 		mid = (low + high) / 2;
 		offset = p + mid * item_size;
 
-		if (!map_token || offset < map_start ||
+		if (!kaddr || offset < map_start ||
 		    (offset + sizeof(struct btrfs_disk_key)) >
 		    map_start + map_len) {
-			if (map_token) {
-				unmap_extent_buffer(eb, map_token, KM_USER0);
-				map_token = NULL;
-			}
 
 			err = map_private_extent_buffer(eb, offset,
 					sizeof(struct btrfs_disk_key),
-					&map_token, &kaddr,
-					&map_start, &map_len, KM_USER0);
+					&kaddr, &map_start, &map_len);
 
 			if (!err) {
 				tmp = (struct btrfs_disk_key *)(kaddr + offset -
@@ -790,14 +800,10 @@ static noinline int generic_bin_search(struct extent_buffer *eb,
 			high = mid;
 		else {
 			*slot = mid;
-			if (map_token)
-				unmap_extent_buffer(eb, map_token, KM_USER0);
 			return 0;
 		}
 	}
 	*slot = low;
-	if (map_token)
-		unmap_extent_buffer(eb, map_token, KM_USER0);
 	return 1;
 }
803 809
@@ -890,7 +896,8 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
 
 	mid = path->nodes[level];
 
-	WARN_ON(!path->locks[level]);
+	WARN_ON(path->locks[level] != BTRFS_WRITE_LOCK &&
+		path->locks[level] != BTRFS_WRITE_LOCK_BLOCKING);
 	WARN_ON(btrfs_header_generation(mid) != trans->transid);
 
 	orig_ptr = btrfs_node_blockptr(mid, orig_slot);
@@ -1228,7 +1235,6 @@ static void reada_for_search(struct btrfs_root *root,
 	u32 nr;
 	u32 blocksize;
 	u32 nscan = 0;
-	bool map = true;
 
 	if (level != 1)
 		return;
@@ -1250,19 +1256,8 @@ static void reada_for_search(struct btrfs_root *root,
 
 	nritems = btrfs_header_nritems(node);
 	nr = slot;
-	if (node->map_token || path->skip_locking)
-		map = false;
 
 	while (1) {
-		if (map && !node->map_token) {
-			unsigned long offset = btrfs_node_key_ptr_offset(nr);
-			map_private_extent_buffer(node, offset,
-						  sizeof(struct btrfs_key_ptr),
-						  &node->map_token,
-						  &node->kaddr,
-						  &node->map_start,
-						  &node->map_len, KM_USER1);
-		}
 		if (direction < 0) {
 			if (nr == 0)
 				break;
@@ -1281,11 +1276,6 @@ static void reada_for_search(struct btrfs_root *root,
 		if ((search <= target && target - search <= 65536) ||
 		    (search > target && search - target <= 65536)) {
 			gen = btrfs_node_ptr_generation(node, nr);
-			if (map && node->map_token) {
-				unmap_extent_buffer(node, node->map_token,
-						    KM_USER1);
-				node->map_token = NULL;
-			}
 			readahead_tree_block(root, search, blocksize, gen);
 			nread += blocksize;
 		}
@@ -1293,10 +1283,6 @@ static void reada_for_search(struct btrfs_root *root,
 		if ((nread > 65536 || nscan > 32))
 			break;
 	}
-	if (map && node->map_token) {
-		unmap_extent_buffer(node, node->map_token, KM_USER1);
-		node->map_token = NULL;
-	}
 }
1301 1287
1302/* 1288/*
@@ -1409,7 +1395,7 @@ static noinline void unlock_up(struct btrfs_path *path, int level,
 
 		t = path->nodes[i];
 		if (i >= lowest_unlock && i > skip_level && path->locks[i]) {
-			btrfs_tree_unlock(t);
+			btrfs_tree_unlock_rw(t, path->locks[i]);
 			path->locks[i] = 0;
 		}
 	}
@@ -1436,7 +1422,7 @@ noinline void btrfs_unlock_up_safe(struct btrfs_path *path, int level)
 			continue;
 		if (!path->locks[i])
 			continue;
-		btrfs_tree_unlock(path->nodes[i]);
+		btrfs_tree_unlock_rw(path->nodes[i], path->locks[i]);
 		path->locks[i] = 0;
 	}
 }
@@ -1485,6 +1471,8 @@ read_block_for_search(struct btrfs_trans_handle *trans,
 		 * we can trust our generation number
 		 */
 		free_extent_buffer(tmp);
+		btrfs_set_path_blocking(p);
+
 		tmp = read_tree_block(root, blocknr, blocksize, gen);
 		if (tmp && btrfs_buffer_uptodate(tmp, gen)) {
 			*eb_ret = tmp;
@@ -1540,20 +1528,27 @@ read_block_for_search(struct btrfs_trans_handle *trans,
 static int
 setup_nodes_for_search(struct btrfs_trans_handle *trans,
 		       struct btrfs_root *root, struct btrfs_path *p,
-		       struct extent_buffer *b, int level, int ins_len)
+		       struct extent_buffer *b, int level, int ins_len,
+		       int *write_lock_level)
 {
 	int ret;
 	if ((p->search_for_split || ins_len > 0) && btrfs_header_nritems(b) >=
 	    BTRFS_NODEPTRS_PER_BLOCK(root) - 3) {
 		int sret;
 
+		if (*write_lock_level < level + 1) {
+			*write_lock_level = level + 1;
+			btrfs_release_path(p);
+			goto again;
+		}
+
 		sret = reada_for_balance(root, p, level);
 		if (sret)
 			goto again;
 
 		btrfs_set_path_blocking(p);
 		sret = split_node(trans, root, p, level);
-		btrfs_clear_path_blocking(p, NULL);
+		btrfs_clear_path_blocking(p, NULL, 0);
 
 		BUG_ON(sret > 0);
 		if (sret) {
@@ -1565,13 +1560,19 @@ setup_nodes_for_search(struct btrfs_trans_handle *trans,
 		   BTRFS_NODEPTRS_PER_BLOCK(root) / 2) {
 		int sret;
 
+		if (*write_lock_level < level + 1) {
+			*write_lock_level = level + 1;
+			btrfs_release_path(p);
+			goto again;
+		}
+
 		sret = reada_for_balance(root, p, level);
 		if (sret)
 			goto again;
 
 		btrfs_set_path_blocking(p);
 		sret = balance_level(trans, root, p, level);
-		btrfs_clear_path_blocking(p, NULL);
+		btrfs_clear_path_blocking(p, NULL, 0);
 
 		if (sret) {
 			ret = sret;
@@ -1615,27 +1616,78 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
 	int err;
 	int level;
 	int lowest_unlock = 1;
+	int root_lock;
+	/* everything at write_lock_level or lower must be write locked */
+	int write_lock_level = 0;
 	u8 lowest_level = 0;
 
 	lowest_level = p->lowest_level;
 	WARN_ON(lowest_level && ins_len > 0);
 	WARN_ON(p->nodes[0] != NULL);
 
-	if (ins_len < 0)
+	if (ins_len < 0) {
 		lowest_unlock = 2;
 
+		/* when we are removing items, we might have to go up to level
+		 * two as we update tree pointers  Make sure we keep write
+		 * for those levels as well
+		 */
+		write_lock_level = 2;
+	} else if (ins_len > 0) {
+		/*
+		 * for inserting items, make sure we have a write lock on
+		 * level 1 so we can update keys
+		 */
+		write_lock_level = 1;
+	}
+
+	if (!cow)
+		write_lock_level = -1;
+
+	if (cow && (p->keep_locks || p->lowest_level))
+		write_lock_level = BTRFS_MAX_LEVEL;
+
 again:
+	/*
+	 * we try very hard to do read locks on the root
+	 */
+	root_lock = BTRFS_READ_LOCK;
+	level = 0;
 	if (p->search_commit_root) {
+		/*
+		 * the commit roots are read only
+		 * so we always do read locks
+		 */
 		b = root->commit_root;
 		extent_buffer_get(b);
+		level = btrfs_header_level(b);
 		if (!p->skip_locking)
-			btrfs_tree_lock(b);
+			btrfs_tree_read_lock(b);
 	} else {
-		if (p->skip_locking)
-			b = btrfs_root_node(root);
-		else
-			b = btrfs_lock_root_node(root);
+		if (p->skip_locking) {
+			b = btrfs_root_node(root);
+			level = btrfs_header_level(b);
+		} else {
+			/* we don't know the level of the root node
+			 * until we actually have it read locked
+			 */
+			b = btrfs_read_lock_root_node(root);
+			level = btrfs_header_level(b);
+			if (level <= write_lock_level) {
+				/* whoops, must trade for write lock */
+				btrfs_tree_read_unlock(b);
+				free_extent_buffer(b);
+				b = btrfs_lock_root_node(root);
+				root_lock = BTRFS_WRITE_LOCK;
+
+				/* the level might have changed, check again */
+				level = btrfs_header_level(b);
+			}
+		}
 	}
+	p->nodes[level] = b;
+	if (!p->skip_locking)
+		p->locks[level] = root_lock;
 
 	while (b) {
 		level = btrfs_header_level(b);
@@ -1644,10 +1696,6 @@ again:
 		 * setup the path here so we can release it under lock
 		 * contention with the cow code
 		 */
-		p->nodes[level] = b;
-		if (!p->skip_locking)
-			p->locks[level] = 1;
-
 		if (cow) {
 			/*
 			 * if we don't really need to cow this block
@@ -1659,6 +1707,16 @@ again:
 
 			btrfs_set_path_blocking(p);
 
+			/*
+			 * must have write locks on this node and the
+			 * parent
+			 */
+			if (level + 1 > write_lock_level) {
+				write_lock_level = level + 1;
+				btrfs_release_path(p);
+				goto again;
+			}
+
 			err = btrfs_cow_block(trans, root, b,
 					      p->nodes[level + 1],
 					      p->slots[level + 1], &b);
@@ -1671,10 +1729,7 @@ cow_done:
 		BUG_ON(!cow && ins_len);
 
 		p->nodes[level] = b;
-		if (!p->skip_locking)
-			p->locks[level] = 1;
-
-		btrfs_clear_path_blocking(p, NULL);
+		btrfs_clear_path_blocking(p, NULL, 0);
 
 		/*
 		 * we have a lock on b and as long as we aren't changing
@@ -1700,7 +1755,7 @@ cow_done:
 			}
 			p->slots[level] = slot;
 			err = setup_nodes_for_search(trans, root, p, b, level,
-						     ins_len);
+						     ins_len, &write_lock_level);
 			if (err == -EAGAIN)
 				goto again;
 			if (err) {
@@ -1710,6 +1765,19 @@ cow_done:
 			b = p->nodes[level];
 			slot = p->slots[level];
 
+			/*
+			 * slot 0 is special, if we change the key
+			 * we have to update the parent pointer
+			 * which means we must have a write lock
+			 * on the parent
+			 */
+			if (slot == 0 && cow &&
+			    write_lock_level < level + 1) {
+				write_lock_level = level + 1;
+				btrfs_release_path(p);
+				goto again;
+			}
+
 			unlock_up(p, level, lowest_unlock);
 
 			if (level == lowest_level) {
@@ -1728,23 +1796,42 @@ cow_done:
 			}
 
 			if (!p->skip_locking) {
-				btrfs_clear_path_blocking(p, NULL);
-				err = btrfs_try_spin_lock(b);
-
-				if (!err) {
-					btrfs_set_path_blocking(p);
-					btrfs_tree_lock(b);
-					btrfs_clear_path_blocking(p, b);
+				level = btrfs_header_level(b);
+				if (level <= write_lock_level) {
+					err = btrfs_try_tree_write_lock(b);
+					if (!err) {
+						btrfs_set_path_blocking(p);
+						btrfs_tree_lock(b);
+						btrfs_clear_path_blocking(p, b,
+								  BTRFS_WRITE_LOCK);
+					}
+					p->locks[level] = BTRFS_WRITE_LOCK;
+				} else {
+					err = btrfs_try_tree_read_lock(b);
+					if (!err) {
+						btrfs_set_path_blocking(p);
+						btrfs_tree_read_lock(b);
+						btrfs_clear_path_blocking(p, b,
+								  BTRFS_READ_LOCK);
+					}
+					p->locks[level] = BTRFS_READ_LOCK;
 				}
+				p->nodes[level] = b;
 			}
 		} else {
 			p->slots[level] = slot;
 			if (ins_len > 0 &&
 			    btrfs_leaf_free_space(root, b) < ins_len) {
+				if (write_lock_level < 1) {
+					write_lock_level = 1;
+					btrfs_release_path(p);
+					goto again;
+				}
+
 				btrfs_set_path_blocking(p);
 				err = split_leaf(trans, root, key,
 						 p, ins_len, ret == 0);
-				btrfs_clear_path_blocking(p, NULL);
+				btrfs_clear_path_blocking(p, NULL, 0);
 
 				BUG_ON(err > 0);
 				if (err) {
@@ -2025,7 +2112,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
 	add_root_to_dirty_list(root);
 	extent_buffer_get(c);
 	path->nodes[level] = c;
-	path->locks[level] = 1;
+	path->locks[level] = BTRFS_WRITE_LOCK;
 	path->slots[level] = 0;
 	return 0;
 }
@@ -2253,14 +2340,6 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
 		if (path->slots[0] == i)
 			push_space += data_size;
 
-		if (!left->map_token) {
-			map_extent_buffer(left, (unsigned long)item,
-					sizeof(struct btrfs_item),
-					&left->map_token, &left->kaddr,
-					&left->map_start, &left->map_len,
-					KM_USER1);
-		}
-
 		this_item_size = btrfs_item_size(left, item);
 		if (this_item_size + sizeof(*item) + push_space > free_space)
 			break;
@@ -2271,10 +2350,6 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
 			break;
 		i--;
 	}
-	if (left->map_token) {
-		unmap_extent_buffer(left, left->map_token, KM_USER1);
-		left->map_token = NULL;
-	}
 
 	if (push_items == 0)
 		goto out_unlock;
@@ -2316,21 +2391,10 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
 	push_space = BTRFS_LEAF_DATA_SIZE(root);
 	for (i = 0; i < right_nritems; i++) {
 		item = btrfs_item_nr(right, i);
-		if (!right->map_token) {
-			map_extent_buffer(right, (unsigned long)item,
-					sizeof(struct btrfs_item),
-					&right->map_token, &right->kaddr,
-					&right->map_start, &right->map_len,
-					KM_USER1);
-		}
 		push_space -= btrfs_item_size(right, item);
 		btrfs_set_item_offset(right, item, push_space);
 	}
 
-	if (right->map_token) {
-		unmap_extent_buffer(right, right->map_token, KM_USER1);
-		right->map_token = NULL;
-	}
 	left_nritems -= push_items;
 	btrfs_set_header_nritems(left, left_nritems);
 
@@ -2467,13 +2531,6 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
 
 	for (i = 0; i < nr; i++) {
 		item = btrfs_item_nr(right, i);
-		if (!right->map_token) {
-			map_extent_buffer(right, (unsigned long)item,
-					sizeof(struct btrfs_item),
-					&right->map_token, &right->kaddr,
-					&right->map_start, &right->map_len,
-					KM_USER1);
-		}
 
 		if (!empty && push_items > 0) {
 			if (path->slots[0] < i)
@@ -2496,11 +2553,6 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
 		push_space += this_item_size + sizeof(*item);
 	}
 
-	if (right->map_token) {
-		unmap_extent_buffer(right, right->map_token, KM_USER1);
-		right->map_token = NULL;
-	}
-
 	if (push_items == 0) {
 		ret = 1;
 		goto out;
@@ -2530,23 +2582,12 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
 		u32 ioff;
 
 		item = btrfs_item_nr(left, i);
-		if (!left->map_token) {
-			map_extent_buffer(left, (unsigned long)item,
-					sizeof(struct btrfs_item),
-					&left->map_token, &left->kaddr,
-					&left->map_start, &left->map_len,
-					KM_USER1);
-		}
 
 		ioff = btrfs_item_offset(left, item);
 		btrfs_set_item_offset(left, item,
 		      ioff - (BTRFS_LEAF_DATA_SIZE(root) - old_left_item_size));
 	}
 	btrfs_set_header_nritems(left, old_left_nritems + push_items);
-	if (left->map_token) {
-		unmap_extent_buffer(left, left->map_token, KM_USER1);
-		left->map_token = NULL;
-	}
 
 	/* fixup right node */
 	if (push_items > right_nritems) {
@@ -2574,21 +2615,9 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
 	for (i = 0; i < right_nritems; i++) {
 		item = btrfs_item_nr(right, i);
 
-		if (!right->map_token) {
-			map_extent_buffer(right, (unsigned long)item,
-					sizeof(struct btrfs_item),
-					&right->map_token, &right->kaddr,
-					&right->map_start, &right->map_len,
-					KM_USER1);
-		}
-
 		push_space = push_space - btrfs_item_size(right, item);
 		btrfs_set_item_offset(right, item, push_space);
 	}
-	if (right->map_token) {
-		unmap_extent_buffer(right, right->map_token, KM_USER1);
-		right->map_token = NULL;
-	}
 
 	btrfs_mark_buffer_dirty(left);
 	if (right_nritems)
@@ -2729,23 +2758,10 @@ static noinline int copy_for_split(struct btrfs_trans_handle *trans,
 		struct btrfs_item *item = btrfs_item_nr(right, i);
 		u32 ioff;
 
-		if (!right->map_token) {
-			map_extent_buffer(right, (unsigned long)item,
-					sizeof(struct btrfs_item),
-					&right->map_token, &right->kaddr,
-					&right->map_start, &right->map_len,
-					KM_USER1);
-		}
-
 		ioff = btrfs_item_offset(right, item);
 		btrfs_set_item_offset(right, item, ioff + rt_data_off);
 	}
 
-	if (right->map_token) {
-		unmap_extent_buffer(right, right->map_token, KM_USER1);
-		right->map_token = NULL;
-	}
-
 	btrfs_set_header_nritems(l, mid);
 	ret = 0;
 	btrfs_item_key(right, &disk_key, 0);
@@ -3264,23 +3280,10 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans,
 		u32 ioff;
 		item = btrfs_item_nr(leaf, i);
 
-		if (!leaf->map_token) {
-			map_extent_buffer(leaf, (unsigned long)item,
-					sizeof(struct btrfs_item),
-					&leaf->map_token, &leaf->kaddr,
-					&leaf->map_start, &leaf->map_len,
-					KM_USER1);
-		}
-
 		ioff = btrfs_item_offset(leaf, item);
 		btrfs_set_item_offset(leaf, item, ioff + size_diff);
 	}
 
-	if (leaf->map_token) {
-		unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
-		leaf->map_token = NULL;
-	}
-
 	/* shift the data */
 	if (from_end) {
 		memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
@@ -3377,22 +3380,10 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans,
 		u32 ioff;
 		item = btrfs_item_nr(leaf, i);
 
-		if (!leaf->map_token) {
-			map_extent_buffer(leaf, (unsigned long)item,
-					sizeof(struct btrfs_item),
-					&leaf->map_token, &leaf->kaddr,
-					&leaf->map_start, &leaf->map_len,
-					KM_USER1);
-		}
 		ioff = btrfs_item_offset(leaf, item);
 		btrfs_set_item_offset(leaf, item, ioff - data_size);
 	}
 
-	if (leaf->map_token) {
-		unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
-		leaf->map_token = NULL;
-	}
-
 	/* shift the data */
 	memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
 			      data_end - data_size, btrfs_leaf_data(leaf) +
@@ -3494,27 +3485,13 @@ int btrfs_insert_some_items(struct btrfs_trans_handle *trans,
 	 * item0..itemN ... dataN.offset..dataN.size .. data0.size
 	 */
 	/* first correct the data pointers */
-	WARN_ON(leaf->map_token);
 	for (i = slot; i < nritems; i++) {
 		u32 ioff;
 
 		item = btrfs_item_nr(leaf, i);
-		if (!leaf->map_token) {
-			map_extent_buffer(leaf, (unsigned long)item,
-					sizeof(struct btrfs_item),
-					&leaf->map_token, &leaf->kaddr,
-					&leaf->map_start, &leaf->map_len,
-					KM_USER1);
-		}
-
 		ioff = btrfs_item_offset(leaf, item);
 		btrfs_set_item_offset(leaf, item, ioff - total_data);
 	}
-	if (leaf->map_token) {
-		unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
-		leaf->map_token = NULL;
-	}
-
 	/* shift the items */
 	memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
 		      btrfs_item_nr_offset(slot),
@@ -3608,27 +3585,13 @@ int setup_items_for_insert(struct btrfs_trans_handle *trans,
 	 * item0..itemN ... dataN.offset..dataN.size .. data0.size
 	 */
 	/* first correct the data pointers */
-	WARN_ON(leaf->map_token);
 	for (i = slot; i < nritems; i++) {
 		u32 ioff;
 
 		item = btrfs_item_nr(leaf, i);
-		if (!leaf->map_token) {
-			map_extent_buffer(leaf, (unsigned long)item,
-					sizeof(struct btrfs_item),
-					&leaf->map_token, &leaf->kaddr,
-					&leaf->map_start, &leaf->map_len,
-					KM_USER1);
-		}
-
 		ioff = btrfs_item_offset(leaf, item);
 		btrfs_set_item_offset(leaf, item, ioff - total_data);
 	}
-	if (leaf->map_token) {
-		unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
-		leaf->map_token = NULL;
-	}
-
 	/* shift the items */
 	memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
 		      btrfs_item_nr_offset(slot),
@@ -3840,22 +3803,10 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 			u32 ioff;
 
 			item = btrfs_item_nr(leaf, i);
-			if (!leaf->map_token) {
-				map_extent_buffer(leaf, (unsigned long)item,
-					sizeof(struct btrfs_item),
-					&leaf->map_token, &leaf->kaddr,
-					&leaf->map_start, &leaf->map_len,
-					KM_USER1);
-			}
 			ioff = btrfs_item_offset(leaf, item);
 			btrfs_set_item_offset(leaf, item, ioff + dsize);
 		}
 
-		if (leaf->map_token) {
-			unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
-			leaf->map_token = NULL;
-		}
-
 		memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot),
 			      btrfs_item_nr_offset(slot + nr),
 			      sizeof(struct btrfs_item) *
@@ -4004,11 +3955,11 @@ int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
 
 	WARN_ON(!path->keep_locks);
 again:
-	cur = btrfs_lock_root_node(root);
+	cur = btrfs_read_lock_root_node(root);
 	level = btrfs_header_level(cur);
 	WARN_ON(path->nodes[level]);
 	path->nodes[level] = cur;
-	path->locks[level] = 1;
+	path->locks[level] = BTRFS_READ_LOCK;
 
 	if (btrfs_header_generation(cur) < min_trans) {
 		ret = 1;
@@ -4098,12 +4049,12 @@ find_next_key:
 		cur = read_node_slot(root, cur, slot);
 		BUG_ON(!cur);
 
-		btrfs_tree_lock(cur);
+		btrfs_tree_read_lock(cur);
 
-		path->locks[level - 1] = 1;
+		path->locks[level - 1] = BTRFS_READ_LOCK;
 		path->nodes[level - 1] = cur;
 		unlock_up(path, level, 1);
-		btrfs_clear_path_blocking(path, NULL);
+		btrfs_clear_path_blocking(path, NULL, 0);
 	}
 out:
 	if (ret == 0)
@@ -4218,30 +4169,21 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
 	u32 nritems;
 	int ret;
 	int old_spinning = path->leave_spinning;
-	int force_blocking = 0;
+	int next_rw_lock = 0;
 
 	nritems = btrfs_header_nritems(path->nodes[0]);
 	if (nritems == 0)
 		return 1;
 
-	/*
-	 * we take the blocks in an order that upsets lockdep.  Using
-	 * blocking mode is the only way around it.
-	 */
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-	force_blocking = 1;
-#endif
-
 	btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1);
 again:
 	level = 1;
 	next = NULL;
+	next_rw_lock = 0;
 	btrfs_release_path(path);
 
 	path->keep_locks = 1;
-
-	if (!force_blocking)
-		path->leave_spinning = 1;
+	path->leave_spinning = 1;
 
 	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
 	path->keep_locks = 0;
@@ -4281,11 +4223,12 @@ again:
 		}
 
 		if (next) {
-			btrfs_tree_unlock(next);
+			btrfs_tree_unlock_rw(next, next_rw_lock);
 			free_extent_buffer(next);
 		}
 
 		next = c;
+		next_rw_lock = path->locks[level];
 		ret = read_block_for_search(NULL, root, path, &next, level,
 					    slot, &key);
 		if (ret == -EAGAIN)
@@ -4297,15 +4240,14 @@ again:
 		}
 
 		if (!path->skip_locking) {
-			ret = btrfs_try_spin_lock(next);
+			ret = btrfs_try_tree_read_lock(next);
 			if (!ret) {
 				btrfs_set_path_blocking(path);
-				btrfs_tree_lock(next);
-				if (!force_blocking)
-					btrfs_clear_path_blocking(path, next);
+				btrfs_tree_read_lock(next);
+				btrfs_clear_path_blocking(path, next,
+							  BTRFS_READ_LOCK);
 			}
-			if (force_blocking)
-				btrfs_set_lock_blocking(next);
+			next_rw_lock = BTRFS_READ_LOCK;
 		}
 		break;
 	}
@@ -4314,14 +4256,13 @@ again:
 		level--;
 		c = path->nodes[level];
 		if (path->locks[level])
-			btrfs_tree_unlock(c);
+			btrfs_tree_unlock_rw(c, path->locks[level]);
 
 		free_extent_buffer(c);
 		path->nodes[level] = next;
 		path->slots[level] = 0;
 		if (!path->skip_locking)
-			path->locks[level] = 1;
-
+			path->locks[level] = next_rw_lock;
 		if (!level)
 			break;
 
@@ -4336,16 +4277,14 @@ again:
 		}
 
 		if (!path->skip_locking) {
-			btrfs_assert_tree_locked(path->nodes[level]);
-			ret = btrfs_try_spin_lock(next);
+			ret = btrfs_try_tree_read_lock(next);
 			if (!ret) {
 				btrfs_set_path_blocking(path);
-				btrfs_tree_lock(next);
-				if (!force_blocking)
-					btrfs_clear_path_blocking(path, next);
+				btrfs_tree_read_lock(next);
+				btrfs_clear_path_blocking(path, next,
+							  BTRFS_READ_LOCK);
 			}
-			if (force_blocking)
-				btrfs_set_lock_blocking(next);
+			next_rw_lock = BTRFS_READ_LOCK;
 		}
 	}
 	ret = 0;
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index f30ac05dbda7..0469263e327e 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -755,6 +755,8 @@ struct btrfs_space_info {
 				   chunks for this space */
 	unsigned int chunk_alloc:1;	/* set if we are allocating a chunk */
 
+	unsigned int flush:1;		/* set if we are trying to make space */
+
 	unsigned int force_alloc;	/* set if we need to force a chunk
 					   alloc for this space */
 
@@ -764,7 +766,7 @@ struct btrfs_space_info {
 	struct list_head block_groups[BTRFS_NR_RAID_TYPES];
 	spinlock_t lock;
 	struct rw_semaphore groups_sem;
-	atomic_t caching_threads;
+	wait_queue_head_t wait;
 };
 
 struct btrfs_block_rsv {
@@ -824,6 +826,7 @@ struct btrfs_caching_control {
 	struct list_head list;
 	struct mutex mutex;
 	wait_queue_head_t wait;
+	struct btrfs_work work;
 	struct btrfs_block_group_cache *block_group;
 	u64 progress;
 	atomic_t count;
@@ -1032,6 +1035,8 @@ struct btrfs_fs_info {
 	struct btrfs_workers endio_write_workers;
 	struct btrfs_workers endio_freespace_worker;
 	struct btrfs_workers submit_workers;
+	struct btrfs_workers caching_workers;
+
 	/*
 	 * fixup workers take dirty pages that didn't properly go through
 	 * the cow mechanism and make them safe to write.  It happens
@@ -1219,7 +1224,7 @@ struct btrfs_root {
 	 * right now this just gets used so that a root has its own devid
 	 * for stat.  It may be used for more later
 	 */
-	struct super_block anon_super;
+	dev_t anon_dev;
 };
 
1225struct btrfs_ioctl_defrag_range_args { 1230struct btrfs_ioctl_defrag_range_args {
@@ -1335,6 +1340,11 @@ struct btrfs_ioctl_defrag_range_args {
  */
 #define BTRFS_STRING_ITEM_KEY	253
 
+/*
+ * Flags for mount options.
+ *
+ * Note: don't forget to add new options to btrfs_show_options()
+ */
 #define BTRFS_MOUNT_NODATASUM		(1 << 0)
 #define BTRFS_MOUNT_NODATACOW		(1 << 1)
 #define BTRFS_MOUNT_NOBARRIER		(1 << 2)
@@ -2123,7 +2133,7 @@ static inline bool btrfs_mixed_space_info(struct btrfs_space_info *space_info)
 
 /* extent-tree.c */
 static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_root *root,
-						 int num_items)
+						 unsigned num_items)
 {
 	return (root->leafsize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) *
 		3 * num_items;
@@ -2217,9 +2227,6 @@ void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde);
 void btrfs_clear_space_info_full(struct btrfs_fs_info *info);
 int btrfs_check_data_free_space(struct inode *inode, u64 bytes);
 void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes);
-int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
-				 struct btrfs_root *root,
-				 int num_items);
 void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
 				  struct btrfs_root *root);
 int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
@@ -2325,7 +2332,7 @@ struct btrfs_path *btrfs_alloc_path(void);
 void btrfs_free_path(struct btrfs_path *p);
 void btrfs_set_path_blocking(struct btrfs_path *p);
 void btrfs_clear_path_blocking(struct btrfs_path *p,
-			       struct extent_buffer *held);
+			       struct extent_buffer *held, int held_rw);
 void btrfs_unlock_up_safe(struct btrfs_path *p, int level);
 
 int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
@@ -2399,8 +2406,8 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct
 		      btrfs_root_item *item, struct btrfs_key *key);
 int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid);
 int btrfs_find_orphan_roots(struct btrfs_root *tree_root);
-int btrfs_set_root_node(struct btrfs_root_item *item,
-			struct extent_buffer *node);
+void btrfs_set_root_node(struct btrfs_root_item *item,
+			 struct extent_buffer *node);
 void btrfs_check_and_init_root_item(struct btrfs_root_item *item);
 
 /* dir-item.c */
@@ -2505,6 +2512,9 @@ int btrfs_csum_truncate(struct btrfs_trans_handle *trans,
 int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
 			     struct list_head *list, int search_commit);
 /* inode.c */
+struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *page,
+					   size_t pg_offset, u64 start, u64 len,
+					   int create);
 
 /* RHEL and EL kernels have a patch that renames PG_checked to FsMisc */
 #if defined(ClearPageFsMisc) && !defined(ClearPageChecked)
@@ -2513,6 +2523,14 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
 #define PageChecked PageFsMisc
 #endif
 
+/* This forces readahead on a given range of bytes in an inode */
+static inline void btrfs_force_ra(struct address_space *mapping,
+				  struct file_ra_state *ra, struct file *file,
+				  pgoff_t offset, unsigned long req_size)
+{
+	page_cache_sync_readahead(mapping, ra, file, offset, req_size);
+}
+
 struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry);
 int btrfs_set_inode_index(struct inode *dir, u64 *index);
 int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
@@ -2541,9 +2559,6 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
 int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
 			 size_t size, struct bio *bio, unsigned long bio_flags);
 
-unsigned long btrfs_force_ra(struct address_space *mapping,
-			      struct file_ra_state *ra, struct file *file,
-			      pgoff_t offset, pgoff_t last_index);
 int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
 int btrfs_readpage(struct file *file, struct page *page);
 void btrfs_evict_inode(struct inode *inode);
@@ -2597,7 +2612,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
2597int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, 2612int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
2598 struct inode *inode); 2613 struct inode *inode);
2599int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info); 2614int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info);
2600int btrfs_sync_file(struct file *file, int datasync); 2615int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync);
2601int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, 2616int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
2602 int skip_pinned); 2617 int skip_pinned);
2603extern const struct file_operations btrfs_file_operations; 2618extern const struct file_operations btrfs_file_operations;
@@ -2637,13 +2652,22 @@ do { \
2637 2652
2638/* acl.c */ 2653/* acl.c */
2639#ifdef CONFIG_BTRFS_FS_POSIX_ACL 2654#ifdef CONFIG_BTRFS_FS_POSIX_ACL
2640int btrfs_check_acl(struct inode *inode, int mask, unsigned int flags); 2655struct posix_acl *btrfs_get_acl(struct inode *inode, int type);
2641#else
2642#define btrfs_check_acl NULL
2643#endif
2644int btrfs_init_acl(struct btrfs_trans_handle *trans, 2656int btrfs_init_acl(struct btrfs_trans_handle *trans,
2645 struct inode *inode, struct inode *dir); 2657 struct inode *inode, struct inode *dir);
2646int btrfs_acl_chmod(struct inode *inode); 2658int btrfs_acl_chmod(struct inode *inode);
2659#else
2660#define btrfs_get_acl NULL
2661static inline int btrfs_init_acl(struct btrfs_trans_handle *trans,
2662 struct inode *inode, struct inode *dir)
2663{
2664 return 0;
2665}
2666static inline int btrfs_acl_chmod(struct inode *inode)
2667{
2668 return 0;
2669}
2670#endif
2647 2671
2648/* relocation.c */ 2672/* relocation.c */
2649int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start); 2673int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start);
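The ctree.h hunk above is the usual compile-out idiom: with CONFIG_BTRFS_FS_POSIX_ACL disabled, btrfs_get_acl becomes a plain NULL (the VFS treats a NULL ACL hook as "no ACLs here") and the two helpers collapse into no-op inlines, so call sites never need their own #ifdefs. A minimal userspace sketch of the idiom, with made-up names only:

#include <stdio.h>

#define FEATURE_ACL 0                  /* stand-in for CONFIG_BTRFS_FS_POSIX_ACL */

#if FEATURE_ACL
static int feature_init_acl(int inode)
{
	printf("copying parent ACLs onto inode %d\n", inode);
	return 0;
}
#else
/* compiled out: same signature, always succeeds, costs nothing */
static inline int feature_init_acl(int inode)
{
	(void)inode;
	return 0;
}
#endif

int main(void)
{
	return feature_init_acl(42);   /* the call site is identical either way */
}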
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 98c68e658a9b..b52c672f4c18 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -735,7 +735,7 @@ static int btrfs_batch_insert_items(struct btrfs_trans_handle *trans,
735 } 735 }
736 736
 737 /* reset all the locked nodes in the path to spinning locks. */ 737
738 btrfs_clear_path_blocking(path, NULL); 738 btrfs_clear_path_blocking(path, NULL, 0);
739 739
740 /* insert the keys of the items */ 740 /* insert the keys of the items */
741 ret = setup_items_for_insert(trans, root, path, keys, data_size, 741 ret = setup_items_for_insert(trans, root, path, keys, data_size,
diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h
index 8d27af4bd8b9..7083d08b2a21 100644
--- a/fs/btrfs/delayed-inode.h
+++ b/fs/btrfs/delayed-inode.h
@@ -25,7 +25,7 @@
25#include <linux/mutex.h> 25#include <linux/mutex.h>
26#include <linux/list.h> 26#include <linux/list.h>
27#include <linux/wait.h> 27#include <linux/wait.h>
28#include <asm/atomic.h> 28#include <linux/atomic.h>
29 29
30#include "ctree.h" 30#include "ctree.h"
31 31
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index 685f2593c4f0..31d84e78129b 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -89,13 +89,8 @@ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
89 data_size = sizeof(*dir_item) + name_len + data_len; 89 data_size = sizeof(*dir_item) + name_len + data_len;
90 dir_item = insert_with_overflow(trans, root, path, &key, data_size, 90 dir_item = insert_with_overflow(trans, root, path, &key, data_size,
91 name, name_len); 91 name, name_len);
92 /* 92 if (IS_ERR(dir_item))
93 * FIXME: at some point we should handle xattr's that are larger than 93 return PTR_ERR(dir_item);
94 * what we can fit in our leaf. We set location to NULL b/c we arent
95 * pointing at anything else, that will change if we store the xattr
96 * data in a separate inode.
97 */
98 BUG_ON(IS_ERR(dir_item));
99 memset(&location, 0, sizeof(location)); 94 memset(&location, 0, sizeof(location));
100 95
101 leaf = path->nodes[0]; 96 leaf = path->nodes[0];
@@ -203,8 +198,6 @@ struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans,
203 struct btrfs_key key; 198 struct btrfs_key key;
204 int ins_len = mod < 0 ? -1 : 0; 199 int ins_len = mod < 0 ? -1 : 0;
205 int cow = mod != 0; 200 int cow = mod != 0;
206 struct btrfs_key found_key;
207 struct extent_buffer *leaf;
208 201
209 key.objectid = dir; 202 key.objectid = dir;
210 btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); 203 btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY);
@@ -214,18 +207,7 @@ struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans,
214 ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow); 207 ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow);
215 if (ret < 0) 208 if (ret < 0)
216 return ERR_PTR(ret); 209 return ERR_PTR(ret);
217 if (ret > 0) { 210 if (ret > 0)
218 if (path->slots[0] == 0)
219 return NULL;
220 path->slots[0]--;
221 }
222
223 leaf = path->nodes[0];
224 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
225
226 if (found_key.objectid != dir ||
227 btrfs_key_type(&found_key) != BTRFS_DIR_ITEM_KEY ||
228 found_key.offset != key.offset)
229 return NULL; 211 return NULL;
230 212
231 return btrfs_match_dir_item_name(root, path, name, name_len); 213 return btrfs_match_dir_item_name(root, path, name, name_len);
@@ -320,8 +302,6 @@ struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans,
320 struct btrfs_key key; 302 struct btrfs_key key;
321 int ins_len = mod < 0 ? -1 : 0; 303 int ins_len = mod < 0 ? -1 : 0;
322 int cow = mod != 0; 304 int cow = mod != 0;
323 struct btrfs_key found_key;
324 struct extent_buffer *leaf;
325 305
326 key.objectid = dir; 306 key.objectid = dir;
327 btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY); 307 btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY);
@@ -329,18 +309,7 @@ struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans,
329 ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow); 309 ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow);
330 if (ret < 0) 310 if (ret < 0)
331 return ERR_PTR(ret); 311 return ERR_PTR(ret);
332 if (ret > 0) { 312 if (ret > 0)
333 if (path->slots[0] == 0)
334 return NULL;
335 path->slots[0]--;
336 }
337
338 leaf = path->nodes[0];
339 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
340
341 if (found_key.objectid != dir ||
342 btrfs_key_type(&found_key) != BTRFS_XATTR_ITEM_KEY ||
343 found_key.offset != key.offset)
344 return NULL; 313 return NULL;
345 314
346 return btrfs_match_dir_item_name(root, path, name, name_len); 315 return btrfs_match_dir_item_name(root, path, name, name_len);
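Both lookup helpers get the same simplification, and it leans on one invariant: dir items and xattr items are stored under an exact key (directory objectid, item type, name hash), so btrfs_search_slot() either lands on that key (ret == 0) or proves it absent (ret > 0). The removed back-up-one-slot-and-recheck sequence could therefore never match anything. A userspace sketch of the same exact-match contract over a sorted array (illustrative only, not btrfs code):

#include <stdio.h>

static int search_slot(const long *keys, int n, long key, int *slot)
{
	int lo = 0, hi = n;

	while (lo < hi) {
		int mid = lo + (hi - lo) / 2;

		if (keys[mid] == key) {
			*slot = mid;
			return 0;        /* exact hit, like ret == 0 */
		}
		if (keys[mid] < key)
			lo = mid + 1;
		else
			hi = mid;
	}
	*slot = lo;                      /* where the key would be inserted */
	return 1;                        /* not present, like ret > 0 */
}

int main(void)
{
	const long keys[] = { 10, 20, 30 };
	int slot;

	if (search_slot(keys, 3, 25, &slot))
		printf("key 25 absent; the callers above just return NULL\n");
	return 0;
}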
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 1ac8db5dc0a3..07b3ac662e19 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -100,38 +100,83 @@ struct async_submit_bio {
100 struct btrfs_work work; 100 struct btrfs_work work;
101}; 101};
102 102
103/* These are used to set the lockdep class on the extent buffer locks. 103/*
 104 * The class is set by the readpage_end_io_hook after the buffer has 104 * Lockdep class keys for extent_buffer->lock in this root. For a given
105 * passed csum validation but before the pages are unlocked. 105 * eb, the lockdep key is determined by the btrfs_root it belongs to and
106 * the level the eb occupies in the tree.
107 *
 108 * Different roots are used for different purposes, may nest inside each
 109 * other, and so require separate keysets. As lockdep keys should be
110 * static, assign keysets according to the purpose of the root as indicated
111 * by btrfs_root->objectid. This ensures that all special purpose roots
112 * have separate keysets.
106 * 113 *
107 * The lockdep class is also set by btrfs_init_new_buffer on freshly 114 * Lock-nesting across peer nodes is always done with the immediate parent
 108 * allocated blocks. 115 * node locked, thus preventing deadlock. As lockdep doesn't know this, use
 116 * a subclass to avoid triggering lockdep warnings in such cases.
109 * 117 *
110 * The class is based on the level in the tree block, which allows lockdep 118 * The key is set by the readpage_end_io_hook after the buffer has passed
111 * to know that lower nodes nest inside the locks of higher nodes. 119 * csum validation but before the pages are unlocked. It is also set by
120 * btrfs_init_new_buffer on freshly allocated blocks.
112 * 121 *
113 * We also add a check to make sure the highest level of the tree is 122 * We also add a check to make sure the highest level of the tree is the
114 * the same as our lockdep setup here. If BTRFS_MAX_LEVEL changes, this 123 * same as our lockdep setup here. If BTRFS_MAX_LEVEL changes, this code
115 * code needs update as well. 124 * needs update as well.
116 */ 125 */
117#ifdef CONFIG_DEBUG_LOCK_ALLOC 126#ifdef CONFIG_DEBUG_LOCK_ALLOC
118# if BTRFS_MAX_LEVEL != 8 127# if BTRFS_MAX_LEVEL != 8
119# error 128# error
120# endif 129# endif
121static struct lock_class_key btrfs_eb_class[BTRFS_MAX_LEVEL + 1]; 130
122static const char *btrfs_eb_name[BTRFS_MAX_LEVEL + 1] = { 131static struct btrfs_lockdep_keyset {
123 /* leaf */ 132 u64 id; /* root objectid */
124 "btrfs-extent-00", 133 const char *name_stem; /* lock name stem */
125 "btrfs-extent-01", 134 char names[BTRFS_MAX_LEVEL + 1][20];
126 "btrfs-extent-02", 135 struct lock_class_key keys[BTRFS_MAX_LEVEL + 1];
127 "btrfs-extent-03", 136} btrfs_lockdep_keysets[] = {
128 "btrfs-extent-04", 137 { .id = BTRFS_ROOT_TREE_OBJECTID, .name_stem = "root" },
129 "btrfs-extent-05", 138 { .id = BTRFS_EXTENT_TREE_OBJECTID, .name_stem = "extent" },
130 "btrfs-extent-06", 139 { .id = BTRFS_CHUNK_TREE_OBJECTID, .name_stem = "chunk" },
131 "btrfs-extent-07", 140 { .id = BTRFS_DEV_TREE_OBJECTID, .name_stem = "dev" },
132 /* highest possible level */ 141 { .id = BTRFS_FS_TREE_OBJECTID, .name_stem = "fs" },
133 "btrfs-extent-08", 142 { .id = BTRFS_CSUM_TREE_OBJECTID, .name_stem = "csum" },
143 { .id = BTRFS_ORPHAN_OBJECTID, .name_stem = "orphan" },
144 { .id = BTRFS_TREE_LOG_OBJECTID, .name_stem = "log" },
145 { .id = BTRFS_TREE_RELOC_OBJECTID, .name_stem = "treloc" },
146 { .id = BTRFS_DATA_RELOC_TREE_OBJECTID, .name_stem = "dreloc" },
147 { .id = 0, .name_stem = "tree" },
134}; 148};
149
150void __init btrfs_init_lockdep(void)
151{
152 int i, j;
153
154 /* initialize lockdep class names */
155 for (i = 0; i < ARRAY_SIZE(btrfs_lockdep_keysets); i++) {
156 struct btrfs_lockdep_keyset *ks = &btrfs_lockdep_keysets[i];
157
158 for (j = 0; j < ARRAY_SIZE(ks->names); j++)
159 snprintf(ks->names[j], sizeof(ks->names[j]),
160 "btrfs-%s-%02d", ks->name_stem, j);
161 }
162}
163
164void btrfs_set_buffer_lockdep_class(u64 objectid, struct extent_buffer *eb,
165 int level)
166{
167 struct btrfs_lockdep_keyset *ks;
168
169 BUG_ON(level >= ARRAY_SIZE(ks->keys));
170
171 /* find the matching keyset, id 0 is the default entry */
172 for (ks = btrfs_lockdep_keysets; ks->id; ks++)
173 if (ks->id == objectid)
174 break;
175
176 lockdep_set_class_and_name(&eb->lock,
177 &ks->keys[level], ks->names[level]);
178}
179
135#endif 180#endif
136 181
137/* 182/*
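For reference, the table scan in btrfs_set_buffer_lockdep_class() relies on the id == 0 row doubling as both terminator and default. A compilable userspace model of just that lookup (the real function then hands the chosen key and name to lockdep_set_class_and_name(); the ids here are placeholders for the BTRFS_*_OBJECTID values in the hunk):

#include <stdio.h>

static struct keyset {
	unsigned long id;      /* root objectid; 0 terminates and catches all */
	const char *stem;
} keysets[] = {
	{ .id = 1, .stem = "root"   },
	{ .id = 2, .stem = "extent" },
	{ .id = 0, .stem = "tree"   },
};

static const char *lock_stem(unsigned long objectid)
{
	struct keyset *ks;

	for (ks = keysets; ks->id; ks++)
		if (ks->id == objectid)
			break;
	return ks->stem;       /* falls out on the id == 0 default row */
}

int main(void)
{
	printf("objectid 2 -> btrfs-%s-00\n", lock_stem(2));   /* extent */
	printf("objectid 7 -> btrfs-%s-00\n", lock_stem(7));   /* tree   */
	return 0;
}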
@@ -217,7 +262,6 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
217 unsigned long len; 262 unsigned long len;
218 unsigned long cur_len; 263 unsigned long cur_len;
219 unsigned long offset = BTRFS_CSUM_SIZE; 264 unsigned long offset = BTRFS_CSUM_SIZE;
220 char *map_token = NULL;
221 char *kaddr; 265 char *kaddr;
222 unsigned long map_start; 266 unsigned long map_start;
223 unsigned long map_len; 267 unsigned long map_len;
@@ -228,8 +272,7 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
228 len = buf->len - offset; 272 len = buf->len - offset;
229 while (len > 0) { 273 while (len > 0) {
230 err = map_private_extent_buffer(buf, offset, 32, 274 err = map_private_extent_buffer(buf, offset, 32,
231 &map_token, &kaddr, 275 &kaddr, &map_start, &map_len);
232 &map_start, &map_len, KM_USER0);
233 if (err) 276 if (err)
234 return 1; 277 return 1;
235 cur_len = min(len, map_len - (offset - map_start)); 278 cur_len = min(len, map_len - (offset - map_start));
@@ -237,7 +280,6 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
237 crc, cur_len); 280 crc, cur_len);
238 len -= cur_len; 281 len -= cur_len;
239 offset += cur_len; 282 offset += cur_len;
240 unmap_extent_buffer(buf, map_token, KM_USER0);
241 } 283 }
242 if (csum_size > sizeof(inline_result)) { 284 if (csum_size > sizeof(inline_result)) {
243 result = kzalloc(csum_size * sizeof(char), GFP_NOFS); 285 result = kzalloc(csum_size * sizeof(char), GFP_NOFS);
@@ -494,15 +536,6 @@ static noinline int check_leaf(struct btrfs_root *root,
494 return 0; 536 return 0;
495} 537}
496 538
497#ifdef CONFIG_DEBUG_LOCK_ALLOC
498void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, int level)
499{
500 lockdep_set_class_and_name(&eb->lock,
501 &btrfs_eb_class[level],
502 btrfs_eb_name[level]);
503}
504#endif
505
506static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, 539static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
507 struct extent_state *state) 540 struct extent_state *state)
508{ 541{
@@ -553,7 +586,8 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
553 } 586 }
554 found_level = btrfs_header_level(eb); 587 found_level = btrfs_header_level(eb);
555 588
556 btrfs_set_buffer_lockdep_class(eb, found_level); 589 btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb),
590 eb, found_level);
557 591
558 ret = csum_tree_block(root, eb, 1); 592 ret = csum_tree_block(root, eb, 1);
559 if (ret) { 593 if (ret) {
@@ -1077,12 +1111,7 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
1077 init_completion(&root->kobj_unregister); 1111 init_completion(&root->kobj_unregister);
1078 root->defrag_running = 0; 1112 root->defrag_running = 0;
1079 root->root_key.objectid = objectid; 1113 root->root_key.objectid = objectid;
1080 root->anon_super.s_root = NULL; 1114 root->anon_dev = 0;
1081 root->anon_super.s_dev = 0;
1082 INIT_LIST_HEAD(&root->anon_super.s_list);
1083 INIT_LIST_HEAD(&root->anon_super.s_instances);
1084 init_rwsem(&root->anon_super.s_umount);
1085
1086 return 0; 1115 return 0;
1087} 1116}
1088 1117
@@ -1311,7 +1340,7 @@ again:
1311 spin_lock_init(&root->cache_lock); 1340 spin_lock_init(&root->cache_lock);
1312 init_waitqueue_head(&root->cache_wait); 1341 init_waitqueue_head(&root->cache_wait);
1313 1342
1314 ret = set_anon_super(&root->anon_super, NULL); 1343 ret = get_anon_bdev(&root->anon_dev);
1315 if (ret) 1344 if (ret)
1316 goto fail; 1345 goto fail;
1317 1346
@@ -1603,7 +1632,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1603 goto fail_bdi; 1632 goto fail_bdi;
1604 } 1633 }
1605 1634
1606 fs_info->btree_inode->i_mapping->flags &= ~__GFP_FS; 1635 mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS);
1607 1636
1608 INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC); 1637 INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC);
1609 INIT_LIST_HEAD(&fs_info->trans_list); 1638 INIT_LIST_HEAD(&fs_info->trans_list);
@@ -1807,6 +1836,9 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1807 fs_info->thread_pool_size), 1836 fs_info->thread_pool_size),
1808 &fs_info->generic_worker); 1837 &fs_info->generic_worker);
1809 1838
1839 btrfs_init_workers(&fs_info->caching_workers, "cache",
1840 2, &fs_info->generic_worker);
1841
1810 /* a higher idle thresh on the submit workers makes it much more 1842 /* a higher idle thresh on the submit workers makes it much more
 1811 * likely that bios will be sent down in a sane order to the 1843
1812 * devices 1844 * devices
@@ -1860,6 +1892,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1860 btrfs_start_workers(&fs_info->endio_write_workers, 1); 1892 btrfs_start_workers(&fs_info->endio_write_workers, 1);
1861 btrfs_start_workers(&fs_info->endio_freespace_worker, 1); 1893 btrfs_start_workers(&fs_info->endio_freespace_worker, 1);
1862 btrfs_start_workers(&fs_info->delayed_workers, 1); 1894 btrfs_start_workers(&fs_info->delayed_workers, 1);
1895 btrfs_start_workers(&fs_info->caching_workers, 1);
1863 1896
1864 fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); 1897 fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super);
1865 fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, 1898 fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages,
@@ -2117,6 +2150,7 @@ fail_sb_buffer:
2117 btrfs_stop_workers(&fs_info->endio_freespace_worker); 2150 btrfs_stop_workers(&fs_info->endio_freespace_worker);
2118 btrfs_stop_workers(&fs_info->submit_workers); 2151 btrfs_stop_workers(&fs_info->submit_workers);
2119 btrfs_stop_workers(&fs_info->delayed_workers); 2152 btrfs_stop_workers(&fs_info->delayed_workers);
2153 btrfs_stop_workers(&fs_info->caching_workers);
2120fail_alloc: 2154fail_alloc:
2121 kfree(fs_info->delayed_root); 2155 kfree(fs_info->delayed_root);
2122fail_iput: 2156fail_iput:
@@ -2393,10 +2427,8 @@ static void free_fs_root(struct btrfs_root *root)
2393{ 2427{
2394 iput(root->cache_inode); 2428 iput(root->cache_inode);
2395 WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree)); 2429 WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree));
2396 if (root->anon_super.s_dev) { 2430 if (root->anon_dev)
2397 down_write(&root->anon_super.s_umount); 2431 free_anon_bdev(root->anon_dev);
2398 kill_anon_super(&root->anon_super);
2399 }
2400 free_extent_buffer(root->node); 2432 free_extent_buffer(root->node);
2401 free_extent_buffer(root->commit_root); 2433 free_extent_buffer(root->commit_root);
2402 kfree(root->free_ino_ctl); 2434 kfree(root->free_ino_ctl);
@@ -2584,6 +2616,7 @@ int close_ctree(struct btrfs_root *root)
2584 btrfs_stop_workers(&fs_info->endio_freespace_worker); 2616 btrfs_stop_workers(&fs_info->endio_freespace_worker);
2585 btrfs_stop_workers(&fs_info->submit_workers); 2617 btrfs_stop_workers(&fs_info->submit_workers);
2586 btrfs_stop_workers(&fs_info->delayed_workers); 2618 btrfs_stop_workers(&fs_info->delayed_workers);
2619 btrfs_stop_workers(&fs_info->caching_workers);
2587 2620
2588 btrfs_close_devices(fs_info->fs_devices); 2621 btrfs_close_devices(fs_info->fs_devices);
2589 btrfs_mapping_tree_free(&fs_info->mapping_tree); 2622 btrfs_mapping_tree_free(&fs_info->mapping_tree);
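The anon_super removal is worth a note: each subvolume root only ever needed a unique device number for st_dev, so carrying a whole dummy super_block (and taking s_umount just to tear it down) was overkill. The new pairing is simply get_anon_bdev()/free_anon_bdev(). A kernel-style sketch of the pattern, trimmed down and not a drop-in patch:

#include <linux/fs.h>

struct subvol {
	dev_t anon_dev;        /* replaces the embedded anonymous super_block */
};

static int subvol_init(struct subvol *s)
{
	/* was: set_anon_super(&root->anon_super, NULL) */
	return get_anon_bdev(&s->anon_dev);
}

static void subvol_free(struct subvol *s)
{
	/* was: down_write(&anon_super.s_umount) + kill_anon_super() */
	if (s->anon_dev)
		free_anon_bdev(s->anon_dev);
}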
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index a0b610a67aae..bec3ea4bd67f 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -87,10 +87,14 @@ int btree_lock_page_hook(struct page *page);
87 87
88 88
89#ifdef CONFIG_DEBUG_LOCK_ALLOC 89#ifdef CONFIG_DEBUG_LOCK_ALLOC
90void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, int level); 90void btrfs_init_lockdep(void);
91void btrfs_set_buffer_lockdep_class(u64 objectid,
92 struct extent_buffer *eb, int level);
91#else 93#else
92static inline void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, 94static inline void btrfs_init_lockdep(void)
93 int level) 95{ }
96static inline void btrfs_set_buffer_lockdep_class(u64 objectid,
97 struct extent_buffer *eb, int level)
94{ 98{
95} 99}
96#endif 100#endif
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 71cd456fdb60..66bac226944e 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -320,12 +320,12 @@ static u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
320 return total_added; 320 return total_added;
321} 321}
322 322
323static int caching_kthread(void *data) 323static noinline void caching_thread(struct btrfs_work *work)
324{ 324{
325 struct btrfs_block_group_cache *block_group = data; 325 struct btrfs_block_group_cache *block_group;
326 struct btrfs_fs_info *fs_info = block_group->fs_info; 326 struct btrfs_fs_info *fs_info;
327 struct btrfs_caching_control *caching_ctl = block_group->caching_ctl; 327 struct btrfs_caching_control *caching_ctl;
328 struct btrfs_root *extent_root = fs_info->extent_root; 328 struct btrfs_root *extent_root;
329 struct btrfs_path *path; 329 struct btrfs_path *path;
330 struct extent_buffer *leaf; 330 struct extent_buffer *leaf;
331 struct btrfs_key key; 331 struct btrfs_key key;
@@ -334,9 +334,14 @@ static int caching_kthread(void *data)
334 u32 nritems; 334 u32 nritems;
335 int ret = 0; 335 int ret = 0;
336 336
337 caching_ctl = container_of(work, struct btrfs_caching_control, work);
338 block_group = caching_ctl->block_group;
339 fs_info = block_group->fs_info;
340 extent_root = fs_info->extent_root;
341
337 path = btrfs_alloc_path(); 342 path = btrfs_alloc_path();
338 if (!path) 343 if (!path)
339 return -ENOMEM; 344 goto out;
340 345
341 last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); 346 last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
342 347
@@ -433,13 +438,11 @@ err:
433 free_excluded_extents(extent_root, block_group); 438 free_excluded_extents(extent_root, block_group);
434 439
435 mutex_unlock(&caching_ctl->mutex); 440 mutex_unlock(&caching_ctl->mutex);
441out:
436 wake_up(&caching_ctl->wait); 442 wake_up(&caching_ctl->wait);
437 443
438 put_caching_control(caching_ctl); 444 put_caching_control(caching_ctl);
439 atomic_dec(&block_group->space_info->caching_threads);
440 btrfs_put_block_group(block_group); 445 btrfs_put_block_group(block_group);
441
442 return 0;
443} 446}
444 447
445static int cache_block_group(struct btrfs_block_group_cache *cache, 448static int cache_block_group(struct btrfs_block_group_cache *cache,
@@ -449,7 +452,6 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
449{ 452{
450 struct btrfs_fs_info *fs_info = cache->fs_info; 453 struct btrfs_fs_info *fs_info = cache->fs_info;
451 struct btrfs_caching_control *caching_ctl; 454 struct btrfs_caching_control *caching_ctl;
452 struct task_struct *tsk;
453 int ret = 0; 455 int ret = 0;
454 456
455 smp_mb(); 457 smp_mb();
@@ -501,6 +503,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
501 caching_ctl->progress = cache->key.objectid; 503 caching_ctl->progress = cache->key.objectid;
502 /* one for caching kthread, one for caching block group list */ 504 /* one for caching kthread, one for caching block group list */
503 atomic_set(&caching_ctl->count, 2); 505 atomic_set(&caching_ctl->count, 2);
506 caching_ctl->work.func = caching_thread;
504 507
505 spin_lock(&cache->lock); 508 spin_lock(&cache->lock);
506 if (cache->cached != BTRFS_CACHE_NO) { 509 if (cache->cached != BTRFS_CACHE_NO) {
@@ -516,16 +519,9 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
516 list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups); 519 list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups);
517 up_write(&fs_info->extent_commit_sem); 520 up_write(&fs_info->extent_commit_sem);
518 521
519 atomic_inc(&cache->space_info->caching_threads);
520 btrfs_get_block_group(cache); 522 btrfs_get_block_group(cache);
521 523
522 tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu\n", 524 btrfs_queue_worker(&fs_info->caching_workers, &caching_ctl->work);
523 cache->key.objectid);
524 if (IS_ERR(tsk)) {
525 ret = PTR_ERR(tsk);
526 printk(KERN_ERR "error running thread %d\n", ret);
527 BUG();
528 }
529 525
530 return ret; 526 return ret;
531} 527}
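Queueing onto the capped caching_workers pool replaces one kthread per block group, which on a mount with many uncached groups meant an unbounded thread spawn. The shape of the conversion is the standard embedded-work-item pattern; a sketch using the generic kernel workqueue API as a stand-in for btrfs's async-thread pool (names and the pr_info are illustrative):

#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/workqueue.h>

struct caching_ctl {
	struct work_struct work;
	u64 group_start;                /* stand-in for the block group */
};

static void caching_fn(struct work_struct *work)
{
	/* recover our context exactly as caching_thread() does above */
	struct caching_ctl *ctl = container_of(work, struct caching_ctl, work);

	pr_info("caching block group at %llu\n", ctl->group_start);
	kfree(ctl);
}

static int start_caching(u64 group_start)
{
	struct caching_ctl *ctl = kzalloc(sizeof(*ctl), GFP_NOFS);

	if (!ctl)
		return -ENOMEM;
	ctl->group_start = group_start;
	INIT_WORK(&ctl->work, caching_fn);
	/* was: kthread_run(caching_kthread, cache, "btrfs-cache-%llu", ...) */
	schedule_work(&ctl->work);
	return 0;
}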
@@ -667,7 +663,9 @@ int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len)
667 struct btrfs_path *path; 663 struct btrfs_path *path;
668 664
669 path = btrfs_alloc_path(); 665 path = btrfs_alloc_path();
670 BUG_ON(!path); 666 if (!path)
667 return -ENOMEM;
668
671 key.objectid = start; 669 key.objectid = start;
672 key.offset = len; 670 key.offset = len;
673 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); 671 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
@@ -2932,9 +2930,10 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
2932 found->full = 0; 2930 found->full = 0;
2933 found->force_alloc = CHUNK_ALLOC_NO_FORCE; 2931 found->force_alloc = CHUNK_ALLOC_NO_FORCE;
2934 found->chunk_alloc = 0; 2932 found->chunk_alloc = 0;
2933 found->flush = 0;
2934 init_waitqueue_head(&found->wait);
2935 *space_info = found; 2935 *space_info = found;
2936 list_add_rcu(&found->list, &info->space_info); 2936 list_add_rcu(&found->list, &info->space_info);
2937 atomic_set(&found->caching_threads, 0);
2938 return 0; 2937 return 0;
2939} 2938}
2940 2939
@@ -3275,6 +3274,9 @@ again:
3275 } 3274 }
3276 3275
3277 ret = btrfs_alloc_chunk(trans, extent_root, flags); 3276 ret = btrfs_alloc_chunk(trans, extent_root, flags);
3277 if (ret < 0 && ret != -ENOSPC)
3278 goto out;
3279
3278 spin_lock(&space_info->lock); 3280 spin_lock(&space_info->lock);
3279 if (ret) 3281 if (ret)
3280 space_info->full = 1; 3282 space_info->full = 1;
@@ -3284,6 +3286,7 @@ again:
3284 space_info->force_alloc = CHUNK_ALLOC_NO_FORCE; 3286 space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
3285 space_info->chunk_alloc = 0; 3287 space_info->chunk_alloc = 0;
3286 spin_unlock(&space_info->lock); 3288 spin_unlock(&space_info->lock);
3289out:
3287 mutex_unlock(&extent_root->fs_info->chunk_mutex); 3290 mutex_unlock(&extent_root->fs_info->chunk_mutex);
3288 return ret; 3291 return ret;
3289} 3292}
@@ -3314,6 +3317,14 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
3314 if (reserved == 0) 3317 if (reserved == 0)
3315 return 0; 3318 return 0;
3316 3319
3320 smp_mb();
3321 if (root->fs_info->delalloc_bytes == 0) {
3322 if (trans)
3323 return 0;
3324 btrfs_wait_ordered_extents(root, 0, 0);
3325 return 0;
3326 }
3327
3317 max_reclaim = min(reserved, to_reclaim); 3328 max_reclaim = min(reserved, to_reclaim);
3318 3329
3319 while (loops < 1024) { 3330 while (loops < 1024) {
@@ -3356,6 +3367,8 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
3356 } 3367 }
3357 3368
3358 } 3369 }
3370 if (reclaimed >= to_reclaim && !trans)
3371 btrfs_wait_ordered_extents(root, 0, 0);
3359 return reclaimed >= to_reclaim; 3372 return reclaimed >= to_reclaim;
3360} 3373}
3361 3374
@@ -3380,15 +3393,36 @@ static int reserve_metadata_bytes(struct btrfs_trans_handle *trans,
3380 u64 num_bytes = orig_bytes; 3393 u64 num_bytes = orig_bytes;
3381 int retries = 0; 3394 int retries = 0;
3382 int ret = 0; 3395 int ret = 0;
3383 bool reserved = false;
3384 bool committed = false; 3396 bool committed = false;
3397 bool flushing = false;
3385 3398
3386again: 3399again:
3387 ret = -ENOSPC; 3400 ret = 0;
3388 if (reserved)
3389 num_bytes = 0;
3390
3391 spin_lock(&space_info->lock); 3401 spin_lock(&space_info->lock);
3402 /*
3403 * We only want to wait if somebody other than us is flushing and we are
 3404 * actually allowed to flush.
3405 */
3406 while (flush && !flushing && space_info->flush) {
3407 spin_unlock(&space_info->lock);
3408 /*
3409 * If we have a trans handle we can't wait because the flusher
3410 * may have to commit the transaction, which would mean we would
3411 * deadlock since we are waiting for the flusher to finish, but
3412 * hold the current transaction open.
3413 */
3414 if (trans)
3415 return -EAGAIN;
3416 ret = wait_event_interruptible(space_info->wait,
3417 !space_info->flush);
3418 /* Must have been interrupted, return */
3419 if (ret)
3420 return -EINTR;
3421
3422 spin_lock(&space_info->lock);
3423 }
3424
3425 ret = -ENOSPC;
3392 unused = space_info->bytes_used + space_info->bytes_reserved + 3426 unused = space_info->bytes_used + space_info->bytes_reserved +
3393 space_info->bytes_pinned + space_info->bytes_readonly + 3427 space_info->bytes_pinned + space_info->bytes_readonly +
3394 space_info->bytes_may_use; 3428 space_info->bytes_may_use;
@@ -3403,8 +3437,7 @@ again:
3403 if (unused <= space_info->total_bytes) { 3437 if (unused <= space_info->total_bytes) {
3404 unused = space_info->total_bytes - unused; 3438 unused = space_info->total_bytes - unused;
3405 if (unused >= num_bytes) { 3439 if (unused >= num_bytes) {
3406 if (!reserved) 3440 space_info->bytes_reserved += orig_bytes;
3407 space_info->bytes_reserved += orig_bytes;
3408 ret = 0; 3441 ret = 0;
3409 } else { 3442 } else {
3410 /* 3443 /*
@@ -3429,17 +3462,14 @@ again:
3429 * to reclaim space we can actually use it instead of somebody else 3462 * to reclaim space we can actually use it instead of somebody else
3430 * stealing it from us. 3463 * stealing it from us.
3431 */ 3464 */
3432 if (ret && !reserved) { 3465 if (ret && flush) {
3433 space_info->bytes_reserved += orig_bytes; 3466 flushing = true;
3434 reserved = true; 3467 space_info->flush = 1;
3435 } 3468 }
3436 3469
3437 spin_unlock(&space_info->lock); 3470 spin_unlock(&space_info->lock);
3438 3471
3439 if (!ret) 3472 if (!ret || !flush)
3440 return 0;
3441
3442 if (!flush)
3443 goto out; 3473 goto out;
3444 3474
3445 /* 3475 /*
@@ -3447,11 +3477,11 @@ again:
3447 * metadata until after the IO is completed. 3477 * metadata until after the IO is completed.
3448 */ 3478 */
3449 ret = shrink_delalloc(trans, root, num_bytes, 1); 3479 ret = shrink_delalloc(trans, root, num_bytes, 1);
3450 if (ret > 0) 3480 if (ret < 0)
3451 return 0;
3452 else if (ret < 0)
3453 goto out; 3481 goto out;
3454 3482
3483 ret = 0;
3484
3455 /* 3485 /*
3456 * So if we were overcommitted it's possible that somebody else flushed 3486 * So if we were overcommitted it's possible that somebody else flushed
3457 * out enough space and we simply didn't have enough space to reclaim, 3487 * out enough space and we simply didn't have enough space to reclaim,
@@ -3462,11 +3492,11 @@ again:
3462 goto again; 3492 goto again;
3463 } 3493 }
3464 3494
3465 spin_lock(&space_info->lock);
3466 /* 3495 /*
3467 * Not enough space to be reclaimed, don't bother committing the 3496 * Not enough space to be reclaimed, don't bother committing the
3468 * transaction. 3497 * transaction.
3469 */ 3498 */
3499 spin_lock(&space_info->lock);
3470 if (space_info->bytes_pinned < orig_bytes) 3500 if (space_info->bytes_pinned < orig_bytes)
3471 ret = -ENOSPC; 3501 ret = -ENOSPC;
3472 spin_unlock(&space_info->lock); 3502 spin_unlock(&space_info->lock);
@@ -3474,10 +3504,13 @@ again:
3474 goto out; 3504 goto out;
3475 3505
3476 ret = -EAGAIN; 3506 ret = -EAGAIN;
3477 if (trans || committed) 3507 if (trans)
3478 goto out; 3508 goto out;
3479 3509
3480 ret = -ENOSPC; 3510 ret = -ENOSPC;
3511 if (committed)
3512 goto out;
3513
3481 trans = btrfs_join_transaction(root); 3514 trans = btrfs_join_transaction(root);
3482 if (IS_ERR(trans)) 3515 if (IS_ERR(trans))
3483 goto out; 3516 goto out;
@@ -3489,12 +3522,12 @@ again:
3489 } 3522 }
3490 3523
3491out: 3524out:
3492 if (reserved) { 3525 if (flushing) {
3493 spin_lock(&space_info->lock); 3526 spin_lock(&space_info->lock);
3494 space_info->bytes_reserved -= orig_bytes; 3527 space_info->flush = 0;
3528 wake_up_all(&space_info->wait);
3495 spin_unlock(&space_info->lock); 3529 spin_unlock(&space_info->lock);
3496 } 3530 }
3497
3498 return ret; 3531 return ret;
3499} 3532}
3500 3533
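Taken together, the hunks above turn reserve_metadata_bytes() into a single-flusher protocol: the first task that needs to reclaim sets space_info->flush, later arrivals sleep on space_info->wait, and a task holding a transaction handle must bail with -EAGAIN because the flusher may need to commit that very transaction. A condensed kernel-style sketch of the protocol (simplified: in the real code only a failed fast-path reservation promotes the task to flusher):

#include <linux/errno.h>
#include <linux/spinlock.h>
#include <linux/types.h>
#include <linux/wait.h>

/* minimal stand-in for btrfs_space_info; only the fields the protocol uses */
struct space_info {
	spinlock_t lock;
	int flush;
	wait_queue_head_t wait;
};

static void do_flush_work(struct space_info *info)
{
	/* stub: stands in for shrink_delalloc() and the commit fallback */
}

static int reserve_bytes(struct space_info *info, bool have_trans)
{
	spin_lock(&info->lock);
	while (info->flush) {
		spin_unlock(&info->lock);
		if (have_trans)
			return -EAGAIN; /* the flusher may need our transaction */
		if (wait_event_interruptible(info->wait, !info->flush))
			return -EINTR;
		spin_lock(&info->lock);
	}
	info->flush = 1;                /* we are the one flusher now */
	spin_unlock(&info->lock);

	do_flush_work(info);

	spin_lock(&info->lock);
	info->flush = 0;
	wake_up_all(&info->wait);       /* admit the next reserver */
	spin_unlock(&info->lock);
	return 0;
}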
@@ -3704,7 +3737,6 @@ int btrfs_block_rsv_check(struct btrfs_trans_handle *trans,
3704 if (commit_trans) { 3737 if (commit_trans) {
3705 if (trans) 3738 if (trans)
3706 return -EAGAIN; 3739 return -EAGAIN;
3707
3708 trans = btrfs_join_transaction(root); 3740 trans = btrfs_join_transaction(root);
3709 BUG_ON(IS_ERR(trans)); 3741 BUG_ON(IS_ERR(trans));
3710 ret = btrfs_commit_transaction(trans, root); 3742 ret = btrfs_commit_transaction(trans, root);
@@ -3874,26 +3906,6 @@ int btrfs_truncate_reserve_metadata(struct btrfs_trans_handle *trans,
3874 return 0; 3906 return 0;
3875} 3907}
3876 3908
3877int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
3878 struct btrfs_root *root,
3879 int num_items)
3880{
3881 u64 num_bytes;
3882 int ret;
3883
3884 if (num_items == 0 || root->fs_info->chunk_root == root)
3885 return 0;
3886
3887 num_bytes = btrfs_calc_trans_metadata_size(root, num_items);
3888 ret = btrfs_block_rsv_add(trans, root, &root->fs_info->trans_block_rsv,
3889 num_bytes);
3890 if (!ret) {
3891 trans->bytes_reserved += num_bytes;
3892 trans->block_rsv = &root->fs_info->trans_block_rsv;
3893 }
3894 return ret;
3895}
3896
3897void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, 3909void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
3898 struct btrfs_root *root) 3910 struct btrfs_root *root)
3899{ 3911{
@@ -3944,6 +3956,30 @@ int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans,
3944 return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); 3956 return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
3945} 3957}
3946 3958
3959static unsigned drop_outstanding_extent(struct inode *inode)
3960{
3961 unsigned dropped_extents = 0;
3962
3963 spin_lock(&BTRFS_I(inode)->lock);
3964 BUG_ON(!BTRFS_I(inode)->outstanding_extents);
3965 BTRFS_I(inode)->outstanding_extents--;
3966
3967 /*
 3968 * If we have at least as many outstanding extents as we have
3969 * reserved then we need to leave the reserved extents count alone.
3970 */
3971 if (BTRFS_I(inode)->outstanding_extents >=
3972 BTRFS_I(inode)->reserved_extents)
3973 goto out;
3974
3975 dropped_extents = BTRFS_I(inode)->reserved_extents -
3976 BTRFS_I(inode)->outstanding_extents;
3977 BTRFS_I(inode)->reserved_extents -= dropped_extents;
3978out:
3979 spin_unlock(&BTRFS_I(inode)->lock);
3980 return dropped_extents;
3981}
3982
3947static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes) 3983static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes)
3948{ 3984{
3949 return num_bytes >>= 3; 3985 return num_bytes >>= 3;
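The two counters deserve a gloss: outstanding_extents tracks extents currently in flight for the inode, reserved_extents tracks how many we have charged metadata for, and both now sit under BTRFS_I(inode)->lock instead of the old lockless atomics (whose cmpxchg loop falls out in the next hunk). When an extent completes, only the surplus reservation is released. A userspace model of the same arithmetic:

#include <assert.h>
#include <stdio.h>

struct inode_acct {
	unsigned outstanding;   /* extents in flight */
	unsigned reserved;      /* extents we hold metadata reservation for */
};

static unsigned drop_outstanding(struct inode_acct *a)
{
	unsigned dropped = 0;

	assert(a->outstanding);
	a->outstanding--;

	if (a->outstanding < a->reserved) {
		/* release only what is now surplus over the in-flight count */
		dropped = a->reserved - a->outstanding;
		a->reserved -= dropped;
	}
	return dropped;
}

int main(void)
{
	struct inode_acct a = { .outstanding = 3, .reserved = 3 };

	printf("dropped %u reservation(s)\n", drop_outstanding(&a));  /* 1 */
	return 0;
}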
@@ -3953,9 +3989,8 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
3953{ 3989{
3954 struct btrfs_root *root = BTRFS_I(inode)->root; 3990 struct btrfs_root *root = BTRFS_I(inode)->root;
3955 struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv; 3991 struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv;
3956 u64 to_reserve; 3992 u64 to_reserve = 0;
3957 int nr_extents; 3993 unsigned nr_extents = 0;
3958 int reserved_extents;
3959 int ret; 3994 int ret;
3960 3995
3961 if (btrfs_transaction_in_commit(root->fs_info)) 3996 if (btrfs_transaction_in_commit(root->fs_info))
@@ -3963,66 +3998,49 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
3963 3998
3964 num_bytes = ALIGN(num_bytes, root->sectorsize); 3999 num_bytes = ALIGN(num_bytes, root->sectorsize);
3965 4000
3966 nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents) + 1; 4001 spin_lock(&BTRFS_I(inode)->lock);
3967 reserved_extents = atomic_read(&BTRFS_I(inode)->reserved_extents); 4002 BTRFS_I(inode)->outstanding_extents++;
4003
4004 if (BTRFS_I(inode)->outstanding_extents >
4005 BTRFS_I(inode)->reserved_extents) {
4006 nr_extents = BTRFS_I(inode)->outstanding_extents -
4007 BTRFS_I(inode)->reserved_extents;
4008 BTRFS_I(inode)->reserved_extents += nr_extents;
3968 4009
3969 if (nr_extents > reserved_extents) {
3970 nr_extents -= reserved_extents;
3971 to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents); 4010 to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents);
3972 } else {
3973 nr_extents = 0;
3974 to_reserve = 0;
3975 } 4011 }
4012 spin_unlock(&BTRFS_I(inode)->lock);
3976 4013
3977 to_reserve += calc_csum_metadata_size(inode, num_bytes); 4014 to_reserve += calc_csum_metadata_size(inode, num_bytes);
3978 ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1); 4015 ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1);
3979 if (ret) 4016 if (ret) {
4017 unsigned dropped;
4018 /*
4019 * We don't need the return value since our reservation failed,
4020 * we just need to clean up our counter.
4021 */
4022 dropped = drop_outstanding_extent(inode);
4023 WARN_ON(dropped > 1);
3980 return ret; 4024 return ret;
3981 4025 }
3982 atomic_add(nr_extents, &BTRFS_I(inode)->reserved_extents);
3983 atomic_inc(&BTRFS_I(inode)->outstanding_extents);
3984 4026
3985 block_rsv_add_bytes(block_rsv, to_reserve, 1); 4027 block_rsv_add_bytes(block_rsv, to_reserve, 1);
3986 4028
3987 if (block_rsv->size > 512 * 1024 * 1024)
3988 shrink_delalloc(NULL, root, to_reserve, 0);
3989
3990 return 0; 4029 return 0;
3991} 4030}
3992 4031
3993void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes) 4032void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
3994{ 4033{
3995 struct btrfs_root *root = BTRFS_I(inode)->root; 4034 struct btrfs_root *root = BTRFS_I(inode)->root;
3996 u64 to_free; 4035 u64 to_free = 0;
3997 int nr_extents; 4036 unsigned dropped;
3998 int reserved_extents;
3999 4037
4000 num_bytes = ALIGN(num_bytes, root->sectorsize); 4038 num_bytes = ALIGN(num_bytes, root->sectorsize);
4001 atomic_dec(&BTRFS_I(inode)->outstanding_extents); 4039 dropped = drop_outstanding_extent(inode);
4002 WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents) < 0);
4003
4004 reserved_extents = atomic_read(&BTRFS_I(inode)->reserved_extents);
4005 do {
4006 int old, new;
4007
4008 nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents);
4009 if (nr_extents >= reserved_extents) {
4010 nr_extents = 0;
4011 break;
4012 }
4013 old = reserved_extents;
4014 nr_extents = reserved_extents - nr_extents;
4015 new = reserved_extents - nr_extents;
4016 old = atomic_cmpxchg(&BTRFS_I(inode)->reserved_extents,
4017 reserved_extents, new);
4018 if (likely(old == reserved_extents))
4019 break;
4020 reserved_extents = old;
4021 } while (1);
4022 4040
4023 to_free = calc_csum_metadata_size(inode, num_bytes); 4041 to_free = calc_csum_metadata_size(inode, num_bytes);
4024 if (nr_extents > 0) 4042 if (dropped > 0)
4025 to_free += btrfs_calc_trans_metadata_size(root, nr_extents); 4043 to_free += btrfs_calc_trans_metadata_size(root, dropped);
4026 4044
4027 btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv, 4045 btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv,
4028 to_free); 4046 to_free);
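The reservation sizing itself is simple arithmetic: metadata space for however many extents just became outstanding, plus roughly an eighth of the data bytes for checksum items (calc_csum_metadata_size() above is literally num_bytes >> 3). A back-of-the-envelope model; the per-extent constant is a made-up placeholder, not the real btrfs_calc_trans_metadata_size():

#include <stdio.h>

#define PER_EXTENT_METADATA (16 * 1024ULL)   /* hypothetical cost per extent */

static unsigned long long to_reserve(unsigned nr_extents,
				     unsigned long long num_bytes)
{
	return nr_extents * PER_EXTENT_METADATA + (num_bytes >> 3);
}

int main(void)
{
	/* one newly outstanding extent covering 128KiB of dirty data */
	printf("reserve %llu bytes\n", to_reserve(1, 128 * 1024ULL));
	return 0;
}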
@@ -4444,7 +4462,9 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
4444 printk(KERN_ERR "umm, got %d back from search" 4462 printk(KERN_ERR "umm, got %d back from search"
4445 ", was looking for %llu\n", ret, 4463 ", was looking for %llu\n", ret,
4446 (unsigned long long)bytenr); 4464 (unsigned long long)bytenr);
4447 btrfs_print_leaf(extent_root, path->nodes[0]); 4465 if (ret > 0)
4466 btrfs_print_leaf(extent_root,
4467 path->nodes[0]);
4448 } 4468 }
4449 BUG_ON(ret); 4469 BUG_ON(ret);
4450 extent_slot = path->slots[0]; 4470 extent_slot = path->slots[0];
@@ -4990,14 +5010,10 @@ have_block_group:
4990 } 5010 }
4991 5011
4992 /* 5012 /*
4993 * We only want to start kthread caching if we are at 5013 * The caching workers are limited to 2 threads, so we
4994 * the point where we will wait for caching to make 5014 * can queue as much work as we care to.
4995 * progress, or if our ideal search is over and we've
4996 * found somebody to start caching.
4997 */ 5015 */
4998 if (loop > LOOP_CACHING_NOWAIT || 5016 if (loop > LOOP_FIND_IDEAL) {
4999 (loop > LOOP_FIND_IDEAL &&
5000 atomic_read(&space_info->caching_threads) < 2)) {
5001 ret = cache_block_group(block_group, trans, 5017 ret = cache_block_group(block_group, trans,
5002 orig_root, 0); 5018 orig_root, 0);
5003 BUG_ON(ret); 5019 BUG_ON(ret);
@@ -5065,7 +5081,9 @@ have_block_group:
 5065 * group it does point to and try again 5081
5066 */ 5082 */
5067 if (!last_ptr_loop && last_ptr->block_group && 5083 if (!last_ptr_loop && last_ptr->block_group &&
5068 last_ptr->block_group != block_group) { 5084 last_ptr->block_group != block_group &&
5085 index <=
5086 get_block_group_index(last_ptr->block_group)) {
5069 5087
5070 btrfs_put_block_group(block_group); 5088 btrfs_put_block_group(block_group);
5071 block_group = last_ptr->block_group; 5089 block_group = last_ptr->block_group;
@@ -5219,8 +5237,7 @@ loop:
5219 if (loop == LOOP_FIND_IDEAL && found_uncached_bg) { 5237 if (loop == LOOP_FIND_IDEAL && found_uncached_bg) {
5220 found_uncached_bg = false; 5238 found_uncached_bg = false;
5221 loop++; 5239 loop++;
5222 if (!ideal_cache_percent && 5240 if (!ideal_cache_percent)
5223 atomic_read(&space_info->caching_threads))
5224 goto search; 5241 goto search;
5225 5242
5226 /* 5243 /*
@@ -5494,7 +5511,8 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
5494 u32 size = sizeof(*extent_item) + sizeof(*block_info) + sizeof(*iref); 5511 u32 size = sizeof(*extent_item) + sizeof(*block_info) + sizeof(*iref);
5495 5512
5496 path = btrfs_alloc_path(); 5513 path = btrfs_alloc_path();
5497 BUG_ON(!path); 5514 if (!path)
5515 return -ENOMEM;
5498 5516
5499 path->leave_spinning = 1; 5517 path->leave_spinning = 1;
5500 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path, 5518 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
@@ -5623,7 +5641,7 @@ struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
5623 if (!buf) 5641 if (!buf)
5624 return ERR_PTR(-ENOMEM); 5642 return ERR_PTR(-ENOMEM);
5625 btrfs_set_header_generation(buf, trans->transid); 5643 btrfs_set_header_generation(buf, trans->transid);
5626 btrfs_set_buffer_lockdep_class(buf, level); 5644 btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level);
5627 btrfs_tree_lock(buf); 5645 btrfs_tree_lock(buf);
5628 clean_tree_block(trans, root, buf); 5646 clean_tree_block(trans, root, buf);
5629 5647
@@ -5910,7 +5928,7 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
5910 return 1; 5928 return 1;
5911 5929
5912 if (path->locks[level] && !wc->keep_locks) { 5930 if (path->locks[level] && !wc->keep_locks) {
5913 btrfs_tree_unlock(eb); 5931 btrfs_tree_unlock_rw(eb, path->locks[level]);
5914 path->locks[level] = 0; 5932 path->locks[level] = 0;
5915 } 5933 }
5916 return 0; 5934 return 0;
@@ -5934,7 +5952,7 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
5934 * keep the tree lock 5952 * keep the tree lock
5935 */ 5953 */
5936 if (path->locks[level] && level > 0) { 5954 if (path->locks[level] && level > 0) {
5937 btrfs_tree_unlock(eb); 5955 btrfs_tree_unlock_rw(eb, path->locks[level]);
5938 path->locks[level] = 0; 5956 path->locks[level] = 0;
5939 } 5957 }
5940 return 0; 5958 return 0;
@@ -6047,7 +6065,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
6047 BUG_ON(level != btrfs_header_level(next)); 6065 BUG_ON(level != btrfs_header_level(next));
6048 path->nodes[level] = next; 6066 path->nodes[level] = next;
6049 path->slots[level] = 0; 6067 path->slots[level] = 0;
6050 path->locks[level] = 1; 6068 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
6051 wc->level = level; 6069 wc->level = level;
6052 if (wc->level == 1) 6070 if (wc->level == 1)
6053 wc->reada_slot = 0; 6071 wc->reada_slot = 0;
@@ -6118,7 +6136,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
6118 BUG_ON(level == 0); 6136 BUG_ON(level == 0);
6119 btrfs_tree_lock(eb); 6137 btrfs_tree_lock(eb);
6120 btrfs_set_lock_blocking(eb); 6138 btrfs_set_lock_blocking(eb);
6121 path->locks[level] = 1; 6139 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
6122 6140
6123 ret = btrfs_lookup_extent_info(trans, root, 6141 ret = btrfs_lookup_extent_info(trans, root,
6124 eb->start, eb->len, 6142 eb->start, eb->len,
@@ -6127,8 +6145,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
6127 BUG_ON(ret); 6145 BUG_ON(ret);
6128 BUG_ON(wc->refs[level] == 0); 6146 BUG_ON(wc->refs[level] == 0);
6129 if (wc->refs[level] == 1) { 6147 if (wc->refs[level] == 1) {
6130 btrfs_tree_unlock(eb); 6148 btrfs_tree_unlock_rw(eb, path->locks[level]);
6131 path->locks[level] = 0;
6132 return 1; 6149 return 1;
6133 } 6150 }
6134 } 6151 }
@@ -6150,7 +6167,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
6150 btrfs_header_generation(eb) == trans->transid) { 6167 btrfs_header_generation(eb) == trans->transid) {
6151 btrfs_tree_lock(eb); 6168 btrfs_tree_lock(eb);
6152 btrfs_set_lock_blocking(eb); 6169 btrfs_set_lock_blocking(eb);
6153 path->locks[level] = 1; 6170 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
6154 } 6171 }
6155 clean_tree_block(trans, root, eb); 6172 clean_tree_block(trans, root, eb);
6156 } 6173 }
@@ -6229,7 +6246,8 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
6229 return 0; 6246 return 0;
6230 6247
6231 if (path->locks[level]) { 6248 if (path->locks[level]) {
6232 btrfs_tree_unlock(path->nodes[level]); 6249 btrfs_tree_unlock_rw(path->nodes[level],
6250 path->locks[level]);
6233 path->locks[level] = 0; 6251 path->locks[level] = 0;
6234 } 6252 }
6235 free_extent_buffer(path->nodes[level]); 6253 free_extent_buffer(path->nodes[level]);
@@ -6265,10 +6283,14 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
6265 int level; 6283 int level;
6266 6284
6267 path = btrfs_alloc_path(); 6285 path = btrfs_alloc_path();
6268 BUG_ON(!path); 6286 if (!path)
6287 return -ENOMEM;
6269 6288
6270 wc = kzalloc(sizeof(*wc), GFP_NOFS); 6289 wc = kzalloc(sizeof(*wc), GFP_NOFS);
6271 BUG_ON(!wc); 6290 if (!wc) {
6291 btrfs_free_path(path);
6292 return -ENOMEM;
6293 }
6272 6294
6273 trans = btrfs_start_transaction(tree_root, 0); 6295 trans = btrfs_start_transaction(tree_root, 0);
6274 BUG_ON(IS_ERR(trans)); 6296 BUG_ON(IS_ERR(trans));
@@ -6281,7 +6303,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
6281 path->nodes[level] = btrfs_lock_root_node(root); 6303 path->nodes[level] = btrfs_lock_root_node(root);
6282 btrfs_set_lock_blocking(path->nodes[level]); 6304 btrfs_set_lock_blocking(path->nodes[level]);
6283 path->slots[level] = 0; 6305 path->slots[level] = 0;
6284 path->locks[level] = 1; 6306 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
6285 memset(&wc->update_progress, 0, 6307 memset(&wc->update_progress, 0,
6286 sizeof(wc->update_progress)); 6308 sizeof(wc->update_progress));
6287 } else { 6309 } else {
@@ -6449,7 +6471,7 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
6449 level = btrfs_header_level(node); 6471 level = btrfs_header_level(node);
6450 path->nodes[level] = node; 6472 path->nodes[level] = node;
6451 path->slots[level] = 0; 6473 path->slots[level] = 0;
6452 path->locks[level] = 1; 6474 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
6453 6475
6454 wc->refs[parent_level] = 1; 6476 wc->refs[parent_level] = 1;
6455 wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF; 6477 wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF;
@@ -6524,30 +6546,48 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
6524 return flags; 6546 return flags;
6525} 6547}
6526 6548
6527static int set_block_group_ro(struct btrfs_block_group_cache *cache) 6549static int set_block_group_ro(struct btrfs_block_group_cache *cache, int force)
6528{ 6550{
6529 struct btrfs_space_info *sinfo = cache->space_info; 6551 struct btrfs_space_info *sinfo = cache->space_info;
6530 u64 num_bytes; 6552 u64 num_bytes;
6553 u64 min_allocable_bytes;
6531 int ret = -ENOSPC; 6554 int ret = -ENOSPC;
6532 6555
6533 if (cache->ro) 6556
6534 return 0; 6557 /*
 6558 * We need some metadata space and system metadata space for
 6559 * allocating chunks in some corner cases, so don't mark the group
 6560 * read-only unless we are forced to.
6561 */
6562 if ((sinfo->flags &
6563 (BTRFS_BLOCK_GROUP_SYSTEM | BTRFS_BLOCK_GROUP_METADATA)) &&
6564 !force)
6565 min_allocable_bytes = 1 * 1024 * 1024;
6566 else
6567 min_allocable_bytes = 0;
6535 6568
6536 spin_lock(&sinfo->lock); 6569 spin_lock(&sinfo->lock);
6537 spin_lock(&cache->lock); 6570 spin_lock(&cache->lock);
6571
6572 if (cache->ro) {
6573 ret = 0;
6574 goto out;
6575 }
6576
6538 num_bytes = cache->key.offset - cache->reserved - cache->pinned - 6577 num_bytes = cache->key.offset - cache->reserved - cache->pinned -
6539 cache->bytes_super - btrfs_block_group_used(&cache->item); 6578 cache->bytes_super - btrfs_block_group_used(&cache->item);
6540 6579
6541 if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned + 6580 if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned +
6542 sinfo->bytes_may_use + sinfo->bytes_readonly + 6581 sinfo->bytes_may_use + sinfo->bytes_readonly +
6543 cache->reserved_pinned + num_bytes <= sinfo->total_bytes) { 6582 cache->reserved_pinned + num_bytes + min_allocable_bytes <=
6583 sinfo->total_bytes) {
6544 sinfo->bytes_readonly += num_bytes; 6584 sinfo->bytes_readonly += num_bytes;
6545 sinfo->bytes_reserved += cache->reserved_pinned; 6585 sinfo->bytes_reserved += cache->reserved_pinned;
6546 cache->reserved_pinned = 0; 6586 cache->reserved_pinned = 0;
6547 cache->ro = 1; 6587 cache->ro = 1;
6548 ret = 0; 6588 ret = 0;
6549 } 6589 }
6550 6590out:
6551 spin_unlock(&cache->lock); 6591 spin_unlock(&cache->lock);
6552 spin_unlock(&sinfo->lock); 6592 spin_unlock(&sinfo->lock);
6553 return ret; 6593 return ret;
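The force parameter exists because metadata and system block groups must keep a little allocatable slack (1MiB here) for chunk allocation corner cases, so an ordinary request to go read-only can be refused while a forced one (used below when reading in already-readonly chunks and degraded groups) cannot. The check reduces to this inequality; a sketch only, since the real num_bytes term also folds in reserved, pinned and super-stripe bytes as the hunk shows:

#include <stdbool.h>
#include <stdio.h>

static bool can_set_ro(unsigned long long used, unsigned long long total,
		       bool metadata_or_system, bool force)
{
	unsigned long long min_allocable =
		(metadata_or_system && !force) ? 1024 * 1024ULL : 0;

	return used + min_allocable <= total;
}

int main(void)
{
	unsigned long long total = 8ULL << 30, used = total - 512 * 1024;

	/* 512KiB of slack: refused for an unforced metadata group... */
	printf("unforced: %d\n", can_set_ro(used, total, true, false));
	/* ...but a forced request goes through regardless */
	printf("forced:   %d\n", can_set_ro(used, total, true, true));
	return 0;
}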
@@ -6571,7 +6611,7 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
6571 do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 6611 do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags,
6572 CHUNK_ALLOC_FORCE); 6612 CHUNK_ALLOC_FORCE);
6573 6613
6574 ret = set_block_group_ro(cache); 6614 ret = set_block_group_ro(cache, 0);
6575 if (!ret) 6615 if (!ret)
6576 goto out; 6616 goto out;
6577 alloc_flags = get_alloc_profile(root, cache->space_info->flags); 6617 alloc_flags = get_alloc_profile(root, cache->space_info->flags);
@@ -6579,7 +6619,7 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
6579 CHUNK_ALLOC_FORCE); 6619 CHUNK_ALLOC_FORCE);
6580 if (ret < 0) 6620 if (ret < 0)
6581 goto out; 6621 goto out;
6582 ret = set_block_group_ro(cache); 6622 ret = set_block_group_ro(cache, 0);
6583out: 6623out:
6584 btrfs_end_transaction(trans, root); 6624 btrfs_end_transaction(trans, root);
6585 return ret; 6625 return ret;
@@ -7016,7 +7056,7 @@ int btrfs_read_block_groups(struct btrfs_root *root)
7016 7056
7017 set_avail_alloc_bits(root->fs_info, cache->flags); 7057 set_avail_alloc_bits(root->fs_info, cache->flags);
7018 if (btrfs_chunk_readonly(root, cache->key.objectid)) 7058 if (btrfs_chunk_readonly(root, cache->key.objectid))
7019 set_block_group_ro(cache); 7059 set_block_group_ro(cache, 1);
7020 } 7060 }
7021 7061
7022 list_for_each_entry_rcu(space_info, &root->fs_info->space_info, list) { 7062 list_for_each_entry_rcu(space_info, &root->fs_info->space_info, list) {
@@ -7030,9 +7070,9 @@ int btrfs_read_block_groups(struct btrfs_root *root)
7030 * mirrored block groups. 7070 * mirrored block groups.
7031 */ 7071 */
7032 list_for_each_entry(cache, &space_info->block_groups[3], list) 7072 list_for_each_entry(cache, &space_info->block_groups[3], list)
7033 set_block_group_ro(cache); 7073 set_block_group_ro(cache, 1);
7034 list_for_each_entry(cache, &space_info->block_groups[4], list) 7074 list_for_each_entry(cache, &space_info->block_groups[4], list)
7035 set_block_group_ro(cache); 7075 set_block_group_ro(cache, 1);
7036 } 7076 }
7037 7077
7038 init_global_block_rsv(info); 7078 init_global_block_rsv(info);
@@ -7162,11 +7202,15 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
7162 spin_unlock(&cluster->refill_lock); 7202 spin_unlock(&cluster->refill_lock);
7163 7203
7164 path = btrfs_alloc_path(); 7204 path = btrfs_alloc_path();
7165 BUG_ON(!path); 7205 if (!path) {
7206 ret = -ENOMEM;
7207 goto out;
7208 }
7166 7209
7167 inode = lookup_free_space_inode(root, block_group, path); 7210 inode = lookup_free_space_inode(root, block_group, path);
7168 if (!IS_ERR(inode)) { 7211 if (!IS_ERR(inode)) {
7169 btrfs_orphan_add(trans, inode); 7212 ret = btrfs_orphan_add(trans, inode);
7213 BUG_ON(ret);
7170 clear_nlink(inode); 7214 clear_nlink(inode);
7171 /* One for the block groups ref */ 7215 /* One for the block groups ref */
7172 spin_lock(&block_group->lock); 7216 spin_lock(&block_group->lock);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 7055d11c1efd..d418164a35f1 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -254,14 +254,14 @@ static void merge_cb(struct extent_io_tree *tree, struct extent_state *new,
254 * 254 *
255 * This should be called with the tree lock held. 255 * This should be called with the tree lock held.
256 */ 256 */
257static int merge_state(struct extent_io_tree *tree, 257static void merge_state(struct extent_io_tree *tree,
258 struct extent_state *state) 258 struct extent_state *state)
259{ 259{
260 struct extent_state *other; 260 struct extent_state *other;
261 struct rb_node *other_node; 261 struct rb_node *other_node;
262 262
263 if (state->state & (EXTENT_IOBITS | EXTENT_BOUNDARY)) 263 if (state->state & (EXTENT_IOBITS | EXTENT_BOUNDARY))
264 return 0; 264 return;
265 265
266 other_node = rb_prev(&state->rb_node); 266 other_node = rb_prev(&state->rb_node);
267 if (other_node) { 267 if (other_node) {
@@ -281,26 +281,19 @@ static int merge_state(struct extent_io_tree *tree,
281 if (other->start == state->end + 1 && 281 if (other->start == state->end + 1 &&
282 other->state == state->state) { 282 other->state == state->state) {
283 merge_cb(tree, state, other); 283 merge_cb(tree, state, other);
284 other->start = state->start; 284 state->end = other->end;
285 state->tree = NULL; 285 other->tree = NULL;
286 rb_erase(&state->rb_node, &tree->state); 286 rb_erase(&other->rb_node, &tree->state);
287 free_extent_state(state); 287 free_extent_state(other);
288 state = NULL;
289 } 288 }
290 } 289 }
291
292 return 0;
293} 290}
294 291
295static int set_state_cb(struct extent_io_tree *tree, 292static void set_state_cb(struct extent_io_tree *tree,
296 struct extent_state *state, int *bits) 293 struct extent_state *state, int *bits)
297{ 294{
298 if (tree->ops && tree->ops->set_bit_hook) { 295 if (tree->ops && tree->ops->set_bit_hook)
299 return tree->ops->set_bit_hook(tree->mapping->host, 296 tree->ops->set_bit_hook(tree->mapping->host, state, bits);
300 state, bits);
301 }
302
303 return 0;
304} 297}
305 298
306static void clear_state_cb(struct extent_io_tree *tree, 299static void clear_state_cb(struct extent_io_tree *tree,
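Note the direction change in merge_state(): the forward merge now keeps `state` alive and absorbs the following `other` into it, instead of freeing `state` and growing `other` backwards. That matters because callers, including the cached-state fast path fixed further down, may still hold a pointer to `state`; after the merge that pointer simply covers a larger range. In miniature:

#include <stdio.h>
#include <stdlib.h>

struct state {
	unsigned long long start, end;
};

static void merge_forward(struct state *state, struct state *other)
{
	state->end = other->end;   /* grow the state the caller still holds */
	free(other);               /* and drop the absorbed neighbour */
}

int main(void)
{
	struct state *a = malloc(sizeof(*a));
	struct state *b = malloc(sizeof(*b));

	if (!a || !b)
		return 1;
	a->start = 0;    a->end = 4095;
	b->start = 4096; b->end = 8191;
	merge_forward(a, b);
	printf("merged: [%llu, %llu]\n", a->start, a->end);
	free(a);
	return 0;
}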
@@ -310,6 +303,9 @@ static void clear_state_cb(struct extent_io_tree *tree,
310 tree->ops->clear_bit_hook(tree->mapping->host, state, bits); 303 tree->ops->clear_bit_hook(tree->mapping->host, state, bits);
311} 304}
312 305
306static void set_state_bits(struct extent_io_tree *tree,
307 struct extent_state *state, int *bits);
308
313/* 309/*
314 * insert an extent_state struct into the tree. 'bits' are set on the 310 * insert an extent_state struct into the tree. 'bits' are set on the
315 * struct before it is inserted. 311 * struct before it is inserted.
@@ -325,8 +321,6 @@ static int insert_state(struct extent_io_tree *tree,
325 int *bits) 321 int *bits)
326{ 322{
327 struct rb_node *node; 323 struct rb_node *node;
328 int bits_to_set = *bits & ~EXTENT_CTLBITS;
329 int ret;
330 324
331 if (end < start) { 325 if (end < start) {
332 printk(KERN_ERR "btrfs end < start %llu %llu\n", 326 printk(KERN_ERR "btrfs end < start %llu %llu\n",
@@ -336,13 +330,9 @@ static int insert_state(struct extent_io_tree *tree,
336 } 330 }
337 state->start = start; 331 state->start = start;
338 state->end = end; 332 state->end = end;
339 ret = set_state_cb(tree, state, bits);
340 if (ret)
341 return ret;
342 333
343 if (bits_to_set & EXTENT_DIRTY) 334 set_state_bits(tree, state, bits);
344 tree->dirty_bytes += end - start + 1; 335
345 state->state |= bits_to_set;
346 node = tree_insert(&tree->state, end, &state->rb_node); 336 node = tree_insert(&tree->state, end, &state->rb_node);
347 if (node) { 337 if (node) {
348 struct extent_state *found; 338 struct extent_state *found;
@@ -351,7 +341,6 @@ static int insert_state(struct extent_io_tree *tree,
351 "%llu %llu\n", (unsigned long long)found->start, 341 "%llu %llu\n", (unsigned long long)found->start,
352 (unsigned long long)found->end, 342 (unsigned long long)found->end,
353 (unsigned long long)start, (unsigned long long)end); 343 (unsigned long long)start, (unsigned long long)end);
354 free_extent_state(state);
355 return -EEXIST; 344 return -EEXIST;
356 } 345 }
357 state->tree = tree; 346 state->tree = tree;
@@ -359,13 +348,11 @@ static int insert_state(struct extent_io_tree *tree,
359 return 0; 348 return 0;
360} 349}
361 350
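With the free_extent_state() call gone from the -EEXIST path in the hunk above, insert_state() no longer consumes the state it is handed; the caller keeps ownership on failure. A sketch of the resulting caller-side contract, matching the set_extent_bit context shown later in this file:

    err = insert_state(tree, prealloc, start, this_end, &bits);
    BUG_ON(err == -EEXIST);
    if (err) {
            free_extent_state(prealloc);    /* still the caller's on failure */
            prealloc = NULL;
            goto out;
    }
    cache_state(prealloc, cached_state);    /* on success the tree holds it */
    prealloc = NULL;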
362static int split_cb(struct extent_io_tree *tree, struct extent_state *orig, 351static void split_cb(struct extent_io_tree *tree, struct extent_state *orig,
363 u64 split) 352 u64 split)
364{ 353{
365 if (tree->ops && tree->ops->split_extent_hook) 354 if (tree->ops && tree->ops->split_extent_hook)
366 return tree->ops->split_extent_hook(tree->mapping->host, 355 tree->ops->split_extent_hook(tree->mapping->host, orig, split);
367 orig, split);
368 return 0;
369} 356}
370 357
371/* 358/*
@@ -500,7 +487,8 @@ again:
500 cached_state = NULL; 487 cached_state = NULL;
501 } 488 }
502 489
503 if (cached && cached->tree && cached->start == start) { 490 if (cached && cached->tree && cached->start <= start &&
491 cached->end > start) {
504 if (clear) 492 if (clear)
505 atomic_dec(&cached->refs); 493 atomic_dec(&cached->refs);
506 state = cached; 494 state = cached;
@@ -660,34 +648,25 @@ again:
660 if (start > end) 648 if (start > end)
661 break; 649 break;
662 650
663 if (need_resched()) { 651 cond_resched_lock(&tree->lock);
664 spin_unlock(&tree->lock);
665 cond_resched();
666 spin_lock(&tree->lock);
667 }
668 } 652 }
669out: 653out:
670 spin_unlock(&tree->lock); 654 spin_unlock(&tree->lock);
671 return 0; 655 return 0;
672} 656}
673 657
674static int set_state_bits(struct extent_io_tree *tree, 658static void set_state_bits(struct extent_io_tree *tree,
675 struct extent_state *state, 659 struct extent_state *state,
676 int *bits) 660 int *bits)
677{ 661{
678 int ret;
679 int bits_to_set = *bits & ~EXTENT_CTLBITS; 662 int bits_to_set = *bits & ~EXTENT_CTLBITS;
680 663
681 ret = set_state_cb(tree, state, bits); 664 set_state_cb(tree, state, bits);
682 if (ret)
683 return ret;
684 if ((bits_to_set & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) { 665 if ((bits_to_set & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) {
685 u64 range = state->end - state->start + 1; 666 u64 range = state->end - state->start + 1;
686 tree->dirty_bytes += range; 667 tree->dirty_bytes += range;
687 } 668 }
688 state->state |= bits_to_set; 669 state->state |= bits_to_set;
689
690 return 0;
691} 670}
692 671
693static void cache_state(struct extent_state *state, 672static void cache_state(struct extent_state *state,
@@ -742,7 +721,8 @@ again:
742 spin_lock(&tree->lock); 721 spin_lock(&tree->lock);
743 if (cached_state && *cached_state) { 722 if (cached_state && *cached_state) {
744 state = *cached_state; 723 state = *cached_state;
745 if (state->start == start && state->tree) { 724 if (state->start <= start && state->end > start &&
725 state->tree) {
746 node = &state->rb_node; 726 node = &state->rb_node;
747 goto hit_next; 727 goto hit_next;
748 } 728 }
@@ -779,17 +759,15 @@ hit_next:
779 goto out; 759 goto out;
780 } 760 }
781 761
782 err = set_state_bits(tree, state, &bits); 762 set_state_bits(tree, state, &bits);
783 if (err)
784 goto out;
785 763
786 next_node = rb_next(node);
787 cache_state(state, cached_state); 764 cache_state(state, cached_state);
788 merge_state(tree, state); 765 merge_state(tree, state);
789 if (last_end == (u64)-1) 766 if (last_end == (u64)-1)
790 goto out; 767 goto out;
791 768
792 start = last_end + 1; 769 start = last_end + 1;
770 next_node = rb_next(&state->rb_node);
793 if (next_node && start < end && prealloc && !need_resched()) { 771 if (next_node && start < end && prealloc && !need_resched()) {
794 state = rb_entry(next_node, struct extent_state, 772 state = rb_entry(next_node, struct extent_state,
795 rb_node); 773 rb_node);
@@ -830,9 +808,7 @@ hit_next:
830 if (err) 808 if (err)
831 goto out; 809 goto out;
832 if (state->end <= end) { 810 if (state->end <= end) {
833 err = set_state_bits(tree, state, &bits); 811 set_state_bits(tree, state, &bits);
834 if (err)
835 goto out;
836 cache_state(state, cached_state); 812 cache_state(state, cached_state);
837 merge_state(tree, state); 813 merge_state(tree, state);
838 if (last_end == (u64)-1) 814 if (last_end == (u64)-1)
@@ -862,7 +838,6 @@ hit_next:
862 * Avoid freeing 'prealloc' if it can be merged with 838 * the later extent.
863 * the later extent. 839 * the later extent.
864 */ 840 */
865 atomic_inc(&prealloc->refs);
866 err = insert_state(tree, prealloc, start, this_end, 841 err = insert_state(tree, prealloc, start, this_end,
867 &bits); 842 &bits);
868 BUG_ON(err == -EEXIST); 843 BUG_ON(err == -EEXIST);
@@ -872,7 +847,6 @@ hit_next:
872 goto out; 847 goto out;
873 } 848 }
874 cache_state(prealloc, cached_state); 849 cache_state(prealloc, cached_state);
875 free_extent_state(prealloc);
876 prealloc = NULL; 850 prealloc = NULL;
877 start = this_end + 1; 851 start = this_end + 1;
878 goto search_again; 852 goto search_again;
@@ -895,11 +869,7 @@ hit_next:
895 err = split_state(tree, state, prealloc, end + 1); 869 err = split_state(tree, state, prealloc, end + 1);
896 BUG_ON(err == -EEXIST); 870 BUG_ON(err == -EEXIST);
897 871
898 err = set_state_bits(tree, prealloc, &bits); 872 set_state_bits(tree, prealloc, &bits);
899 if (err) {
900 prealloc = NULL;
901 goto out;
902 }
903 cache_state(prealloc, cached_state); 873 cache_state(prealloc, cached_state);
904 merge_state(tree, prealloc); 874 merge_state(tree, prealloc);
905 prealloc = NULL; 875 prealloc = NULL;
@@ -1061,46 +1031,6 @@ static int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
1061 return 0; 1031 return 0;
1062} 1032}
1063 1033
1064/*
1065 * find the first offset in the io tree with 'bits' set. zero is
1066 * returned if we find something, and *start_ret and *end_ret are
1067 * set to reflect the state struct that was found.
1068 *
1069 * If nothing was found, 1 is returned, < 0 on error
1070 */
1071int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
1072 u64 *start_ret, u64 *end_ret, int bits)
1073{
1074 struct rb_node *node;
1075 struct extent_state *state;
1076 int ret = 1;
1077
1078 spin_lock(&tree->lock);
1079 /*
1080 * this search will find all the extents that end after
1081 * our range starts.
1082 */
1083 node = tree_search(tree, start);
1084 if (!node)
1085 goto out;
1086
1087 while (1) {
1088 state = rb_entry(node, struct extent_state, rb_node);
1089 if (state->end >= start && (state->state & bits)) {
1090 *start_ret = state->start;
1091 *end_ret = state->end;
1092 ret = 0;
1093 break;
1094 }
1095 node = rb_next(node);
1096 if (!node)
1097 break;
1098 }
1099out:
1100 spin_unlock(&tree->lock);
1101 return ret;
1102}
1103
1104/* find the first state struct with 'bits' set after 'start', and 1034/* find the first state struct with 'bits' set after 'start', and
1105 * return it. tree->lock must be held. NULL will be returned if 1035 * nothing was found after 'start'
1106 * nothing was found after 'start' 1036 * nothing was found after 'start'
@@ -1133,6 +1063,30 @@ out:
1133} 1063}
1134 1064
1135/* 1065/*
1066 * find the first offset in the io tree with 'bits' set. zero is
1067 * returned if we find something, and *start_ret and *end_ret are
1068 * set to reflect the state struct that was found.
1069 *
1070 * If nothing was found, 1 is returned, < 0 on error
1071 */
1072int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
1073 u64 *start_ret, u64 *end_ret, int bits)
1074{
1075 struct extent_state *state;
1076 int ret = 1;
1077
1078 spin_lock(&tree->lock);
1079 state = find_first_extent_bit_state(tree, start, bits);
1080 if (state) {
1081 *start_ret = state->start;
1082 *end_ret = state->end;
1083 ret = 0;
1084 }
1085 spin_unlock(&tree->lock);
1086 return ret;
1087}
1088
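The lookup is now split: find_first_extent_bit_state() does the tree walk with tree->lock held and returns the state, while the wrapper above only copies out the range. Illustrative use, with EXTENT_DIRTY standing in for whichever bit a caller cares about:

    u64 found_start, found_end;

    /* find the first dirty range at or after 'cur'; 0 means found */
    if (find_first_extent_bit(tree, cur, &found_start, &found_end,
                              EXTENT_DIRTY) == 0) {
            /* [found_start, found_end] is the first range with the bit set */
    }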
1089/*
1136 * find a contiguous range of bytes in the file marked as delalloc, not 1090 * find a contiguous range of bytes in the file marked as delalloc, not
1137 * more than 'max_bytes'. start and end are used to return the range, 1091 * more than 'max_bytes'. start and end are used to return the range,
1138 * 1092 *
@@ -1564,7 +1518,8 @@ int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
1564 int bitset = 0; 1518 int bitset = 0;
1565 1519
1566 spin_lock(&tree->lock); 1520 spin_lock(&tree->lock);
1567 if (cached && cached->tree && cached->start == start) 1521 if (cached && cached->tree && cached->start <= start &&
1522 cached->end > start)
1568 node = &cached->rb_node; 1523 node = &cached->rb_node;
1569 else 1524 else
1570 node = tree_search(tree, start); 1525 node = tree_search(tree, start);
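This is the third site in the patch (after clear_extent_bit and set_extent_bit above) where the cached-state fast path is relaxed from an exact start match to a containment test. Written out as a hypothetical helper, the condition all three sites now share:

    /* hypothetical helper; mirrors the relaxed checks above */
    static int cached_state_covers(struct extent_state *cached, u64 start)
    {
            return cached && cached->tree &&
                   cached->start <= start && cached->end > start;
    }

Any cached state whose range covers 'start' can seed the search, not only one that begins exactly there.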
@@ -2432,6 +2387,7 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
2432 pgoff_t index; 2387 pgoff_t index;
2433 pgoff_t end; /* Inclusive */ 2388 pgoff_t end; /* Inclusive */
2434 int scanned = 0; 2389 int scanned = 0;
2390 int tag;
2435 2391
2436 pagevec_init(&pvec, 0); 2392 pagevec_init(&pvec, 0);
2437 if (wbc->range_cyclic) { 2393 if (wbc->range_cyclic) {
@@ -2442,11 +2398,16 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
2442 end = wbc->range_end >> PAGE_CACHE_SHIFT; 2398 end = wbc->range_end >> PAGE_CACHE_SHIFT;
2443 scanned = 1; 2399 scanned = 1;
2444 } 2400 }
2401 if (wbc->sync_mode == WB_SYNC_ALL)
2402 tag = PAGECACHE_TAG_TOWRITE;
2403 else
2404 tag = PAGECACHE_TAG_DIRTY;
2445retry: 2405retry:
2406 if (wbc->sync_mode == WB_SYNC_ALL)
2407 tag_pages_for_writeback(mapping, index, end);
2446 while (!done && !nr_to_write_done && (index <= end) && 2408 while (!done && !nr_to_write_done && (index <= end) &&
2447 (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, 2409 (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
2448 PAGECACHE_TAG_DIRTY, min(end - index, 2410 min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
2449 (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
2450 unsigned i; 2411 unsigned i;
2451 2412
2452 scanned = 1; 2413 scanned = 1;
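For data-integrity writeback the walk now follows the write_cache_pages() pattern from mm/page-writeback.c: tag the currently dirty pages as TOWRITE up front, then look pages up by that tag, so pages redirtied during the walk are not written twice in the same pass. The pieces from the hunk above, assembled:

    int tag = (wbc->sync_mode == WB_SYNC_ALL) ?
                    PAGECACHE_TAG_TOWRITE : PAGECACHE_TAG_DIRTY;

    if (wbc->sync_mode == WB_SYNC_ALL)
            tag_pages_for_writeback(mapping, index, end);   /* snapshot */
    nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
                    min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1);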
@@ -2541,7 +2502,6 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
2541 struct writeback_control *wbc) 2502 struct writeback_control *wbc)
2542{ 2503{
2543 int ret; 2504 int ret;
2544 struct address_space *mapping = page->mapping;
2545 struct extent_page_data epd = { 2505 struct extent_page_data epd = {
2546 .bio = NULL, 2506 .bio = NULL,
2547 .tree = tree, 2507 .tree = tree,
@@ -2549,18 +2509,9 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
2549 .extent_locked = 0, 2509 .extent_locked = 0,
2550 .sync_io = wbc->sync_mode == WB_SYNC_ALL, 2510 .sync_io = wbc->sync_mode == WB_SYNC_ALL,
2551 }; 2511 };
2552 struct writeback_control wbc_writepages = {
2553 .sync_mode = wbc->sync_mode,
2554 .older_than_this = NULL,
2555 .nr_to_write = 64,
2556 .range_start = page_offset(page) + PAGE_CACHE_SIZE,
2557 .range_end = (loff_t)-1,
2558 };
2559 2512
2560 ret = __extent_writepage(page, wbc, &epd); 2513 ret = __extent_writepage(page, wbc, &epd);
2561 2514
2562 extent_write_cache_pages(tree, mapping, &wbc_writepages,
2563 __extent_writepage, &epd, flush_write_bio);
2564 flush_epd_write_bio(&epd); 2515 flush_epd_write_bio(&epd);
2565 return ret; 2516 return ret;
2566} 2517}
@@ -2584,7 +2535,6 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
2584 }; 2535 };
2585 struct writeback_control wbc_writepages = { 2536 struct writeback_control wbc_writepages = {
2586 .sync_mode = mode, 2537 .sync_mode = mode,
2587 .older_than_this = NULL,
2588 .nr_to_write = nr_pages * 2, 2538 .nr_to_write = nr_pages * 2,
2589 .range_start = start, 2539 .range_start = start,
2590 .range_end = end + 1, 2540 .range_end = end + 1,
@@ -3022,8 +2972,15 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
3022 return NULL; 2972 return NULL;
3023 eb->start = start; 2973 eb->start = start;
3024 eb->len = len; 2974 eb->len = len;
3025 spin_lock_init(&eb->lock); 2975 rwlock_init(&eb->lock);
3026 init_waitqueue_head(&eb->lock_wq); 2976 atomic_set(&eb->write_locks, 0);
2977 atomic_set(&eb->read_locks, 0);
2978 atomic_set(&eb->blocking_readers, 0);
2979 atomic_set(&eb->blocking_writers, 0);
2980 atomic_set(&eb->spinning_readers, 0);
2981 atomic_set(&eb->spinning_writers, 0);
2982 init_waitqueue_head(&eb->write_lock_wq);
2983 init_waitqueue_head(&eb->read_lock_wq);
3027 2984
3028#if LEAK_DEBUG 2985#if LEAK_DEBUG
3029 spin_lock_irqsave(&leak_lock, flags); 2986 spin_lock_irqsave(&leak_lock, flags);
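The single spinlock and waitqueue give way to a rwlock plus counters tracking how many holders are in spinning versus blocking mode; the helpers that manipulate them live in the rewritten fs/btrfs/locking.c (see the diffstat). A greatly simplified sketch of a reader taking the lock; the real helper also rechecks blocking_writers after acquiring the rwlock, closing the race this version leaves open:

    void tree_read_lock_sketch(struct extent_buffer *eb)
    {
            /* wait for any writer that has gone to blocking mode */
            wait_event(eb->write_lock_wq,
                       atomic_read(&eb->blocking_writers) == 0);
            read_lock(&eb->lock);
            atomic_inc(&eb->read_locks);
            atomic_inc(&eb->spinning_readers);
    }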
@@ -3119,7 +3076,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
3119 i = 0; 3076 i = 0;
3120 } 3077 }
3121 for (; i < num_pages; i++, index++) { 3078 for (; i < num_pages; i++, index++) {
3122 p = find_or_create_page(mapping, index, GFP_NOFS | __GFP_HIGHMEM); 3079 p = find_or_create_page(mapping, index, GFP_NOFS);
3123 if (!p) { 3080 if (!p) {
3124 WARN_ON(1); 3081 WARN_ON(1);
3125 goto free_eb; 3082 goto free_eb;
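Dropping __GFP_HIGHMEM here is what makes the wholesale kmap_atomic() removals later in this file safe: extent buffer pages now always come from lowmem, where a permanent kernel mapping exists, so every map/unmap pair collapses to a plain pointer lookup:

    /* lowmem page: page_address() is always valid, no KM_USERx slots */
    char *kaddr = page_address(page);

    memcpy(dst, kaddr + offset, cur);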
@@ -3266,6 +3223,22 @@ int set_extent_buffer_dirty(struct extent_io_tree *tree,
3266 return was_dirty; 3223 return was_dirty;
3267} 3224}
3268 3225
3226static int __eb_straddles_pages(u64 start, u64 len)
3227{
3228 if (len < PAGE_CACHE_SIZE)
3229 return 1;
3230 if (start & (PAGE_CACHE_SIZE - 1))
3231 return 1;
3232 if ((start + len) & (PAGE_CACHE_SIZE - 1))
3233 return 1;
3234 return 0;
3235}
3236
3237static int eb_straddles_pages(struct extent_buffer *eb)
3238{
3239 return __eb_straddles_pages(eb->start, eb->len);
3240}
3241
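Worked examples of the new predicate, assuming PAGE_CACHE_SIZE == 4096; only buffers that share a page with other data still need the extent_io tree's uptodate tracking:

    WARN_ON(__eb_straddles_pages(0, 4096));      /* one aligned page  -> 0 */
    WARN_ON(__eb_straddles_pages(4096, 8192));   /* two aligned pages -> 0 */
    WARN_ON(!__eb_straddles_pages(1024, 4096));  /* unaligned start   -> 1 */
    WARN_ON(!__eb_straddles_pages(0, 2048));     /* sub-page buffer   -> 1 */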
3269int clear_extent_buffer_uptodate(struct extent_io_tree *tree, 3242int clear_extent_buffer_uptodate(struct extent_io_tree *tree,
3270 struct extent_buffer *eb, 3243 struct extent_buffer *eb,
3271 struct extent_state **cached_state) 3244 struct extent_state **cached_state)
@@ -3277,8 +3250,10 @@ int clear_extent_buffer_uptodate(struct extent_io_tree *tree,
3277 num_pages = num_extent_pages(eb->start, eb->len); 3250 num_pages = num_extent_pages(eb->start, eb->len);
3278 clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); 3251 clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
3279 3252
3280 clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, 3253 if (eb_straddles_pages(eb)) {
3281 cached_state, GFP_NOFS); 3254 clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
3255 cached_state, GFP_NOFS);
3256 }
3282 for (i = 0; i < num_pages; i++) { 3257 for (i = 0; i < num_pages; i++) {
3283 page = extent_buffer_page(eb, i); 3258 page = extent_buffer_page(eb, i);
3284 if (page) 3259 if (page)
@@ -3296,8 +3271,10 @@ int set_extent_buffer_uptodate(struct extent_io_tree *tree,
3296 3271
3297 num_pages = num_extent_pages(eb->start, eb->len); 3272 num_pages = num_extent_pages(eb->start, eb->len);
3298 3273
3299 set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, 3274 if (eb_straddles_pages(eb)) {
3300 NULL, GFP_NOFS); 3275 set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
3276 NULL, GFP_NOFS);
3277 }
3301 for (i = 0; i < num_pages; i++) { 3278 for (i = 0; i < num_pages; i++) {
3302 page = extent_buffer_page(eb, i); 3279 page = extent_buffer_page(eb, i);
3303 if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) || 3280 if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) ||
@@ -3320,9 +3297,12 @@ int extent_range_uptodate(struct extent_io_tree *tree,
3320 int uptodate; 3297 int uptodate;
3321 unsigned long index; 3298 unsigned long index;
3322 3299
3323 ret = test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL); 3300 if (__eb_straddles_pages(start, end - start + 1)) {
3324 if (ret) 3301 ret = test_range_bit(tree, start, end,
3325 return 1; 3302 EXTENT_UPTODATE, 1, NULL);
3303 if (ret)
3304 return 1;
3305 }
3326 while (start <= end) { 3306 while (start <= end) {
3327 index = start >> PAGE_CACHE_SHIFT; 3307 index = start >> PAGE_CACHE_SHIFT;
3328 page = find_get_page(tree->mapping, index); 3308 page = find_get_page(tree->mapping, index);
@@ -3350,10 +3330,12 @@ int extent_buffer_uptodate(struct extent_io_tree *tree,
3350 if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags)) 3330 if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
3351 return 1; 3331 return 1;
3352 3332
3353 ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1, 3333 if (eb_straddles_pages(eb)) {
3354 EXTENT_UPTODATE, 1, cached_state); 3334 ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1,
3355 if (ret) 3335 EXTENT_UPTODATE, 1, cached_state);
3356 return ret; 3336 if (ret)
3337 return ret;
3338 }
3357 3339
3358 num_pages = num_extent_pages(eb->start, eb->len); 3340 num_pages = num_extent_pages(eb->start, eb->len);
3359 for (i = 0; i < num_pages; i++) { 3341 for (i = 0; i < num_pages; i++) {
@@ -3386,9 +3368,11 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
3386 if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags)) 3368 if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
3387 return 0; 3369 return 0;
3388 3370
3389 if (test_range_bit(tree, eb->start, eb->start + eb->len - 1, 3371 if (eb_straddles_pages(eb)) {
3390 EXTENT_UPTODATE, 1, NULL)) { 3372 if (test_range_bit(tree, eb->start, eb->start + eb->len - 1,
3391 return 0; 3373 EXTENT_UPTODATE, 1, NULL)) {
3374 return 0;
3375 }
3392 } 3376 }
3393 3377
3394 if (start) { 3378 if (start) {
@@ -3492,9 +3476,8 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv,
3492 page = extent_buffer_page(eb, i); 3476 page = extent_buffer_page(eb, i);
3493 3477
3494 cur = min(len, (PAGE_CACHE_SIZE - offset)); 3478 cur = min(len, (PAGE_CACHE_SIZE - offset));
3495 kaddr = kmap_atomic(page, KM_USER1); 3479 kaddr = page_address(page);
3496 memcpy(dst, kaddr + offset, cur); 3480 memcpy(dst, kaddr + offset, cur);
3497 kunmap_atomic(kaddr, KM_USER1);
3498 3481
3499 dst += cur; 3482 dst += cur;
3500 len -= cur; 3483 len -= cur;
@@ -3504,9 +3487,9 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv,
3504} 3487}
3505 3488
3506int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, 3489int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
3507 unsigned long min_len, char **token, char **map, 3490 unsigned long min_len, char **map,
3508 unsigned long *map_start, 3491 unsigned long *map_start,
3509 unsigned long *map_len, int km) 3492 unsigned long *map_len)
3510{ 3493{
3511 size_t offset = start & (PAGE_CACHE_SIZE - 1); 3494 size_t offset = start & (PAGE_CACHE_SIZE - 1);
3512 char *kaddr; 3495 char *kaddr;
@@ -3536,42 +3519,12 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
3536 } 3519 }
3537 3520
3538 p = extent_buffer_page(eb, i); 3521 p = extent_buffer_page(eb, i);
3539 kaddr = kmap_atomic(p, km); 3522 kaddr = page_address(p);
3540 *token = kaddr;
3541 *map = kaddr + offset; 3523 *map = kaddr + offset;
3542 *map_len = PAGE_CACHE_SIZE - offset; 3524 *map_len = PAGE_CACHE_SIZE - offset;
3543 return 0; 3525 return 0;
3544} 3526}
3545 3527
3546int map_extent_buffer(struct extent_buffer *eb, unsigned long start,
3547 unsigned long min_len,
3548 char **token, char **map,
3549 unsigned long *map_start,
3550 unsigned long *map_len, int km)
3551{
3552 int err;
3553 int save = 0;
3554 if (eb->map_token) {
3555 unmap_extent_buffer(eb, eb->map_token, km);
3556 eb->map_token = NULL;
3557 save = 1;
3558 }
3559 err = map_private_extent_buffer(eb, start, min_len, token, map,
3560 map_start, map_len, km);
3561 if (!err && save) {
3562 eb->map_token = *token;
3563 eb->kaddr = *map;
3564 eb->map_start = *map_start;
3565 eb->map_len = *map_len;
3566 }
3567 return err;
3568}
3569
3570void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km)
3571{
3572 kunmap_atomic(token, km);
3573}
3574
3575int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, 3528int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
3576 unsigned long start, 3529 unsigned long start,
3577 unsigned long len) 3530 unsigned long len)
@@ -3595,9 +3548,8 @@ int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
3595 3548
3596 cur = min(len, (PAGE_CACHE_SIZE - offset)); 3549 cur = min(len, (PAGE_CACHE_SIZE - offset));
3597 3550
3598 kaddr = kmap_atomic(page, KM_USER0); 3551 kaddr = page_address(page);
3599 ret = memcmp(ptr, kaddr + offset, cur); 3552 ret = memcmp(ptr, kaddr + offset, cur);
3600 kunmap_atomic(kaddr, KM_USER0);
3601 if (ret) 3553 if (ret)
3602 break; 3554 break;
3603 3555
@@ -3630,9 +3582,8 @@ void write_extent_buffer(struct extent_buffer *eb, const void *srcv,
3630 WARN_ON(!PageUptodate(page)); 3582 WARN_ON(!PageUptodate(page));
3631 3583
3632 cur = min(len, PAGE_CACHE_SIZE - offset); 3584 cur = min(len, PAGE_CACHE_SIZE - offset);
3633 kaddr = kmap_atomic(page, KM_USER1); 3585 kaddr = page_address(page);
3634 memcpy(kaddr + offset, src, cur); 3586 memcpy(kaddr + offset, src, cur);
3635 kunmap_atomic(kaddr, KM_USER1);
3636 3587
3637 src += cur; 3588 src += cur;
3638 len -= cur; 3589 len -= cur;
@@ -3661,9 +3612,8 @@ void memset_extent_buffer(struct extent_buffer *eb, char c,
3661 WARN_ON(!PageUptodate(page)); 3612 WARN_ON(!PageUptodate(page));
3662 3613
3663 cur = min(len, PAGE_CACHE_SIZE - offset); 3614 cur = min(len, PAGE_CACHE_SIZE - offset);
3664 kaddr = kmap_atomic(page, KM_USER0); 3615 kaddr = page_address(page);
3665 memset(kaddr + offset, c, cur); 3616 memset(kaddr + offset, c, cur);
3666 kunmap_atomic(kaddr, KM_USER0);
3667 3617
3668 len -= cur; 3618 len -= cur;
3669 offset = 0; 3619 offset = 0;
@@ -3694,9 +3644,8 @@ void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
3694 3644
3695 cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - offset)); 3645 cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - offset));
3696 3646
3697 kaddr = kmap_atomic(page, KM_USER0); 3647 kaddr = page_address(page);
3698 read_extent_buffer(src, kaddr + offset, src_offset, cur); 3648 read_extent_buffer(src, kaddr + offset, src_offset, cur);
3699 kunmap_atomic(kaddr, KM_USER0);
3700 3649
3701 src_offset += cur; 3650 src_offset += cur;
3702 len -= cur; 3651 len -= cur;
@@ -3709,20 +3658,17 @@ static void move_pages(struct page *dst_page, struct page *src_page,
3709 unsigned long dst_off, unsigned long src_off, 3658 unsigned long dst_off, unsigned long src_off,
3710 unsigned long len) 3659 unsigned long len)
3711{ 3660{
3712 char *dst_kaddr = kmap_atomic(dst_page, KM_USER0); 3661 char *dst_kaddr = page_address(dst_page);
3713 if (dst_page == src_page) { 3662 if (dst_page == src_page) {
3714 memmove(dst_kaddr + dst_off, dst_kaddr + src_off, len); 3663 memmove(dst_kaddr + dst_off, dst_kaddr + src_off, len);
3715 } else { 3664 } else {
3716 char *src_kaddr = kmap_atomic(src_page, KM_USER1); 3665 char *src_kaddr = page_address(src_page);
3717 char *p = dst_kaddr + dst_off + len; 3666 char *p = dst_kaddr + dst_off + len;
3718 char *s = src_kaddr + src_off + len; 3667 char *s = src_kaddr + src_off + len;
3719 3668
3720 while (len--) 3669 while (len--)
3721 *--p = *--s; 3670 *--p = *--s;
3722
3723 kunmap_atomic(src_kaddr, KM_USER1);
3724 } 3671 }
3725 kunmap_atomic(dst_kaddr, KM_USER0);
3726} 3672}
3727 3673
3728static inline bool areas_overlap(unsigned long src, unsigned long dst, unsigned long len) 3674static inline bool areas_overlap(unsigned long src, unsigned long dst, unsigned long len)
@@ -3735,20 +3681,17 @@ static void copy_pages(struct page *dst_page, struct page *src_page,
3735 unsigned long dst_off, unsigned long src_off, 3681 unsigned long dst_off, unsigned long src_off,
3736 unsigned long len) 3682 unsigned long len)
3737{ 3683{
3738 char *dst_kaddr = kmap_atomic(dst_page, KM_USER0); 3684 char *dst_kaddr = page_address(dst_page);
3739 char *src_kaddr; 3685 char *src_kaddr;
3740 3686
3741 if (dst_page != src_page) { 3687 if (dst_page != src_page) {
3742 src_kaddr = kmap_atomic(src_page, KM_USER1); 3688 src_kaddr = page_address(src_page);
3743 } else { 3689 } else {
3744 src_kaddr = dst_kaddr; 3690 src_kaddr = dst_kaddr;
3745 BUG_ON(areas_overlap(src_off, dst_off, len)); 3691 BUG_ON(areas_overlap(src_off, dst_off, len));
3746 } 3692 }
3747 3693
3748 memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len); 3694 memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len);
3749 kunmap_atomic(dst_kaddr, KM_USER0);
3750 if (dst_page != src_page)
3751 kunmap_atomic(src_kaddr, KM_USER1);
3752} 3695}
3753 3696
3754void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, 3697void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index a11a92ee2d30..7b2f0c3e7929 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -76,15 +76,15 @@ struct extent_io_ops {
76 struct extent_state *state); 76 struct extent_state *state);
77 int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end, 77 int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end,
78 struct extent_state *state, int uptodate); 78 struct extent_state *state, int uptodate);
79 int (*set_bit_hook)(struct inode *inode, struct extent_state *state, 79 void (*set_bit_hook)(struct inode *inode, struct extent_state *state,
80 int *bits); 80 int *bits);
81 int (*clear_bit_hook)(struct inode *inode, struct extent_state *state, 81 void (*clear_bit_hook)(struct inode *inode, struct extent_state *state,
82 int *bits); 82 int *bits);
83 int (*merge_extent_hook)(struct inode *inode, 83 void (*merge_extent_hook)(struct inode *inode,
84 struct extent_state *new, 84 struct extent_state *new,
85 struct extent_state *other); 85 struct extent_state *other);
86 int (*split_extent_hook)(struct inode *inode, 86 void (*split_extent_hook)(struct inode *inode,
87 struct extent_state *orig, u64 split); 87 struct extent_state *orig, u64 split);
88 int (*write_cache_pages_lock_hook)(struct page *page); 88 int (*write_cache_pages_lock_hook)(struct page *page);
89}; 89};
90 90
@@ -108,8 +108,6 @@ struct extent_state {
108 wait_queue_head_t wq; 108 wait_queue_head_t wq;
109 atomic_t refs; 109 atomic_t refs;
110 unsigned long state; 110 unsigned long state;
111 u64 split_start;
112 u64 split_end;
113 111
114 /* for use by the FS */ 112 /* for use by the FS */
115 u64 private; 113 u64 private;
@@ -120,8 +118,6 @@ struct extent_state {
120struct extent_buffer { 118struct extent_buffer {
121 u64 start; 119 u64 start;
122 unsigned long len; 120 unsigned long len;
123 char *map_token;
124 char *kaddr;
125 unsigned long map_start; 121 unsigned long map_start;
126 unsigned long map_len; 122 unsigned long map_len;
127 struct page *first_page; 123 struct page *first_page;
@@ -130,14 +126,26 @@ struct extent_buffer {
130 struct rcu_head rcu_head; 126 struct rcu_head rcu_head;
131 atomic_t refs; 127 atomic_t refs;
132 128
133 /* the spinlock is used to protect most operations */ 129 /* count of read lock holders on the extent buffer */
134 spinlock_t lock; 130 atomic_t write_locks;
131 atomic_t read_locks;
132 atomic_t blocking_writers;
133 atomic_t blocking_readers;
134 atomic_t spinning_readers;
135 atomic_t spinning_writers;
136
137 /* protects write locks */
138 rwlock_t lock;
135 139
136 /* 140 /* readers use lock_wq while they wait for the write
137 * when we keep the lock held while blocking, waiters go onto 141 * lock holders to unlock
138 * the wq
139 */ 142 */
140 wait_queue_head_t lock_wq; 143 wait_queue_head_t write_lock_wq;
144
145 /* writers use read_lock_wq while they wait for readers
146 * to unlock
147 */
148 wait_queue_head_t read_lock_wq;
141}; 149};
142 150
143static inline void extent_set_compress_type(unsigned long *bio_flags, 151static inline void extent_set_compress_type(unsigned long *bio_flags,
@@ -279,15 +287,10 @@ int clear_extent_buffer_uptodate(struct extent_io_tree *tree,
279int extent_buffer_uptodate(struct extent_io_tree *tree, 287int extent_buffer_uptodate(struct extent_io_tree *tree,
280 struct extent_buffer *eb, 288 struct extent_buffer *eb,
281 struct extent_state *cached_state); 289 struct extent_state *cached_state);
282int map_extent_buffer(struct extent_buffer *eb, unsigned long offset,
283 unsigned long min_len, char **token, char **map,
284 unsigned long *map_start,
285 unsigned long *map_len, int km);
286int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset, 290int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset,
287 unsigned long min_len, char **token, char **map, 291 unsigned long min_len, char **map,
288 unsigned long *map_start, 292 unsigned long *map_start,
289 unsigned long *map_len, int km); 293 unsigned long *map_len);
290void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km);
291int extent_range_uptodate(struct extent_io_tree *tree, 294int extent_range_uptodate(struct extent_io_tree *tree,
292 u64 start, u64 end); 295 u64 start, u64 end);
293int extent_clear_unlock_delalloc(struct inode *inode, 296int extent_clear_unlock_delalloc(struct inode *inode,
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 2d0410344ea3..7c97b3301459 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -183,22 +183,10 @@ static int mergable_maps(struct extent_map *prev, struct extent_map *next)
183 return 0; 183 return 0;
184} 184}
185 185
186int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len) 186static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em)
187{ 187{
188 int ret = 0;
189 struct extent_map *merge = NULL; 188 struct extent_map *merge = NULL;
190 struct rb_node *rb; 189 struct rb_node *rb;
191 struct extent_map *em;
192
193 write_lock(&tree->lock);
194 em = lookup_extent_mapping(tree, start, len);
195
196 WARN_ON(!em || em->start != start);
197
198 if (!em)
199 goto out;
200
201 clear_bit(EXTENT_FLAG_PINNED, &em->flags);
202 190
203 if (em->start != 0) { 191 if (em->start != 0) {
204 rb = rb_prev(&em->rb_node); 192 rb = rb_prev(&em->rb_node);
@@ -225,6 +213,24 @@ int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len)
225 merge->in_tree = 0; 213 merge->in_tree = 0;
226 free_extent_map(merge); 214 free_extent_map(merge);
227 } 215 }
216}
217
218int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len)
219{
220 int ret = 0;
221 struct extent_map *em;
222
223 write_lock(&tree->lock);
224 em = lookup_extent_mapping(tree, start, len);
225
226 WARN_ON(!em || em->start != start);
227
228 if (!em)
229 goto out;
230
231 clear_bit(EXTENT_FLAG_PINNED, &em->flags);
232
233 try_merge_map(tree, em);
228 234
229 free_extent_map(em); 235 free_extent_map(em);
230out: 236out:
@@ -247,7 +253,6 @@ int add_extent_mapping(struct extent_map_tree *tree,
247 struct extent_map *em) 253 struct extent_map *em)
248{ 254{
249 int ret = 0; 255 int ret = 0;
250 struct extent_map *merge = NULL;
251 struct rb_node *rb; 256 struct rb_node *rb;
252 struct extent_map *exist; 257 struct extent_map *exist;
253 258
@@ -263,30 +268,8 @@ int add_extent_mapping(struct extent_map_tree *tree,
263 goto out; 268 goto out;
264 } 269 }
265 atomic_inc(&em->refs); 270 atomic_inc(&em->refs);
266 if (em->start != 0) { 271
267 rb = rb_prev(&em->rb_node); 272 try_merge_map(tree, em);
268 if (rb)
269 merge = rb_entry(rb, struct extent_map, rb_node);
270 if (rb && mergable_maps(merge, em)) {
271 em->start = merge->start;
272 em->len += merge->len;
273 em->block_len += merge->block_len;
274 em->block_start = merge->block_start;
275 merge->in_tree = 0;
276 rb_erase(&merge->rb_node, &tree->map);
277 free_extent_map(merge);
278 }
279 }
280 rb = rb_next(&em->rb_node);
281 if (rb)
282 merge = rb_entry(rb, struct extent_map, rb_node);
283 if (rb && mergable_maps(em, merge)) {
284 em->len += merge->len;
285 em->block_len += merge->len;
286 rb_erase(&merge->rb_node, &tree->map);
287 merge->in_tree = 0;
288 free_extent_map(merge);
289 }
290out: 273out:
291 return ret; 274 return ret;
292} 275}
@@ -299,19 +282,8 @@ static u64 range_end(u64 start, u64 len)
299 return start + len; 282 return start + len;
300} 283}
301 284
302/** 285struct extent_map *__lookup_extent_mapping(struct extent_map_tree *tree,
303 * lookup_extent_mapping - lookup extent_map 286 u64 start, u64 len, int strict)
304 * @tree: tree to lookup in
305 * @start: byte offset to start the search
306 * @len: length of the lookup range
307 *
308 * Find and return the first extent_map struct in @tree that intersects the
309 * [start, len] range. There may be additional objects in the tree that
310 * intersect, so check the object returned carefully to make sure that no
311 * additional lookups are needed.
312 */
313struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
314 u64 start, u64 len)
315{ 287{
316 struct extent_map *em; 288 struct extent_map *em;
317 struct rb_node *rb_node; 289 struct rb_node *rb_node;
@@ -320,38 +292,42 @@ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
320 u64 end = range_end(start, len); 292 u64 end = range_end(start, len);
321 293
322 rb_node = __tree_search(&tree->map, start, &prev, &next); 294 rb_node = __tree_search(&tree->map, start, &prev, &next);
323 if (!rb_node && prev) {
324 em = rb_entry(prev, struct extent_map, rb_node);
325 if (end > em->start && start < extent_map_end(em))
326 goto found;
327 }
328 if (!rb_node && next) {
329 em = rb_entry(next, struct extent_map, rb_node);
330 if (end > em->start && start < extent_map_end(em))
331 goto found;
332 }
333 if (!rb_node) { 295 if (!rb_node) {
334 em = NULL; 296 if (prev)
335 goto out; 297 rb_node = prev;
336 } 298 else if (next)
337 if (IS_ERR(rb_node)) { 299 rb_node = next;
338 em = ERR_CAST(rb_node); 300 else
339 goto out; 301 return NULL;
340 } 302 }
303
341 em = rb_entry(rb_node, struct extent_map, rb_node); 304 em = rb_entry(rb_node, struct extent_map, rb_node);
342 if (end > em->start && start < extent_map_end(em))
343 goto found;
344 305
345 em = NULL; 306 if (strict && !(end > em->start && start < extent_map_end(em)))
346 goto out; 307 return NULL;
347 308
348found:
349 atomic_inc(&em->refs); 309 atomic_inc(&em->refs);
350out:
351 return em; 310 return em;
352} 311}
353 312
354/** 313/**
314 * lookup_extent_mapping - lookup extent_map
315 * @tree: tree to lookup in
316 * @start: byte offset to start the search
317 * @len: length of the lookup range
318 *
319 * Find and return the first extent_map struct in @tree that intersects the
320 * [start, len] range. There may be additional objects in the tree that
321 * intersect, so check the object returned carefully to make sure that no
322 * additional lookups are needed.
323 */
324struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
325 u64 start, u64 len)
326{
327 return __lookup_extent_mapping(tree, start, len, 1);
328}
329
330/**
355 * search_extent_mapping - find a nearby extent map 331 * search_extent_mapping - find a nearby extent map
356 * @tree: tree to lookup in 332 * @tree: tree to lookup in
357 * @start: byte offset to start the search 333 * @start: byte offset to start the search
@@ -365,38 +341,7 @@ out:
365struct extent_map *search_extent_mapping(struct extent_map_tree *tree, 341struct extent_map *search_extent_mapping(struct extent_map_tree *tree,
366 u64 start, u64 len) 342 u64 start, u64 len)
367{ 343{
368 struct extent_map *em; 344 return __lookup_extent_mapping(tree, start, len, 0);
369 struct rb_node *rb_node;
370 struct rb_node *prev = NULL;
371 struct rb_node *next = NULL;
372
373 rb_node = __tree_search(&tree->map, start, &prev, &next);
374 if (!rb_node && prev) {
375 em = rb_entry(prev, struct extent_map, rb_node);
376 goto found;
377 }
378 if (!rb_node && next) {
379 em = rb_entry(next, struct extent_map, rb_node);
380 goto found;
381 }
382 if (!rb_node) {
383 em = NULL;
384 goto out;
385 }
386 if (IS_ERR(rb_node)) {
387 em = ERR_CAST(rb_node);
388 goto out;
389 }
390 em = rb_entry(rb_node, struct extent_map, rb_node);
391 goto found;
392
393 em = NULL;
394 goto out;
395
396found:
397 atomic_inc(&em->refs);
398out:
399 return em;
400} 345}
401 346
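Both lookups collapse into __lookup_extent_mapping(); 'strict' selects whether the candidate must genuinely intersect the range (lookup_extent_mapping) or whether a nearby neighbour will do (search_extent_mapping). The intersection test, written out as an illustrative helper on top of the file's own range_end() and extent_map_end():

    static int em_intersects(struct extent_map *em, u64 start, u64 len)
    {
            u64 end = range_end(start, len);   /* saturates at (u64)-1 */

            return end > em->start && start < extent_map_end(em);
    }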
402/** 347/**
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 90d4ee52cd45..b910694f61ed 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -177,6 +177,15 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
177 177
178 WARN_ON(bio->bi_vcnt <= 0); 178 WARN_ON(bio->bi_vcnt <= 0);
179 179
180 /*
181 * the free space stuff is only read when it hasn't been
182 * updated in the current transaction. So, we can safely
183 * read from the commit root and sidestep a nasty deadlock
184 * between reading the free space cache and updating the csum tree.
185 */
186 if (btrfs_is_free_space_inode(root, inode))
187 path->search_commit_root = 1;
188
180 disk_bytenr = (u64)bio->bi_sector << 9; 189 disk_bytenr = (u64)bio->bi_sector << 9;
181 if (dio) 190 if (dio)
182 offset = logical_offset; 191 offset = logical_offset;
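Operationally, the deadlock avoidance described in the new comment is a single flag set on the path before the csum lookup, so the search runs against the last committed tree instead of the live one. The pattern in isolation:

    struct btrfs_path *path;

    path = btrfs_alloc_path();
    if (!path)
            return -ENOMEM;
    /* free space inodes can tolerate last-committed data */
    if (btrfs_is_free_space_inode(root, inode))
            path->search_commit_root = 1;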
@@ -282,7 +291,8 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
282 u16 csum_size = btrfs_super_csum_size(&root->fs_info->super_copy); 291 u16 csum_size = btrfs_super_csum_size(&root->fs_info->super_copy);
283 292
284 path = btrfs_alloc_path(); 293 path = btrfs_alloc_path();
285 BUG_ON(!path); 294 if (!path)
295 return -ENOMEM;
286 296
287 if (search_commit) { 297 if (search_commit) {
288 path->skip_locking = 1; 298 path->skip_locking = 1;
@@ -664,15 +674,13 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
664 struct btrfs_sector_sum *sector_sum; 674 struct btrfs_sector_sum *sector_sum;
665 u32 nritems; 675 u32 nritems;
666 u32 ins_size; 676 u32 ins_size;
667 char *eb_map;
668 char *eb_token;
669 unsigned long map_len;
670 unsigned long map_start;
671 u16 csum_size = 677 u16 csum_size =
672 btrfs_super_csum_size(&root->fs_info->super_copy); 678 btrfs_super_csum_size(&root->fs_info->super_copy);
673 679
674 path = btrfs_alloc_path(); 680 path = btrfs_alloc_path();
675 BUG_ON(!path); 681 if (!path)
682 return -ENOMEM;
683
676 sector_sum = sums->sums; 684 sector_sum = sums->sums;
677again: 685again:
678 next_offset = (u64)-1; 686 next_offset = (u64)-1;
@@ -814,30 +822,9 @@ found:
814 item_end = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item); 822 item_end = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item);
815 item_end = (struct btrfs_csum_item *)((unsigned char *)item_end + 823 item_end = (struct btrfs_csum_item *)((unsigned char *)item_end +
816 btrfs_item_size_nr(leaf, path->slots[0])); 824 btrfs_item_size_nr(leaf, path->slots[0]));
817 eb_token = NULL;
818next_sector: 825next_sector:
819 826
820 if (!eb_token || 827 write_extent_buffer(leaf, &sector_sum->sum, (unsigned long)item, csum_size);
821 (unsigned long)item + csum_size >= map_start + map_len) {
822 int err;
823
824 if (eb_token)
825 unmap_extent_buffer(leaf, eb_token, KM_USER1);
826 eb_token = NULL;
827 err = map_private_extent_buffer(leaf, (unsigned long)item,
828 csum_size,
829 &eb_token, &eb_map,
830 &map_start, &map_len, KM_USER1);
831 if (err)
832 eb_token = NULL;
833 }
834 if (eb_token) {
835 memcpy(eb_token + ((unsigned long)item & (PAGE_CACHE_SIZE - 1)),
836 &sector_sum->sum, csum_size);
837 } else {
838 write_extent_buffer(leaf, &sector_sum->sum,
839 (unsigned long)item, csum_size);
840 }
841 828
842 total_bytes += root->sectorsize; 829 total_bytes += root->sectorsize;
843 sector_sum++; 830 sector_sum++;
@@ -850,10 +837,7 @@ next_sector:
850 goto next_sector; 837 goto next_sector;
851 } 838 }
852 } 839 }
853 if (eb_token) { 840
854 unmap_extent_buffer(leaf, eb_token, KM_USER1);
855 eb_token = NULL;
856 }
857 btrfs_mark_buffer_dirty(path->nodes[0]); 841 btrfs_mark_buffer_dirty(path->nodes[0]);
858 if (total_bytes < sums->len) { 842 if (total_bytes < sums->len) {
859 btrfs_release_path(path); 843 btrfs_release_path(path);
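With the token and KM_USERx plumbing gone, map_private_extent_buffer() hands back a direct pointer when the requested range sits inside a single page and fails with -EINVAL when it would cross one; crossing callers fall back to write_extent_buffer(), as the csum loop above now does unconditionally. An illustrative fast path; note the returned pointer is relative to map_start:

    char *kaddr;
    unsigned long map_start, map_len;

    if (!map_private_extent_buffer(leaf, (unsigned long)item, csum_size,
                                   &kaddr, &map_start, &map_len))
            memcpy(kaddr + ((unsigned long)item - map_start),
                   &sector_sum->sum, csum_size);
    else
            write_extent_buffer(leaf, &sector_sum->sum,
                                (unsigned long)item, csum_size);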
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index fa4ef18b66b1..658d66959abe 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -74,7 +74,7 @@ struct inode_defrag {
74 * If an existing record is found the defrag item you 74 * If an existing record is found the defrag item you
75 * pass in is freed 75 * pass in is freed
76 */ 76 */
77static int __btrfs_add_inode_defrag(struct inode *inode, 77static void __btrfs_add_inode_defrag(struct inode *inode,
78 struct inode_defrag *defrag) 78 struct inode_defrag *defrag)
79{ 79{
80 struct btrfs_root *root = BTRFS_I(inode)->root; 80 struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -106,11 +106,11 @@ static int __btrfs_add_inode_defrag(struct inode *inode,
106 BTRFS_I(inode)->in_defrag = 1; 106 BTRFS_I(inode)->in_defrag = 1;
107 rb_link_node(&defrag->rb_node, parent, p); 107 rb_link_node(&defrag->rb_node, parent, p);
108 rb_insert_color(&defrag->rb_node, &root->fs_info->defrag_inodes); 108 rb_insert_color(&defrag->rb_node, &root->fs_info->defrag_inodes);
109 return 0; 109 return;
110 110
111exists: 111exists:
112 kfree(defrag); 112 kfree(defrag);
113 return 0; 113 return;
114 114
115} 115}
116 116
@@ -123,7 +123,6 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
123{ 123{
124 struct btrfs_root *root = BTRFS_I(inode)->root; 124 struct btrfs_root *root = BTRFS_I(inode)->root;
125 struct inode_defrag *defrag; 125 struct inode_defrag *defrag;
126 int ret = 0;
127 u64 transid; 126 u64 transid;
128 127
129 if (!btrfs_test_opt(root, AUTO_DEFRAG)) 128 if (!btrfs_test_opt(root, AUTO_DEFRAG))
@@ -150,9 +149,9 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
150 149
151 spin_lock(&root->fs_info->defrag_inodes_lock); 150 spin_lock(&root->fs_info->defrag_inodes_lock);
152 if (!BTRFS_I(inode)->in_defrag) 151 if (!BTRFS_I(inode)->in_defrag)
153 ret = __btrfs_add_inode_defrag(inode, defrag); 152 __btrfs_add_inode_defrag(inode, defrag);
154 spin_unlock(&root->fs_info->defrag_inodes_lock); 153 spin_unlock(&root->fs_info->defrag_inodes_lock);
155 return ret; 154 return 0;
156} 155}
157 156
158/* 157/*
@@ -855,7 +854,8 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
855 btrfs_drop_extent_cache(inode, start, end - 1, 0); 854 btrfs_drop_extent_cache(inode, start, end - 1, 0);
856 855
857 path = btrfs_alloc_path(); 856 path = btrfs_alloc_path();
858 BUG_ON(!path); 857 if (!path)
858 return -ENOMEM;
859again: 859again:
860 recow = 0; 860 recow = 0;
861 split = start; 861 split = start;
@@ -1059,7 +1059,7 @@ static int prepare_uptodate_page(struct page *page, u64 pos)
1059static noinline int prepare_pages(struct btrfs_root *root, struct file *file, 1059static noinline int prepare_pages(struct btrfs_root *root, struct file *file,
1060 struct page **pages, size_t num_pages, 1060 struct page **pages, size_t num_pages,
1061 loff_t pos, unsigned long first_index, 1061 loff_t pos, unsigned long first_index,
1062 unsigned long last_index, size_t write_bytes) 1062 size_t write_bytes)
1063{ 1063{
1064 struct extent_state *cached_state = NULL; 1064 struct extent_state *cached_state = NULL;
1065 int i; 1065 int i;
@@ -1081,7 +1081,8 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file,
1081 1081
1082again: 1082again:
1083 for (i = 0; i < num_pages; i++) { 1083 for (i = 0; i < num_pages; i++) {
1084 pages[i] = grab_cache_page(inode->i_mapping, index + i); 1084 pages[i] = find_or_create_page(inode->i_mapping, index + i,
1085 GFP_NOFS);
1085 if (!pages[i]) { 1086 if (!pages[i]) {
1086 faili = i - 1; 1087 faili = i - 1;
1087 err = -ENOMEM; 1088 err = -ENOMEM;
@@ -1158,7 +1159,6 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
1158 struct btrfs_root *root = BTRFS_I(inode)->root; 1159 struct btrfs_root *root = BTRFS_I(inode)->root;
1159 struct page **pages = NULL; 1160 struct page **pages = NULL;
1160 unsigned long first_index; 1161 unsigned long first_index;
1161 unsigned long last_index;
1162 size_t num_written = 0; 1162 size_t num_written = 0;
1163 int nrptrs; 1163 int nrptrs;
1164 int ret = 0; 1164 int ret = 0;
@@ -1171,7 +1171,6 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
1171 return -ENOMEM; 1171 return -ENOMEM;
1172 1172
1173 first_index = pos >> PAGE_CACHE_SHIFT; 1173 first_index = pos >> PAGE_CACHE_SHIFT;
1174 last_index = (pos + iov_iter_count(i)) >> PAGE_CACHE_SHIFT;
1175 1174
1176 while (iov_iter_count(i) > 0) { 1175 while (iov_iter_count(i) > 0) {
1177 size_t offset = pos & (PAGE_CACHE_SIZE - 1); 1176 size_t offset = pos & (PAGE_CACHE_SIZE - 1);
@@ -1205,8 +1204,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
1205 * contents of pages from loop to loop 1204 * contents of pages from loop to loop
1206 */ 1205 */
1207 ret = prepare_pages(root, file, pages, num_pages, 1206 ret = prepare_pages(root, file, pages, num_pages,
1208 pos, first_index, last_index, 1207 pos, first_index, write_bytes);
1209 write_bytes);
1210 if (ret) { 1208 if (ret) {
1211 btrfs_delalloc_release_space(inode, 1209 btrfs_delalloc_release_space(inode,
1212 num_pages << PAGE_CACHE_SHIFT); 1210 num_pages << PAGE_CACHE_SHIFT);
@@ -1238,9 +1236,11 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
1238 * managed to copy. 1236 * managed to copy.
1239 */ 1237 */
1240 if (num_pages > dirty_pages) { 1238 if (num_pages > dirty_pages) {
1241 if (copied > 0) 1239 if (copied > 0) {
1242 atomic_inc( 1240 spin_lock(&BTRFS_I(inode)->lock);
1243 &BTRFS_I(inode)->outstanding_extents); 1241 BTRFS_I(inode)->outstanding_extents++;
1242 spin_unlock(&BTRFS_I(inode)->lock);
1243 }
1244 btrfs_delalloc_release_space(inode, 1244 btrfs_delalloc_release_space(inode,
1245 (num_pages - dirty_pages) << 1245 (num_pages - dirty_pages) <<
1246 PAGE_CACHE_SHIFT); 1246 PAGE_CACHE_SHIFT);
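outstanding_extents changes from an atomic_t to a plain counter serialized by the new per-inode spinlock (BTRFS_I(inode)->lock, added to btrfs_inode.h in this series), so it can be read and updated consistently alongside related reservation state. The pattern used at every site this patch touches:

    spin_lock(&BTRFS_I(inode)->lock);
    BTRFS_I(inode)->outstanding_extents++;
    spin_unlock(&BTRFS_I(inode)->lock);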
@@ -1452,7 +1452,7 @@ int btrfs_release_file(struct inode *inode, struct file *filp)
1452 * important optimization for directories because holding the mutex prevents 1452 * important optimization for directories because holding the mutex prevents
1453 * new operations on the dir while we write to disk. 1453 * new operations on the dir while we write to disk.
1454 */ 1454 */
1455int btrfs_sync_file(struct file *file, int datasync) 1455int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
1456{ 1456{
1457 struct dentry *dentry = file->f_path.dentry; 1457 struct dentry *dentry = file->f_path.dentry;
1458 struct inode *inode = dentry->d_inode; 1458 struct inode *inode = dentry->d_inode;
@@ -1462,9 +1462,13 @@ int btrfs_sync_file(struct file *file, int datasync)
1462 1462
1463 trace_btrfs_sync_file(file, datasync); 1463 trace_btrfs_sync_file(file, datasync);
1464 1464
1465 ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
1466 if (ret)
1467 return ret;
1468 mutex_lock(&inode->i_mutex);
1469
1465 /* we wait first, since the writeback may change the inode */ 1470 /* we wait first, since the writeback may change the inode */
1466 root->log_batch++; 1471 root->log_batch++;
1467 /* the VFS called filemap_fdatawrite for us */
1468 btrfs_wait_ordered_range(inode, 0, (u64)-1); 1472 btrfs_wait_ordered_range(inode, 0, (u64)-1);
1469 root->log_batch++; 1473 root->log_batch++;
1470 1474
@@ -1472,8 +1476,10 @@ int btrfs_sync_file(struct file *file, int datasync)
1472 * check the transaction that last modified this inode 1476 * check the transaction that last modified this inode
1473 * and see if its already been committed 1477 * and see if its already been committed
1474 */ 1478 */
1475 if (!BTRFS_I(inode)->last_trans) 1479 if (!BTRFS_I(inode)->last_trans) {
1480 mutex_unlock(&inode->i_mutex);
1476 goto out; 1481 goto out;
1482 }
1477 1483
1478 /* 1484 /*
1479 * if the last transaction that changed this file was before 1485 * if the last transaction that changed this file was before
@@ -1484,6 +1490,7 @@ int btrfs_sync_file(struct file *file, int datasync)
1484 if (BTRFS_I(inode)->last_trans <= 1490 if (BTRFS_I(inode)->last_trans <=
1485 root->fs_info->last_trans_committed) { 1491 root->fs_info->last_trans_committed) {
1486 BTRFS_I(inode)->last_trans = 0; 1492 BTRFS_I(inode)->last_trans = 0;
1493 mutex_unlock(&inode->i_mutex);
1487 goto out; 1494 goto out;
1488 } 1495 }
1489 1496
@@ -1496,12 +1503,15 @@ int btrfs_sync_file(struct file *file, int datasync)
1496 trans = btrfs_start_transaction(root, 0); 1503 trans = btrfs_start_transaction(root, 0);
1497 if (IS_ERR(trans)) { 1504 if (IS_ERR(trans)) {
1498 ret = PTR_ERR(trans); 1505 ret = PTR_ERR(trans);
1506 mutex_unlock(&inode->i_mutex);
1499 goto out; 1507 goto out;
1500 } 1508 }
1501 1509
1502 ret = btrfs_log_dentry_safe(trans, root, dentry); 1510 ret = btrfs_log_dentry_safe(trans, root, dentry);
1503 if (ret < 0) 1511 if (ret < 0) {
1512 mutex_unlock(&inode->i_mutex);
1504 goto out; 1513 goto out;
1514 }
1505 1515
1506 /* we've logged all the items and now have a consistent 1516 /* we've logged all the items and now have a consistent
1507 * version of the file in the log. It is possible that 1517 * version of the file in the log. It is possible that
@@ -1513,7 +1523,7 @@ int btrfs_sync_file(struct file *file, int datasync)
1513 * file again, but that will end up using the synchronization 1523 * file again, but that will end up using the synchronization
1514 * inside btrfs_sync_log to keep things safe. 1524 * inside btrfs_sync_log to keep things safe.
1515 */ 1525 */
1516 mutex_unlock(&dentry->d_inode->i_mutex); 1526 mutex_unlock(&inode->i_mutex);
1517 1527
1518 if (ret != BTRFS_NO_LOG_SYNC) { 1528 if (ret != BTRFS_NO_LOG_SYNC) {
1519 if (ret > 0) { 1529 if (ret > 0) {
@@ -1528,7 +1538,6 @@ int btrfs_sync_file(struct file *file, int datasync)
1528 } else { 1538 } else {
1529 ret = btrfs_end_transaction(trans, root); 1539 ret = btrfs_end_transaction(trans, root);
1530 } 1540 }
1531 mutex_lock(&dentry->d_inode->i_mutex);
1532out: 1541out:
1533 return ret > 0 ? -EIO : ret; 1542 return ret > 0 ? -EIO : ret;
1534} 1543}
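The new prototype follows the 3.1 VFS change that pushes the flush range into ->fsync: the caller no longer does filemap_fdatawrite() on the implementation's behalf, so btrfs_sync_file() must write and wait the range itself and manage i_mutex on its own. Roughly how the VFS reaches it afterwards, per fs/sync.c of that era:

    int vfs_fsync_range(struct file *file, loff_t start, loff_t end,
                        int datasync)
    {
            if (!file->f_op || !file->f_op->fsync)
                    return -EINVAL;
            return file->f_op->fsync(file, start, end, datasync);
    }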
@@ -1664,8 +1673,154 @@ out:
1664 return ret; 1673 return ret;
1665} 1674}
1666 1675
1676static int find_desired_extent(struct inode *inode, loff_t *offset, int origin)
1677{
1678 struct btrfs_root *root = BTRFS_I(inode)->root;
1679 struct extent_map *em;
1680 struct extent_state *cached_state = NULL;
1681 u64 lockstart = *offset;
1682 u64 lockend = i_size_read(inode);
1683 u64 start = *offset;
1684 u64 orig_start = *offset;
1685 u64 len = i_size_read(inode);
1686 u64 last_end = 0;
1687 int ret = 0;
1688
1689 lockend = max_t(u64, root->sectorsize, lockend);
1690 if (lockend <= lockstart)
1691 lockend = lockstart + root->sectorsize;
1692
1693 len = lockend - lockstart + 1;
1694
1695 len = max_t(u64, len, root->sectorsize);
1696 if (inode->i_size == 0)
1697 return -ENXIO;
1698
1699 lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend, 0,
1700 &cached_state, GFP_NOFS);
1701
1702 /*
1703 * Delalloc is such a pain. If we have a hole and we have pending
1704 * delalloc for a portion of the hole we will get back a hole that
1705 * exists for the entire range since it hasn't been actually written
1706 * yet. So to take care of this case we need to look for an extent just
1707 * before the position we want in case there is outstanding delalloc
1708 * going on here.
1709 */
1710 if (origin == SEEK_HOLE && start != 0) {
1711 if (start <= root->sectorsize)
1712 em = btrfs_get_extent_fiemap(inode, NULL, 0, 0,
1713 root->sectorsize, 0);
1714 else
1715 em = btrfs_get_extent_fiemap(inode, NULL, 0,
1716 start - root->sectorsize,
1717 root->sectorsize, 0);
1718 if (IS_ERR(em)) {
1719 ret = -ENXIO;
1720 goto out;
1721 }
1722 last_end = em->start + em->len;
1723 if (em->block_start == EXTENT_MAP_DELALLOC)
1724 last_end = min_t(u64, last_end, inode->i_size);
1725 free_extent_map(em);
1726 }
1727
1728 while (1) {
1729 em = btrfs_get_extent_fiemap(inode, NULL, 0, start, len, 0);
1730 if (IS_ERR(em)) {
1731 ret = -ENXIO;
1732 break;
1733 }
1734
1735 if (em->block_start == EXTENT_MAP_HOLE) {
1736 if (test_bit(EXTENT_FLAG_VACANCY, &em->flags)) {
1737 if (last_end <= orig_start) {
1738 free_extent_map(em);
1739 ret = -ENXIO;
1740 break;
1741 }
1742 }
1743
1744 if (origin == SEEK_HOLE) {
1745 *offset = start;
1746 free_extent_map(em);
1747 break;
1748 }
1749 } else {
1750 if (origin == SEEK_DATA) {
1751 if (em->block_start == EXTENT_MAP_DELALLOC) {
1752 if (start >= inode->i_size) {
1753 free_extent_map(em);
1754 ret = -ENXIO;
1755 break;
1756 }
1757 }
1758
1759 *offset = start;
1760 free_extent_map(em);
1761 break;
1762 }
1763 }
1764
1765 start = em->start + em->len;
1766 last_end = em->start + em->len;
1767
1768 if (em->block_start == EXTENT_MAP_DELALLOC)
1769 last_end = min_t(u64, last_end, inode->i_size);
1770
1771 if (test_bit(EXTENT_FLAG_VACANCY, &em->flags)) {
1772 free_extent_map(em);
1773 ret = -ENXIO;
1774 break;
1775 }
1776 free_extent_map(em);
1777 cond_resched();
1778 }
1779 if (!ret)
1780 *offset = min(*offset, inode->i_size);
1781out:
1782 unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
1783 &cached_state, GFP_NOFS);
1784 return ret;
1785}
1786
1787static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int origin)
1788{
1789 struct inode *inode = file->f_mapping->host;
1790 int ret;
1791
1792 mutex_lock(&inode->i_mutex);
1793 switch (origin) {
1794 case SEEK_END:
1795 case SEEK_CUR:
1796 offset = generic_file_llseek_unlocked(file, offset, origin);
1797 goto out;
1798 case SEEK_DATA:
1799 case SEEK_HOLE:
1800 ret = find_desired_extent(inode, &offset, origin);
1801 if (ret) {
1802 mutex_unlock(&inode->i_mutex);
1803 return ret;
1804 }
1805 }
1806
1807 if (offset < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET))
1808 return -EINVAL;
1809 if (offset > inode->i_sb->s_maxbytes)
1810 return -EINVAL;
1811
1812 /* Special lock needed here? */
1813 if (offset != file->f_pos) {
1814 file->f_pos = offset;
1815 file->f_version = 0;
1816 }
1817out:
1818 mutex_unlock(&inode->i_mutex);
1819 return offset;
1820}
1821
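With btrfs_file_llseek wired into f_op->llseek, userspace can walk a file's layout using the then-new SEEK_DATA/SEEK_HOLE whence values. Illustrative use (needs _GNU_SOURCE; the seek fails with errno == ENXIO when nothing qualifies before end of file):

    #define _GNU_SOURCE
    #include <unistd.h>

    static off_t first_data_byte(int fd)
    {
            /* returns -1 with errno == ENXIO if the file is all hole */
            return lseek(fd, 0, SEEK_DATA);
    }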
1667const struct file_operations btrfs_file_operations = { 1822const struct file_operations btrfs_file_operations = {
1668 .llseek = generic_file_llseek, 1823 .llseek = btrfs_file_llseek,
1669 .read = do_sync_read, 1824 .read = do_sync_read,
1670 .write = do_sync_write, 1825 .write = do_sync_write,
1671 .aio_read = generic_file_aio_read, 1826 .aio_read = generic_file_aio_read,
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index bf0d61567f3d..6377713f639c 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -98,6 +98,12 @@ struct inode *lookup_free_space_inode(struct btrfs_root *root,
98 return inode; 98 return inode;
99 99
100 spin_lock(&block_group->lock); 100 spin_lock(&block_group->lock);
101 if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) {
102 printk(KERN_INFO "Old style space inode found, converting.\n");
103 BTRFS_I(inode)->flags &= ~BTRFS_INODE_NODATASUM;
104 block_group->disk_cache_state = BTRFS_DC_CLEAR;
105 }
106
101 if (!btrfs_fs_closing(root->fs_info)) { 107 if (!btrfs_fs_closing(root->fs_info)) {
102 block_group->inode = igrab(inode); 108 block_group->inode = igrab(inode);
103 block_group->iref = 1; 109 block_group->iref = 1;
@@ -135,7 +141,7 @@ int __create_free_space_inode(struct btrfs_root *root,
135 btrfs_set_inode_gid(leaf, inode_item, 0); 141 btrfs_set_inode_gid(leaf, inode_item, 0);
136 btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600); 142 btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600);
137 btrfs_set_inode_flags(leaf, inode_item, BTRFS_INODE_NOCOMPRESS | 143 btrfs_set_inode_flags(leaf, inode_item, BTRFS_INODE_NOCOMPRESS |
138 BTRFS_INODE_PREALLOC | BTRFS_INODE_NODATASUM); 144 BTRFS_INODE_PREALLOC);
139 btrfs_set_inode_nlink(leaf, inode_item, 1); 145 btrfs_set_inode_nlink(leaf, inode_item, 1);
140 btrfs_set_inode_transid(leaf, inode_item, trans->transid); 146 btrfs_set_inode_transid(leaf, inode_item, trans->transid);
141 btrfs_set_inode_block_group(leaf, inode_item, offset); 147 btrfs_set_inode_block_group(leaf, inode_item, offset);
@@ -239,17 +245,12 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
239 struct btrfs_free_space_header *header; 245 struct btrfs_free_space_header *header;
240 struct extent_buffer *leaf; 246 struct extent_buffer *leaf;
241 struct page *page; 247 struct page *page;
242 u32 *checksums = NULL, *crc;
243 char *disk_crcs = NULL;
244 struct btrfs_key key; 248 struct btrfs_key key;
245 struct list_head bitmaps; 249 struct list_head bitmaps;
246 u64 num_entries; 250 u64 num_entries;
247 u64 num_bitmaps; 251 u64 num_bitmaps;
248 u64 generation; 252 u64 generation;
249 u32 cur_crc = ~(u32)0;
250 pgoff_t index = 0; 253 pgoff_t index = 0;
251 unsigned long first_page_offset;
252 int num_checksums;
253 int ret = 0; 254 int ret = 0;
254 255
255 INIT_LIST_HEAD(&bitmaps); 256 INIT_LIST_HEAD(&bitmaps);
@@ -292,16 +293,6 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
292 if (!num_entries) 293 if (!num_entries)
293 goto out; 294 goto out;
294 295
295 /* Setup everything for doing checksumming */
296 num_checksums = i_size_read(inode) / PAGE_CACHE_SIZE;
297 checksums = crc = kzalloc(sizeof(u32) * num_checksums, GFP_NOFS);
298 if (!checksums)
299 goto out;
300 first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64);
301 disk_crcs = kzalloc(first_page_offset, GFP_NOFS);
302 if (!disk_crcs)
303 goto out;
304
305 ret = readahead_cache(inode); 296 ret = readahead_cache(inode);
306 if (ret) 297 if (ret)
307 goto out; 298 goto out;
@@ -311,18 +302,12 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
311 struct btrfs_free_space *e; 302 struct btrfs_free_space *e;
312 void *addr; 303 void *addr;
313 unsigned long offset = 0; 304 unsigned long offset = 0;
314 unsigned long start_offset = 0;
315 int need_loop = 0; 305 int need_loop = 0;
316 306
317 if (!num_entries && !num_bitmaps) 307 if (!num_entries && !num_bitmaps)
318 break; 308 break;
319 309
320 if (index == 0) { 310 page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
321 start_offset = first_page_offset;
322 offset = start_offset;
323 }
324
325 page = grab_cache_page(inode->i_mapping, index);
326 if (!page) 311 if (!page)
327 goto free_cache; 312 goto free_cache;
328 313
@@ -342,8 +327,15 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
342 if (index == 0) { 327 if (index == 0) {
343 u64 *gen; 328 u64 *gen;
344 329
345 memcpy(disk_crcs, addr, first_page_offset); 330 /*
346 gen = addr + (sizeof(u32) * num_checksums); 331 * We put a bogus crc at the front of the first page so
332 * that old kernels trying to mount a fs with the new
333 * format fail verification and discard the cache.
334 */
335 addr += sizeof(u64);
336 offset += sizeof(u64);
337
338 gen = addr;
347 if (*gen != BTRFS_I(inode)->generation) { 339 if (*gen != BTRFS_I(inode)->generation) {
348 printk(KERN_ERR "btrfs: space cache generation" 340 printk(KERN_ERR "btrfs: space cache generation"
349 " (%llu) does not match inode (%llu)\n", 341 " (%llu) does not match inode (%llu)\n",
@@ -355,24 +347,10 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
355 page_cache_release(page); 347 page_cache_release(page);
356 goto free_cache; 348 goto free_cache;
357 } 349 }
358 crc = (u32 *)disk_crcs; 350 addr += sizeof(u64);
359 } 351 offset += sizeof(u64);
360 entry = addr + start_offset;
361
362 /* First lets check our crc before we do anything fun */
363 cur_crc = ~(u32)0;
364 cur_crc = btrfs_csum_data(root, addr + start_offset, cur_crc,
365 PAGE_CACHE_SIZE - start_offset);
366 btrfs_csum_final(cur_crc, (char *)&cur_crc);
367 if (cur_crc != *crc) {
368 printk(KERN_ERR "btrfs: crc mismatch for page %lu\n",
369 index);
370 kunmap(page);
371 unlock_page(page);
372 page_cache_release(page);
373 goto free_cache;
374 } 352 }
375 crc++; 353 entry = addr;
376 354
377 while (1) { 355 while (1) {
378 if (!num_entries) 356 if (!num_entries)
@@ -470,8 +448,6 @@ next:
470 448
471 ret = 1; 449 ret = 1;
472out: 450out:
473 kfree(checksums);
474 kfree(disk_crcs);
475 return ret; 451 return ret;
476free_cache: 452free_cache:
477 __btrfs_remove_free_space_cache(ctl); 453 __btrfs_remove_free_space_cache(ctl);
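
The load-side hunks above imply the new layout of the space cache file: the first page starts with an 8-byte slot whose first four bytes hold a deliberately wrong crc32, followed by a u64 generation that must match the inode, then the entries; later pages carry entries only. A sketch of a reader for that header, where the struct and helper names are invented and the flat little-endian layout is an assumption of the sketch:

#include <stdint.h>
#include <string.h>

/* page 0: | 8-byte slot, first 4 bytes = bogus crc | u64 generation | entries |
 * page N: | entries ...                                                       |
 */
struct cache_page0_hdr {
        uint64_t crc_slot;      /* deliberately fails verification */
        uint64_t generation;    /* must match the inode's generation */
};

/* returns a pointer to the first entry, or NULL on generation mismatch */
static const void *cache_first_entry(const void *page0, uint64_t inode_gen)
{
        struct cache_page0_hdr hdr;

        memcpy(&hdr, page0, sizeof(hdr));
        if (hdr.generation != inode_gen)
                return NULL;    /* stale cache: caller discards it */
        return (const char *)page0 + sizeof(hdr);
}
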
@@ -569,8 +545,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
569 struct btrfs_key key; 545 struct btrfs_key key;
570 u64 start, end, len; 546 u64 start, end, len;
571 u64 bytes = 0; 547 u64 bytes = 0;
572 u32 *crc, *checksums; 548 u32 crc = ~(u32)0;
573 unsigned long first_page_offset;
574 int index = 0, num_pages = 0; 549 int index = 0, num_pages = 0;
575 int entries = 0; 550 int entries = 0;
576 int bitmaps = 0; 551 int bitmaps = 0;
@@ -590,34 +565,13 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
590 num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> 565 num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
591 PAGE_CACHE_SHIFT; 566 PAGE_CACHE_SHIFT;
592 567
593 /* Since the first page has all of our checksums and our generation we
594 * need to calculate the offset into the page that we can start writing
595 * our entries.
596 */
597 first_page_offset = (sizeof(u32) * num_pages) + sizeof(u64);
598
599 filemap_write_and_wait(inode->i_mapping); 568 filemap_write_and_wait(inode->i_mapping);
600 btrfs_wait_ordered_range(inode, inode->i_size & 569 btrfs_wait_ordered_range(inode, inode->i_size &
601 ~(root->sectorsize - 1), (u64)-1); 570 ~(root->sectorsize - 1), (u64)-1);
602 571
603 /* make sure we don't overflow that first page */
604 if (first_page_offset + sizeof(struct btrfs_free_space_entry) >= PAGE_CACHE_SIZE) {
605 /* this is really the same as running out of space, where we also return 0 */
606 printk(KERN_CRIT "Btrfs: free space cache was too big for the crc page\n");
607 ret = 0;
608 goto out_update;
609 }
610
611 /* We need a checksum per page. */
612 crc = checksums = kzalloc(sizeof(u32) * num_pages, GFP_NOFS);
613 if (!crc)
614 return -1;
615
616 pages = kzalloc(sizeof(struct page *) * num_pages, GFP_NOFS); 572 pages = kzalloc(sizeof(struct page *) * num_pages, GFP_NOFS);
617 if (!pages) { 573 if (!pages)
618 kfree(crc);
619 return -1; 574 return -1;
620 }
621 575
622 /* Get the cluster for this block_group if it exists */ 576 /* Get the cluster for this block_group if it exists */
623 if (block_group && !list_empty(&block_group->cluster_list)) 577 if (block_group && !list_empty(&block_group->cluster_list))
@@ -640,7 +594,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
640 * know and don't freak out. 594 * know and don't freak out.
641 */ 595 */
642 while (index < num_pages) { 596 while (index < num_pages) {
643 page = grab_cache_page(inode->i_mapping, index); 597 page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
644 if (!page) { 598 if (!page) {
645 int i; 599 int i;
646 600
@@ -648,7 +602,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
648 unlock_page(pages[i]); 602 unlock_page(pages[i]);
649 page_cache_release(pages[i]); 603 page_cache_release(pages[i]);
650 } 604 }
651 goto out_free; 605 goto out;
652 } 606 }
653 pages[index] = page; 607 pages[index] = page;
654 index++; 608 index++;
@@ -668,17 +622,11 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
668 /* Write out the extent entries */ 622 /* Write out the extent entries */
669 do { 623 do {
670 struct btrfs_free_space_entry *entry; 624 struct btrfs_free_space_entry *entry;
671 void *addr; 625 void *addr, *orig;
672 unsigned long offset = 0; 626 unsigned long offset = 0;
673 unsigned long start_offset = 0;
674 627
675 next_page = false; 628 next_page = false;
676 629
677 if (index == 0) {
678 start_offset = first_page_offset;
679 offset = start_offset;
680 }
681
682 if (index >= num_pages) { 630 if (index >= num_pages) {
683 out_of_space = true; 631 out_of_space = true;
684 break; 632 break;
@@ -686,10 +634,26 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
686 634
687 page = pages[index]; 635 page = pages[index];
688 636
689 addr = kmap(page); 637 orig = addr = kmap(page);
690 entry = addr + start_offset; 638 if (index == 0) {
639 u64 *gen;
691 640
692 memset(addr, 0, PAGE_CACHE_SIZE); 641 /*
642 * We're going to put a bogus crc in this page so that
643 * old kernels, which know nothing of this format,
644 * are certain to discard the cache.
645 */
646 addr += sizeof(u64);
647 offset += sizeof(u64);
648
649 gen = addr;
650 *gen = trans->transid;
651 addr += sizeof(u64);
652 offset += sizeof(u64);
653 }
654 entry = addr;
655
656 memset(addr, 0, PAGE_CACHE_SIZE - offset);
693 while (node && !next_page) { 657 while (node && !next_page) {
694 struct btrfs_free_space *e; 658 struct btrfs_free_space *e;
695 659
@@ -752,13 +716,19 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
752 next_page = true; 716 next_page = true;
753 entry++; 717 entry++;
754 } 718 }
755 *crc = ~(u32)0;
756 *crc = btrfs_csum_data(root, addr + start_offset, *crc,
757 PAGE_CACHE_SIZE - start_offset);
758 kunmap(page);
759 719
760 btrfs_csum_final(*crc, (char *)crc); 720 /* Generate bogus crc value */
761 crc++; 721 if (index == 0) {
722 u32 *tmp;
723 crc = btrfs_csum_data(root, orig + sizeof(u64), crc,
724 PAGE_CACHE_SIZE - sizeof(u64));
725 btrfs_csum_final(crc, (char *)&crc);
726 crc++;
727 tmp = orig;
728 *tmp = crc;
729 }
730
731 kunmap(page);
762 732
763 bytes += PAGE_CACHE_SIZE; 733 bytes += PAGE_CACHE_SIZE;
764 734
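
The "bogus crc" block above checksums everything in the first page past the leading u64 slot and then increments the result, so an old kernel's verification pass can never accept it and the cache is discarded. A userspace analogy, with zlib's crc32 standing in for the crc32c that btrfs_csum_data/btrfs_csum_final compute (link with -lz):

#include <stddef.h>
#include <stdint.h>
#include <zlib.h>

static uint32_t bogus_page_crc(const unsigned char *page, size_t page_size)
{
        /* checksum the page past the 8-byte slot the crc itself lives in */
        uint32_t crc = (uint32_t)crc32(0L, page + sizeof(uint64_t),
                                       (uInt)(page_size - sizeof(uint64_t)));
        return crc + 1;         /* off by one: guaranteed never to verify */
}
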
@@ -779,11 +749,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
779 749
780 addr = kmap(page); 750 addr = kmap(page);
781 memcpy(addr, entry->bitmap, PAGE_CACHE_SIZE); 751 memcpy(addr, entry->bitmap, PAGE_CACHE_SIZE);
782 *crc = ~(u32)0;
783 *crc = btrfs_csum_data(root, addr, *crc, PAGE_CACHE_SIZE);
784 kunmap(page); 752 kunmap(page);
785 btrfs_csum_final(*crc, (char *)crc);
786 crc++;
787 bytes += PAGE_CACHE_SIZE; 753 bytes += PAGE_CACHE_SIZE;
788 754
789 list_del_init(&entry->list); 755 list_del_init(&entry->list);
@@ -796,7 +762,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
796 i_size_read(inode) - 1, &cached_state, 762 i_size_read(inode) - 1, &cached_state,
797 GFP_NOFS); 763 GFP_NOFS);
798 ret = 0; 764 ret = 0;
799 goto out_free; 765 goto out;
800 } 766 }
801 767
802 /* Zero out the rest of the pages just to make sure */ 768 /* Zero out the rest of the pages just to make sure */
@@ -811,20 +777,6 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
811 index++; 777 index++;
812 } 778 }
813 779
814 /* Write the checksums and trans id to the first page */
815 {
816 void *addr;
817 u64 *gen;
818
819 page = pages[0];
820
821 addr = kmap(page);
822 memcpy(addr, checksums, sizeof(u32) * num_pages);
823 gen = addr + (sizeof(u32) * num_pages);
824 *gen = trans->transid;
825 kunmap(page);
826 }
827
828 ret = btrfs_dirty_pages(root, inode, pages, num_pages, 0, 780 ret = btrfs_dirty_pages(root, inode, pages, num_pages, 0,
829 bytes, &cached_state); 781 bytes, &cached_state);
830 btrfs_drop_pages(pages, num_pages); 782 btrfs_drop_pages(pages, num_pages);
@@ -833,7 +785,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
833 785
834 if (ret) { 786 if (ret) {
835 ret = 0; 787 ret = 0;
836 goto out_free; 788 goto out;
837 } 789 }
838 790
839 BTRFS_I(inode)->generation = trans->transid; 791 BTRFS_I(inode)->generation = trans->transid;
@@ -850,7 +802,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
850 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1, 802 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1,
851 EXTENT_DIRTY | EXTENT_DELALLOC | 803 EXTENT_DIRTY | EXTENT_DELALLOC |
852 EXTENT_DO_ACCOUNTING, 0, 0, NULL, GFP_NOFS); 804 EXTENT_DO_ACCOUNTING, 0, 0, NULL, GFP_NOFS);
853 goto out_free; 805 goto out;
854 } 806 }
855 leaf = path->nodes[0]; 807 leaf = path->nodes[0];
856 if (ret > 0) { 808 if (ret > 0) {
@@ -866,7 +818,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
866 EXTENT_DO_ACCOUNTING, 0, 0, NULL, 818 EXTENT_DO_ACCOUNTING, 0, 0, NULL,
867 GFP_NOFS); 819 GFP_NOFS);
868 btrfs_release_path(path); 820 btrfs_release_path(path);
869 goto out_free; 821 goto out;
870 } 822 }
871 } 823 }
872 header = btrfs_item_ptr(leaf, path->slots[0], 824 header = btrfs_item_ptr(leaf, path->slots[0],
@@ -879,11 +831,8 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
879 831
880 ret = 1; 832 ret = 1;
881 833
882out_free: 834out:
883 kfree(checksums);
884 kfree(pages); 835 kfree(pages);
885
886out_update:
887 if (ret != 1) { 836 if (ret != 1) {
888 invalidate_inode_pages2_range(inode->i_mapping, 0, index); 837 invalidate_inode_pages2_range(inode->i_mapping, 0, index);
889 BTRFS_I(inode)->generation = 0; 838 BTRFS_I(inode)->generation = 0;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index d340f63d8f07..15fceefbca0a 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -750,15 +750,6 @@ static u64 get_extent_allocation_hint(struct inode *inode, u64 start,
750 return alloc_hint; 750 return alloc_hint;
751} 751}
752 752
753static inline bool is_free_space_inode(struct btrfs_root *root,
754 struct inode *inode)
755{
756 if (root == root->fs_info->tree_root ||
757 BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID)
758 return true;
759 return false;
760}
761
762/* 753/*
763 * when extent_io.c finds a delayed allocation range in the file, 754 * when extent_io.c finds a delayed allocation range in the file,
764 * the call backs end up in this code. The basic idea is to 755 * the call backs end up in this code. The basic idea is to
@@ -791,7 +782,7 @@ static noinline int cow_file_range(struct inode *inode,
791 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; 782 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
792 int ret = 0; 783 int ret = 0;
793 784
794 BUG_ON(is_free_space_inode(root, inode)); 785 BUG_ON(btrfs_is_free_space_inode(root, inode));
795 trans = btrfs_join_transaction(root); 786 trans = btrfs_join_transaction(root);
796 BUG_ON(IS_ERR(trans)); 787 BUG_ON(IS_ERR(trans));
797 trans->block_rsv = &root->fs_info->delalloc_block_rsv; 788 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
@@ -1070,9 +1061,10 @@ static noinline int run_delalloc_nocow(struct inode *inode,
1070 u64 ino = btrfs_ino(inode); 1061 u64 ino = btrfs_ino(inode);
1071 1062
1072 path = btrfs_alloc_path(); 1063 path = btrfs_alloc_path();
1073 BUG_ON(!path); 1064 if (!path)
1065 return -ENOMEM;
1074 1066
1075 nolock = is_free_space_inode(root, inode); 1067 nolock = btrfs_is_free_space_inode(root, inode);
1076 1068
1077 if (nolock) 1069 if (nolock)
1078 trans = btrfs_join_transaction_nolock(root); 1070 trans = btrfs_join_transaction_nolock(root);
@@ -1291,15 +1283,16 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page,
1291 return ret; 1283 return ret;
1292} 1284}
1293 1285
1294static int btrfs_split_extent_hook(struct inode *inode, 1286static void btrfs_split_extent_hook(struct inode *inode,
1295 struct extent_state *orig, u64 split) 1287 struct extent_state *orig, u64 split)
1296{ 1288{
1297 /* not delalloc, ignore it */ 1289 /* not delalloc, ignore it */
1298 if (!(orig->state & EXTENT_DELALLOC)) 1290 if (!(orig->state & EXTENT_DELALLOC))
1299 return 0; 1291 return;
1300 1292
1301 atomic_inc(&BTRFS_I(inode)->outstanding_extents); 1293 spin_lock(&BTRFS_I(inode)->lock);
1302 return 0; 1294 BTRFS_I(inode)->outstanding_extents++;
1295 spin_unlock(&BTRFS_I(inode)->lock);
1303} 1296}
1304 1297
1305/* 1298/*
@@ -1308,16 +1301,17 @@ static int btrfs_split_extent_hook(struct inode *inode,
1308 * extents, such as when we are doing sequential writes, so we can properly 1301 * extents, such as when we are doing sequential writes, so we can properly
1309 * account for the metadata space we'll need. 1302 * account for the metadata space we'll need.
1310 */ 1303 */
1311static int btrfs_merge_extent_hook(struct inode *inode, 1304static void btrfs_merge_extent_hook(struct inode *inode,
1312 struct extent_state *new, 1305 struct extent_state *new,
1313 struct extent_state *other) 1306 struct extent_state *other)
1314{ 1307{
1315 /* not delalloc, ignore it */ 1308 /* not delalloc, ignore it */
1316 if (!(other->state & EXTENT_DELALLOC)) 1309 if (!(other->state & EXTENT_DELALLOC))
1317 return 0; 1310 return;
1318 1311
1319 atomic_dec(&BTRFS_I(inode)->outstanding_extents); 1312 spin_lock(&BTRFS_I(inode)->lock);
1320 return 0; 1313 BTRFS_I(inode)->outstanding_extents--;
1314 spin_unlock(&BTRFS_I(inode)->lock);
1321} 1315}
1322 1316
1323/* 1317/*
@@ -1325,8 +1319,8 @@ static int btrfs_merge_extent_hook(struct inode *inode,
1325 * bytes in this file, and to maintain the list of inodes that 1319 * bytes in this file, and to maintain the list of inodes that
1326 * have pending delalloc work to be done. 1320 * have pending delalloc work to be done.
1327 */ 1321 */
1328static int btrfs_set_bit_hook(struct inode *inode, 1322static void btrfs_set_bit_hook(struct inode *inode,
1329 struct extent_state *state, int *bits) 1323 struct extent_state *state, int *bits)
1330{ 1324{
1331 1325
1332 /* 1326 /*
@@ -1337,12 +1331,15 @@ static int btrfs_set_bit_hook(struct inode *inode,
1337 if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { 1331 if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
1338 struct btrfs_root *root = BTRFS_I(inode)->root; 1332 struct btrfs_root *root = BTRFS_I(inode)->root;
1339 u64 len = state->end + 1 - state->start; 1333 u64 len = state->end + 1 - state->start;
1340 bool do_list = !is_free_space_inode(root, inode); 1334 bool do_list = !btrfs_is_free_space_inode(root, inode);
1341 1335
1342 if (*bits & EXTENT_FIRST_DELALLOC) 1336 if (*bits & EXTENT_FIRST_DELALLOC) {
1343 *bits &= ~EXTENT_FIRST_DELALLOC; 1337 *bits &= ~EXTENT_FIRST_DELALLOC;
1344 else 1338 } else {
1345 atomic_inc(&BTRFS_I(inode)->outstanding_extents); 1339 spin_lock(&BTRFS_I(inode)->lock);
1340 BTRFS_I(inode)->outstanding_extents++;
1341 spin_unlock(&BTRFS_I(inode)->lock);
1342 }
1346 1343
1347 spin_lock(&root->fs_info->delalloc_lock); 1344 spin_lock(&root->fs_info->delalloc_lock);
1348 BTRFS_I(inode)->delalloc_bytes += len; 1345 BTRFS_I(inode)->delalloc_bytes += len;
@@ -1353,14 +1350,13 @@ static int btrfs_set_bit_hook(struct inode *inode,
1353 } 1350 }
1354 spin_unlock(&root->fs_info->delalloc_lock); 1351 spin_unlock(&root->fs_info->delalloc_lock);
1355 } 1352 }
1356 return 0;
1357} 1353}
1358 1354
1359/* 1355/*
1360 * extent_io.c clear_bit_hook, see set_bit_hook for why 1356 * extent_io.c clear_bit_hook, see set_bit_hook for why
1361 */ 1357 */
1362static int btrfs_clear_bit_hook(struct inode *inode, 1358static void btrfs_clear_bit_hook(struct inode *inode,
1363 struct extent_state *state, int *bits) 1359 struct extent_state *state, int *bits)
1364{ 1360{
1365 /* 1361 /*
1366 * set_bit and clear bit hooks normally require _irqsave/restore 1362 * set_bit and clear bit hooks normally require _irqsave/restore
@@ -1370,12 +1366,15 @@ static int btrfs_clear_bit_hook(struct inode *inode,
1370 if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { 1366 if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
1371 struct btrfs_root *root = BTRFS_I(inode)->root; 1367 struct btrfs_root *root = BTRFS_I(inode)->root;
1372 u64 len = state->end + 1 - state->start; 1368 u64 len = state->end + 1 - state->start;
1373 bool do_list = !is_free_space_inode(root, inode); 1369 bool do_list = !btrfs_is_free_space_inode(root, inode);
1374 1370
1375 if (*bits & EXTENT_FIRST_DELALLOC) 1371 if (*bits & EXTENT_FIRST_DELALLOC) {
1376 *bits &= ~EXTENT_FIRST_DELALLOC; 1372 *bits &= ~EXTENT_FIRST_DELALLOC;
1377 else if (!(*bits & EXTENT_DO_ACCOUNTING)) 1373 } else if (!(*bits & EXTENT_DO_ACCOUNTING)) {
1378 atomic_dec(&BTRFS_I(inode)->outstanding_extents); 1374 spin_lock(&BTRFS_I(inode)->lock);
1375 BTRFS_I(inode)->outstanding_extents--;
1376 spin_unlock(&BTRFS_I(inode)->lock);
1377 }
1379 1378
1380 if (*bits & EXTENT_DO_ACCOUNTING) 1379 if (*bits & EXTENT_DO_ACCOUNTING)
1381 btrfs_delalloc_release_metadata(inode, len); 1380 btrfs_delalloc_release_metadata(inode, len);
@@ -1394,7 +1393,6 @@ static int btrfs_clear_bit_hook(struct inode *inode,
1394 } 1393 }
1395 spin_unlock(&root->fs_info->delalloc_lock); 1394 spin_unlock(&root->fs_info->delalloc_lock);
1396 } 1395 }
1397 return 0;
1398} 1396}
1399 1397
1400/* 1398/*
@@ -1477,7 +1475,7 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
1477 1475
1478 skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; 1476 skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
1479 1477
1480 if (is_free_space_inode(root, inode)) 1478 if (btrfs_is_free_space_inode(root, inode))
1481 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 2); 1479 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 2);
1482 else 1480 else
1483 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); 1481 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
@@ -1644,7 +1642,8 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
1644 int ret; 1642 int ret;
1645 1643
1646 path = btrfs_alloc_path(); 1644 path = btrfs_alloc_path();
1647 BUG_ON(!path); 1645 if (!path)
1646 return -ENOMEM;
1648 1647
1649 path->leave_spinning = 1; 1648 path->leave_spinning = 1;
1650 1649
@@ -1726,7 +1725,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1726 return 0; 1725 return 0;
1727 BUG_ON(!ordered_extent); 1726 BUG_ON(!ordered_extent);
1728 1727
1729 nolock = is_free_space_inode(root, inode); 1728 nolock = btrfs_is_free_space_inode(root, inode);
1730 1729
1731 if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) { 1730 if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
1732 BUG_ON(!list_empty(&ordered_extent->list)); 1731 BUG_ON(!list_empty(&ordered_extent->list));
@@ -2214,7 +2213,8 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
2214 2213
2215 if (!root->orphan_block_rsv) { 2214 if (!root->orphan_block_rsv) {
2216 block_rsv = btrfs_alloc_block_rsv(root); 2215 block_rsv = btrfs_alloc_block_rsv(root);
2217 BUG_ON(!block_rsv); 2216 if (!block_rsv)
2217 return -ENOMEM;
2218 } 2218 }
2219 2219
2220 spin_lock(&root->orphan_lock); 2220 spin_lock(&root->orphan_lock);
@@ -2516,7 +2516,9 @@ static void btrfs_read_locked_inode(struct inode *inode)
2516 filled = true; 2516 filled = true;
2517 2517
2518 path = btrfs_alloc_path(); 2518 path = btrfs_alloc_path();
2519 BUG_ON(!path); 2519 if (!path)
2520 goto make_bad;
2521
2520 path->leave_spinning = 1; 2522 path->leave_spinning = 1;
2521 memcpy(&location, &BTRFS_I(inode)->location, sizeof(location)); 2523 memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
2522 2524
@@ -2531,13 +2533,6 @@ static void btrfs_read_locked_inode(struct inode *inode)
2531 2533
2532 inode_item = btrfs_item_ptr(leaf, path->slots[0], 2534 inode_item = btrfs_item_ptr(leaf, path->slots[0],
2533 struct btrfs_inode_item); 2535 struct btrfs_inode_item);
2534 if (!leaf->map_token)
2535 map_private_extent_buffer(leaf, (unsigned long)inode_item,
2536 sizeof(struct btrfs_inode_item),
2537 &leaf->map_token, &leaf->kaddr,
2538 &leaf->map_start, &leaf->map_len,
2539 KM_USER1);
2540
2541 inode->i_mode = btrfs_inode_mode(leaf, inode_item); 2536 inode->i_mode = btrfs_inode_mode(leaf, inode_item);
2542 inode->i_nlink = btrfs_inode_nlink(leaf, inode_item); 2537 inode->i_nlink = btrfs_inode_nlink(leaf, inode_item);
2543 inode->i_uid = btrfs_inode_uid(leaf, inode_item); 2538 inode->i_uid = btrfs_inode_uid(leaf, inode_item);
@@ -2575,11 +2570,6 @@ cache_acl:
2575 if (!maybe_acls) 2570 if (!maybe_acls)
2576 cache_no_acl(inode); 2571 cache_no_acl(inode);
2577 2572
2578 if (leaf->map_token) {
2579 unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
2580 leaf->map_token = NULL;
2581 }
2582
2583 btrfs_free_path(path); 2573 btrfs_free_path(path);
2584 2574
2585 switch (inode->i_mode & S_IFMT) { 2575 switch (inode->i_mode & S_IFMT) {
@@ -2624,13 +2614,6 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
2624 struct btrfs_inode_item *item, 2614 struct btrfs_inode_item *item,
2625 struct inode *inode) 2615 struct inode *inode)
2626{ 2616{
2627 if (!leaf->map_token)
2628 map_private_extent_buffer(leaf, (unsigned long)item,
2629 sizeof(struct btrfs_inode_item),
2630 &leaf->map_token, &leaf->kaddr,
2631 &leaf->map_start, &leaf->map_len,
2632 KM_USER1);
2633
2634 btrfs_set_inode_uid(leaf, item, inode->i_uid); 2617 btrfs_set_inode_uid(leaf, item, inode->i_uid);
2635 btrfs_set_inode_gid(leaf, item, inode->i_gid); 2618 btrfs_set_inode_gid(leaf, item, inode->i_gid);
2636 btrfs_set_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size); 2619 btrfs_set_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size);
@@ -2659,11 +2642,6 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
2659 btrfs_set_inode_rdev(leaf, item, inode->i_rdev); 2642 btrfs_set_inode_rdev(leaf, item, inode->i_rdev);
2660 btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags); 2643 btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags);
2661 btrfs_set_inode_block_group(leaf, item, 0); 2644 btrfs_set_inode_block_group(leaf, item, 0);
2662
2663 if (leaf->map_token) {
2664 unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
2665 leaf->map_token = NULL;
2666 }
2667} 2645}
2668 2646
2669/* 2647/*
@@ -2678,12 +2656,14 @@ noinline int btrfs_update_inode(struct btrfs_trans_handle *trans,
2678 int ret; 2656 int ret;
2679 2657
2680 /* 2658 /*
2681 * If root is tree root, it means this inode is used to 2659 * If the inode is a free space inode, we can deadlock during commit
2682 * store free space information. And these inodes are updated 2660 * if we put it into the delayed code.
2683 * when committing the transaction, so they needn't delaye to 2661 *
2684 * be updated, or deadlock will occured. 2662 * The data relocation inode should also be directly updated
2663 * without delay.
2685 */ 2664 */
2686 if (!is_free_space_inode(root, inode)) { 2665 if (!btrfs_is_free_space_inode(root, inode)
2666 && root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) {
2687 ret = btrfs_delayed_update_inode(trans, root, inode); 2667 ret = btrfs_delayed_update_inode(trans, root, inode);
2688 if (!ret) 2668 if (!ret)
2689 btrfs_set_inode_last_trans(trans, inode); 2669 btrfs_set_inode_last_trans(trans, inode);
@@ -3019,13 +2999,16 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
3019 2999
3020 ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode, 3000 ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode,
3021 dentry->d_name.name, dentry->d_name.len); 3001 dentry->d_name.name, dentry->d_name.len);
3022 BUG_ON(ret); 3002 if (ret)
3003 goto out;
3023 3004
3024 if (inode->i_nlink == 0) { 3005 if (inode->i_nlink == 0) {
3025 ret = btrfs_orphan_add(trans, inode); 3006 ret = btrfs_orphan_add(trans, inode);
3026 BUG_ON(ret); 3007 if (ret)
3008 goto out;
3027 } 3009 }
3028 3010
3011out:
3029 nr = trans->blocks_used; 3012 nr = trans->blocks_used;
3030 __unlink_end_trans(trans, root); 3013 __unlink_end_trans(trans, root);
3031 btrfs_btree_balance_dirty(root, nr); 3014 btrfs_btree_balance_dirty(root, nr);
@@ -3168,6 +3151,11 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
3168 3151
3169 BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY); 3152 BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY);
3170 3153
3154 path = btrfs_alloc_path();
3155 if (!path)
3156 return -ENOMEM;
3157 path->reada = -1;
3158
3171 if (root->ref_cows || root == root->fs_info->tree_root) 3159 if (root->ref_cows || root == root->fs_info->tree_root)
3172 btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0); 3160 btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0);
3173 3161
@@ -3180,10 +3168,6 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
3180 if (min_type == 0 && root == BTRFS_I(inode)->root) 3168 if (min_type == 0 && root == BTRFS_I(inode)->root)
3181 btrfs_kill_delayed_inode_items(inode); 3169 btrfs_kill_delayed_inode_items(inode);
3182 3170
3183 path = btrfs_alloc_path();
3184 BUG_ON(!path);
3185 path->reada = -1;
3186
3187 key.objectid = ino; 3171 key.objectid = ino;
3188 key.offset = (u64)-1; 3172 key.offset = (u64)-1;
3189 key.type = (u8)-1; 3173 key.type = (u8)-1;
@@ -3396,7 +3380,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
3396 3380
3397 ret = -ENOMEM; 3381 ret = -ENOMEM;
3398again: 3382again:
3399 page = grab_cache_page(mapping, index); 3383 page = find_or_create_page(mapping, index, GFP_NOFS);
3400 if (!page) { 3384 if (!page) {
3401 btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE); 3385 btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
3402 goto out; 3386 goto out;
@@ -3632,7 +3616,7 @@ void btrfs_evict_inode(struct inode *inode)
3632 3616
3633 truncate_inode_pages(&inode->i_data, 0); 3617 truncate_inode_pages(&inode->i_data, 0);
3634 if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 || 3618 if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 ||
3635 is_free_space_inode(root, inode))) 3619 btrfs_is_free_space_inode(root, inode)))
3636 goto no_delete; 3620 goto no_delete;
3637 3621
3638 if (is_bad_inode(inode)) { 3622 if (is_bad_inode(inode)) {
@@ -3711,7 +3695,8 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
3711 int ret = 0; 3695 int ret = 0;
3712 3696
3713 path = btrfs_alloc_path(); 3697 path = btrfs_alloc_path();
3714 BUG_ON(!path); 3698 if (!path)
3699 return -ENOMEM;
3715 3700
3716 di = btrfs_lookup_dir_item(NULL, root, path, btrfs_ino(dir), name, 3701 di = btrfs_lookup_dir_item(NULL, root, path, btrfs_ino(dir), name,
3717 namelen, 0); 3702 namelen, 0);
@@ -3967,6 +3952,7 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
3967 struct btrfs_root *root, int *new) 3952 struct btrfs_root *root, int *new)
3968{ 3953{
3969 struct inode *inode; 3954 struct inode *inode;
3955 int bad_inode = 0;
3970 3956
3971 inode = btrfs_iget_locked(s, location->objectid, root); 3957 inode = btrfs_iget_locked(s, location->objectid, root);
3972 if (!inode) 3958 if (!inode)
@@ -3976,10 +3962,19 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
3976 BTRFS_I(inode)->root = root; 3962 BTRFS_I(inode)->root = root;
3977 memcpy(&BTRFS_I(inode)->location, location, sizeof(*location)); 3963 memcpy(&BTRFS_I(inode)->location, location, sizeof(*location));
3978 btrfs_read_locked_inode(inode); 3964 btrfs_read_locked_inode(inode);
3979 inode_tree_add(inode); 3965 if (!is_bad_inode(inode)) {
3980 unlock_new_inode(inode); 3966 inode_tree_add(inode);
3981 if (new) 3967 unlock_new_inode(inode);
3982 *new = 1; 3968 if (new)
3969 *new = 1;
3970 } else {
3971 bad_inode = 1;
3972 }
3973 }
3974
3975 if (bad_inode) {
3976 iput(inode);
3977 inode = ERR_PTR(-ESTALE);
3983 } 3978 }
3984 3979
3985 return inode; 3980 return inode;
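
btrfs_iget() now refuses to return a half-initialized inode: if btrfs_read_locked_inode() left it bad, the reference is dropped and ERR_PTR(-ESTALE) comes back instead. For illustration, the ERR_PTR convention reproduced in userspace (the kernel's own definitions live in err.h; this stand-in only mimics them):

#include <stdio.h>
#include <errno.h>

#define MAX_ERRNO 4095

static inline void *ERR_PTR(long error) { return (void *)error; }
static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
static inline int IS_ERR(const void *ptr)
{
        return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

/* stand-in for btrfs_iget(): fail the way the hunk above now does */
static void *iget_sketch(int bad)
{
        static int inode = 42;
        return bad ? ERR_PTR(-ESTALE) : (void *)&inode;
}

int main(void)
{
        void *inode = iget_sketch(1);

        if (IS_ERR(inode))
                printf("lookup failed: %ld\n", PTR_ERR(inode));
        return 0;
}
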
@@ -4014,12 +4009,19 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
4014 struct btrfs_root *sub_root = root; 4009 struct btrfs_root *sub_root = root;
4015 struct btrfs_key location; 4010 struct btrfs_key location;
4016 int index; 4011 int index;
4017 int ret; 4012 int ret = 0;
4018 4013
4019 if (dentry->d_name.len > BTRFS_NAME_LEN) 4014 if (dentry->d_name.len > BTRFS_NAME_LEN)
4020 return ERR_PTR(-ENAMETOOLONG); 4015 return ERR_PTR(-ENAMETOOLONG);
4021 4016
4022 ret = btrfs_inode_by_name(dir, dentry, &location); 4017 if (unlikely(d_need_lookup(dentry))) {
4018 memcpy(&location, dentry->d_fsdata, sizeof(struct btrfs_key));
4019 kfree(dentry->d_fsdata);
4020 dentry->d_fsdata = NULL;
4021 d_clear_need_lookup(dentry);
4022 } else {
4023 ret = btrfs_inode_by_name(dir, dentry, &location);
4024 }
4023 4025
4024 if (ret < 0) 4026 if (ret < 0)
4025 return ERR_PTR(ret); 4027 return ERR_PTR(ret);
@@ -4074,16 +4076,16 @@ static int btrfs_dentry_delete(const struct dentry *dentry)
4074 return 0; 4076 return 0;
4075} 4077}
4076 4078
4079static void btrfs_dentry_release(struct dentry *dentry)
4080{
4081 if (dentry->d_fsdata)
4082 kfree(dentry->d_fsdata);
4083}
4084
4077static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, 4085static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
4078 struct nameidata *nd) 4086 struct nameidata *nd)
4079{ 4087{
4080 struct inode *inode; 4088 return d_splice_alias(btrfs_lookup_dentry(dir, dentry), dentry);
4081
4082 inode = btrfs_lookup_dentry(dir, dentry);
4083 if (IS_ERR(inode))
4084 return ERR_CAST(inode);
4085
4086 return d_splice_alias(inode, dentry);
4087} 4089}
4088 4090
4089unsigned char btrfs_filetype_table[] = { 4091unsigned char btrfs_filetype_table[] = {
@@ -4102,6 +4104,7 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
4102 struct btrfs_path *path; 4104 struct btrfs_path *path;
4103 struct list_head ins_list; 4105 struct list_head ins_list;
4104 struct list_head del_list; 4106 struct list_head del_list;
4107 struct qstr q;
4105 int ret; 4108 int ret;
4106 struct extent_buffer *leaf; 4109 struct extent_buffer *leaf;
4107 int slot; 4110 int slot;
@@ -4191,6 +4194,7 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
4191 4194
4192 while (di_cur < di_total) { 4195 while (di_cur < di_total) {
4193 struct btrfs_key location; 4196 struct btrfs_key location;
4197 struct dentry *tmp;
4194 4198
4195 if (verify_dir_item(root, leaf, di)) 4199 if (verify_dir_item(root, leaf, di))
4196 break; 4200 break;
@@ -4211,6 +4215,33 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
4211 d_type = btrfs_filetype_table[btrfs_dir_type(leaf, di)]; 4215 d_type = btrfs_filetype_table[btrfs_dir_type(leaf, di)];
4212 btrfs_dir_item_key_to_cpu(leaf, di, &location); 4216 btrfs_dir_item_key_to_cpu(leaf, di, &location);
4213 4217
4218 q.name = name_ptr;
4219 q.len = name_len;
4220 q.hash = full_name_hash(q.name, q.len);
4221 tmp = d_lookup(filp->f_dentry, &q);
4222 if (!tmp) {
4223 struct btrfs_key *newkey;
4224
4225 newkey = kzalloc(sizeof(struct btrfs_key),
4226 GFP_NOFS);
4227 if (!newkey)
4228 goto no_dentry;
4229 tmp = d_alloc(filp->f_dentry, &q);
4230 if (!tmp) {
4231 kfree(newkey);
4232 dput(tmp);
4233 goto no_dentry;
4234 }
4235 memcpy(newkey, &location,
4236 sizeof(struct btrfs_key));
4237 tmp->d_fsdata = newkey;
4238 tmp->d_flags |= DCACHE_NEED_LOOKUP;
4239 d_rehash(tmp);
4240 dput(tmp);
4241 } else {
4242 dput(tmp);
4243 }
4244no_dentry:
4214 /* is this a reference to our own snapshot? If so 4245 /* is this a reference to our own snapshot? If so
4215 * skip it 4246 * skip it
4216 */ 4247 */
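
This block is the readdir half of a dcache shortcut: the location key decoded from each dir item is parked in a freshly hashed dentry via d_fsdata and DCACHE_NEED_LOOKUP, and the d_need_lookup() branch added to btrfs_lookup_dentry() earlier in this diff consumes it, skipping the directory search entirely. A userspace analogy with invented types:

#include <stdio.h>
#include <stdlib.h>

struct location { unsigned long long objectid; };

struct dentry_sketch {
        const char *name;
        struct location *prefetched;    /* readdir fills, lookup consumes */
};

static void readdir_prefill(struct dentry_sketch *d, unsigned long long id)
{
        d->prefetched = malloc(sizeof(*d->prefetched));
        if (d->prefetched)
                d->prefetched->objectid = id;
}

static unsigned long long lookup(struct dentry_sketch *d)
{
        if (d->prefetched) {            /* the d_need_lookup() fast path */
                unsigned long long id = d->prefetched->objectid;

                free(d->prefetched);
                d->prefetched = NULL;
                return id;
        }
        return 0;       /* slow path: search the directory items */
}

int main(void)
{
        struct dentry_sketch d = { "file.txt", NULL };

        readdir_prefill(&d, 257);
        printf("%s -> %llu\n", d.name, lookup(&d));
        return 0;
}
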
@@ -4275,7 +4306,7 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc)
4275 if (BTRFS_I(inode)->dummy_inode) 4306 if (BTRFS_I(inode)->dummy_inode)
4276 return 0; 4307 return 0;
4277 4308
4278 if (btrfs_fs_closing(root->fs_info) && is_free_space_inode(root, inode)) 4309 if (btrfs_fs_closing(root->fs_info) && btrfs_is_free_space_inode(root, inode))
4279 nolock = true; 4310 nolock = true;
4280 4311
4281 if (wbc->sync_mode == WB_SYNC_ALL) { 4312 if (wbc->sync_mode == WB_SYNC_ALL) {
@@ -4436,7 +4467,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
4436 int owner; 4467 int owner;
4437 4468
4438 path = btrfs_alloc_path(); 4469 path = btrfs_alloc_path();
4439 BUG_ON(!path); 4470 if (!path)
4471 return ERR_PTR(-ENOMEM);
4440 4472
4441 inode = new_inode(root->fs_info->sb); 4473 inode = new_inode(root->fs_info->sb);
4442 if (!inode) { 4474 if (!inode) {
@@ -4471,7 +4503,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
4471 inode->i_generation = BTRFS_I(inode)->generation; 4503 inode->i_generation = BTRFS_I(inode)->generation;
4472 btrfs_set_inode_space_info(root, inode); 4504 btrfs_set_inode_space_info(root, inode);
4473 4505
4474 if (mode & S_IFDIR) 4506 if (S_ISDIR(mode))
4475 owner = 0; 4507 owner = 0;
4476 else 4508 else
4477 owner = 1; 4509 owner = 1;
@@ -4516,7 +4548,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
4516 4548
4517 btrfs_inherit_iflags(inode, dir); 4549 btrfs_inherit_iflags(inode, dir);
4518 4550
4519 if ((mode & S_IFREG)) { 4551 if (S_ISREG(mode)) {
4520 if (btrfs_test_opt(root, NODATASUM)) 4552 if (btrfs_test_opt(root, NODATASUM))
4521 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM; 4553 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM;
4522 if (btrfs_test_opt(root, NODATACOW) || 4554 if (btrfs_test_opt(root, NODATACOW) ||
@@ -4770,11 +4802,10 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
4770 if (err) { 4802 if (err) {
4771 drop_inode = 1; 4803 drop_inode = 1;
4772 } else { 4804 } else {
4773 struct dentry *parent = dget_parent(dentry); 4805 struct dentry *parent = dentry->d_parent;
4774 err = btrfs_update_inode(trans, root, inode); 4806 err = btrfs_update_inode(trans, root, inode);
4775 BUG_ON(err); 4807 BUG_ON(err);
4776 btrfs_log_new_name(trans, inode, NULL, parent); 4808 btrfs_log_new_name(trans, inode, NULL, parent);
4777 dput(parent);
4778 } 4809 }
4779 4810
4780 nr = trans->blocks_used; 4811 nr = trans->blocks_used;
@@ -6697,19 +6728,6 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
6697 return 0; 6728 return 0;
6698} 6729}
6699 6730
6700/* helper function for file defrag and space balancing. This
6701 * forces readahead on a given range of bytes in an inode
6702 */
6703unsigned long btrfs_force_ra(struct address_space *mapping,
6704 struct file_ra_state *ra, struct file *file,
6705 pgoff_t offset, pgoff_t last_index)
6706{
6707 pgoff_t req_size = last_index - offset + 1;
6708
6709 page_cache_sync_readahead(mapping, ra, file, offset, req_size);
6710 return offset + req_size;
6711}
6712
6713struct inode *btrfs_alloc_inode(struct super_block *sb) 6731struct inode *btrfs_alloc_inode(struct super_block *sb)
6714{ 6732{
6715 struct btrfs_inode *ei; 6733 struct btrfs_inode *ei;
@@ -6733,8 +6751,9 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
6733 ei->index_cnt = (u64)-1; 6751 ei->index_cnt = (u64)-1;
6734 ei->last_unlink_trans = 0; 6752 ei->last_unlink_trans = 0;
6735 6753
6736 atomic_set(&ei->outstanding_extents, 0); 6754 spin_lock_init(&ei->lock);
6737 atomic_set(&ei->reserved_extents, 0); 6755 ei->outstanding_extents = 0;
6756 ei->reserved_extents = 0;
6738 6757
6739 ei->ordered_data_close = 0; 6758 ei->ordered_data_close = 0;
6740 ei->orphan_meta_reserved = 0; 6759 ei->orphan_meta_reserved = 0;
@@ -6772,8 +6791,8 @@ void btrfs_destroy_inode(struct inode *inode)
6772 6791
6773 WARN_ON(!list_empty(&inode->i_dentry)); 6792 WARN_ON(!list_empty(&inode->i_dentry));
6774 WARN_ON(inode->i_data.nrpages); 6793 WARN_ON(inode->i_data.nrpages);
6775 WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents)); 6794 WARN_ON(BTRFS_I(inode)->outstanding_extents);
6776 WARN_ON(atomic_read(&BTRFS_I(inode)->reserved_extents)); 6795 WARN_ON(BTRFS_I(inode)->reserved_extents);
6777 6796
6778 /* 6797 /*
6779 * This can happen where we create an inode, but somebody else also 6798 * This can happen where we create an inode, but somebody else also
@@ -6828,7 +6847,7 @@ int btrfs_drop_inode(struct inode *inode)
6828 struct btrfs_root *root = BTRFS_I(inode)->root; 6847 struct btrfs_root *root = BTRFS_I(inode)->root;
6829 6848
6830 if (btrfs_root_refs(&root->root_item) == 0 && 6849 if (btrfs_root_refs(&root->root_item) == 0 &&
6831 !is_free_space_inode(root, inode)) 6850 !btrfs_is_free_space_inode(root, inode))
6832 return 1; 6851 return 1;
6833 else 6852 else
6834 return generic_drop_inode(inode); 6853 return generic_drop_inode(inode);
@@ -6898,7 +6917,7 @@ static int btrfs_getattr(struct vfsmount *mnt,
6898{ 6917{
6899 struct inode *inode = dentry->d_inode; 6918 struct inode *inode = dentry->d_inode;
6900 generic_fillattr(inode, stat); 6919 generic_fillattr(inode, stat);
6901 stat->dev = BTRFS_I(inode)->root->anon_super.s_dev; 6920 stat->dev = BTRFS_I(inode)->root->anon_dev;
6902 stat->blksize = PAGE_CACHE_SIZE; 6921 stat->blksize = PAGE_CACHE_SIZE;
6903 stat->blocks = (inode_get_bytes(inode) + 6922 stat->blocks = (inode_get_bytes(inode) +
6904 BTRFS_I(inode)->delalloc_bytes) >> 9; 6923 BTRFS_I(inode)->delalloc_bytes) >> 9;
@@ -7066,9 +7085,8 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
7066 BUG_ON(ret); 7085 BUG_ON(ret);
7067 7086
7068 if (old_ino != BTRFS_FIRST_FREE_OBJECTID) { 7087 if (old_ino != BTRFS_FIRST_FREE_OBJECTID) {
7069 struct dentry *parent = dget_parent(new_dentry); 7088 struct dentry *parent = new_dentry->d_parent;
7070 btrfs_log_new_name(trans, old_inode, old_dir, parent); 7089 btrfs_log_new_name(trans, old_inode, old_dir, parent);
7071 dput(parent);
7072 btrfs_end_log_trans(root); 7090 btrfs_end_log_trans(root);
7073 } 7091 }
7074out_fail: 7092out_fail:
@@ -7192,7 +7210,11 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
7192 goto out_unlock; 7210 goto out_unlock;
7193 7211
7194 path = btrfs_alloc_path(); 7212 path = btrfs_alloc_path();
7195 BUG_ON(!path); 7213 if (!path) {
7214 err = -ENOMEM;
7215 drop_inode = 1;
7216 goto out_unlock;
7217 }
7196 key.objectid = btrfs_ino(inode); 7218 key.objectid = btrfs_ino(inode);
7197 key.offset = 0; 7219 key.offset = 0;
7198 btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); 7220 btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
@@ -7329,7 +7351,7 @@ static int btrfs_set_page_dirty(struct page *page)
7329 return __set_page_dirty_nobuffers(page); 7351 return __set_page_dirty_nobuffers(page);
7330} 7352}
7331 7353
7332static int btrfs_permission(struct inode *inode, int mask, unsigned int flags) 7354static int btrfs_permission(struct inode *inode, int mask)
7333{ 7355{
7334 struct btrfs_root *root = BTRFS_I(inode)->root; 7356 struct btrfs_root *root = BTRFS_I(inode)->root;
7335 7357
@@ -7337,7 +7359,7 @@ static int btrfs_permission(struct inode *inode, int mask, unsigned int flags)
7337 return -EROFS; 7359 return -EROFS;
7338 if ((BTRFS_I(inode)->flags & BTRFS_INODE_READONLY) && (mask & MAY_WRITE)) 7360 if ((BTRFS_I(inode)->flags & BTRFS_INODE_READONLY) && (mask & MAY_WRITE))
7339 return -EACCES; 7361 return -EACCES;
7340 return generic_permission(inode, mask, flags, btrfs_check_acl); 7362 return generic_permission(inode, mask);
7341} 7363}
7342 7364
7343static const struct inode_operations btrfs_dir_inode_operations = { 7365static const struct inode_operations btrfs_dir_inode_operations = {
@@ -7357,10 +7379,12 @@ static const struct inode_operations btrfs_dir_inode_operations = {
7357 .listxattr = btrfs_listxattr, 7379 .listxattr = btrfs_listxattr,
7358 .removexattr = btrfs_removexattr, 7380 .removexattr = btrfs_removexattr,
7359 .permission = btrfs_permission, 7381 .permission = btrfs_permission,
7382 .get_acl = btrfs_get_acl,
7360}; 7383};
7361static const struct inode_operations btrfs_dir_ro_inode_operations = { 7384static const struct inode_operations btrfs_dir_ro_inode_operations = {
7362 .lookup = btrfs_lookup, 7385 .lookup = btrfs_lookup,
7363 .permission = btrfs_permission, 7386 .permission = btrfs_permission,
7387 .get_acl = btrfs_get_acl,
7364}; 7388};
7365 7389
7366static const struct file_operations btrfs_dir_file_operations = { 7390static const struct file_operations btrfs_dir_file_operations = {
@@ -7429,6 +7453,7 @@ static const struct inode_operations btrfs_file_inode_operations = {
7429 .removexattr = btrfs_removexattr, 7453 .removexattr = btrfs_removexattr,
7430 .permission = btrfs_permission, 7454 .permission = btrfs_permission,
7431 .fiemap = btrfs_fiemap, 7455 .fiemap = btrfs_fiemap,
7456 .get_acl = btrfs_get_acl,
7432}; 7457};
7433static const struct inode_operations btrfs_special_inode_operations = { 7458static const struct inode_operations btrfs_special_inode_operations = {
7434 .getattr = btrfs_getattr, 7459 .getattr = btrfs_getattr,
@@ -7438,6 +7463,7 @@ static const struct inode_operations btrfs_special_inode_operations = {
7438 .getxattr = btrfs_getxattr, 7463 .getxattr = btrfs_getxattr,
7439 .listxattr = btrfs_listxattr, 7464 .listxattr = btrfs_listxattr,
7440 .removexattr = btrfs_removexattr, 7465 .removexattr = btrfs_removexattr,
7466 .get_acl = btrfs_get_acl,
7441}; 7467};
7442static const struct inode_operations btrfs_symlink_inode_operations = { 7468static const struct inode_operations btrfs_symlink_inode_operations = {
7443 .readlink = generic_readlink, 7469 .readlink = generic_readlink,
@@ -7449,8 +7475,10 @@ static const struct inode_operations btrfs_symlink_inode_operations = {
7449 .getxattr = btrfs_getxattr, 7475 .getxattr = btrfs_getxattr,
7450 .listxattr = btrfs_listxattr, 7476 .listxattr = btrfs_listxattr,
7451 .removexattr = btrfs_removexattr, 7477 .removexattr = btrfs_removexattr,
7478 .get_acl = btrfs_get_acl,
7452}; 7479};
7453 7480
7454const struct dentry_operations btrfs_dentry_operations = { 7481const struct dentry_operations btrfs_dentry_operations = {
7455 .d_delete = btrfs_dentry_delete, 7482 .d_delete = btrfs_dentry_delete,
7483 .d_release = btrfs_dentry_release,
7456}; 7484};
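
The permission plumbing above changes shape: btrfs_permission() loses its flags argument and its private ACL-check callback, calling plain generic_permission(inode, mask), while every inode_operations table gains .get_acl = btrfs_get_acl so the VFS can fetch the ACL itself when it needs one. The shape of that dispatch, sketched with invented type names:

#include <errno.h>
#include <stddef.h>

struct inode;                           /* opaque, as in the VFS */
struct posix_acl { int dummy; };

/* the hook the operations tables above now carry */
struct iops_sketch {
        struct posix_acl *(*get_acl)(struct inode *inode, int type);
};

/* the generic layer pulls the ACL through the hook only when needed */
static int permission_sketch(struct inode *inode,
                             const struct iops_sketch *iop, int mask)
{
        struct posix_acl *acl;

        if (!iop->get_acl)
                return -EACCES;         /* no ACL source registered */
        acl = iop->get_acl(inode, 0);
        /* a real implementation would evaluate acl against mask here */
        return acl ? 0 : -EACCES;
}
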
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index a3c4751e07db..7cf013349941 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -323,7 +323,7 @@ static noinline int create_subvol(struct btrfs_root *root,
323 struct btrfs_inode_item *inode_item; 323 struct btrfs_inode_item *inode_item;
324 struct extent_buffer *leaf; 324 struct extent_buffer *leaf;
325 struct btrfs_root *new_root; 325 struct btrfs_root *new_root;
326 struct dentry *parent = dget_parent(dentry); 326 struct dentry *parent = dentry->d_parent;
327 struct inode *dir; 327 struct inode *dir;
328 int ret; 328 int ret;
329 int err; 329 int err;
@@ -332,10 +332,8 @@ static noinline int create_subvol(struct btrfs_root *root,
332 u64 index = 0; 332 u64 index = 0;
333 333
334 ret = btrfs_find_free_objectid(root->fs_info->tree_root, &objectid); 334 ret = btrfs_find_free_objectid(root->fs_info->tree_root, &objectid);
335 if (ret) { 335 if (ret)
336 dput(parent);
337 return ret; 336 return ret;
338 }
339 337
340 dir = parent->d_inode; 338 dir = parent->d_inode;
341 339
@@ -346,10 +344,8 @@ static noinline int create_subvol(struct btrfs_root *root,
346 * 2 - dir items 344 * 2 - dir items
347 */ 345 */
348 trans = btrfs_start_transaction(root, 6); 346 trans = btrfs_start_transaction(root, 6);
349 if (IS_ERR(trans)) { 347 if (IS_ERR(trans))
350 dput(parent);
351 return PTR_ERR(trans); 348 return PTR_ERR(trans);
352 }
353 349
354 leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 350 leaf = btrfs_alloc_free_block(trans, root, root->leafsize,
355 0, objectid, NULL, 0, 0, 0); 351 0, objectid, NULL, 0, 0, 0);
@@ -439,7 +435,6 @@ static noinline int create_subvol(struct btrfs_root *root,
439 435
440 d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry)); 436 d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry));
441fail: 437fail:
442 dput(parent);
443 if (async_transid) { 438 if (async_transid) {
444 *async_transid = trans->transid; 439 *async_transid = trans->transid;
445 err = btrfs_commit_transaction_async(trans, root, 1); 440 err = btrfs_commit_transaction_async(trans, root, 1);
@@ -456,7 +451,6 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
456 bool readonly) 451 bool readonly)
457{ 452{
458 struct inode *inode; 453 struct inode *inode;
459 struct dentry *parent;
460 struct btrfs_pending_snapshot *pending_snapshot; 454 struct btrfs_pending_snapshot *pending_snapshot;
461 struct btrfs_trans_handle *trans; 455 struct btrfs_trans_handle *trans;
462 int ret; 456 int ret;
@@ -504,9 +498,7 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
504 if (ret) 498 if (ret)
505 goto fail; 499 goto fail;
506 500
507 parent = dget_parent(dentry); 501 inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry);
508 inode = btrfs_lookup_dentry(parent->d_inode, dentry);
509 dput(parent);
510 if (IS_ERR(inode)) { 502 if (IS_ERR(inode)) {
511 ret = PTR_ERR(inode); 503 ret = PTR_ERR(inode);
512 goto fail; 504 goto fail;
@@ -867,8 +859,8 @@ again:
867 /* step one, lock all the pages */ 859 /* step one, lock all the pages */
868 for (i = 0; i < num_pages; i++) { 860 for (i = 0; i < num_pages; i++) {
869 struct page *page; 861 struct page *page;
870 page = grab_cache_page(inode->i_mapping, 862 page = find_or_create_page(inode->i_mapping,
871 start_index + i); 863 start_index + i, GFP_NOFS);
872 if (!page) 864 if (!page)
873 break; 865 break;
874 866
@@ -938,7 +930,9 @@ again:
938 GFP_NOFS); 930 GFP_NOFS);
939 931
940 if (i_done != num_pages) { 932 if (i_done != num_pages) {
941 atomic_inc(&BTRFS_I(inode)->outstanding_extents); 933 spin_lock(&BTRFS_I(inode)->lock);
934 BTRFS_I(inode)->outstanding_extents++;
935 spin_unlock(&BTRFS_I(inode)->lock);
942 btrfs_delalloc_release_space(inode, 936 btrfs_delalloc_release_space(inode,
943 (num_pages - i_done) << PAGE_CACHE_SHIFT); 937 (num_pages - i_done) << PAGE_CACHE_SHIFT);
944 } 938 }
@@ -1755,11 +1749,10 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info,
1755 key.objectid = key.offset; 1749 key.objectid = key.offset;
1756 key.offset = (u64)-1; 1750 key.offset = (u64)-1;
1757 dirid = key.objectid; 1751 dirid = key.objectid;
1758
1759 } 1752 }
1760 if (ptr < name) 1753 if (ptr < name)
1761 goto out; 1754 goto out;
1762 memcpy(name, ptr, total_len); 1755 memmove(name, ptr, total_len);
1763 name[total_len]='\0'; 1756 name[total_len]='\0';
1764 ret = 0; 1757 ret = 0;
1765out: 1758out:
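
The memcpy-to-memmove change in btrfs_search_path_in_tree() matters because the path name is assembled in place, shifting components toward the front of the same buffer; the source and destination regions can overlap, which memcpy() does not permit. A standalone demonstration:

#include <stdio.h>
#include <string.h>

int main(void)
{
        char buf[] = "xxxxsub/dir/file";

        /* shift the name left over its own tail: the regions overlap,
         * so memmove() is required; memcpy() would be undefined here */
        memmove(buf, buf + 4, strlen(buf + 4) + 1);
        printf("%s\n", buf);    /* prints "sub/dir/file" */
        return 0;
}
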
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c
index 66fa43dc3f0f..d77b67c4b275 100644
--- a/fs/btrfs/locking.c
+++ b/fs/btrfs/locking.c
@@ -24,185 +24,197 @@
24#include "extent_io.h" 24#include "extent_io.h"
25#include "locking.h" 25#include "locking.h"
26 26
27static inline void spin_nested(struct extent_buffer *eb) 27void btrfs_assert_tree_read_locked(struct extent_buffer *eb);
28{
29 spin_lock(&eb->lock);
30}
31 28
32/* 29/*
33 * Setting a lock to blocking will drop the spinlock and set the 30 * if we currently have a spinning reader or writer lock
34 * flag that forces other procs who want the lock to wait. After 31 * (indicated by the rw flag) this will bump the count
35 * this you can safely schedule with the lock held. 32 * of blocking holders and drop the spinlock.
36 */ 33 */
37void btrfs_set_lock_blocking(struct extent_buffer *eb) 34void btrfs_set_lock_blocking_rw(struct extent_buffer *eb, int rw)
38{ 35{
39 if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) { 36 if (rw == BTRFS_WRITE_LOCK) {
40 set_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags); 37 if (atomic_read(&eb->blocking_writers) == 0) {
41 spin_unlock(&eb->lock); 38 WARN_ON(atomic_read(&eb->spinning_writers) != 1);
39 atomic_dec(&eb->spinning_writers);
40 btrfs_assert_tree_locked(eb);
41 atomic_inc(&eb->blocking_writers);
42 write_unlock(&eb->lock);
43 }
44 } else if (rw == BTRFS_READ_LOCK) {
45 btrfs_assert_tree_read_locked(eb);
46 atomic_inc(&eb->blocking_readers);
47 WARN_ON(atomic_read(&eb->spinning_readers) == 0);
48 atomic_dec(&eb->spinning_readers);
49 read_unlock(&eb->lock);
42 } 50 }
43 /* exit with the spin lock released and the bit set */ 51 return;
44} 52}
45 53
46/* 54/*
47 * clearing the blocking flag will take the spinlock again. 55 * if we currently have a blocking lock, take the spinlock
48 * After this you can't safely schedule 56 * and drop our blocking count
49 */ 57 */
50void btrfs_clear_lock_blocking(struct extent_buffer *eb) 58void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw)
51{ 59{
52 if (test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) { 60 if (rw == BTRFS_WRITE_LOCK_BLOCKING) {
53 spin_nested(eb); 61 BUG_ON(atomic_read(&eb->blocking_writers) != 1);
54 clear_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags); 62 write_lock(&eb->lock);
55 smp_mb__after_clear_bit(); 63 WARN_ON(atomic_read(&eb->spinning_writers));
64 atomic_inc(&eb->spinning_writers);
65 if (atomic_dec_and_test(&eb->blocking_writers))
66 wake_up(&eb->write_lock_wq);
67 } else if (rw == BTRFS_READ_LOCK_BLOCKING) {
68 BUG_ON(atomic_read(&eb->blocking_readers) == 0);
69 read_lock(&eb->lock);
70 atomic_inc(&eb->spinning_readers);
71 if (atomic_dec_and_test(&eb->blocking_readers))
72 wake_up(&eb->read_lock_wq);
56 } 73 }
57 /* exit with the spin lock held */ 74 return;
58} 75}
59 76
60/* 77/*
61 * unfortunately, many of the places that currently set a lock to blocking 78 * take a spinning read lock. This will wait for any blocking
62 * don't end up blocking for very long, and often they don't block 79 * writers
63 * at all. For a dbench 50 run, if we don't spin on the blocking bit
64 * at all, the context switch rate can jump up to 400,000/sec or more.
65 *
66 * So, we're still stuck with this crummy spin on the blocking bit,
67 * at least until the most common causes of the short blocks
68 * can be dealt with.
69 */ 80 */
70static int btrfs_spin_on_block(struct extent_buffer *eb) 81void btrfs_tree_read_lock(struct extent_buffer *eb)
71{ 82{
72 int i; 83again:
73 84 wait_event(eb->write_lock_wq, atomic_read(&eb->blocking_writers) == 0);
74 for (i = 0; i < 512; i++) { 85 read_lock(&eb->lock);
75 if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) 86 if (atomic_read(&eb->blocking_writers)) {
76 return 1; 87 read_unlock(&eb->lock);
77 if (need_resched()) 88 wait_event(eb->write_lock_wq,
78 break; 89 atomic_read(&eb->blocking_writers) == 0);
79 cpu_relax(); 90 goto again;
80 } 91 }
81 return 0; 92 atomic_inc(&eb->read_locks);
93 atomic_inc(&eb->spinning_readers);
82} 94}
83 95
84/* 96/*
85 * This is somewhat different from trylock. It will take the 97 * returns 1 if we get the read lock and 0 if we don't
86 * spinlock but if it finds the lock is set to blocking, it will 98 * this won't wait for blocking writers
87 * return without the lock held.
88 *
89 * returns 1 if it was able to take the lock and zero otherwise
90 *
91 * After this call, scheduling is not safe without first calling
92 * btrfs_set_lock_blocking()
93 */ 99 */
94int btrfs_try_spin_lock(struct extent_buffer *eb) 100int btrfs_try_tree_read_lock(struct extent_buffer *eb)
95{ 101{
96 int i; 102 if (atomic_read(&eb->blocking_writers))
103 return 0;
97 104
98 if (btrfs_spin_on_block(eb)) { 105 read_lock(&eb->lock);
99 spin_nested(eb); 106 if (atomic_read(&eb->blocking_writers)) {
100 if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) 107 read_unlock(&eb->lock);
101 return 1; 108 return 0;
102 spin_unlock(&eb->lock);
103 } 109 }
104 /* spin for a bit on the BLOCKING flag */ 110 atomic_inc(&eb->read_locks);
105 for (i = 0; i < 2; i++) { 111 atomic_inc(&eb->spinning_readers);
106 cpu_relax(); 112 return 1;
107 if (!btrfs_spin_on_block(eb))
108 break;
109
110 spin_nested(eb);
111 if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
112 return 1;
113 spin_unlock(&eb->lock);
114 }
115 return 0;
116} 113}
117 114
118/* 115/*
119 * the autoremove wake function will return 0 if it tried to wake up 116 * returns 1 if we get the read lock and 0 if we don't
120 * a process that was already awake, which means that process won't 117 * this won't wait for blocking writers or readers
121 * count as an exclusive wakeup. The waitq code will continue waking
122 * procs until it finds one that was actually sleeping.
123 *
124 * For btrfs, this isn't quite what we want. We want a single proc
125 * to be notified that the lock is ready for taking. If that proc
126 * already happen to be awake, great, it will loop around and try for
127 * the lock.
128 *
129 * So, btrfs_wake_function always returns 1, even when the proc that we
130 * tried to wake up was already awake.
131 */ 118 */
132static int btrfs_wake_function(wait_queue_t *wait, unsigned mode, 119int btrfs_try_tree_write_lock(struct extent_buffer *eb)
133 int sync, void *key)
134{ 120{
135 autoremove_wake_function(wait, mode, sync, key); 121 if (atomic_read(&eb->blocking_writers) ||
122 atomic_read(&eb->blocking_readers))
123 return 0;
124 write_lock(&eb->lock);
125 if (atomic_read(&eb->blocking_writers) ||
126 atomic_read(&eb->blocking_readers)) {
127 write_unlock(&eb->lock);
128 return 0;
129 }
130 atomic_inc(&eb->write_locks);
131 atomic_inc(&eb->spinning_writers);
136 return 1; 132 return 1;
137} 133}
138 134
139/* 135/*
140 * returns with the extent buffer spinlocked. 136 * drop a spinning read lock
141 * 137 */
142 * This will spin and/or wait as required to take the lock, and then 138void btrfs_tree_read_unlock(struct extent_buffer *eb)
143 * return with the spinlock held. 139{
144 * 140 btrfs_assert_tree_read_locked(eb);
145 * After this call, scheduling is not safe without first calling 141 WARN_ON(atomic_read(&eb->spinning_readers) == 0);
146 * btrfs_set_lock_blocking() 142 atomic_dec(&eb->spinning_readers);
143 atomic_dec(&eb->read_locks);
144 read_unlock(&eb->lock);
145}
146
147/*
148 * drop a blocking read lock
149 */
150void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb)
151{
152 btrfs_assert_tree_read_locked(eb);
153 WARN_ON(atomic_read(&eb->blocking_readers) == 0);
154 if (atomic_dec_and_test(&eb->blocking_readers))
155 wake_up(&eb->read_lock_wq);
156 atomic_dec(&eb->read_locks);
157}
158
159/*
160 * take a spinning write lock. This will wait for both
161 * blocking readers and writers
147 */ 162 */
148int btrfs_tree_lock(struct extent_buffer *eb) 163int btrfs_tree_lock(struct extent_buffer *eb)
149{ 164{
150 DEFINE_WAIT(wait); 165again:
151 wait.func = btrfs_wake_function; 166 wait_event(eb->read_lock_wq, atomic_read(&eb->blocking_readers) == 0);
152 167 wait_event(eb->write_lock_wq, atomic_read(&eb->blocking_writers) == 0);
153 if (!btrfs_spin_on_block(eb)) 168 write_lock(&eb->lock);
154 goto sleep; 169 if (atomic_read(&eb->blocking_readers)) {
155 170 write_unlock(&eb->lock);
156 while(1) { 171 wait_event(eb->read_lock_wq,
157 spin_nested(eb); 172 atomic_read(&eb->blocking_readers) == 0);
158 173 goto again;
159 /* nobody is blocking, exit with the spinlock held */
160 if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
161 return 0;
162
163 /*
164 * we have the spinlock, but the real owner is blocking.
165 * wait for them
166 */
167 spin_unlock(&eb->lock);
168
169 /*
170 * spin for a bit, and if the blocking flag goes away,
171 * loop around
172 */
173 cpu_relax();
174 if (btrfs_spin_on_block(eb))
175 continue;
176sleep:
177 prepare_to_wait_exclusive(&eb->lock_wq, &wait,
178 TASK_UNINTERRUPTIBLE);
179
180 if (test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
181 schedule();
182
183 finish_wait(&eb->lock_wq, &wait);
184 } 174 }
175 if (atomic_read(&eb->blocking_writers)) {
176 write_unlock(&eb->lock);
177 wait_event(eb->write_lock_wq,
178 atomic_read(&eb->blocking_writers) == 0);
179 goto again;
180 }
181 WARN_ON(atomic_read(&eb->spinning_writers));
182 atomic_inc(&eb->spinning_writers);
183 atomic_inc(&eb->write_locks);
185 return 0; 184 return 0;
186} 185}
187 186
187/*
188 * drop a spinning or a blocking write lock.
189 */
188int btrfs_tree_unlock(struct extent_buffer *eb) 190int btrfs_tree_unlock(struct extent_buffer *eb)
189{ 191{
190 /* 192 int blockers = atomic_read(&eb->blocking_writers);
191 * if we were a blocking owner, we don't have the spinlock held 193
192 * just clear the bit and look for waiters 194 BUG_ON(blockers > 1);
193 */ 195
194 if (test_and_clear_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) 196 btrfs_assert_tree_locked(eb);
195 smp_mb__after_clear_bit(); 197 atomic_dec(&eb->write_locks);
196 else 198
197 spin_unlock(&eb->lock); 199 if (blockers) {
198 200 WARN_ON(atomic_read(&eb->spinning_writers));
199 if (waitqueue_active(&eb->lock_wq)) 201 atomic_dec(&eb->blocking_writers);
200 wake_up(&eb->lock_wq); 202 smp_wmb();
203 wake_up(&eb->write_lock_wq);
204 } else {
205 WARN_ON(atomic_read(&eb->spinning_writers) != 1);
206 atomic_dec(&eb->spinning_writers);
207 write_unlock(&eb->lock);
208 }
201 return 0; 209 return 0;
202} 210}
203 211
204void btrfs_assert_tree_locked(struct extent_buffer *eb) 212void btrfs_assert_tree_locked(struct extent_buffer *eb)
205{ 213{
206 if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) 214 BUG_ON(!atomic_read(&eb->write_locks));
207 assert_spin_locked(&eb->lock); 215}
216
217void btrfs_assert_tree_read_locked(struct extent_buffer *eb)
218{
219 BUG_ON(!atomic_read(&eb->read_locks));
208} 220}
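The shape of the new try-lock is easier to study outside the kernel tree. Below is a minimal userspace sketch of the same fast path, assuming a pthread rwlock in place of eb->lock and C11 atomics in place of the kernel atomic_t counters; the names mirror the patch, but none of this is the kernel code (build with -pthread).

/* userspace analogue of btrfs_try_tree_write_lock(): bail out fast while
 * unlocked, then re-check the blocking counters under the rwlock */
#include <pthread.h>
#include <stdatomic.h>

struct eb_lock {
	pthread_rwlock_t lock;        /* stands in for eb->lock */
	atomic_int blocking_writers;  /* stands in for eb->blocking_writers */
	atomic_int blocking_readers;  /* stands in for eb->blocking_readers */
};

static int try_tree_write_lock(struct eb_lock *eb)
{
	if (atomic_load(&eb->blocking_writers) ||
	    atomic_load(&eb->blocking_readers))
		return 0;
	pthread_rwlock_wrlock(&eb->lock);  /* kernel: write_lock(&eb->lock) */
	if (atomic_load(&eb->blocking_writers) ||
	    atomic_load(&eb->blocking_readers)) {
		pthread_rwlock_unlock(&eb->lock);
		return 0;
	}
	return 1;
}

int main(void)
{
	struct eb_lock eb = { PTHREAD_RWLOCK_INITIALIZER, 0, 0 };
	if (try_tree_write_lock(&eb))
		pthread_rwlock_unlock(&eb.lock);
	return 0;
}

The re-check under the lock is the important step: a holder may have gone blocking between the lockless test and taking the rwlock, so the counters are read again before the lock is kept.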
diff --git a/fs/btrfs/locking.h b/fs/btrfs/locking.h
index 5c33a560a2f1..17247ddb81a0 100644
--- a/fs/btrfs/locking.h
+++ b/fs/btrfs/locking.h
@@ -19,11 +19,43 @@
 #ifndef __BTRFS_LOCKING_
 #define __BTRFS_LOCKING_
 
+#define BTRFS_WRITE_LOCK 1
+#define BTRFS_READ_LOCK 2
+#define BTRFS_WRITE_LOCK_BLOCKING 3
+#define BTRFS_READ_LOCK_BLOCKING 4
+
 int btrfs_tree_lock(struct extent_buffer *eb);
 int btrfs_tree_unlock(struct extent_buffer *eb);
 int btrfs_try_spin_lock(struct extent_buffer *eb);
 
-void btrfs_set_lock_blocking(struct extent_buffer *eb);
-void btrfs_clear_lock_blocking(struct extent_buffer *eb);
+void btrfs_tree_read_lock(struct extent_buffer *eb);
+void btrfs_tree_read_unlock(struct extent_buffer *eb);
+void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb);
+void btrfs_set_lock_blocking_rw(struct extent_buffer *eb, int rw);
+void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw);
 void btrfs_assert_tree_locked(struct extent_buffer *eb);
+int btrfs_try_tree_read_lock(struct extent_buffer *eb);
+int btrfs_try_tree_write_lock(struct extent_buffer *eb);
+
+static inline void btrfs_tree_unlock_rw(struct extent_buffer *eb, int rw)
+{
+	if (rw == BTRFS_WRITE_LOCK || rw == BTRFS_WRITE_LOCK_BLOCKING)
+		btrfs_tree_unlock(eb);
+	else if (rw == BTRFS_READ_LOCK_BLOCKING)
+		btrfs_tree_read_unlock_blocking(eb);
+	else if (rw == BTRFS_READ_LOCK)
+		btrfs_tree_read_unlock(eb);
+	else
+		BUG();
+}
+
+static inline void btrfs_set_lock_blocking(struct extent_buffer *eb)
+{
+	btrfs_set_lock_blocking_rw(eb, BTRFS_WRITE_LOCK);
+}
+
+static inline void btrfs_clear_lock_blocking(struct extent_buffer *eb)
+{
+	btrfs_clear_lock_blocking_rw(eb, BTRFS_WRITE_LOCK_BLOCKING);
+}
 #endif
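The header's new idea is that callers record which of the four lock flavors they hold and funnel every unlock through one dispatcher. A rough userspace sketch of that bookkeeping, with toy counters standing in for the per-flavor unlock paths; the names echo the patch but this is not the kernel API.

#include <stdatomic.h>
#include <stdio.h>

enum { WRITE_LOCK = 1, READ_LOCK = 2,
       WRITE_LOCK_BLOCKING = 3, READ_LOCK_BLOCKING = 4 };

static atomic_int read_locks, write_locks;

/* toy stand-ins for the four distinct unlock paths */
static void tree_unlock(void)               { atomic_fetch_sub(&write_locks, 1); }
static void tree_read_unlock(void)          { atomic_fetch_sub(&read_locks, 1); }
static void tree_read_unlock_blocking(void) { atomic_fetch_sub(&read_locks, 1); }

/* shape of btrfs_tree_unlock_rw(): route to the unlock that matches
 * the mode the caller recorded when it took (or converted) the lock */
static void tree_unlock_rw(int rw)
{
	if (rw == WRITE_LOCK || rw == WRITE_LOCK_BLOCKING)
		tree_unlock();
	else if (rw == READ_LOCK_BLOCKING)
		tree_read_unlock_blocking();
	else if (rw == READ_LOCK)
		tree_read_unlock();
}

int main(void)
{
	atomic_fetch_add(&read_locks, 1);   /* "take" a spinning read lock */
	tree_unlock_rw(READ_LOCK);
	printf("read_locks=%d\n", atomic_load(&read_locks));
	return 0;
}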
diff --git a/fs/btrfs/ref-cache.c b/fs/btrfs/ref-cache.c
deleted file mode 100644
index 82d569cb6267..000000000000
--- a/fs/btrfs/ref-cache.c
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (C) 2008 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License v2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/sort.h>
-#include "ctree.h"
-#include "ref-cache.h"
-#include "transaction.h"
-
-static struct rb_node *tree_insert(struct rb_root *root, u64 bytenr,
-				   struct rb_node *node)
-{
-	struct rb_node **p = &root->rb_node;
-	struct rb_node *parent = NULL;
-	struct btrfs_leaf_ref *entry;
-
-	while (*p) {
-		parent = *p;
-		entry = rb_entry(parent, struct btrfs_leaf_ref, rb_node);
-
-		if (bytenr < entry->bytenr)
-			p = &(*p)->rb_left;
-		else if (bytenr > entry->bytenr)
-			p = &(*p)->rb_right;
-		else
-			return parent;
-	}
-
-	entry = rb_entry(node, struct btrfs_leaf_ref, rb_node);
-	rb_link_node(node, parent, p);
-	rb_insert_color(node, root);
-	return NULL;
-}
-
-static struct rb_node *tree_search(struct rb_root *root, u64 bytenr)
-{
-	struct rb_node *n = root->rb_node;
-	struct btrfs_leaf_ref *entry;
-
-	while (n) {
-		entry = rb_entry(n, struct btrfs_leaf_ref, rb_node);
-		WARN_ON(!entry->in_tree);
-
-		if (bytenr < entry->bytenr)
-			n = n->rb_left;
-		else if (bytenr > entry->bytenr)
-			n = n->rb_right;
-		else
-			return n;
-	}
-	return NULL;
-}
diff --git a/fs/btrfs/ref-cache.h b/fs/btrfs/ref-cache.h
deleted file mode 100644
index 24f7001f6387..000000000000
--- a/fs/btrfs/ref-cache.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright (C) 2008 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License v2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-#ifndef __REFCACHE__
-#define __REFCACHE__
-
-struct btrfs_extent_info {
-	/* bytenr and num_bytes find the extent in the extent allocation tree */
-	u64 bytenr;
-	u64 num_bytes;
-
-	/* objectid and offset find the back reference for the file */
-	u64 objectid;
-	u64 offset;
-};
-
-struct btrfs_leaf_ref {
-	struct rb_node rb_node;
-	struct btrfs_leaf_ref_tree *tree;
-	int in_tree;
-	atomic_t usage;
-
-	u64 root_gen;
-	u64 bytenr;
-	u64 owner;
-	u64 generation;
-	int nritems;
-
-	struct list_head list;
-	struct btrfs_extent_info extents[];
-};
-
-static inline size_t btrfs_leaf_ref_size(int nr_extents)
-{
-	return sizeof(struct btrfs_leaf_ref) +
-	       sizeof(struct btrfs_extent_info) * nr_extents;
-}
-#endif
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 5e0a3dc79a45..59bb1764273d 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -2955,7 +2955,8 @@ static int relocate_file_extent_cluster(struct inode *inode,
 		page_cache_sync_readahead(inode->i_mapping,
 					  ra, NULL, index,
 					  last_index + 1 - index);
-		page = grab_cache_page(inode->i_mapping, index);
+		page = find_or_create_page(inode->i_mapping, index,
+					   GFP_NOFS);
 		if (!page) {
 			btrfs_delalloc_release_metadata(inode,
 							PAGE_CACHE_SIZE);
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index ebe45443de06..f4099904565a 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -71,13 +71,12 @@ out:
 	return ret;
 }
 
-int btrfs_set_root_node(struct btrfs_root_item *item,
-			struct extent_buffer *node)
+void btrfs_set_root_node(struct btrfs_root_item *item,
+			 struct extent_buffer *node)
 {
 	btrfs_set_root_bytenr(item, node->start);
 	btrfs_set_root_level(item, btrfs_header_level(node));
 	btrfs_set_root_generation(item, btrfs_header_generation(node));
-	return 0;
 }
 
 /*
diff --git a/fs/btrfs/struct-funcs.c b/fs/btrfs/struct-funcs.c
index c0f7ecaf1e79..bc1f6ad18442 100644
--- a/fs/btrfs/struct-funcs.c
+++ b/fs/btrfs/struct-funcs.c
@@ -50,36 +50,22 @@ u##bits btrfs_##name(struct extent_buffer *eb, \
 	unsigned long part_offset = (unsigned long)s;			\
 	unsigned long offset = part_offset + offsetof(type, member);	\
 	type *p;							\
-	/* ugly, but we want the fast path here */			\
-	if (eb->map_token && offset >= eb->map_start &&			\
-	    offset + sizeof(((type *)0)->member) <= eb->map_start +	\
-	    eb->map_len) {						\
-		p = (type *)(eb->kaddr + part_offset - eb->map_start);	\
-		return le##bits##_to_cpu(p->member);			\
-	}								\
-	{								\
-		int err;						\
-		char *map_token;					\
-		char *kaddr;						\
-		int unmap_on_exit = (eb->map_token == NULL);		\
-		unsigned long map_start;				\
-		unsigned long map_len;					\
-		u##bits res;						\
-		err = map_extent_buffer(eb, offset,			\
-				sizeof(((type *)0)->member),		\
-				&map_token, &kaddr,			\
-				&map_start, &map_len, KM_USER1);	\
-		if (err) {						\
-			__le##bits leres;				\
-			read_eb_member(eb, s, type, member, &leres);	\
-			return le##bits##_to_cpu(leres);		\
-		}							\
-		p = (type *)(kaddr + part_offset - map_start);		\
-		res = le##bits##_to_cpu(p->member);			\
-		if (unmap_on_exit)					\
-			unmap_extent_buffer(eb, map_token, KM_USER1);	\
-		return res;						\
-	}								\
+	int err;							\
+	char *kaddr;							\
+	unsigned long map_start;					\
+	unsigned long map_len;						\
+	u##bits res;							\
+	err = map_private_extent_buffer(eb, offset,			\
+			sizeof(((type *)0)->member),			\
+			&kaddr, &map_start, &map_len);			\
+	if (err) {							\
+		__le##bits leres;					\
+		read_eb_member(eb, s, type, member, &leres);		\
+		return le##bits##_to_cpu(leres);			\
+	}								\
+	p = (type *)(kaddr + part_offset - map_start);			\
+	res = le##bits##_to_cpu(p->member);				\
+	return res;							\
 }									\
 void btrfs_set_##name(struct extent_buffer *eb,				\
 		      type *s, u##bits val)				\
@@ -87,36 +73,21 @@ void btrfs_set_##name(struct extent_buffer *eb, \
 	unsigned long part_offset = (unsigned long)s;			\
 	unsigned long offset = part_offset + offsetof(type, member);	\
 	type *p;							\
-	/* ugly, but we want the fast path here */			\
-	if (eb->map_token && offset >= eb->map_start &&			\
-	    offset + sizeof(((type *)0)->member) <= eb->map_start +	\
-	    eb->map_len) {						\
-		p = (type *)(eb->kaddr + part_offset - eb->map_start);	\
-		p->member = cpu_to_le##bits(val);			\
-		return;							\
-	}								\
-	{								\
-		int err;						\
-		char *map_token;					\
-		char *kaddr;						\
-		int unmap_on_exit = (eb->map_token == NULL);		\
-		unsigned long map_start;				\
-		unsigned long map_len;					\
-		err = map_extent_buffer(eb, offset,			\
-				sizeof(((type *)0)->member),		\
-				&map_token, &kaddr,			\
-				&map_start, &map_len, KM_USER1);	\
-		if (err) {						\
-			__le##bits val2;				\
-			val2 = cpu_to_le##bits(val);			\
-			write_eb_member(eb, s, type, member, &val2);	\
-			return;						\
-		}							\
-		p = (type *)(kaddr + part_offset - map_start);		\
-		p->member = cpu_to_le##bits(val);			\
-		if (unmap_on_exit)					\
-			unmap_extent_buffer(eb, map_token, KM_USER1);	\
-	}								\
+	int err;							\
+	char *kaddr;							\
+	unsigned long map_start;					\
+	unsigned long map_len;						\
+	err = map_private_extent_buffer(eb, offset,			\
+			sizeof(((type *)0)->member),			\
+			&kaddr, &map_start, &map_len);			\
+	if (err) {							\
+		__le##bits val2;					\
+		val2 = cpu_to_le##bits(val);				\
+		write_eb_member(eb, s, type, member, &val2);		\
+		return;							\
+	}								\
+	p = (type *)(kaddr + part_offset - map_start);			\
+	p->member = cpu_to_le##bits(val);				\
 }
 
 #include "ctree.h"
@@ -125,15 +96,6 @@ void btrfs_node_key(struct extent_buffer *eb,
 		   struct btrfs_disk_key *disk_key, int nr)
 {
 	unsigned long ptr = btrfs_node_key_ptr_offset(nr);
-	if (eb->map_token && ptr >= eb->map_start &&
-	    ptr + sizeof(*disk_key) <= eb->map_start + eb->map_len) {
-		memcpy(disk_key, eb->kaddr + ptr - eb->map_start,
-			sizeof(*disk_key));
-		return;
-	} else if (eb->map_token) {
-		unmap_extent_buffer(eb, eb->map_token, KM_USER1);
-		eb->map_token = NULL;
-	}
 	read_eb_member(eb, (struct btrfs_key_ptr *)ptr,
 		       struct btrfs_key_ptr, key, disk_key);
 }
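What this file is built around is easier to see in a toy: the same ## token-pasting trick stamps out typed get/set pairs for each struct member. A minimal sketch, assuming a plain in-memory struct rather than an extent buffer (so no endian conversion or page mapping); the names here are made up for illustration.

#include <stdint.h>
#include <stdio.h>

struct item { uint32_t size; uint64_t offset; };

/* generate a typed getter/setter pair per member, one macro expansion each */
#define DEFINE_GETSET(bits, name, member)			\
static uint##bits##_t get_##name(const struct item *s)		\
{								\
	return s->member;					\
}								\
static void set_##name(struct item *s, uint##bits##_t val)	\
{								\
	s->member = val;					\
}

DEFINE_GETSET(32, item_size, size)
DEFINE_GETSET(64, item_offset, offset)

int main(void)
{
	struct item it = { 0, 0 };
	set_item_size(&it, 4096);
	set_item_offset(&it, 1 << 20);
	printf("%u %llu\n", get_item_size(&it),
	       (unsigned long long)get_item_offset(&it));
	return 0;
}

The patch itself only changes what the generated bodies do: the old per-eb map_token fast path gives way to map_private_extent_buffer(), with read_eb_member()/write_eb_member() as the slow path when the member straddles a mapping.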
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 0bb4ebbb71b7..15634d4648d7 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -723,6 +723,12 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
 		seq_puts(seq, ",clear_cache");
 	if (btrfs_test_opt(root, USER_SUBVOL_RM_ALLOWED))
 		seq_puts(seq, ",user_subvol_rm_allowed");
+	if (btrfs_test_opt(root, ENOSPC_DEBUG))
+		seq_puts(seq, ",enospc_debug");
+	if (btrfs_test_opt(root, AUTO_DEFRAG))
+		seq_puts(seq, ",autodefrag");
+	if (btrfs_test_opt(root, INODE_MAP_CACHE))
+		seq_puts(seq, ",inode_cache");
 	return 0;
 }
 
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 51dcec86757f..7dc36fab4afc 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -216,17 +216,11 @@ static void wait_current_trans(struct btrfs_root *root)
 	spin_lock(&root->fs_info->trans_lock);
 	cur_trans = root->fs_info->running_transaction;
 	if (cur_trans && cur_trans->blocked) {
-		DEFINE_WAIT(wait);
 		atomic_inc(&cur_trans->use_count);
 		spin_unlock(&root->fs_info->trans_lock);
-		while (1) {
-			prepare_to_wait(&root->fs_info->transaction_wait, &wait,
-					TASK_UNINTERRUPTIBLE);
-			if (!cur_trans->blocked)
-				break;
-			schedule();
-		}
-		finish_wait(&root->fs_info->transaction_wait, &wait);
+
+		wait_event(root->fs_info->transaction_wait,
+			   !cur_trans->blocked);
 		put_transaction(cur_trans);
 	} else {
 		spin_unlock(&root->fs_info->trans_lock);
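Every wait-loop hunk in this file is the same conversion: an open-coded prepare_to_wait()/schedule()/finish_wait() loop collapses into wait_event(). A userspace sketch of the equivalence, assuming a pthread mutex/condvar pair standing in for the kernel wait queue; wait_event() hides exactly this re-check loop.

#include <pthread.h>
#include <stdbool.h>

struct waitq {
	pthread_mutex_t mu;
	pthread_cond_t cv;
};

/* wait_event(wq, *flag) analogue: sleep until the condition holds */
static void wait_event_flag(struct waitq *wq, bool *flag)
{
	pthread_mutex_lock(&wq->mu);
	while (!*flag)                  /* the loop wait_event open-codes */
		pthread_cond_wait(&wq->cv, &wq->mu);
	pthread_mutex_unlock(&wq->mu);
}

/* wake_up(wq) analogue: set the condition and wake all waiters */
static void wake_up_flag(struct waitq *wq, bool *flag)
{
	pthread_mutex_lock(&wq->mu);
	*flag = true;
	pthread_cond_broadcast(&wq->cv);
	pthread_mutex_unlock(&wq->mu);
}

int main(void)
{
	struct waitq wq = { PTHREAD_MUTEX_INITIALIZER,
			    PTHREAD_COND_INITIALIZER };
	bool done = false;
	wake_up_flag(&wq, &done);   /* condition already set: wait returns */
	wait_event_flag(&wq, &done);
	return 0;
}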
@@ -260,7 +254,7 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
 {
 	struct btrfs_trans_handle *h;
 	struct btrfs_transaction *cur_trans;
-	int retries = 0;
+	u64 num_bytes = 0;
 	int ret;
 
 	if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)
@@ -274,6 +268,19 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
 		h->block_rsv = NULL;
 		goto got_it;
 	}
+
+	/*
+	 * Do the reservation before we join the transaction so we can do all
+	 * the appropriate flushing if need be.
+	 */
+	if (num_items > 0 && root != root->fs_info->chunk_root) {
+		num_bytes = btrfs_calc_trans_metadata_size(root, num_items);
+		ret = btrfs_block_rsv_add(NULL, root,
+					  &root->fs_info->trans_block_rsv,
+					  num_bytes);
+		if (ret)
+			return ERR_PTR(ret);
+	}
 again:
 	h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
 	if (!h)
@@ -310,24 +317,9 @@ again:
 		goto again;
 	}
 
-	if (num_items > 0) {
-		ret = btrfs_trans_reserve_metadata(h, root, num_items);
-		if (ret == -EAGAIN && !retries) {
-			retries++;
-			btrfs_commit_transaction(h, root);
-			goto again;
-		} else if (ret == -EAGAIN) {
-			/*
-			 * We have already retried and got EAGAIN, so really we
-			 * don't have space, so set ret to -ENOSPC.
-			 */
-			ret = -ENOSPC;
-		}
-
-		if (ret < 0) {
-			btrfs_end_transaction(h, root);
-			return ERR_PTR(ret);
-		}
+	if (num_bytes) {
+		h->block_rsv = &root->fs_info->trans_block_rsv;
+		h->bytes_reserved = num_bytes;
 	}
 
 got_it:
@@ -359,19 +351,10 @@ struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root
 }
 
 /* wait for a transaction commit to be fully complete */
-static noinline int wait_for_commit(struct btrfs_root *root,
+static noinline void wait_for_commit(struct btrfs_root *root,
 				    struct btrfs_transaction *commit)
 {
-	DEFINE_WAIT(wait);
-	while (!commit->commit_done) {
-		prepare_to_wait(&commit->commit_wait, &wait,
-				TASK_UNINTERRUPTIBLE);
-		if (commit->commit_done)
-			break;
-		schedule();
-	}
-	finish_wait(&commit->commit_wait, &wait);
-	return 0;
+	wait_event(commit->commit_wait, commit->commit_done);
 }
 
 int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid)
@@ -499,10 +482,17 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
 	}
 
 	if (lock && cur_trans->blocked && !cur_trans->in_commit) {
-		if (throttle)
+		if (throttle) {
+			/*
+			 * We may race with somebody else here so end up having
+			 * to call end_transaction on ourselves again, so inc
+			 * our use_count.
+			 */
+			trans->use_count++;
 			return btrfs_commit_transaction(trans, root);
-		else
+		} else {
 			wake_up_process(info->transaction_kthread);
+		}
 	}
 
 	WARN_ON(cur_trans != info->running_transaction);
@@ -1080,22 +1070,7 @@ int btrfs_transaction_blocked(struct btrfs_fs_info *info)
 static void wait_current_trans_commit_start(struct btrfs_root *root,
 					    struct btrfs_transaction *trans)
 {
-	DEFINE_WAIT(wait);
-
-	if (trans->in_commit)
-		return;
-
-	while (1) {
-		prepare_to_wait(&root->fs_info->transaction_blocked_wait, &wait,
-				TASK_UNINTERRUPTIBLE);
-		if (trans->in_commit) {
-			finish_wait(&root->fs_info->transaction_blocked_wait,
-				    &wait);
-			break;
-		}
-		schedule();
-		finish_wait(&root->fs_info->transaction_blocked_wait, &wait);
-	}
+	wait_event(root->fs_info->transaction_blocked_wait, trans->in_commit);
 }
 
 /*
@@ -1105,24 +1080,8 @@ static void wait_current_trans_commit_start(struct btrfs_root *root,
 static void wait_current_trans_commit_start_and_unblock(struct btrfs_root *root,
 					 struct btrfs_transaction *trans)
 {
-	DEFINE_WAIT(wait);
-
-	if (trans->commit_done || (trans->in_commit && !trans->blocked))
-		return;
-
-	while (1) {
-		prepare_to_wait(&root->fs_info->transaction_wait, &wait,
-				TASK_UNINTERRUPTIBLE);
-		if (trans->commit_done ||
-		    (trans->in_commit && !trans->blocked)) {
-			finish_wait(&root->fs_info->transaction_wait,
-				    &wait);
-			break;
-		}
-		schedule();
-		finish_wait(&root->fs_info->transaction_wait,
-			    &wait);
-	}
+	wait_event(root->fs_info->transaction_wait,
+		   trans->commit_done || (trans->in_commit && !trans->blocked));
 }
 
 /*
@@ -1229,8 +1188,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 		atomic_inc(&cur_trans->use_count);
 		btrfs_end_transaction(trans, root);
 
-		ret = wait_for_commit(root, cur_trans);
-		BUG_ON(ret);
+		wait_for_commit(root, cur_trans);
 
 		put_transaction(cur_trans);
 
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 4ce8a9f41d1e..babee65f8eda 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -1617,7 +1617,8 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
 		return 0;
 
 	path = btrfs_alloc_path();
-	BUG_ON(!path);
+	if (!path)
+		return -ENOMEM;
 
 	nritems = btrfs_header_nritems(eb);
 	for (i = 0; i < nritems; i++) {
@@ -1723,15 +1724,17 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
 			return -ENOMEM;
 
 		if (*level == 1) {
-			wc->process_func(root, next, wc, ptr_gen);
+			ret = wc->process_func(root, next, wc, ptr_gen);
+			if (ret)
+				return ret;
 
 			path->slots[*level]++;
 			if (wc->free) {
 				btrfs_read_buffer(next, ptr_gen);
 
 				btrfs_tree_lock(next);
-				clean_tree_block(trans, root, next);
 				btrfs_set_lock_blocking(next);
+				clean_tree_block(trans, root, next);
 				btrfs_wait_tree_block_writeback(next);
 				btrfs_tree_unlock(next);
 
@@ -1788,16 +1791,19 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
 		parent = path->nodes[*level + 1];
 
 		root_owner = btrfs_header_owner(parent);
-		wc->process_func(root, path->nodes[*level], wc,
+		ret = wc->process_func(root, path->nodes[*level], wc,
 				 btrfs_header_generation(path->nodes[*level]));
+		if (ret)
+			return ret;
+
 		if (wc->free) {
 			struct extent_buffer *next;
 
 			next = path->nodes[*level];
 
 			btrfs_tree_lock(next);
-			clean_tree_block(trans, root, next);
 			btrfs_set_lock_blocking(next);
+			clean_tree_block(trans, root, next);
 			btrfs_wait_tree_block_writeback(next);
 			btrfs_tree_unlock(next);
 
@@ -1864,8 +1870,8 @@ static int walk_log_tree(struct btrfs_trans_handle *trans,
 	next = path->nodes[orig_level];
 
 	btrfs_tree_lock(next);
-	clean_tree_block(trans, log, next);
 	btrfs_set_lock_blocking(next);
+	clean_tree_block(trans, log, next);
 	btrfs_wait_tree_block_writeback(next);
 	btrfs_tree_unlock(next);
 
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 1efa56e18f9b..53875ae73ad4 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1037,7 +1037,8 @@ static noinline int find_next_chunk(struct btrfs_root *root,
 	struct btrfs_key found_key;
 
 	path = btrfs_alloc_path();
-	BUG_ON(!path);
+	if (!path)
+		return -ENOMEM;
 
 	key.objectid = objectid;
 	key.offset = (u64)-1;
@@ -2061,8 +2062,10 @@ int btrfs_balance(struct btrfs_root *dev_root)
 
 	/* step two, relocate all the chunks */
 	path = btrfs_alloc_path();
-	BUG_ON(!path);
-
+	if (!path) {
+		ret = -ENOMEM;
+		goto error;
+	}
 	key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
 	key.offset = (u64)-1;
 	key.type = BTRFS_CHUNK_ITEM_KEY;
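The volumes.c hunks all make the same trade: a failure that used to BUG_ON() and crash the box now unwinds to the caller as an error code. A minimal userspace sketch of the pattern, with malloc standing in for btrfs_alloc_path(); the helper name is invented for illustration.

#include <errno.h>
#include <stdlib.h>

struct path { int slots[8]; };

/* was: path = btrfs_alloc_path(); BUG_ON(!path);
 * now: the allocation failure propagates as -ENOMEM */
static int do_balance_step(void)
{
	struct path *path = malloc(sizeof(*path));
	if (!path)
		return -ENOMEM;
	/* ... relocate chunks using path ... */
	free(path);
	return 0;
}

int main(void)
{
	return do_balance_step() ? 1 : 0;
}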
@@ -2098,7 +2101,8 @@ int btrfs_balance(struct btrfs_root *dev_root)
 					   chunk_root->root_key.objectid,
 					   found_key.objectid,
 					   found_key.offset);
-		BUG_ON(ret && ret != -ENOSPC);
+		if (ret && ret != -ENOSPC)
+			goto error;
 		key.offset = found_key.offset - 1;
 	}
 	ret = 0;
@@ -2660,7 +2664,8 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans,
 
 	ret = find_next_chunk(fs_info->chunk_root,
 			      BTRFS_FIRST_CHUNK_TREE_OBJECTID, &chunk_offset);
-	BUG_ON(ret);
+	if (ret)
+		return ret;
 
 	alloc_profile = BTRFS_BLOCK_GROUP_METADATA |
 			(fs_info->metadata_alloc_profile &
@@ -3594,7 +3599,7 @@ int btrfs_read_sys_array(struct btrfs_root *root)
 	if (!sb)
 		return -ENOMEM;
 	btrfs_set_buffer_uptodate(sb);
-	btrfs_set_buffer_lockdep_class(sb, 0);
+	btrfs_set_buffer_lockdep_class(root->root_key.objectid, sb, 0);
 
 	write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE);
 	array_size = btrfs_super_sys_array_size(super_copy);
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index a039e6ed4ce0..6196e1a76c14 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -102,43 +102,57 @@ static int do_setxattr(struct btrfs_trans_handle *trans,
 	if (!path)
 		return -ENOMEM;
 
-	/* first lets see if we already have this xattr */
-	di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode), name,
-				strlen(name), -1);
-	if (IS_ERR(di)) {
-		ret = PTR_ERR(di);
-		goto out;
-	}
-
-	/* ok we already have this xattr, lets remove it */
-	if (di) {
-		/* if we want create only exit */
-		if (flags & XATTR_CREATE) {
-			ret = -EEXIST;
+	if (flags & XATTR_REPLACE) {
+		di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode), name,
+					name_len, -1);
+		if (IS_ERR(di)) {
+			ret = PTR_ERR(di);
+			goto out;
+		} else if (!di) {
+			ret = -ENODATA;
 			goto out;
 		}
-
 		ret = btrfs_delete_one_dir_name(trans, root, path, di);
-		BUG_ON(ret);
+		if (ret)
+			goto out;
 		btrfs_release_path(path);
+	}
 
-	/* if we don't have a value then we are removing the xattr */
-	if (!value)
+again:
+	ret = btrfs_insert_xattr_item(trans, root, path, btrfs_ino(inode),
+				      name, name_len, value, size);
+	if (ret == -EEXIST) {
+		if (flags & XATTR_CREATE)
 			goto out;
-	} else {
+		/*
+		 * We can't use the path we already have since we won't have the
+		 * proper locking for a delete, so release the path and
+		 * re-lookup to delete the thing.
+		 */
 		btrfs_release_path(path);
+		di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode),
+					name, name_len, -1);
+		if (IS_ERR(di)) {
+			ret = PTR_ERR(di);
+			goto out;
+		} else if (!di) {
+			/* Shouldn't happen but just in case... */
+			btrfs_release_path(path);
+			goto again;
+		}
 
-		if (flags & XATTR_REPLACE) {
-			/* we couldn't find the attr to replace */
-			ret = -ENODATA;
+		ret = btrfs_delete_one_dir_name(trans, root, path, di);
+		if (ret)
 			goto out;
+
+		/*
+		 * We have a value to set, so go back and try to insert it now.
+		 */
+		if (value) {
+			btrfs_release_path(path);
+			goto again;
 		}
 	}
-
-	/* ok we have to create a completely new xattr */
-	ret = btrfs_insert_xattr_item(trans, root, path, btrfs_ino(inode),
-				      name, name_len, value, size);
-	BUG_ON(ret);
 out:
 	btrfs_free_path(path);
 	return ret;
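The reworked do_setxattr() inverts the old order: rather than look up and delete first, it tries the insert and only handles a collision when the insert reports -EEXIST. A sketch of that flow over a toy in-memory store, assuming a flat array in place of the btrfs dir-item tree; every name here is invented for illustration.

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

#define MAX_XATTRS 16
static char names[MAX_XATTRS][32];
static bool used[MAX_XATTRS];

/* stands in for btrfs_insert_xattr_item(): refuses duplicates */
static int insert_item(const char *name)
{
	int free_slot = -1;
	for (int i = 0; i < MAX_XATTRS; i++) {
		if (used[i] && strcmp(names[i], name) == 0)
			return -EEXIST;
		if (!used[i] && free_slot < 0)
			free_slot = i;
	}
	if (free_slot < 0)
		return -ENOSPC;
	snprintf(names[free_slot], sizeof(names[free_slot]), "%s", name);
	used[free_slot] = true;
	return 0;
}

/* stands in for btrfs_delete_one_dir_name() */
static void delete_item(const char *name)
{
	for (int i = 0; i < MAX_XATTRS; i++)
		if (used[i] && strcmp(names[i], name) == 0)
			used[i] = false;
}

/* shape of the new flow: insert first, and only on -EEXIST delete the
 * old item and retry the insert */
static int set_xattr(const char *name, bool create_only)
{
	int ret;
again:
	ret = insert_item(name);
	if (ret == -EEXIST) {
		if (create_only)
			return ret;      /* XATTR_CREATE keeps the old item */
		delete_item(name);
		goto again;
	}
	return ret;
}

int main(void)
{
	printf("%d\n", set_xattr("user.demo", false)); /* 0 */
	printf("%d\n", set_xattr("user.demo", true));  /* -EEXIST */
	printf("%d\n", set_xattr("user.demo", false)); /* 0: replaced */
	return 0;
}

The retry loop is also why the kernel version releases and re-looks-up the path on the delete leg: the path held after a failed insert does not carry the locks a delete needs.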