Diffstat (limited to 'fs/btrfs')
32 files changed, 1502 insertions, 1653 deletions
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index 9b72dcf1cd25..40e6ac08c21f 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -6,5 +6,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
 	   transaction.o inode.o file.o tree-defrag.o \
 	   extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \
 	   extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
-	   export.o tree-log.o acl.o free-space-cache.o zlib.o lzo.o \
+	   export.o tree-log.o free-space-cache.o zlib.o lzo.o \
 	   compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o
+
+btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
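Note: with this Makefile change, acl.o is compiled only when CONFIG_BTRFS_FS_POSIX_ACL is set, so the #ifdef/#else fallbacks that used to live at the bottom of acl.c (removed in the next diff) must be provided elsewhere when the option is off. A sketch of the usual kernel pattern follows; this is a hypothetical header for illustration, not the actual btrfs declarations:

/* Hypothetical header sketch: when the config option is off, trivial
 * inline stubs stand in and acl.c is not compiled at all. */
struct btrfs_trans_handle;
struct inode;

#ifdef CONFIG_BTRFS_FS_POSIX_ACL
int btrfs_init_acl(struct btrfs_trans_handle *trans,
		   struct inode *inode, struct inode *dir);
int btrfs_acl_chmod(struct inode *inode);
#else
static inline int btrfs_init_acl(struct btrfs_trans_handle *trans,
				 struct inode *inode, struct inode *dir)
{
	return 0;	/* ACLs disabled: nothing to initialize */
}
static inline int btrfs_acl_chmod(struct inode *inode)
{
	return 0;	/* ACLs disabled: nothing to rewrite on chmod */
}
#endif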
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index f66fc9959733..eb159aaa5a11 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -28,9 +28,7 @@
 #include "btrfs_inode.h"
 #include "xattr.h"
 
-#ifdef CONFIG_BTRFS_FS_POSIX_ACL
-
-static struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
+struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
 {
 	int size;
 	const char *name;
@@ -111,7 +109,6 @@ static int btrfs_set_acl(struct btrfs_trans_handle *trans,
 	int ret, size = 0;
 	const char *name;
 	char *value = NULL;
-	mode_t mode;
 
 	if (acl) {
 		ret = posix_acl_valid(acl);
@@ -122,13 +119,11 @@ static int btrfs_set_acl(struct btrfs_trans_handle *trans,
 
 	switch (type) {
 	case ACL_TYPE_ACCESS:
-		mode = inode->i_mode;
 		name = POSIX_ACL_XATTR_ACCESS;
 		if (acl) {
-			ret = posix_acl_equiv_mode(acl, &mode);
+			ret = posix_acl_equiv_mode(acl, &inode->i_mode);
 			if (ret < 0)
 				return ret;
-			inode->i_mode = mode;
 		}
 		ret = 0;
 		break;
@@ -195,28 +190,6 @@ out:
 	return ret;
 }
 
-int btrfs_check_acl(struct inode *inode, int mask, unsigned int flags)
-{
-	int error = -EAGAIN;
-
-	if (flags & IPERM_FLAG_RCU) {
-		if (!negative_cached_acl(inode, ACL_TYPE_ACCESS))
-			error = -ECHILD;
-
-	} else {
-		struct posix_acl *acl;
-		acl = btrfs_get_acl(inode, ACL_TYPE_ACCESS);
-		if (IS_ERR(acl))
-			return PTR_ERR(acl);
-		if (acl) {
-			error = posix_acl_permission(inode, acl, mask);
-			posix_acl_release(acl);
-		}
-	}
-
-	return error;
-}
-
 /*
  * btrfs_init_acl is already generally called under fs_mutex, so the locking
  * stuff has been fixed to work with that. If the locking stuff changes, we
@@ -244,31 +217,20 @@ int btrfs_init_acl(struct btrfs_trans_handle *trans,
 	}
 
 	if (IS_POSIXACL(dir) && acl) {
-		struct posix_acl *clone;
-		mode_t mode;
-
 		if (S_ISDIR(inode->i_mode)) {
 			ret = btrfs_set_acl(trans, inode, acl,
 					    ACL_TYPE_DEFAULT);
 			if (ret)
 				goto failed;
 		}
-		clone = posix_acl_clone(acl, GFP_NOFS);
-		ret = -ENOMEM;
-		if (!clone)
-			goto failed;
-
-		mode = inode->i_mode;
-		ret = posix_acl_create_masq(clone, &mode);
-		if (ret >= 0) {
-			inode->i_mode = mode;
-			if (ret > 0) {
-				/* we need an acl */
-				ret = btrfs_set_acl(trans, inode, clone,
-						    ACL_TYPE_ACCESS);
-			}
+		ret = posix_acl_create(&acl, GFP_NOFS, &inode->i_mode);
+		if (ret < 0)
+			return ret;
+
+		if (ret > 0) {
+			/* we need an acl */
+			ret = btrfs_set_acl(trans, inode, acl, ACL_TYPE_ACCESS);
 		}
-		posix_acl_release(clone);
 	}
 failed:
 	posix_acl_release(acl);
@@ -278,7 +240,7 @@ failed:
 
 int btrfs_acl_chmod(struct inode *inode)
 {
-	struct posix_acl *acl, *clone;
+	struct posix_acl *acl;
 	int ret = 0;
 
 	if (S_ISLNK(inode->i_mode))
@@ -291,17 +253,11 @@ int btrfs_acl_chmod(struct inode *inode)
 	if (IS_ERR_OR_NULL(acl))
 		return PTR_ERR(acl);
 
-	clone = posix_acl_clone(acl, GFP_KERNEL);
+	ret = posix_acl_chmod(&acl, GFP_KERNEL, inode->i_mode);
+	if (ret)
+		return ret;
+	ret = btrfs_set_acl(NULL, inode, acl, ACL_TYPE_ACCESS);
 	posix_acl_release(acl);
-	if (!clone)
-		return -ENOMEM;
-
-	ret = posix_acl_chmod_masq(clone, inode->i_mode);
-	if (!ret)
-		ret = btrfs_set_acl(NULL, inode, clone, ACL_TYPE_ACCESS);
-
-	posix_acl_release(clone);
-
 	return ret;
 }
 
@@ -318,18 +274,3 @@ const struct xattr_handler btrfs_xattr_acl_access_handler = {
 	.get = btrfs_xattr_acl_get,
 	.set = btrfs_xattr_acl_set,
 };
-
-#else /* CONFIG_BTRFS_FS_POSIX_ACL */
-
-int btrfs_acl_chmod(struct inode *inode)
-{
-	return 0;
-}
-
-int btrfs_init_acl(struct btrfs_trans_handle *trans,
-		   struct inode *inode, struct inode *dir)
-{
-	return 0;
-}
-
-#endif /* CONFIG_BTRFS_FS_POSIX_ACL */
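The rewrite above leans on the return convention of the new posix_acl_create() helper: a negative value is an error, 0 means the ACL was fully representable in the mode bits (nothing left to store), and a positive value means the mode was updated and an access ACL must still be set on the inode. A self-contained sketch of that caller pattern (plain userspace C with a toy stand-in for the helper, not the kernel API):

#include <stdio.h>

/* Toy model of the posix_acl_create() contract:
 *  < 0 -> error
 *    0 -> ACL folded entirely into the mode bits
 *  > 0 -> mode updated AND an access ACL still needs storing
 */
static int posix_acl_create_model(int *mode, int acl_has_extra_entries)
{
	*mode &= 0755;			/* pretend we masked the mode */
	return acl_has_extra_entries;	/* 1: caller must store the ACL */
}

int main(void)
{
	int mode = 0777;
	int ret = posix_acl_create_model(&mode, 1);

	if (ret < 0)
		return 1;	/* error path: propagate */
	if (ret > 0)
		printf("store access ACL, mode now %o\n", mode);
	else
		printf("mode-only, nothing to store (mode %o)\n", mode);
	return 0;
}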
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 52d7eca8c7bf..502b9e988679 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -34,6 +34,9 @@ struct btrfs_inode {
 	 */
 	struct btrfs_key location;
 
+	/* Lock for counters */
+	spinlock_t lock;
+
 	/* the extent_tree has caches of all the extent mappings to disk */
 	struct extent_map_tree extent_tree;
 
@@ -134,8 +137,8 @@ struct btrfs_inode {
 	 * items we think we'll end up using, and reserved_extents is the number
 	 * of extent items we've reserved metadata for.
 	 */
-	atomic_t outstanding_extents;
-	atomic_t reserved_extents;
+	unsigned outstanding_extents;
+	unsigned reserved_extents;
 
 	/*
 	 * ordered_data_close is set by truncate when a file that used
@@ -184,4 +187,13 @@ static inline void btrfs_i_size_write(struct inode *inode, u64 size)
 	BTRFS_I(inode)->disk_i_size = size;
 }
 
+static inline bool btrfs_is_free_space_inode(struct btrfs_root *root,
+					     struct inode *inode)
+{
+	if (root == root->fs_info->tree_root ||
+	    BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID)
+		return true;
+	return false;
+}
+
 #endif
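outstanding_extents and reserved_extents stop being atomics here: both become plain unsigned counters guarded by the new per-inode spinlock, so callers can read and update the pair consistently instead of racing between two independent atomics. A minimal userspace sketch of that pattern (a pthread mutex standing in for the kernel spinlock; names are illustrative):

#include <pthread.h>

/* Two counters that must stay mutually consistent: one lock around both
 * replaces two independent atomics, mirroring the btrfs_inode change. */
struct inode_counters {
	pthread_mutex_t lock;	/* kernel version uses spinlock_t */
	unsigned outstanding_extents;
	unsigned reserved_extents;
};

static unsigned extents_to_reserve(struct inode_counters *c, unsigned nr)
{
	unsigned missing;

	pthread_mutex_lock(&c->lock);
	c->outstanding_extents += nr;
	/* both fields are read under the same lock, so this comparison
	 * cannot race the way two separate atomic reads could */
	missing = c->outstanding_extents > c->reserved_extents ?
		  c->outstanding_extents - c->reserved_extents : 0;
	c->reserved_extents += missing;
	pthread_mutex_unlock(&c->lock);
	return missing;
}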
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index bfe42b03eaf9..8ec5d86f1734 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -338,6 +338,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
 	u64 first_byte = disk_start;
 	struct block_device *bdev;
 	int ret;
+	int skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
 
 	WARN_ON(start & ((u64)PAGE_CACHE_SIZE - 1));
 	cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS);
@@ -392,8 +393,11 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
 			ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
 			BUG_ON(ret);
 
-			ret = btrfs_csum_one_bio(root, inode, bio, start, 1);
-			BUG_ON(ret);
+			if (!skip_sum) {
+				ret = btrfs_csum_one_bio(root, inode, bio,
+							 start, 1);
+				BUG_ON(ret);
+			}
 
 			ret = btrfs_map_bio(root, WRITE, bio, 0, 1);
 			BUG_ON(ret);
@@ -418,8 +422,10 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
 	ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
 	BUG_ON(ret);
 
-	ret = btrfs_csum_one_bio(root, inode, bio, start, 1);
-	BUG_ON(ret);
+	if (!skip_sum) {
+		ret = btrfs_csum_one_bio(root, inode, bio, start, 1);
+		BUG_ON(ret);
+	}
 
 	ret = btrfs_map_bio(root, WRITE, bio, 0, 1);
 	BUG_ON(ret);
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 2e667868e0d2..011cab3aca8d 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -54,8 +54,13 @@ noinline void btrfs_set_path_blocking(struct btrfs_path *p)
 {
 	int i;
 	for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
-		if (p->nodes[i] && p->locks[i])
-			btrfs_set_lock_blocking(p->nodes[i]);
+		if (!p->nodes[i] || !p->locks[i])
+			continue;
+		btrfs_set_lock_blocking_rw(p->nodes[i], p->locks[i]);
+		if (p->locks[i] == BTRFS_READ_LOCK)
+			p->locks[i] = BTRFS_READ_LOCK_BLOCKING;
+		else if (p->locks[i] == BTRFS_WRITE_LOCK)
+			p->locks[i] = BTRFS_WRITE_LOCK_BLOCKING;
 	}
 }
 
@@ -68,7 +73,7 @@ noinline void btrfs_set_path_blocking(struct btrfs_path *p)
  * for held
 */
 noinline void btrfs_clear_path_blocking(struct btrfs_path *p,
-					struct extent_buffer *held)
+					struct extent_buffer *held, int held_rw)
 {
 	int i;
 
@@ -79,19 +84,29 @@ noinline void btrfs_clear_path_blocking(struct btrfs_path *p,
	 * really sure by forcing the path to blocking before we clear
	 * the path blocking.
	 */
-	if (held)
-		btrfs_set_lock_blocking(held);
+	if (held) {
+		btrfs_set_lock_blocking_rw(held, held_rw);
+		if (held_rw == BTRFS_WRITE_LOCK)
+			held_rw = BTRFS_WRITE_LOCK_BLOCKING;
+		else if (held_rw == BTRFS_READ_LOCK)
+			held_rw = BTRFS_READ_LOCK_BLOCKING;
+	}
 	btrfs_set_path_blocking(p);
 #endif
 
 	for (i = BTRFS_MAX_LEVEL - 1; i >= 0; i--) {
-		if (p->nodes[i] && p->locks[i])
-			btrfs_clear_lock_blocking(p->nodes[i]);
+		if (p->nodes[i] && p->locks[i]) {
+			btrfs_clear_lock_blocking_rw(p->nodes[i], p->locks[i]);
+			if (p->locks[i] == BTRFS_WRITE_LOCK_BLOCKING)
+				p->locks[i] = BTRFS_WRITE_LOCK;
+			else if (p->locks[i] == BTRFS_READ_LOCK_BLOCKING)
+				p->locks[i] = BTRFS_READ_LOCK;
+		}
 	}
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 	if (held)
-		btrfs_clear_lock_blocking(held);
+		btrfs_clear_lock_blocking_rw(held, held_rw);
 #endif
 }
 
@@ -119,7 +134,7 @@ noinline void btrfs_release_path(struct btrfs_path *p)
 		if (!p->nodes[i])
 			continue;
 		if (p->locks[i]) {
-			btrfs_tree_unlock(p->nodes[i]);
+			btrfs_tree_unlock_rw(p->nodes[i], p->locks[i]);
 			p->locks[i] = 0;
 		}
 		free_extent_buffer(p->nodes[i]);
@@ -167,6 +182,25 @@ struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
 	return eb;
 }
 
+/* loop around taking references on and locking the root node of the
+ * tree until you end up with a lock on the root.  A locked buffer
+ * is returned, with a reference held.
+ */
+struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root)
+{
+	struct extent_buffer *eb;
+
+	while (1) {
+		eb = btrfs_root_node(root);
+		btrfs_tree_read_lock(eb);
+		if (eb == root->node)
+			break;
+		btrfs_tree_read_unlock(eb);
+		free_extent_buffer(eb);
+	}
+	return eb;
+}
+
 /* cowonly root (everything not a reference counted cow subvolume), just get
  * put onto a simple dirty list.  transaction.c walks this to make sure they
  * get properly updated on disk.
@@ -626,14 +660,6 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
 	for (i = start_slot; i < end_slot; i++) {
 		int close = 1;
 
-		if (!parent->map_token) {
-			map_extent_buffer(parent,
-					btrfs_node_key_ptr_offset(i),
-					sizeof(struct btrfs_key_ptr),
-					&parent->map_token, &parent->kaddr,
-					&parent->map_start, &parent->map_len,
-					KM_USER1);
-		}
 		btrfs_node_key(parent, &disk_key, i);
 		if (!progress_passed && comp_keys(&disk_key, progress) < 0)
 			continue;
@@ -656,11 +682,6 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
 			last_block = blocknr;
 			continue;
 		}
-		if (parent->map_token) {
-			unmap_extent_buffer(parent, parent->map_token,
-					    KM_USER1);
-			parent->map_token = NULL;
-		}
 
 		cur = btrfs_find_tree_block(root, blocknr, blocksize);
 		if (cur)
@@ -701,11 +722,6 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
 		btrfs_tree_unlock(cur);
 		free_extent_buffer(cur);
 	}
-	if (parent->map_token) {
-		unmap_extent_buffer(parent, parent->map_token,
-				    KM_USER1);
-		parent->map_token = NULL;
-	}
 	return err;
 }
 
@@ -746,7 +762,6 @@ static noinline int generic_bin_search(struct extent_buffer *eb,
 	struct btrfs_disk_key *tmp = NULL;
 	struct btrfs_disk_key unaligned;
 	unsigned long offset;
-	char *map_token = NULL;
 	char *kaddr = NULL;
 	unsigned long map_start = 0;
 	unsigned long map_len = 0;
@@ -756,18 +771,13 @@ static noinline int generic_bin_search(struct extent_buffer *eb,
 		mid = (low + high) / 2;
 		offset = p + mid * item_size;
 
-		if (!map_token || offset < map_start ||
+		if (!kaddr || offset < map_start ||
 		    (offset + sizeof(struct btrfs_disk_key)) >
 		    map_start + map_len) {
-			if (map_token) {
-				unmap_extent_buffer(eb, map_token, KM_USER0);
-				map_token = NULL;
-			}
 
 			err = map_private_extent_buffer(eb, offset,
 					sizeof(struct btrfs_disk_key),
-					&map_token, &kaddr,
-					&map_start, &map_len, KM_USER0);
+					&kaddr, &map_start, &map_len);
 
 			if (!err) {
 				tmp = (struct btrfs_disk_key *)(kaddr + offset -
@@ -790,14 +800,10 @@ static noinline int generic_bin_search(struct extent_buffer *eb,
 			high = mid;
 		else {
 			*slot = mid;
-			if (map_token)
-				unmap_extent_buffer(eb, map_token, KM_USER0);
 			return 0;
 		}
 	}
 	*slot = low;
-	if (map_token)
-		unmap_extent_buffer(eb, map_token, KM_USER0);
 	return 1;
 }
 
@@ -890,7 +896,8 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
 
 	mid = path->nodes[level];
 
-	WARN_ON(!path->locks[level]);
+	WARN_ON(path->locks[level] != BTRFS_WRITE_LOCK &&
+		path->locks[level] != BTRFS_WRITE_LOCK_BLOCKING);
 	WARN_ON(btrfs_header_generation(mid) != trans->transid);
 
 	orig_ptr = btrfs_node_blockptr(mid, orig_slot);
@@ -1228,7 +1235,6 @@ static void reada_for_search(struct btrfs_root *root,
 	u32 nr;
 	u32 blocksize;
 	u32 nscan = 0;
-	bool map = true;
 
 	if (level != 1)
 		return;
@@ -1250,19 +1256,8 @@ static void reada_for_search(struct btrfs_root *root,
 
 	nritems = btrfs_header_nritems(node);
 	nr = slot;
-	if (node->map_token || path->skip_locking)
-		map = false;
 
 	while (1) {
-		if (map && !node->map_token) {
-			unsigned long offset = btrfs_node_key_ptr_offset(nr);
-			map_private_extent_buffer(node, offset,
-						sizeof(struct btrfs_key_ptr),
-						&node->map_token,
-						&node->kaddr,
-						&node->map_start,
-						&node->map_len, KM_USER1);
-		}
 		if (direction < 0) {
 			if (nr == 0)
 				break;
@@ -1281,11 +1276,6 @@ static void reada_for_search(struct btrfs_root *root,
 		if ((search <= target && target - search <= 65536) ||
 		    (search > target && search - target <= 65536)) {
 			gen = btrfs_node_ptr_generation(node, nr);
-			if (map && node->map_token) {
-				unmap_extent_buffer(node, node->map_token,
-						    KM_USER1);
-				node->map_token = NULL;
-			}
 			readahead_tree_block(root, search, blocksize, gen);
 			nread += blocksize;
 		}
@@ -1293,10 +1283,6 @@ static void reada_for_search(struct btrfs_root *root,
 		if ((nread > 65536 || nscan > 32))
 			break;
 	}
-	if (map && node->map_token) {
-		unmap_extent_buffer(node, node->map_token, KM_USER1);
-		node->map_token = NULL;
-	}
 }
 
 /*
@@ -1409,7 +1395,7 @@ static noinline void unlock_up(struct btrfs_path *path, int level,
 
 		t = path->nodes[i];
 		if (i >= lowest_unlock && i > skip_level && path->locks[i]) {
-			btrfs_tree_unlock(t);
+			btrfs_tree_unlock_rw(t, path->locks[i]);
 			path->locks[i] = 0;
 		}
 	}
@@ -1436,7 +1422,7 @@ noinline void btrfs_unlock_up_safe(struct btrfs_path *path, int level)
 			continue;
 		if (!path->locks[i])
 			continue;
-		btrfs_tree_unlock(path->nodes[i]);
+		btrfs_tree_unlock_rw(path->nodes[i], path->locks[i]);
 		path->locks[i] = 0;
 	}
 }
@@ -1485,6 +1471,8 @@ read_block_for_search(struct btrfs_trans_handle *trans,
		 * we can trust our generation number
		 */
 		free_extent_buffer(tmp);
+		btrfs_set_path_blocking(p);
+
 		tmp = read_tree_block(root, blocknr, blocksize, gen);
 		if (tmp && btrfs_buffer_uptodate(tmp, gen)) {
 			*eb_ret = tmp;
@@ -1540,20 +1528,27 @@ read_block_for_search(struct btrfs_trans_handle *trans,
 static int
 setup_nodes_for_search(struct btrfs_trans_handle *trans,
 		       struct btrfs_root *root, struct btrfs_path *p,
-		       struct extent_buffer *b, int level, int ins_len)
+		       struct extent_buffer *b, int level, int ins_len,
+		       int *write_lock_level)
 {
 	int ret;
 	if ((p->search_for_split || ins_len > 0) && btrfs_header_nritems(b) >=
 	    BTRFS_NODEPTRS_PER_BLOCK(root) - 3) {
 		int sret;
 
+		if (*write_lock_level < level + 1) {
+			*write_lock_level = level + 1;
+			btrfs_release_path(p);
+			goto again;
+		}
+
 		sret = reada_for_balance(root, p, level);
 		if (sret)
 			goto again;
 
 		btrfs_set_path_blocking(p);
 		sret = split_node(trans, root, p, level);
-		btrfs_clear_path_blocking(p, NULL);
+		btrfs_clear_path_blocking(p, NULL, 0);
 
 		BUG_ON(sret > 0);
 		if (sret) {
@@ -1565,13 +1560,19 @@ setup_nodes_for_search(struct btrfs_trans_handle *trans,
 		   BTRFS_NODEPTRS_PER_BLOCK(root) / 2) {
 		int sret;
 
+		if (*write_lock_level < level + 1) {
+			*write_lock_level = level + 1;
+			btrfs_release_path(p);
+			goto again;
+		}
+
 		sret = reada_for_balance(root, p, level);
 		if (sret)
 			goto again;
 
 		btrfs_set_path_blocking(p);
 		sret = balance_level(trans, root, p, level);
-		btrfs_clear_path_blocking(p, NULL);
+		btrfs_clear_path_blocking(p, NULL, 0);
 
 		if (sret) {
 			ret = sret;
@@ -1615,27 +1616,78 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
 	int err;
 	int level;
 	int lowest_unlock = 1;
+	int root_lock;
+	/* everything at write_lock_level or lower must be write locked */
+	int write_lock_level = 0;
 	u8 lowest_level = 0;
 
 	lowest_level = p->lowest_level;
 	WARN_ON(lowest_level && ins_len > 0);
 	WARN_ON(p->nodes[0] != NULL);
 
-	if (ins_len < 0)
+	if (ins_len < 0) {
 		lowest_unlock = 2;
 
+		/* when we are removing items, we might have to go up to level
		 * two as we update tree pointers  Make sure we keep write
		 * for those levels as well
		 */
+		write_lock_level = 2;
+	} else if (ins_len > 0) {
+		/*
		 * for inserting items, make sure we have a write lock on
		 * level 1 so we can update keys
		 */
+		write_lock_level = 1;
+	}
+
+	if (!cow)
+		write_lock_level = -1;
+
+	if (cow && (p->keep_locks || p->lowest_level))
+		write_lock_level = BTRFS_MAX_LEVEL;
+
 again:
+	/*
	 * we try very hard to do read locks on the root
	 */
+	root_lock = BTRFS_READ_LOCK;
+	level = 0;
 	if (p->search_commit_root) {
+		/*
		 * the commit roots are read only
		 * so we always do read locks
		 */
 		b = root->commit_root;
 		extent_buffer_get(b);
+		level = btrfs_header_level(b);
 		if (!p->skip_locking)
-			btrfs_tree_lock(b);
+			btrfs_tree_read_lock(b);
 	} else {
-		if (p->skip_locking)
+		if (p->skip_locking) {
 			b = btrfs_root_node(root);
-		else
-			b = btrfs_lock_root_node(root);
+			level = btrfs_header_level(b);
+		} else {
+			/* we don't know the level of the root node
			 * until we actually have it read locked
			 */
+			b = btrfs_read_lock_root_node(root);
+			level = btrfs_header_level(b);
+			if (level <= write_lock_level) {
+				/* whoops, must trade for write lock */
+				btrfs_tree_read_unlock(b);
+				free_extent_buffer(b);
+				b = btrfs_lock_root_node(root);
+				root_lock = BTRFS_WRITE_LOCK;
+
+				/* the level might have changed, check again */
+				level = btrfs_header_level(b);
+			}
+		}
 	}
+	p->nodes[level] = b;
+	if (!p->skip_locking)
+		p->locks[level] = root_lock;
 
 	while (b) {
 		level = btrfs_header_level(b);
@@ -1644,10 +1696,6 @@ again:
		 * setup the path here so we can release it under lock
		 * contention with the cow code
		 */
-		p->nodes[level] = b;
-		if (!p->skip_locking)
-			p->locks[level] = 1;
-
 		if (cow) {
 			/*
			 * if we don't really need to cow this block
@@ -1659,6 +1707,16 @@ again:
 
 			btrfs_set_path_blocking(p);
 
+			/*
			 * must have write locks on this node and the
			 * parent
			 */
+			if (level + 1 > write_lock_level) {
+				write_lock_level = level + 1;
+				btrfs_release_path(p);
+				goto again;
+			}
+
 			err = btrfs_cow_block(trans, root, b,
 					      p->nodes[level + 1],
 					      p->slots[level + 1], &b);
@@ -1671,10 +1729,7 @@ cow_done:
 		BUG_ON(!cow && ins_len);
 
 		p->nodes[level] = b;
-		if (!p->skip_locking)
-			p->locks[level] = 1;
-
-		btrfs_clear_path_blocking(p, NULL);
+		btrfs_clear_path_blocking(p, NULL, 0);
 
 		/*
		 * we have a lock on b and as long as we aren't changing
@@ -1700,7 +1755,7 @@ cow_done:
 			}
 			p->slots[level] = slot;
 			err = setup_nodes_for_search(trans, root, p, b, level,
-						     ins_len);
+						     ins_len, &write_lock_level);
 			if (err == -EAGAIN)
 				goto again;
 			if (err) {
@@ -1710,6 +1765,19 @@ cow_done:
 			b = p->nodes[level];
 			slot = p->slots[level];
 
+			/*
			 * slot 0 is special, if we change the key
			 * we have to update the parent pointer
			 * which means we must have a write lock
			 * on the parent
			 */
+			if (slot == 0 && cow &&
+			    write_lock_level < level + 1) {
+				write_lock_level = level + 1;
+				btrfs_release_path(p);
+				goto again;
+			}
+
 			unlock_up(p, level, lowest_unlock);
 
 			if (level == lowest_level) {
@@ -1728,23 +1796,42 @@ cow_done:
 			}
 
 			if (!p->skip_locking) {
-				btrfs_clear_path_blocking(p, NULL);
-				err = btrfs_try_spin_lock(b);
-
-				if (!err) {
-					btrfs_set_path_blocking(p);
-					btrfs_tree_lock(b);
-					btrfs_clear_path_blocking(p, b);
+				level = btrfs_header_level(b);
+				if (level <= write_lock_level) {
+					err = btrfs_try_tree_write_lock(b);
+					if (!err) {
+						btrfs_set_path_blocking(p);
+						btrfs_tree_lock(b);
+						btrfs_clear_path_blocking(p, b,
+								  BTRFS_WRITE_LOCK);
+					}
+					p->locks[level] = BTRFS_WRITE_LOCK;
+				} else {
+					err = btrfs_try_tree_read_lock(b);
+					if (!err) {
+						btrfs_set_path_blocking(p);
+						btrfs_tree_read_lock(b);
+						btrfs_clear_path_blocking(p, b,
+								  BTRFS_READ_LOCK);
+					}
+					p->locks[level] = BTRFS_READ_LOCK;
 				}
+				p->nodes[level] = b;
 			}
 		} else {
 			p->slots[level] = slot;
 			if (ins_len > 0 &&
 			    btrfs_leaf_free_space(root, b) < ins_len) {
+				if (write_lock_level < 1) {
+					write_lock_level = 1;
+					btrfs_release_path(p);
+					goto again;
+				}
+
 				btrfs_set_path_blocking(p);
 				err = split_leaf(trans, root, key,
 						 p, ins_len, ret == 0);
-				btrfs_clear_path_blocking(p, NULL);
+				btrfs_clear_path_blocking(p, NULL, 0);
 
 				BUG_ON(err > 0);
 				if (err) {
@@ -2025,7 +2112,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
 	add_root_to_dirty_list(root);
 	extent_buffer_get(c);
 	path->nodes[level] = c;
-	path->locks[level] = 1;
+	path->locks[level] = BTRFS_WRITE_LOCK;
 	path->slots[level] = 0;
 	return 0;
 }
@@ -2253,14 +2340,6 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
 		if (path->slots[0] == i)
 			push_space += data_size;
 
-		if (!left->map_token) {
-			map_extent_buffer(left, (unsigned long)item,
-					sizeof(struct btrfs_item),
-					&left->map_token, &left->kaddr,
-					&left->map_start, &left->map_len,
-					KM_USER1);
-		}
-
 		this_item_size = btrfs_item_size(left, item);
 		if (this_item_size + sizeof(*item) + push_space > free_space)
 			break;
@@ -2271,10 +2350,6 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
 			break;
 		i--;
 	}
-	if (left->map_token) {
-		unmap_extent_buffer(left, left->map_token, KM_USER1);
-		left->map_token = NULL;
-	}
 
 	if (push_items == 0)
 		goto out_unlock;
@@ -2316,21 +2391,10 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
 	push_space = BTRFS_LEAF_DATA_SIZE(root);
 	for (i = 0; i < right_nritems; i++) {
 		item = btrfs_item_nr(right, i);
-		if (!right->map_token) {
-			map_extent_buffer(right, (unsigned long)item,
-					sizeof(struct btrfs_item),
-					&right->map_token, &right->kaddr,
-					&right->map_start, &right->map_len,
-					KM_USER1);
-		}
 		push_space -= btrfs_item_size(right, item);
 		btrfs_set_item_offset(right, item, push_space);
 	}
 
-	if (right->map_token) {
-		unmap_extent_buffer(right, right->map_token, KM_USER1);
-		right->map_token = NULL;
-	}
 	left_nritems -= push_items;
 	btrfs_set_header_nritems(left, left_nritems);
 
@@ -2467,13 +2531,6 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
 
 	for (i = 0; i < nr; i++) {
 		item = btrfs_item_nr(right, i);
-		if (!right->map_token) {
-			map_extent_buffer(right, (unsigned long)item,
-					sizeof(struct btrfs_item),
-					&right->map_token, &right->kaddr,
-					&right->map_start, &right->map_len,
-					KM_USER1);
-		}
 
 		if (!empty && push_items > 0) {
 			if (path->slots[0] < i)
@@ -2496,11 +2553,6 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
 		push_space += this_item_size + sizeof(*item);
 	}
 
-	if (right->map_token) {
-		unmap_extent_buffer(right, right->map_token, KM_USER1);
-		right->map_token = NULL;
-	}
-
 	if (push_items == 0) {
 		ret = 1;
 		goto out;
@@ -2530,23 +2582,12 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
 		u32 ioff;
 
 		item = btrfs_item_nr(left, i);
-		if (!left->map_token) {
-			map_extent_buffer(left, (unsigned long)item,
-					sizeof(struct btrfs_item),
-					&left->map_token, &left->kaddr,
-					&left->map_start, &left->map_len,
-					KM_USER1);
-		}
 
 		ioff = btrfs_item_offset(left, item);
 		btrfs_set_item_offset(left, item,
		      ioff - (BTRFS_LEAF_DATA_SIZE(root) - old_left_item_size));
 	}
 	btrfs_set_header_nritems(left, old_left_nritems + push_items);
-	if (left->map_token) {
-		unmap_extent_buffer(left, left->map_token, KM_USER1);
-		left->map_token = NULL;
-	}
 
 	/* fixup right node */
 	if (push_items > right_nritems) {
@@ -2574,21 +2615,9 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
 	for (i = 0; i < right_nritems; i++) {
 		item = btrfs_item_nr(right, i);
 
-		if (!right->map_token) {
-			map_extent_buffer(right, (unsigned long)item,
-					sizeof(struct btrfs_item),
-					&right->map_token, &right->kaddr,
-					&right->map_start, &right->map_len,
-					KM_USER1);
-		}
-
 		push_space = push_space - btrfs_item_size(right, item);
 		btrfs_set_item_offset(right, item, push_space);
 	}
-	if (right->map_token) {
-		unmap_extent_buffer(right, right->map_token, KM_USER1);
-		right->map_token = NULL;
-	}
 
 	btrfs_mark_buffer_dirty(left);
 	if (right_nritems)
@@ -2729,23 +2758,10 @@ static noinline int copy_for_split(struct btrfs_trans_handle *trans,
 		struct btrfs_item *item = btrfs_item_nr(right, i);
 		u32 ioff;
 
-		if (!right->map_token) {
-			map_extent_buffer(right, (unsigned long)item,
-					sizeof(struct btrfs_item),
-					&right->map_token, &right->kaddr,
-					&right->map_start, &right->map_len,
-					KM_USER1);
-		}
-
 		ioff = btrfs_item_offset(right, item);
 		btrfs_set_item_offset(right, item, ioff + rt_data_off);
 	}
 
-	if (right->map_token) {
-		unmap_extent_buffer(right, right->map_token, KM_USER1);
-		right->map_token = NULL;
-	}
-
 	btrfs_set_header_nritems(l, mid);
 	ret = 0;
 	btrfs_item_key(right, &disk_key, 0);
@@ -3264,23 +3280,10 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans,
 		u32 ioff;
 		item = btrfs_item_nr(leaf, i);
 
-		if (!leaf->map_token) {
-			map_extent_buffer(leaf, (unsigned long)item,
-					sizeof(struct btrfs_item),
-					&leaf->map_token, &leaf->kaddr,
-					&leaf->map_start, &leaf->map_len,
-					KM_USER1);
-		}
-
 		ioff = btrfs_item_offset(leaf, item);
 		btrfs_set_item_offset(leaf, item, ioff + size_diff);
 	}
 
-	if (leaf->map_token) {
-		unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
-		leaf->map_token = NULL;
-	}
-
 	/* shift the data */
 	if (from_end) {
 		memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
@@ -3377,22 +3380,10 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans,
 		u32 ioff;
 		item = btrfs_item_nr(leaf, i);
 
-		if (!leaf->map_token) {
-			map_extent_buffer(leaf, (unsigned long)item,
-					sizeof(struct btrfs_item),
-					&leaf->map_token, &leaf->kaddr,
-					&leaf->map_start, &leaf->map_len,
-					KM_USER1);
-		}
 		ioff = btrfs_item_offset(leaf, item);
 		btrfs_set_item_offset(leaf, item, ioff - data_size);
 	}
 
-	if (leaf->map_token) {
-		unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
-		leaf->map_token = NULL;
-	}
-
 	/* shift the data */
 	memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
		      data_end - data_size, btrfs_leaf_data(leaf) +
@@ -3494,27 +3485,13 @@ int btrfs_insert_some_items(struct btrfs_trans_handle *trans,
	 * item0..itemN ... dataN.offset..dataN.size .. data0.size
	 */
 	/* first correct the data pointers */
-	WARN_ON(leaf->map_token);
 	for (i = slot; i < nritems; i++) {
 		u32 ioff;
 
 		item = btrfs_item_nr(leaf, i);
-		if (!leaf->map_token) {
-			map_extent_buffer(leaf, (unsigned long)item,
-					sizeof(struct btrfs_item),
-					&leaf->map_token, &leaf->kaddr,
-					&leaf->map_start, &leaf->map_len,
-					KM_USER1);
-		}
-
 		ioff = btrfs_item_offset(leaf, item);
 		btrfs_set_item_offset(leaf, item, ioff - total_data);
 	}
-	if (leaf->map_token) {
-		unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
-		leaf->map_token = NULL;
-	}
-
 	/* shift the items */
 	memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
 			      btrfs_item_nr_offset(slot),
@@ -3608,27 +3585,13 @@ int setup_items_for_insert(struct btrfs_trans_handle *trans,
	 * item0..itemN ... dataN.offset..dataN.size .. data0.size
	 */
 	/* first correct the data pointers */
-	WARN_ON(leaf->map_token);
 	for (i = slot; i < nritems; i++) {
 		u32 ioff;
 
 		item = btrfs_item_nr(leaf, i);
-		if (!leaf->map_token) {
-			map_extent_buffer(leaf, (unsigned long)item,
-					sizeof(struct btrfs_item),
-					&leaf->map_token, &leaf->kaddr,
-					&leaf->map_start, &leaf->map_len,
-					KM_USER1);
-		}
-
 		ioff = btrfs_item_offset(leaf, item);
 		btrfs_set_item_offset(leaf, item, ioff - total_data);
 	}
-	if (leaf->map_token) {
-		unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
-		leaf->map_token = NULL;
-	}
-
 	/* shift the items */
 	memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
 			      btrfs_item_nr_offset(slot),
@@ -3840,22 +3803,10 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 			u32 ioff;
 
 			item = btrfs_item_nr(leaf, i);
-			if (!leaf->map_token) {
-				map_extent_buffer(leaf, (unsigned long)item,
-					sizeof(struct btrfs_item),
-					&leaf->map_token, &leaf->kaddr,
-					&leaf->map_start, &leaf->map_len,
-					KM_USER1);
-			}
 			ioff = btrfs_item_offset(leaf, item);
 			btrfs_set_item_offset(leaf, item, ioff + dsize);
 		}
 
-		if (leaf->map_token) {
-			unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
-			leaf->map_token = NULL;
-		}
-
 		memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot),
			      btrfs_item_nr_offset(slot + nr),
			      sizeof(struct btrfs_item) *
@@ -4004,11 +3955,11 @@ int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
 
 	WARN_ON(!path->keep_locks);
 again:
-	cur = btrfs_lock_root_node(root);
+	cur = btrfs_read_lock_root_node(root);
 	level = btrfs_header_level(cur);
 	WARN_ON(path->nodes[level]);
 	path->nodes[level] = cur;
-	path->locks[level] = 1;
+	path->locks[level] = BTRFS_READ_LOCK;
 
 	if (btrfs_header_generation(cur) < min_trans) {
 		ret = 1;
@@ -4098,12 +4049,12 @@ find_next_key:
 		cur = read_node_slot(root, cur, slot);
 		BUG_ON(!cur);
 
-		btrfs_tree_lock(cur);
+		btrfs_tree_read_lock(cur);
 
-		path->locks[level - 1] = 1;
+		path->locks[level - 1] = BTRFS_READ_LOCK;
 		path->nodes[level - 1] = cur;
 		unlock_up(path, level, 1);
-		btrfs_clear_path_blocking(path, NULL);
+		btrfs_clear_path_blocking(path, NULL, 0);
 	}
 out:
 	if (ret == 0)
@@ -4218,30 +4169,21 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
 	u32 nritems;
 	int ret;
 	int old_spinning = path->leave_spinning;
-	int force_blocking = 0;
+	int next_rw_lock = 0;
 
 	nritems = btrfs_header_nritems(path->nodes[0]);
 	if (nritems == 0)
 		return 1;
 
-	/*
	 * we take the blocks in an order that upsets lockdep.  Using
	 * blocking mode is the only way around it.
	 */
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-	force_blocking = 1;
-#endif
-
 	btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1);
 again:
 	level = 1;
 	next = NULL;
+	next_rw_lock = 0;
 	btrfs_release_path(path);
 
 	path->keep_locks = 1;
-
-	if (!force_blocking)
-		path->leave_spinning = 1;
+	path->leave_spinning = 1;
 
 	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
 	path->keep_locks = 0;
@@ -4281,11 +4223,12 @@ again:
 		}
 
 		if (next) {
-			btrfs_tree_unlock(next);
+			btrfs_tree_unlock_rw(next, next_rw_lock);
 			free_extent_buffer(next);
 		}
 
 		next = c;
+		next_rw_lock = path->locks[level];
 		ret = read_block_for_search(NULL, root, path, &next, level,
 					    slot, &key);
 		if (ret == -EAGAIN)
@@ -4297,15 +4240,14 @@ again:
 		}
 
 		if (!path->skip_locking) {
-			ret = btrfs_try_spin_lock(next);
+			ret = btrfs_try_tree_read_lock(next);
 			if (!ret) {
 				btrfs_set_path_blocking(path);
-				btrfs_tree_lock(next);
-				if (!force_blocking)
-					btrfs_clear_path_blocking(path, next);
+				btrfs_tree_read_lock(next);
+				btrfs_clear_path_blocking(path, next,
+							  BTRFS_READ_LOCK);
 			}
-			if (force_blocking)
-				btrfs_set_lock_blocking(next);
+			next_rw_lock = BTRFS_READ_LOCK;
 		}
 		break;
 	}
@@ -4314,14 +4256,13 @@ again:
 		level--;
 		c = path->nodes[level];
 		if (path->locks[level])
-			btrfs_tree_unlock(c);
+			btrfs_tree_unlock_rw(c, path->locks[level]);
 
 		free_extent_buffer(c);
 		path->nodes[level] = next;
 		path->slots[level] = 0;
 		if (!path->skip_locking)
-			path->locks[level] = 1;
-
+			path->locks[level] = next_rw_lock;
 		if (!level)
 			break;
 
@@ -4336,16 +4277,14 @@ again:
 		}
 
 		if (!path->skip_locking) {
-			btrfs_assert_tree_locked(path->nodes[level]);
-			ret = btrfs_try_spin_lock(next);
+			ret = btrfs_try_tree_read_lock(next);
 			if (!ret) {
 				btrfs_set_path_blocking(path);
-				btrfs_tree_lock(next);
-				if (!force_blocking)
-					btrfs_clear_path_blocking(path, next);
+				btrfs_tree_read_lock(next);
+				btrfs_clear_path_blocking(path, next,
+							  BTRFS_READ_LOCK);
 			}
-			if (force_blocking)
-				btrfs_set_lock_blocking(next);
+			next_rw_lock = BTRFS_READ_LOCK;
 		}
 	}
 	ret = 0;
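The bulk of the ctree.c changes convert the tree locks from a single spinning lock to reader/writer locks with explicit states: a path's locks[] slot now records BTRFS_READ_LOCK / BTRFS_WRITE_LOCK and their *_BLOCKING variants instead of a boolean, and btrfs_search_slot() starts with read locks and escalates via write_lock_level only when it may modify a node (COW, split/balance, slot-0 key updates). A toy model of the blocking/spinning transitions (userspace C; the enum values are illustrative, only the transitions mirror the patch):

#include <assert.h>

/* Illustrative lock states mirroring the four path->locks[] values. */
enum lock_state {
	UNLOCKED = 0,
	READ_LOCK,
	WRITE_LOCK,
	READ_LOCK_BLOCKING,
	WRITE_LOCK_BLOCKING,
};

/* models btrfs_set_path_blocking(): spinning lock -> blocking variant */
static enum lock_state make_blocking(enum lock_state s)
{
	if (s == READ_LOCK)
		return READ_LOCK_BLOCKING;
	if (s == WRITE_LOCK)
		return WRITE_LOCK_BLOCKING;
	return s;
}

/* models btrfs_clear_path_blocking(): blocking variant -> spinning lock */
static enum lock_state make_spinning(enum lock_state s)
{
	if (s == READ_LOCK_BLOCKING)
		return READ_LOCK;
	if (s == WRITE_LOCK_BLOCKING)
		return WRITE_LOCK;
	return s;
}

int main(void)
{
	enum lock_state s = READ_LOCK;

	s = make_blocking(s);	/* before operations that may sleep */
	assert(s == READ_LOCK_BLOCKING);
	s = make_spinning(s);	/* back to the cheap spinning mode */
	assert(s == READ_LOCK);
	return 0;
}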
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index f30ac05dbda7..0469263e327e 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
@@ -755,6 +755,8 @@ struct btrfs_space_info { | |||
755 | chunks for this space */ | 755 | chunks for this space */ |
756 | unsigned int chunk_alloc:1; /* set if we are allocating a chunk */ | 756 | unsigned int chunk_alloc:1; /* set if we are allocating a chunk */ |
757 | 757 | ||
758 | unsigned int flush:1; /* set if we are trying to make space */ | ||
759 | |||
758 | unsigned int force_alloc; /* set if we need to force a chunk | 760 | unsigned int force_alloc; /* set if we need to force a chunk |
759 | alloc for this space */ | 761 | alloc for this space */ |
760 | 762 | ||
@@ -764,7 +766,7 @@ struct btrfs_space_info { | |||
764 | struct list_head block_groups[BTRFS_NR_RAID_TYPES]; | 766 | struct list_head block_groups[BTRFS_NR_RAID_TYPES]; |
765 | spinlock_t lock; | 767 | spinlock_t lock; |
766 | struct rw_semaphore groups_sem; | 768 | struct rw_semaphore groups_sem; |
767 | atomic_t caching_threads; | 769 | wait_queue_head_t wait; |
768 | }; | 770 | }; |
769 | 771 | ||
770 | struct btrfs_block_rsv { | 772 | struct btrfs_block_rsv { |
@@ -824,6 +826,7 @@ struct btrfs_caching_control { | |||
824 | struct list_head list; | 826 | struct list_head list; |
825 | struct mutex mutex; | 827 | struct mutex mutex; |
826 | wait_queue_head_t wait; | 828 | wait_queue_head_t wait; |
829 | struct btrfs_work work; | ||
827 | struct btrfs_block_group_cache *block_group; | 830 | struct btrfs_block_group_cache *block_group; |
828 | u64 progress; | 831 | u64 progress; |
829 | atomic_t count; | 832 | atomic_t count; |
@@ -1032,6 +1035,8 @@ struct btrfs_fs_info { | |||
1032 | struct btrfs_workers endio_write_workers; | 1035 | struct btrfs_workers endio_write_workers; |
1033 | struct btrfs_workers endio_freespace_worker; | 1036 | struct btrfs_workers endio_freespace_worker; |
1034 | struct btrfs_workers submit_workers; | 1037 | struct btrfs_workers submit_workers; |
1038 | struct btrfs_workers caching_workers; | ||
1039 | |||
1035 | /* | 1040 | /* |
1036 | * fixup workers take dirty pages that didn't properly go through | 1041 | * fixup workers take dirty pages that didn't properly go through |
1037 | * the cow mechanism and make them safe to write. It happens | 1042 | * the cow mechanism and make them safe to write. It happens |
@@ -1219,7 +1224,7 @@ struct btrfs_root { | |||
1219 | * right now this just gets used so that a root has its own devid | 1224 | * right now this just gets used so that a root has its own devid |
1220 | * for stat. It may be used for more later | 1225 | * for stat. It may be used for more later |
1221 | */ | 1226 | */ |
1222 | struct super_block anon_super; | 1227 | dev_t anon_dev; |
1223 | }; | 1228 | }; |
1224 | 1229 | ||
1225 | struct btrfs_ioctl_defrag_range_args { | 1230 | struct btrfs_ioctl_defrag_range_args { |
@@ -1335,6 +1340,11 @@ struct btrfs_ioctl_defrag_range_args { | |||
1335 | */ | 1340 | */ |
1336 | #define BTRFS_STRING_ITEM_KEY 253 | 1341 | #define BTRFS_STRING_ITEM_KEY 253 |
1337 | 1342 | ||
1343 | /* | ||
1344 | * Flags for mount options. | ||
1345 | * | ||
1346 | * Note: don't forget to add new options to btrfs_show_options() | ||
1347 | */ | ||
1338 | #define BTRFS_MOUNT_NODATASUM (1 << 0) | 1348 | #define BTRFS_MOUNT_NODATASUM (1 << 0) |
1339 | #define BTRFS_MOUNT_NODATACOW (1 << 1) | 1349 | #define BTRFS_MOUNT_NODATACOW (1 << 1) |
1340 | #define BTRFS_MOUNT_NOBARRIER (1 << 2) | 1350 | #define BTRFS_MOUNT_NOBARRIER (1 << 2) |
@@ -2123,7 +2133,7 @@ static inline bool btrfs_mixed_space_info(struct btrfs_space_info *space_info) | |||
2123 | 2133 | ||
2124 | /* extent-tree.c */ | 2134 | /* extent-tree.c */ |
2125 | static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_root *root, | 2135 | static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_root *root, |
2126 | int num_items) | 2136 | unsigned num_items) |
2127 | { | 2137 | { |
2128 | return (root->leafsize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) * | 2138 | return (root->leafsize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) * |
2129 | 3 * num_items; | 2139 | 3 * num_items; |
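Worked example of the formula above. Assuming 4K leaf/node sizes and BTRFS_MAX_LEVEL = 8, one item can dirty a leaf plus up to seven node levels, and the code multiplies by 3 (its own allowance for CoW and splits along the path), so the arithmetic for 5 items comes out as below; the sizes here are assumptions for illustration only:

    #include <stdio.h>

    int main(void)
    {
        unsigned long leafsize = 4096, nodesize = 4096;
        unsigned max_level = 8, num_items = 5;

        /* (leafsize + nodesize * (BTRFS_MAX_LEVEL - 1)) * 3 * num_items */
        unsigned long bytes =
            (leafsize + nodesize * (max_level - 1)) * 3 * num_items;
        printf("%lu\n", bytes);   /* 491520 bytes for 5 items */
        return 0;
    }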
@@ -2217,9 +2227,6 @@ void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde); | |||
2217 | void btrfs_clear_space_info_full(struct btrfs_fs_info *info); | 2227 | void btrfs_clear_space_info_full(struct btrfs_fs_info *info); |
2218 | int btrfs_check_data_free_space(struct inode *inode, u64 bytes); | 2228 | int btrfs_check_data_free_space(struct inode *inode, u64 bytes); |
2219 | void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes); | 2229 | void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes); |
2220 | int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans, | ||
2221 | struct btrfs_root *root, | ||
2222 | int num_items); | ||
2223 | void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, | 2230 | void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, |
2224 | struct btrfs_root *root); | 2231 | struct btrfs_root *root); |
2225 | int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans, | 2232 | int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans, |
@@ -2325,7 +2332,7 @@ struct btrfs_path *btrfs_alloc_path(void); | |||
2325 | void btrfs_free_path(struct btrfs_path *p); | 2332 | void btrfs_free_path(struct btrfs_path *p); |
2326 | void btrfs_set_path_blocking(struct btrfs_path *p); | 2333 | void btrfs_set_path_blocking(struct btrfs_path *p); |
2327 | void btrfs_clear_path_blocking(struct btrfs_path *p, | 2334 | void btrfs_clear_path_blocking(struct btrfs_path *p, |
2328 | struct extent_buffer *held); | 2335 | struct extent_buffer *held, int held_rw); |
2329 | void btrfs_unlock_up_safe(struct btrfs_path *p, int level); | 2336 | void btrfs_unlock_up_safe(struct btrfs_path *p, int level); |
2330 | 2337 | ||
2331 | int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, | 2338 | int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, |
@@ -2399,8 +2406,8 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct | |||
2399 | btrfs_root_item *item, struct btrfs_key *key); | 2406 | btrfs_root_item *item, struct btrfs_key *key); |
2400 | int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid); | 2407 | int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid); |
2401 | int btrfs_find_orphan_roots(struct btrfs_root *tree_root); | 2408 | int btrfs_find_orphan_roots(struct btrfs_root *tree_root); |
2402 | int btrfs_set_root_node(struct btrfs_root_item *item, | 2409 | void btrfs_set_root_node(struct btrfs_root_item *item, |
2403 | struct extent_buffer *node); | 2410 | struct extent_buffer *node); |
2404 | void btrfs_check_and_init_root_item(struct btrfs_root_item *item); | 2411 | void btrfs_check_and_init_root_item(struct btrfs_root_item *item); |
2405 | 2412 | ||
2406 | /* dir-item.c */ | 2413 | /* dir-item.c */ |
@@ -2505,6 +2512,9 @@ int btrfs_csum_truncate(struct btrfs_trans_handle *trans, | |||
2505 | int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, | 2512 | int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, |
2506 | struct list_head *list, int search_commit); | 2513 | struct list_head *list, int search_commit); |
2507 | /* inode.c */ | 2514 | /* inode.c */ |
2515 | struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *page, | ||
2516 | size_t pg_offset, u64 start, u64 len, | ||
2517 | int create); | ||
2508 | 2518 | ||
2509 | /* RHEL and EL kernels have a patch that renames PG_checked to FsMisc */ | 2519 | /* RHEL and EL kernels have a patch that renames PG_checked to FsMisc */ |
2510 | #if defined(ClearPageFsMisc) && !defined(ClearPageChecked) | 2520 | #if defined(ClearPageFsMisc) && !defined(ClearPageChecked) |
@@ -2513,6 +2523,14 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, | |||
2513 | #define PageChecked PageFsMisc | 2523 | #define PageChecked PageFsMisc |
2514 | #endif | 2524 | #endif |
2515 | 2525 | ||
2526 | /* This forces readahead on a given range of bytes in an inode */ | ||
2527 | static inline void btrfs_force_ra(struct address_space *mapping, | ||
2528 | struct file_ra_state *ra, struct file *file, | ||
2529 | pgoff_t offset, unsigned long req_size) | ||
2530 | { | ||
2531 | page_cache_sync_readahead(mapping, ra, file, offset, req_size); | ||
2532 | } | ||
2533 | |||
2516 | struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry); | 2534 | struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry); |
2517 | int btrfs_set_inode_index(struct inode *dir, u64 *index); | 2535 | int btrfs_set_inode_index(struct inode *dir, u64 *index); |
2518 | int btrfs_unlink_inode(struct btrfs_trans_handle *trans, | 2536 | int btrfs_unlink_inode(struct btrfs_trans_handle *trans, |
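The new inline btrfs_force_ra above is now just a wrapper that kicks off synchronous readahead on a page range. As a rough userspace analogy (not the kernel interface), posix_fadvise with POSIX_FADV_WILLNEED asks the kernel to start reading a byte range ahead of use:

    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
        int fd = open("/etc/hosts", O_RDONLY);
        if (fd < 0)
            return 1;
        /* hint the kernel to start reading the first 4K asynchronously */
        int err = posix_fadvise(fd, 0, 4096, POSIX_FADV_WILLNEED);
        if (err)
            fprintf(stderr, "posix_fadvise: %d\n", err);
        close(fd);
        return 0;
    }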
@@ -2541,9 +2559,6 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, | |||
2541 | int btrfs_merge_bio_hook(struct page *page, unsigned long offset, | 2559 | int btrfs_merge_bio_hook(struct page *page, unsigned long offset, |
2542 | size_t size, struct bio *bio, unsigned long bio_flags); | 2560 | size_t size, struct bio *bio, unsigned long bio_flags); |
2543 | 2561 | ||
2544 | unsigned long btrfs_force_ra(struct address_space *mapping, | ||
2545 | struct file_ra_state *ra, struct file *file, | ||
2546 | pgoff_t offset, pgoff_t last_index); | ||
2547 | int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); | 2562 | int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); |
2548 | int btrfs_readpage(struct file *file, struct page *page); | 2563 | int btrfs_readpage(struct file *file, struct page *page); |
2549 | void btrfs_evict_inode(struct inode *inode); | 2564 | void btrfs_evict_inode(struct inode *inode); |
@@ -2597,7 +2612,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, | |||
2597 | int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, | 2612 | int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, |
2598 | struct inode *inode); | 2613 | struct inode *inode); |
2599 | int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info); | 2614 | int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info); |
2600 | int btrfs_sync_file(struct file *file, int datasync); | 2615 | int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync); |
2601 | int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | 2616 | int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, |
2602 | int skip_pinned); | 2617 | int skip_pinned); |
2603 | extern const struct file_operations btrfs_file_operations; | 2618 | extern const struct file_operations btrfs_file_operations; |
@@ -2637,13 +2652,22 @@ do { \ | |||
2637 | 2652 | ||
2638 | /* acl.c */ | 2653 | /* acl.c */ |
2639 | #ifdef CONFIG_BTRFS_FS_POSIX_ACL | 2654 | #ifdef CONFIG_BTRFS_FS_POSIX_ACL |
2640 | int btrfs_check_acl(struct inode *inode, int mask, unsigned int flags); | 2655 | struct posix_acl *btrfs_get_acl(struct inode *inode, int type); |
2641 | #else | ||
2642 | #define btrfs_check_acl NULL | ||
2643 | #endif | ||
2644 | int btrfs_init_acl(struct btrfs_trans_handle *trans, | 2656 | int btrfs_init_acl(struct btrfs_trans_handle *trans, |
2645 | struct inode *inode, struct inode *dir); | 2657 | struct inode *inode, struct inode *dir); |
2646 | int btrfs_acl_chmod(struct inode *inode); | 2658 | int btrfs_acl_chmod(struct inode *inode); |
2659 | #else | ||
2660 | #define btrfs_get_acl NULL | ||
2661 | static inline int btrfs_init_acl(struct btrfs_trans_handle *trans, | ||
2662 | struct inode *inode, struct inode *dir) | ||
2663 | { | ||
2664 | return 0; | ||
2665 | } | ||
2666 | static inline int btrfs_acl_chmod(struct inode *inode) | ||
2667 | { | ||
2668 | return 0; | ||
2669 | } | ||
2670 | #endif | ||
2647 | 2671 | ||
2648 | /* relocation.c */ | 2672 | /* relocation.c */ |
2649 | int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start); | 2673 | int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start); |
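The header change above moves the !CONFIG_BTRFS_FS_POSIX_ACL case from a NULL define plus unconditional prototypes to full no-op inline stubs, which is what lets the Makefile build acl.o only when the option is set while call sites stay free of #ifdefs. A standalone illustration of the stub pattern (CONFIG_FEATURE_ACL and feature_init_acl are made-up names, not btrfs symbols):

    #include <stdio.h>

    #define CONFIG_FEATURE_ACL 0   /* flip to 1 to "compile the feature in" */

    #if CONFIG_FEATURE_ACL
    int feature_init_acl(void) { puts("real ACL init"); return 0; }
    #else
    static inline int feature_init_acl(void) { return 0; }  /* no-op stub */
    #endif

    int main(void)
    {
        /* the call site is identical whether the feature is built or not */
        return feature_init_acl();
    }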
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 98c68e658a9b..b52c672f4c18 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c | |||
@@ -735,7 +735,7 @@ static int btrfs_batch_insert_items(struct btrfs_trans_handle *trans, | |||
735 | } | 735 | } |
736 | 736 | ||
737 | /* reset all the locked nodes in the path to spinning locks. */ | 737 | /* reset all the locked nodes in the path to spinning locks. */ |
738 | btrfs_clear_path_blocking(path, NULL); | 738 | btrfs_clear_path_blocking(path, NULL, 0); |
739 | 739 | ||
740 | /* insert the keys of the items */ | 740 | /* insert the keys of the items */ |
741 | ret = setup_items_for_insert(trans, root, path, keys, data_size, | 741 | ret = setup_items_for_insert(trans, root, path, keys, data_size, |
diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h index 8d27af4bd8b9..7083d08b2a21 100644 --- a/fs/btrfs/delayed-inode.h +++ b/fs/btrfs/delayed-inode.h | |||
@@ -25,7 +25,7 @@ | |||
25 | #include <linux/mutex.h> | 25 | #include <linux/mutex.h> |
26 | #include <linux/list.h> | 26 | #include <linux/list.h> |
27 | #include <linux/wait.h> | 27 | #include <linux/wait.h> |
28 | #include <asm/atomic.h> | 28 | #include <linux/atomic.h> |
29 | 29 | ||
30 | #include "ctree.h" | 30 | #include "ctree.h" |
31 | 31 | ||
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index 685f2593c4f0..31d84e78129b 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c | |||
@@ -89,13 +89,8 @@ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans, | |||
89 | data_size = sizeof(*dir_item) + name_len + data_len; | 89 | data_size = sizeof(*dir_item) + name_len + data_len; |
90 | dir_item = insert_with_overflow(trans, root, path, &key, data_size, | 90 | dir_item = insert_with_overflow(trans, root, path, &key, data_size, |
91 | name, name_len); | 91 | name, name_len); |
92 | /* | 92 | if (IS_ERR(dir_item)) |
93 | * FIXME: at some point we should handle xattr's that are larger than | 93 | return PTR_ERR(dir_item); |
94 | * what we can fit in our leaf. We set location to NULL b/c we arent | ||
95 | * pointing at anything else, that will change if we store the xattr | ||
96 | * data in a separate inode. | ||
97 | */ | ||
98 | BUG_ON(IS_ERR(dir_item)); | ||
99 | memset(&location, 0, sizeof(location)); | 94 | memset(&location, 0, sizeof(location)); |
100 | 95 | ||
101 | leaf = path->nodes[0]; | 96 | leaf = path->nodes[0]; |
@@ -203,8 +198,6 @@ struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, | |||
203 | struct btrfs_key key; | 198 | struct btrfs_key key; |
204 | int ins_len = mod < 0 ? -1 : 0; | 199 | int ins_len = mod < 0 ? -1 : 0; |
205 | int cow = mod != 0; | 200 | int cow = mod != 0; |
206 | struct btrfs_key found_key; | ||
207 | struct extent_buffer *leaf; | ||
208 | 201 | ||
209 | key.objectid = dir; | 202 | key.objectid = dir; |
210 | btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); | 203 | btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); |
@@ -214,18 +207,7 @@ struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, | |||
214 | ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow); | 207 | ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow); |
215 | if (ret < 0) | 208 | if (ret < 0) |
216 | return ERR_PTR(ret); | 209 | return ERR_PTR(ret); |
217 | if (ret > 0) { | 210 | if (ret > 0) |
218 | if (path->slots[0] == 0) | ||
219 | return NULL; | ||
220 | path->slots[0]--; | ||
221 | } | ||
222 | |||
223 | leaf = path->nodes[0]; | ||
224 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); | ||
225 | |||
226 | if (found_key.objectid != dir || | ||
227 | btrfs_key_type(&found_key) != BTRFS_DIR_ITEM_KEY || | ||
228 | found_key.offset != key.offset) | ||
229 | return NULL; | 211 | return NULL; |
230 | 212 | ||
231 | return btrfs_match_dir_item_name(root, path, name, name_len); | 213 | return btrfs_match_dir_item_name(root, path, name, name_len); |
@@ -320,8 +302,6 @@ struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans, | |||
320 | struct btrfs_key key; | 302 | struct btrfs_key key; |
321 | int ins_len = mod < 0 ? -1 : 0; | 303 | int ins_len = mod < 0 ? -1 : 0; |
322 | int cow = mod != 0; | 304 | int cow = mod != 0; |
323 | struct btrfs_key found_key; | ||
324 | struct extent_buffer *leaf; | ||
325 | 305 | ||
326 | key.objectid = dir; | 306 | key.objectid = dir; |
327 | btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY); | 307 | btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY); |
@@ -329,18 +309,7 @@ struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans, | |||
329 | ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow); | 309 | ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow); |
330 | if (ret < 0) | 310 | if (ret < 0) |
331 | return ERR_PTR(ret); | 311 | return ERR_PTR(ret); |
332 | if (ret > 0) { | 312 | if (ret > 0) |
333 | if (path->slots[0] == 0) | ||
334 | return NULL; | ||
335 | path->slots[0]--; | ||
336 | } | ||
337 | |||
338 | leaf = path->nodes[0]; | ||
339 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); | ||
340 | |||
341 | if (found_key.objectid != dir || | ||
342 | btrfs_key_type(&found_key) != BTRFS_XATTR_ITEM_KEY || | ||
343 | found_key.offset != key.offset) | ||
344 | return NULL; | 313 | return NULL; |
345 | 314 | ||
346 | return btrfs_match_dir_item_name(root, path, name, name_len); | 315 | return btrfs_match_dir_item_name(root, path, name, name_len); |
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 1ac8db5dc0a3..07b3ac662e19 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -100,38 +100,83 @@ struct async_submit_bio { | |||
100 | struct btrfs_work work; | 100 | struct btrfs_work work; |
101 | }; | 101 | }; |
102 | 102 | ||
103 | /* These are used to set the lockdep class on the extent buffer locks. | 103 | /* |
104 | * The class is set by the readpage_end_io_hook after the buffer has | 104 | * Lockdep class keys for extent_buffer->lock's in this root. For a given |
105 | * passed csum validation but before the pages are unlocked. | 105 | * eb, the lockdep key is determined by the btrfs_root it belongs to and |
106 | * the level the eb occupies in the tree. | ||
107 | * | ||
108 | * Different roots are used for different purposes and may nest inside each | ||
109 | * other and they require separate keysets. As lockdep keys should be | ||
110 | * static, assign keysets according to the purpose of the root as indicated | ||
111 | * by btrfs_root->objectid. This ensures that all special purpose roots | ||
112 | * have separate keysets. | ||
106 | * | 113 | * |
107 | * The lockdep class is also set by btrfs_init_new_buffer on freshly | 114 | * Lock-nesting across peer nodes is always done with the immediate parent |
108 | * allocated blocks. | 115 | * node locked thus preventing deadlock. As lockdep doesn't know this, use |
116 | * subclass to avoid triggering lockdep warning in such cases. | ||
109 | * | 117 | * |
110 | * The class is based on the level in the tree block, which allows lockdep | 118 | * The key is set by the readpage_end_io_hook after the buffer has passed |
111 | * to know that lower nodes nest inside the locks of higher nodes. | 119 | * csum validation but before the pages are unlocked. It is also set by |
120 | * btrfs_init_new_buffer on freshly allocated blocks. | ||
112 | * | 121 | * |
113 | * We also add a check to make sure the highest level of the tree is | 122 | * We also add a check to make sure the highest level of the tree is the |
114 | * the same as our lockdep setup here. If BTRFS_MAX_LEVEL changes, this | 123 | * same as our lockdep setup here. If BTRFS_MAX_LEVEL changes, this code |
115 | * code needs update as well. | 124 | * needs update as well. |
116 | */ | 125 | */ |
117 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 126 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
118 | # if BTRFS_MAX_LEVEL != 8 | 127 | # if BTRFS_MAX_LEVEL != 8 |
119 | # error | 128 | # error |
120 | # endif | 129 | # endif |
121 | static struct lock_class_key btrfs_eb_class[BTRFS_MAX_LEVEL + 1]; | 130 | |
122 | static const char *btrfs_eb_name[BTRFS_MAX_LEVEL + 1] = { | 131 | static struct btrfs_lockdep_keyset { |
123 | /* leaf */ | 132 | u64 id; /* root objectid */ |
124 | "btrfs-extent-00", | 133 | const char *name_stem; /* lock name stem */ |
125 | "btrfs-extent-01", | 134 | char names[BTRFS_MAX_LEVEL + 1][20]; |
126 | "btrfs-extent-02", | 135 | struct lock_class_key keys[BTRFS_MAX_LEVEL + 1]; |
127 | "btrfs-extent-03", | 136 | } btrfs_lockdep_keysets[] = { |
128 | "btrfs-extent-04", | 137 | { .id = BTRFS_ROOT_TREE_OBJECTID, .name_stem = "root" }, |
129 | "btrfs-extent-05", | 138 | { .id = BTRFS_EXTENT_TREE_OBJECTID, .name_stem = "extent" }, |
130 | "btrfs-extent-06", | 139 | { .id = BTRFS_CHUNK_TREE_OBJECTID, .name_stem = "chunk" }, |
131 | "btrfs-extent-07", | 140 | { .id = BTRFS_DEV_TREE_OBJECTID, .name_stem = "dev" }, |
132 | /* highest possible level */ | 141 | { .id = BTRFS_FS_TREE_OBJECTID, .name_stem = "fs" }, |
133 | "btrfs-extent-08", | 142 | { .id = BTRFS_CSUM_TREE_OBJECTID, .name_stem = "csum" }, |
143 | { .id = BTRFS_ORPHAN_OBJECTID, .name_stem = "orphan" }, | ||
144 | { .id = BTRFS_TREE_LOG_OBJECTID, .name_stem = "log" }, | ||
145 | { .id = BTRFS_TREE_RELOC_OBJECTID, .name_stem = "treloc" }, | ||
146 | { .id = BTRFS_DATA_RELOC_TREE_OBJECTID, .name_stem = "dreloc" }, | ||
147 | { .id = 0, .name_stem = "tree" }, | ||
134 | }; | 148 | }; |
149 | |||
150 | void __init btrfs_init_lockdep(void) | ||
151 | { | ||
152 | int i, j; | ||
153 | |||
154 | /* initialize lockdep class names */ | ||
155 | for (i = 0; i < ARRAY_SIZE(btrfs_lockdep_keysets); i++) { | ||
156 | struct btrfs_lockdep_keyset *ks = &btrfs_lockdep_keysets[i]; | ||
157 | |||
158 | for (j = 0; j < ARRAY_SIZE(ks->names); j++) | ||
159 | snprintf(ks->names[j], sizeof(ks->names[j]), | ||
160 | "btrfs-%s-%02d", ks->name_stem, j); | ||
161 | } | ||
162 | } | ||
163 | |||
164 | void btrfs_set_buffer_lockdep_class(u64 objectid, struct extent_buffer *eb, | ||
165 | int level) | ||
166 | { | ||
167 | struct btrfs_lockdep_keyset *ks; | ||
168 | |||
169 | BUG_ON(level >= ARRAY_SIZE(ks->keys)); | ||
170 | |||
171 | /* find the matching keyset, id 0 is the default entry */ | ||
172 | for (ks = btrfs_lockdep_keysets; ks->id; ks++) | ||
173 | if (ks->id == objectid) | ||
174 | break; | ||
175 | |||
176 | lockdep_set_class_and_name(&eb->lock, | ||
177 | &ks->keys[level], ks->names[level]); | ||
178 | } | ||
179 | |||
135 | #endif | 180 | #endif |
136 | 181 | ||
137 | /* | 182 | /* |
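The table-driven replacement above generates one lock name per (root purpose, level) pair at init time instead of hard-coding a per-level string array. A standalone model of just the name generation (the stem and buffer sizes mirror the hunk, but this is a sketch, not the kernel code):

    #include <stdio.h>

    #define MAX_LEVEL 8

    struct keyset {
        const char *stem;
        char names[MAX_LEVEL + 1][20];
    };

    int main(void)
    {
        struct keyset ks = { .stem = "extent" };
        int j;

        /* one name per tree level, e.g. "btrfs-extent-03" */
        for (j = 0; j <= MAX_LEVEL; j++)
            snprintf(ks.names[j], sizeof(ks.names[j]),
                     "btrfs-%s-%02d", ks.stem, j);
        printf("%s\n", ks.names[3]);    /* prints: btrfs-extent-03 */
        return 0;
    }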
@@ -217,7 +262,6 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, | |||
217 | unsigned long len; | 262 | unsigned long len; |
218 | unsigned long cur_len; | 263 | unsigned long cur_len; |
219 | unsigned long offset = BTRFS_CSUM_SIZE; | 264 | unsigned long offset = BTRFS_CSUM_SIZE; |
220 | char *map_token = NULL; | ||
221 | char *kaddr; | 265 | char *kaddr; |
222 | unsigned long map_start; | 266 | unsigned long map_start; |
223 | unsigned long map_len; | 267 | unsigned long map_len; |
@@ -228,8 +272,7 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, | |||
228 | len = buf->len - offset; | 272 | len = buf->len - offset; |
229 | while (len > 0) { | 273 | while (len > 0) { |
230 | err = map_private_extent_buffer(buf, offset, 32, | 274 | err = map_private_extent_buffer(buf, offset, 32, |
231 | &map_token, &kaddr, | 275 | &kaddr, &map_start, &map_len); |
232 | &map_start, &map_len, KM_USER0); | ||
233 | if (err) | 276 | if (err) |
234 | return 1; | 277 | return 1; |
235 | cur_len = min(len, map_len - (offset - map_start)); | 278 | cur_len = min(len, map_len - (offset - map_start)); |
@@ -237,7 +280,6 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, | |||
237 | crc, cur_len); | 280 | crc, cur_len); |
238 | len -= cur_len; | 281 | len -= cur_len; |
239 | offset += cur_len; | 282 | offset += cur_len; |
240 | unmap_extent_buffer(buf, map_token, KM_USER0); | ||
241 | } | 283 | } |
242 | if (csum_size > sizeof(inline_result)) { | 284 | if (csum_size > sizeof(inline_result)) { |
243 | result = kzalloc(csum_size * sizeof(char), GFP_NOFS); | 285 | result = kzalloc(csum_size * sizeof(char), GFP_NOFS); |
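The csum_tree_block hunks drop the map_token/KM_USER0 bookkeeping now that map_private_extent_buffer simply returns a pointer plus the extent of the contiguous region. The loop shape is unchanged: fold each mapped chunk into the running checksum until the buffer is consumed. A userspace model, with a flat buffer standing in for the extent buffer and a toy additive checksum standing in for crc32c:

    #include <stddef.h>
    #include <stdio.h>
    #include <string.h>

    static unsigned char buf[1000];

    /* stand-in for map_private_extent_buffer: returns the contiguous
     * region containing `offset` (here: fixed 256-byte "pages") */
    static int map_chunk(size_t offset, unsigned char **kaddr,
                         size_t *map_start, size_t *map_len)
    {
        *map_start = offset & ~(size_t)255;
        *map_len = 256;
        *kaddr = buf + *map_start;
        return 0;
    }

    int main(void)
    {
        size_t offset = 32, len = sizeof(buf) - offset;
        unsigned crc = 0;

        memset(buf, 0xab, sizeof(buf));
        while (len > 0) {
            unsigned char *kaddr;
            size_t map_start, map_len;

            if (map_chunk(offset, &kaddr, &map_start, &map_len))
                return 1;
            /* only consume what this mapping actually covers */
            size_t cur = len < map_len - (offset - map_start)
                       ? len : map_len - (offset - map_start);
            for (size_t i = 0; i < cur; i++)        /* toy checksum */
                crc += kaddr[(offset - map_start) + i];
            len -= cur;
            offset += cur;
        }
        printf("csum=%u\n", crc);
        return 0;
    }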
@@ -494,15 +536,6 @@ static noinline int check_leaf(struct btrfs_root *root, | |||
494 | return 0; | 536 | return 0; |
495 | } | 537 | } |
496 | 538 | ||
497 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | ||
498 | void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, int level) | ||
499 | { | ||
500 | lockdep_set_class_and_name(&eb->lock, | ||
501 | &btrfs_eb_class[level], | ||
502 | btrfs_eb_name[level]); | ||
503 | } | ||
504 | #endif | ||
505 | |||
506 | static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, | 539 | static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, |
507 | struct extent_state *state) | 540 | struct extent_state *state) |
508 | { | 541 | { |
@@ -553,7 +586,8 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, | |||
553 | } | 586 | } |
554 | found_level = btrfs_header_level(eb); | 587 | found_level = btrfs_header_level(eb); |
555 | 588 | ||
556 | btrfs_set_buffer_lockdep_class(eb, found_level); | 589 | btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb), |
590 | eb, found_level); | ||
557 | 591 | ||
558 | ret = csum_tree_block(root, eb, 1); | 592 | ret = csum_tree_block(root, eb, 1); |
559 | if (ret) { | 593 | if (ret) { |
@@ -1077,12 +1111,7 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, | |||
1077 | init_completion(&root->kobj_unregister); | 1111 | init_completion(&root->kobj_unregister); |
1078 | root->defrag_running = 0; | 1112 | root->defrag_running = 0; |
1079 | root->root_key.objectid = objectid; | 1113 | root->root_key.objectid = objectid; |
1080 | root->anon_super.s_root = NULL; | 1114 | root->anon_dev = 0; |
1081 | root->anon_super.s_dev = 0; | ||
1082 | INIT_LIST_HEAD(&root->anon_super.s_list); | ||
1083 | INIT_LIST_HEAD(&root->anon_super.s_instances); | ||
1084 | init_rwsem(&root->anon_super.s_umount); | ||
1085 | |||
1086 | return 0; | 1115 | return 0; |
1087 | } | 1116 | } |
1088 | 1117 | ||
@@ -1311,7 +1340,7 @@ again: | |||
1311 | spin_lock_init(&root->cache_lock); | 1340 | spin_lock_init(&root->cache_lock); |
1312 | init_waitqueue_head(&root->cache_wait); | 1341 | init_waitqueue_head(&root->cache_wait); |
1313 | 1342 | ||
1314 | ret = set_anon_super(&root->anon_super, NULL); | 1343 | ret = get_anon_bdev(&root->anon_dev); |
1315 | if (ret) | 1344 | if (ret) |
1316 | goto fail; | 1345 | goto fail; |
1317 | 1346 | ||
@@ -1603,7 +1632,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1603 | goto fail_bdi; | 1632 | goto fail_bdi; |
1604 | } | 1633 | } |
1605 | 1634 | ||
1606 | fs_info->btree_inode->i_mapping->flags &= ~__GFP_FS; | 1635 | mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS); |
1607 | 1636 | ||
1608 | INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC); | 1637 | INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC); |
1609 | INIT_LIST_HEAD(&fs_info->trans_list); | 1638 | INIT_LIST_HEAD(&fs_info->trans_list); |
@@ -1807,6 +1836,9 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1807 | fs_info->thread_pool_size), | 1836 | fs_info->thread_pool_size), |
1808 | &fs_info->generic_worker); | 1837 | &fs_info->generic_worker); |
1809 | 1838 | ||
1839 | btrfs_init_workers(&fs_info->caching_workers, "cache", | ||
1840 | 2, &fs_info->generic_worker); | ||
1841 | |||
1810 | /* a higher idle thresh on the submit workers makes it much more | 1842 | /* a higher idle thresh on the submit workers makes it much more |
1811 | * likely that bios will be sent down in a sane order to the | 1843 | * likely that bios will be sent down in a sane order to the |
1812 | * devices | 1844 | * devices |
@@ -1860,6 +1892,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1860 | btrfs_start_workers(&fs_info->endio_write_workers, 1); | 1892 | btrfs_start_workers(&fs_info->endio_write_workers, 1); |
1861 | btrfs_start_workers(&fs_info->endio_freespace_worker, 1); | 1893 | btrfs_start_workers(&fs_info->endio_freespace_worker, 1); |
1862 | btrfs_start_workers(&fs_info->delayed_workers, 1); | 1894 | btrfs_start_workers(&fs_info->delayed_workers, 1); |
1895 | btrfs_start_workers(&fs_info->caching_workers, 1); | ||
1863 | 1896 | ||
1864 | fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); | 1897 | fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); |
1865 | fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, | 1898 | fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, |
@@ -2117,6 +2150,7 @@ fail_sb_buffer: | |||
2117 | btrfs_stop_workers(&fs_info->endio_freespace_worker); | 2150 | btrfs_stop_workers(&fs_info->endio_freespace_worker); |
2118 | btrfs_stop_workers(&fs_info->submit_workers); | 2151 | btrfs_stop_workers(&fs_info->submit_workers); |
2119 | btrfs_stop_workers(&fs_info->delayed_workers); | 2152 | btrfs_stop_workers(&fs_info->delayed_workers); |
2153 | btrfs_stop_workers(&fs_info->caching_workers); | ||
2120 | fail_alloc: | 2154 | fail_alloc: |
2121 | kfree(fs_info->delayed_root); | 2155 | kfree(fs_info->delayed_root); |
2122 | fail_iput: | 2156 | fail_iput: |
@@ -2393,10 +2427,8 @@ static void free_fs_root(struct btrfs_root *root) | |||
2393 | { | 2427 | { |
2394 | iput(root->cache_inode); | 2428 | iput(root->cache_inode); |
2395 | WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree)); | 2429 | WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree)); |
2396 | if (root->anon_super.s_dev) { | 2430 | if (root->anon_dev) |
2397 | down_write(&root->anon_super.s_umount); | 2431 | free_anon_bdev(root->anon_dev); |
2398 | kill_anon_super(&root->anon_super); | ||
2399 | } | ||
2400 | free_extent_buffer(root->node); | 2432 | free_extent_buffer(root->node); |
2401 | free_extent_buffer(root->commit_root); | 2433 | free_extent_buffer(root->commit_root); |
2402 | kfree(root->free_ino_ctl); | 2434 | kfree(root->free_ino_ctl); |
@@ -2584,6 +2616,7 @@ int close_ctree(struct btrfs_root *root) | |||
2584 | btrfs_stop_workers(&fs_info->endio_freespace_worker); | 2616 | btrfs_stop_workers(&fs_info->endio_freespace_worker); |
2585 | btrfs_stop_workers(&fs_info->submit_workers); | 2617 | btrfs_stop_workers(&fs_info->submit_workers); |
2586 | btrfs_stop_workers(&fs_info->delayed_workers); | 2618 | btrfs_stop_workers(&fs_info->delayed_workers); |
2619 | btrfs_stop_workers(&fs_info->caching_workers); | ||
2587 | 2620 | ||
2588 | btrfs_close_devices(fs_info->fs_devices); | 2621 | btrfs_close_devices(fs_info->fs_devices); |
2589 | btrfs_mapping_tree_free(&fs_info->mapping_tree); | 2622 | btrfs_mapping_tree_free(&fs_info->mapping_tree); |
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index a0b610a67aae..bec3ea4bd67f 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h | |||
@@ -87,10 +87,14 @@ int btree_lock_page_hook(struct page *page); | |||
87 | 87 | ||
88 | 88 | ||
89 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 89 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
90 | void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, int level); | 90 | void btrfs_init_lockdep(void); |
91 | void btrfs_set_buffer_lockdep_class(u64 objectid, | ||
92 | struct extent_buffer *eb, int level); | ||
91 | #else | 93 | #else |
92 | static inline void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, | 94 | static inline void btrfs_init_lockdep(void) |
93 | int level) | 95 | { } |
96 | static inline void btrfs_set_buffer_lockdep_class(u64 objectid, | ||
97 | struct extent_buffer *eb, int level) | ||
94 | { | 98 | { |
95 | } | 99 | } |
96 | #endif | 100 | #endif |
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 71cd456fdb60..66bac226944e 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -320,12 +320,12 @@ static u64 add_new_free_space(struct btrfs_block_group_cache *block_group, | |||
320 | return total_added; | 320 | return total_added; |
321 | } | 321 | } |
322 | 322 | ||
323 | static int caching_kthread(void *data) | 323 | static noinline void caching_thread(struct btrfs_work *work) |
324 | { | 324 | { |
325 | struct btrfs_block_group_cache *block_group = data; | 325 | struct btrfs_block_group_cache *block_group; |
326 | struct btrfs_fs_info *fs_info = block_group->fs_info; | 326 | struct btrfs_fs_info *fs_info; |
327 | struct btrfs_caching_control *caching_ctl = block_group->caching_ctl; | 327 | struct btrfs_caching_control *caching_ctl; |
328 | struct btrfs_root *extent_root = fs_info->extent_root; | 328 | struct btrfs_root *extent_root; |
329 | struct btrfs_path *path; | 329 | struct btrfs_path *path; |
330 | struct extent_buffer *leaf; | 330 | struct extent_buffer *leaf; |
331 | struct btrfs_key key; | 331 | struct btrfs_key key; |
@@ -334,9 +334,14 @@ static int caching_kthread(void *data) | |||
334 | u32 nritems; | 334 | u32 nritems; |
335 | int ret = 0; | 335 | int ret = 0; |
336 | 336 | ||
337 | caching_ctl = container_of(work, struct btrfs_caching_control, work); | ||
338 | block_group = caching_ctl->block_group; | ||
339 | fs_info = block_group->fs_info; | ||
340 | extent_root = fs_info->extent_root; | ||
341 | |||
337 | path = btrfs_alloc_path(); | 342 | path = btrfs_alloc_path(); |
338 | if (!path) | 343 | if (!path) |
339 | return -ENOMEM; | 344 | goto out; |
340 | 345 | ||
341 | last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); | 346 | last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); |
342 | 347 | ||
@@ -433,13 +438,11 @@ err: | |||
433 | free_excluded_extents(extent_root, block_group); | 438 | free_excluded_extents(extent_root, block_group); |
434 | 439 | ||
435 | mutex_unlock(&caching_ctl->mutex); | 440 | mutex_unlock(&caching_ctl->mutex); |
441 | out: | ||
436 | wake_up(&caching_ctl->wait); | 442 | wake_up(&caching_ctl->wait); |
437 | 443 | ||
438 | put_caching_control(caching_ctl); | 444 | put_caching_control(caching_ctl); |
439 | atomic_dec(&block_group->space_info->caching_threads); | ||
440 | btrfs_put_block_group(block_group); | 445 | btrfs_put_block_group(block_group); |
441 | |||
442 | return 0; | ||
443 | } | 446 | } |
444 | 447 | ||
445 | static int cache_block_group(struct btrfs_block_group_cache *cache, | 448 | static int cache_block_group(struct btrfs_block_group_cache *cache, |
@@ -449,7 +452,6 @@ static int cache_block_group(struct btrfs_block_group_cache *cache, | |||
449 | { | 452 | { |
450 | struct btrfs_fs_info *fs_info = cache->fs_info; | 453 | struct btrfs_fs_info *fs_info = cache->fs_info; |
451 | struct btrfs_caching_control *caching_ctl; | 454 | struct btrfs_caching_control *caching_ctl; |
452 | struct task_struct *tsk; | ||
453 | int ret = 0; | 455 | int ret = 0; |
454 | 456 | ||
455 | smp_mb(); | 457 | smp_mb(); |
@@ -501,6 +503,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache, | |||
501 | caching_ctl->progress = cache->key.objectid; | 503 | caching_ctl->progress = cache->key.objectid; |
502 | /* one for the caching thread, one for caching block group list */ | 504 | /* one for the caching thread, one for caching block group list */ |
503 | atomic_set(&caching_ctl->count, 2); | 505 | atomic_set(&caching_ctl->count, 2); |
506 | caching_ctl->work.func = caching_thread; | ||
504 | 507 | ||
505 | spin_lock(&cache->lock); | 508 | spin_lock(&cache->lock); |
506 | if (cache->cached != BTRFS_CACHE_NO) { | 509 | if (cache->cached != BTRFS_CACHE_NO) { |
@@ -516,16 +519,9 @@ static int cache_block_group(struct btrfs_block_group_cache *cache, | |||
516 | list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups); | 519 | list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups); |
517 | up_write(&fs_info->extent_commit_sem); | 520 | up_write(&fs_info->extent_commit_sem); |
518 | 521 | ||
519 | atomic_inc(&cache->space_info->caching_threads); | ||
520 | btrfs_get_block_group(cache); | 522 | btrfs_get_block_group(cache); |
521 | 523 | ||
522 | tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu\n", | 524 | btrfs_queue_worker(&fs_info->caching_workers, &caching_ctl->work); |
523 | cache->key.objectid); | ||
524 | if (IS_ERR(tsk)) { | ||
525 | ret = PTR_ERR(tsk); | ||
526 | printk(KERN_ERR "error running thread %d\n", ret); | ||
527 | BUG(); | ||
528 | } | ||
529 | 525 | ||
530 | return ret; | 526 | return ret; |
531 | } | 527 | } |
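The conversion above turns the per-block-group caching kthread into a btrfs_work item queued on a shared two-thread worker pool; the handler recovers its btrfs_caching_control via container_of on the embedded work struct. A minimal standalone sketch of that embed-and-recover pattern (struct work here is a stand-in, not the btrfs_workers API):

    #include <stddef.h>
    #include <stdio.h>

    #define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

    struct work { void (*func)(struct work *); };

    struct caching_control {
        int progress;
        struct work work;   /* embedded, like btrfs_work above */
    };

    static void caching_fn(struct work *w)
    {
        /* recover the containing control struct from the work pointer */
        struct caching_control *ctl =
            container_of(w, struct caching_control, work);
        printf("progress=%d\n", ctl->progress);
    }

    int main(void)
    {
        struct caching_control ctl = { .progress = 42 };

        ctl.work.func = caching_fn;
        ctl.work.func(&ctl.work);   /* a worker pool would invoke this */
        return 0;
    }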
@@ -667,7 +663,9 @@ int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len) | |||
667 | struct btrfs_path *path; | 663 | struct btrfs_path *path; |
668 | 664 | ||
669 | path = btrfs_alloc_path(); | 665 | path = btrfs_alloc_path(); |
670 | BUG_ON(!path); | 666 | if (!path) |
667 | return -ENOMEM; | ||
668 | |||
671 | key.objectid = start; | 669 | key.objectid = start; |
672 | key.offset = len; | 670 | key.offset = len; |
673 | btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); | 671 | btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); |
@@ -2932,9 +2930,10 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, | |||
2932 | found->full = 0; | 2930 | found->full = 0; |
2933 | found->force_alloc = CHUNK_ALLOC_NO_FORCE; | 2931 | found->force_alloc = CHUNK_ALLOC_NO_FORCE; |
2934 | found->chunk_alloc = 0; | 2932 | found->chunk_alloc = 0; |
2933 | found->flush = 0; | ||
2934 | init_waitqueue_head(&found->wait); | ||
2935 | *space_info = found; | 2935 | *space_info = found; |
2936 | list_add_rcu(&found->list, &info->space_info); | 2936 | list_add_rcu(&found->list, &info->space_info); |
2937 | atomic_set(&found->caching_threads, 0); | ||
2938 | return 0; | 2937 | return 0; |
2939 | } | 2938 | } |
2940 | 2939 | ||
@@ -3275,6 +3274,9 @@ again: | |||
3275 | } | 3274 | } |
3276 | 3275 | ||
3277 | ret = btrfs_alloc_chunk(trans, extent_root, flags); | 3276 | ret = btrfs_alloc_chunk(trans, extent_root, flags); |
3277 | if (ret < 0 && ret != -ENOSPC) | ||
3278 | goto out; | ||
3279 | |||
3278 | spin_lock(&space_info->lock); | 3280 | spin_lock(&space_info->lock); |
3279 | if (ret) | 3281 | if (ret) |
3280 | space_info->full = 1; | 3282 | space_info->full = 1; |
@@ -3284,6 +3286,7 @@ again: | |||
3284 | space_info->force_alloc = CHUNK_ALLOC_NO_FORCE; | 3286 | space_info->force_alloc = CHUNK_ALLOC_NO_FORCE; |
3285 | space_info->chunk_alloc = 0; | 3287 | space_info->chunk_alloc = 0; |
3286 | spin_unlock(&space_info->lock); | 3288 | spin_unlock(&space_info->lock); |
3289 | out: | ||
3287 | mutex_unlock(&extent_root->fs_info->chunk_mutex); | 3290 | mutex_unlock(&extent_root->fs_info->chunk_mutex); |
3288 | return ret; | 3291 | return ret; |
3289 | } | 3292 | } |
@@ -3314,6 +3317,14 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans, | |||
3314 | if (reserved == 0) | 3317 | if (reserved == 0) |
3315 | return 0; | 3318 | return 0; |
3316 | 3319 | ||
3320 | smp_mb(); | ||
3321 | if (root->fs_info->delalloc_bytes == 0) { | ||
3322 | if (trans) | ||
3323 | return 0; | ||
3324 | btrfs_wait_ordered_extents(root, 0, 0); | ||
3325 | return 0; | ||
3326 | } | ||
3327 | |||
3317 | max_reclaim = min(reserved, to_reclaim); | 3328 | max_reclaim = min(reserved, to_reclaim); |
3318 | 3329 | ||
3319 | while (loops < 1024) { | 3330 | while (loops < 1024) { |
@@ -3356,6 +3367,8 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans, | |||
3356 | } | 3367 | } |
3357 | 3368 | ||
3358 | } | 3369 | } |
3370 | if (reclaimed >= to_reclaim && !trans) | ||
3371 | btrfs_wait_ordered_extents(root, 0, 0); | ||
3359 | return reclaimed >= to_reclaim; | 3372 | return reclaimed >= to_reclaim; |
3360 | } | 3373 | } |
3361 | 3374 | ||
@@ -3380,15 +3393,36 @@ static int reserve_metadata_bytes(struct btrfs_trans_handle *trans, | |||
3380 | u64 num_bytes = orig_bytes; | 3393 | u64 num_bytes = orig_bytes; |
3381 | int retries = 0; | 3394 | int retries = 0; |
3382 | int ret = 0; | 3395 | int ret = 0; |
3383 | bool reserved = false; | ||
3384 | bool committed = false; | 3396 | bool committed = false; |
3397 | bool flushing = false; | ||
3385 | 3398 | ||
3386 | again: | 3399 | again: |
3387 | ret = -ENOSPC; | 3400 | ret = 0; |
3388 | if (reserved) | ||
3389 | num_bytes = 0; | ||
3390 | |||
3391 | spin_lock(&space_info->lock); | 3401 | spin_lock(&space_info->lock); |
3402 | /* | ||
3403 | * We only want to wait if somebody other than us is flushing and we are | ||
3404 | * actually alloed to flush. | ||
3405 | */ | ||
3406 | while (flush && !flushing && space_info->flush) { | ||
3407 | spin_unlock(&space_info->lock); | ||
3408 | /* | ||
3409 | * If we have a trans handle we can't wait because the flusher | ||
3410 | * may have to commit the transaction, which would mean we would | ||
3411 | * deadlock since we are waiting for the flusher to finish, but | ||
3412 | * hold the current transaction open. | ||
3413 | */ | ||
3414 | if (trans) | ||
3415 | return -EAGAIN; | ||
3416 | ret = wait_event_interruptible(space_info->wait, | ||
3417 | !space_info->flush); | ||
3418 | /* Must have been interrupted, return */ | ||
3419 | if (ret) | ||
3420 | return -EINTR; | ||
3421 | |||
3422 | spin_lock(&space_info->lock); | ||
3423 | } | ||
3424 | |||
3425 | ret = -ENOSPC; | ||
3392 | unused = space_info->bytes_used + space_info->bytes_reserved + | 3426 | unused = space_info->bytes_used + space_info->bytes_reserved + |
3393 | space_info->bytes_pinned + space_info->bytes_readonly + | 3427 | space_info->bytes_pinned + space_info->bytes_readonly + |
3394 | space_info->bytes_may_use; | 3428 | space_info->bytes_may_use; |
@@ -3403,8 +3437,7 @@ again: | |||
3403 | if (unused <= space_info->total_bytes) { | 3437 | if (unused <= space_info->total_bytes) { |
3404 | unused = space_info->total_bytes - unused; | 3438 | unused = space_info->total_bytes - unused; |
3405 | if (unused >= num_bytes) { | 3439 | if (unused >= num_bytes) { |
3406 | if (!reserved) | 3440 | space_info->bytes_reserved += orig_bytes; |
3407 | space_info->bytes_reserved += orig_bytes; | ||
3408 | ret = 0; | 3441 | ret = 0; |
3409 | } else { | 3442 | } else { |
3410 | /* | 3443 | /* |
@@ -3429,17 +3462,14 @@ again: | |||
3429 | * to reclaim space we can actually use it instead of somebody else | 3462 | * to reclaim space we can actually use it instead of somebody else |
3430 | * stealing it from us. | 3463 | * stealing it from us. |
3431 | */ | 3464 | */ |
3432 | if (ret && !reserved) { | 3465 | if (ret && flush) { |
3433 | space_info->bytes_reserved += orig_bytes; | 3466 | flushing = true; |
3434 | reserved = true; | 3467 | space_info->flush = 1; |
3435 | } | 3468 | } |
3436 | 3469 | ||
3437 | spin_unlock(&space_info->lock); | 3470 | spin_unlock(&space_info->lock); |
3438 | 3471 | ||
3439 | if (!ret) | 3472 | if (!ret || !flush) |
3440 | return 0; | ||
3441 | |||
3442 | if (!flush) | ||
3443 | goto out; | 3473 | goto out; |
3444 | 3474 | ||
3445 | /* | 3475 | /* |
@@ -3447,11 +3477,11 @@ again: | |||
3447 | * metadata until after the IO is completed. | 3477 | * metadata until after the IO is completed. |
3448 | */ | 3478 | */ |
3449 | ret = shrink_delalloc(trans, root, num_bytes, 1); | 3479 | ret = shrink_delalloc(trans, root, num_bytes, 1); |
3450 | if (ret > 0) | 3480 | if (ret < 0) |
3451 | return 0; | ||
3452 | else if (ret < 0) | ||
3453 | goto out; | 3481 | goto out; |
3454 | 3482 | ||
3483 | ret = 0; | ||
3484 | |||
3455 | /* | 3485 | /* |
3456 | * So if we were overcommitted it's possible that somebody else flushed | 3486 | * So if we were overcommitted it's possible that somebody else flushed |
3457 | * out enough space and we simply didn't have enough space to reclaim, | 3487 | * out enough space and we simply didn't have enough space to reclaim, |
@@ -3462,11 +3492,11 @@ again: | |||
3462 | goto again; | 3492 | goto again; |
3463 | } | 3493 | } |
3464 | 3494 | ||
3465 | spin_lock(&space_info->lock); | ||
3466 | /* | 3495 | /* |
3467 | * Not enough space to be reclaimed, don't bother committing the | 3496 | * Not enough space to be reclaimed, don't bother committing the |
3468 | * transaction. | 3497 | * transaction. |
3469 | */ | 3498 | */ |
3499 | spin_lock(&space_info->lock); | ||
3470 | if (space_info->bytes_pinned < orig_bytes) | 3500 | if (space_info->bytes_pinned < orig_bytes) |
3471 | ret = -ENOSPC; | 3501 | ret = -ENOSPC; |
3472 | spin_unlock(&space_info->lock); | 3502 | spin_unlock(&space_info->lock); |
@@ -3474,10 +3504,13 @@ again: | |||
3474 | goto out; | 3504 | goto out; |
3475 | 3505 | ||
3476 | ret = -EAGAIN; | 3506 | ret = -EAGAIN; |
3477 | if (trans || committed) | 3507 | if (trans) |
3478 | goto out; | 3508 | goto out; |
3479 | 3509 | ||
3480 | ret = -ENOSPC; | 3510 | ret = -ENOSPC; |
3511 | if (committed) | ||
3512 | goto out; | ||
3513 | |||
3481 | trans = btrfs_join_transaction(root); | 3514 | trans = btrfs_join_transaction(root); |
3482 | if (IS_ERR(trans)) | 3515 | if (IS_ERR(trans)) |
3483 | goto out; | 3516 | goto out; |
@@ -3489,12 +3522,12 @@ again: | |||
3489 | } | 3522 | } |
3490 | 3523 | ||
3491 | out: | 3524 | out: |
3492 | if (reserved) { | 3525 | if (flushing) { |
3493 | spin_lock(&space_info->lock); | 3526 | spin_lock(&space_info->lock); |
3494 | space_info->bytes_reserved -= orig_bytes; | 3527 | space_info->flush = 0; |
3528 | wake_up_all(&space_info->wait); | ||
3495 | spin_unlock(&space_info->lock); | 3529 | spin_unlock(&space_info->lock); |
3496 | } | 3530 | } |
3497 | |||
3498 | return ret; | 3531 | return ret; |
3499 | } | 3532 | } |
3500 | 3533 | ||
@@ -3704,7 +3737,6 @@ int btrfs_block_rsv_check(struct btrfs_trans_handle *trans, | |||
3704 | if (commit_trans) { | 3737 | if (commit_trans) { |
3705 | if (trans) | 3738 | if (trans) |
3706 | return -EAGAIN; | 3739 | return -EAGAIN; |
3707 | |||
3708 | trans = btrfs_join_transaction(root); | 3740 | trans = btrfs_join_transaction(root); |
3709 | BUG_ON(IS_ERR(trans)); | 3741 | BUG_ON(IS_ERR(trans)); |
3710 | ret = btrfs_commit_transaction(trans, root); | 3742 | ret = btrfs_commit_transaction(trans, root); |
@@ -3874,26 +3906,6 @@ int btrfs_truncate_reserve_metadata(struct btrfs_trans_handle *trans, | |||
3874 | return 0; | 3906 | return 0; |
3875 | } | 3907 | } |
3876 | 3908 | ||
3877 | int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans, | ||
3878 | struct btrfs_root *root, | ||
3879 | int num_items) | ||
3880 | { | ||
3881 | u64 num_bytes; | ||
3882 | int ret; | ||
3883 | |||
3884 | if (num_items == 0 || root->fs_info->chunk_root == root) | ||
3885 | return 0; | ||
3886 | |||
3887 | num_bytes = btrfs_calc_trans_metadata_size(root, num_items); | ||
3888 | ret = btrfs_block_rsv_add(trans, root, &root->fs_info->trans_block_rsv, | ||
3889 | num_bytes); | ||
3890 | if (!ret) { | ||
3891 | trans->bytes_reserved += num_bytes; | ||
3892 | trans->block_rsv = &root->fs_info->trans_block_rsv; | ||
3893 | } | ||
3894 | return ret; | ||
3895 | } | ||
3896 | |||
3897 | void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, | 3909 | void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, |
3898 | struct btrfs_root *root) | 3910 | struct btrfs_root *root) |
3899 | { | 3911 | { |
@@ -3944,6 +3956,30 @@ int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans, | |||
3944 | return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); | 3956 | return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); |
3945 | } | 3957 | } |
3946 | 3958 | ||
3959 | static unsigned drop_outstanding_extent(struct inode *inode) | ||
3960 | { | ||
3961 | unsigned dropped_extents = 0; | ||
3962 | |||
3963 | spin_lock(&BTRFS_I(inode)->lock); | ||
3964 | BUG_ON(!BTRFS_I(inode)->outstanding_extents); | ||
3965 | BTRFS_I(inode)->outstanding_extents--; | ||
3966 | |||
3967 | /* | ||
3968 | * If we still have at least as many outstanding extents as we have | ||
3969 | * reserved extents, leave the reserved extents count alone. | ||
3970 | */ | ||
3971 | if (BTRFS_I(inode)->outstanding_extents >= | ||
3972 | BTRFS_I(inode)->reserved_extents) | ||
3973 | goto out; | ||
3974 | |||
3975 | dropped_extents = BTRFS_I(inode)->reserved_extents - | ||
3976 | BTRFS_I(inode)->outstanding_extents; | ||
3977 | BTRFS_I(inode)->reserved_extents -= dropped_extents; | ||
3978 | out: | ||
3979 | spin_unlock(&BTRFS_I(inode)->lock); | ||
3980 | return dropped_extents; | ||
3981 | } | ||
3982 | |||
3947 | static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes) | 3983 | static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes) |
3948 | { | 3984 | { |
3949 | return num_bytes >>= 3; | 3985 | return num_bytes >>= 3; |
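drop_outstanding_extent, added above, decrements the per-inode outstanding count under BTRFS_I(inode)->lock and gives back reservations only once outstanding drops below reserved, returning how many reserved extents the caller may release. The arithmetic in a standalone model (locking omitted, struct names illustrative):

    #include <stdio.h>

    struct inode_ctr { unsigned outstanding, reserved; };

    static unsigned drop_outstanding(struct inode_ctr *i)
    {
        unsigned dropped = 0;

        i->outstanding--;
        /* only release reservations no longer covering an extent */
        if (i->outstanding < i->reserved) {
            dropped = i->reserved - i->outstanding;
            i->reserved -= dropped;
        }
        return dropped;
    }

    int main(void)
    {
        struct inode_ctr i = { .outstanding = 3, .reserved = 2 };

        printf("%u\n", drop_outstanding(&i));  /* 0: still fully covered */
        printf("%u\n", drop_outstanding(&i));  /* 1: release one reservation */
        return 0;
    }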
@@ -3953,9 +3989,8 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) | |||
3953 | { | 3989 | { |
3954 | struct btrfs_root *root = BTRFS_I(inode)->root; | 3990 | struct btrfs_root *root = BTRFS_I(inode)->root; |
3955 | struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv; | 3991 | struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv; |
3956 | u64 to_reserve; | 3992 | u64 to_reserve = 0; |
3957 | int nr_extents; | 3993 | unsigned nr_extents = 0; |
3958 | int reserved_extents; | ||
3959 | int ret; | 3994 | int ret; |
3960 | 3995 | ||
3961 | if (btrfs_transaction_in_commit(root->fs_info)) | 3996 | if (btrfs_transaction_in_commit(root->fs_info)) |
@@ -3963,66 +3998,49 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) | |||
3963 | 3998 | ||
3964 | num_bytes = ALIGN(num_bytes, root->sectorsize); | 3999 | num_bytes = ALIGN(num_bytes, root->sectorsize); |
3965 | 4000 | ||
3966 | nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents) + 1; | 4001 | spin_lock(&BTRFS_I(inode)->lock); |
3967 | reserved_extents = atomic_read(&BTRFS_I(inode)->reserved_extents); | 4002 | BTRFS_I(inode)->outstanding_extents++; |
4003 | |||
4004 | if (BTRFS_I(inode)->outstanding_extents > | ||
4005 | BTRFS_I(inode)->reserved_extents) { | ||
4006 | nr_extents = BTRFS_I(inode)->outstanding_extents - | ||
4007 | BTRFS_I(inode)->reserved_extents; | ||
4008 | BTRFS_I(inode)->reserved_extents += nr_extents; | ||
3968 | 4009 | ||
3969 | if (nr_extents > reserved_extents) { | ||
3970 | nr_extents -= reserved_extents; | ||
3971 | to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents); | 4010 | to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents); |
3972 | } else { | ||
3973 | nr_extents = 0; | ||
3974 | to_reserve = 0; | ||
3975 | } | 4011 | } |
4012 | spin_unlock(&BTRFS_I(inode)->lock); | ||
3976 | 4013 | ||
3977 | to_reserve += calc_csum_metadata_size(inode, num_bytes); | 4014 | to_reserve += calc_csum_metadata_size(inode, num_bytes); |
3978 | ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1); | 4015 | ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1); |
3979 | if (ret) | 4016 | if (ret) { |
4017 | unsigned dropped; | ||
4018 | /* | ||
4019 | * We don't need the return value since our reservation failed; | ||
4020 | * we just need to clean up our counter. | ||
4021 | */ | ||
4022 | dropped = drop_outstanding_extent(inode); | ||
4023 | WARN_ON(dropped > 1); | ||
3980 | return ret; | 4024 | return ret; |
3981 | 4025 | } | |
3982 | atomic_add(nr_extents, &BTRFS_I(inode)->reserved_extents); | ||
3983 | atomic_inc(&BTRFS_I(inode)->outstanding_extents); | ||
3984 | 4026 | ||
3985 | block_rsv_add_bytes(block_rsv, to_reserve, 1); | 4027 | block_rsv_add_bytes(block_rsv, to_reserve, 1); |
3986 | 4028 | ||
3987 | if (block_rsv->size > 512 * 1024 * 1024) | ||
3988 | shrink_delalloc(NULL, root, to_reserve, 0); | ||
3989 | |||
3990 | return 0; | 4029 | return 0; |
3991 | } | 4030 | } |
3992 | 4031 | ||
3993 | void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes) | 4032 | void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes) |
3994 | { | 4033 | { |
3995 | struct btrfs_root *root = BTRFS_I(inode)->root; | 4034 | struct btrfs_root *root = BTRFS_I(inode)->root; |
3996 | u64 to_free; | 4035 | u64 to_free = 0; |
3997 | int nr_extents; | 4036 | unsigned dropped; |
3998 | int reserved_extents; | ||
3999 | 4037 | ||
4000 | num_bytes = ALIGN(num_bytes, root->sectorsize); | 4038 | num_bytes = ALIGN(num_bytes, root->sectorsize); |
4001 | atomic_dec(&BTRFS_I(inode)->outstanding_extents); | 4039 | dropped = drop_outstanding_extent(inode); |
4002 | WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents) < 0); | ||
4003 | |||
4004 | reserved_extents = atomic_read(&BTRFS_I(inode)->reserved_extents); | ||
4005 | do { | ||
4006 | int old, new; | ||
4007 | |||
4008 | nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents); | ||
4009 | if (nr_extents >= reserved_extents) { | ||
4010 | nr_extents = 0; | ||
4011 | break; | ||
4012 | } | ||
4013 | old = reserved_extents; | ||
4014 | nr_extents = reserved_extents - nr_extents; | ||
4015 | new = reserved_extents - nr_extents; | ||
4016 | old = atomic_cmpxchg(&BTRFS_I(inode)->reserved_extents, | ||
4017 | reserved_extents, new); | ||
4018 | if (likely(old == reserved_extents)) | ||
4019 | break; | ||
4020 | reserved_extents = old; | ||
4021 | } while (1); | ||
4022 | 4040 | ||
4023 | to_free = calc_csum_metadata_size(inode, num_bytes); | 4041 | to_free = calc_csum_metadata_size(inode, num_bytes); |
4024 | if (nr_extents > 0) | 4042 | if (dropped > 0) |
4025 | to_free += btrfs_calc_trans_metadata_size(root, nr_extents); | 4043 | to_free += btrfs_calc_trans_metadata_size(root, dropped); |
4026 | 4044 | ||
4027 | btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv, | 4045 | btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv, |
4028 | to_free); | 4046 | to_free); |
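The reserve path above is the mirror image: under BTRFS_I(inode)->lock it bumps outstanding_extents and only charges metadata for extents not already covered by reserved_extents, and on a failed reservation drop_outstanding_extent undoes the bump. Continuing the same standalone model (locking again omitted for brevity):

    #include <stdio.h>

    struct inode_ctr { unsigned outstanding, reserved; };

    /* returns how many newly reserved extents must be charged for */
    static unsigned reserve_extents(struct inode_ctr *i)
    {
        unsigned nr = 0;

        i->outstanding++;
        if (i->outstanding > i->reserved) {
            nr = i->outstanding - i->reserved;
            i->reserved += nr;
        }
        return nr;
    }

    int main(void)
    {
        struct inode_ctr i = { 0, 0 };

        printf("%u\n", reserve_extents(&i));   /* 1: first extent charged */
        printf("%u\n", reserve_extents(&i));   /* 1: second extent charged */
        return 0;
    }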
@@ -4444,7 +4462,9 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
4444 | printk(KERN_ERR "umm, got %d back from search" | 4462 | printk(KERN_ERR "umm, got %d back from search" |
4445 | ", was looking for %llu\n", ret, | 4463 | ", was looking for %llu\n", ret, |
4446 | (unsigned long long)bytenr); | 4464 | (unsigned long long)bytenr); |
4447 | btrfs_print_leaf(extent_root, path->nodes[0]); | 4465 | if (ret > 0) |
4466 | btrfs_print_leaf(extent_root, | ||
4467 | path->nodes[0]); | ||
4448 | } | 4468 | } |
4449 | BUG_ON(ret); | 4469 | BUG_ON(ret); |
4450 | extent_slot = path->slots[0]; | 4470 | extent_slot = path->slots[0]; |
@@ -4990,14 +5010,10 @@ have_block_group: | |||
4990 | } | 5010 | } |
4991 | 5011 | ||
4992 | /* | 5012 | /* |
4993 | * We only want to start kthread caching if we are at | 5013 | * The caching workers are limited to 2 threads, so we |
4994 | * the point where we will wait for caching to make | 5014 | * can queue as much work as we care to. |
4995 | * progress, or if our ideal search is over and we've | ||
4996 | * found somebody to start caching. | ||
4997 | */ | 5015 | */ |
4998 | if (loop > LOOP_CACHING_NOWAIT || | 5016 | if (loop > LOOP_FIND_IDEAL) { |
4999 | (loop > LOOP_FIND_IDEAL && | ||
5000 | atomic_read(&space_info->caching_threads) < 2)) { | ||
5001 | ret = cache_block_group(block_group, trans, | 5017 | ret = cache_block_group(block_group, trans, |
5002 | orig_root, 0); | 5018 | orig_root, 0); |
5003 | BUG_ON(ret); | 5019 | BUG_ON(ret); |
@@ -5065,7 +5081,9 @@ have_block_group: | |||
5066 | * group it does point to and try again | 5082 | * group it does point to and try again |
5066 | */ | 5082 | */ |
5067 | if (!last_ptr_loop && last_ptr->block_group && | 5083 | if (!last_ptr_loop && last_ptr->block_group && |
5068 | last_ptr->block_group != block_group) { | 5084 | last_ptr->block_group != block_group && |
5085 | index <= | ||
5086 | get_block_group_index(last_ptr->block_group)) { | ||
5069 | 5087 | ||
5070 | btrfs_put_block_group(block_group); | 5088 | btrfs_put_block_group(block_group); |
5071 | block_group = last_ptr->block_group; | 5089 | block_group = last_ptr->block_group; |
@@ -5219,8 +5237,7 @@ loop: | |||
5219 | if (loop == LOOP_FIND_IDEAL && found_uncached_bg) { | 5237 | if (loop == LOOP_FIND_IDEAL && found_uncached_bg) { |
5220 | found_uncached_bg = false; | 5238 | found_uncached_bg = false; |
5221 | loop++; | 5239 | loop++; |
5222 | if (!ideal_cache_percent && | 5240 | if (!ideal_cache_percent) |
5223 | atomic_read(&space_info->caching_threads)) | ||
5224 | goto search; | 5241 | goto search; |
5225 | 5242 | ||
5226 | /* | 5243 | /* |
@@ -5494,7 +5511,8 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, | |||
5494 | u32 size = sizeof(*extent_item) + sizeof(*block_info) + sizeof(*iref); | 5511 | u32 size = sizeof(*extent_item) + sizeof(*block_info) + sizeof(*iref); |
5495 | 5512 | ||
5496 | path = btrfs_alloc_path(); | 5513 | path = btrfs_alloc_path(); |
5497 | BUG_ON(!path); | 5514 | if (!path) |
5515 | return -ENOMEM; | ||
5498 | 5516 | ||
5499 | path->leave_spinning = 1; | 5517 | path->leave_spinning = 1; |
5500 | ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path, | 5518 | ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path, |
@@ -5623,7 +5641,7 @@ struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, | |||
5623 | if (!buf) | 5641 | if (!buf) |
5624 | return ERR_PTR(-ENOMEM); | 5642 | return ERR_PTR(-ENOMEM); |
5625 | btrfs_set_header_generation(buf, trans->transid); | 5643 | btrfs_set_header_generation(buf, trans->transid); |
5626 | btrfs_set_buffer_lockdep_class(buf, level); | 5644 | btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level); |
5627 | btrfs_tree_lock(buf); | 5645 | btrfs_tree_lock(buf); |
5628 | clean_tree_block(trans, root, buf); | 5646 | clean_tree_block(trans, root, buf); |
5629 | 5647 | ||
@@ -5910,7 +5928,7 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans, | |||
5910 | return 1; | 5928 | return 1; |
5911 | 5929 | ||
5912 | if (path->locks[level] && !wc->keep_locks) { | 5930 | if (path->locks[level] && !wc->keep_locks) { |
5913 | btrfs_tree_unlock(eb); | 5931 | btrfs_tree_unlock_rw(eb, path->locks[level]); |
5914 | path->locks[level] = 0; | 5932 | path->locks[level] = 0; |
5915 | } | 5933 | } |
5916 | return 0; | 5934 | return 0; |
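
From here on, path->locks[level] is no longer a plain boolean: it records which flavour of lock is held so btrfs_tree_unlock_rw() can release it correctly. A sketch of the pairing introduced by the locking rework (constant values as I recall them from locking.h of this series; what matters is that any nonzero value names the flavour held):

    #define BTRFS_WRITE_LOCK                1
    #define BTRFS_READ_LOCK                 2
    #define BTRFS_WRITE_LOCK_BLOCKING       3
    #define BTRFS_READ_LOCK_BLOCKING        4

    /* release whichever kind of tree lock 'rw' says is held (sketch) */
    void btrfs_tree_unlock_rw(struct extent_buffer *eb, int rw)
    {
            if (rw == BTRFS_WRITE_LOCK || rw == BTRFS_WRITE_LOCK_BLOCKING)
                    btrfs_tree_unlock(eb);
            else if (rw == BTRFS_READ_LOCK_BLOCKING)
                    btrfs_tree_read_unlock_blocking(eb);
            else if (rw == BTRFS_READ_LOCK)
                    btrfs_tree_read_unlock(eb);
            else
                    BUG();
    }
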
@@ -5934,7 +5952,7 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans, | |||
5934 | * keep the tree lock | 5952 | * keep the tree lock |
5935 | */ | 5953 | */ |
5936 | if (path->locks[level] && level > 0) { | 5954 | if (path->locks[level] && level > 0) { |
5937 | btrfs_tree_unlock(eb); | 5955 | btrfs_tree_unlock_rw(eb, path->locks[level]); |
5938 | path->locks[level] = 0; | 5956 | path->locks[level] = 0; |
5939 | } | 5957 | } |
5940 | return 0; | 5958 | return 0; |
@@ -6047,7 +6065,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, | |||
6047 | BUG_ON(level != btrfs_header_level(next)); | 6065 | BUG_ON(level != btrfs_header_level(next)); |
6048 | path->nodes[level] = next; | 6066 | path->nodes[level] = next; |
6049 | path->slots[level] = 0; | 6067 | path->slots[level] = 0; |
6050 | path->locks[level] = 1; | 6068 | path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING; |
6051 | wc->level = level; | 6069 | wc->level = level; |
6052 | if (wc->level == 1) | 6070 | if (wc->level == 1) |
6053 | wc->reada_slot = 0; | 6071 | wc->reada_slot = 0; |
@@ -6118,7 +6136,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, | |||
6118 | BUG_ON(level == 0); | 6136 | BUG_ON(level == 0); |
6119 | btrfs_tree_lock(eb); | 6137 | btrfs_tree_lock(eb); |
6120 | btrfs_set_lock_blocking(eb); | 6138 | btrfs_set_lock_blocking(eb); |
6121 | path->locks[level] = 1; | 6139 | path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING; |
6122 | 6140 | ||
6123 | ret = btrfs_lookup_extent_info(trans, root, | 6141 | ret = btrfs_lookup_extent_info(trans, root, |
6124 | eb->start, eb->len, | 6142 | eb->start, eb->len, |
@@ -6127,8 +6145,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, | |||
6127 | BUG_ON(ret); | 6145 | BUG_ON(ret); |
6128 | BUG_ON(wc->refs[level] == 0); | 6146 | BUG_ON(wc->refs[level] == 0); |
6129 | if (wc->refs[level] == 1) { | 6147 | if (wc->refs[level] == 1) { |
6130 | btrfs_tree_unlock(eb); | 6148 | btrfs_tree_unlock_rw(eb, path->locks[level]); |
6131 | path->locks[level] = 0; | ||
6132 | return 1; | 6149 | return 1; |
6133 | } | 6150 | } |
6134 | } | 6151 | } |
@@ -6150,7 +6167,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, | |||
6150 | btrfs_header_generation(eb) == trans->transid) { | 6167 | btrfs_header_generation(eb) == trans->transid) { |
6151 | btrfs_tree_lock(eb); | 6168 | btrfs_tree_lock(eb); |
6152 | btrfs_set_lock_blocking(eb); | 6169 | btrfs_set_lock_blocking(eb); |
6153 | path->locks[level] = 1; | 6170 | path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING; |
6154 | } | 6171 | } |
6155 | clean_tree_block(trans, root, eb); | 6172 | clean_tree_block(trans, root, eb); |
6156 | } | 6173 | } |
@@ -6229,7 +6246,8 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans, | |||
6229 | return 0; | 6246 | return 0; |
6230 | 6247 | ||
6231 | if (path->locks[level]) { | 6248 | if (path->locks[level]) { |
6232 | btrfs_tree_unlock(path->nodes[level]); | 6249 | btrfs_tree_unlock_rw(path->nodes[level], |
6250 | path->locks[level]); | ||
6233 | path->locks[level] = 0; | 6251 | path->locks[level] = 0; |
6234 | } | 6252 | } |
6235 | free_extent_buffer(path->nodes[level]); | 6253 | free_extent_buffer(path->nodes[level]); |
@@ -6265,10 +6283,14 @@ int btrfs_drop_snapshot(struct btrfs_root *root, | |||
6265 | int level; | 6283 | int level; |
6266 | 6284 | ||
6267 | path = btrfs_alloc_path(); | 6285 | path = btrfs_alloc_path(); |
6268 | BUG_ON(!path); | 6286 | if (!path) |
6287 | return -ENOMEM; | ||
6269 | 6288 | ||
6270 | wc = kzalloc(sizeof(*wc), GFP_NOFS); | 6289 | wc = kzalloc(sizeof(*wc), GFP_NOFS); |
6271 | BUG_ON(!wc); | 6290 | if (!wc) { |
6291 | btrfs_free_path(path); | ||
6292 | return -ENOMEM; | ||
6293 | } | ||
6272 | 6294 | ||
6273 | trans = btrfs_start_transaction(tree_root, 0); | 6295 | trans = btrfs_start_transaction(tree_root, 0); |
6274 | BUG_ON(IS_ERR(trans)); | 6296 | BUG_ON(IS_ERR(trans)); |
@@ -6281,7 +6303,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, | |||
6281 | path->nodes[level] = btrfs_lock_root_node(root); | 6303 | path->nodes[level] = btrfs_lock_root_node(root); |
6282 | btrfs_set_lock_blocking(path->nodes[level]); | 6304 | btrfs_set_lock_blocking(path->nodes[level]); |
6283 | path->slots[level] = 0; | 6305 | path->slots[level] = 0; |
6284 | path->locks[level] = 1; | 6306 | path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING; |
6285 | memset(&wc->update_progress, 0, | 6307 | memset(&wc->update_progress, 0, |
6286 | sizeof(wc->update_progress)); | 6308 | sizeof(wc->update_progress)); |
6287 | } else { | 6309 | } else { |
@@ -6449,7 +6471,7 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans, | |||
6449 | level = btrfs_header_level(node); | 6471 | level = btrfs_header_level(node); |
6450 | path->nodes[level] = node; | 6472 | path->nodes[level] = node; |
6451 | path->slots[level] = 0; | 6473 | path->slots[level] = 0; |
6452 | path->locks[level] = 1; | 6474 | path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING; |
6453 | 6475 | ||
6454 | wc->refs[parent_level] = 1; | 6476 | wc->refs[parent_level] = 1; |
6455 | wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF; | 6477 | wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF; |
@@ -6524,30 +6546,48 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags) | |||
6524 | return flags; | 6546 | return flags; |
6525 | } | 6547 | } |
6526 | 6548 | ||
6527 | static int set_block_group_ro(struct btrfs_block_group_cache *cache) | 6549 | static int set_block_group_ro(struct btrfs_block_group_cache *cache, int force) |
6528 | { | 6550 | { |
6529 | struct btrfs_space_info *sinfo = cache->space_info; | 6551 | struct btrfs_space_info *sinfo = cache->space_info; |
6530 | u64 num_bytes; | 6552 | u64 num_bytes; |
6553 | u64 min_allocable_bytes; | ||
6531 | int ret = -ENOSPC; | 6554 | int ret = -ENOSPC; |
6532 | 6555 | ||
6533 | if (cache->ro) | 6556 | |
6534 | return 0; | 6557 | /* |
6558 | * We need some free metadata space and some free system metadata | ||
6559 | * space for allocating chunks in a few corner cases, so keep that | ||
6560 | * slack unless we are forced to set this group read-only. | ||
6561 | */ | ||
6562 | if ((sinfo->flags & | ||
6563 | (BTRFS_BLOCK_GROUP_SYSTEM | BTRFS_BLOCK_GROUP_METADATA)) && | ||
6564 | !force) | ||
6565 | min_allocable_bytes = 1 * 1024 * 1024; | ||
6566 | else | ||
6567 | min_allocable_bytes = 0; | ||
6535 | 6568 | ||
6536 | spin_lock(&sinfo->lock); | 6569 | spin_lock(&sinfo->lock); |
6537 | spin_lock(&cache->lock); | 6570 | spin_lock(&cache->lock); |
6571 | |||
6572 | if (cache->ro) { | ||
6573 | ret = 0; | ||
6574 | goto out; | ||
6575 | } | ||
6576 | |||
6538 | num_bytes = cache->key.offset - cache->reserved - cache->pinned - | 6577 | num_bytes = cache->key.offset - cache->reserved - cache->pinned - |
6539 | cache->bytes_super - btrfs_block_group_used(&cache->item); | 6578 | cache->bytes_super - btrfs_block_group_used(&cache->item); |
6540 | 6579 | ||
6541 | if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned + | 6580 | if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned + |
6542 | sinfo->bytes_may_use + sinfo->bytes_readonly + | 6581 | sinfo->bytes_may_use + sinfo->bytes_readonly + |
6543 | cache->reserved_pinned + num_bytes <= sinfo->total_bytes) { | 6582 | cache->reserved_pinned + num_bytes + min_allocable_bytes <= |
6583 | sinfo->total_bytes) { | ||
6544 | sinfo->bytes_readonly += num_bytes; | 6584 | sinfo->bytes_readonly += num_bytes; |
6545 | sinfo->bytes_reserved += cache->reserved_pinned; | 6585 | sinfo->bytes_reserved += cache->reserved_pinned; |
6546 | cache->reserved_pinned = 0; | 6586 | cache->reserved_pinned = 0; |
6547 | cache->ro = 1; | 6587 | cache->ro = 1; |
6548 | ret = 0; | 6588 | ret = 0; |
6549 | } | 6589 | } |
6550 | 6590 | out: | |
6551 | spin_unlock(&cache->lock); | 6591 | spin_unlock(&cache->lock); |
6552 | spin_unlock(&sinfo->lock); | 6592 | spin_unlock(&sinfo->lock); |
6553 | return ret; | 6593 | return ret; |
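
The eligibility test is plain arithmetic over the space_info counters, with min_allocable_bytes added to the committed side of the inequality. It can be sanity-checked in isolation; a self-contained user-space sketch (reserved_pinned folded into 'reserved' for brevity, numbers hypothetical):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* simplified mirror of the check in set_block_group_ro() */
    static bool can_set_ro(uint64_t used, uint64_t reserved, uint64_t pinned,
                           uint64_t may_use, uint64_t readonly,
                           uint64_t group_free, uint64_t min_allocable,
                           uint64_t total)
    {
            return used + reserved + pinned + may_use + readonly +
                   group_free + min_allocable <= total;
    }

    int main(void)
    {
            uint64_t mib = 1ULL << 20, gib = 1ULL << 30;

            /* 700M used + 50M reserved + 300M becoming readonly + 1M slack
             * overflows a 1G space_info: the group must stay writable */
            printf("%d\n", can_set_ro(700 * mib, 50 * mib, 0, 0, 0,
                                      300 * mib, 1 * mib, gib));  /* 0 */

            /* with only 300M used, the same group can go read-only */
            printf("%d\n", can_set_ro(300 * mib, 50 * mib, 0, 0, 0,
                                      300 * mib, 1 * mib, gib));  /* 1 */
            return 0;
    }
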
@@ -6571,7 +6611,7 @@ int btrfs_set_block_group_ro(struct btrfs_root *root, | |||
6571 | do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, | 6611 | do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, |
6572 | CHUNK_ALLOC_FORCE); | 6612 | CHUNK_ALLOC_FORCE); |
6573 | 6613 | ||
6574 | ret = set_block_group_ro(cache); | 6614 | ret = set_block_group_ro(cache, 0); |
6575 | if (!ret) | 6615 | if (!ret) |
6576 | goto out; | 6616 | goto out; |
6577 | alloc_flags = get_alloc_profile(root, cache->space_info->flags); | 6617 | alloc_flags = get_alloc_profile(root, cache->space_info->flags); |
@@ -6579,7 +6619,7 @@ int btrfs_set_block_group_ro(struct btrfs_root *root, | |||
6579 | CHUNK_ALLOC_FORCE); | 6619 | CHUNK_ALLOC_FORCE); |
6580 | if (ret < 0) | 6620 | if (ret < 0) |
6581 | goto out; | 6621 | goto out; |
6582 | ret = set_block_group_ro(cache); | 6622 | ret = set_block_group_ro(cache, 0); |
6583 | out: | 6623 | out: |
6584 | btrfs_end_transaction(trans, root); | 6624 | btrfs_end_transaction(trans, root); |
6585 | return ret; | 6625 | return ret; |
@@ -7016,7 +7056,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
7016 | 7056 | ||
7017 | set_avail_alloc_bits(root->fs_info, cache->flags); | 7057 | set_avail_alloc_bits(root->fs_info, cache->flags); |
7018 | if (btrfs_chunk_readonly(root, cache->key.objectid)) | 7058 | if (btrfs_chunk_readonly(root, cache->key.objectid)) |
7019 | set_block_group_ro(cache); | 7059 | set_block_group_ro(cache, 1); |
7020 | } | 7060 | } |
7021 | 7061 | ||
7022 | list_for_each_entry_rcu(space_info, &root->fs_info->space_info, list) { | 7062 | list_for_each_entry_rcu(space_info, &root->fs_info->space_info, list) { |
@@ -7030,9 +7070,9 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
7030 | * mirrored block groups. | 7070 | * mirrored block groups. |
7031 | */ | 7071 | */ |
7032 | list_for_each_entry(cache, &space_info->block_groups[3], list) | 7072 | list_for_each_entry(cache, &space_info->block_groups[3], list) |
7033 | set_block_group_ro(cache); | 7073 | set_block_group_ro(cache, 1); |
7034 | list_for_each_entry(cache, &space_info->block_groups[4], list) | 7074 | list_for_each_entry(cache, &space_info->block_groups[4], list) |
7035 | set_block_group_ro(cache); | 7075 | set_block_group_ro(cache, 1); |
7036 | } | 7076 | } |
7037 | 7077 | ||
7038 | init_global_block_rsv(info); | 7078 | init_global_block_rsv(info); |
@@ -7162,11 +7202,15 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | |||
7162 | spin_unlock(&cluster->refill_lock); | 7202 | spin_unlock(&cluster->refill_lock); |
7163 | 7203 | ||
7164 | path = btrfs_alloc_path(); | 7204 | path = btrfs_alloc_path(); |
7165 | BUG_ON(!path); | 7205 | if (!path) { |
7206 | ret = -ENOMEM; | ||
7207 | goto out; | ||
7208 | } | ||
7166 | 7209 | ||
7167 | inode = lookup_free_space_inode(root, block_group, path); | 7210 | inode = lookup_free_space_inode(root, block_group, path); |
7168 | if (!IS_ERR(inode)) { | 7211 | if (!IS_ERR(inode)) { |
7169 | btrfs_orphan_add(trans, inode); | 7212 | ret = btrfs_orphan_add(trans, inode); |
7213 | BUG_ON(ret); | ||
7170 | clear_nlink(inode); | 7214 | clear_nlink(inode); |
7171 | /* One for the block groups ref */ | 7215 | /* One for the block groups ref */ |
7172 | spin_lock(&block_group->lock); | 7216 | spin_lock(&block_group->lock); |
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 7055d11c1efd..d418164a35f1 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
@@ -254,14 +254,14 @@ static void merge_cb(struct extent_io_tree *tree, struct extent_state *new, | |||
254 | * | 254 | * |
255 | * This should be called with the tree lock held. | 255 | * This should be called with the tree lock held. |
256 | */ | 256 | */ |
257 | static int merge_state(struct extent_io_tree *tree, | 257 | static void merge_state(struct extent_io_tree *tree, |
258 | struct extent_state *state) | 258 | struct extent_state *state) |
259 | { | 259 | { |
260 | struct extent_state *other; | 260 | struct extent_state *other; |
261 | struct rb_node *other_node; | 261 | struct rb_node *other_node; |
262 | 262 | ||
263 | if (state->state & (EXTENT_IOBITS | EXTENT_BOUNDARY)) | 263 | if (state->state & (EXTENT_IOBITS | EXTENT_BOUNDARY)) |
264 | return 0; | 264 | return; |
265 | 265 | ||
266 | other_node = rb_prev(&state->rb_node); | 266 | other_node = rb_prev(&state->rb_node); |
267 | if (other_node) { | 267 | if (other_node) { |
@@ -281,26 +281,19 @@ static int merge_state(struct extent_io_tree *tree, | |||
281 | if (other->start == state->end + 1 && | 281 | if (other->start == state->end + 1 && |
282 | other->state == state->state) { | 282 | other->state == state->state) { |
283 | merge_cb(tree, state, other); | 283 | merge_cb(tree, state, other); |
284 | other->start = state->start; | 284 | state->end = other->end; |
285 | state->tree = NULL; | 285 | other->tree = NULL; |
286 | rb_erase(&state->rb_node, &tree->state); | 286 | rb_erase(&other->rb_node, &tree->state); |
287 | free_extent_state(state); | 287 | free_extent_state(other); |
288 | state = NULL; | ||
289 | } | 288 | } |
290 | } | 289 | } |
291 | |||
292 | return 0; | ||
293 | } | 290 | } |
294 | 291 | ||
295 | static int set_state_cb(struct extent_io_tree *tree, | 292 | static void set_state_cb(struct extent_io_tree *tree, |
296 | struct extent_state *state, int *bits) | 293 | struct extent_state *state, int *bits) |
297 | { | 294 | { |
298 | if (tree->ops && tree->ops->set_bit_hook) { | 295 | if (tree->ops && tree->ops->set_bit_hook) |
299 | return tree->ops->set_bit_hook(tree->mapping->host, | 296 | tree->ops->set_bit_hook(tree->mapping->host, state, bits); |
300 | state, bits); | ||
301 | } | ||
302 | |||
303 | return 0; | ||
304 | } | 297 | } |
305 | 298 | ||
306 | static void clear_state_cb(struct extent_io_tree *tree, | 299 | static void clear_state_cb(struct extent_io_tree *tree, |
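
Note the direction change in the forward merge: the old code grew the *other* (following) state backwards and freed `state`; the new code grows `state` forwards and frees `other`. Since callers may hold `state` through a cached_state pointer, the surviving node must be the one they point at. A self-contained sketch of the new behaviour:

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct state { uint64_t start, end; };  /* inclusive byte range */

    /* fold a right-adjacent neighbour into 'state' and free the neighbour;
     * mirrors the rewritten forward-merge arm of merge_state() */
    static void merge_forward(struct state *state, struct state *other)
    {
            if (other->start == state->end + 1) {
                    state->end = other->end;        /* 'state' survives... */
                    free(other);                    /* ...'other' is released */
            }
    }

    int main(void)
    {
            struct state *a = malloc(sizeof(*a));
            struct state *b = malloc(sizeof(*b));

            a->start = 0;    a->end = 4095;
            b->start = 4096; b->end = 8191;
            merge_forward(a, b);    /* a now covers 0-8191, b is gone */
            printf("%llu-%llu\n", (unsigned long long)a->start,
                   (unsigned long long)a->end);
            free(a);
            return 0;
    }
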
@@ -310,6 +303,9 @@ static void clear_state_cb(struct extent_io_tree *tree, | |||
310 | tree->ops->clear_bit_hook(tree->mapping->host, state, bits); | 303 | tree->ops->clear_bit_hook(tree->mapping->host, state, bits); |
311 | } | 304 | } |
312 | 305 | ||
306 | static void set_state_bits(struct extent_io_tree *tree, | ||
307 | struct extent_state *state, int *bits); | ||
308 | |||
313 | /* | 309 | /* |
314 | * insert an extent_state struct into the tree. 'bits' are set on the | 310 | * insert an extent_state struct into the tree. 'bits' are set on the |
315 | * struct before it is inserted. | 311 | * struct before it is inserted. |
@@ -325,8 +321,6 @@ static int insert_state(struct extent_io_tree *tree, | |||
325 | int *bits) | 321 | int *bits) |
326 | { | 322 | { |
327 | struct rb_node *node; | 323 | struct rb_node *node; |
328 | int bits_to_set = *bits & ~EXTENT_CTLBITS; | ||
329 | int ret; | ||
330 | 324 | ||
331 | if (end < start) { | 325 | if (end < start) { |
332 | printk(KERN_ERR "btrfs end < start %llu %llu\n", | 326 | printk(KERN_ERR "btrfs end < start %llu %llu\n", |
@@ -336,13 +330,9 @@ static int insert_state(struct extent_io_tree *tree, | |||
336 | } | 330 | } |
337 | state->start = start; | 331 | state->start = start; |
338 | state->end = end; | 332 | state->end = end; |
339 | ret = set_state_cb(tree, state, bits); | ||
340 | if (ret) | ||
341 | return ret; | ||
342 | 333 | ||
343 | if (bits_to_set & EXTENT_DIRTY) | 334 | set_state_bits(tree, state, bits); |
344 | tree->dirty_bytes += end - start + 1; | 335 | |
345 | state->state |= bits_to_set; | ||
346 | node = tree_insert(&tree->state, end, &state->rb_node); | 336 | node = tree_insert(&tree->state, end, &state->rb_node); |
347 | if (node) { | 337 | if (node) { |
348 | struct extent_state *found; | 338 | struct extent_state *found; |
@@ -351,7 +341,6 @@ static int insert_state(struct extent_io_tree *tree, | |||
351 | "%llu %llu\n", (unsigned long long)found->start, | 341 | "%llu %llu\n", (unsigned long long)found->start, |
352 | (unsigned long long)found->end, | 342 | (unsigned long long)found->end, |
353 | (unsigned long long)start, (unsigned long long)end); | 343 | (unsigned long long)start, (unsigned long long)end); |
354 | free_extent_state(state); | ||
355 | return -EEXIST; | 344 | return -EEXIST; |
356 | } | 345 | } |
357 | state->tree = tree; | 346 | state->tree = tree; |
@@ -359,13 +348,11 @@ static int insert_state(struct extent_io_tree *tree, | |||
359 | return 0; | 348 | return 0; |
360 | } | 349 | } |
361 | 350 | ||
362 | static int split_cb(struct extent_io_tree *tree, struct extent_state *orig, | 351 | static void split_cb(struct extent_io_tree *tree, struct extent_state *orig, |
363 | u64 split) | 352 | u64 split) |
364 | { | 353 | { |
365 | if (tree->ops && tree->ops->split_extent_hook) | 354 | if (tree->ops && tree->ops->split_extent_hook) |
366 | return tree->ops->split_extent_hook(tree->mapping->host, | 355 | tree->ops->split_extent_hook(tree->mapping->host, orig, split); |
367 | orig, split); | ||
368 | return 0; | ||
369 | } | 356 | } |
370 | 357 | ||
371 | /* | 358 | /* |
@@ -500,7 +487,8 @@ again: | |||
500 | cached_state = NULL; | 487 | cached_state = NULL; |
501 | } | 488 | } |
502 | 489 | ||
503 | if (cached && cached->tree && cached->start == start) { | 490 | if (cached && cached->tree && cached->start <= start && |
491 | cached->end > start) { | ||
504 | if (clear) | 492 | if (clear) |
505 | atomic_dec(&cached->refs); | 493 | atomic_dec(&cached->refs); |
506 | state = cached; | 494 | state = cached; |
@@ -660,34 +648,25 @@ again: | |||
660 | if (start > end) | 648 | if (start > end) |
661 | break; | 649 | break; |
662 | 650 | ||
663 | if (need_resched()) { | 651 | cond_resched_lock(&tree->lock); |
664 | spin_unlock(&tree->lock); | ||
665 | cond_resched(); | ||
666 | spin_lock(&tree->lock); | ||
667 | } | ||
668 | } | 652 | } |
669 | out: | 653 | out: |
670 | spin_unlock(&tree->lock); | 654 | spin_unlock(&tree->lock); |
671 | return 0; | 655 | return 0; |
672 | } | 656 | } |
673 | 657 | ||
674 | static int set_state_bits(struct extent_io_tree *tree, | 658 | static void set_state_bits(struct extent_io_tree *tree, |
675 | struct extent_state *state, | 659 | struct extent_state *state, |
676 | int *bits) | 660 | int *bits) |
677 | { | 661 | { |
678 | int ret; | ||
679 | int bits_to_set = *bits & ~EXTENT_CTLBITS; | 662 | int bits_to_set = *bits & ~EXTENT_CTLBITS; |
680 | 663 | ||
681 | ret = set_state_cb(tree, state, bits); | 664 | set_state_cb(tree, state, bits); |
682 | if (ret) | ||
683 | return ret; | ||
684 | if ((bits_to_set & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) { | 665 | if ((bits_to_set & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) { |
685 | u64 range = state->end - state->start + 1; | 666 | u64 range = state->end - state->start + 1; |
686 | tree->dirty_bytes += range; | 667 | tree->dirty_bytes += range; |
687 | } | 668 | } |
688 | state->state |= bits_to_set; | 669 | state->state |= bits_to_set; |
689 | |||
690 | return 0; | ||
691 | } | 670 | } |
692 | 671 | ||
693 | static void cache_state(struct extent_state *state, | 672 | static void cache_state(struct extent_state *state, |
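
cond_resched_lock() collapses the open-coded drop/resched/retake sequence above it. Its effect is roughly the following (a sketch of the semantics, not the scheduler's exact implementation):

    /* approximate expansion of cond_resched_lock(&tree->lock) */
    if (need_resched() || spin_needbreak(&tree->lock)) {
            spin_unlock(&tree->lock);
            cond_resched();
            spin_lock(&tree->lock);
    }
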
@@ -742,7 +721,8 @@ again: | |||
742 | spin_lock(&tree->lock); | 721 | spin_lock(&tree->lock); |
743 | if (cached_state && *cached_state) { | 722 | if (cached_state && *cached_state) { |
744 | state = *cached_state; | 723 | state = *cached_state; |
745 | if (state->start == start && state->tree) { | 724 | if (state->start <= start && state->end > start && |
725 | state->tree) { | ||
746 | node = &state->rb_node; | 726 | node = &state->rb_node; |
747 | goto hit_next; | 727 | goto hit_next; |
748 | } | 728 | } |
@@ -779,17 +759,15 @@ hit_next: | |||
779 | goto out; | 759 | goto out; |
780 | } | 760 | } |
781 | 761 | ||
782 | err = set_state_bits(tree, state, &bits); | 762 | set_state_bits(tree, state, &bits); |
783 | if (err) | ||
784 | goto out; | ||
785 | 763 | ||
786 | next_node = rb_next(node); | ||
787 | cache_state(state, cached_state); | 764 | cache_state(state, cached_state); |
788 | merge_state(tree, state); | 765 | merge_state(tree, state); |
789 | if (last_end == (u64)-1) | 766 | if (last_end == (u64)-1) |
790 | goto out; | 767 | goto out; |
791 | 768 | ||
792 | start = last_end + 1; | 769 | start = last_end + 1; |
770 | next_node = rb_next(&state->rb_node); | ||
793 | if (next_node && start < end && prealloc && !need_resched()) { | 771 | if (next_node && start < end && prealloc && !need_resched()) { |
794 | state = rb_entry(next_node, struct extent_state, | 772 | state = rb_entry(next_node, struct extent_state, |
795 | rb_node); | 773 | rb_node); |
@@ -830,9 +808,7 @@ hit_next: | |||
830 | if (err) | 808 | if (err) |
831 | goto out; | 809 | goto out; |
832 | if (state->end <= end) { | 810 | if (state->end <= end) { |
833 | err = set_state_bits(tree, state, &bits); | 811 | set_state_bits(tree, state, &bits); |
834 | if (err) | ||
835 | goto out; | ||
836 | cache_state(state, cached_state); | 812 | cache_state(state, cached_state); |
837 | merge_state(tree, state); | 813 | merge_state(tree, state); |
838 | if (last_end == (u64)-1) | 814 | if (last_end == (u64)-1) |
@@ -862,7 +838,6 @@ hit_next: | |||
862 | * Avoid to free 'prealloc' if it can be merged with | 838 | * Avoid to free 'prealloc' if it can be merged with |
863 | * the later extent. | 839 | * the later extent. |
864 | */ | 840 | */ |
865 | atomic_inc(&prealloc->refs); | ||
866 | err = insert_state(tree, prealloc, start, this_end, | 841 | err = insert_state(tree, prealloc, start, this_end, |
867 | &bits); | 842 | &bits); |
868 | BUG_ON(err == -EEXIST); | 843 | BUG_ON(err == -EEXIST); |
@@ -872,7 +847,6 @@ hit_next: | |||
872 | goto out; | 847 | goto out; |
873 | } | 848 | } |
874 | cache_state(prealloc, cached_state); | 849 | cache_state(prealloc, cached_state); |
875 | free_extent_state(prealloc); | ||
876 | prealloc = NULL; | 850 | prealloc = NULL; |
877 | start = this_end + 1; | 851 | start = this_end + 1; |
878 | goto search_again; | 852 | goto search_again; |
@@ -895,11 +869,7 @@ hit_next: | |||
895 | err = split_state(tree, state, prealloc, end + 1); | 869 | err = split_state(tree, state, prealloc, end + 1); |
896 | BUG_ON(err == -EEXIST); | 870 | BUG_ON(err == -EEXIST); |
897 | 871 | ||
898 | err = set_state_bits(tree, prealloc, &bits); | 872 | set_state_bits(tree, prealloc, &bits); |
899 | if (err) { | ||
900 | prealloc = NULL; | ||
901 | goto out; | ||
902 | } | ||
903 | cache_state(prealloc, cached_state); | 873 | cache_state(prealloc, cached_state); |
904 | merge_state(tree, prealloc); | 874 | merge_state(tree, prealloc); |
905 | prealloc = NULL; | 875 | prealloc = NULL; |
@@ -1061,46 +1031,6 @@ static int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end) | |||
1061 | return 0; | 1031 | return 0; |
1062 | } | 1032 | } |
1063 | 1033 | ||
1064 | /* | ||
1065 | * find the first offset in the io tree with 'bits' set. zero is | ||
1066 | * returned if we find something, and *start_ret and *end_ret are | ||
1067 | * set to reflect the state struct that was found. | ||
1068 | * | ||
1069 | * If nothing was found, 1 is returned, < 0 on error | ||
1070 | */ | ||
1071 | int find_first_extent_bit(struct extent_io_tree *tree, u64 start, | ||
1072 | u64 *start_ret, u64 *end_ret, int bits) | ||
1073 | { | ||
1074 | struct rb_node *node; | ||
1075 | struct extent_state *state; | ||
1076 | int ret = 1; | ||
1077 | |||
1078 | spin_lock(&tree->lock); | ||
1079 | /* | ||
1080 | * this search will find all the extents that end after | ||
1081 | * our range starts. | ||
1082 | */ | ||
1083 | node = tree_search(tree, start); | ||
1084 | if (!node) | ||
1085 | goto out; | ||
1086 | |||
1087 | while (1) { | ||
1088 | state = rb_entry(node, struct extent_state, rb_node); | ||
1089 | if (state->end >= start && (state->state & bits)) { | ||
1090 | *start_ret = state->start; | ||
1091 | *end_ret = state->end; | ||
1092 | ret = 0; | ||
1093 | break; | ||
1094 | } | ||
1095 | node = rb_next(node); | ||
1096 | if (!node) | ||
1097 | break; | ||
1098 | } | ||
1099 | out: | ||
1100 | spin_unlock(&tree->lock); | ||
1101 | return ret; | ||
1102 | } | ||
1103 | |||
1104 | /* find the first state struct with 'bits' set after 'start', and | 1034 | /* find the first state struct with 'bits' set after 'start', and |
1105 | * return it. tree->lock must be held. NULL will be returned if | 1035 | * return it. tree->lock must be held. NULL will be returned if |
1106 | * nothing was found after 'start' | 1036 | * nothing was found after 'start' |
@@ -1133,6 +1063,30 @@ out: | |||
1133 | } | 1063 | } |
1134 | 1064 | ||
1135 | /* | 1065 | /* |
1066 | * find the first offset in the io tree with 'bits' set. zero is | ||
1067 | * returned if we find something, and *start_ret and *end_ret are | ||
1068 | * set to reflect the state struct that was found. | ||
1069 | * | ||
1070 | * If nothing was found, 1 is returned, < 0 on error | ||
1071 | */ | ||
1072 | int find_first_extent_bit(struct extent_io_tree *tree, u64 start, | ||
1073 | u64 *start_ret, u64 *end_ret, int bits) | ||
1074 | { | ||
1075 | struct extent_state *state; | ||
1076 | int ret = 1; | ||
1077 | |||
1078 | spin_lock(&tree->lock); | ||
1079 | state = find_first_extent_bit_state(tree, start, bits); | ||
1080 | if (state) { | ||
1081 | *start_ret = state->start; | ||
1082 | *end_ret = state->end; | ||
1083 | ret = 0; | ||
1084 | } | ||
1085 | spin_unlock(&tree->lock); | ||
1086 | return ret; | ||
1087 | } | ||
1088 | |||
1089 | /* | ||
1136 | * find a contiguous range of bytes in the file marked as delalloc, not | 1090 | * find a contiguous range of bytes in the file marked as delalloc, not |
1137 | * more than 'max_bytes'. start and end are used to return the range, | 1091 | * more than 'max_bytes'. start and end are used to return the range, |
1138 | * | 1092 | * |
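
After the move, find_first_extent_bit() is a thin wrapper over find_first_extent_bit_state(), keeping the tree->lock handling in one place. A typical call, with process_range() a hypothetical stand-in for the caller's work:

    u64 start_ret, end_ret;
    int ret;

    ret = find_first_extent_bit(tree, 0, &start_ret, &end_ret, EXTENT_DIRTY);
    if (ret == 0)
            process_range(start_ret, end_ret);  /* hypothetical caller work */
    else if (ret == 1)
            ;       /* no range with EXTENT_DIRTY at or after offset 0 */
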
@@ -1564,7 +1518,8 @@ int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, | |||
1564 | int bitset = 0; | 1518 | int bitset = 0; |
1565 | 1519 | ||
1566 | spin_lock(&tree->lock); | 1520 | spin_lock(&tree->lock); |
1567 | if (cached && cached->tree && cached->start == start) | 1521 | if (cached && cached->tree && cached->start <= start && |
1522 | cached->end > start) | ||
1568 | node = &cached->rb_node; | 1523 | node = &cached->rb_node; |
1569 | else | 1524 | else |
1570 | node = tree_search(tree, start); | 1525 | node = tree_search(tree, start); |
@@ -2432,6 +2387,7 @@ static int extent_write_cache_pages(struct extent_io_tree *tree, | |||
2432 | pgoff_t index; | 2387 | pgoff_t index; |
2433 | pgoff_t end; /* Inclusive */ | 2388 | pgoff_t end; /* Inclusive */ |
2434 | int scanned = 0; | 2389 | int scanned = 0; |
2390 | int tag; | ||
2435 | 2391 | ||
2436 | pagevec_init(&pvec, 0); | 2392 | pagevec_init(&pvec, 0); |
2437 | if (wbc->range_cyclic) { | 2393 | if (wbc->range_cyclic) { |
@@ -2442,11 +2398,16 @@ static int extent_write_cache_pages(struct extent_io_tree *tree, | |||
2442 | end = wbc->range_end >> PAGE_CACHE_SHIFT; | 2398 | end = wbc->range_end >> PAGE_CACHE_SHIFT; |
2443 | scanned = 1; | 2399 | scanned = 1; |
2444 | } | 2400 | } |
2401 | if (wbc->sync_mode == WB_SYNC_ALL) | ||
2402 | tag = PAGECACHE_TAG_TOWRITE; | ||
2403 | else | ||
2404 | tag = PAGECACHE_TAG_DIRTY; | ||
2445 | retry: | 2405 | retry: |
2406 | if (wbc->sync_mode == WB_SYNC_ALL) | ||
2407 | tag_pages_for_writeback(mapping, index, end); | ||
2446 | while (!done && !nr_to_write_done && (index <= end) && | 2408 | while (!done && !nr_to_write_done && (index <= end) && |
2447 | (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, | 2409 | (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag, |
2448 | PAGECACHE_TAG_DIRTY, min(end - index, | 2410 | min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) { |
2449 | (pgoff_t)PAGEVEC_SIZE-1) + 1))) { | ||
2450 | unsigned i; | 2411 | unsigned i; |
2451 | 2412 | ||
2452 | scanned = 1; | 2413 | scanned = 1; |
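
This adopts the livelock fix from the generic write_cache_pages(): for data-integrity writeback, every page dirty at the start of the sweep is retagged TOWRITE in a single pass, and the sweep then walks only that tag. Condensed (same calls as in the hunk above):

    int tag;

    /* WB_SYNC_ALL: snapshot the dirty set before sweeping it */
    if (wbc->sync_mode == WB_SYNC_ALL) {
            tag_pages_for_writeback(mapping, index, end); /* DIRTY -> TOWRITE */
            tag = PAGECACHE_TAG_TOWRITE;
    } else {
            tag = PAGECACHE_TAG_DIRTY;
    }

    /* the pagevec lookups then only return pages carrying 'tag', so pages
     * redirtied during the sweep wait for the next writeback pass */
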
@@ -2541,7 +2502,6 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page, | |||
2541 | struct writeback_control *wbc) | 2502 | struct writeback_control *wbc) |
2542 | { | 2503 | { |
2543 | int ret; | 2504 | int ret; |
2544 | struct address_space *mapping = page->mapping; | ||
2545 | struct extent_page_data epd = { | 2505 | struct extent_page_data epd = { |
2546 | .bio = NULL, | 2506 | .bio = NULL, |
2547 | .tree = tree, | 2507 | .tree = tree, |
@@ -2549,18 +2509,9 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page, | |||
2549 | .extent_locked = 0, | 2509 | .extent_locked = 0, |
2550 | .sync_io = wbc->sync_mode == WB_SYNC_ALL, | 2510 | .sync_io = wbc->sync_mode == WB_SYNC_ALL, |
2551 | }; | 2511 | }; |
2552 | struct writeback_control wbc_writepages = { | ||
2553 | .sync_mode = wbc->sync_mode, | ||
2554 | .older_than_this = NULL, | ||
2555 | .nr_to_write = 64, | ||
2556 | .range_start = page_offset(page) + PAGE_CACHE_SIZE, | ||
2557 | .range_end = (loff_t)-1, | ||
2558 | }; | ||
2559 | 2512 | ||
2560 | ret = __extent_writepage(page, wbc, &epd); | 2513 | ret = __extent_writepage(page, wbc, &epd); |
2561 | 2514 | ||
2562 | extent_write_cache_pages(tree, mapping, &wbc_writepages, | ||
2563 | __extent_writepage, &epd, flush_write_bio); | ||
2564 | flush_epd_write_bio(&epd); | 2515 | flush_epd_write_bio(&epd); |
2565 | return ret; | 2516 | return ret; |
2566 | } | 2517 | } |
@@ -2584,7 +2535,6 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode, | |||
2584 | }; | 2535 | }; |
2585 | struct writeback_control wbc_writepages = { | 2536 | struct writeback_control wbc_writepages = { |
2586 | .sync_mode = mode, | 2537 | .sync_mode = mode, |
2587 | .older_than_this = NULL, | ||
2588 | .nr_to_write = nr_pages * 2, | 2538 | .nr_to_write = nr_pages * 2, |
2589 | .range_start = start, | 2539 | .range_start = start, |
2590 | .range_end = end + 1, | 2540 | .range_end = end + 1, |
@@ -3022,8 +2972,15 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree, | |||
3022 | return NULL; | 2972 | return NULL; |
3023 | eb->start = start; | 2973 | eb->start = start; |
3024 | eb->len = len; | 2974 | eb->len = len; |
3025 | spin_lock_init(&eb->lock); | 2975 | rwlock_init(&eb->lock); |
3026 | init_waitqueue_head(&eb->lock_wq); | 2976 | atomic_set(&eb->write_locks, 0); |
2977 | atomic_set(&eb->read_locks, 0); | ||
2978 | atomic_set(&eb->blocking_readers, 0); | ||
2979 | atomic_set(&eb->blocking_writers, 0); | ||
2980 | atomic_set(&eb->spinning_readers, 0); | ||
2981 | atomic_set(&eb->spinning_writers, 0); | ||
2982 | init_waitqueue_head(&eb->write_lock_wq); | ||
2983 | init_waitqueue_head(&eb->read_lock_wq); | ||
3027 | 2984 | ||
3028 | #if LEAK_DEBUG | 2985 | #if LEAK_DEBUG |
3029 | spin_lock_irqsave(&leak_lock, flags); | 2986 | spin_lock_irqsave(&leak_lock, flags); |
@@ -3119,7 +3076,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, | |||
3119 | i = 0; | 3076 | i = 0; |
3120 | } | 3077 | } |
3121 | for (; i < num_pages; i++, index++) { | 3078 | for (; i < num_pages; i++, index++) { |
3122 | p = find_or_create_page(mapping, index, GFP_NOFS | __GFP_HIGHMEM); | 3079 | p = find_or_create_page(mapping, index, GFP_NOFS); |
3123 | if (!p) { | 3080 | if (!p) { |
3124 | WARN_ON(1); | 3081 | WARN_ON(1); |
3125 | goto free_eb; | 3082 | goto free_eb; |
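
Dropping __GFP_HIGHMEM here is what licenses the kmap_atomic() removals later in this file: page_address() yields a usable pointer only for pages with a permanent kernel mapping, so extent buffer pages must now come from lowmem. The before/after access pattern:

    /* before: the page may live in highmem, so access needs a transient map */
    kaddr = kmap_atomic(page, KM_USER0);
    memcpy(dst, kaddr + offset, cur);
    kunmap_atomic(kaddr, KM_USER0);

    /* after: GFP_NOFS without __GFP_HIGHMEM guarantees a lowmem page */
    kaddr = page_address(page);
    memcpy(dst, kaddr + offset, cur);
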
@@ -3266,6 +3223,22 @@ int set_extent_buffer_dirty(struct extent_io_tree *tree, | |||
3266 | return was_dirty; | 3223 | return was_dirty; |
3267 | } | 3224 | } |
3268 | 3225 | ||
3226 | static int __eb_straddles_pages(u64 start, u64 len) | ||
3227 | { | ||
3228 | if (len < PAGE_CACHE_SIZE) | ||
3229 | return 1; | ||
3230 | if (start & (PAGE_CACHE_SIZE - 1)) | ||
3231 | return 1; | ||
3232 | if ((start + len) & (PAGE_CACHE_SIZE - 1)) | ||
3233 | return 1; | ||
3234 | return 0; | ||
3235 | } | ||
3236 | |||
3237 | static int eb_straddles_pages(struct extent_buffer *eb) | ||
3238 | { | ||
3239 | return __eb_straddles_pages(eb->start, eb->len); | ||
3240 | } | ||
3241 | |||
3269 | int clear_extent_buffer_uptodate(struct extent_io_tree *tree, | 3242 | int clear_extent_buffer_uptodate(struct extent_io_tree *tree, |
3270 | struct extent_buffer *eb, | 3243 | struct extent_buffer *eb, |
3271 | struct extent_state **cached_state) | 3244 | struct extent_state **cached_state) |
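
The new helpers gate the extent-io-tree bookkeeping on whether a buffer shares its pages with anything else: when a node is page-aligned and a whole multiple of the page size, the per-page uptodate bits are authoritative and the ranged tracking can be skipped. A runnable check of the predicate (PAGE_CACHE_SIZE hard-coded to 4 KiB as an assumption):

    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_CACHE_SIZE 4096ULL /* assumed 4 KiB pages */

    /* mirrors __eb_straddles_pages(): does [start, start+len) share any
     * page with other data? */
    static int straddles(uint64_t start, uint64_t len)
    {
            if (len < PAGE_CACHE_SIZE)
                    return 1;
            if (start & (PAGE_CACHE_SIZE - 1))
                    return 1;
            if ((start + len) & (PAGE_CACHE_SIZE - 1))
                    return 1;
            return 0;
    }

    int main(void)
    {
            printf("%d\n", straddles(8192, 4096)); /* 0: whole-page node */
            printf("%d\n", straddles(6144, 4096)); /* 1: crosses a page edge */
            printf("%d\n", straddles(8192, 2048)); /* 1: sub-page node */
            return 0;
    }
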
@@ -3277,8 +3250,10 @@ int clear_extent_buffer_uptodate(struct extent_io_tree *tree, | |||
3277 | num_pages = num_extent_pages(eb->start, eb->len); | 3250 | num_pages = num_extent_pages(eb->start, eb->len); |
3278 | clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); | 3251 | clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); |
3279 | 3252 | ||
3280 | clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, | 3253 | if (eb_straddles_pages(eb)) { |
3281 | cached_state, GFP_NOFS); | 3254 | clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, |
3255 | cached_state, GFP_NOFS); | ||
3256 | } | ||
3282 | for (i = 0; i < num_pages; i++) { | 3257 | for (i = 0; i < num_pages; i++) { |
3283 | page = extent_buffer_page(eb, i); | 3258 | page = extent_buffer_page(eb, i); |
3284 | if (page) | 3259 | if (page) |
@@ -3296,8 +3271,10 @@ int set_extent_buffer_uptodate(struct extent_io_tree *tree, | |||
3296 | 3271 | ||
3297 | num_pages = num_extent_pages(eb->start, eb->len); | 3272 | num_pages = num_extent_pages(eb->start, eb->len); |
3298 | 3273 | ||
3299 | set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, | 3274 | if (eb_straddles_pages(eb)) { |
3300 | NULL, GFP_NOFS); | 3275 | set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, |
3276 | NULL, GFP_NOFS); | ||
3277 | } | ||
3301 | for (i = 0; i < num_pages; i++) { | 3278 | for (i = 0; i < num_pages; i++) { |
3302 | page = extent_buffer_page(eb, i); | 3279 | page = extent_buffer_page(eb, i); |
3303 | if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) || | 3280 | if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) || |
@@ -3320,9 +3297,12 @@ int extent_range_uptodate(struct extent_io_tree *tree, | |||
3320 | int uptodate; | 3297 | int uptodate; |
3321 | unsigned long index; | 3298 | unsigned long index; |
3322 | 3299 | ||
3323 | ret = test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL); | 3300 | if (__eb_straddles_pages(start, end - start + 1)) { |
3324 | if (ret) | 3301 | ret = test_range_bit(tree, start, end, |
3325 | return 1; | 3302 | EXTENT_UPTODATE, 1, NULL); |
3303 | if (ret) | ||
3304 | return 1; | ||
3305 | } | ||
3326 | while (start <= end) { | 3306 | while (start <= end) { |
3327 | index = start >> PAGE_CACHE_SHIFT; | 3307 | index = start >> PAGE_CACHE_SHIFT; |
3328 | page = find_get_page(tree->mapping, index); | 3308 | page = find_get_page(tree->mapping, index); |
@@ -3350,10 +3330,12 @@ int extent_buffer_uptodate(struct extent_io_tree *tree, | |||
3350 | if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags)) | 3330 | if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags)) |
3351 | return 1; | 3331 | return 1; |
3352 | 3332 | ||
3353 | ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1, | 3333 | if (eb_straddles_pages(eb)) { |
3354 | EXTENT_UPTODATE, 1, cached_state); | 3334 | ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1, |
3355 | if (ret) | 3335 | EXTENT_UPTODATE, 1, cached_state); |
3356 | return ret; | 3336 | if (ret) |
3337 | return ret; | ||
3338 | } | ||
3357 | 3339 | ||
3358 | num_pages = num_extent_pages(eb->start, eb->len); | 3340 | num_pages = num_extent_pages(eb->start, eb->len); |
3359 | for (i = 0; i < num_pages; i++) { | 3341 | for (i = 0; i < num_pages; i++) { |
@@ -3386,9 +3368,11 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, | |||
3386 | if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags)) | 3368 | if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags)) |
3387 | return 0; | 3369 | return 0; |
3388 | 3370 | ||
3389 | if (test_range_bit(tree, eb->start, eb->start + eb->len - 1, | 3371 | if (eb_straddles_pages(eb)) { |
3390 | EXTENT_UPTODATE, 1, NULL)) { | 3372 | if (test_range_bit(tree, eb->start, eb->start + eb->len - 1, |
3391 | return 0; | 3373 | EXTENT_UPTODATE, 1, NULL)) { |
3374 | return 0; | ||
3375 | } | ||
3392 | } | 3376 | } |
3393 | 3377 | ||
3394 | if (start) { | 3378 | if (start) { |
@@ -3492,9 +3476,8 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv, | |||
3492 | page = extent_buffer_page(eb, i); | 3476 | page = extent_buffer_page(eb, i); |
3493 | 3477 | ||
3494 | cur = min(len, (PAGE_CACHE_SIZE - offset)); | 3478 | cur = min(len, (PAGE_CACHE_SIZE - offset)); |
3495 | kaddr = kmap_atomic(page, KM_USER1); | 3479 | kaddr = page_address(page); |
3496 | memcpy(dst, kaddr + offset, cur); | 3480 | memcpy(dst, kaddr + offset, cur); |
3497 | kunmap_atomic(kaddr, KM_USER1); | ||
3498 | 3481 | ||
3499 | dst += cur; | 3482 | dst += cur; |
3500 | len -= cur; | 3483 | len -= cur; |
@@ -3504,9 +3487,9 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv, | |||
3504 | } | 3487 | } |
3505 | 3488 | ||
3506 | int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, | 3489 | int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, |
3507 | unsigned long min_len, char **token, char **map, | 3490 | unsigned long min_len, char **map, |
3508 | unsigned long *map_start, | 3491 | unsigned long *map_start, |
3509 | unsigned long *map_len, int km) | 3492 | unsigned long *map_len) |
3510 | { | 3493 | { |
3511 | size_t offset = start & (PAGE_CACHE_SIZE - 1); | 3494 | size_t offset = start & (PAGE_CACHE_SIZE - 1); |
3512 | char *kaddr; | 3495 | char *kaddr; |
@@ -3536,42 +3519,12 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, | |||
3536 | } | 3519 | } |
3537 | 3520 | ||
3538 | p = extent_buffer_page(eb, i); | 3521 | p = extent_buffer_page(eb, i); |
3539 | kaddr = kmap_atomic(p, km); | 3522 | kaddr = page_address(p); |
3540 | *token = kaddr; | ||
3541 | *map = kaddr + offset; | 3523 | *map = kaddr + offset; |
3542 | *map_len = PAGE_CACHE_SIZE - offset; | 3524 | *map_len = PAGE_CACHE_SIZE - offset; |
3543 | return 0; | 3525 | return 0; |
3544 | } | 3526 | } |
3545 | 3527 | ||
3546 | int map_extent_buffer(struct extent_buffer *eb, unsigned long start, | ||
3547 | unsigned long min_len, | ||
3548 | char **token, char **map, | ||
3549 | unsigned long *map_start, | ||
3550 | unsigned long *map_len, int km) | ||
3551 | { | ||
3552 | int err; | ||
3553 | int save = 0; | ||
3554 | if (eb->map_token) { | ||
3555 | unmap_extent_buffer(eb, eb->map_token, km); | ||
3556 | eb->map_token = NULL; | ||
3557 | save = 1; | ||
3558 | } | ||
3559 | err = map_private_extent_buffer(eb, start, min_len, token, map, | ||
3560 | map_start, map_len, km); | ||
3561 | if (!err && save) { | ||
3562 | eb->map_token = *token; | ||
3563 | eb->kaddr = *map; | ||
3564 | eb->map_start = *map_start; | ||
3565 | eb->map_len = *map_len; | ||
3566 | } | ||
3567 | return err; | ||
3568 | } | ||
3569 | |||
3570 | void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km) | ||
3571 | { | ||
3572 | kunmap_atomic(token, km); | ||
3573 | } | ||
3574 | |||
3575 | int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, | 3528 | int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, |
3576 | unsigned long start, | 3529 | unsigned long start, |
3577 | unsigned long len) | 3530 | unsigned long len) |
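
With the token and KM slot gone, call sites shrink accordingly. A hypothetical caller reading a little-endian u64 at byte 'offset' of the buffer (names assumed; the call fails if the field would cross a page boundary, per the min_len check):

    char *map;
    unsigned long map_start, map_len;
    int err;

    err = map_private_extent_buffer(eb, offset, sizeof(u64),
                                    &map, &map_start, &map_len);
    if (!err) {
            u64 val = le64_to_cpup((__le64 *)map);
            /* no unmap step: page_address() mappings are permanent */
    }
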
@@ -3595,9 +3548,8 @@ int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, | |||
3595 | 3548 | ||
3596 | cur = min(len, (PAGE_CACHE_SIZE - offset)); | 3549 | cur = min(len, (PAGE_CACHE_SIZE - offset)); |
3597 | 3550 | ||
3598 | kaddr = kmap_atomic(page, KM_USER0); | 3551 | kaddr = page_address(page); |
3599 | ret = memcmp(ptr, kaddr + offset, cur); | 3552 | ret = memcmp(ptr, kaddr + offset, cur); |
3600 | kunmap_atomic(kaddr, KM_USER0); | ||
3601 | if (ret) | 3553 | if (ret) |
3602 | break; | 3554 | break; |
3603 | 3555 | ||
@@ -3630,9 +3582,8 @@ void write_extent_buffer(struct extent_buffer *eb, const void *srcv, | |||
3630 | WARN_ON(!PageUptodate(page)); | 3582 | WARN_ON(!PageUptodate(page)); |
3631 | 3583 | ||
3632 | cur = min(len, PAGE_CACHE_SIZE - offset); | 3584 | cur = min(len, PAGE_CACHE_SIZE - offset); |
3633 | kaddr = kmap_atomic(page, KM_USER1); | 3585 | kaddr = page_address(page); |
3634 | memcpy(kaddr + offset, src, cur); | 3586 | memcpy(kaddr + offset, src, cur); |
3635 | kunmap_atomic(kaddr, KM_USER1); | ||
3636 | 3587 | ||
3637 | src += cur; | 3588 | src += cur; |
3638 | len -= cur; | 3589 | len -= cur; |
@@ -3661,9 +3612,8 @@ void memset_extent_buffer(struct extent_buffer *eb, char c, | |||
3661 | WARN_ON(!PageUptodate(page)); | 3612 | WARN_ON(!PageUptodate(page)); |
3662 | 3613 | ||
3663 | cur = min(len, PAGE_CACHE_SIZE - offset); | 3614 | cur = min(len, PAGE_CACHE_SIZE - offset); |
3664 | kaddr = kmap_atomic(page, KM_USER0); | 3615 | kaddr = page_address(page); |
3665 | memset(kaddr + offset, c, cur); | 3616 | memset(kaddr + offset, c, cur); |
3666 | kunmap_atomic(kaddr, KM_USER0); | ||
3667 | 3617 | ||
3668 | len -= cur; | 3618 | len -= cur; |
3669 | offset = 0; | 3619 | offset = 0; |
@@ -3694,9 +3644,8 @@ void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src, | |||
3694 | 3644 | ||
3695 | cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - offset)); | 3645 | cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - offset)); |
3696 | 3646 | ||
3697 | kaddr = kmap_atomic(page, KM_USER0); | 3647 | kaddr = page_address(page); |
3698 | read_extent_buffer(src, kaddr + offset, src_offset, cur); | 3648 | read_extent_buffer(src, kaddr + offset, src_offset, cur); |
3699 | kunmap_atomic(kaddr, KM_USER0); | ||
3700 | 3649 | ||
3701 | src_offset += cur; | 3650 | src_offset += cur; |
3702 | len -= cur; | 3651 | len -= cur; |
@@ -3709,20 +3658,17 @@ static void move_pages(struct page *dst_page, struct page *src_page, | |||
3709 | unsigned long dst_off, unsigned long src_off, | 3658 | unsigned long dst_off, unsigned long src_off, |
3710 | unsigned long len) | 3659 | unsigned long len) |
3711 | { | 3660 | { |
3712 | char *dst_kaddr = kmap_atomic(dst_page, KM_USER0); | 3661 | char *dst_kaddr = page_address(dst_page); |
3713 | if (dst_page == src_page) { | 3662 | if (dst_page == src_page) { |
3714 | memmove(dst_kaddr + dst_off, dst_kaddr + src_off, len); | 3663 | memmove(dst_kaddr + dst_off, dst_kaddr + src_off, len); |
3715 | } else { | 3664 | } else { |
3716 | char *src_kaddr = kmap_atomic(src_page, KM_USER1); | 3665 | char *src_kaddr = page_address(src_page); |
3717 | char *p = dst_kaddr + dst_off + len; | 3666 | char *p = dst_kaddr + dst_off + len; |
3718 | char *s = src_kaddr + src_off + len; | 3667 | char *s = src_kaddr + src_off + len; |
3719 | 3668 | ||
3720 | while (len--) | 3669 | while (len--) |
3721 | *--p = *--s; | 3670 | *--p = *--s; |
3722 | |||
3723 | kunmap_atomic(src_kaddr, KM_USER1); | ||
3724 | } | 3671 | } |
3725 | kunmap_atomic(dst_kaddr, KM_USER0); | ||
3726 | } | 3672 | } |
3727 | 3673 | ||
3728 | static inline bool areas_overlap(unsigned long src, unsigned long dst, unsigned long len) | 3674 | static inline bool areas_overlap(unsigned long src, unsigned long dst, unsigned long len) |
@@ -3735,20 +3681,17 @@ static void copy_pages(struct page *dst_page, struct page *src_page, | |||
3735 | unsigned long dst_off, unsigned long src_off, | 3681 | unsigned long dst_off, unsigned long src_off, |
3736 | unsigned long len) | 3682 | unsigned long len) |
3737 | { | 3683 | { |
3738 | char *dst_kaddr = kmap_atomic(dst_page, KM_USER0); | 3684 | char *dst_kaddr = page_address(dst_page); |
3739 | char *src_kaddr; | 3685 | char *src_kaddr; |
3740 | 3686 | ||
3741 | if (dst_page != src_page) { | 3687 | if (dst_page != src_page) { |
3742 | src_kaddr = kmap_atomic(src_page, KM_USER1); | 3688 | src_kaddr = page_address(src_page); |
3743 | } else { | 3689 | } else { |
3744 | src_kaddr = dst_kaddr; | 3690 | src_kaddr = dst_kaddr; |
3745 | BUG_ON(areas_overlap(src_off, dst_off, len)); | 3691 | BUG_ON(areas_overlap(src_off, dst_off, len)); |
3746 | } | 3692 | } |
3747 | 3693 | ||
3748 | memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len); | 3694 | memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len); |
3749 | kunmap_atomic(dst_kaddr, KM_USER0); | ||
3750 | if (dst_page != src_page) | ||
3751 | kunmap_atomic(src_kaddr, KM_USER1); | ||
3752 | } | 3695 | } |
3753 | 3696 | ||
3754 | void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, | 3697 | void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, |
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index a11a92ee2d30..7b2f0c3e7929 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h | |||
@@ -76,15 +76,15 @@ struct extent_io_ops { | |||
76 | struct extent_state *state); | 76 | struct extent_state *state); |
77 | int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end, | 77 | int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end, |
78 | struct extent_state *state, int uptodate); | 78 | struct extent_state *state, int uptodate); |
79 | int (*set_bit_hook)(struct inode *inode, struct extent_state *state, | 79 | void (*set_bit_hook)(struct inode *inode, struct extent_state *state, |
80 | int *bits); | 80 | int *bits); |
81 | int (*clear_bit_hook)(struct inode *inode, struct extent_state *state, | 81 | void (*clear_bit_hook)(struct inode *inode, struct extent_state *state, |
82 | int *bits); | 82 | int *bits); |
83 | int (*merge_extent_hook)(struct inode *inode, | 83 | void (*merge_extent_hook)(struct inode *inode, |
84 | struct extent_state *new, | 84 | struct extent_state *new, |
85 | struct extent_state *other); | 85 | struct extent_state *other); |
86 | int (*split_extent_hook)(struct inode *inode, | 86 | void (*split_extent_hook)(struct inode *inode, |
87 | struct extent_state *orig, u64 split); | 87 | struct extent_state *orig, u64 split); |
88 | int (*write_cache_pages_lock_hook)(struct page *page); | 88 | int (*write_cache_pages_lock_hook)(struct page *page); |
89 | }; | 89 | }; |
90 | 90 | ||
@@ -108,8 +108,6 @@ struct extent_state { | |||
108 | wait_queue_head_t wq; | 108 | wait_queue_head_t wq; |
109 | atomic_t refs; | 109 | atomic_t refs; |
110 | unsigned long state; | 110 | unsigned long state; |
111 | u64 split_start; | ||
112 | u64 split_end; | ||
113 | 111 | ||
114 | /* for use by the FS */ | 112 | /* for use by the FS */ |
115 | u64 private; | 113 | u64 private; |
@@ -120,8 +118,6 @@ struct extent_state { | |||
120 | struct extent_buffer { | 118 | struct extent_buffer { |
121 | u64 start; | 119 | u64 start; |
122 | unsigned long len; | 120 | unsigned long len; |
123 | char *map_token; | ||
124 | char *kaddr; | ||
125 | unsigned long map_start; | 121 | unsigned long map_start; |
126 | unsigned long map_len; | 122 | unsigned long map_len; |
127 | struct page *first_page; | 123 | struct page *first_page; |
@@ -130,14 +126,26 @@ struct extent_buffer { | |||
130 | struct rcu_head rcu_head; | 126 | struct rcu_head rcu_head; |
131 | atomic_t refs; | 127 | atomic_t refs; |
132 | 128 | ||
133 | /* the spinlock is used to protect most operations */ | 129 | /* counts of lock holders and waiters on the extent buffer */ |
134 | spinlock_t lock; | 130 | atomic_t write_locks; |
131 | atomic_t read_locks; | ||
132 | atomic_t blocking_writers; | ||
133 | atomic_t blocking_readers; | ||
134 | atomic_t spinning_readers; | ||
135 | atomic_t spinning_writers; | ||
136 | |||
137 | /* protects write locks */ | ||
138 | rwlock_t lock; | ||
135 | 139 | ||
136 | /* | 140 | /* readers use write_lock_wq while they wait for the write |
137 | * when we keep the lock held while blocking, waiters go onto | 141 | * lock holders to unlock |
138 | * the wq | ||
139 | */ | 142 | */ |
140 | wait_queue_head_t lock_wq; | 143 | wait_queue_head_t write_lock_wq; |
144 | |||
145 | /* writers use read_lock_wq while they wait for readers | ||
146 | * to unlock | ||
147 | */ | ||
148 | wait_queue_head_t read_lock_wq; | ||
141 | }; | 149 | }; |
142 | 150 | ||
143 | static inline void extent_set_compress_type(unsigned long *bio_flags, | 151 | static inline void extent_set_compress_type(unsigned long *bio_flags, |
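
The single spinlock and wait queue give way to a full reader/writer scheme; the atomics count holders and waiters in each mode so a lock owner can flip between a spinning and a blocking (sleep-allowed) state without losing the lock. Write-side usage, sketched with the helper names from this locking series:

    btrfs_tree_lock(eb);            /* spinning write lock */

    /* about to do something that may sleep (IO, allocation): convert to
     * the blocking state so other contenders sleep instead of spinning */
    btrfs_set_lock_blocking_rw(eb, BTRFS_WRITE_LOCK);

    /* ... sleepable work on the buffer ... */

    btrfs_tree_unlock_rw(eb, BTRFS_WRITE_LOCK_BLOCKING);
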
@@ -279,15 +287,10 @@ int clear_extent_buffer_uptodate(struct extent_io_tree *tree, | |||
279 | int extent_buffer_uptodate(struct extent_io_tree *tree, | 287 | int extent_buffer_uptodate(struct extent_io_tree *tree, |
280 | struct extent_buffer *eb, | 288 | struct extent_buffer *eb, |
281 | struct extent_state *cached_state); | 289 | struct extent_state *cached_state); |
282 | int map_extent_buffer(struct extent_buffer *eb, unsigned long offset, | ||
283 | unsigned long min_len, char **token, char **map, | ||
284 | unsigned long *map_start, | ||
285 | unsigned long *map_len, int km); | ||
286 | int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset, | 290 | int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset, |
287 | unsigned long min_len, char **token, char **map, | 291 | unsigned long min_len, char **map, |
288 | unsigned long *map_start, | 292 | unsigned long *map_start, |
289 | unsigned long *map_len, int km); | 293 | unsigned long *map_len); |
290 | void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km); | ||
291 | int extent_range_uptodate(struct extent_io_tree *tree, | 294 | int extent_range_uptodate(struct extent_io_tree *tree, |
292 | u64 start, u64 end); | 295 | u64 start, u64 end); |
293 | int extent_clear_unlock_delalloc(struct inode *inode, | 296 | int extent_clear_unlock_delalloc(struct inode *inode, |
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 2d0410344ea3..7c97b3301459 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c | |||
@@ -183,22 +183,10 @@ static int mergable_maps(struct extent_map *prev, struct extent_map *next) | |||
183 | return 0; | 183 | return 0; |
184 | } | 184 | } |
185 | 185 | ||
186 | int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len) | 186 | static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em) |
187 | { | 187 | { |
188 | int ret = 0; | ||
189 | struct extent_map *merge = NULL; | 188 | struct extent_map *merge = NULL; |
190 | struct rb_node *rb; | 189 | struct rb_node *rb; |
191 | struct extent_map *em; | ||
192 | |||
193 | write_lock(&tree->lock); | ||
194 | em = lookup_extent_mapping(tree, start, len); | ||
195 | |||
196 | WARN_ON(!em || em->start != start); | ||
197 | |||
198 | if (!em) | ||
199 | goto out; | ||
200 | |||
201 | clear_bit(EXTENT_FLAG_PINNED, &em->flags); | ||
202 | 190 | ||
203 | if (em->start != 0) { | 191 | if (em->start != 0) { |
204 | rb = rb_prev(&em->rb_node); | 192 | rb = rb_prev(&em->rb_node); |
@@ -225,6 +213,24 @@ int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len) | |||
225 | merge->in_tree = 0; | 213 | merge->in_tree = 0; |
226 | free_extent_map(merge); | 214 | free_extent_map(merge); |
227 | } | 215 | } |
216 | } | ||
217 | |||
218 | int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len) | ||
219 | { | ||
220 | int ret = 0; | ||
221 | struct extent_map *em; | ||
222 | |||
223 | write_lock(&tree->lock); | ||
224 | em = lookup_extent_mapping(tree, start, len); | ||
225 | |||
226 | WARN_ON(!em || em->start != start); | ||
227 | |||
228 | if (!em) | ||
229 | goto out; | ||
230 | |||
231 | clear_bit(EXTENT_FLAG_PINNED, &em->flags); | ||
232 | |||
233 | try_merge_map(tree, em); | ||
228 | 234 | ||
229 | free_extent_map(em); | 235 | free_extent_map(em); |
230 | out: | 236 | out: |
@@ -247,7 +253,6 @@ int add_extent_mapping(struct extent_map_tree *tree, | |||
247 | struct extent_map *em) | 253 | struct extent_map *em) |
248 | { | 254 | { |
249 | int ret = 0; | 255 | int ret = 0; |
250 | struct extent_map *merge = NULL; | ||
251 | struct rb_node *rb; | 256 | struct rb_node *rb; |
252 | struct extent_map *exist; | 257 | struct extent_map *exist; |
253 | 258 | ||
@@ -263,30 +268,8 @@ int add_extent_mapping(struct extent_map_tree *tree, | |||
263 | goto out; | 268 | goto out; |
264 | } | 269 | } |
265 | atomic_inc(&em->refs); | 270 | atomic_inc(&em->refs); |
266 | if (em->start != 0) { | 271 | |
267 | rb = rb_prev(&em->rb_node); | 272 | try_merge_map(tree, em); |
268 | if (rb) | ||
269 | merge = rb_entry(rb, struct extent_map, rb_node); | ||
270 | if (rb && mergable_maps(merge, em)) { | ||
271 | em->start = merge->start; | ||
272 | em->len += merge->len; | ||
273 | em->block_len += merge->block_len; | ||
274 | em->block_start = merge->block_start; | ||
275 | merge->in_tree = 0; | ||
276 | rb_erase(&merge->rb_node, &tree->map); | ||
277 | free_extent_map(merge); | ||
278 | } | ||
279 | } | ||
280 | rb = rb_next(&em->rb_node); | ||
281 | if (rb) | ||
282 | merge = rb_entry(rb, struct extent_map, rb_node); | ||
283 | if (rb && mergable_maps(em, merge)) { | ||
284 | em->len += merge->len; | ||
285 | em->block_len += merge->len; | ||
286 | rb_erase(&merge->rb_node, &tree->map); | ||
287 | merge->in_tree = 0; | ||
288 | free_extent_map(merge); | ||
289 | } | ||
290 | out: | 273 | out: |
291 | return ret; | 274 | return ret; |
292 | } | 275 | } |
@@ -299,19 +282,8 @@ static u64 range_end(u64 start, u64 len) | |||
299 | return start + len; | 282 | return start + len; |
300 | } | 283 | } |
301 | 284 | ||
302 | /** | 285 | struct extent_map *__lookup_extent_mapping(struct extent_map_tree *tree, |
303 | * lookup_extent_mapping - lookup extent_map | 286 | u64 start, u64 len, int strict) |
304 | * @tree: tree to lookup in | ||
305 | * @start: byte offset to start the search | ||
306 | * @len: length of the lookup range | ||
307 | * | ||
308 | * Find and return the first extent_map struct in @tree that intersects the | ||
309 | * [start, len] range. There may be additional objects in the tree that | ||
310 | * intersect, so check the object returned carefully to make sure that no | ||
311 | * additional lookups are needed. | ||
312 | */ | ||
313 | struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, | ||
314 | u64 start, u64 len) | ||
315 | { | 287 | { |
316 | struct extent_map *em; | 288 | struct extent_map *em; |
317 | struct rb_node *rb_node; | 289 | struct rb_node *rb_node; |
@@ -320,38 +292,42 @@ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, | |||
320 | u64 end = range_end(start, len); | 292 | u64 end = range_end(start, len); |
321 | 293 | ||
322 | rb_node = __tree_search(&tree->map, start, &prev, &next); | 294 | rb_node = __tree_search(&tree->map, start, &prev, &next); |
323 | if (!rb_node && prev) { | ||
324 | em = rb_entry(prev, struct extent_map, rb_node); | ||
325 | if (end > em->start && start < extent_map_end(em)) | ||
326 | goto found; | ||
327 | } | ||
328 | if (!rb_node && next) { | ||
329 | em = rb_entry(next, struct extent_map, rb_node); | ||
330 | if (end > em->start && start < extent_map_end(em)) | ||
331 | goto found; | ||
332 | } | ||
333 | if (!rb_node) { | 295 | if (!rb_node) { |
334 | em = NULL; | 296 | if (prev) |
335 | goto out; | 297 | rb_node = prev; |
336 | } | 298 | else if (next) |
337 | if (IS_ERR(rb_node)) { | 299 | rb_node = next; |
338 | em = ERR_CAST(rb_node); | 300 | else |
339 | goto out; | 301 | return NULL; |
340 | } | 302 | } |
303 | |||
341 | em = rb_entry(rb_node, struct extent_map, rb_node); | 304 | em = rb_entry(rb_node, struct extent_map, rb_node); |
342 | if (end > em->start && start < extent_map_end(em)) | ||
343 | goto found; | ||
344 | 305 | ||
345 | em = NULL; | 306 | if (strict && !(end > em->start && start < extent_map_end(em))) |
346 | goto out; | 307 | return NULL; |
347 | 308 | ||
348 | found: | ||
349 | atomic_inc(&em->refs); | 309 | atomic_inc(&em->refs); |
350 | out: | ||
351 | return em; | 310 | return em; |
352 | } | 311 | } |
353 | 312 | ||
354 | /** | 313 | /** |
314 | * lookup_extent_mapping - lookup extent_map | ||
315 | * @tree: tree to lookup in | ||
316 | * @start: byte offset to start the search | ||
317 | * @len: length of the lookup range | ||
318 | * | ||
319 | * Find and return the first extent_map struct in @tree that intersects the | ||
320 | * [start, len] range. There may be additional objects in the tree that | ||
321 | * intersect, so check the object returned carefully to make sure that no | ||
322 | * additional lookups are needed. | ||
323 | */ | ||
324 | struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, | ||
325 | u64 start, u64 len) | ||
326 | { | ||
327 | return __lookup_extent_mapping(tree, start, len, 1); | ||
328 | } | ||
329 | |||
330 | /** | ||
355 | * search_extent_mapping - find a nearby extent map | 331 | * search_extent_mapping - find a nearby extent map |
356 | * @tree: tree to lookup in | 332 | * @tree: tree to lookup in |
357 | * @start: byte offset to start the search | 333 | * @start: byte offset to start the search |
@@ -365,38 +341,7 @@ out: | |||
365 | struct extent_map *search_extent_mapping(struct extent_map_tree *tree, | 341 | struct extent_map *search_extent_mapping(struct extent_map_tree *tree, |
366 | u64 start, u64 len) | 342 | u64 start, u64 len) |
367 | { | 343 | { |
368 | struct extent_map *em; | 344 | return __lookup_extent_mapping(tree, start, len, 0); |
369 | struct rb_node *rb_node; | ||
370 | struct rb_node *prev = NULL; | ||
371 | struct rb_node *next = NULL; | ||
372 | |||
373 | rb_node = __tree_search(&tree->map, start, &prev, &next); | ||
374 | if (!rb_node && prev) { | ||
375 | em = rb_entry(prev, struct extent_map, rb_node); | ||
376 | goto found; | ||
377 | } | ||
378 | if (!rb_node && next) { | ||
379 | em = rb_entry(next, struct extent_map, rb_node); | ||
380 | goto found; | ||
381 | } | ||
382 | if (!rb_node) { | ||
383 | em = NULL; | ||
384 | goto out; | ||
385 | } | ||
386 | if (IS_ERR(rb_node)) { | ||
387 | em = ERR_CAST(rb_node); | ||
388 | goto out; | ||
389 | } | ||
390 | em = rb_entry(rb_node, struct extent_map, rb_node); | ||
391 | goto found; | ||
392 | |||
393 | em = NULL; | ||
394 | goto out; | ||
395 | |||
396 | found: | ||
397 | atomic_inc(&em->refs); | ||
398 | out: | ||
399 | return em; | ||
400 | } | 345 | } |
401 | 346 | ||
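Both exported lookups now funnel into one helper: with strict set, __lookup_extent_mapping returns an entry only when it really overlaps [start, start + len); with strict clear it may return a neighbouring entry on a miss, which is the behaviour search_extent_mapping wants. A runnable userspace model of that contract, using a sorted array in place of the rbtree (every name here is illustrative, not kernel API):

	#include <stdio.h>
	#include <stdint.h>
	#include <stddef.h>

	struct map { uint64_t start, len; };

	/* Return the first entry overlapping [start, start + len), or,
	 * when strict is 0, a neighbouring entry even without overlap. */
	static struct map *lookup(struct map *t, size_t n,
				  uint64_t start, uint64_t len, int strict)
	{
		uint64_t end = start + len;
		size_t i;

		for (i = 0; i < n; i++) {
			struct map *m = &t[i];

			if (end > m->start && start < m->start + m->len)
				return m;	/* real overlap */
			if (m->start >= end)	/* first entry past the range */
				return strict ? NULL : m;
		}
		return (!strict && n) ? &t[n - 1] : NULL;
	}

	int main(void)
	{
		struct map tree[] = { { 0, 4096 }, { 8192, 4096 } };
		struct map *m;

		m = lookup(tree, 2, 4096, 4096, 1);	/* hole: strict misses */
		printf("strict: %p\n", (void *)m);
		m = lookup(tree, 2, 4096, 4096, 0);	/* loose: neighbour */
		printf("loose: start=%llu\n", (unsigned long long)m->start);
		return 0;
	}

The kernel helper prefers the predecessor over the successor on a miss; the model just returns the first neighbour it meets, which is enough to show the strict/non-strict split.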
402 | /** | 347 | /** |
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 90d4ee52cd45..b910694f61ed 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c | |||
@@ -177,6 +177,15 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root, | |||
177 | 177 | ||
178 | WARN_ON(bio->bi_vcnt <= 0); | 178 | WARN_ON(bio->bi_vcnt <= 0); |
179 | 179 | ||
180 | /* | ||
181 | * the free space stuff is only read when it hasn't been | ||
182 | * updated in the current transaction. So, we can safely | ||
183 | * read from the commit root and sidestep a nasty deadlock | ||
184 | * between reading the free space cache and updating the csum tree. | ||
185 | */ | ||
186 | if (btrfs_is_free_space_inode(root, inode)) | ||
187 | path->search_commit_root = 1; | ||
188 | |||
180 | disk_bytenr = (u64)bio->bi_sector << 9; | 189 | disk_bytenr = (u64)bio->bi_sector << 9; |
181 | if (dio) | 190 | if (dio) |
182 | offset = logical_offset; | 191 | offset = logical_offset; |
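The hunk above makes csum lookups for the free space inode search the commit root: the cache is only read before it has been dirtied in the running transaction, so the committed copy is current, and a commit-root reader takes no locks on the live csum tree that a concurrent cache writeout might need. A hedged fragment of the same setup, kernel context assumed (compare btrfs_lookup_csums_range() below, where skip_locking accompanies search_commit_root):

	struct btrfs_path *path = btrfs_alloc_path();

	if (!path)
		return -ENOMEM;
	/*
	 * Commit-root readers see the last committed tree, so they can
	 * sidestep the lock inversion between reading the free space
	 * cache and updating the csum tree.
	 */
	if (btrfs_is_free_space_inode(root, inode))
		path->search_commit_root = 1;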
@@ -282,7 +291,8 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, | |||
282 | u16 csum_size = btrfs_super_csum_size(&root->fs_info->super_copy); | 291 | u16 csum_size = btrfs_super_csum_size(&root->fs_info->super_copy); |
283 | 292 | ||
284 | path = btrfs_alloc_path(); | 293 | path = btrfs_alloc_path(); |
285 | BUG_ON(!path); | 294 | if (!path) |
295 | return -ENOMEM; | ||
286 | 296 | ||
287 | if (search_commit) { | 297 | if (search_commit) { |
288 | path->skip_locking = 1; | 298 | path->skip_locking = 1; |
@@ -664,15 +674,13 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, | |||
664 | struct btrfs_sector_sum *sector_sum; | 674 | struct btrfs_sector_sum *sector_sum; |
665 | u32 nritems; | 675 | u32 nritems; |
666 | u32 ins_size; | 676 | u32 ins_size; |
667 | char *eb_map; | ||
668 | char *eb_token; | ||
669 | unsigned long map_len; | ||
670 | unsigned long map_start; | ||
671 | u16 csum_size = | 677 | u16 csum_size = |
672 | btrfs_super_csum_size(&root->fs_info->super_copy); | 678 | btrfs_super_csum_size(&root->fs_info->super_copy); |
673 | 679 | ||
674 | path = btrfs_alloc_path(); | 680 | path = btrfs_alloc_path(); |
675 | BUG_ON(!path); | 681 | if (!path) |
682 | return -ENOMEM; | ||
683 | |||
676 | sector_sum = sums->sums; | 684 | sector_sum = sums->sums; |
677 | again: | 685 | again: |
678 | next_offset = (u64)-1; | 686 | next_offset = (u64)-1; |
@@ -814,30 +822,9 @@ found: | |||
814 | item_end = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item); | 822 | item_end = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item); |
815 | item_end = (struct btrfs_csum_item *)((unsigned char *)item_end + | 823 | item_end = (struct btrfs_csum_item *)((unsigned char *)item_end + |
816 | btrfs_item_size_nr(leaf, path->slots[0])); | 824 | btrfs_item_size_nr(leaf, path->slots[0])); |
817 | eb_token = NULL; | ||
818 | next_sector: | 825 | next_sector: |
819 | 826 | ||
820 | if (!eb_token || | 827 | write_extent_buffer(leaf, §or_sum->sum, (unsigned long)item, csum_size); |
821 | (unsigned long)item + csum_size >= map_start + map_len) { | ||
822 | int err; | ||
823 | |||
824 | if (eb_token) | ||
825 | unmap_extent_buffer(leaf, eb_token, KM_USER1); | ||
826 | eb_token = NULL; | ||
827 | err = map_private_extent_buffer(leaf, (unsigned long)item, | ||
828 | csum_size, | ||
829 | &eb_token, &eb_map, | ||
830 | &map_start, &map_len, KM_USER1); | ||
831 | if (err) | ||
832 | eb_token = NULL; | ||
833 | } | ||
834 | if (eb_token) { | ||
835 | memcpy(eb_token + ((unsigned long)item & (PAGE_CACHE_SIZE - 1)), | ||
836 | §or_sum->sum, csum_size); | ||
837 | } else { | ||
838 | write_extent_buffer(leaf, §or_sum->sum, | ||
839 | (unsigned long)item, csum_size); | ||
840 | } | ||
841 | 828 | ||
842 | total_bytes += root->sectorsize; | 829 | total_bytes += root->sectorsize; |
843 | sector_sum++; | 830 | sector_sum++; |
@@ -850,10 +837,7 @@ next_sector: | |||
850 | goto next_sector; | 837 | goto next_sector; |
851 | } | 838 | } |
852 | } | 839 | } |
853 | if (eb_token) { | 840 | |
854 | unmap_extent_buffer(leaf, eb_token, KM_USER1); | ||
855 | eb_token = NULL; | ||
856 | } | ||
857 | btrfs_mark_buffer_dirty(path->nodes[0]); | 841 | btrfs_mark_buffer_dirty(path->nodes[0]); |
858 | if (total_bytes < sums->len) { | 842 | if (total_bytes < sums->len) { |
859 | btrfs_release_path(path); | 843 | btrfs_release_path(path); |
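The whole eb_token/map_private_extent_buffer dance could be deleted because write_extent_buffer() already walks the extent buffer page by page and splits the copy at page boundaries. A runnable userspace model of that splitting; PAGE_SIZE and the helper are illustrative:

	#include <stdio.h>
	#include <stdint.h>
	#include <string.h>

	#define PAGE_SIZE 4096

	/* Model of write_extent_buffer(): copy src into a buffer made of
	 * discontiguous pages, splitting at every page boundary. */
	static void write_paged(uint8_t *pages[], unsigned long start,
				const void *src, unsigned long len)
	{
		const uint8_t *s = src;

		while (len) {
			unsigned long off = start & (PAGE_SIZE - 1);
			unsigned long cur = PAGE_SIZE - off;

			if (cur > len)
				cur = len;
			memcpy(pages[start / PAGE_SIZE] + off, s, cur);
			start += cur;
			s += cur;
			len -= cur;
		}
	}

	int main(void)
	{
		static uint8_t p0[PAGE_SIZE], p1[PAGE_SIZE];
		uint8_t *pages[] = { p0, p1 };
		uint32_t csum = 0xdeadbeef;

		/* A 4-byte csum two bytes before a page boundary lands
		 * half in p0 and half in p1, the case the removed code
		 * handled by remapping eb_token. */
		write_paged(pages, PAGE_SIZE - 2, &csum, sizeof(csum));
		printf("%02x %02x | %02x %02x\n",
		       p0[PAGE_SIZE - 2], p0[PAGE_SIZE - 1], p1[0], p1[1]);
		return 0;
	}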
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index fa4ef18b66b1..658d66959abe 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
@@ -74,7 +74,7 @@ struct inode_defrag { | |||
74 | * If an existing record is found the defrag item you | 74 | * If an existing record is found the defrag item you |
75 | * pass in is freed | 75 | * pass in is freed |
76 | */ | 76 | */ |
77 | static int __btrfs_add_inode_defrag(struct inode *inode, | 77 | static void __btrfs_add_inode_defrag(struct inode *inode, |
78 | struct inode_defrag *defrag) | 78 | struct inode_defrag *defrag) |
79 | { | 79 | { |
80 | struct btrfs_root *root = BTRFS_I(inode)->root; | 80 | struct btrfs_root *root = BTRFS_I(inode)->root; |
@@ -106,11 +106,11 @@ static int __btrfs_add_inode_defrag(struct inode *inode, | |||
106 | BTRFS_I(inode)->in_defrag = 1; | 106 | BTRFS_I(inode)->in_defrag = 1; |
107 | rb_link_node(&defrag->rb_node, parent, p); | 107 | rb_link_node(&defrag->rb_node, parent, p); |
108 | rb_insert_color(&defrag->rb_node, &root->fs_info->defrag_inodes); | 108 | rb_insert_color(&defrag->rb_node, &root->fs_info->defrag_inodes); |
109 | return 0; | 109 | return; |
110 | 110 | ||
111 | exists: | 111 | exists: |
112 | kfree(defrag); | 112 | kfree(defrag); |
113 | return 0; | 113 | return; |
114 | 114 | ||
115 | } | 115 | } |
116 | 116 | ||
@@ -123,7 +123,6 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, | |||
123 | { | 123 | { |
124 | struct btrfs_root *root = BTRFS_I(inode)->root; | 124 | struct btrfs_root *root = BTRFS_I(inode)->root; |
125 | struct inode_defrag *defrag; | 125 | struct inode_defrag *defrag; |
126 | int ret = 0; | ||
127 | u64 transid; | 126 | u64 transid; |
128 | 127 | ||
129 | if (!btrfs_test_opt(root, AUTO_DEFRAG)) | 128 | if (!btrfs_test_opt(root, AUTO_DEFRAG)) |
@@ -150,9 +149,9 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, | |||
150 | 149 | ||
151 | spin_lock(&root->fs_info->defrag_inodes_lock); | 150 | spin_lock(&root->fs_info->defrag_inodes_lock); |
152 | if (!BTRFS_I(inode)->in_defrag) | 151 | if (!BTRFS_I(inode)->in_defrag) |
153 | ret = __btrfs_add_inode_defrag(inode, defrag); | 152 | __btrfs_add_inode_defrag(inode, defrag); |
154 | spin_unlock(&root->fs_info->defrag_inodes_lock); | 153 | spin_unlock(&root->fs_info->defrag_inodes_lock); |
155 | return ret; | 154 | return 0; |
156 | } | 155 | } |
157 | 156 | ||
158 | /* | 157 | /* |
@@ -855,7 +854,8 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, | |||
855 | btrfs_drop_extent_cache(inode, start, end - 1, 0); | 854 | btrfs_drop_extent_cache(inode, start, end - 1, 0); |
856 | 855 | ||
857 | path = btrfs_alloc_path(); | 856 | path = btrfs_alloc_path(); |
858 | BUG_ON(!path); | 857 | if (!path) |
858 | return -ENOMEM; | ||
859 | again: | 859 | again: |
860 | recow = 0; | 860 | recow = 0; |
861 | split = start; | 861 | split = start; |
@@ -1059,7 +1059,7 @@ static int prepare_uptodate_page(struct page *page, u64 pos) | |||
1059 | static noinline int prepare_pages(struct btrfs_root *root, struct file *file, | 1059 | static noinline int prepare_pages(struct btrfs_root *root, struct file *file, |
1060 | struct page **pages, size_t num_pages, | 1060 | struct page **pages, size_t num_pages, |
1061 | loff_t pos, unsigned long first_index, | 1061 | loff_t pos, unsigned long first_index, |
1062 | unsigned long last_index, size_t write_bytes) | 1062 | size_t write_bytes) |
1063 | { | 1063 | { |
1064 | struct extent_state *cached_state = NULL; | 1064 | struct extent_state *cached_state = NULL; |
1065 | int i; | 1065 | int i; |
@@ -1081,7 +1081,8 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file, | |||
1081 | 1081 | ||
1082 | again: | 1082 | again: |
1083 | for (i = 0; i < num_pages; i++) { | 1083 | for (i = 0; i < num_pages; i++) { |
1084 | pages[i] = grab_cache_page(inode->i_mapping, index + i); | 1084 | pages[i] = find_or_create_page(inode->i_mapping, index + i, |
1085 | GFP_NOFS); | ||
1085 | if (!pages[i]) { | 1086 | if (!pages[i]) { |
1086 | faili = i - 1; | 1087 | faili = i - 1; |
1087 | err = -ENOMEM; | 1088 | err = -ENOMEM; |
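grab_cache_page() allocates new pages with the mapping's default gfp mask, which normally includes __GFP_FS and can therefore recurse back into the filesystem under memory pressure; find_or_create_page() lets the caller pin the mask to GFP_NOFS. A fragment of the substitution with error handling trimmed (kernel context of this era):

	/*
	 * grab_cache_page(mapping, index) is shorthand for
	 * find_or_create_page(mapping, index, mapping_gfp_mask(mapping));
	 * spelling it out drops __GFP_FS from the allocation.
	 */
	pages[i] = find_or_create_page(inode->i_mapping, index + i, GFP_NOFS);
	if (!pages[i])
		return -ENOMEM;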
@@ -1158,7 +1159,6 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, | |||
1158 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1159 | struct btrfs_root *root = BTRFS_I(inode)->root; |
1159 | struct page **pages = NULL; | 1160 | struct page **pages = NULL; |
1160 | unsigned long first_index; | 1161 | unsigned long first_index; |
1161 | unsigned long last_index; | ||
1162 | size_t num_written = 0; | 1162 | size_t num_written = 0; |
1163 | int nrptrs; | 1163 | int nrptrs; |
1164 | int ret = 0; | 1164 | int ret = 0; |
@@ -1171,7 +1171,6 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, | |||
1171 | return -ENOMEM; | 1171 | return -ENOMEM; |
1172 | 1172 | ||
1173 | first_index = pos >> PAGE_CACHE_SHIFT; | 1173 | first_index = pos >> PAGE_CACHE_SHIFT; |
1174 | last_index = (pos + iov_iter_count(i)) >> PAGE_CACHE_SHIFT; | ||
1175 | 1174 | ||
1176 | while (iov_iter_count(i) > 0) { | 1175 | while (iov_iter_count(i) > 0) { |
1177 | size_t offset = pos & (PAGE_CACHE_SIZE - 1); | 1176 | size_t offset = pos & (PAGE_CACHE_SIZE - 1); |
@@ -1205,8 +1204,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, | |||
1205 | * contents of pages from loop to loop | 1204 | * contents of pages from loop to loop |
1206 | */ | 1205 | */ |
1207 | ret = prepare_pages(root, file, pages, num_pages, | 1206 | ret = prepare_pages(root, file, pages, num_pages, |
1208 | pos, first_index, last_index, | 1207 | pos, first_index, write_bytes); |
1209 | write_bytes); | ||
1210 | if (ret) { | 1208 | if (ret) { |
1211 | btrfs_delalloc_release_space(inode, | 1209 | btrfs_delalloc_release_space(inode, |
1212 | num_pages << PAGE_CACHE_SHIFT); | 1210 | num_pages << PAGE_CACHE_SHIFT); |
@@ -1238,9 +1236,11 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, | |||
1238 | * managed to copy. | 1236 | * managed to copy. |
1239 | */ | 1237 | */ |
1240 | if (num_pages > dirty_pages) { | 1238 | if (num_pages > dirty_pages) { |
1241 | if (copied > 0) | 1239 | if (copied > 0) { |
1242 | atomic_inc( | 1240 | spin_lock(&BTRFS_I(inode)->lock); |
1243 | &BTRFS_I(inode)->outstanding_extents); | 1241 | BTRFS_I(inode)->outstanding_extents++; |
1242 | spin_unlock(&BTRFS_I(inode)->lock); | ||
1243 | } | ||
1244 | btrfs_delalloc_release_space(inode, | 1244 | btrfs_delalloc_release_space(inode, |
1245 | (num_pages - dirty_pages) << | 1245 | (num_pages - dirty_pages) << |
1246 | PAGE_CACHE_SHIFT); | 1246 | PAGE_CACHE_SHIFT); |
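outstanding_extents also changes from an atomic_t to a plain counter guarded by the new BTRFS_I(inode)->lock spinlock, presumably so it can later be read and updated together with other per-inode reservation state with no window between the check and the change. A runnable userspace model of why one lock beats independent atomics once two fields must stay in step (all names illustrative; build with -pthread):

	#include <pthread.h>
	#include <stdio.h>

	/* Model of per-inode state: two fields that must move together. */
	struct inode_state {
		pthread_mutex_t lock;
		long outstanding_extents;
		long reserved_extents;
	};

	static struct inode_state st = { PTHREAD_MUTEX_INITIALIZER, 0, 0 };

	static void *writer(void *arg)
	{
		int i;

		(void)arg;
		for (i = 0; i < 100000; i++) {
			/* With two separate atomics another thread could
			 * observe one counter bumped and not the other;
			 * the shared lock keeps the pair consistent. */
			pthread_mutex_lock(&st.lock);
			st.outstanding_extents++;
			st.reserved_extents++;
			pthread_mutex_unlock(&st.lock);
		}
		return NULL;
	}

	int main(void)
	{
		pthread_t a, b;

		pthread_create(&a, NULL, writer, NULL);
		pthread_create(&b, NULL, writer, NULL);
		pthread_join(a, NULL);
		pthread_join(b, NULL);
		printf("%ld %ld\n", st.outstanding_extents,
		       st.reserved_extents);
		return 0;
	}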
@@ -1452,7 +1452,7 @@ int btrfs_release_file(struct inode *inode, struct file *filp) | |||
1452 | * important optimization for directories because holding the mutex prevents | 1452 | * important optimization for directories because holding the mutex prevents |
1453 | * new operations on the dir while we write to disk. | 1453 | * new operations on the dir while we write to disk. |
1454 | */ | 1454 | */ |
1455 | int btrfs_sync_file(struct file *file, int datasync) | 1455 | int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) |
1456 | { | 1456 | { |
1457 | struct dentry *dentry = file->f_path.dentry; | 1457 | struct dentry *dentry = file->f_path.dentry; |
1458 | struct inode *inode = dentry->d_inode; | 1458 | struct inode *inode = dentry->d_inode; |
@@ -1462,9 +1462,13 @@ int btrfs_sync_file(struct file *file, int datasync) | |||
1462 | 1462 | ||
1463 | trace_btrfs_sync_file(file, datasync); | 1463 | trace_btrfs_sync_file(file, datasync); |
1464 | 1464 | ||
1465 | ret = filemap_write_and_wait_range(inode->i_mapping, start, end); | ||
1466 | if (ret) | ||
1467 | return ret; | ||
1468 | mutex_lock(&inode->i_mutex); | ||
1469 | |||
1465 | /* we wait first, since the writeback may change the inode */ | 1470 | /* we wait first, since the writeback may change the inode */ |
1466 | root->log_batch++; | 1471 | root->log_batch++; |
1467 | /* the VFS called filemap_fdatawrite for us */ | ||
1468 | btrfs_wait_ordered_range(inode, 0, (u64)-1); | 1472 | btrfs_wait_ordered_range(inode, 0, (u64)-1); |
1469 | root->log_batch++; | 1473 | root->log_batch++; |
1470 | 1474 | ||
@@ -1472,8 +1476,10 @@ int btrfs_sync_file(struct file *file, int datasync) | |||
1472 | * check the transaction that last modified this inode | 1476 | * check the transaction that last modified this inode |
1473 | * and see if its already been committed | 1477 | * and see if its already been committed |
1474 | */ | 1478 | */ |
1475 | if (!BTRFS_I(inode)->last_trans) | 1479 | if (!BTRFS_I(inode)->last_trans) { |
1480 | mutex_unlock(&inode->i_mutex); | ||
1476 | goto out; | 1481 | goto out; |
1482 | } | ||
1477 | 1483 | ||
1478 | /* | 1484 | /* |
1479 | * if the last transaction that changed this file was before | 1485 | * if the last transaction that changed this file was before |
@@ -1484,6 +1490,7 @@ int btrfs_sync_file(struct file *file, int datasync) | |||
1484 | if (BTRFS_I(inode)->last_trans <= | 1490 | if (BTRFS_I(inode)->last_trans <= |
1485 | root->fs_info->last_trans_committed) { | 1491 | root->fs_info->last_trans_committed) { |
1486 | BTRFS_I(inode)->last_trans = 0; | 1492 | BTRFS_I(inode)->last_trans = 0; |
1493 | mutex_unlock(&inode->i_mutex); | ||
1487 | goto out; | 1494 | goto out; |
1488 | } | 1495 | } |
1489 | 1496 | ||
@@ -1496,12 +1503,15 @@ int btrfs_sync_file(struct file *file, int datasync) | |||
1496 | trans = btrfs_start_transaction(root, 0); | 1503 | trans = btrfs_start_transaction(root, 0); |
1497 | if (IS_ERR(trans)) { | 1504 | if (IS_ERR(trans)) { |
1498 | ret = PTR_ERR(trans); | 1505 | ret = PTR_ERR(trans); |
1506 | mutex_unlock(&inode->i_mutex); | ||
1499 | goto out; | 1507 | goto out; |
1500 | } | 1508 | } |
1501 | 1509 | ||
1502 | ret = btrfs_log_dentry_safe(trans, root, dentry); | 1510 | ret = btrfs_log_dentry_safe(trans, root, dentry); |
1503 | if (ret < 0) | 1511 | if (ret < 0) { |
1512 | mutex_unlock(&inode->i_mutex); | ||
1504 | goto out; | 1513 | goto out; |
1514 | } | ||
1505 | 1515 | ||
1506 | /* we've logged all the items and now have a consistent | 1516 | /* we've logged all the items and now have a consistent |
1507 | * version of the file in the log. It is possible that | 1517 | * version of the file in the log. It is possible that |
@@ -1513,7 +1523,7 @@ int btrfs_sync_file(struct file *file, int datasync) | |||
1513 | * file again, but that will end up using the synchronization | 1523 | * file again, but that will end up using the synchronization |
1514 | * inside btrfs_sync_log to keep things safe. | 1524 | * inside btrfs_sync_log to keep things safe. |
1515 | */ | 1525 | */ |
1516 | mutex_unlock(&dentry->d_inode->i_mutex); | 1526 | mutex_unlock(&inode->i_mutex); |
1517 | 1527 | ||
1518 | if (ret != BTRFS_NO_LOG_SYNC) { | 1528 | if (ret != BTRFS_NO_LOG_SYNC) { |
1519 | if (ret > 0) { | 1529 | if (ret > 0) { |
@@ -1528,7 +1538,6 @@ int btrfs_sync_file(struct file *file, int datasync) | |||
1528 | } else { | 1538 | } else { |
1529 | ret = btrfs_end_transaction(trans, root); | 1539 | ret = btrfs_end_transaction(trans, root); |
1530 | } | 1540 | } |
1531 | mutex_lock(&dentry->d_inode->i_mutex); | ||
1532 | out: | 1541 | out: |
1533 | return ret > 0 ? -EIO : ret; | 1542 | return ret > 0 ? -EIO : ret; |
1534 | } | 1543 | } |
@@ -1664,8 +1673,158 @@ out: | |||
1664 | return ret; | 1673 | return ret; |
1665 | } | 1674 | } |
1666 | 1675 | ||
1676 | static int find_desired_extent(struct inode *inode, loff_t *offset, int origin) | ||
1677 | { | ||
1678 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
1679 | struct extent_map *em; | ||
1680 | struct extent_state *cached_state = NULL; | ||
1681 | u64 lockstart = *offset; | ||
1682 | u64 lockend = i_size_read(inode); | ||
1683 | u64 start = *offset; | ||
1684 | u64 orig_start = *offset; | ||
1685 | u64 len = i_size_read(inode); | ||
1686 | u64 last_end = 0; | ||
1687 | int ret = 0; | ||
1688 | |||
1689 | lockend = max_t(u64, root->sectorsize, lockend); | ||
1690 | if (lockend <= lockstart) | ||
1691 | lockend = lockstart + root->sectorsize; | ||
1692 | |||
1693 | len = lockend - lockstart + 1; | ||
1694 | |||
1695 | len = max_t(u64, len, root->sectorsize); | ||
1696 | if (inode->i_size == 0) | ||
1697 | return -ENXIO; | ||
1698 | |||
1699 | lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend, 0, | ||
1700 | &cached_state, GFP_NOFS); | ||
1701 | |||
1702 | /* | ||
1703 | * Delalloc is such a pain. If we have a hole and we have pending | ||
1704 | * delalloc for a portion of the hole we will get back a hole that | ||
1705 | * exists for the entire range since it hasn't been actually written | ||
1706 | * yet. So to take care of this case we need to look for an extent just | ||
1707 | * before the position we want in case there is outstanding delalloc | ||
1708 | * going on here. | ||
1709 | */ | ||
1710 | if (origin == SEEK_HOLE && start != 0) { | ||
1711 | if (start <= root->sectorsize) | ||
1712 | em = btrfs_get_extent_fiemap(inode, NULL, 0, 0, | ||
1713 | root->sectorsize, 0); | ||
1714 | else | ||
1715 | em = btrfs_get_extent_fiemap(inode, NULL, 0, | ||
1716 | start - root->sectorsize, | ||
1717 | root->sectorsize, 0); | ||
1718 | if (IS_ERR(em)) { | ||
1719 | ret = -ENXIO; | ||
1720 | goto out; | ||
1721 | } | ||
1722 | last_end = em->start + em->len; | ||
1723 | if (em->block_start == EXTENT_MAP_DELALLOC) | ||
1724 | last_end = min_t(u64, last_end, inode->i_size); | ||
1725 | free_extent_map(em); | ||
1726 | } | ||
1727 | |||
1728 | while (1) { | ||
1729 | em = btrfs_get_extent_fiemap(inode, NULL, 0, start, len, 0); | ||
1730 | if (IS_ERR(em)) { | ||
1731 | ret = -ENXIO; | ||
1732 | break; | ||
1733 | } | ||
1734 | |||
1735 | if (em->block_start == EXTENT_MAP_HOLE) { | ||
1736 | if (test_bit(EXTENT_FLAG_VACANCY, &em->flags)) { | ||
1737 | if (last_end <= orig_start) { | ||
1738 | free_extent_map(em); | ||
1739 | ret = -ENXIO; | ||
1740 | break; | ||
1741 | } | ||
1742 | } | ||
1743 | |||
1744 | if (origin == SEEK_HOLE) { | ||
1745 | *offset = start; | ||
1746 | free_extent_map(em); | ||
1747 | break; | ||
1748 | } | ||
1749 | } else { | ||
1750 | if (origin == SEEK_DATA) { | ||
1751 | if (em->block_start == EXTENT_MAP_DELALLOC) { | ||
1752 | if (start >= inode->i_size) { | ||
1753 | free_extent_map(em); | ||
1754 | ret = -ENXIO; | ||
1755 | break; | ||
1756 | } | ||
1757 | } | ||
1758 | |||
1759 | *offset = start; | ||
1760 | free_extent_map(em); | ||
1761 | break; | ||
1762 | } | ||
1763 | } | ||
1764 | |||
1765 | start = em->start + em->len; | ||
1766 | last_end = em->start + em->len; | ||
1767 | |||
1768 | if (em->block_start == EXTENT_MAP_DELALLOC) | ||
1769 | last_end = min_t(u64, last_end, inode->i_size); | ||
1770 | |||
1771 | if (test_bit(EXTENT_FLAG_VACANCY, &em->flags)) { | ||
1772 | free_extent_map(em); | ||
1773 | ret = -ENXIO; | ||
1774 | break; | ||
1775 | } | ||
1776 | free_extent_map(em); | ||
1777 | cond_resched(); | ||
1778 | } | ||
1779 | if (!ret) | ||
1780 | *offset = min(*offset, inode->i_size); | ||
1781 | out: | ||
1782 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, | ||
1783 | &cached_state, GFP_NOFS); | ||
1784 | return ret; | ||
1785 | } | ||
1786 | |||
1787 | static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int origin) | ||
1788 | { | ||
1789 | struct inode *inode = file->f_mapping->host; | ||
1790 | int ret; | ||
1791 | |||
1792 | mutex_lock(&inode->i_mutex); | ||
1793 | switch (origin) { | ||
1794 | case SEEK_END: | ||
1795 | case SEEK_CUR: | ||
1796 | offset = generic_file_llseek_unlocked(file, offset, origin); | ||
1797 | goto out; | ||
1798 | case SEEK_DATA: | ||
1799 | case SEEK_HOLE: | ||
1800 | ret = find_desired_extent(inode, &offset, origin); | ||
1801 | if (ret) { | ||
1802 | mutex_unlock(&inode->i_mutex); | ||
1803 | return ret; | ||
1804 | } | ||
1805 | } | ||
1806 | |||
1807 | if (offset < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET)) { | ||
1808 | offset = -EINVAL; | ||
1809 | goto out; | ||
1810 | } | ||
1811 | if (offset > inode->i_sb->s_maxbytes) { | ||
1812 | offset = -EINVAL; | ||
1813 | goto out; | ||
1814 | } | ||
1815 | |||
1816 | /* Special lock needed here? */ | ||
1817 | if (offset != file->f_pos) { | ||
1818 | file->f_pos = offset; | ||
1819 | file->f_version = 0; | ||
1820 | } | ||
1821 | out: | ||
1822 | mutex_unlock(&inode->i_mutex); | ||
1823 | return offset; | ||
1824 | } | ||
1825 | |||
1667 | const struct file_operations btrfs_file_operations = { | 1826 | const struct file_operations btrfs_file_operations = { |
1668 | .llseek = generic_file_llseek, | 1827 | .llseek = btrfs_file_llseek, |
1669 | .read = do_sync_read, | 1828 | .read = do_sync_read, |
1670 | .write = do_sync_write, | 1829 | .write = do_sync_write, |
1671 | .aio_read = generic_file_aio_read, | 1830 | .aio_read = generic_file_aio_read, |
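Replacing generic_file_llseek with btrfs_file_llseek wires SEEK_DATA and SEEK_HOLE into btrfs, with find_desired_extent() walking the extent maps so that delalloc ranges not yet on disk still count as data. (Note the two -EINVAL checks above must funnel through out:, otherwise they would return with i_mutex still held.) A runnable userspace demo over a sparse file; the path is illustrative, and the two whence values need a kernel and libc that define them (older ones fail with EINVAL):

	#define _GNU_SOURCE
	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		off_t data, hole;
		int fd = open("/tmp/sparse-demo", O_CREAT | O_TRUNC | O_RDWR,
			      0644);

		if (fd < 0) {
			perror("open");
			return 1;
		}
		/* A 1 MiB hole, then six bytes of data. */
		if (pwrite(fd, "hello\n", 6, 1 << 20) != 6) {
			perror("pwrite");
			return 1;
		}
		data = lseek(fd, 0, SEEK_DATA);	/* first data at/after 0 */
		hole = lseek(fd, 0, SEEK_HOLE);	/* first hole at/after 0 */
		printf("data at %lld, hole at %lld\n",
		       (long long)data, (long long)hole);
		close(fd);
		return 0;
	}

Expect data at roughly 1048576 and hole at 0; the reported data offset may be rounded down to a sector boundary rather than the exact byte written.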
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index bf0d61567f3d..6377713f639c 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c | |||
@@ -98,6 +98,12 @@ struct inode *lookup_free_space_inode(struct btrfs_root *root, | |||
98 | return inode; | 98 | return inode; |
99 | 99 | ||
100 | spin_lock(&block_group->lock); | 100 | spin_lock(&block_group->lock); |
101 | if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) { | ||
102 | printk(KERN_INFO "btrfs: old style space inode found, converting.\n"); | ||
103 | BTRFS_I(inode)->flags &= ~BTRFS_INODE_NODATASUM; | ||
104 | block_group->disk_cache_state = BTRFS_DC_CLEAR; | ||
105 | } | ||
106 | |||
101 | if (!btrfs_fs_closing(root->fs_info)) { | 107 | if (!btrfs_fs_closing(root->fs_info)) { |
102 | block_group->inode = igrab(inode); | 108 | block_group->inode = igrab(inode); |
103 | block_group->iref = 1; | 109 | block_group->iref = 1; |
@@ -135,7 +141,7 @@ int __create_free_space_inode(struct btrfs_root *root, | |||
135 | btrfs_set_inode_gid(leaf, inode_item, 0); | 141 | btrfs_set_inode_gid(leaf, inode_item, 0); |
136 | btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600); | 142 | btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600); |
137 | btrfs_set_inode_flags(leaf, inode_item, BTRFS_INODE_NOCOMPRESS | | 143 | btrfs_set_inode_flags(leaf, inode_item, BTRFS_INODE_NOCOMPRESS | |
138 | BTRFS_INODE_PREALLOC | BTRFS_INODE_NODATASUM); | 144 | BTRFS_INODE_PREALLOC); |
139 | btrfs_set_inode_nlink(leaf, inode_item, 1); | 145 | btrfs_set_inode_nlink(leaf, inode_item, 1); |
140 | btrfs_set_inode_transid(leaf, inode_item, trans->transid); | 146 | btrfs_set_inode_transid(leaf, inode_item, trans->transid); |
141 | btrfs_set_inode_block_group(leaf, inode_item, offset); | 147 | btrfs_set_inode_block_group(leaf, inode_item, offset); |
@@ -239,17 +245,12 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, | |||
239 | struct btrfs_free_space_header *header; | 245 | struct btrfs_free_space_header *header; |
240 | struct extent_buffer *leaf; | 246 | struct extent_buffer *leaf; |
241 | struct page *page; | 247 | struct page *page; |
242 | u32 *checksums = NULL, *crc; | ||
243 | char *disk_crcs = NULL; | ||
244 | struct btrfs_key key; | 248 | struct btrfs_key key; |
245 | struct list_head bitmaps; | 249 | struct list_head bitmaps; |
246 | u64 num_entries; | 250 | u64 num_entries; |
247 | u64 num_bitmaps; | 251 | u64 num_bitmaps; |
248 | u64 generation; | 252 | u64 generation; |
249 | u32 cur_crc = ~(u32)0; | ||
250 | pgoff_t index = 0; | 253 | pgoff_t index = 0; |
251 | unsigned long first_page_offset; | ||
252 | int num_checksums; | ||
253 | int ret = 0; | 254 | int ret = 0; |
254 | 255 | ||
255 | INIT_LIST_HEAD(&bitmaps); | 256 | INIT_LIST_HEAD(&bitmaps); |
@@ -292,16 +293,6 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, | |||
292 | if (!num_entries) | 293 | if (!num_entries) |
293 | goto out; | 294 | goto out; |
294 | 295 | ||
295 | /* Setup everything for doing checksumming */ | ||
296 | num_checksums = i_size_read(inode) / PAGE_CACHE_SIZE; | ||
297 | checksums = crc = kzalloc(sizeof(u32) * num_checksums, GFP_NOFS); | ||
298 | if (!checksums) | ||
299 | goto out; | ||
300 | first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64); | ||
301 | disk_crcs = kzalloc(first_page_offset, GFP_NOFS); | ||
302 | if (!disk_crcs) | ||
303 | goto out; | ||
304 | |||
305 | ret = readahead_cache(inode); | 296 | ret = readahead_cache(inode); |
306 | if (ret) | 297 | if (ret) |
307 | goto out; | 298 | goto out; |
@@ -311,18 +302,12 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, | |||
311 | struct btrfs_free_space *e; | 302 | struct btrfs_free_space *e; |
312 | void *addr; | 303 | void *addr; |
313 | unsigned long offset = 0; | 304 | unsigned long offset = 0; |
314 | unsigned long start_offset = 0; | ||
315 | int need_loop = 0; | 305 | int need_loop = 0; |
316 | 306 | ||
317 | if (!num_entries && !num_bitmaps) | 307 | if (!num_entries && !num_bitmaps) |
318 | break; | 308 | break; |
319 | 309 | ||
320 | if (index == 0) { | 310 | page = find_or_create_page(inode->i_mapping, index, GFP_NOFS); |
321 | start_offset = first_page_offset; | ||
322 | offset = start_offset; | ||
323 | } | ||
324 | |||
325 | page = grab_cache_page(inode->i_mapping, index); | ||
326 | if (!page) | 311 | if (!page) |
327 | goto free_cache; | 312 | goto free_cache; |
328 | 313 | ||
@@ -342,8 +327,15 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, | |||
342 | if (index == 0) { | 327 | if (index == 0) { |
343 | u64 *gen; | 328 | u64 *gen; |
344 | 329 | ||
345 | memcpy(disk_crcs, addr, first_page_offset); | 330 | /* |
346 | gen = addr + (sizeof(u32) * num_checksums); | 331 | * We put a bogus crc at the front of the first page so |
332 | * that old kernels trying to mount a fs with the new | ||
333 | * format are sure to discard the cache. | ||
334 | */ | ||
335 | addr += sizeof(u64); | ||
336 | offset += sizeof(u64); | ||
337 | |||
338 | gen = addr; | ||
347 | if (*gen != BTRFS_I(inode)->generation) { | 339 | if (*gen != BTRFS_I(inode)->generation) { |
348 | printk(KERN_ERR "btrfs: space cache generation" | 340 | printk(KERN_ERR "btrfs: space cache generation" |
349 | " (%llu) does not match inode (%llu)\n", | 341 | " (%llu) does not match inode (%llu)\n", |
@@ -355,24 +347,10 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, | |||
355 | page_cache_release(page); | 347 | page_cache_release(page); |
356 | goto free_cache; | 348 | goto free_cache; |
357 | } | 349 | } |
358 | crc = (u32 *)disk_crcs; | 350 | addr += sizeof(u64); |
359 | } | 351 | offset += sizeof(u64); |
360 | entry = addr + start_offset; | ||
361 | |||
362 | /* First lets check our crc before we do anything fun */ | ||
363 | cur_crc = ~(u32)0; | ||
364 | cur_crc = btrfs_csum_data(root, addr + start_offset, cur_crc, | ||
365 | PAGE_CACHE_SIZE - start_offset); | ||
366 | btrfs_csum_final(cur_crc, (char *)&cur_crc); | ||
367 | if (cur_crc != *crc) { | ||
368 | printk(KERN_ERR "btrfs: crc mismatch for page %lu\n", | ||
369 | index); | ||
370 | kunmap(page); | ||
371 | unlock_page(page); | ||
372 | page_cache_release(page); | ||
373 | goto free_cache; | ||
374 | } | 352 | } |
375 | crc++; | 353 | entry = addr; |
376 | 354 | ||
377 | while (1) { | 355 | while (1) { |
378 | if (!num_entries) | 356 | if (!num_entries) |
@@ -470,8 +448,6 @@ next: | |||
470 | 448 | ||
471 | ret = 1; | 449 | ret = 1; |
472 | out: | 450 | out: |
473 | kfree(checksums); | ||
474 | kfree(disk_crcs); | ||
475 | return ret; | 451 | return ret; |
476 | free_cache: | 452 | free_cache: |
477 | __btrfs_remove_free_space_cache(ctl); | 453 | __btrfs_remove_free_space_cache(ctl); |
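This is the format change the load side now parses: the per-page crc array that used to open the first page is gone, replaced by one u64 slot holding a deliberately wrong crc (so old kernels reject the cache) and one u64 holding the generation, with entries starting right after. A runnable userspace sketch of validating that prefix, assuming the two-u64 header shown in the hunks; the struct and names are illustrative, not the on-disk btrfs definitions:

	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>

	/* First-page prefix as laid out by __btrfs_write_out_cache(). */
	struct cache_head {
		uint64_t bogus_crc_slot;	/* only the low 4 bytes are written */
		uint64_t generation;
	};

	static int check_cache(const uint8_t *page, uint64_t inode_gen)
	{
		struct cache_head h;

		memcpy(&h, page, sizeof(h));
		if (h.generation != inode_gen) {
			fprintf(stderr,
				"generation %llu != inode %llu, discard\n",
				(unsigned long long)h.generation,
				(unsigned long long)inode_gen);
			return -1;
		}
		return 0;	/* entries begin at page + sizeof(h) */
	}

	int main(void)
	{
		uint8_t page[4096] = { 0 };
		uint64_t gen = 42;

		memcpy(page + 8, &gen, sizeof(gen));	/* writer's layout */
		return check_cache(page, 42) ? 1 : 0;
	}

The generation test mirrors the printk branch above: a mismatch means the cache is stale and must be discarded and rebuilt.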
@@ -569,8 +545,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
569 | struct btrfs_key key; | 545 | struct btrfs_key key; |
570 | u64 start, end, len; | 546 | u64 start, end, len; |
571 | u64 bytes = 0; | 547 | u64 bytes = 0; |
572 | u32 *crc, *checksums; | 548 | u32 crc = ~(u32)0; |
573 | unsigned long first_page_offset; | ||
574 | int index = 0, num_pages = 0; | 549 | int index = 0, num_pages = 0; |
575 | int entries = 0; | 550 | int entries = 0; |
576 | int bitmaps = 0; | 551 | int bitmaps = 0; |
@@ -590,34 +565,13 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
590 | num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> | 565 | num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> |
591 | PAGE_CACHE_SHIFT; | 566 | PAGE_CACHE_SHIFT; |
592 | 567 | ||
593 | /* Since the first page has all of our checksums and our generation we | ||
594 | * need to calculate the offset into the page that we can start writing | ||
595 | * our entries. | ||
596 | */ | ||
597 | first_page_offset = (sizeof(u32) * num_pages) + sizeof(u64); | ||
598 | |||
599 | filemap_write_and_wait(inode->i_mapping); | 568 | filemap_write_and_wait(inode->i_mapping); |
600 | btrfs_wait_ordered_range(inode, inode->i_size & | 569 | btrfs_wait_ordered_range(inode, inode->i_size & |
601 | ~(root->sectorsize - 1), (u64)-1); | 570 | ~(root->sectorsize - 1), (u64)-1); |
602 | 571 | ||
603 | /* make sure we don't overflow that first page */ | ||
604 | if (first_page_offset + sizeof(struct btrfs_free_space_entry) >= PAGE_CACHE_SIZE) { | ||
605 | /* this is really the same as running out of space, where we also return 0 */ | ||
606 | printk(KERN_CRIT "Btrfs: free space cache was too big for the crc page\n"); | ||
607 | ret = 0; | ||
608 | goto out_update; | ||
609 | } | ||
610 | |||
611 | /* We need a checksum per page. */ | ||
612 | crc = checksums = kzalloc(sizeof(u32) * num_pages, GFP_NOFS); | ||
613 | if (!crc) | ||
614 | return -1; | ||
615 | |||
616 | pages = kzalloc(sizeof(struct page *) * num_pages, GFP_NOFS); | 572 | pages = kzalloc(sizeof(struct page *) * num_pages, GFP_NOFS); |
617 | if (!pages) { | 573 | if (!pages) |
618 | kfree(crc); | ||
619 | return -1; | 574 | return -1; |
620 | } | ||
621 | 575 | ||
622 | /* Get the cluster for this block_group if it exists */ | 576 | /* Get the cluster for this block_group if it exists */ |
623 | if (block_group && !list_empty(&block_group->cluster_list)) | 577 | if (block_group && !list_empty(&block_group->cluster_list)) |
@@ -640,7 +594,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
640 | * know and don't freak out. | 594 | * know and don't freak out. |
641 | */ | 595 | */ |
642 | while (index < num_pages) { | 596 | while (index < num_pages) { |
643 | page = grab_cache_page(inode->i_mapping, index); | 597 | page = find_or_create_page(inode->i_mapping, index, GFP_NOFS); |
644 | if (!page) { | 598 | if (!page) { |
645 | int i; | 599 | int i; |
646 | 600 | ||
@@ -648,7 +602,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
648 | unlock_page(pages[i]); | 602 | unlock_page(pages[i]); |
649 | page_cache_release(pages[i]); | 603 | page_cache_release(pages[i]); |
650 | } | 604 | } |
651 | goto out_free; | 605 | goto out; |
652 | } | 606 | } |
653 | pages[index] = page; | 607 | pages[index] = page; |
654 | index++; | 608 | index++; |
@@ -668,17 +622,11 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
668 | /* Write out the extent entries */ | 622 | /* Write out the extent entries */ |
669 | do { | 623 | do { |
670 | struct btrfs_free_space_entry *entry; | 624 | struct btrfs_free_space_entry *entry; |
671 | void *addr; | 625 | void *addr, *orig; |
672 | unsigned long offset = 0; | 626 | unsigned long offset = 0; |
673 | unsigned long start_offset = 0; | ||
674 | 627 | ||
675 | next_page = false; | 628 | next_page = false; |
676 | 629 | ||
677 | if (index == 0) { | ||
678 | start_offset = first_page_offset; | ||
679 | offset = start_offset; | ||
680 | } | ||
681 | |||
682 | if (index >= num_pages) { | 630 | if (index >= num_pages) { |
683 | out_of_space = true; | 631 | out_of_space = true; |
684 | break; | 632 | break; |
@@ -686,10 +634,26 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
686 | 634 | ||
687 | page = pages[index]; | 635 | page = pages[index]; |
688 | 636 | ||
689 | addr = kmap(page); | 637 | orig = addr = kmap(page); |
690 | entry = addr + start_offset; | 638 | if (index == 0) { |
639 | u64 *gen; | ||
691 | 640 | ||
692 | memset(addr, 0, PAGE_CACHE_SIZE); | 641 | /* |
642 | * We're going to put in a bogus crc for this page to | ||
643 | * make sure that old kernels that aren't aware of this | ||
644 | * format discard the cache. | ||
645 | */ | ||
646 | addr += sizeof(u64); | ||
647 | offset += sizeof(u64); | ||
648 | |||
649 | gen = addr; | ||
650 | *gen = trans->transid; | ||
651 | addr += sizeof(u64); | ||
652 | offset += sizeof(u64); | ||
653 | } | ||
654 | entry = addr; | ||
655 | |||
656 | memset(addr, 0, PAGE_CACHE_SIZE - offset); | ||
693 | while (node && !next_page) { | 657 | while (node && !next_page) { |
694 | struct btrfs_free_space *e; | 658 | struct btrfs_free_space *e; |
695 | 659 | ||
@@ -752,13 +716,19 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
752 | next_page = true; | 716 | next_page = true; |
753 | entry++; | 717 | entry++; |
754 | } | 718 | } |
755 | *crc = ~(u32)0; | ||
756 | *crc = btrfs_csum_data(root, addr + start_offset, *crc, | ||
757 | PAGE_CACHE_SIZE - start_offset); | ||
758 | kunmap(page); | ||
759 | 719 | ||
760 | btrfs_csum_final(*crc, (char *)crc); | 720 | /* Generate bogus crc value */ |
761 | crc++; | 721 | if (index == 0) { |
722 | u32 *tmp; | ||
723 | crc = btrfs_csum_data(root, orig + sizeof(u64), crc, | ||
724 | PAGE_CACHE_SIZE - sizeof(u64)); | ||
725 | btrfs_csum_final(crc, (char *)&crc); | ||
726 | crc++; | ||
727 | tmp = orig; | ||
728 | *tmp = crc; | ||
729 | } | ||
730 | |||
731 | kunmap(page); | ||
762 | 732 | ||
763 | bytes += PAGE_CACHE_SIZE; | 733 | bytes += PAGE_CACHE_SIZE; |
764 | 734 | ||
@@ -779,11 +749,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
779 | 749 | ||
780 | addr = kmap(page); | 750 | addr = kmap(page); |
781 | memcpy(addr, entry->bitmap, PAGE_CACHE_SIZE); | 751 | memcpy(addr, entry->bitmap, PAGE_CACHE_SIZE); |
782 | *crc = ~(u32)0; | ||
783 | *crc = btrfs_csum_data(root, addr, *crc, PAGE_CACHE_SIZE); | ||
784 | kunmap(page); | 752 | kunmap(page); |
785 | btrfs_csum_final(*crc, (char *)crc); | ||
786 | crc++; | ||
787 | bytes += PAGE_CACHE_SIZE; | 753 | bytes += PAGE_CACHE_SIZE; |
788 | 754 | ||
789 | list_del_init(&entry->list); | 755 | list_del_init(&entry->list); |
@@ -796,7 +762,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
796 | i_size_read(inode) - 1, &cached_state, | 762 | i_size_read(inode) - 1, &cached_state, |
797 | GFP_NOFS); | 763 | GFP_NOFS); |
798 | ret = 0; | 764 | ret = 0; |
799 | goto out_free; | 765 | goto out; |
800 | } | 766 | } |
801 | 767 | ||
802 | /* Zero out the rest of the pages just to make sure */ | 768 | /* Zero out the rest of the pages just to make sure */ |
@@ -811,20 +777,6 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
811 | index++; | 777 | index++; |
812 | } | 778 | } |
813 | 779 | ||
814 | /* Write the checksums and trans id to the first page */ | ||
815 | { | ||
816 | void *addr; | ||
817 | u64 *gen; | ||
818 | |||
819 | page = pages[0]; | ||
820 | |||
821 | addr = kmap(page); | ||
822 | memcpy(addr, checksums, sizeof(u32) * num_pages); | ||
823 | gen = addr + (sizeof(u32) * num_pages); | ||
824 | *gen = trans->transid; | ||
825 | kunmap(page); | ||
826 | } | ||
827 | |||
828 | ret = btrfs_dirty_pages(root, inode, pages, num_pages, 0, | 780 | ret = btrfs_dirty_pages(root, inode, pages, num_pages, 0, |
829 | bytes, &cached_state); | 781 | bytes, &cached_state); |
830 | btrfs_drop_pages(pages, num_pages); | 782 | btrfs_drop_pages(pages, num_pages); |
@@ -833,7 +785,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
833 | 785 | ||
834 | if (ret) { | 786 | if (ret) { |
835 | ret = 0; | 787 | ret = 0; |
836 | goto out_free; | 788 | goto out; |
837 | } | 789 | } |
838 | 790 | ||
839 | BTRFS_I(inode)->generation = trans->transid; | 791 | BTRFS_I(inode)->generation = trans->transid; |
@@ -850,7 +802,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
850 | clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1, | 802 | clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1, |
851 | EXTENT_DIRTY | EXTENT_DELALLOC | | 803 | EXTENT_DIRTY | EXTENT_DELALLOC | |
852 | EXTENT_DO_ACCOUNTING, 0, 0, NULL, GFP_NOFS); | 804 | EXTENT_DO_ACCOUNTING, 0, 0, NULL, GFP_NOFS); |
853 | goto out_free; | 805 | goto out; |
854 | } | 806 | } |
855 | leaf = path->nodes[0]; | 807 | leaf = path->nodes[0]; |
856 | if (ret > 0) { | 808 | if (ret > 0) { |
@@ -866,7 +818,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
866 | EXTENT_DO_ACCOUNTING, 0, 0, NULL, | 818 | EXTENT_DO_ACCOUNTING, 0, 0, NULL, |
867 | GFP_NOFS); | 819 | GFP_NOFS); |
868 | btrfs_release_path(path); | 820 | btrfs_release_path(path); |
869 | goto out_free; | 821 | goto out; |
870 | } | 822 | } |
871 | } | 823 | } |
872 | header = btrfs_item_ptr(leaf, path->slots[0], | 824 | header = btrfs_item_ptr(leaf, path->slots[0], |
@@ -879,11 +831,8 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
879 | 831 | ||
880 | ret = 1; | 832 | ret = 1; |
881 | 833 | ||
882 | out_free: | 834 | out: |
883 | kfree(checksums); | ||
884 | kfree(pages); | 835 | kfree(pages); |
885 | |||
886 | out_update: | ||
887 | if (ret != 1) { | 836 | if (ret != 1) { |
888 | invalidate_inode_pages2_range(inode->i_mapping, 0, index); | 837 | invalidate_inode_pages2_range(inode->i_mapping, 0, index); |
889 | BTRFS_I(inode)->generation = 0; | 838 | BTRFS_I(inode)->generation = 0; |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index d340f63d8f07..15fceefbca0a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -750,15 +750,6 @@ static u64 get_extent_allocation_hint(struct inode *inode, u64 start, | |||
750 | return alloc_hint; | 750 | return alloc_hint; |
751 | } | 751 | } |
752 | 752 | ||
753 | static inline bool is_free_space_inode(struct btrfs_root *root, | ||
754 | struct inode *inode) | ||
755 | { | ||
756 | if (root == root->fs_info->tree_root || | ||
757 | BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID) | ||
758 | return true; | ||
759 | return false; | ||
760 | } | ||
761 | |||
762 | /* | 753 | /* |
763 | * when extent_io.c finds a delayed allocation range in the file, | 754 | * when extent_io.c finds a delayed allocation range in the file, |
764 | * the call backs end up in this code. The basic idea is to | 755 | * the call backs end up in this code. The basic idea is to |
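The file-local is_free_space_inode() is deleted and every caller switches to btrfs_is_free_space_inode(), which file-item.c already uses above, so the predicate is now shared across files. Presumably it keeps the removed body and moves to a header; a sketch under that assumption:

	static inline bool btrfs_is_free_space_inode(struct btrfs_root *root,
						     struct inode *inode)
	{
		/* Same test as the deleted helper: tree-root inodes and
		 * the free-ino cache inode are free space inodes. */
		if (root == root->fs_info->tree_root ||
		    BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID)
			return true;
		return false;
	}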
@@ -791,7 +782,7 @@ static noinline int cow_file_range(struct inode *inode, | |||
791 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | 782 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; |
792 | int ret = 0; | 783 | int ret = 0; |
793 | 784 | ||
794 | BUG_ON(is_free_space_inode(root, inode)); | 785 | BUG_ON(btrfs_is_free_space_inode(root, inode)); |
795 | trans = btrfs_join_transaction(root); | 786 | trans = btrfs_join_transaction(root); |
796 | BUG_ON(IS_ERR(trans)); | 787 | BUG_ON(IS_ERR(trans)); |
797 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | 788 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; |
@@ -1070,9 +1061,10 @@ static noinline int run_delalloc_nocow(struct inode *inode, | |||
1070 | u64 ino = btrfs_ino(inode); | 1061 | u64 ino = btrfs_ino(inode); |
1071 | 1062 | ||
1072 | path = btrfs_alloc_path(); | 1063 | path = btrfs_alloc_path(); |
1073 | BUG_ON(!path); | 1064 | if (!path) |
1065 | return -ENOMEM; | ||
1074 | 1066 | ||
1075 | nolock = is_free_space_inode(root, inode); | 1067 | nolock = btrfs_is_free_space_inode(root, inode); |
1076 | 1068 | ||
1077 | if (nolock) | 1069 | if (nolock) |
1078 | trans = btrfs_join_transaction_nolock(root); | 1070 | trans = btrfs_join_transaction_nolock(root); |
@@ -1291,15 +1283,16 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page, | |||
1291 | return ret; | 1283 | return ret; |
1292 | } | 1284 | } |
1293 | 1285 | ||
1294 | static int btrfs_split_extent_hook(struct inode *inode, | 1286 | static void btrfs_split_extent_hook(struct inode *inode, |
1295 | struct extent_state *orig, u64 split) | 1287 | struct extent_state *orig, u64 split) |
1296 | { | 1288 | { |
1297 | /* not delalloc, ignore it */ | 1289 | /* not delalloc, ignore it */ |
1298 | if (!(orig->state & EXTENT_DELALLOC)) | 1290 | if (!(orig->state & EXTENT_DELALLOC)) |
1299 | return 0; | 1291 | return; |
1300 | 1292 | ||
1301 | atomic_inc(&BTRFS_I(inode)->outstanding_extents); | 1293 | spin_lock(&BTRFS_I(inode)->lock); |
1302 | return 0; | 1294 | BTRFS_I(inode)->outstanding_extents++; |
1295 | spin_unlock(&BTRFS_I(inode)->lock); | ||
1303 | } | 1296 | } |
1304 | 1297 | ||
1305 | /* | 1298 | /* |
@@ -1308,16 +1301,17 @@ static int btrfs_split_extent_hook(struct inode *inode, | |||
1308 | * extents, such as when we are doing sequential writes, so we can properly | 1301 | * extents, such as when we are doing sequential writes, so we can properly |
1309 | * account for the metadata space we'll need. | 1302 | * account for the metadata space we'll need. |
1310 | */ | 1303 | */ |
1311 | static int btrfs_merge_extent_hook(struct inode *inode, | 1304 | static void btrfs_merge_extent_hook(struct inode *inode, |
1312 | struct extent_state *new, | 1305 | struct extent_state *new, |
1313 | struct extent_state *other) | 1306 | struct extent_state *other) |
1314 | { | 1307 | { |
1315 | /* not delalloc, ignore it */ | 1308 | /* not delalloc, ignore it */ |
1316 | if (!(other->state & EXTENT_DELALLOC)) | 1309 | if (!(other->state & EXTENT_DELALLOC)) |
1317 | return 0; | 1310 | return; |
1318 | 1311 | ||
1319 | atomic_dec(&BTRFS_I(inode)->outstanding_extents); | 1312 | spin_lock(&BTRFS_I(inode)->lock); |
1320 | return 0; | 1313 | BTRFS_I(inode)->outstanding_extents--; |
1314 | spin_unlock(&BTRFS_I(inode)->lock); | ||
1321 | } | 1315 | } |
1322 | 1316 | ||
1323 | /* | 1317 | /* |
@@ -1325,8 +1319,8 @@ static int btrfs_merge_extent_hook(struct inode *inode, | |||
1325 | * bytes in this file, and to maintain the list of inodes that | 1319 | * bytes in this file, and to maintain the list of inodes that |
1326 | * have pending delalloc work to be done. | 1320 | * have pending delalloc work to be done. |
1327 | */ | 1321 | */ |
1328 | static int btrfs_set_bit_hook(struct inode *inode, | 1322 | static void btrfs_set_bit_hook(struct inode *inode, |
1329 | struct extent_state *state, int *bits) | 1323 | struct extent_state *state, int *bits) |
1330 | { | 1324 | { |
1331 | 1325 | ||
1332 | /* | 1326 | /* |
@@ -1337,12 +1331,15 @@ static int btrfs_set_bit_hook(struct inode *inode, | |||
1337 | if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { | 1331 | if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { |
1338 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1332 | struct btrfs_root *root = BTRFS_I(inode)->root; |
1339 | u64 len = state->end + 1 - state->start; | 1333 | u64 len = state->end + 1 - state->start; |
1340 | bool do_list = !is_free_space_inode(root, inode); | 1334 | bool do_list = !btrfs_is_free_space_inode(root, inode); |
1341 | 1335 | ||
1342 | if (*bits & EXTENT_FIRST_DELALLOC) | 1336 | if (*bits & EXTENT_FIRST_DELALLOC) { |
1343 | *bits &= ~EXTENT_FIRST_DELALLOC; | 1337 | *bits &= ~EXTENT_FIRST_DELALLOC; |
1344 | else | 1338 | } else { |
1345 | atomic_inc(&BTRFS_I(inode)->outstanding_extents); | 1339 | spin_lock(&BTRFS_I(inode)->lock); |
1340 | BTRFS_I(inode)->outstanding_extents++; | ||
1341 | spin_unlock(&BTRFS_I(inode)->lock); | ||
1342 | } | ||
1346 | 1343 | ||
1347 | spin_lock(&root->fs_info->delalloc_lock); | 1344 | spin_lock(&root->fs_info->delalloc_lock); |
1348 | BTRFS_I(inode)->delalloc_bytes += len; | 1345 | BTRFS_I(inode)->delalloc_bytes += len; |
@@ -1353,14 +1350,13 @@ static int btrfs_set_bit_hook(struct inode *inode, | |||
1353 | } | 1350 | } |
1354 | spin_unlock(&root->fs_info->delalloc_lock); | 1351 | spin_unlock(&root->fs_info->delalloc_lock); |
1355 | } | 1352 | } |
1356 | return 0; | ||
1357 | } | 1353 | } |
1358 | 1354 | ||
1359 | /* | 1355 | /* |
1360 | * extent_io.c clear_bit_hook, see set_bit_hook for why | 1356 | * extent_io.c clear_bit_hook, see set_bit_hook for why |
1361 | */ | 1357 | */ |
1362 | static int btrfs_clear_bit_hook(struct inode *inode, | 1358 | static void btrfs_clear_bit_hook(struct inode *inode, |
1363 | struct extent_state *state, int *bits) | 1359 | struct extent_state *state, int *bits) |
1364 | { | 1360 | { |
1365 | /* | 1361 | /* |
1366 | * set_bit and clear bit hooks normally require _irqsave/restore | 1362 | * set_bit and clear bit hooks normally require _irqsave/restore |
@@ -1370,12 +1366,15 @@ static int btrfs_clear_bit_hook(struct inode *inode, | |||
1370 | if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { | 1366 | if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { |
1371 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1367 | struct btrfs_root *root = BTRFS_I(inode)->root; |
1372 | u64 len = state->end + 1 - state->start; | 1368 | u64 len = state->end + 1 - state->start; |
1373 | bool do_list = !is_free_space_inode(root, inode); | 1369 | bool do_list = !btrfs_is_free_space_inode(root, inode); |
1374 | 1370 | ||
1375 | if (*bits & EXTENT_FIRST_DELALLOC) | 1371 | if (*bits & EXTENT_FIRST_DELALLOC) { |
1376 | *bits &= ~EXTENT_FIRST_DELALLOC; | 1372 | *bits &= ~EXTENT_FIRST_DELALLOC; |
1377 | else if (!(*bits & EXTENT_DO_ACCOUNTING)) | 1373 | } else if (!(*bits & EXTENT_DO_ACCOUNTING)) { |
1378 | atomic_dec(&BTRFS_I(inode)->outstanding_extents); | 1374 | spin_lock(&BTRFS_I(inode)->lock); |
1375 | BTRFS_I(inode)->outstanding_extents--; | ||
1376 | spin_unlock(&BTRFS_I(inode)->lock); | ||
1377 | } | ||
1379 | 1378 | ||
1380 | if (*bits & EXTENT_DO_ACCOUNTING) | 1379 | if (*bits & EXTENT_DO_ACCOUNTING) |
1381 | btrfs_delalloc_release_metadata(inode, len); | 1380 | btrfs_delalloc_release_metadata(inode, len); |
@@ -1394,7 +1393,6 @@ static int btrfs_clear_bit_hook(struct inode *inode, | |||
1394 | } | 1393 | } |
1395 | spin_unlock(&root->fs_info->delalloc_lock); | 1394 | spin_unlock(&root->fs_info->delalloc_lock); |
1396 | } | 1395 | } |
1397 | return 0; | ||
1398 | } | 1396 | } |
1399 | 1397 | ||
1400 | /* | 1398 | /* |
@@ -1477,7 +1475,7 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | |||
1477 | 1475 | ||
1478 | skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; | 1476 | skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; |
1479 | 1477 | ||
1480 | if (is_free_space_inode(root, inode)) | 1478 | if (btrfs_is_free_space_inode(root, inode)) |
1481 | ret = btrfs_bio_wq_end_io(root->fs_info, bio, 2); | 1479 | ret = btrfs_bio_wq_end_io(root->fs_info, bio, 2); |
1482 | else | 1480 | else |
1483 | ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); | 1481 | ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); |
@@ -1644,7 +1642,8 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, | |||
1644 | int ret; | 1642 | int ret; |
1645 | 1643 | ||
1646 | path = btrfs_alloc_path(); | 1644 | path = btrfs_alloc_path(); |
1647 | BUG_ON(!path); | 1645 | if (!path) |
1646 | return -ENOMEM; | ||
1648 | 1647 | ||
1649 | path->leave_spinning = 1; | 1648 | path->leave_spinning = 1; |
1650 | 1649 | ||
@@ -1726,7 +1725,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | |||
1726 | return 0; | 1725 | return 0; |
1727 | BUG_ON(!ordered_extent); | 1726 | BUG_ON(!ordered_extent); |
1728 | 1727 | ||
1729 | nolock = is_free_space_inode(root, inode); | 1728 | nolock = btrfs_is_free_space_inode(root, inode); |
1730 | 1729 | ||
1731 | if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) { | 1730 | if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) { |
1732 | BUG_ON(!list_empty(&ordered_extent->list)); | 1731 | BUG_ON(!list_empty(&ordered_extent->list)); |
@@ -2214,7 +2213,8 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) | |||
2214 | 2213 | ||
2215 | if (!root->orphan_block_rsv) { | 2214 | if (!root->orphan_block_rsv) { |
2216 | block_rsv = btrfs_alloc_block_rsv(root); | 2215 | block_rsv = btrfs_alloc_block_rsv(root); |
2217 | BUG_ON(!block_rsv); | 2216 | if (!block_rsv) |
2217 | return -ENOMEM; | ||
2218 | } | 2218 | } |
2219 | 2219 | ||
2220 | spin_lock(&root->orphan_lock); | 2220 | spin_lock(&root->orphan_lock); |
@@ -2516,7 +2516,9 @@ static void btrfs_read_locked_inode(struct inode *inode) | |||
2516 | filled = true; | 2516 | filled = true; |
2517 | 2517 | ||
2518 | path = btrfs_alloc_path(); | 2518 | path = btrfs_alloc_path(); |
2519 | BUG_ON(!path); | 2519 | if (!path) |
2520 | goto make_bad; | ||
2521 | |||
2520 | path->leave_spinning = 1; | 2522 | path->leave_spinning = 1; |
2521 | memcpy(&location, &BTRFS_I(inode)->location, sizeof(location)); | 2523 | memcpy(&location, &BTRFS_I(inode)->location, sizeof(location)); |
2522 | 2524 | ||
@@ -2531,13 +2533,6 @@ static void btrfs_read_locked_inode(struct inode *inode) | |||
2531 | 2533 | ||
2532 | inode_item = btrfs_item_ptr(leaf, path->slots[0], | 2534 | inode_item = btrfs_item_ptr(leaf, path->slots[0], |
2533 | struct btrfs_inode_item); | 2535 | struct btrfs_inode_item); |
2534 | if (!leaf->map_token) | ||
2535 | map_private_extent_buffer(leaf, (unsigned long)inode_item, | ||
2536 | sizeof(struct btrfs_inode_item), | ||
2537 | &leaf->map_token, &leaf->kaddr, | ||
2538 | &leaf->map_start, &leaf->map_len, | ||
2539 | KM_USER1); | ||
2540 | |||
2541 | inode->i_mode = btrfs_inode_mode(leaf, inode_item); | 2536 | inode->i_mode = btrfs_inode_mode(leaf, inode_item); |
2542 | inode->i_nlink = btrfs_inode_nlink(leaf, inode_item); | 2537 | inode->i_nlink = btrfs_inode_nlink(leaf, inode_item); |
2543 | inode->i_uid = btrfs_inode_uid(leaf, inode_item); | 2538 | inode->i_uid = btrfs_inode_uid(leaf, inode_item); |
@@ -2575,11 +2570,6 @@ cache_acl: | |||
2575 | if (!maybe_acls) | 2570 | if (!maybe_acls) |
2576 | cache_no_acl(inode); | 2571 | cache_no_acl(inode); |
2577 | 2572 | ||
2578 | if (leaf->map_token) { | ||
2579 | unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); | ||
2580 | leaf->map_token = NULL; | ||
2581 | } | ||
2582 | |||
2583 | btrfs_free_path(path); | 2573 | btrfs_free_path(path); |
2584 | 2574 | ||
2585 | switch (inode->i_mode & S_IFMT) { | 2575 | switch (inode->i_mode & S_IFMT) { |
@@ -2624,13 +2614,6 @@ static void fill_inode_item(struct btrfs_trans_handle *trans, | |||
2624 | struct btrfs_inode_item *item, | 2614 | struct btrfs_inode_item *item, |
2625 | struct inode *inode) | 2615 | struct inode *inode) |
2626 | { | 2616 | { |
2627 | if (!leaf->map_token) | ||
2628 | map_private_extent_buffer(leaf, (unsigned long)item, | ||
2629 | sizeof(struct btrfs_inode_item), | ||
2630 | &leaf->map_token, &leaf->kaddr, | ||
2631 | &leaf->map_start, &leaf->map_len, | ||
2632 | KM_USER1); | ||
2633 | |||
2634 | btrfs_set_inode_uid(leaf, item, inode->i_uid); | 2617 | btrfs_set_inode_uid(leaf, item, inode->i_uid); |
2635 | btrfs_set_inode_gid(leaf, item, inode->i_gid); | 2618 | btrfs_set_inode_gid(leaf, item, inode->i_gid); |
2636 | btrfs_set_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size); | 2619 | btrfs_set_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size); |
@@ -2659,11 +2642,6 @@ static void fill_inode_item(struct btrfs_trans_handle *trans, | |||
2659 | btrfs_set_inode_rdev(leaf, item, inode->i_rdev); | 2642 | btrfs_set_inode_rdev(leaf, item, inode->i_rdev); |
2660 | btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags); | 2643 | btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags); |
2661 | btrfs_set_inode_block_group(leaf, item, 0); | 2644 | btrfs_set_inode_block_group(leaf, item, 0); |
2662 | |||
2663 | if (leaf->map_token) { | ||
2664 | unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); | ||
2665 | leaf->map_token = NULL; | ||
2666 | } | ||
2667 | } | 2645 | } |
2668 | 2646 | ||
2669 | /* | 2647 | /* |
@@ -2678,12 +2656,14 @@ noinline int btrfs_update_inode(struct btrfs_trans_handle *trans, | |||
2678 | int ret; | 2656 | int ret; |
2679 | 2657 | ||
2680 | /* | 2658 | /* |
2681 | * If root is tree root, it means this inode is used to | 2659 | * If the inode is a free space inode, we can deadlock during commit |
2682 | * store free space information. And these inodes are updated | 2660 | * if we put it into the delayed code. |
2683 | * when committing the transaction, so they needn't delaye to | 2661 | * |
2684 | * be updated, or deadlock will occured. | 2662 | * The data relocation inode should also be directly updated |
2663 | * without delay | ||
2685 | */ | 2664 | */ |
2686 | if (!is_free_space_inode(root, inode)) { | 2665 | if (!btrfs_is_free_space_inode(root, inode) |
2666 | && root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) { | ||
2687 | ret = btrfs_delayed_update_inode(trans, root, inode); | 2667 | ret = btrfs_delayed_update_inode(trans, root, inode); |
2688 | if (!ret) | 2668 | if (!ret) |
2689 | btrfs_set_inode_last_trans(trans, inode); | 2669 | btrfs_set_inode_last_trans(trans, inode); |
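The rewritten comment names the two exceptions to the delayed-inode path. A condensed sketch of the resulting policy, same logic as the hunk, shown in isolation:

	/* Most inodes: queue the update through the delayed-inode code. */
	if (!btrfs_is_free_space_inode(root, inode) &&
	    root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) {
		ret = btrfs_delayed_update_inode(trans, root, inode);
		if (!ret)
			btrfs_set_inode_last_trans(trans, inode);
		return ret;
	}
	/*
	 * Free space inodes are written out at commit time and the data
	 * relocation inode is updated mid-relocation; delaying either
	 * can deadlock, so both fall through to the direct in-tree
	 * update below.
	 */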
@@ -3019,13 +2999,16 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) | |||
3019 | 2999 | ||
3020 | ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode, | 3000 | ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode, |
3021 | dentry->d_name.name, dentry->d_name.len); | 3001 | dentry->d_name.name, dentry->d_name.len); |
3022 | BUG_ON(ret); | 3002 | if (ret) |
3003 | goto out; | ||
3023 | 3004 | ||
3024 | if (inode->i_nlink == 0) { | 3005 | if (inode->i_nlink == 0) { |
3025 | ret = btrfs_orphan_add(trans, inode); | 3006 | ret = btrfs_orphan_add(trans, inode); |
3026 | BUG_ON(ret); | 3007 | if (ret) |
3008 | goto out; | ||
3027 | } | 3009 | } |
3028 | 3010 | ||
3011 | out: | ||
3029 | nr = trans->blocks_used; | 3012 | nr = trans->blocks_used; |
3030 | __unlink_end_trans(trans, root); | 3013 | __unlink_end_trans(trans, root); |
3031 | btrfs_btree_balance_dirty(root, nr); | 3014 | btrfs_btree_balance_dirty(root, nr); |
@@ -3168,6 +3151,11 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, | |||
3168 | 3151 | ||
3169 | BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY); | 3152 | BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY); |
3170 | 3153 | ||
3154 | path = btrfs_alloc_path(); | ||
3155 | if (!path) | ||
3156 | return -ENOMEM; | ||
3157 | path->reada = -1; | ||
3158 | |||
3171 | if (root->ref_cows || root == root->fs_info->tree_root) | 3159 | if (root->ref_cows || root == root->fs_info->tree_root) |
3172 | btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0); | 3160 | btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0); |
3173 | 3161 | ||
@@ -3180,10 +3168,6 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, | |||
3180 | if (min_type == 0 && root == BTRFS_I(inode)->root) | 3168 | if (min_type == 0 && root == BTRFS_I(inode)->root) |
3181 | btrfs_kill_delayed_inode_items(inode); | 3169 | btrfs_kill_delayed_inode_items(inode); |
3182 | 3170 | ||
3183 | path = btrfs_alloc_path(); | ||
3184 | BUG_ON(!path); | ||
3185 | path->reada = -1; | ||
3186 | |||
3187 | key.objectid = ino; | 3171 | key.objectid = ino; |
3188 | key.offset = (u64)-1; | 3172 | key.offset = (u64)-1; |
3189 | key.type = (u8)-1; | 3173 | key.type = (u8)-1; |
@@ -3396,7 +3380,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) | |||
3396 | 3380 | ||
3397 | ret = -ENOMEM; | 3381 | ret = -ENOMEM; |
3398 | again: | 3382 | again: |
3399 | page = grab_cache_page(mapping, index); | 3383 | page = find_or_create_page(mapping, index, GFP_NOFS); |
3400 | if (!page) { | 3384 | if (!page) { |
3401 | btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE); | 3385 | btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE); |
3402 | goto out; | 3386 | goto out; |
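The switch from grab_cache_page() to find_or_create_page() is about the allocation mask, not the lookup: grab_cache_page() fills in the mapping's default gfp mask, which generally permits reclaim to re-enter the filesystem, while passing GFP_NOFS explicitly forbids that. This caller already holds btrfs state (delalloc space is reserved), so fs re-entry could deadlock. For reference, grab_cache_page() at the time was roughly this wrapper (paraphrased from include/linux/pagemap.h, not part of this diff):

static inline struct page *grab_cache_page(struct address_space *mapping,
                                           pgoff_t index)
{
        /* mapping_gfp_mask() usually allows fs reclaim, which is
         * unsafe while the caller holds filesystem locks or space
         * reservations */
        return find_or_create_page(mapping, index, mapping_gfp_mask(mapping));
}

The same substitution repeats in the ioctl.c and relocation.c hunks later in this diff.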
@@ -3632,7 +3616,7 @@ void btrfs_evict_inode(struct inode *inode) | |||
3632 | 3616 | ||
3633 | truncate_inode_pages(&inode->i_data, 0); | 3617 | truncate_inode_pages(&inode->i_data, 0); |
3634 | if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 || | 3618 | if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 || |
3635 | is_free_space_inode(root, inode))) | 3619 | btrfs_is_free_space_inode(root, inode))) |
3636 | goto no_delete; | 3620 | goto no_delete; |
3637 | 3621 | ||
3638 | if (is_bad_inode(inode)) { | 3622 | if (is_bad_inode(inode)) { |
@@ -3711,7 +3695,8 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry, | |||
3711 | int ret = 0; | 3695 | int ret = 0; |
3712 | 3696 | ||
3713 | path = btrfs_alloc_path(); | 3697 | path = btrfs_alloc_path(); |
3714 | BUG_ON(!path); | 3698 | if (!path) |
3699 | return -ENOMEM; | ||
3715 | 3700 | ||
3716 | di = btrfs_lookup_dir_item(NULL, root, path, btrfs_ino(dir), name, | 3701 | di = btrfs_lookup_dir_item(NULL, root, path, btrfs_ino(dir), name, |
3717 | namelen, 0); | 3702 | namelen, 0); |
@@ -3967,6 +3952,7 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, | |||
3967 | struct btrfs_root *root, int *new) | 3952 | struct btrfs_root *root, int *new) |
3968 | { | 3953 | { |
3969 | struct inode *inode; | 3954 | struct inode *inode; |
3955 | int bad_inode = 0; | ||
3970 | 3956 | ||
3971 | inode = btrfs_iget_locked(s, location->objectid, root); | 3957 | inode = btrfs_iget_locked(s, location->objectid, root); |
3972 | if (!inode) | 3958 | if (!inode) |
@@ -3976,10 +3962,19 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, | |||
3976 | BTRFS_I(inode)->root = root; | 3962 | BTRFS_I(inode)->root = root; |
3977 | memcpy(&BTRFS_I(inode)->location, location, sizeof(*location)); | 3963 | memcpy(&BTRFS_I(inode)->location, location, sizeof(*location)); |
3978 | btrfs_read_locked_inode(inode); | 3964 | btrfs_read_locked_inode(inode); |
3979 | inode_tree_add(inode); | 3965 | if (!is_bad_inode(inode)) { |
3980 | unlock_new_inode(inode); | 3966 | inode_tree_add(inode); |
3981 | if (new) | 3967 | unlock_new_inode(inode); |
3982 | *new = 1; | 3968 | if (new) |
3969 | *new = 1; | ||
3970 | } else { | ||
3971 | bad_inode = 1; | ||
3972 | } | ||
3973 | } | ||
3974 | |||
3975 | if (bad_inode) { | ||
3976 | iput(inode); | ||
3977 | inode = ERR_PTR(-ESTALE); | ||
3983 | } | 3978 | } |
3984 | 3979 | ||
3985 | return inode; | 3980 | return inode; |
@@ -4014,12 +4009,19 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) | |||
4014 | struct btrfs_root *sub_root = root; | 4009 | struct btrfs_root *sub_root = root; |
4015 | struct btrfs_key location; | 4010 | struct btrfs_key location; |
4016 | int index; | 4011 | int index; |
4017 | int ret; | 4012 | int ret = 0; |
4018 | 4013 | ||
4019 | if (dentry->d_name.len > BTRFS_NAME_LEN) | 4014 | if (dentry->d_name.len > BTRFS_NAME_LEN) |
4020 | return ERR_PTR(-ENAMETOOLONG); | 4015 | return ERR_PTR(-ENAMETOOLONG); |
4021 | 4016 | ||
4022 | ret = btrfs_inode_by_name(dir, dentry, &location); | 4017 | if (unlikely(d_need_lookup(dentry))) { |
4018 | memcpy(&location, dentry->d_fsdata, sizeof(struct btrfs_key)); | ||
4019 | kfree(dentry->d_fsdata); | ||
4020 | dentry->d_fsdata = NULL; | ||
4021 | d_clear_need_lookup(dentry); | ||
4022 | } else { | ||
4023 | ret = btrfs_inode_by_name(dir, dentry, &location); | ||
4024 | } | ||
4023 | 4025 | ||
4024 | if (ret < 0) | 4026 | if (ret < 0) |
4025 | return ERR_PTR(ret); | 4027 | return ERR_PTR(ret); |
@@ -4074,16 +4076,16 @@ static int btrfs_dentry_delete(const struct dentry *dentry) | |||
4074 | return 0; | 4076 | return 0; |
4075 | } | 4077 | } |
4076 | 4078 | ||
4079 | static void btrfs_dentry_release(struct dentry *dentry) | ||
4080 | { | ||
4081 | if (dentry->d_fsdata) | ||
4082 | kfree(dentry->d_fsdata); | ||
4083 | } | ||
4084 | |||
4077 | static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, | 4085 | static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, |
4078 | struct nameidata *nd) | 4086 | struct nameidata *nd) |
4079 | { | 4087 | { |
4080 | struct inode *inode; | 4088 | return d_splice_alias(btrfs_lookup_dentry(dir, dentry), dentry); |
4081 | |||
4082 | inode = btrfs_lookup_dentry(dir, dentry); | ||
4083 | if (IS_ERR(inode)) | ||
4084 | return ERR_CAST(inode); | ||
4085 | |||
4086 | return d_splice_alias(inode, dentry); | ||
4087 | } | 4089 | } |
4088 | 4090 | ||
4089 | unsigned char btrfs_filetype_table[] = { | 4091 | unsigned char btrfs_filetype_table[] = { |
@@ -4102,6 +4104,7 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, | |||
4102 | struct btrfs_path *path; | 4104 | struct btrfs_path *path; |
4103 | struct list_head ins_list; | 4105 | struct list_head ins_list; |
4104 | struct list_head del_list; | 4106 | struct list_head del_list; |
4107 | struct qstr q; | ||
4105 | int ret; | 4108 | int ret; |
4106 | struct extent_buffer *leaf; | 4109 | struct extent_buffer *leaf; |
4107 | int slot; | 4110 | int slot; |
@@ -4191,6 +4194,7 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, | |||
4191 | 4194 | ||
4192 | while (di_cur < di_total) { | 4195 | while (di_cur < di_total) { |
4193 | struct btrfs_key location; | 4196 | struct btrfs_key location; |
4197 | struct dentry *tmp; | ||
4194 | 4198 | ||
4195 | if (verify_dir_item(root, leaf, di)) | 4199 | if (verify_dir_item(root, leaf, di)) |
4196 | break; | 4200 | break; |
@@ -4211,6 +4215,33 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, | |||
4211 | d_type = btrfs_filetype_table[btrfs_dir_type(leaf, di)]; | 4215 | d_type = btrfs_filetype_table[btrfs_dir_type(leaf, di)]; |
4212 | btrfs_dir_item_key_to_cpu(leaf, di, &location); | 4216 | btrfs_dir_item_key_to_cpu(leaf, di, &location); |
4213 | 4217 | ||
4218 | q.name = name_ptr; | ||
4219 | q.len = name_len; | ||
4220 | q.hash = full_name_hash(q.name, q.len); | ||
4221 | tmp = d_lookup(filp->f_dentry, &q); | ||
4222 | if (!tmp) { | ||
4223 | struct btrfs_key *newkey; | ||
4224 | |||
4225 | newkey = kzalloc(sizeof(struct btrfs_key), | ||
4226 | GFP_NOFS); | ||
4227 | if (!newkey) | ||
4228 | goto no_dentry; | ||
4229 | tmp = d_alloc(filp->f_dentry, &q); | ||
4230 | if (!tmp) { | ||
4231 | kfree(newkey); | ||
4232 | dput(tmp); | ||
4233 | goto no_dentry; | ||
4234 | } | ||
4235 | memcpy(newkey, &location, | ||
4236 | sizeof(struct btrfs_key)); | ||
4237 | tmp->d_fsdata = newkey; | ||
4238 | tmp->d_flags |= DCACHE_NEED_LOOKUP; | ||
4239 | d_rehash(tmp); | ||
4240 | dput(tmp); | ||
4241 | } else { | ||
4242 | dput(tmp); | ||
4243 | } | ||
4244 | no_dentry: | ||
4214 | /* is this a reference to our own snapshot? If so | 4245 | /* is this a reference to our own snapshot? If so |
4215 | * skip it | 4246 | * skip it |
4216 | */ | 4247 | */ |
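The block added above primes the dcache during readdir: for each name not already cached, it allocates a dentry, stashes the dir item's key in d_fsdata, and flags it DCACHE_NEED_LOOKUP, which is exactly what the d_need_lookup() fast path added to btrfs_lookup_dentry() earlier in this diff consumes, skipping the dir-item search on a later lookup. (The dput(tmp) on the d_alloc() failure path is a harmless no-op, since dput() tolerates NULL.) A self-contained user-space analogue of the prime-then-consume idea, with invented names:

#include <stdio.h>
#include <string.h>

struct demo_dentry {
        char name[32];
        int  need_lookup;       /* analogue of DCACHE_NEED_LOOKUP */
        long fsdata_key;        /* analogue of the key stashed in d_fsdata */
};

static struct demo_dentry cache[8];
static int ncached;

/* "readdir" side: remember each name's key while scanning */
static void prime(const char *name, long key)
{
        struct demo_dentry *d = &cache[ncached++];

        snprintf(d->name, sizeof(d->name), "%s", name);
        d->fsdata_key = key;
        d->need_lookup = 1;
}

/* "lookup" side: consume the primed key instead of re-searching */
static long lookup(const char *name)
{
        for (int i = 0; i < ncached; i++) {
                if (strcmp(cache[i].name, name) != 0)
                        continue;
                if (cache[i].need_lookup) {
                        cache[i].need_lookup = 0;
                        return cache[i].fsdata_key;     /* fast path */
                }
        }
        return -1;      /* miss: would fall back to the full search */
}

int main(void)
{
        prime("foo", 257);
        prime("bar", 258);
        printf("foo -> %ld, bar -> %ld\n", lookup("foo"), lookup("bar"));
        return 0;
}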
@@ -4275,7 +4306,7 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc) | |||
4275 | if (BTRFS_I(inode)->dummy_inode) | 4306 | if (BTRFS_I(inode)->dummy_inode) |
4276 | return 0; | 4307 | return 0; |
4277 | 4308 | ||
4278 | if (btrfs_fs_closing(root->fs_info) && is_free_space_inode(root, inode)) | 4309 | if (btrfs_fs_closing(root->fs_info) && btrfs_is_free_space_inode(root, inode)) |
4279 | nolock = true; | 4310 | nolock = true; |
4280 | 4311 | ||
4281 | if (wbc->sync_mode == WB_SYNC_ALL) { | 4312 | if (wbc->sync_mode == WB_SYNC_ALL) { |
@@ -4436,7 +4467,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | |||
4436 | int owner; | 4467 | int owner; |
4437 | 4468 | ||
4438 | path = btrfs_alloc_path(); | 4469 | path = btrfs_alloc_path(); |
4439 | BUG_ON(!path); | 4470 | if (!path) |
4471 | return ERR_PTR(-ENOMEM); | ||
4440 | 4472 | ||
4441 | inode = new_inode(root->fs_info->sb); | 4473 | inode = new_inode(root->fs_info->sb); |
4442 | if (!inode) { | 4474 | if (!inode) { |
@@ -4471,7 +4503,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | |||
4471 | inode->i_generation = BTRFS_I(inode)->generation; | 4503 | inode->i_generation = BTRFS_I(inode)->generation; |
4472 | btrfs_set_inode_space_info(root, inode); | 4504 | btrfs_set_inode_space_info(root, inode); |
4473 | 4505 | ||
4474 | if (mode & S_IFDIR) | 4506 | if (S_ISDIR(mode)) |
4475 | owner = 0; | 4507 | owner = 0; |
4476 | else | 4508 | else |
4477 | owner = 1; | 4509 | owner = 1; |
@@ -4516,7 +4548,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | |||
4516 | 4548 | ||
4517 | btrfs_inherit_iflags(inode, dir); | 4549 | btrfs_inherit_iflags(inode, dir); |
4518 | 4550 | ||
4519 | if ((mode & S_IFREG)) { | 4551 | if (S_ISREG(mode)) { |
4520 | if (btrfs_test_opt(root, NODATASUM)) | 4552 | if (btrfs_test_opt(root, NODATASUM)) |
4521 | BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM; | 4553 | BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM; |
4522 | if (btrfs_test_opt(root, NODATACOW) || | 4554 | if (btrfs_test_opt(root, NODATACOW) || |
@@ -4770,11 +4802,10 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, | |||
4770 | if (err) { | 4802 | if (err) { |
4771 | drop_inode = 1; | 4803 | drop_inode = 1; |
4772 | } else { | 4804 | } else { |
4773 | struct dentry *parent = dget_parent(dentry); | 4805 | struct dentry *parent = dentry->d_parent; |
4774 | err = btrfs_update_inode(trans, root, inode); | 4806 | err = btrfs_update_inode(trans, root, inode); |
4775 | BUG_ON(err); | 4807 | BUG_ON(err); |
4776 | btrfs_log_new_name(trans, inode, NULL, parent); | 4808 | btrfs_log_new_name(trans, inode, NULL, parent); |
4777 | dput(parent); | ||
4778 | } | 4809 | } |
4779 | 4810 | ||
4780 | nr = trans->blocks_used; | 4811 | nr = trans->blocks_used; |
@@ -6697,19 +6728,6 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, | |||
6697 | return 0; | 6728 | return 0; |
6698 | } | 6729 | } |
6699 | 6730 | ||
6700 | /* helper function for file defrag and space balancing. This | ||
6701 | * forces readahead on a given range of bytes in an inode | ||
6702 | */ | ||
6703 | unsigned long btrfs_force_ra(struct address_space *mapping, | ||
6704 | struct file_ra_state *ra, struct file *file, | ||
6705 | pgoff_t offset, pgoff_t last_index) | ||
6706 | { | ||
6707 | pgoff_t req_size = last_index - offset + 1; | ||
6708 | |||
6709 | page_cache_sync_readahead(mapping, ra, file, offset, req_size); | ||
6710 | return offset + req_size; | ||
6711 | } | ||
6712 | |||
6713 | struct inode *btrfs_alloc_inode(struct super_block *sb) | 6731 | struct inode *btrfs_alloc_inode(struct super_block *sb) |
6714 | { | 6732 | { |
6715 | struct btrfs_inode *ei; | 6733 | struct btrfs_inode *ei; |
@@ -6733,8 +6751,9 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) | |||
6733 | ei->index_cnt = (u64)-1; | 6751 | ei->index_cnt = (u64)-1; |
6734 | ei->last_unlink_trans = 0; | 6752 | ei->last_unlink_trans = 0; |
6735 | 6753 | ||
6736 | atomic_set(&ei->outstanding_extents, 0); | 6754 | spin_lock_init(&ei->lock); |
6737 | atomic_set(&ei->reserved_extents, 0); | 6755 | ei->outstanding_extents = 0; |
6756 | ei->reserved_extents = 0; | ||
6738 | 6757 | ||
6739 | ei->ordered_data_close = 0; | 6758 | ei->ordered_data_close = 0; |
6740 | ei->orphan_meta_reserved = 0; | 6759 | ei->orphan_meta_reserved = 0; |
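Replacing the two atomic_t counters with plain integers behind a new per-inode spinlock lets related fields be updated and read as one consistent unit instead of two independent atomics; the ioctl.c hunk later in this diff shows the resulting lock/increment/unlock pattern. A runnable pthread sketch of the same structure, with a mutex standing in for the kernel spinlock and all names invented:

#include <pthread.h>
#include <stdio.h>

/* analogue of the new per-inode lock plus plain counters */
struct demo_inode {
        pthread_mutex_t lock;
        int outstanding_extents;
        int reserved_extents;
};

static struct demo_inode di = { PTHREAD_MUTEX_INITIALIZER, 0, 0 };

static void *worker(void *arg)
{
        (void)arg;
        for (int i = 0; i < 100000; i++) {
                pthread_mutex_lock(&di.lock);
                /* both fields move together: no window in which a
                 * reader could see them disagree */
                di.outstanding_extents++;
                di.reserved_extents++;
                pthread_mutex_unlock(&di.lock);
        }
        return NULL;
}

int main(void)
{
        pthread_t a, b;

        pthread_create(&a, NULL, worker, NULL);
        pthread_create(&b, NULL, worker, NULL);
        pthread_join(a, NULL);
        pthread_join(b, NULL);
        printf("%d %d\n", di.outstanding_extents, di.reserved_extents);
        return 0;
}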
@@ -6772,8 +6791,8 @@ void btrfs_destroy_inode(struct inode *inode) | |||
6772 | 6791 | ||
6773 | WARN_ON(!list_empty(&inode->i_dentry)); | 6792 | WARN_ON(!list_empty(&inode->i_dentry)); |
6774 | WARN_ON(inode->i_data.nrpages); | 6793 | WARN_ON(inode->i_data.nrpages); |
6775 | WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents)); | 6794 | WARN_ON(BTRFS_I(inode)->outstanding_extents); |
6776 | WARN_ON(atomic_read(&BTRFS_I(inode)->reserved_extents)); | 6795 | WARN_ON(BTRFS_I(inode)->reserved_extents); |
6777 | 6796 | ||
6778 | /* | 6797 | /* |
6779 | * This can happen where we create an inode, but somebody else also | 6798 | * This can happen where we create an inode, but somebody else also |
@@ -6828,7 +6847,7 @@ int btrfs_drop_inode(struct inode *inode) | |||
6828 | struct btrfs_root *root = BTRFS_I(inode)->root; | 6847 | struct btrfs_root *root = BTRFS_I(inode)->root; |
6829 | 6848 | ||
6830 | if (btrfs_root_refs(&root->root_item) == 0 && | 6849 | if (btrfs_root_refs(&root->root_item) == 0 && |
6831 | !is_free_space_inode(root, inode)) | 6850 | !btrfs_is_free_space_inode(root, inode)) |
6832 | return 1; | 6851 | return 1; |
6833 | else | 6852 | else |
6834 | return generic_drop_inode(inode); | 6853 | return generic_drop_inode(inode); |
@@ -6898,7 +6917,7 @@ static int btrfs_getattr(struct vfsmount *mnt, | |||
6898 | { | 6917 | { |
6899 | struct inode *inode = dentry->d_inode; | 6918 | struct inode *inode = dentry->d_inode; |
6900 | generic_fillattr(inode, stat); | 6919 | generic_fillattr(inode, stat); |
6901 | stat->dev = BTRFS_I(inode)->root->anon_super.s_dev; | 6920 | stat->dev = BTRFS_I(inode)->root->anon_dev; |
6902 | stat->blksize = PAGE_CACHE_SIZE; | 6921 | stat->blksize = PAGE_CACHE_SIZE; |
6903 | stat->blocks = (inode_get_bytes(inode) + | 6922 | stat->blocks = (inode_get_bytes(inode) + |
6904 | BTRFS_I(inode)->delalloc_bytes) >> 9; | 6923 | BTRFS_I(inode)->delalloc_bytes) >> 9; |
@@ -7066,9 +7085,8 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
7066 | BUG_ON(ret); | 7085 | BUG_ON(ret); |
7067 | 7086 | ||
7068 | if (old_ino != BTRFS_FIRST_FREE_OBJECTID) { | 7087 | if (old_ino != BTRFS_FIRST_FREE_OBJECTID) { |
7069 | struct dentry *parent = dget_parent(new_dentry); | 7088 | struct dentry *parent = new_dentry->d_parent; |
7070 | btrfs_log_new_name(trans, old_inode, old_dir, parent); | 7089 | btrfs_log_new_name(trans, old_inode, old_dir, parent); |
7071 | dput(parent); | ||
7072 | btrfs_end_log_trans(root); | 7090 | btrfs_end_log_trans(root); |
7073 | } | 7091 | } |
7074 | out_fail: | 7092 | out_fail: |
@@ -7192,7 +7210,11 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, | |||
7192 | goto out_unlock; | 7210 | goto out_unlock; |
7193 | 7211 | ||
7194 | path = btrfs_alloc_path(); | 7212 | path = btrfs_alloc_path(); |
7195 | BUG_ON(!path); | 7213 | if (!path) { |
7214 | err = -ENOMEM; | ||
7215 | drop_inode = 1; | ||
7216 | goto out_unlock; | ||
7217 | } | ||
7196 | key.objectid = btrfs_ino(inode); | 7218 | key.objectid = btrfs_ino(inode); |
7197 | key.offset = 0; | 7219 | key.offset = 0; |
7198 | btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); | 7220 | btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); |
@@ -7329,7 +7351,7 @@ static int btrfs_set_page_dirty(struct page *page) | |||
7329 | return __set_page_dirty_nobuffers(page); | 7351 | return __set_page_dirty_nobuffers(page); |
7330 | } | 7352 | } |
7331 | 7353 | ||
7332 | static int btrfs_permission(struct inode *inode, int mask, unsigned int flags) | 7354 | static int btrfs_permission(struct inode *inode, int mask) |
7333 | { | 7355 | { |
7334 | struct btrfs_root *root = BTRFS_I(inode)->root; | 7356 | struct btrfs_root *root = BTRFS_I(inode)->root; |
7335 | 7357 | ||
@@ -7337,7 +7359,7 @@ static int btrfs_permission(struct inode *inode, int mask, unsigned int flags) | |||
7337 | return -EROFS; | 7359 | return -EROFS; |
7338 | if ((BTRFS_I(inode)->flags & BTRFS_INODE_READONLY) && (mask & MAY_WRITE)) | 7360 | if ((BTRFS_I(inode)->flags & BTRFS_INODE_READONLY) && (mask & MAY_WRITE)) |
7339 | return -EACCES; | 7361 | return -EACCES; |
7340 | return generic_permission(inode, mask, flags, btrfs_check_acl); | 7362 | return generic_permission(inode, mask); |
7341 | } | 7363 | } |
7342 | 7364 | ||
7343 | static const struct inode_operations btrfs_dir_inode_operations = { | 7365 | static const struct inode_operations btrfs_dir_inode_operations = { |
@@ -7357,10 +7379,12 @@ static const struct inode_operations btrfs_dir_inode_operations = { | |||
7357 | .listxattr = btrfs_listxattr, | 7379 | .listxattr = btrfs_listxattr, |
7358 | .removexattr = btrfs_removexattr, | 7380 | .removexattr = btrfs_removexattr, |
7359 | .permission = btrfs_permission, | 7381 | .permission = btrfs_permission, |
7382 | .get_acl = btrfs_get_acl, | ||
7360 | }; | 7383 | }; |
7361 | static const struct inode_operations btrfs_dir_ro_inode_operations = { | 7384 | static const struct inode_operations btrfs_dir_ro_inode_operations = { |
7362 | .lookup = btrfs_lookup, | 7385 | .lookup = btrfs_lookup, |
7363 | .permission = btrfs_permission, | 7386 | .permission = btrfs_permission, |
7387 | .get_acl = btrfs_get_acl, | ||
7364 | }; | 7388 | }; |
7365 | 7389 | ||
7366 | static const struct file_operations btrfs_dir_file_operations = { | 7390 | static const struct file_operations btrfs_dir_file_operations = { |
@@ -7429,6 +7453,7 @@ static const struct inode_operations btrfs_file_inode_operations = { | |||
7429 | .removexattr = btrfs_removexattr, | 7453 | .removexattr = btrfs_removexattr, |
7430 | .permission = btrfs_permission, | 7454 | .permission = btrfs_permission, |
7431 | .fiemap = btrfs_fiemap, | 7455 | .fiemap = btrfs_fiemap, |
7456 | .get_acl = btrfs_get_acl, | ||
7432 | }; | 7457 | }; |
7433 | static const struct inode_operations btrfs_special_inode_operations = { | 7458 | static const struct inode_operations btrfs_special_inode_operations = { |
7434 | .getattr = btrfs_getattr, | 7459 | .getattr = btrfs_getattr, |
@@ -7438,6 +7463,7 @@ static const struct inode_operations btrfs_special_inode_operations = { | |||
7438 | .getxattr = btrfs_getxattr, | 7463 | .getxattr = btrfs_getxattr, |
7439 | .listxattr = btrfs_listxattr, | 7464 | .listxattr = btrfs_listxattr, |
7440 | .removexattr = btrfs_removexattr, | 7465 | .removexattr = btrfs_removexattr, |
7466 | .get_acl = btrfs_get_acl, | ||
7441 | }; | 7467 | }; |
7442 | static const struct inode_operations btrfs_symlink_inode_operations = { | 7468 | static const struct inode_operations btrfs_symlink_inode_operations = { |
7443 | .readlink = generic_readlink, | 7469 | .readlink = generic_readlink, |
@@ -7449,8 +7475,10 @@ static const struct inode_operations btrfs_symlink_inode_operations = { | |||
7449 | .getxattr = btrfs_getxattr, | 7475 | .getxattr = btrfs_getxattr, |
7450 | .listxattr = btrfs_listxattr, | 7476 | .listxattr = btrfs_listxattr, |
7451 | .removexattr = btrfs_removexattr, | 7477 | .removexattr = btrfs_removexattr, |
7478 | .get_acl = btrfs_get_acl, | ||
7452 | }; | 7479 | }; |
7453 | 7480 | ||
7454 | const struct dentry_operations btrfs_dentry_operations = { | 7481 | const struct dentry_operations btrfs_dentry_operations = { |
7455 | .d_delete = btrfs_dentry_delete, | 7482 | .d_delete = btrfs_dentry_delete, |
7483 | .d_release = btrfs_dentry_release, | ||
7456 | }; | 7484 | }; |
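All the .get_acl = btrfs_get_acl entries above are the flip side of the two-argument btrfs_permission(): the VFS now fetches and checks POSIX ACLs itself through the new ->get_acl inode operation (with its own caching), so generic_permission() no longer takes a per-filesystem check_acl callback or a flags argument. A sketch of the resulting shape for a filesystem on this API, where example_is_readonly() and example_get_acl() are invented stand-ins:

static int example_permission(struct inode *inode, int mask)
{
        /* filesystem-specific refusals come first ... */
        if (example_is_readonly(inode) && (mask & MAY_WRITE))
                return -EROFS;
        /* ... then the VFS checks mode bits and, via ->get_acl,
         * POSIX ACLs, including the cached-ACL fast path */
        return generic_permission(inode, mask);
}

static const struct inode_operations example_iops = {
        .permission = example_permission,
        .get_acl    = example_get_acl,  /* parses the ACL xattr on demand */
};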
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index a3c4751e07db..7cf013349941 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
@@ -323,7 +323,7 @@ static noinline int create_subvol(struct btrfs_root *root, | |||
323 | struct btrfs_inode_item *inode_item; | 323 | struct btrfs_inode_item *inode_item; |
324 | struct extent_buffer *leaf; | 324 | struct extent_buffer *leaf; |
325 | struct btrfs_root *new_root; | 325 | struct btrfs_root *new_root; |
326 | struct dentry *parent = dget_parent(dentry); | 326 | struct dentry *parent = dentry->d_parent; |
327 | struct inode *dir; | 327 | struct inode *dir; |
328 | int ret; | 328 | int ret; |
329 | int err; | 329 | int err; |
@@ -332,10 +332,8 @@ static noinline int create_subvol(struct btrfs_root *root, | |||
332 | u64 index = 0; | 332 | u64 index = 0; |
333 | 333 | ||
334 | ret = btrfs_find_free_objectid(root->fs_info->tree_root, &objectid); | 334 | ret = btrfs_find_free_objectid(root->fs_info->tree_root, &objectid); |
335 | if (ret) { | 335 | if (ret) |
336 | dput(parent); | ||
337 | return ret; | 336 | return ret; |
338 | } | ||
339 | 337 | ||
340 | dir = parent->d_inode; | 338 | dir = parent->d_inode; |
341 | 339 | ||
@@ -346,10 +344,8 @@ static noinline int create_subvol(struct btrfs_root *root, | |||
346 | * 2 - dir items | 344 | * 2 - dir items |
347 | */ | 345 | */ |
348 | trans = btrfs_start_transaction(root, 6); | 346 | trans = btrfs_start_transaction(root, 6); |
349 | if (IS_ERR(trans)) { | 347 | if (IS_ERR(trans)) |
350 | dput(parent); | ||
351 | return PTR_ERR(trans); | 348 | return PTR_ERR(trans); |
352 | } | ||
353 | 349 | ||
354 | leaf = btrfs_alloc_free_block(trans, root, root->leafsize, | 350 | leaf = btrfs_alloc_free_block(trans, root, root->leafsize, |
355 | 0, objectid, NULL, 0, 0, 0); | 351 | 0, objectid, NULL, 0, 0, 0); |
@@ -439,7 +435,6 @@ static noinline int create_subvol(struct btrfs_root *root, | |||
439 | 435 | ||
440 | d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry)); | 436 | d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry)); |
441 | fail: | 437 | fail: |
442 | dput(parent); | ||
443 | if (async_transid) { | 438 | if (async_transid) { |
444 | *async_transid = trans->transid; | 439 | *async_transid = trans->transid; |
445 | err = btrfs_commit_transaction_async(trans, root, 1); | 440 | err = btrfs_commit_transaction_async(trans, root, 1); |
@@ -456,7 +451,6 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, | |||
456 | bool readonly) | 451 | bool readonly) |
457 | { | 452 | { |
458 | struct inode *inode; | 453 | struct inode *inode; |
459 | struct dentry *parent; | ||
460 | struct btrfs_pending_snapshot *pending_snapshot; | 454 | struct btrfs_pending_snapshot *pending_snapshot; |
461 | struct btrfs_trans_handle *trans; | 455 | struct btrfs_trans_handle *trans; |
462 | int ret; | 456 | int ret; |
@@ -504,9 +498,7 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, | |||
504 | if (ret) | 498 | if (ret) |
505 | goto fail; | 499 | goto fail; |
506 | 500 | ||
507 | parent = dget_parent(dentry); | 501 | inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry); |
508 | inode = btrfs_lookup_dentry(parent->d_inode, dentry); | ||
509 | dput(parent); | ||
510 | if (IS_ERR(inode)) { | 502 | if (IS_ERR(inode)) { |
511 | ret = PTR_ERR(inode); | 503 | ret = PTR_ERR(inode); |
512 | goto fail; | 504 | goto fail; |
@@ -867,8 +859,8 @@ again: | |||
867 | /* step one, lock all the pages */ | 859 | /* step one, lock all the pages */ |
868 | for (i = 0; i < num_pages; i++) { | 860 | for (i = 0; i < num_pages; i++) { |
869 | struct page *page; | 861 | struct page *page; |
870 | page = grab_cache_page(inode->i_mapping, | 862 | page = find_or_create_page(inode->i_mapping, |
871 | start_index + i); | 863 | start_index + i, GFP_NOFS); |
872 | if (!page) | 864 | if (!page) |
873 | break; | 865 | break; |
874 | 866 | ||
@@ -938,7 +930,9 @@ again: | |||
938 | GFP_NOFS); | 930 | GFP_NOFS); |
939 | 931 | ||
940 | if (i_done != num_pages) { | 932 | if (i_done != num_pages) { |
941 | atomic_inc(&BTRFS_I(inode)->outstanding_extents); | 933 | spin_lock(&BTRFS_I(inode)->lock); |
934 | BTRFS_I(inode)->outstanding_extents++; | ||
935 | spin_unlock(&BTRFS_I(inode)->lock); | ||
942 | btrfs_delalloc_release_space(inode, | 936 | btrfs_delalloc_release_space(inode, |
943 | (num_pages - i_done) << PAGE_CACHE_SHIFT); | 937 | (num_pages - i_done) << PAGE_CACHE_SHIFT); |
944 | } | 938 | } |
@@ -1755,11 +1749,10 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info, | |||
1755 | key.objectid = key.offset; | 1749 | key.objectid = key.offset; |
1756 | key.offset = (u64)-1; | 1750 | key.offset = (u64)-1; |
1757 | dirid = key.objectid; | 1751 | dirid = key.objectid; |
1758 | |||
1759 | } | 1752 | } |
1760 | if (ptr < name) | 1753 | if (ptr < name) |
1761 | goto out; | 1754 | goto out; |
1762 | memcpy(name, ptr, total_len); | 1755 | memmove(name, ptr, total_len); |
1763 | name[total_len]='\0'; | 1756 | name[total_len]='\0'; |
1764 | ret = 0; | 1757 | ret = 0; |
1765 | out: | 1758 | out: |
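The memcpy() to memmove() change in btrfs_search_path_in_tree() is a correctness fix: the path is assembled right to left inside the caller's buffer, so by the final copy the source (ptr) and destination (name) can overlap, and memcpy() is undefined on overlapping regions. A quick user-space demonstration of the distinction:

#include <stdio.h>
#include <string.h>

int main(void)
{
        char buf[32] = "....a/deep/path";
        size_t len = strlen(buf + 4) + 1;       /* include the NUL */

        /* shift the assembled path over its own prefix: source and
         * destination overlap, so memmove() is required; memcpy()
         * on these regions would be undefined behavior */
        memmove(buf, buf + 4, len);

        printf("%s\n", buf);                    /* a/deep/path */
        return 0;
}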
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c index 66fa43dc3f0f..d77b67c4b275 100644 --- a/fs/btrfs/locking.c +++ b/fs/btrfs/locking.c | |||
@@ -24,185 +24,197 @@ | |||
24 | #include "extent_io.h" | 24 | #include "extent_io.h" |
25 | #include "locking.h" | 25 | #include "locking.h" |
26 | 26 | ||
27 | static inline void spin_nested(struct extent_buffer *eb) | 27 | void btrfs_assert_tree_read_locked(struct extent_buffer *eb); |
28 | { | ||
29 | spin_lock(&eb->lock); | ||
30 | } | ||
31 | 28 | ||
32 | /* | 29 | /* |
33 | * Setting a lock to blocking will drop the spinlock and set the | 30 | * if we currently have a spinning reader or writer lock |
34 | * flag that forces other procs who want the lock to wait. After | 31 | * (indicated by the rw flag) this will bump the count |
35 | * this you can safely schedule with the lock held. | 32 | * of blocking holders and drop the spinlock. |
36 | */ | 33 | */ |
37 | void btrfs_set_lock_blocking(struct extent_buffer *eb) | 34 | void btrfs_set_lock_blocking_rw(struct extent_buffer *eb, int rw) |
38 | { | 35 | { |
39 | if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) { | 36 | if (rw == BTRFS_WRITE_LOCK) { |
40 | set_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags); | 37 | if (atomic_read(&eb->blocking_writers) == 0) { |
41 | spin_unlock(&eb->lock); | 38 | WARN_ON(atomic_read(&eb->spinning_writers) != 1); |
39 | atomic_dec(&eb->spinning_writers); | ||
40 | btrfs_assert_tree_locked(eb); | ||
41 | atomic_inc(&eb->blocking_writers); | ||
42 | write_unlock(&eb->lock); | ||
43 | } | ||
44 | } else if (rw == BTRFS_READ_LOCK) { | ||
45 | btrfs_assert_tree_read_locked(eb); | ||
46 | atomic_inc(&eb->blocking_readers); | ||
47 | WARN_ON(atomic_read(&eb->spinning_readers) == 0); | ||
48 | atomic_dec(&eb->spinning_readers); | ||
49 | read_unlock(&eb->lock); | ||
42 | } | 50 | } |
43 | /* exit with the spin lock released and the bit set */ | 51 | return; |
44 | } | 52 | } |
45 | 53 | ||
46 | /* | 54 | /* |
47 | * clearing the blocking flag will take the spinlock again. | 55 | * if we currently have a blocking lock, take the spinlock |
48 | * After this you can't safely schedule | 56 | * and drop our blocking count |
49 | */ | 57 | */ |
50 | void btrfs_clear_lock_blocking(struct extent_buffer *eb) | 58 | void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw) |
51 | { | 59 | { |
52 | if (test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) { | 60 | if (rw == BTRFS_WRITE_LOCK_BLOCKING) { |
53 | spin_nested(eb); | 61 | BUG_ON(atomic_read(&eb->blocking_writers) != 1); |
54 | clear_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags); | 62 | write_lock(&eb->lock); |
55 | smp_mb__after_clear_bit(); | 63 | WARN_ON(atomic_read(&eb->spinning_writers)); |
64 | atomic_inc(&eb->spinning_writers); | ||
65 | if (atomic_dec_and_test(&eb->blocking_writers)) | ||
66 | wake_up(&eb->write_lock_wq); | ||
67 | } else if (rw == BTRFS_READ_LOCK_BLOCKING) { | ||
68 | BUG_ON(atomic_read(&eb->blocking_readers) == 0); | ||
69 | read_lock(&eb->lock); | ||
70 | atomic_inc(&eb->spinning_readers); | ||
71 | if (atomic_dec_and_test(&eb->blocking_readers)) | ||
72 | wake_up(&eb->read_lock_wq); | ||
56 | } | 73 | } |
57 | /* exit with the spin lock held */ | 74 | return; |
58 | } | 75 | } |
59 | 76 | ||
60 | /* | 77 | /* |
61 | * unfortunately, many of the places that currently set a lock to blocking | 78 | * take a spinning read lock. This will wait for any blocking |
62 | * don't end up blocking for very long, and often they don't block | 79 | * writers |
63 | * at all. For a dbench 50 run, if we don't spin on the blocking bit | ||
64 | * at all, the context switch rate can jump up to 400,000/sec or more. | ||
65 | * | ||
66 | * So, we're still stuck with this crummy spin on the blocking bit, | ||
67 | * at least until the most common causes of the short blocks | ||
68 | * can be dealt with. | ||
69 | */ | 80 | */ |
70 | static int btrfs_spin_on_block(struct extent_buffer *eb) | 81 | void btrfs_tree_read_lock(struct extent_buffer *eb) |
71 | { | 82 | { |
72 | int i; | 83 | again: |
73 | 84 | wait_event(eb->write_lock_wq, atomic_read(&eb->blocking_writers) == 0); | |
74 | for (i = 0; i < 512; i++) { | 85 | read_lock(&eb->lock); |
75 | if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) | 86 | if (atomic_read(&eb->blocking_writers)) { |
76 | return 1; | 87 | read_unlock(&eb->lock); |
77 | if (need_resched()) | 88 | wait_event(eb->write_lock_wq, |
78 | break; | 89 | atomic_read(&eb->blocking_writers) == 0); |
79 | cpu_relax(); | 90 | goto again; |
80 | } | 91 | } |
81 | return 0; | 92 | atomic_inc(&eb->read_locks); |
93 | atomic_inc(&eb->spinning_readers); | ||
82 | } | 94 | } |
83 | 95 | ||
84 | /* | 96 | /* |
85 | * This is somewhat different from trylock. It will take the | 97 | * returns 1 if we get the read lock and 0 if we don't |
86 | * spinlock but if it finds the lock is set to blocking, it will | 98 | * this won't wait for blocking writers |
87 | * return without the lock held. | ||
88 | * | ||
89 | * returns 1 if it was able to take the lock and zero otherwise | ||
90 | * | ||
91 | * After this call, scheduling is not safe without first calling | ||
92 | * btrfs_set_lock_blocking() | ||
93 | */ | 99 | */ |
94 | int btrfs_try_spin_lock(struct extent_buffer *eb) | 100 | int btrfs_try_tree_read_lock(struct extent_buffer *eb) |
95 | { | 101 | { |
96 | int i; | 102 | if (atomic_read(&eb->blocking_writers)) |
103 | return 0; | ||
97 | 104 | ||
98 | if (btrfs_spin_on_block(eb)) { | 105 | read_lock(&eb->lock); |
99 | spin_nested(eb); | 106 | if (atomic_read(&eb->blocking_writers)) { |
100 | if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) | 107 | read_unlock(&eb->lock); |
101 | return 1; | 108 | return 0; |
102 | spin_unlock(&eb->lock); | ||
103 | } | 109 | } |
104 | /* spin for a bit on the BLOCKING flag */ | 110 | atomic_inc(&eb->read_locks); |
105 | for (i = 0; i < 2; i++) { | 111 | atomic_inc(&eb->spinning_readers); |
106 | cpu_relax(); | 112 | return 1; |
107 | if (!btrfs_spin_on_block(eb)) | ||
108 | break; | ||
109 | |||
110 | spin_nested(eb); | ||
111 | if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) | ||
112 | return 1; | ||
113 | spin_unlock(&eb->lock); | ||
114 | } | ||
115 | return 0; | ||
116 | } | 113 | } |
117 | 114 | ||
118 | /* | 115 | /* |
119 | * the autoremove wake function will return 0 if it tried to wake up | 116 | * returns 1 if we get the read lock and 0 if we don't |
120 | * a process that was already awake, which means that process won't | 117 | * this won't wait for blocking writers or readers |
121 | * count as an exclusive wakeup. The waitq code will continue waking | ||
122 | * procs until it finds one that was actually sleeping. | ||
123 | * | ||
124 | * For btrfs, this isn't quite what we want. We want a single proc | ||
125 | * to be notified that the lock is ready for taking. If that proc | ||
126 | * already happen to be awake, great, it will loop around and try for | ||
127 | * the lock. | ||
128 | * | ||
129 | * So, btrfs_wake_function always returns 1, even when the proc that we | ||
130 | * tried to wake up was already awake. | ||
131 | */ | 118 | */ |
132 | static int btrfs_wake_function(wait_queue_t *wait, unsigned mode, | 119 | int btrfs_try_tree_write_lock(struct extent_buffer *eb) |
133 | int sync, void *key) | ||
134 | { | 120 | { |
135 | autoremove_wake_function(wait, mode, sync, key); | 121 | if (atomic_read(&eb->blocking_writers) || |
122 | atomic_read(&eb->blocking_readers)) | ||
123 | return 0; | ||
124 | write_lock(&eb->lock); | ||
125 | if (atomic_read(&eb->blocking_writers) || | ||
126 | atomic_read(&eb->blocking_readers)) { | ||
127 | write_unlock(&eb->lock); | ||
128 | return 0; | ||
129 | } | ||
130 | atomic_inc(&eb->write_locks); | ||
131 | atomic_inc(&eb->spinning_writers); | ||
136 | return 1; | 132 | return 1; |
137 | } | 133 | } |
138 | 134 | ||
139 | /* | 135 | /* |
140 | * returns with the extent buffer spinlocked. | 136 | * drop a spinning read lock |
141 | * | 137 | */ |
142 | * This will spin and/or wait as required to take the lock, and then | 138 | void btrfs_tree_read_unlock(struct extent_buffer *eb) |
143 | * return with the spinlock held. | 139 | { |
144 | * | 140 | btrfs_assert_tree_read_locked(eb); |
145 | * After this call, scheduling is not safe without first calling | 141 | WARN_ON(atomic_read(&eb->spinning_readers) == 0); |
146 | * btrfs_set_lock_blocking() | 142 | atomic_dec(&eb->spinning_readers); |
143 | atomic_dec(&eb->read_locks); | ||
144 | read_unlock(&eb->lock); | ||
145 | } | ||
146 | |||
147 | /* | ||
148 | * drop a blocking read lock | ||
149 | */ | ||
150 | void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb) | ||
151 | { | ||
152 | btrfs_assert_tree_read_locked(eb); | ||
153 | WARN_ON(atomic_read(&eb->blocking_readers) == 0); | ||
154 | if (atomic_dec_and_test(&eb->blocking_readers)) | ||
155 | wake_up(&eb->read_lock_wq); | ||
156 | atomic_dec(&eb->read_locks); | ||
157 | } | ||
158 | |||
159 | /* | ||
160 | * take a spinning write lock. This will wait for both | ||
161 | * blocking readers and writers | ||
147 | */ | 162 | */ |
148 | int btrfs_tree_lock(struct extent_buffer *eb) | 163 | int btrfs_tree_lock(struct extent_buffer *eb) |
149 | { | 164 | { |
150 | DEFINE_WAIT(wait); | 165 | again: |
151 | wait.func = btrfs_wake_function; | 166 | wait_event(eb->read_lock_wq, atomic_read(&eb->blocking_readers) == 0); |
152 | 167 | wait_event(eb->write_lock_wq, atomic_read(&eb->blocking_writers) == 0); | |
153 | if (!btrfs_spin_on_block(eb)) | 168 | write_lock(&eb->lock); |
154 | goto sleep; | 169 | if (atomic_read(&eb->blocking_readers)) { |
155 | 170 | write_unlock(&eb->lock); | |
156 | while(1) { | 171 | wait_event(eb->read_lock_wq, |
157 | spin_nested(eb); | 172 | atomic_read(&eb->blocking_readers) == 0); |
158 | 173 | goto again; | |
159 | /* nobody is blocking, exit with the spinlock held */ | ||
160 | if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) | ||
161 | return 0; | ||
162 | |||
163 | /* | ||
164 | * we have the spinlock, but the real owner is blocking. | ||
165 | * wait for them | ||
166 | */ | ||
167 | spin_unlock(&eb->lock); | ||
168 | |||
169 | /* | ||
170 | * spin for a bit, and if the blocking flag goes away, | ||
171 | * loop around | ||
172 | */ | ||
173 | cpu_relax(); | ||
174 | if (btrfs_spin_on_block(eb)) | ||
175 | continue; | ||
176 | sleep: | ||
177 | prepare_to_wait_exclusive(&eb->lock_wq, &wait, | ||
178 | TASK_UNINTERRUPTIBLE); | ||
179 | |||
180 | if (test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) | ||
181 | schedule(); | ||
182 | |||
183 | finish_wait(&eb->lock_wq, &wait); | ||
184 | } | 174 | } |
175 | if (atomic_read(&eb->blocking_writers)) { | ||
176 | write_unlock(&eb->lock); | ||
177 | wait_event(eb->write_lock_wq, | ||
178 | atomic_read(&eb->blocking_writers) == 0); | ||
179 | goto again; | ||
180 | } | ||
181 | WARN_ON(atomic_read(&eb->spinning_writers)); | ||
182 | atomic_inc(&eb->spinning_writers); | ||
183 | atomic_inc(&eb->write_locks); | ||
185 | return 0; | 184 | return 0; |
186 | } | 185 | } |
187 | 186 | ||
187 | /* | ||
188 | * drop a spinning or a blocking write lock. | ||
189 | */ | ||
188 | int btrfs_tree_unlock(struct extent_buffer *eb) | 190 | int btrfs_tree_unlock(struct extent_buffer *eb) |
189 | { | 191 | { |
190 | /* | 192 | int blockers = atomic_read(&eb->blocking_writers); |
191 | * if we were a blocking owner, we don't have the spinlock held | 193 | |
192 | * just clear the bit and look for waiters | 194 | BUG_ON(blockers > 1); |
193 | */ | 195 | |
194 | if (test_and_clear_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) | 196 | btrfs_assert_tree_locked(eb); |
195 | smp_mb__after_clear_bit(); | 197 | atomic_dec(&eb->write_locks); |
196 | else | 198 | |
197 | spin_unlock(&eb->lock); | 199 | if (blockers) { |
198 | 200 | WARN_ON(atomic_read(&eb->spinning_writers)); | |
199 | if (waitqueue_active(&eb->lock_wq)) | 201 | atomic_dec(&eb->blocking_writers); |
200 | wake_up(&eb->lock_wq); | 202 | smp_wmb(); |
203 | wake_up(&eb->write_lock_wq); | ||
204 | } else { | ||
205 | WARN_ON(atomic_read(&eb->spinning_writers) != 1); | ||
206 | atomic_dec(&eb->spinning_writers); | ||
207 | write_unlock(&eb->lock); | ||
208 | } | ||
201 | return 0; | 209 | return 0; |
202 | } | 210 | } |
203 | 211 | ||
204 | void btrfs_assert_tree_locked(struct extent_buffer *eb) | 212 | void btrfs_assert_tree_locked(struct extent_buffer *eb) |
205 | { | 213 | { |
206 | if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) | 214 | BUG_ON(!atomic_read(&eb->write_locks)); |
207 | assert_spin_locked(&eb->lock); | 215 | } |
216 | |||
217 | void btrfs_assert_tree_read_locked(struct extent_buffer *eb) | ||
218 | { | ||
219 | BUG_ON(!atomic_read(&eb->read_locks)); | ||
208 | } | 220 | } |
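The rewritten locking.c drops the old private spinlock-plus-blocking-bit scheme for a straight rwlock_t guarded by atomic counts of blocking and spinning holders. Note the shape of every lock taker: wait for the blocking count to drain, take the rwlock, then re-check, because a blocker can slip in between the wait and the acquisition; on a lost race the lock is dropped and the whole dance retried. A runnable pthread analogue of that check/lock/recheck pattern (the yield loop stands in for wait_event(); all names invented):

#include <pthread.h>
#include <sched.h>
#include <stdatomic.h>
#include <stdio.h>

static pthread_rwlock_t lock = PTHREAD_RWLOCK_INITIALIZER;
static atomic_int blocking_writers;     /* analogue of eb->blocking_writers */

static void tree_read_lock(void)
{
        for (;;) {
                /* stand-in for wait_event(): don't pile onto the
                 * rwlock while a blocking writer is outstanding */
                while (atomic_load(&blocking_writers))
                        sched_yield();
                pthread_rwlock_rdlock(&lock);
                if (!atomic_load(&blocking_writers))
                        return;                 /* won the race: locked */
                /* a blocker slipped in after our check: back off, retry */
                pthread_rwlock_unlock(&lock);
        }
}

int main(void)
{
        tree_read_lock();
        puts("spinning read lock taken");
        pthread_rwlock_unlock(&lock);
        return 0;
}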
diff --git a/fs/btrfs/locking.h b/fs/btrfs/locking.h index 5c33a560a2f1..17247ddb81a0 100644 --- a/fs/btrfs/locking.h +++ b/fs/btrfs/locking.h | |||
@@ -19,11 +19,43 @@ | |||
19 | #ifndef __BTRFS_LOCKING_ | 19 | #ifndef __BTRFS_LOCKING_ |
20 | #define __BTRFS_LOCKING_ | 20 | #define __BTRFS_LOCKING_ |
21 | 21 | ||
22 | #define BTRFS_WRITE_LOCK 1 | ||
23 | #define BTRFS_READ_LOCK 2 | ||
24 | #define BTRFS_WRITE_LOCK_BLOCKING 3 | ||
25 | #define BTRFS_READ_LOCK_BLOCKING 4 | ||
26 | |||
22 | int btrfs_tree_lock(struct extent_buffer *eb); | 27 | int btrfs_tree_lock(struct extent_buffer *eb); |
23 | int btrfs_tree_unlock(struct extent_buffer *eb); | 28 | int btrfs_tree_unlock(struct extent_buffer *eb); |
24 | int btrfs_try_spin_lock(struct extent_buffer *eb); | 29 | int btrfs_try_spin_lock(struct extent_buffer *eb); |
25 | 30 | ||
26 | void btrfs_set_lock_blocking(struct extent_buffer *eb); | 31 | void btrfs_tree_read_lock(struct extent_buffer *eb); |
27 | void btrfs_clear_lock_blocking(struct extent_buffer *eb); | 32 | void btrfs_tree_read_unlock(struct extent_buffer *eb); |
33 | void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb); | ||
34 | void btrfs_set_lock_blocking_rw(struct extent_buffer *eb, int rw); | ||
35 | void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw); | ||
28 | void btrfs_assert_tree_locked(struct extent_buffer *eb); | 36 | void btrfs_assert_tree_locked(struct extent_buffer *eb); |
37 | int btrfs_try_tree_read_lock(struct extent_buffer *eb); | ||
38 | int btrfs_try_tree_write_lock(struct extent_buffer *eb); | ||
39 | |||
40 | static inline void btrfs_tree_unlock_rw(struct extent_buffer *eb, int rw) | ||
41 | { | ||
42 | if (rw == BTRFS_WRITE_LOCK || rw == BTRFS_WRITE_LOCK_BLOCKING) | ||
43 | btrfs_tree_unlock(eb); | ||
44 | else if (rw == BTRFS_READ_LOCK_BLOCKING) | ||
45 | btrfs_tree_read_unlock_blocking(eb); | ||
46 | else if (rw == BTRFS_READ_LOCK) | ||
47 | btrfs_tree_read_unlock(eb); | ||
48 | else | ||
49 | BUG(); | ||
50 | } | ||
51 | |||
52 | static inline void btrfs_set_lock_blocking(struct extent_buffer *eb) | ||
53 | { | ||
54 | btrfs_set_lock_blocking_rw(eb, BTRFS_WRITE_LOCK); | ||
55 | } | ||
56 | |||
57 | static inline void btrfs_clear_lock_blocking(struct extent_buffer *eb) | ||
58 | { | ||
59 | btrfs_clear_lock_blocking_rw(eb, BTRFS_WRITE_LOCK_BLOCKING); | ||
60 | } | ||
29 | #endif | 61 | #endif |
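locking.h now threads the caller's lock state through an rw token (BTRFS_WRITE_LOCK, BTRFS_READ_LOCK, and their _BLOCKING forms), so btrfs_tree_unlock_rw() can release whichever flavor is actually held, while the old btrfs_set_lock_blocking()/btrfs_clear_lock_blocking() names survive as write-lock wrappers for unconverted callers. A sketch of how a caller carries the token (illustrative control flow; might_sleep_soon is an invented condition):

int rw = BTRFS_READ_LOCK;

btrfs_tree_read_lock(eb);
/* short, non-sleeping work on the extent buffer ... */

if (might_sleep_soon) {
        /* convert to a blocking read lock before scheduling */
        btrfs_set_lock_blocking_rw(eb, rw);
        rw = BTRFS_READ_LOCK_BLOCKING;
        /* ... work that may schedule ... */
}

/* releases spinning or blocking, read or write, per the token */
btrfs_tree_unlock_rw(eb, rw);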
diff --git a/fs/btrfs/ref-cache.c b/fs/btrfs/ref-cache.c deleted file mode 100644 index 82d569cb6267..000000000000 --- a/fs/btrfs/ref-cache.c +++ /dev/null | |||
@@ -1,68 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2008 Oracle. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public | ||
6 | * License v2 as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
11 | * General Public License for more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public | ||
14 | * License along with this program; if not, write to the | ||
15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
16 | * Boston, MA 021110-1307, USA. | ||
17 | */ | ||
18 | |||
19 | #include <linux/sched.h> | ||
20 | #include <linux/slab.h> | ||
21 | #include <linux/sort.h> | ||
22 | #include "ctree.h" | ||
23 | #include "ref-cache.h" | ||
24 | #include "transaction.h" | ||
25 | |||
26 | static struct rb_node *tree_insert(struct rb_root *root, u64 bytenr, | ||
27 | struct rb_node *node) | ||
28 | { | ||
29 | struct rb_node **p = &root->rb_node; | ||
30 | struct rb_node *parent = NULL; | ||
31 | struct btrfs_leaf_ref *entry; | ||
32 | |||
33 | while (*p) { | ||
34 | parent = *p; | ||
35 | entry = rb_entry(parent, struct btrfs_leaf_ref, rb_node); | ||
36 | |||
37 | if (bytenr < entry->bytenr) | ||
38 | p = &(*p)->rb_left; | ||
39 | else if (bytenr > entry->bytenr) | ||
40 | p = &(*p)->rb_right; | ||
41 | else | ||
42 | return parent; | ||
43 | } | ||
44 | |||
45 | entry = rb_entry(node, struct btrfs_leaf_ref, rb_node); | ||
46 | rb_link_node(node, parent, p); | ||
47 | rb_insert_color(node, root); | ||
48 | return NULL; | ||
49 | } | ||
50 | |||
51 | static struct rb_node *tree_search(struct rb_root *root, u64 bytenr) | ||
52 | { | ||
53 | struct rb_node *n = root->rb_node; | ||
54 | struct btrfs_leaf_ref *entry; | ||
55 | |||
56 | while (n) { | ||
57 | entry = rb_entry(n, struct btrfs_leaf_ref, rb_node); | ||
58 | WARN_ON(!entry->in_tree); | ||
59 | |||
60 | if (bytenr < entry->bytenr) | ||
61 | n = n->rb_left; | ||
62 | else if (bytenr > entry->bytenr) | ||
63 | n = n->rb_right; | ||
64 | else | ||
65 | return n; | ||
66 | } | ||
67 | return NULL; | ||
68 | } | ||
diff --git a/fs/btrfs/ref-cache.h b/fs/btrfs/ref-cache.h deleted file mode 100644 index 24f7001f6387..000000000000 --- a/fs/btrfs/ref-cache.h +++ /dev/null | |||
@@ -1,52 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2008 Oracle. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public | ||
6 | * License v2 as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
11 | * General Public License for more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public | ||
14 | * License along with this program; if not, write to the | ||
15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
16 | * Boston, MA 021110-1307, USA. | ||
17 | */ | ||
18 | #ifndef __REFCACHE__ | ||
19 | #define __REFCACHE__ | ||
20 | |||
21 | struct btrfs_extent_info { | ||
22 | /* bytenr and num_bytes find the extent in the extent allocation tree */ | ||
23 | u64 bytenr; | ||
24 | u64 num_bytes; | ||
25 | |||
26 | /* objectid and offset find the back reference for the file */ | ||
27 | u64 objectid; | ||
28 | u64 offset; | ||
29 | }; | ||
30 | |||
31 | struct btrfs_leaf_ref { | ||
32 | struct rb_node rb_node; | ||
33 | struct btrfs_leaf_ref_tree *tree; | ||
34 | int in_tree; | ||
35 | atomic_t usage; | ||
36 | |||
37 | u64 root_gen; | ||
38 | u64 bytenr; | ||
39 | u64 owner; | ||
40 | u64 generation; | ||
41 | int nritems; | ||
42 | |||
43 | struct list_head list; | ||
44 | struct btrfs_extent_info extents[]; | ||
45 | }; | ||
46 | |||
47 | static inline size_t btrfs_leaf_ref_size(int nr_extents) | ||
48 | { | ||
49 | return sizeof(struct btrfs_leaf_ref) + | ||
50 | sizeof(struct btrfs_extent_info) * nr_extents; | ||
51 | } | ||
52 | #endif | ||
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 5e0a3dc79a45..59bb1764273d 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c | |||
@@ -2955,7 +2955,8 @@ static int relocate_file_extent_cluster(struct inode *inode, | |||
2955 | page_cache_sync_readahead(inode->i_mapping, | 2955 | page_cache_sync_readahead(inode->i_mapping, |
2956 | ra, NULL, index, | 2956 | ra, NULL, index, |
2957 | last_index + 1 - index); | 2957 | last_index + 1 - index); |
2958 | page = grab_cache_page(inode->i_mapping, index); | 2958 | page = find_or_create_page(inode->i_mapping, index, |
2959 | GFP_NOFS); | ||
2959 | if (!page) { | 2960 | if (!page) { |
2960 | btrfs_delalloc_release_metadata(inode, | 2961 | btrfs_delalloc_release_metadata(inode, |
2961 | PAGE_CACHE_SIZE); | 2962 | PAGE_CACHE_SIZE); |
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index ebe45443de06..f4099904565a 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c | |||
@@ -71,13 +71,12 @@ out: | |||
71 | return ret; | 71 | return ret; |
72 | } | 72 | } |
73 | 73 | ||
74 | int btrfs_set_root_node(struct btrfs_root_item *item, | 74 | void btrfs_set_root_node(struct btrfs_root_item *item, |
75 | struct extent_buffer *node) | 75 | struct extent_buffer *node) |
76 | { | 76 | { |
77 | btrfs_set_root_bytenr(item, node->start); | 77 | btrfs_set_root_bytenr(item, node->start); |
78 | btrfs_set_root_level(item, btrfs_header_level(node)); | 78 | btrfs_set_root_level(item, btrfs_header_level(node)); |
79 | btrfs_set_root_generation(item, btrfs_header_generation(node)); | 79 | btrfs_set_root_generation(item, btrfs_header_generation(node)); |
80 | return 0; | ||
81 | } | 80 | } |
82 | 81 | ||
83 | /* | 82 | /* |
diff --git a/fs/btrfs/struct-funcs.c b/fs/btrfs/struct-funcs.c index c0f7ecaf1e79..bc1f6ad18442 100644 --- a/fs/btrfs/struct-funcs.c +++ b/fs/btrfs/struct-funcs.c | |||
@@ -50,36 +50,22 @@ u##bits btrfs_##name(struct extent_buffer *eb, \ | |||
50 | unsigned long part_offset = (unsigned long)s; \ | 50 | unsigned long part_offset = (unsigned long)s; \ |
51 | unsigned long offset = part_offset + offsetof(type, member); \ | 51 | unsigned long offset = part_offset + offsetof(type, member); \ |
52 | type *p; \ | 52 | type *p; \ |
53 | /* ugly, but we want the fast path here */ \ | 53 | int err; \ |
54 | if (eb->map_token && offset >= eb->map_start && \ | 54 | char *kaddr; \ |
55 | offset + sizeof(((type *)0)->member) <= eb->map_start + \ | 55 | unsigned long map_start; \ |
56 | eb->map_len) { \ | 56 | unsigned long map_len; \ |
57 | p = (type *)(eb->kaddr + part_offset - eb->map_start); \ | 57 | u##bits res; \ |
58 | return le##bits##_to_cpu(p->member); \ | 58 | err = map_private_extent_buffer(eb, offset, \ |
59 | } \ | 59 | sizeof(((type *)0)->member), \ |
60 | { \ | 60 | &kaddr, &map_start, &map_len); \ |
61 | int err; \ | 61 | if (err) { \ |
62 | char *map_token; \ | 62 | __le##bits leres; \ |
63 | char *kaddr; \ | 63 | read_eb_member(eb, s, type, member, &leres); \ |
64 | int unmap_on_exit = (eb->map_token == NULL); \ | 64 | return le##bits##_to_cpu(leres); \ |
65 | unsigned long map_start; \ | 65 | } \ |
66 | unsigned long map_len; \ | 66 | p = (type *)(kaddr + part_offset - map_start); \ |
67 | u##bits res; \ | 67 | res = le##bits##_to_cpu(p->member); \ |
68 | err = map_extent_buffer(eb, offset, \ | 68 | return res; \ |
69 | sizeof(((type *)0)->member), \ | ||
70 | &map_token, &kaddr, \ | ||
71 | &map_start, &map_len, KM_USER1); \ | ||
72 | if (err) { \ | ||
73 | __le##bits leres; \ | ||
74 | read_eb_member(eb, s, type, member, &leres); \ | ||
75 | return le##bits##_to_cpu(leres); \ | ||
76 | } \ | ||
77 | p = (type *)(kaddr + part_offset - map_start); \ | ||
78 | res = le##bits##_to_cpu(p->member); \ | ||
79 | if (unmap_on_exit) \ | ||
80 | unmap_extent_buffer(eb, map_token, KM_USER1); \ | ||
81 | return res; \ | ||
82 | } \ | ||
83 | } \ | 69 | } \ |
84 | void btrfs_set_##name(struct extent_buffer *eb, \ | 70 | void btrfs_set_##name(struct extent_buffer *eb, \ |
85 | type *s, u##bits val) \ | 71 | type *s, u##bits val) \ |
@@ -87,36 +73,21 @@ void btrfs_set_##name(struct extent_buffer *eb, \ | |||
87 | unsigned long part_offset = (unsigned long)s; \ | 73 | unsigned long part_offset = (unsigned long)s; \ |
88 | unsigned long offset = part_offset + offsetof(type, member); \ | 74 | unsigned long offset = part_offset + offsetof(type, member); \ |
89 | type *p; \ | 75 | type *p; \ |
90 | /* ugly, but we want the fast path here */ \ | 76 | int err; \ |
91 | if (eb->map_token && offset >= eb->map_start && \ | 77 | char *kaddr; \ |
92 | offset + sizeof(((type *)0)->member) <= eb->map_start + \ | 78 | unsigned long map_start; \ |
93 | eb->map_len) { \ | 79 | unsigned long map_len; \ |
94 | p = (type *)(eb->kaddr + part_offset - eb->map_start); \ | 80 | err = map_private_extent_buffer(eb, offset, \ |
95 | p->member = cpu_to_le##bits(val); \ | 81 | sizeof(((type *)0)->member), \ |
96 | return; \ | 82 | &kaddr, &map_start, &map_len); \ |
97 | } \ | 83 | if (err) { \ |
98 | { \ | 84 | __le##bits val2; \ |
99 | int err; \ | 85 | val2 = cpu_to_le##bits(val); \ |
100 | char *map_token; \ | 86 | write_eb_member(eb, s, type, member, &val2); \ |
101 | char *kaddr; \ | 87 | return; \ |
102 | int unmap_on_exit = (eb->map_token == NULL); \ | 88 | } \ |
103 | unsigned long map_start; \ | 89 | p = (type *)(kaddr + part_offset - map_start); \ |
104 | unsigned long map_len; \ | 90 | p->member = cpu_to_le##bits(val); \ |
105 | err = map_extent_buffer(eb, offset, \ | ||
106 | sizeof(((type *)0)->member), \ | ||
107 | &map_token, &kaddr, \ | ||
108 | &map_start, &map_len, KM_USER1); \ | ||
109 | if (err) { \ | ||
110 | __le##bits val2; \ | ||
111 | val2 = cpu_to_le##bits(val); \ | ||
112 | write_eb_member(eb, s, type, member, &val2); \ | ||
113 | return; \ | ||
114 | } \ | ||
115 | p = (type *)(kaddr + part_offset - map_start); \ | ||
116 | p->member = cpu_to_le##bits(val); \ | ||
117 | if (unmap_on_exit) \ | ||
118 | unmap_extent_buffer(eb, map_token, KM_USER1); \ | ||
119 | } \ | ||
120 | } | 91 | } |
121 | 92 | ||
122 | #include "ctree.h" | 93 | #include "ctree.h" |
@@ -125,15 +96,6 @@ void btrfs_node_key(struct extent_buffer *eb, | |||
125 | struct btrfs_disk_key *disk_key, int nr) | 96 | struct btrfs_disk_key *disk_key, int nr) |
126 | { | 97 | { |
127 | unsigned long ptr = btrfs_node_key_ptr_offset(nr); | 98 | unsigned long ptr = btrfs_node_key_ptr_offset(nr); |
128 | if (eb->map_token && ptr >= eb->map_start && | ||
129 | ptr + sizeof(*disk_key) <= eb->map_start + eb->map_len) { | ||
130 | memcpy(disk_key, eb->kaddr + ptr - eb->map_start, | ||
131 | sizeof(*disk_key)); | ||
132 | return; | ||
133 | } else if (eb->map_token) { | ||
134 | unmap_extent_buffer(eb, eb->map_token, KM_USER1); | ||
135 | eb->map_token = NULL; | ||
136 | } | ||
137 | read_eb_member(eb, (struct btrfs_key_ptr *)ptr, | 99 | read_eb_member(eb, (struct btrfs_key_ptr *)ptr, |
138 | struct btrfs_key_ptr, key, disk_key); | 100 | struct btrfs_key_ptr, key, disk_key); |
139 | } | 101 | } |
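The two macro bodies patched above generate every on-disk field accessor; each call now maps the needed page with map_private_extent_buffer() and falls back to read_eb_member()/write_eb_member() when the field straddles a page boundary, instead of reusing the extent buffer's long-lived map_token kmap slot (the KM_USER1 unmapping deleted from inode.c at the top of this section was part of the same retired scheme). For orientation, the accessors come from instantiations in ctree.h along these lines (existing pattern, shown for illustration):

/* BTRFS_SETGET_FUNCS(name, type, member, bits) expands the macro
 * bodies above into a typed getter/setter pair, e.g.: */
BTRFS_SETGET_FUNCS(inode_size, struct btrfs_inode_item, size, 64);

/* ... which yields functions used as: */
u64 isize = btrfs_inode_size(leaf, inode_item);
btrfs_set_inode_size(leaf, inode_item, isize + 1);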
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 0bb4ebbb71b7..15634d4648d7 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
@@ -723,6 +723,12 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
723 | seq_puts(seq, ",clear_cache"); | 723 | seq_puts(seq, ",clear_cache"); |
724 | if (btrfs_test_opt(root, USER_SUBVOL_RM_ALLOWED)) | 724 | if (btrfs_test_opt(root, USER_SUBVOL_RM_ALLOWED)) |
725 | seq_puts(seq, ",user_subvol_rm_allowed"); | 725 | seq_puts(seq, ",user_subvol_rm_allowed"); |
726 | if (btrfs_test_opt(root, ENOSPC_DEBUG)) | ||
727 | seq_puts(seq, ",enospc_debug"); | ||
728 | if (btrfs_test_opt(root, AUTO_DEFRAG)) | ||
729 | seq_puts(seq, ",autodefrag"); | ||
730 | if (btrfs_test_opt(root, INODE_MAP_CACHE)) | ||
731 | seq_puts(seq, ",inode_cache"); | ||
726 | return 0; | 732 | return 0; |
727 | } | 733 | } |
728 | 734 | ||
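These three mount options were already parsed but never reported back, so they vanished from /proc/mounts; with show_options emitting them, a filesystem mounted with all three would now show up along these lines (device name and default options hypothetical):

/dev/sdb1 /mnt/btrfs btrfs rw,relatime,enospc_debug,autodefrag,inode_cache 0 0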
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 51dcec86757f..7dc36fab4afc 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
@@ -216,17 +216,11 @@ static void wait_current_trans(struct btrfs_root *root) | |||
216 | spin_lock(&root->fs_info->trans_lock); | 216 | spin_lock(&root->fs_info->trans_lock); |
217 | cur_trans = root->fs_info->running_transaction; | 217 | cur_trans = root->fs_info->running_transaction; |
218 | if (cur_trans && cur_trans->blocked) { | 218 | if (cur_trans && cur_trans->blocked) { |
219 | DEFINE_WAIT(wait); | ||
220 | atomic_inc(&cur_trans->use_count); | 219 | atomic_inc(&cur_trans->use_count); |
221 | spin_unlock(&root->fs_info->trans_lock); | 220 | spin_unlock(&root->fs_info->trans_lock); |
222 | while (1) { | 221 | |
223 | prepare_to_wait(&root->fs_info->transaction_wait, &wait, | 222 | wait_event(root->fs_info->transaction_wait, |
224 | TASK_UNINTERRUPTIBLE); | 223 | !cur_trans->blocked); |
225 | if (!cur_trans->blocked) | ||
226 | break; | ||
227 | schedule(); | ||
228 | } | ||
229 | finish_wait(&root->fs_info->transaction_wait, &wait); | ||
230 | put_transaction(cur_trans); | 224 | put_transaction(cur_trans); |
231 | } else { | 225 | } else { |
232 | spin_unlock(&root->fs_info->trans_lock); | 226 | spin_unlock(&root->fs_info->trans_lock); |
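This is the first of several hunks in transaction.c collapsing open-coded prepare_to_wait()/schedule()/finish_wait() loops into wait_event(). The macro expands to essentially the same uninterruptible sleep, so behavior is unchanged provided every writer of the condition still calls wake_up() on the same queue; roughly (paraphrasing the kernel's definition, not part of this diff):

#define wait_event(wq, condition)                                     \
do {                                                                  \
        DEFINE_WAIT(__wait);                                          \
        if (condition)                  /* fast path: no sleep */     \
                break;                                                \
        for (;;) {                                                    \
                prepare_to_wait(&wq, &__wait, TASK_UNINTERRUPTIBLE);  \
                if (condition)                                        \
                        break;                                        \
                schedule();                                           \
        }                                                             \
        finish_wait(&wq, &__wait);                                    \
} while (0)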
@@ -260,7 +254,7 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, | |||
260 | { | 254 | { |
261 | struct btrfs_trans_handle *h; | 255 | struct btrfs_trans_handle *h; |
262 | struct btrfs_transaction *cur_trans; | 256 | struct btrfs_transaction *cur_trans; |
263 | int retries = 0; | 257 | u64 num_bytes = 0; |
264 | int ret; | 258 | int ret; |
265 | 259 | ||
266 | if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) | 260 | if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) |
@@ -274,6 +268,19 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, | |||
274 | h->block_rsv = NULL; | 268 | h->block_rsv = NULL; |
275 | goto got_it; | 269 | goto got_it; |
276 | } | 270 | } |
271 | |||
272 | /* | ||
273 | * Do the reservation before we join the transaction so we can do all | ||
274 | * the appropriate flushing if need be. | ||
275 | */ | ||
276 | if (num_items > 0 && root != root->fs_info->chunk_root) { | ||
277 | num_bytes = btrfs_calc_trans_metadata_size(root, num_items); | ||
278 | ret = btrfs_block_rsv_add(NULL, root, | ||
279 | &root->fs_info->trans_block_rsv, | ||
280 | num_bytes); | ||
281 | if (ret) | ||
282 | return ERR_PTR(ret); | ||
283 | } | ||
277 | again: | 284 | again: |
278 | h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); | 285 | h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); |
279 | if (!h) | 286 | if (!h) |
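Reserving before joining matters because filling the reservation can itself flush delalloc or wait out a commit, which would deadlock if the handle were already attached to the running transaction; doing it up front also removes the old retry-then-ENOSPC dance deleted in the next hunk. The amount scales with the item count; btrfs_calc_trans_metadata_size() in this era worked out to roughly the following (sketch from memory of ctree.h, treat the exact factors as approximate):

/* per item: one leaf plus a node at each remaining level, with a
 * 3x multiplier as headroom for splits and CoW copies */
static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_root *root,
                                                 unsigned num_items)
{
        return (root->leafsize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) *
                3 * num_items;
}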
@@ -310,24 +317,9 @@ again: | |||
310 | goto again; | 317 | goto again; |
311 | } | 318 | } |
312 | 319 | ||
313 | if (num_items > 0) { | 320 | if (num_bytes) { |
314 | ret = btrfs_trans_reserve_metadata(h, root, num_items); | 321 | h->block_rsv = &root->fs_info->trans_block_rsv; |
315 | if (ret == -EAGAIN && !retries) { | 322 | h->bytes_reserved = num_bytes; |
316 | retries++; | ||
317 | btrfs_commit_transaction(h, root); | ||
318 | goto again; | ||
319 | } else if (ret == -EAGAIN) { | ||
320 | /* | ||
321 | * We have already retried and got EAGAIN, so really we | ||
322 | * don't have space, so set ret to -ENOSPC. | ||
323 | */ | ||
324 | ret = -ENOSPC; | ||
325 | } | ||
326 | |||
327 | if (ret < 0) { | ||
328 | btrfs_end_transaction(h, root); | ||
329 | return ERR_PTR(ret); | ||
330 | } | ||
331 | } | 323 | } |
332 | 324 | ||
333 | got_it: | 325 | got_it: |
@@ -359,19 +351,10 @@ struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root | |||
359 | } | 351 | } |
360 | 352 | ||
361 | /* wait for a transaction commit to be fully complete */ | 353 | /* wait for a transaction commit to be fully complete */ |
362 | static noinline int wait_for_commit(struct btrfs_root *root, | 354 | static noinline void wait_for_commit(struct btrfs_root *root, |
363 | struct btrfs_transaction *commit) | 355 | struct btrfs_transaction *commit) |
364 | { | 356 | { |
365 | DEFINE_WAIT(wait); | 357 | wait_event(commit->commit_wait, commit->commit_done); |
366 | while (!commit->commit_done) { | ||
367 | prepare_to_wait(&commit->commit_wait, &wait, | ||
368 | TASK_UNINTERRUPTIBLE); | ||
369 | if (commit->commit_done) | ||
370 | break; | ||
371 | schedule(); | ||
372 | } | ||
373 | finish_wait(&commit->commit_wait, &wait); | ||
374 | return 0; | ||
375 | } | 358 | } |
376 | 359 | ||
377 | int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid) | 360 | int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid) |
@@ -499,10 +482,17 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
499 | } | 482 | } |
500 | 483 | ||
501 | if (lock && cur_trans->blocked && !cur_trans->in_commit) { | 484 | if (lock && cur_trans->blocked && !cur_trans->in_commit) { |
502 | if (throttle) | 485 | if (throttle) { |
486 | /* | ||
487 | * We may race with somebody else here so end up having | ||
488 | * to call end_transaction on ourselves again, so inc | ||
489 | * our use_count. | ||
490 | */ | ||
491 | trans->use_count++; | ||
503 | return btrfs_commit_transaction(trans, root); | 492 | return btrfs_commit_transaction(trans, root); |
504 | else | 493 | } else { |
505 | wake_up_process(info->transaction_kthread); | 494 | wake_up_process(info->transaction_kthread); |
495 | } | ||
506 | } | 496 | } |
507 | 497 | ||
508 | WARN_ON(cur_trans != info->running_transaction); | 498 | WARN_ON(cur_trans != info->running_transaction); |
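
[Annotation] The use_count bump deserves a note: btrfs_commit_transaction() ends the handle internally, and this code is itself running inside __btrfs_end_transaction(), so per the new comment a race on the throttle path could otherwise tear the handle down twice. A sketch of the intended reference flow (assuming use_count counts outstanding users of the handle, as the comment implies):

        trans->use_count++;     /* 1 -> 2: the commit path will drop one */
        return btrfs_commit_transaction(trans, root);
        /* commit's internal end_transaction takes use_count back to 1;
         * the original caller's unwind releases the last reference. */
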
@@ -1080,22 +1070,7 @@ int btrfs_transaction_blocked(struct btrfs_fs_info *info) | |||
1080 | static void wait_current_trans_commit_start(struct btrfs_root *root, | 1070 | static void wait_current_trans_commit_start(struct btrfs_root *root, |
1081 | struct btrfs_transaction *trans) | 1071 | struct btrfs_transaction *trans) |
1082 | { | 1072 | { |
1083 | DEFINE_WAIT(wait); | 1073 | wait_event(root->fs_info->transaction_blocked_wait, trans->in_commit); |
1084 | |||
1085 | if (trans->in_commit) | ||
1086 | return; | ||
1087 | |||
1088 | while (1) { | ||
1089 | prepare_to_wait(&root->fs_info->transaction_blocked_wait, &wait, | ||
1090 | TASK_UNINTERRUPTIBLE); | ||
1091 | if (trans->in_commit) { | ||
1092 | finish_wait(&root->fs_info->transaction_blocked_wait, | ||
1093 | &wait); | ||
1094 | break; | ||
1095 | } | ||
1096 | schedule(); | ||
1097 | finish_wait(&root->fs_info->transaction_blocked_wait, &wait); | ||
1098 | } | ||
1099 | } | 1074 | } |
1100 | 1075 | ||
1101 | /* | 1076 | /* |
@@ -1105,24 +1080,8 @@ static void wait_current_trans_commit_start(struct btrfs_root *root, | |||
1105 | static void wait_current_trans_commit_start_and_unblock(struct btrfs_root *root, | 1080 | static void wait_current_trans_commit_start_and_unblock(struct btrfs_root *root, |
1106 | struct btrfs_transaction *trans) | 1081 | struct btrfs_transaction *trans) |
1107 | { | 1082 | { |
1108 | DEFINE_WAIT(wait); | 1083 | wait_event(root->fs_info->transaction_wait, |
1109 | 1084 | trans->commit_done || (trans->in_commit && !trans->blocked)); | |
1110 | if (trans->commit_done || (trans->in_commit && !trans->blocked)) | ||
1111 | return; | ||
1112 | |||
1113 | while (1) { | ||
1114 | prepare_to_wait(&root->fs_info->transaction_wait, &wait, | ||
1115 | TASK_UNINTERRUPTIBLE); | ||
1116 | if (trans->commit_done || | ||
1117 | (trans->in_commit && !trans->blocked)) { | ||
1118 | finish_wait(&root->fs_info->transaction_wait, | ||
1119 | &wait); | ||
1120 | break; | ||
1121 | } | ||
1122 | schedule(); | ||
1123 | finish_wait(&root->fs_info->transaction_wait, | ||
1124 | &wait); | ||
1125 | } | ||
1126 | } | 1085 | } |
1127 | 1086 | ||
1128 | /* | 1087 | /* |
@@ -1229,8 +1188,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1229 | atomic_inc(&cur_trans->use_count); | 1188 | atomic_inc(&cur_trans->use_count); |
1230 | btrfs_end_transaction(trans, root); | 1189 | btrfs_end_transaction(trans, root); |
1231 | 1190 | ||
1232 | ret = wait_for_commit(root, cur_trans); | 1191 | wait_for_commit(root, cur_trans); |
1233 | BUG_ON(ret); | ||
1234 | 1192 | ||
1235 | put_transaction(cur_trans); | 1193 | put_transaction(cur_trans); |
1236 | 1194 | ||
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 4ce8a9f41d1e..babee65f8eda 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
@@ -1617,7 +1617,8 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, | |||
1617 | return 0; | 1617 | return 0; |
1618 | 1618 | ||
1619 | path = btrfs_alloc_path(); | 1619 | path = btrfs_alloc_path(); |
1620 | BUG_ON(!path); | 1620 | if (!path) |
1621 | return -ENOMEM; | ||
1621 | 1622 | ||
1622 | nritems = btrfs_header_nritems(eb); | 1623 | nritems = btrfs_header_nritems(eb); |
1623 | for (i = 0; i < nritems; i++) { | 1624 | for (i = 0; i < nritems; i++) { |
@@ -1723,15 +1724,17 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, | |||
1723 | return -ENOMEM; | 1724 | return -ENOMEM; |
1724 | 1725 | ||
1725 | if (*level == 1) { | 1726 | if (*level == 1) { |
1726 | wc->process_func(root, next, wc, ptr_gen); | 1727 | ret = wc->process_func(root, next, wc, ptr_gen); |
1728 | if (ret) | ||
1729 | return ret; | ||
1727 | 1730 | ||
1728 | path->slots[*level]++; | 1731 | path->slots[*level]++; |
1729 | if (wc->free) { | 1732 | if (wc->free) { |
1730 | btrfs_read_buffer(next, ptr_gen); | 1733 | btrfs_read_buffer(next, ptr_gen); |
1731 | 1734 | ||
1732 | btrfs_tree_lock(next); | 1735 | btrfs_tree_lock(next); |
1733 | clean_tree_block(trans, root, next); | ||
1734 | btrfs_set_lock_blocking(next); | 1736 | btrfs_set_lock_blocking(next); |
1737 | clean_tree_block(trans, root, next); | ||
1735 | btrfs_wait_tree_block_writeback(next); | 1738 | btrfs_wait_tree_block_writeback(next); |
1736 | btrfs_tree_unlock(next); | 1739 | btrfs_tree_unlock(next); |
1737 | 1740 | ||
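
[Annotation] Since replay_one_buffer() (the process_func used during log replay) can now return -ENOMEM instead of hitting a BUG_ON, its callers must stop discarding the return value; silently walking on past a buffer that failed to replay would leave the replay half-applied. The fix is the minimal propagate pattern, used here and again in walk_up_log_tree() below:

        ret = wc->process_func(root, next, wc, ptr_gen);
        if (ret)
                return ret;     /* bail out instead of replaying on */
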
@@ -1788,16 +1791,19 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans, | |||
1788 | parent = path->nodes[*level + 1]; | 1791 | parent = path->nodes[*level + 1]; |
1789 | 1792 | ||
1790 | root_owner = btrfs_header_owner(parent); | 1793 | root_owner = btrfs_header_owner(parent); |
1791 | wc->process_func(root, path->nodes[*level], wc, | 1794 | ret = wc->process_func(root, path->nodes[*level], wc, |
1792 | btrfs_header_generation(path->nodes[*level])); | 1795 | btrfs_header_generation(path->nodes[*level])); |
1796 | if (ret) | ||
1797 | return ret; | ||
1798 | |||
1793 | if (wc->free) { | 1799 | if (wc->free) { |
1794 | struct extent_buffer *next; | 1800 | struct extent_buffer *next; |
1795 | 1801 | ||
1796 | next = path->nodes[*level]; | 1802 | next = path->nodes[*level]; |
1797 | 1803 | ||
1798 | btrfs_tree_lock(next); | 1804 | btrfs_tree_lock(next); |
1799 | clean_tree_block(trans, root, next); | ||
1800 | btrfs_set_lock_blocking(next); | 1805 | btrfs_set_lock_blocking(next); |
1806 | clean_tree_block(trans, root, next); | ||
1801 | btrfs_wait_tree_block_writeback(next); | 1807 | btrfs_wait_tree_block_writeback(next); |
1802 | btrfs_tree_unlock(next); | 1808 | btrfs_tree_unlock(next); |
1803 | 1809 | ||
@@ -1864,8 +1870,8 @@ static int walk_log_tree(struct btrfs_trans_handle *trans, | |||
1864 | next = path->nodes[orig_level]; | 1870 | next = path->nodes[orig_level]; |
1865 | 1871 | ||
1866 | btrfs_tree_lock(next); | 1872 | btrfs_tree_lock(next); |
1867 | clean_tree_block(trans, log, next); | ||
1868 | btrfs_set_lock_blocking(next); | 1873 | btrfs_set_lock_blocking(next); |
1874 | clean_tree_block(trans, log, next); | ||
1869 | btrfs_wait_tree_block_writeback(next); | 1875 | btrfs_wait_tree_block_writeback(next); |
1870 | btrfs_tree_unlock(next); | 1876 | btrfs_tree_unlock(next); |
1871 | 1877 | ||
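
[Annotation] The other change repeated in all three tree-log hunks is ordering: clean_tree_block() now runs only after btrfs_set_lock_blocking() has converted the spinning tree lock into its blocking form. The likely motivation (an assumption on my part, not spelled out in the diff itself) is that clean_tree_block() can sleep while undirtying the buffer, which is unsafe under a spinning lock. The corrected sequence:

        btrfs_tree_lock(next);
        btrfs_set_lock_blocking(next);          /* spinning -> blocking */
        clean_tree_block(trans, root, next);    /* may sleep: safe only now */
        btrfs_wait_tree_block_writeback(next);
        btrfs_tree_unlock(next);
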
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 1efa56e18f9b..53875ae73ad4 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
@@ -1037,7 +1037,8 @@ static noinline int find_next_chunk(struct btrfs_root *root, | |||
1037 | struct btrfs_key found_key; | 1037 | struct btrfs_key found_key; |
1038 | 1038 | ||
1039 | path = btrfs_alloc_path(); | 1039 | path = btrfs_alloc_path(); |
1040 | BUG_ON(!path); | 1040 | if (!path) |
1041 | return -ENOMEM; | ||
1041 | 1042 | ||
1042 | key.objectid = objectid; | 1043 | key.objectid = objectid; |
1043 | key.offset = (u64)-1; | 1044 | key.offset = (u64)-1; |
@@ -2061,8 +2062,10 @@ int btrfs_balance(struct btrfs_root *dev_root) | |||
2061 | 2062 | ||
2062 | /* step two, relocate all the chunks */ | 2063 | /* step two, relocate all the chunks */ |
2063 | path = btrfs_alloc_path(); | 2064 | path = btrfs_alloc_path(); |
2064 | BUG_ON(!path); | 2065 | if (!path) { |
2065 | 2066 | ret = -ENOMEM; | |
2067 | goto error; | ||
2068 | } | ||
2066 | key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; | 2069 | key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; |
2067 | key.offset = (u64)-1; | 2070 | key.offset = (u64)-1; |
2068 | key.type = BTRFS_CHUNK_ITEM_KEY; | 2071 | key.type = BTRFS_CHUNK_ITEM_KEY; |
@@ -2098,7 +2101,8 @@ int btrfs_balance(struct btrfs_root *dev_root) | |||
2098 | chunk_root->root_key.objectid, | 2101 | chunk_root->root_key.objectid, |
2099 | found_key.objectid, | 2102 | found_key.objectid, |
2100 | found_key.offset); | 2103 | found_key.offset); |
2101 | BUG_ON(ret && ret != -ENOSPC); | 2104 | if (ret && ret != -ENOSPC) |
2105 | goto error; | ||
2102 | key.offset = found_key.offset - 1; | 2106 | key.offset = found_key.offset - 1; |
2103 | } | 2107 | } |
2104 | ret = 0; | 2108 | ret = 0; |
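
[Annotation] The volumes.c hunks all apply one conversion: BUG_ON() calls that would panic the machine on recoverable failures (a failed path allocation, a relocation error) become ordinary error returns. The shape of the conversion, condensed from the btrfs_balance() case above:

        path = btrfs_alloc_path();
        if (!path) {                            /* was: BUG_ON(!path) */
                ret = -ENOMEM;
                goto error;                     /* unwind, don't panic */
        }

        /* ... for each chunk found ... */
        ret = btrfs_relocate_chunk(chunk_root,
                                   chunk_root->root_key.objectid,
                                   found_key.objectid,
                                   found_key.offset);
        if (ret && ret != -ENOSPC)              /* -ENOSPC is tolerated */
                goto error;                     /* was: BUG_ON(...) */
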
@@ -2660,7 +2664,8 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans, | |||
2660 | 2664 | ||
2661 | ret = find_next_chunk(fs_info->chunk_root, | 2665 | ret = find_next_chunk(fs_info->chunk_root, |
2662 | BTRFS_FIRST_CHUNK_TREE_OBJECTID, &chunk_offset); | 2666 | BTRFS_FIRST_CHUNK_TREE_OBJECTID, &chunk_offset); |
2663 | BUG_ON(ret); | 2667 | if (ret) |
2668 | return ret; | ||
2664 | 2669 | ||
2665 | alloc_profile = BTRFS_BLOCK_GROUP_METADATA | | 2670 | alloc_profile = BTRFS_BLOCK_GROUP_METADATA | |
2666 | (fs_info->metadata_alloc_profile & | 2671 | (fs_info->metadata_alloc_profile & |
@@ -3594,7 +3599,7 @@ int btrfs_read_sys_array(struct btrfs_root *root) | |||
3594 | if (!sb) | 3599 | if (!sb) |
3595 | return -ENOMEM; | 3600 | return -ENOMEM; |
3596 | btrfs_set_buffer_uptodate(sb); | 3601 | btrfs_set_buffer_uptodate(sb); |
3597 | btrfs_set_buffer_lockdep_class(sb, 0); | 3602 | btrfs_set_buffer_lockdep_class(root->root_key.objectid, sb, 0); |
3598 | 3603 | ||
3599 | write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE); | 3604 | write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE); |
3600 | array_size = btrfs_super_sys_array_size(super_copy); | 3605 | array_size = btrfs_super_sys_array_size(super_copy); |
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index a039e6ed4ce0..6196e1a76c14 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c | |||
@@ -102,43 +102,57 @@ static int do_setxattr(struct btrfs_trans_handle *trans, | |||
102 | if (!path) | 102 | if (!path) |
103 | return -ENOMEM; | 103 | return -ENOMEM; |
104 | 104 | ||
105 | /* first lets see if we already have this xattr */ | 105 | if (flags & XATTR_REPLACE) { |
106 | di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode), name, | 106 | di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode), name, |
107 | strlen(name), -1); | 107 | name_len, -1); |
108 | if (IS_ERR(di)) { | 108 | if (IS_ERR(di)) { |
109 | ret = PTR_ERR(di); | 109 | ret = PTR_ERR(di); |
110 | goto out; | 110 | goto out; |
111 | } | 111 | } else if (!di) { |
112 | 112 | ret = -ENODATA; | |
113 | /* ok we already have this xattr, lets remove it */ | ||
114 | if (di) { | ||
115 | /* if we want create only exit */ | ||
116 | if (flags & XATTR_CREATE) { | ||
117 | ret = -EEXIST; | ||
118 | goto out; | 113 | goto out; |
119 | } | 114 | } |
120 | |||
121 | ret = btrfs_delete_one_dir_name(trans, root, path, di); | 115 | ret = btrfs_delete_one_dir_name(trans, root, path, di); |
122 | BUG_ON(ret); | 116 | if (ret) |
117 | goto out; | ||
123 | btrfs_release_path(path); | 118 | btrfs_release_path(path); |
119 | } | ||
124 | 120 | ||
125 | /* if we don't have a value then we are removing the xattr */ | 121 | again: |
126 | if (!value) | 122 | ret = btrfs_insert_xattr_item(trans, root, path, btrfs_ino(inode), |
123 | name, name_len, value, size); | ||
124 | if (ret == -EEXIST) { | ||
125 | if (flags & XATTR_CREATE) | ||
127 | goto out; | 126 | goto out; |
128 | } else { | 127 | /* |
128 | * We can't use the path we already have since we won't have the | ||
129 | * proper locking for a delete, so release the path and | ||
130 | * re-lookup to delete the thing. | ||
131 | */ | ||
129 | btrfs_release_path(path); | 132 | btrfs_release_path(path); |
133 | di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode), | ||
134 | name, name_len, -1); | ||
135 | if (IS_ERR(di)) { | ||
136 | ret = PTR_ERR(di); | ||
137 | goto out; | ||
138 | } else if (!di) { | ||
139 | /* Shouldn't happen but just in case... */ | ||
140 | btrfs_release_path(path); | ||
141 | goto again; | ||
142 | } | ||
130 | 143 | ||
131 | if (flags & XATTR_REPLACE) { | 144 | ret = btrfs_delete_one_dir_name(trans, root, path, di); |
132 | /* we couldn't find the attr to replace */ | 145 | if (ret) |
133 | ret = -ENODATA; | ||
134 | goto out; | 146 | goto out; |
147 | |||
148 | /* | ||
149 | * We have a value to set, so go back and try to insert it now. | ||
150 | */ | ||
151 | if (value) { | ||
152 | btrfs_release_path(path); | ||
153 | goto again; | ||
135 | } | 154 | } |
136 | } | 155 | } |
137 | |||
138 | /* ok we have to create a completely new xattr */ | ||
139 | ret = btrfs_insert_xattr_item(trans, root, path, btrfs_ino(inode), | ||
140 | name, name_len, value, size); | ||
141 | BUG_ON(ret); | ||
142 | out: | 156 | out: |
143 | btrfs_free_path(path); | 157 | btrfs_free_path(path); |
144 | return ret; | 158 | return ret; |
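
[Annotation] The do_setxattr() rework replaces lookup-first with insert-first: the common create case touches the tree once, and an existing attribute is detected by -EEXIST from the insert itself rather than by a separate lookup. As the new comment notes, the insert path's locking is not valid for a delete, hence the release-and-relookup on the fallback. Condensed control flow of the new version (the IS_ERR/!di handling on the second lookup and the goto-out unwinding are trimmed; see the hunk above for the full code):

        if (flags & XATTR_REPLACE) {
                /* must already exist: find it and remove it first */
                di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode),
                                        name, name_len, -1);
                if (!di)
                        return -ENODATA;
                ret = btrfs_delete_one_dir_name(trans, root, path, di);
                if (ret)
                        return ret;
                btrfs_release_path(path);
        }
again:
        ret = btrfs_insert_xattr_item(trans, root, path, btrfs_ino(inode),
                                      name, name_len, value, size);
        if (ret == -EEXIST) {
                if (flags & XATTR_CREATE)
                        return -EEXIST;         /* create-only was requested */
                /* Drop the path and re-look-up so the delete happens under
                 * the proper locks, then retry the insert if a new value
                 * was supplied. */
                btrfs_release_path(path);
                di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode),
                                        name, name_len, -1);
                ret = btrfs_delete_one_dir_name(trans, root, path, di);
                if (value) {
                        btrfs_release_path(path);
                        goto again;
                }
        }

This also eliminates the last two BUG_ON()s in the function: a failed btrfs_delete_one_dir_name() or btrfs_insert_xattr_item() now propagates an error instead of crashing.
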