Diffstat (limited to 'fs/btrfs')
32 files changed, 1765 insertions, 1707 deletions
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index 9b72dcf1cd2..40e6ac08c21 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -6,5 +6,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
 	   transaction.o inode.o file.o tree-defrag.o \
 	   extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \
 	   extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
-	   export.o tree-log.o acl.o free-space-cache.o zlib.o lzo.o \
+	   export.o tree-log.o free-space-cache.o zlib.o lzo.o \
 	   compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o
+
+btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index f66fc995973..eb159aaa5a1 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -28,9 +28,7 @@
 #include "btrfs_inode.h"
 #include "xattr.h"
 
-#ifdef CONFIG_BTRFS_FS_POSIX_ACL
-
-static struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
+struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
 {
 	int size;
 	const char *name;
@@ -111,7 +109,6 @@ static int btrfs_set_acl(struct btrfs_trans_handle *trans,
 	int ret, size = 0;
 	const char *name;
 	char *value = NULL;
-	mode_t mode;
 
 	if (acl) {
 		ret = posix_acl_valid(acl);
@@ -122,13 +119,11 @@ static int btrfs_set_acl(struct btrfs_trans_handle *trans,
 
 	switch (type) {
 	case ACL_TYPE_ACCESS:
-		mode = inode->i_mode;
 		name = POSIX_ACL_XATTR_ACCESS;
 		if (acl) {
-			ret = posix_acl_equiv_mode(acl, &mode);
+			ret = posix_acl_equiv_mode(acl, &inode->i_mode);
 			if (ret < 0)
 				return ret;
-			inode->i_mode = mode;
 		}
 		ret = 0;
 		break;
@@ -195,28 +190,6 @@ out:
 	return ret;
 }
 
-int btrfs_check_acl(struct inode *inode, int mask, unsigned int flags)
-{
-	int error = -EAGAIN;
-
-	if (flags & IPERM_FLAG_RCU) {
-		if (!negative_cached_acl(inode, ACL_TYPE_ACCESS))
-			error = -ECHILD;
-
-	} else {
-		struct posix_acl *acl;
-		acl = btrfs_get_acl(inode, ACL_TYPE_ACCESS);
-		if (IS_ERR(acl))
-			return PTR_ERR(acl);
-		if (acl) {
-			error = posix_acl_permission(inode, acl, mask);
-			posix_acl_release(acl);
-		}
-	}
-
-	return error;
-}
-
 /*
  * btrfs_init_acl is already generally called under fs_mutex, so the locking
  * stuff has been fixed to work with that. If the locking stuff changes, we
@@ -244,31 +217,20 @@ int btrfs_init_acl(struct btrfs_trans_handle *trans,
 	}
 
 	if (IS_POSIXACL(dir) && acl) {
-		struct posix_acl *clone;
-		mode_t mode;
-
 		if (S_ISDIR(inode->i_mode)) {
 			ret = btrfs_set_acl(trans, inode, acl,
 					    ACL_TYPE_DEFAULT);
 			if (ret)
 				goto failed;
 		}
-		clone = posix_acl_clone(acl, GFP_NOFS);
-		ret = -ENOMEM;
-		if (!clone)
-			goto failed;
-
-		mode = inode->i_mode;
-		ret = posix_acl_create_masq(clone, &mode);
-		if (ret >= 0) {
-			inode->i_mode = mode;
-			if (ret > 0) {
-				/* we need an acl */
-				ret = btrfs_set_acl(trans, inode, clone,
-						    ACL_TYPE_ACCESS);
-			}
+		ret = posix_acl_create(&acl, GFP_NOFS, &inode->i_mode);
+		if (ret < 0)
+			return ret;
+
+		if (ret > 0) {
+			/* we need an acl */
+			ret = btrfs_set_acl(trans, inode, acl, ACL_TYPE_ACCESS);
 		}
-		posix_acl_release(clone);
 	}
 failed:
 	posix_acl_release(acl);
@@ -278,7 +240,7 @@ failed:
 
 int btrfs_acl_chmod(struct inode *inode)
 {
-	struct posix_acl *acl, *clone;
+	struct posix_acl *acl;
 	int ret = 0;
 
 	if (S_ISLNK(inode->i_mode))
@@ -291,17 +253,11 @@ int btrfs_acl_chmod(struct inode *inode)
 	if (IS_ERR_OR_NULL(acl))
 		return PTR_ERR(acl);
 
-	clone = posix_acl_clone(acl, GFP_KERNEL);
+	ret = posix_acl_chmod(&acl, GFP_KERNEL, inode->i_mode);
+	if (ret)
+		return ret;
+	ret = btrfs_set_acl(NULL, inode, acl, ACL_TYPE_ACCESS);
 	posix_acl_release(acl);
-	if (!clone)
-		return -ENOMEM;
-
-	ret = posix_acl_chmod_masq(clone, inode->i_mode);
-	if (!ret)
-		ret = btrfs_set_acl(NULL, inode, clone, ACL_TYPE_ACCESS);
-
-	posix_acl_release(clone);
-
 	return ret;
 }
 
@@ -318,18 +274,3 @@ const struct xattr_handler btrfs_xattr_acl_access_handler = {
 	.get = btrfs_xattr_acl_get,
 	.set = btrfs_xattr_acl_set,
 };
-
-#else /* CONFIG_BTRFS_FS_POSIX_ACL */
-
-int btrfs_acl_chmod(struct inode *inode)
-{
-	return 0;
-}
-
-int btrfs_init_acl(struct btrfs_trans_handle *trans,
-		   struct inode *inode, struct inode *dir)
-{
-	return 0;
-}
-
-#endif /* CONFIG_BTRFS_FS_POSIX_ACL */
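
The acl.c rewrite above tracks a VFS change from this kernel cycle: the open-coded posix_acl_clone()/posix_acl_create_masq() and posix_acl_chmod_masq() sequences are replaced by the generic posix_acl_create() and posix_acl_chmod() helpers, which take the ACL pointer by reference and update i_mode in place. A minimal sketch of the new calling convention follows; fs_set_acl() is a hypothetical stand-in for a filesystem's own setter (btrfs_set_acl() above), not a real kernel API.

    /* Sketch only: fs_set_acl() is a placeholder, not a kernel function. */
    static int example_init_acl(struct inode *inode, struct posix_acl *acl)
    {
    	int ret;
    
    	/* may swap 'acl' for a clone and updates inode->i_mode in place;
    	 * on error the helper has already released the ACL */
    	ret = posix_acl_create(&acl, GFP_NOFS, &inode->i_mode);
    	if (ret < 0)
    		return ret;
    	if (ret > 0)	/* mode bits alone could not express the ACL */
    		ret = fs_set_acl(inode, acl, ACL_TYPE_ACCESS);
    	posix_acl_release(acl);
    	return ret;
    }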
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 52d7eca8c7b..d9f99a16edd 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -34,6 +34,9 @@ struct btrfs_inode {
 	 */
 	struct btrfs_key location;
 
+	/* Lock for counters */
+	spinlock_t lock;
+
 	/* the extent_tree has caches of all the extent mappings to disk */
 	struct extent_map_tree extent_tree;
 
@@ -134,8 +137,8 @@ struct btrfs_inode {
 	 * items we think we'll end up using, and reserved_extents is the number
 	 * of extent items we've reserved metadata for.
 	 */
-	atomic_t outstanding_extents;
-	atomic_t reserved_extents;
+	unsigned outstanding_extents;
+	unsigned reserved_extents;
 
 	/*
 	 * ordered_data_close is set by truncate when a file that used
@@ -173,7 +176,11 @@ static inline u64 btrfs_ino(struct inode *inode)
 {
 	u64 ino = BTRFS_I(inode)->location.objectid;
 
-	if (ino <= BTRFS_FIRST_FREE_OBJECTID)
+	/*
+	 * !ino: btree_inode
+	 * type == BTRFS_ROOT_ITEM_KEY: subvol dir
+	 */
+	if (!ino || BTRFS_I(inode)->location.type == BTRFS_ROOT_ITEM_KEY)
 		ino = inode->i_ino;
 	return ino;
 }
@@ -184,4 +191,13 @@ static inline void btrfs_i_size_write(struct inode *inode, u64 size)
 	BTRFS_I(inode)->disk_i_size = size;
 }
 
+static inline bool btrfs_is_free_space_inode(struct btrfs_root *root,
+					     struct inode *inode)
+{
+	if (root == root->fs_info->tree_root ||
+	    BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID)
+		return true;
+	return false;
+}
+
 #endif
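
The btrfs_inode.h hunks above demote outstanding_extents and reserved_extents from atomic_t to plain unsigned and introduce a per-inode spinlock. The point of the trade is that the two counters can now be read and updated together as one consistent unit, which a pair of independent atomics cannot guarantee. A sketch of the pattern this enables (not the actual btrfs helper):

    /* Sketch of the counter pattern guarded by BTRFS_I(inode)->lock. */
    static void example_add_outstanding(struct btrfs_inode *bi, unsigned nr)
    {
    	spin_lock(&bi->lock);
    	bi->outstanding_extents += nr;
    	/* both fields are observed and updated under one lock */
    	if (bi->outstanding_extents > bi->reserved_extents)
    		bi->reserved_extents = bi->outstanding_extents;
    	spin_unlock(&bi->lock);
    }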
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index bfe42b03eaf..8ec5d86f173 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -338,6 +338,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
 	u64 first_byte = disk_start;
 	struct block_device *bdev;
 	int ret;
+	int skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
 
 	WARN_ON(start & ((u64)PAGE_CACHE_SIZE - 1));
 	cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS);
@@ -392,8 +393,11 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
 			ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
 			BUG_ON(ret);
 
-			ret = btrfs_csum_one_bio(root, inode, bio, start, 1);
-			BUG_ON(ret);
+			if (!skip_sum) {
+				ret = btrfs_csum_one_bio(root, inode, bio,
+							 start, 1);
+				BUG_ON(ret);
+			}
 
 			ret = btrfs_map_bio(root, WRITE, bio, 0, 1);
 			BUG_ON(ret);
@@ -418,8 +422,10 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
 	ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
 	BUG_ON(ret);
 
-	ret = btrfs_csum_one_bio(root, inode, bio, start, 1);
-	BUG_ON(ret);
+	if (!skip_sum) {
+		ret = btrfs_csum_one_bio(root, inode, bio, start, 1);
+		BUG_ON(ret);
+	}
 
 	ret = btrfs_map_bio(root, WRITE, bio, 0, 1);
 	BUG_ON(ret);
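
The compression.c change makes the compressed write path honor the per-inode NODATASUM property the same way the regular write path does: the flag is sampled once into skip_sum before any bios are issued, and checksum generation is bypassed for such inodes on every bio. The test itself is just a bitmask over the in-memory inode flags, roughly:

    /* Sketch: the flag test behind skip_sum in the hunks above. */
    static inline int example_inode_skips_csums(struct inode *inode)
    {
    	return BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
    }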
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 2e667868e0d..011cab3aca8 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -54,8 +54,13 @@ noinline void btrfs_set_path_blocking(struct btrfs_path *p)
 {
 	int i;
 	for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
-		if (p->nodes[i] && p->locks[i])
-			btrfs_set_lock_blocking(p->nodes[i]);
+		if (!p->nodes[i] || !p->locks[i])
+			continue;
+		btrfs_set_lock_blocking_rw(p->nodes[i], p->locks[i]);
+		if (p->locks[i] == BTRFS_READ_LOCK)
+			p->locks[i] = BTRFS_READ_LOCK_BLOCKING;
+		else if (p->locks[i] == BTRFS_WRITE_LOCK)
+			p->locks[i] = BTRFS_WRITE_LOCK_BLOCKING;
 	}
 }
 
@@ -68,7 +73,7 @@ noinline void btrfs_set_path_blocking(struct btrfs_path *p)
  * for held
  */
 noinline void btrfs_clear_path_blocking(struct btrfs_path *p,
-					struct extent_buffer *held)
+					struct extent_buffer *held, int held_rw)
 {
 	int i;
 
@@ -79,19 +84,29 @@ noinline void btrfs_clear_path_blocking(struct btrfs_path *p,
 	 * really sure by forcing the path to blocking before we clear
 	 * the path blocking.
 	 */
-	if (held)
-		btrfs_set_lock_blocking(held);
+	if (held) {
+		btrfs_set_lock_blocking_rw(held, held_rw);
+		if (held_rw == BTRFS_WRITE_LOCK)
+			held_rw = BTRFS_WRITE_LOCK_BLOCKING;
+		else if (held_rw == BTRFS_READ_LOCK)
+			held_rw = BTRFS_READ_LOCK_BLOCKING;
+	}
 	btrfs_set_path_blocking(p);
 #endif
 
 	for (i = BTRFS_MAX_LEVEL - 1; i >= 0; i--) {
-		if (p->nodes[i] && p->locks[i])
-			btrfs_clear_lock_blocking(p->nodes[i]);
+		if (p->nodes[i] && p->locks[i]) {
+			btrfs_clear_lock_blocking_rw(p->nodes[i], p->locks[i]);
+			if (p->locks[i] == BTRFS_WRITE_LOCK_BLOCKING)
+				p->locks[i] = BTRFS_WRITE_LOCK;
+			else if (p->locks[i] == BTRFS_READ_LOCK_BLOCKING)
+				p->locks[i] = BTRFS_READ_LOCK;
+		}
 	}
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 	if (held)
-		btrfs_clear_lock_blocking(held);
+		btrfs_clear_lock_blocking_rw(held, held_rw);
 #endif
 }
 
@@ -119,7 +134,7 @@ noinline void btrfs_release_path(struct btrfs_path *p)
 		if (!p->nodes[i])
 			continue;
 		if (p->locks[i]) {
-			btrfs_tree_unlock(p->nodes[i]);
+			btrfs_tree_unlock_rw(p->nodes[i], p->locks[i]);
 			p->locks[i] = 0;
 		}
 		free_extent_buffer(p->nodes[i]);
@@ -167,6 +182,25 @@ struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
 	return eb;
 }
 
+/* loop around taking references on and locking the root node of the
+ * tree until you end up with a lock on the root.  A locked buffer
+ * is returned, with a reference held.
+ */
+struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root)
+{
+	struct extent_buffer *eb;
+
+	while (1) {
+		eb = btrfs_root_node(root);
+		btrfs_tree_read_lock(eb);
+		if (eb == root->node)
+			break;
+		btrfs_tree_read_unlock(eb);
+		free_extent_buffer(eb);
+	}
+	return eb;
+}
+
 /* cowonly root (everything not a reference counted cow subvolume), just get
  * put onto a simple dirty list.  transaction.c walks this to make sure they
  * get properly updated on disk.
@@ -626,14 +660,6 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
 	for (i = start_slot; i < end_slot; i++) {
 		int close = 1;
 
-		if (!parent->map_token) {
-			map_extent_buffer(parent,
-					btrfs_node_key_ptr_offset(i),
-					sizeof(struct btrfs_key_ptr),
-					&parent->map_token, &parent->kaddr,
-					&parent->map_start, &parent->map_len,
-					KM_USER1);
-		}
 		btrfs_node_key(parent, &disk_key, i);
 		if (!progress_passed && comp_keys(&disk_key, progress) < 0)
 			continue;
@@ -656,11 +682,6 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
 			last_block = blocknr;
 			continue;
 		}
-		if (parent->map_token) {
-			unmap_extent_buffer(parent, parent->map_token,
-					    KM_USER1);
-			parent->map_token = NULL;
-		}
 
 		cur = btrfs_find_tree_block(root, blocknr, blocksize);
 		if (cur)
@@ -701,11 +722,6 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
 		btrfs_tree_unlock(cur);
 		free_extent_buffer(cur);
 	}
-	if (parent->map_token) {
-		unmap_extent_buffer(parent, parent->map_token,
-				    KM_USER1);
-		parent->map_token = NULL;
-	}
 	return err;
 }
 
@@ -746,7 +762,6 @@ static noinline int generic_bin_search(struct extent_buffer *eb,
 	struct btrfs_disk_key *tmp = NULL;
 	struct btrfs_disk_key unaligned;
 	unsigned long offset;
-	char *map_token = NULL;
 	char *kaddr = NULL;
 	unsigned long map_start = 0;
 	unsigned long map_len = 0;
@@ -756,18 +771,13 @@ static noinline int generic_bin_search(struct extent_buffer *eb,
 		mid = (low + high) / 2;
 		offset = p + mid * item_size;
 
-		if (!map_token || offset < map_start ||
+		if (!kaddr || offset < map_start ||
 		    (offset + sizeof(struct btrfs_disk_key)) >
 		    map_start + map_len) {
-			if (map_token) {
-				unmap_extent_buffer(eb, map_token, KM_USER0);
-				map_token = NULL;
-			}
 
 			err = map_private_extent_buffer(eb, offset,
 					sizeof(struct btrfs_disk_key),
-					&map_token, &kaddr,
-					&map_start, &map_len, KM_USER0);
+					&kaddr, &map_start, &map_len);
 
 			if (!err) {
 				tmp = (struct btrfs_disk_key *)(kaddr + offset -
@@ -790,14 +800,10 @@ static noinline int generic_bin_search(struct extent_buffer *eb,
 			high = mid;
 		else {
 			*slot = mid;
-			if (map_token)
-				unmap_extent_buffer(eb, map_token, KM_USER0);
 			return 0;
 		}
 	}
 	*slot = low;
-	if (map_token)
-		unmap_extent_buffer(eb, map_token, KM_USER0);
 	return 1;
 }
 
@@ -890,7 +896,8 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
 
 	mid = path->nodes[level];
 
-	WARN_ON(!path->locks[level]);
+	WARN_ON(path->locks[level] != BTRFS_WRITE_LOCK &&
+		path->locks[level] != BTRFS_WRITE_LOCK_BLOCKING);
 	WARN_ON(btrfs_header_generation(mid) != trans->transid);
 
 	orig_ptr = btrfs_node_blockptr(mid, orig_slot);
@@ -1228,7 +1235,6 @@ static void reada_for_search(struct btrfs_root *root,
 	u32 nr;
 	u32 blocksize;
 	u32 nscan = 0;
-	bool map = true;
 
 	if (level != 1)
 		return;
@@ -1250,19 +1256,8 @@ static void reada_for_search(struct btrfs_root *root,
 
 	nritems = btrfs_header_nritems(node);
 	nr = slot;
-	if (node->map_token || path->skip_locking)
-		map = false;
 
 	while (1) {
-		if (map && !node->map_token) {
-			unsigned long offset = btrfs_node_key_ptr_offset(nr);
-			map_private_extent_buffer(node, offset,
-						  sizeof(struct btrfs_key_ptr),
-						  &node->map_token,
-						  &node->kaddr,
-						  &node->map_start,
-						  &node->map_len, KM_USER1);
-		}
 		if (direction < 0) {
 			if (nr == 0)
 				break;
@@ -1281,11 +1276,6 @@ static void reada_for_search(struct btrfs_root *root,
 		if ((search <= target && target - search <= 65536) ||
 		    (search > target && search - target <= 65536)) {
 			gen = btrfs_node_ptr_generation(node, nr);
-			if (map && node->map_token) {
-				unmap_extent_buffer(node, node->map_token,
-						    KM_USER1);
-				node->map_token = NULL;
-			}
 			readahead_tree_block(root, search, blocksize, gen);
 			nread += blocksize;
 		}
@@ -1293,10 +1283,6 @@ static void reada_for_search(struct btrfs_root *root,
 		if ((nread > 65536 || nscan > 32))
 			break;
 	}
-	if (map && node->map_token) {
-		unmap_extent_buffer(node, node->map_token, KM_USER1);
-		node->map_token = NULL;
-	}
 }
 
 /*
@@ -1409,7 +1395,7 @@ static noinline void unlock_up(struct btrfs_path *path, int level,
 
 		t = path->nodes[i];
 		if (i >= lowest_unlock && i > skip_level && path->locks[i]) {
-			btrfs_tree_unlock(t);
+			btrfs_tree_unlock_rw(t, path->locks[i]);
 			path->locks[i] = 0;
 		}
 	}
@@ -1436,7 +1422,7 @@ noinline void btrfs_unlock_up_safe(struct btrfs_path *path, int level)
 			continue;
 		if (!path->locks[i])
 			continue;
-		btrfs_tree_unlock(path->nodes[i]);
+		btrfs_tree_unlock_rw(path->nodes[i], path->locks[i]);
 		path->locks[i] = 0;
 	}
 }
@@ -1485,6 +1471,8 @@ read_block_for_search(struct btrfs_trans_handle *trans,
 		 * we can trust our generation number
 		 */
 		free_extent_buffer(tmp);
+		btrfs_set_path_blocking(p);
+
 		tmp = read_tree_block(root, blocknr, blocksize, gen);
 		if (tmp && btrfs_buffer_uptodate(tmp, gen)) {
 			*eb_ret = tmp;
@@ -1540,20 +1528,27 @@ read_block_for_search(struct btrfs_trans_handle *trans,
 static int
 setup_nodes_for_search(struct btrfs_trans_handle *trans,
 		       struct btrfs_root *root, struct btrfs_path *p,
-		       struct extent_buffer *b, int level, int ins_len)
+		       struct extent_buffer *b, int level, int ins_len,
+		       int *write_lock_level)
 {
 	int ret;
 	if ((p->search_for_split || ins_len > 0) && btrfs_header_nritems(b) >=
 	    BTRFS_NODEPTRS_PER_BLOCK(root) - 3) {
 		int sret;
 
+		if (*write_lock_level < level + 1) {
+			*write_lock_level = level + 1;
+			btrfs_release_path(p);
+			goto again;
+		}
+
 		sret = reada_for_balance(root, p, level);
 		if (sret)
 			goto again;
 
 		btrfs_set_path_blocking(p);
 		sret = split_node(trans, root, p, level);
-		btrfs_clear_path_blocking(p, NULL);
+		btrfs_clear_path_blocking(p, NULL, 0);
 
 		BUG_ON(sret > 0);
 		if (sret) {
@@ -1565,13 +1560,19 @@ setup_nodes_for_search(struct btrfs_trans_handle *trans,
 		   BTRFS_NODEPTRS_PER_BLOCK(root) / 2) {
 		int sret;
 
+		if (*write_lock_level < level + 1) {
+			*write_lock_level = level + 1;
+			btrfs_release_path(p);
+			goto again;
+		}
+
 		sret = reada_for_balance(root, p, level);
 		if (sret)
 			goto again;
 
 		btrfs_set_path_blocking(p);
 		sret = balance_level(trans, root, p, level);
-		btrfs_clear_path_blocking(p, NULL);
+		btrfs_clear_path_blocking(p, NULL, 0);
 
 		if (sret) {
 			ret = sret;
@@ -1615,27 +1616,78 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
 	int err;
 	int level;
 	int lowest_unlock = 1;
+	int root_lock;
+	/* everything at write_lock_level or lower must be write locked */
+	int write_lock_level = 0;
 	u8 lowest_level = 0;
 
 	lowest_level = p->lowest_level;
 	WARN_ON(lowest_level && ins_len > 0);
 	WARN_ON(p->nodes[0] != NULL);
 
-	if (ins_len < 0)
+	if (ins_len < 0) {
 		lowest_unlock = 2;
 
+		/* when we are removing items, we might have to go up to level
+		 * two as we update tree pointers  Make sure we keep write
+		 * for those levels as well
+		 */
+		write_lock_level = 2;
+	} else if (ins_len > 0) {
+		/*
+		 * for inserting items, make sure we have a write lock on
+		 * level 1 so we can update keys
+		 */
+		write_lock_level = 1;
+	}
+
+	if (!cow)
+		write_lock_level = -1;
+
+	if (cow && (p->keep_locks || p->lowest_level))
+		write_lock_level = BTRFS_MAX_LEVEL;
+
 again:
+	/*
+	 * we try very hard to do read locks on the root
+	 */
+	root_lock = BTRFS_READ_LOCK;
+	level = 0;
 	if (p->search_commit_root) {
+		/*
+		 * the commit roots are read only
+		 * so we always do read locks
+		 */
 		b = root->commit_root;
 		extent_buffer_get(b);
+		level = btrfs_header_level(b);
 		if (!p->skip_locking)
-			btrfs_tree_lock(b);
+			btrfs_tree_read_lock(b);
 	} else {
-		if (p->skip_locking)
+		if (p->skip_locking) {
 			b = btrfs_root_node(root);
-		else
-			b = btrfs_lock_root_node(root);
+			level = btrfs_header_level(b);
+		} else {
+			/* we don't know the level of the root node
+			 * until we actually have it read locked
+			 */
+			b = btrfs_read_lock_root_node(root);
+			level = btrfs_header_level(b);
+			if (level <= write_lock_level) {
+				/* whoops, must trade for write lock */
+				btrfs_tree_read_unlock(b);
+				free_extent_buffer(b);
+				b = btrfs_lock_root_node(root);
+				root_lock = BTRFS_WRITE_LOCK;
+
+				/* the level might have changed, check again */
+				level = btrfs_header_level(b);
+			}
+		}
 	}
+	p->nodes[level] = b;
+	if (!p->skip_locking)
+		p->locks[level] = root_lock;
 
 	while (b) {
 		level = btrfs_header_level(b);
@@ -1644,10 +1696,6 @@ again:
 		 * setup the path here so we can release it under lock
 		 * contention with the cow code
 		 */
-		p->nodes[level] = b;
-		if (!p->skip_locking)
-			p->locks[level] = 1;
-
 		if (cow) {
 			/*
 			 * if we don't really need to cow this block
@@ -1659,6 +1707,16 @@ again:
 
 			btrfs_set_path_blocking(p);
 
+			/*
+			 * must have write locks on this node and the
+			 * parent
+			 */
+			if (level + 1 > write_lock_level) {
+				write_lock_level = level + 1;
+				btrfs_release_path(p);
+				goto again;
+			}
+
 			err = btrfs_cow_block(trans, root, b,
 					      p->nodes[level + 1],
 					      p->slots[level + 1], &b);
@@ -1671,10 +1729,7 @@ cow_done:
 		BUG_ON(!cow && ins_len);
 
 		p->nodes[level] = b;
-		if (!p->skip_locking)
-			p->locks[level] = 1;
-
-		btrfs_clear_path_blocking(p, NULL);
+		btrfs_clear_path_blocking(p, NULL, 0);
 
 		/*
 		 * we have a lock on b and as long as we aren't changing
@@ -1700,7 +1755,7 @@ cow_done:
 			}
 			p->slots[level] = slot;
 			err = setup_nodes_for_search(trans, root, p, b, level,
-						     ins_len);
+						     ins_len, &write_lock_level);
 			if (err == -EAGAIN)
 				goto again;
 			if (err) {
@@ -1710,6 +1765,19 @@ cow_done:
 			b = p->nodes[level];
 			slot = p->slots[level];
 
+			/*
+			 * slot 0 is special, if we change the key
+			 * we have to update the parent pointer
+			 * which means we must have a write lock
+			 * on the parent
+			 */
+			if (slot == 0 && cow &&
+			    write_lock_level < level + 1) {
+				write_lock_level = level + 1;
+				btrfs_release_path(p);
+				goto again;
+			}
+
 			unlock_up(p, level, lowest_unlock);
 
 			if (level == lowest_level) {
@@ -1728,23 +1796,42 @@ cow_done:
 			}
 
 			if (!p->skip_locking) {
-				btrfs_clear_path_blocking(p, NULL);
-				err = btrfs_try_spin_lock(b);
-
-				if (!err) {
-					btrfs_set_path_blocking(p);
-					btrfs_tree_lock(b);
-					btrfs_clear_path_blocking(p, b);
+				level = btrfs_header_level(b);
+				if (level <= write_lock_level) {
+					err = btrfs_try_tree_write_lock(b);
+					if (!err) {
+						btrfs_set_path_blocking(p);
+						btrfs_tree_lock(b);
+						btrfs_clear_path_blocking(p, b,
+								  BTRFS_WRITE_LOCK);
+					}
+					p->locks[level] = BTRFS_WRITE_LOCK;
+				} else {
+					err = btrfs_try_tree_read_lock(b);
+					if (!err) {
+						btrfs_set_path_blocking(p);
+						btrfs_tree_read_lock(b);
+						btrfs_clear_path_blocking(p, b,
								  BTRFS_READ_LOCK);
+					}
+					p->locks[level] = BTRFS_READ_LOCK;
 				}
+				p->nodes[level] = b;
 			}
 		} else {
 			p->slots[level] = slot;
 			if (ins_len > 0 &&
 			    btrfs_leaf_free_space(root, b) < ins_len) {
+				if (write_lock_level < 1) {
+					write_lock_level = 1;
+					btrfs_release_path(p);
+					goto again;
+				}
+
 				btrfs_set_path_blocking(p);
 				err = split_leaf(trans, root, key,
 						 p, ins_len, ret == 0);
-				btrfs_clear_path_blocking(p, NULL);
+				btrfs_clear_path_blocking(p, NULL, 0);
 
 				BUG_ON(err > 0);
 				if (err) {
@@ -2025,7 +2112,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
 	add_root_to_dirty_list(root);
 	extent_buffer_get(c);
 	path->nodes[level] = c;
-	path->locks[level] = 1;
+	path->locks[level] = BTRFS_WRITE_LOCK;
 	path->slots[level] = 0;
 	return 0;
 }
@@ -2253,14 +2340,6 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
 		if (path->slots[0] == i)
 			push_space += data_size;
 
-		if (!left->map_token) {
-			map_extent_buffer(left, (unsigned long)item,
-					sizeof(struct btrfs_item),
-					&left->map_token, &left->kaddr,
-					&left->map_start, &left->map_len,
-					KM_USER1);
-		}
-
 		this_item_size = btrfs_item_size(left, item);
 		if (this_item_size + sizeof(*item) + push_space > free_space)
 			break;
@@ -2271,10 +2350,6 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
 			break;
 		i--;
 	}
-	if (left->map_token) {
-		unmap_extent_buffer(left, left->map_token, KM_USER1);
-		left->map_token = NULL;
-	}
 
 	if (push_items == 0)
 		goto out_unlock;
@@ -2316,21 +2391,10 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
 	push_space = BTRFS_LEAF_DATA_SIZE(root);
 	for (i = 0; i < right_nritems; i++) {
 		item = btrfs_item_nr(right, i);
-		if (!right->map_token) {
-			map_extent_buffer(right, (unsigned long)item,
-					sizeof(struct btrfs_item),
-					&right->map_token, &right->kaddr,
-					&right->map_start, &right->map_len,
-					KM_USER1);
-		}
 		push_space -= btrfs_item_size(right, item);
 		btrfs_set_item_offset(right, item, push_space);
 	}
 
-	if (right->map_token) {
-		unmap_extent_buffer(right, right->map_token, KM_USER1);
-		right->map_token = NULL;
-	}
 	left_nritems -= push_items;
 	btrfs_set_header_nritems(left, left_nritems);
 
@@ -2467,13 +2531,6 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
 
 	for (i = 0; i < nr; i++) {
 		item = btrfs_item_nr(right, i);
-		if (!right->map_token) {
-			map_extent_buffer(right, (unsigned long)item,
-					sizeof(struct btrfs_item),
-					&right->map_token, &right->kaddr,
-					&right->map_start, &right->map_len,
-					KM_USER1);
-		}
 
 		if (!empty && push_items > 0) {
 			if (path->slots[0] < i)
@@ -2496,11 +2553,6 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
 		push_space += this_item_size + sizeof(*item);
 	}
 
-	if (right->map_token) {
-		unmap_extent_buffer(right, right->map_token, KM_USER1);
-		right->map_token = NULL;
-	}
-
 	if (push_items == 0) {
 		ret = 1;
 		goto out;
@@ -2530,23 +2582,12 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
 		u32 ioff;
 
 		item = btrfs_item_nr(left, i);
-		if (!left->map_token) {
-			map_extent_buffer(left, (unsigned long)item,
-					sizeof(struct btrfs_item),
-					&left->map_token, &left->kaddr,
-					&left->map_start, &left->map_len,
-					KM_USER1);
-		}
 
 		ioff = btrfs_item_offset(left, item);
 		btrfs_set_item_offset(left, item,
 		      ioff - (BTRFS_LEAF_DATA_SIZE(root) - old_left_item_size));
 	}
 	btrfs_set_header_nritems(left, old_left_nritems + push_items);
-	if (left->map_token) {
-		unmap_extent_buffer(left, left->map_token, KM_USER1);
-		left->map_token = NULL;
-	}
 
 	/* fixup right node */
 	if (push_items > right_nritems) {
@@ -2574,21 +2615,9 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
 	for (i = 0; i < right_nritems; i++) {
 		item = btrfs_item_nr(right, i);
 
-		if (!right->map_token) {
-			map_extent_buffer(right, (unsigned long)item,
-					sizeof(struct btrfs_item),
-					&right->map_token, &right->kaddr,
-					&right->map_start, &right->map_len,
-					KM_USER1);
-		}
-
 		push_space = push_space - btrfs_item_size(right, item);
 		btrfs_set_item_offset(right, item, push_space);
 	}
-	if (right->map_token) {
-		unmap_extent_buffer(right, right->map_token, KM_USER1);
-		right->map_token = NULL;
-	}
 
 	btrfs_mark_buffer_dirty(left);
 	if (right_nritems)
@@ -2729,23 +2758,10 @@ static noinline int copy_for_split(struct btrfs_trans_handle *trans,
 		struct btrfs_item *item = btrfs_item_nr(right, i);
 		u32 ioff;
 
-		if (!right->map_token) {
-			map_extent_buffer(right, (unsigned long)item,
-					sizeof(struct btrfs_item),
-					&right->map_token, &right->kaddr,
-					&right->map_start, &right->map_len,
-					KM_USER1);
-		}
-
 		ioff = btrfs_item_offset(right, item);
 		btrfs_set_item_offset(right, item, ioff + rt_data_off);
 	}
 
-	if (right->map_token) {
-		unmap_extent_buffer(right, right->map_token, KM_USER1);
-		right->map_token = NULL;
-	}
-
 	btrfs_set_header_nritems(l, mid);
 	ret = 0;
 	btrfs_item_key(right, &disk_key, 0);
@@ -3264,23 +3280,10 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans,
 			u32 ioff;
 			item = btrfs_item_nr(leaf, i);
 
-			if (!leaf->map_token) {
-				map_extent_buffer(leaf, (unsigned long)item,
-					sizeof(struct btrfs_item),
-					&leaf->map_token, &leaf->kaddr,
-					&leaf->map_start, &leaf->map_len,
-					KM_USER1);
-			}
-
 			ioff = btrfs_item_offset(leaf, item);
 			btrfs_set_item_offset(leaf, item, ioff + size_diff);
 		}
 
-		if (leaf->map_token) {
-			unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
-			leaf->map_token = NULL;
-		}
-
 		/* shift the data */
 		if (from_end) {
 			memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
@@ -3377,22 +3380,10 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans,
 		u32 ioff;
 		item = btrfs_item_nr(leaf, i);
 
-		if (!leaf->map_token) {
-			map_extent_buffer(leaf, (unsigned long)item,
-					sizeof(struct btrfs_item),
-					&leaf->map_token, &leaf->kaddr,
-					&leaf->map_start, &leaf->map_len,
-					KM_USER1);
-		}
 		ioff = btrfs_item_offset(leaf, item);
 		btrfs_set_item_offset(leaf, item, ioff - data_size);
 	}
 
-	if (leaf->map_token) {
-		unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
-		leaf->map_token = NULL;
-	}
-
 	/* shift the data */
 	memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
 		      data_end - data_size, btrfs_leaf_data(leaf) +
@@ -3494,27 +3485,13 @@ int btrfs_insert_some_items(struct btrfs_trans_handle *trans,
 	 * item0..itemN ... dataN.offset..dataN.size .. data0.size
 	 */
 	/* first correct the data pointers */
-	WARN_ON(leaf->map_token);
 	for (i = slot; i < nritems; i++) {
 		u32 ioff;
 
 		item = btrfs_item_nr(leaf, i);
-		if (!leaf->map_token) {
-			map_extent_buffer(leaf, (unsigned long)item,
-					sizeof(struct btrfs_item),
-					&leaf->map_token, &leaf->kaddr,
-					&leaf->map_start, &leaf->map_len,
-					KM_USER1);
-		}
-
 		ioff = btrfs_item_offset(leaf, item);
 		btrfs_set_item_offset(leaf, item, ioff - total_data);
 	}
-	if (leaf->map_token) {
-		unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
-		leaf->map_token = NULL;
-	}
-
 	/* shift the items */
 	memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
 			      btrfs_item_nr_offset(slot),
@@ -3608,27 +3585,13 @@ int setup_items_for_insert(struct btrfs_trans_handle *trans,
 	 * item0..itemN ... dataN.offset..dataN.size .. data0.size
 	 */
 	/* first correct the data pointers */
-	WARN_ON(leaf->map_token);
 	for (i = slot; i < nritems; i++) {
 		u32 ioff;
 
 		item = btrfs_item_nr(leaf, i);
-		if (!leaf->map_token) {
-			map_extent_buffer(leaf, (unsigned long)item,
-					sizeof(struct btrfs_item),
-					&leaf->map_token, &leaf->kaddr,
-					&leaf->map_start, &leaf->map_len,
-					KM_USER1);
-		}
-
 		ioff = btrfs_item_offset(leaf, item);
 		btrfs_set_item_offset(leaf, item, ioff - total_data);
 	}
-	if (leaf->map_token) {
-		unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
-		leaf->map_token = NULL;
-	}
-
 	/* shift the items */
 	memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
 			      btrfs_item_nr_offset(slot),
@@ -3840,22 +3803,10 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 			u32 ioff;
 
 			item = btrfs_item_nr(leaf, i);
-			if (!leaf->map_token) {
-				map_extent_buffer(leaf, (unsigned long)item,
-					sizeof(struct btrfs_item),
-					&leaf->map_token, &leaf->kaddr,
-					&leaf->map_start, &leaf->map_len,
-					KM_USER1);
-			}
 			ioff = btrfs_item_offset(leaf, item);
 			btrfs_set_item_offset(leaf, item, ioff + dsize);
 		}
 
-		if (leaf->map_token) {
-			unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
-			leaf->map_token = NULL;
-		}
-
 		memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot),
 			      btrfs_item_nr_offset(slot + nr),
 			      sizeof(struct btrfs_item) *
@@ -4004,11 +3955,11 @@ int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
 
 	WARN_ON(!path->keep_locks);
 again:
-	cur = btrfs_lock_root_node(root);
+	cur = btrfs_read_lock_root_node(root);
 	level = btrfs_header_level(cur);
 	WARN_ON(path->nodes[level]);
 	path->nodes[level] = cur;
-	path->locks[level] = 1;
+	path->locks[level] = BTRFS_READ_LOCK;
 
 	if (btrfs_header_generation(cur) < min_trans) {
 		ret = 1;
@@ -4098,12 +4049,12 @@ find_next_key:
 		cur = read_node_slot(root, cur, slot);
 		BUG_ON(!cur);
 
-		btrfs_tree_lock(cur);
+		btrfs_tree_read_lock(cur);
 
-		path->locks[level - 1] = 1;
+		path->locks[level - 1] = BTRFS_READ_LOCK;
 		path->nodes[level - 1] = cur;
 		unlock_up(path, level, 1);
-		btrfs_clear_path_blocking(path, NULL);
+		btrfs_clear_path_blocking(path, NULL, 0);
 	}
 out:
 	if (ret == 0)
@@ -4218,30 +4169,21 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
 	u32 nritems;
 	int ret;
 	int old_spinning = path->leave_spinning;
-	int force_blocking = 0;
+	int next_rw_lock = 0;
 
 	nritems = btrfs_header_nritems(path->nodes[0]);
 	if (nritems == 0)
 		return 1;
 
-	/*
-	 * we take the blocks in an order that upsets lockdep.  Using
-	 * blocking mode is the only way around it.
-	 */
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-	force_blocking = 1;
-#endif
-
 	btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1);
 again:
 	level = 1;
 	next = NULL;
+	next_rw_lock = 0;
 	btrfs_release_path(path);
 
 	path->keep_locks = 1;
-
-	if (!force_blocking)
-		path->leave_spinning = 1;
+	path->leave_spinning = 1;
 
 	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
 	path->keep_locks = 0;
@@ -4281,11 +4223,12 @@ again:
 		}
 
 		if (next) {
-			btrfs_tree_unlock(next);
+			btrfs_tree_unlock_rw(next, next_rw_lock);
			free_extent_buffer(next);
 		}
 
 		next = c;
+		next_rw_lock = path->locks[level];
 		ret = read_block_for_search(NULL, root, path, &next, level,
 					    slot, &key);
 		if (ret == -EAGAIN)
@@ -4297,15 +4240,14 @@ again:
 		}
 
 		if (!path->skip_locking) {
-			ret = btrfs_try_spin_lock(next);
+			ret = btrfs_try_tree_read_lock(next);
 			if (!ret) {
 				btrfs_set_path_blocking(path);
-				btrfs_tree_lock(next);
-				if (!force_blocking)
-					btrfs_clear_path_blocking(path, next);
+				btrfs_tree_read_lock(next);
+				btrfs_clear_path_blocking(path, next,
+							  BTRFS_READ_LOCK);
 			}
-			if (force_blocking)
-				btrfs_set_lock_blocking(next);
+			next_rw_lock = BTRFS_READ_LOCK;
 		}
 		break;
 	}
@@ -4314,14 +4256,13 @@ again:
 		level--;
 		c = path->nodes[level];
 		if (path->locks[level])
-			btrfs_tree_unlock(c);
+			btrfs_tree_unlock_rw(c, path->locks[level]);
 
 		free_extent_buffer(c);
 		path->nodes[level] = next;
 		path->slots[level] = 0;
 		if (!path->skip_locking)
-			path->locks[level] = 1;
-
+			path->locks[level] = next_rw_lock;
 		if (!level)
 			break;
 
@@ -4336,16 +4277,14 @@ again:
 		}
 
 		if (!path->skip_locking) {
-			btrfs_assert_tree_locked(path->nodes[level]);
-			ret = btrfs_try_spin_lock(next);
+			ret = btrfs_try_tree_read_lock(next);
 			if (!ret) {
 				btrfs_set_path_blocking(path);
-				btrfs_tree_lock(next);
-				if (!force_blocking)
-					btrfs_clear_path_blocking(path, next);
+				btrfs_tree_read_lock(next);
+				btrfs_clear_path_blocking(path, next,
+							  BTRFS_READ_LOCK);
 			}
-			if (force_blocking)
-				btrfs_set_lock_blocking(next);
+			next_rw_lock = BTRFS_READ_LOCK;
 		}
 	}
 	ret = 0;
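
Taken together, the ctree.c hunks convert btrfs from a single per-node spin/blocking tree lock to reader/writer tree locks (BTRFS_READ_LOCK / BTRFS_WRITE_LOCK and their *_BLOCKING variants), and btrfs_search_slot() now descends optimistically with read locks, restarting with a raised write_lock_level whenever it discovers a level it must modify. A condensed, self-contained sketch of that restart pattern follows; every name in it (example_path, lock_node, must_modify, release_path) is illustrative, not a kernel API.

    #include <stdbool.h>
    
    #define MAX_LEVEL 8
    
    struct example_path {
    	int locks[MAX_LEVEL];		/* 0 = none, 1 = read, 2 = write */
    };
    
    /* Illustrative stand-ins for the real tree operations. */
    static void lock_node(struct example_path *p, int level, bool write)
    {
    	p->locks[level] = write ? 2 : 1;
    }
    
    static bool must_modify(struct example_path *p, int level)
    {
    	(void)p;
    	return level == 0;	/* say only the leaf needs changing */
    }
    
    static void release_path(struct example_path *p)
    {
    	for (int i = 0; i < MAX_LEVEL; i++)
    		p->locks[i] = 0;
    }
    
    static int example_search(struct example_path *p, int root_level)
    {
    	int write_lock_level = 0;	/* levels <= this get write locks */
    	int level;
    
    again:
    	for (level = root_level; level >= 0; level--) {
    		/* read lock by default, write lock only when required */
    		lock_node(p, level, level <= write_lock_level);
    		if (must_modify(p, level) && write_lock_level < level + 1) {
    			/* the parent must be write locked too: restart the
    			 * descent with a more pessimistic lock level */
    			write_lock_level = level + 1;
    			release_path(p);
    			goto again;
    		}
    	}
    	return 0;
    }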
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 3b859a3e6a0..03912c5c6f4 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
@@ -755,6 +755,8 @@ struct btrfs_space_info { | |||
755 | chunks for this space */ | 755 | chunks for this space */ |
756 | unsigned int chunk_alloc:1; /* set if we are allocating a chunk */ | 756 | unsigned int chunk_alloc:1; /* set if we are allocating a chunk */ |
757 | 757 | ||
758 | unsigned int flush:1; /* set if we are trying to make space */ | ||
759 | |||
758 | unsigned int force_alloc; /* set if we need to force a chunk | 760 | unsigned int force_alloc; /* set if we need to force a chunk |
759 | alloc for this space */ | 761 | alloc for this space */ |
760 | 762 | ||
@@ -764,7 +766,7 @@ struct btrfs_space_info { | |||
764 | struct list_head block_groups[BTRFS_NR_RAID_TYPES]; | 766 | struct list_head block_groups[BTRFS_NR_RAID_TYPES]; |
765 | spinlock_t lock; | 767 | spinlock_t lock; |
766 | struct rw_semaphore groups_sem; | 768 | struct rw_semaphore groups_sem; |
767 | atomic_t caching_threads; | 769 | wait_queue_head_t wait; |
768 | }; | 770 | }; |
769 | 771 | ||
770 | struct btrfs_block_rsv { | 772 | struct btrfs_block_rsv { |
@@ -824,6 +826,7 @@ struct btrfs_caching_control { | |||
824 | struct list_head list; | 826 | struct list_head list; |
825 | struct mutex mutex; | 827 | struct mutex mutex; |
826 | wait_queue_head_t wait; | 828 | wait_queue_head_t wait; |
829 | struct btrfs_work work; | ||
827 | struct btrfs_block_group_cache *block_group; | 830 | struct btrfs_block_group_cache *block_group; |
828 | u64 progress; | 831 | u64 progress; |
829 | atomic_t count; | 832 | atomic_t count; |
@@ -1032,6 +1035,8 @@ struct btrfs_fs_info { | |||
1032 | struct btrfs_workers endio_write_workers; | 1035 | struct btrfs_workers endio_write_workers; |
1033 | struct btrfs_workers endio_freespace_worker; | 1036 | struct btrfs_workers endio_freespace_worker; |
1034 | struct btrfs_workers submit_workers; | 1037 | struct btrfs_workers submit_workers; |
1038 | struct btrfs_workers caching_workers; | ||
1039 | |||
1035 | /* | 1040 | /* |
1036 | * fixup workers take dirty pages that didn't properly go through | 1041 | * fixup workers take dirty pages that didn't properly go through |
1037 | * the cow mechanism and make them safe to write. It happens | 1042 | * the cow mechanism and make them safe to write. It happens |
@@ -1219,7 +1224,7 @@ struct btrfs_root { | |||
1219 | * right now this just gets used so that a root has its own devid | 1224 | * right now this just gets used so that a root has its own devid |
1220 | * for stat. It may be used for more later | 1225 | * for stat. It may be used for more later |
1221 | */ | 1226 | */ |
1222 | struct super_block anon_super; | 1227 | dev_t anon_dev; |
1223 | }; | 1228 | }; |
1224 | 1229 | ||
1225 | struct btrfs_ioctl_defrag_range_args { | 1230 | struct btrfs_ioctl_defrag_range_args { |
@@ -1410,17 +1415,15 @@ void btrfs_set_##name(struct extent_buffer *eb, type *s, u##bits val); | |||
1410 | #define BTRFS_SETGET_HEADER_FUNCS(name, type, member, bits) \ | 1415 | #define BTRFS_SETGET_HEADER_FUNCS(name, type, member, bits) \ |
1411 | static inline u##bits btrfs_##name(struct extent_buffer *eb) \ | 1416 | static inline u##bits btrfs_##name(struct extent_buffer *eb) \ |
1412 | { \ | 1417 | { \ |
1413 | type *p = kmap_atomic(eb->first_page, KM_USER0); \ | 1418 | type *p = page_address(eb->first_page); \ |
1414 | u##bits res = le##bits##_to_cpu(p->member); \ | 1419 | u##bits res = le##bits##_to_cpu(p->member); \ |
1415 | kunmap_atomic(p, KM_USER0); \ | ||
1416 | return res; \ | 1420 | return res; \ |
1417 | } \ | 1421 | } \ |
1418 | static inline void btrfs_set_##name(struct extent_buffer *eb, \ | 1422 | static inline void btrfs_set_##name(struct extent_buffer *eb, \ |
1419 | u##bits val) \ | 1423 | u##bits val) \ |
1420 | { \ | 1424 | { \ |
1421 | type *p = kmap_atomic(eb->first_page, KM_USER0); \ | 1425 | type *p = page_address(eb->first_page); \ |
1422 | p->member = cpu_to_le##bits(val); \ | 1426 | p->member = cpu_to_le##bits(val); \ |
1423 | kunmap_atomic(p, KM_USER0); \ | ||
1424 | } | 1427 | } |
1425 | 1428 | ||
1426 | #define BTRFS_SETGET_STACK_FUNCS(name, type, member, bits) \ | 1429 | #define BTRFS_SETGET_STACK_FUNCS(name, type, member, bits) \ |
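
The accessor macros above lose their kmap_atomic()/kunmap_atomic() pair because an extent buffer's first page is always kernel-mapped, so page_address() suffices and the temporary mapping was pure overhead. Below is a self-contained sketch of the accessor pair such a macro generates, with an ordinary struct standing in for the on-disk header page; the names are illustrative, not the kernel macro itself:

#include <endian.h>     /* le32toh()/htole32(), glibc */
#include <stdint.h>
#include <stdio.h>

/* On-disk header field, little endian, standing in for eb->first_page. */
struct fake_header { uint32_t gen_le; };

/* Sketch of the accessor pair the macro generates once the atomic kmap
 * is gone: read and write through a plain pointer to the mapped page. */
#define SETGET_FUNCS(name, type, member)                                \
static inline uint32_t get_##name(void *first_page)                     \
{                                                                       \
        type *p = first_page;           /* was kmap_atomic(...) */      \
        return le32toh(p->member);                                      \
}                                                                       \
static inline void set_##name(void *first_page, uint32_t val)           \
{                                                                       \
        type *p = first_page;                                           \
        p->member = htole32(val);                                       \
}

SETGET_FUNCS(generation, struct fake_header, gen_le)

int main(void)
{
        struct fake_header page = { 0 };

        set_generation(&page, 42);
        printf("generation = %u\n", get_generation(&page));
        return 0;
}
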
@@ -2128,7 +2131,7 @@ static inline bool btrfs_mixed_space_info(struct btrfs_space_info *space_info) | |||
2128 | 2131 | ||
2129 | /* extent-tree.c */ | 2132 | /* extent-tree.c */ |
2130 | static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_root *root, | 2133 | static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_root *root, |
2131 | int num_items) | 2134 | unsigned num_items) |
2132 | { | 2135 | { |
2133 | return (root->leafsize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) * | 2136 | return (root->leafsize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) * |
2134 | 3 * num_items; | 2137 | 3 * num_items; |
@@ -2222,9 +2225,6 @@ void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode); | |||
2222 | void btrfs_clear_space_info_full(struct btrfs_fs_info *info); | 2225 | void btrfs_clear_space_info_full(struct btrfs_fs_info *info); |
2223 | int btrfs_check_data_free_space(struct inode *inode, u64 bytes); | 2226 | int btrfs_check_data_free_space(struct inode *inode, u64 bytes); |
2224 | void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes); | 2227 | void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes); |
2225 | int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans, | ||
2226 | struct btrfs_root *root, | ||
2227 | int num_items); | ||
2228 | void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, | 2228 | void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, |
2229 | struct btrfs_root *root); | 2229 | struct btrfs_root *root); |
2230 | int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans, | 2230 | int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans, |
@@ -2330,7 +2330,7 @@ struct btrfs_path *btrfs_alloc_path(void); | |||
2330 | void btrfs_free_path(struct btrfs_path *p); | 2330 | void btrfs_free_path(struct btrfs_path *p); |
2331 | void btrfs_set_path_blocking(struct btrfs_path *p); | 2331 | void btrfs_set_path_blocking(struct btrfs_path *p); |
2332 | void btrfs_clear_path_blocking(struct btrfs_path *p, | 2332 | void btrfs_clear_path_blocking(struct btrfs_path *p, |
2333 | struct extent_buffer *held); | 2333 | struct extent_buffer *held, int held_rw); |
2334 | void btrfs_unlock_up_safe(struct btrfs_path *p, int level); | 2334 | void btrfs_unlock_up_safe(struct btrfs_path *p, int level); |
2335 | 2335 | ||
2336 | int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, | 2336 | int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, |
@@ -2365,8 +2365,8 @@ static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, | |||
2365 | int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path); | 2365 | int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path); |
2366 | int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path); | 2366 | int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path); |
2367 | int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf); | 2367 | int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf); |
2368 | int btrfs_drop_snapshot(struct btrfs_root *root, | 2368 | void btrfs_drop_snapshot(struct btrfs_root *root, |
2369 | struct btrfs_block_rsv *block_rsv, int update_ref); | 2369 | struct btrfs_block_rsv *block_rsv, int update_ref); |
2370 | int btrfs_drop_subtree(struct btrfs_trans_handle *trans, | 2370 | int btrfs_drop_subtree(struct btrfs_trans_handle *trans, |
2371 | struct btrfs_root *root, | 2371 | struct btrfs_root *root, |
2372 | struct extent_buffer *node, | 2372 | struct extent_buffer *node, |
@@ -2404,8 +2404,8 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct | |||
2404 | btrfs_root_item *item, struct btrfs_key *key); | 2404 | btrfs_root_item *item, struct btrfs_key *key); |
2405 | int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid); | 2405 | int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid); |
2406 | int btrfs_find_orphan_roots(struct btrfs_root *tree_root); | 2406 | int btrfs_find_orphan_roots(struct btrfs_root *tree_root); |
2407 | int btrfs_set_root_node(struct btrfs_root_item *item, | 2407 | void btrfs_set_root_node(struct btrfs_root_item *item, |
2408 | struct extent_buffer *node); | 2408 | struct extent_buffer *node); |
2409 | void btrfs_check_and_init_root_item(struct btrfs_root_item *item); | 2409 | void btrfs_check_and_init_root_item(struct btrfs_root_item *item); |
2410 | 2410 | ||
2411 | /* dir-item.c */ | 2411 | /* dir-item.c */ |
@@ -2510,6 +2510,9 @@ int btrfs_csum_truncate(struct btrfs_trans_handle *trans, | |||
2510 | int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, | 2510 | int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, |
2511 | struct list_head *list, int search_commit); | 2511 | struct list_head *list, int search_commit); |
2512 | /* inode.c */ | 2512 | /* inode.c */ |
2513 | struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *page, | ||
2514 | size_t pg_offset, u64 start, u64 len, | ||
2515 | int create); | ||
2513 | 2516 | ||
2514 | /* RHEL and EL kernels have a patch that renames PG_checked to FsMisc */ | 2517 | /* RHEL and EL kernels have a patch that renames PG_checked to FsMisc */ |
2515 | #if defined(ClearPageFsMisc) && !defined(ClearPageChecked) | 2518 | #if defined(ClearPageFsMisc) && !defined(ClearPageChecked) |
@@ -2518,6 +2521,14 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, | |||
2518 | #define PageChecked PageFsMisc | 2521 | #define PageChecked PageFsMisc |
2519 | #endif | 2522 | #endif |
2520 | 2523 | ||
2524 | /* This forces readahead on a given range of bytes in an inode */ | ||
2525 | static inline void btrfs_force_ra(struct address_space *mapping, | ||
2526 | struct file_ra_state *ra, struct file *file, | ||
2527 | pgoff_t offset, unsigned long req_size) | ||
2528 | { | ||
2529 | page_cache_sync_readahead(mapping, ra, file, offset, req_size); | ||
2530 | } | ||
2531 | |||
2521 | struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry); | 2532 | struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry); |
2522 | int btrfs_set_inode_index(struct inode *dir, u64 *index); | 2533 | int btrfs_set_inode_index(struct inode *dir, u64 *index); |
2523 | int btrfs_unlink_inode(struct btrfs_trans_handle *trans, | 2534 | int btrfs_unlink_inode(struct btrfs_trans_handle *trans, |
@@ -2546,9 +2557,6 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, | |||
2546 | int btrfs_merge_bio_hook(struct page *page, unsigned long offset, | 2557 | int btrfs_merge_bio_hook(struct page *page, unsigned long offset, |
2547 | size_t size, struct bio *bio, unsigned long bio_flags); | 2558 | size_t size, struct bio *bio, unsigned long bio_flags); |
2548 | 2559 | ||
2549 | unsigned long btrfs_force_ra(struct address_space *mapping, | ||
2550 | struct file_ra_state *ra, struct file *file, | ||
2551 | pgoff_t offset, pgoff_t last_index); | ||
2552 | int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); | 2560 | int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); |
2553 | int btrfs_readpage(struct file *file, struct page *page); | 2561 | int btrfs_readpage(struct file *file, struct page *page); |
2554 | void btrfs_evict_inode(struct inode *inode); | 2562 | void btrfs_evict_inode(struct inode *inode); |
@@ -2602,7 +2610,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, | |||
2602 | int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, | 2610 | int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, |
2603 | struct inode *inode); | 2611 | struct inode *inode); |
2604 | int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info); | 2612 | int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info); |
2605 | int btrfs_sync_file(struct file *file, int datasync); | 2613 | int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync); |
2606 | int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | 2614 | int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, |
2607 | int skip_pinned); | 2615 | int skip_pinned); |
2608 | extern const struct file_operations btrfs_file_operations; | 2616 | extern const struct file_operations btrfs_file_operations; |
@@ -2642,13 +2650,22 @@ do { \ | |||
2642 | 2650 | ||
2643 | /* acl.c */ | 2651 | /* acl.c */ |
2644 | #ifdef CONFIG_BTRFS_FS_POSIX_ACL | 2652 | #ifdef CONFIG_BTRFS_FS_POSIX_ACL |
2645 | int btrfs_check_acl(struct inode *inode, int mask, unsigned int flags); | 2653 | struct posix_acl *btrfs_get_acl(struct inode *inode, int type); |
2646 | #else | ||
2647 | #define btrfs_check_acl NULL | ||
2648 | #endif | ||
2649 | int btrfs_init_acl(struct btrfs_trans_handle *trans, | 2654 | int btrfs_init_acl(struct btrfs_trans_handle *trans, |
2650 | struct inode *inode, struct inode *dir); | 2655 | struct inode *inode, struct inode *dir); |
2651 | int btrfs_acl_chmod(struct inode *inode); | 2656 | int btrfs_acl_chmod(struct inode *inode); |
2657 | #else | ||
2658 | #define btrfs_get_acl NULL | ||
2659 | static inline int btrfs_init_acl(struct btrfs_trans_handle *trans, | ||
2660 | struct inode *inode, struct inode *dir) | ||
2661 | { | ||
2662 | return 0; | ||
2663 | } | ||
2664 | static inline int btrfs_acl_chmod(struct inode *inode) | ||
2665 | { | ||
2666 | return 0; | ||
2667 | } | ||
2668 | #endif | ||
2652 | 2669 | ||
2653 | /* relocation.c */ | 2670 | /* relocation.c */ |
2654 | int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start); | 2671 | int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start); |
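
The acl.c hunk above moves the whole ACL surface behind CONFIG_BTRFS_FS_POSIX_ACL: real prototypes when the option is on, a NULL .get_acl hook plus no-op static inline stubs when it is off, so call sites compile unchanged either way. A minimal standalone illustration of the idiom; FEATURE_ACL and the function names are invented for the sketch:

#include <stdio.h>

/* Build with -DFEATURE_ACL for the real implementation; without it
 * the stub compiles away and the ops-table hook becomes NULL. */
#ifdef FEATURE_ACL
int acl_init(void) { printf("real ACL init\n"); return 0; }
#define get_acl_op acl_init
#else
static inline int acl_init(void) { return 0; }  /* no-op stub */
#define get_acl_op NULL                         /* "no ACL support" */
#endif

int main(void)
{
        int (*op)(void) = get_acl_op;

        /* Call sites look identical in both configurations. */
        printf("acl_init() = %d, have op: %s\n",
               acl_init(), op ? "yes" : "no");
        return 0;
}
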
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 98c68e658a9..b52c672f4c1 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c | |||
@@ -735,7 +735,7 @@ static int btrfs_batch_insert_items(struct btrfs_trans_handle *trans, | |||
735 | } | 735 | } |
736 | 736 | ||
737 | /* reset all the locked nodes in the path to spinning locks. */ | 737 | /* reset all the locked nodes in the path to spinning locks. */
738 | btrfs_clear_path_blocking(path, NULL); | 738 | btrfs_clear_path_blocking(path, NULL, 0); |
739 | 739 | ||
740 | /* insert the keys of the items */ | 740 | /* insert the keys of the items */ |
741 | ret = setup_items_for_insert(trans, root, path, keys, data_size, | 741 | ret = setup_items_for_insert(trans, root, path, keys, data_size, |
diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h index 8d27af4bd8b..7083d08b2a2 100644 --- a/fs/btrfs/delayed-inode.h +++ b/fs/btrfs/delayed-inode.h | |||
@@ -25,7 +25,7 @@ | |||
25 | #include <linux/mutex.h> | 25 | #include <linux/mutex.h> |
26 | #include <linux/list.h> | 26 | #include <linux/list.h> |
27 | #include <linux/wait.h> | 27 | #include <linux/wait.h> |
28 | #include <asm/atomic.h> | 28 | #include <linux/atomic.h> |
29 | 29 | ||
30 | #include "ctree.h" | 30 | #include "ctree.h" |
31 | 31 | ||
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index 685f2593c4f..31d84e78129 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c | |||
@@ -89,13 +89,8 @@ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans, | |||
89 | data_size = sizeof(*dir_item) + name_len + data_len; | 89 | data_size = sizeof(*dir_item) + name_len + data_len; |
90 | dir_item = insert_with_overflow(trans, root, path, &key, data_size, | 90 | dir_item = insert_with_overflow(trans, root, path, &key, data_size, |
91 | name, name_len); | 91 | name, name_len); |
92 | /* | 92 | if (IS_ERR(dir_item)) |
93 | * FIXME: at some point we should handle xattr's that are larger than | 93 | return PTR_ERR(dir_item); |
94 | * what we can fit in our leaf. We set location to NULL b/c we arent | ||
95 | * pointing at anything else, that will change if we store the xattr | ||
96 | * data in a separate inode. | ||
97 | */ | ||
98 | BUG_ON(IS_ERR(dir_item)); | ||
99 | memset(&location, 0, sizeof(location)); | 94 | memset(&location, 0, sizeof(location)); |
100 | 95 | ||
101 | leaf = path->nodes[0]; | 96 | leaf = path->nodes[0]; |
@@ -203,8 +198,6 @@ struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, | |||
203 | struct btrfs_key key; | 198 | struct btrfs_key key; |
204 | int ins_len = mod < 0 ? -1 : 0; | 199 | int ins_len = mod < 0 ? -1 : 0; |
205 | int cow = mod != 0; | 200 | int cow = mod != 0; |
206 | struct btrfs_key found_key; | ||
207 | struct extent_buffer *leaf; | ||
208 | 201 | ||
209 | key.objectid = dir; | 202 | key.objectid = dir; |
210 | btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); | 203 | btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); |
@@ -214,18 +207,7 @@ struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, | |||
214 | ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow); | 207 | ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow); |
215 | if (ret < 0) | 208 | if (ret < 0) |
216 | return ERR_PTR(ret); | 209 | return ERR_PTR(ret); |
217 | if (ret > 0) { | 210 | if (ret > 0) |
218 | if (path->slots[0] == 0) | ||
219 | return NULL; | ||
220 | path->slots[0]--; | ||
221 | } | ||
222 | |||
223 | leaf = path->nodes[0]; | ||
224 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); | ||
225 | |||
226 | if (found_key.objectid != dir || | ||
227 | btrfs_key_type(&found_key) != BTRFS_DIR_ITEM_KEY || | ||
228 | found_key.offset != key.offset) | ||
229 | return NULL; | 211 | return NULL; |
230 | 212 | ||
231 | return btrfs_match_dir_item_name(root, path, name, name_len); | 213 | return btrfs_match_dir_item_name(root, path, name, name_len); |
@@ -320,8 +302,6 @@ struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans, | |||
320 | struct btrfs_key key; | 302 | struct btrfs_key key; |
321 | int ins_len = mod < 0 ? -1 : 0; | 303 | int ins_len = mod < 0 ? -1 : 0; |
322 | int cow = mod != 0; | 304 | int cow = mod != 0; |
323 | struct btrfs_key found_key; | ||
324 | struct extent_buffer *leaf; | ||
325 | 305 | ||
326 | key.objectid = dir; | 306 | key.objectid = dir; |
327 | btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY); | 307 | btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY); |
@@ -329,18 +309,7 @@ struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans, | |||
329 | ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow); | 309 | ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow); |
330 | if (ret < 0) | 310 | if (ret < 0) |
331 | return ERR_PTR(ret); | 311 | return ERR_PTR(ret); |
332 | if (ret > 0) { | 312 | if (ret > 0) |
333 | if (path->slots[0] == 0) | ||
334 | return NULL; | ||
335 | path->slots[0]--; | ||
336 | } | ||
337 | |||
338 | leaf = path->nodes[0]; | ||
339 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); | ||
340 | |||
341 | if (found_key.objectid != dir || | ||
342 | btrfs_key_type(&found_key) != BTRFS_XATTR_ITEM_KEY || | ||
343 | found_key.offset != key.offset) | ||
344 | return NULL; | 313 | return NULL; |
345 | 314 | ||
346 | return btrfs_match_dir_item_name(root, path, name, name_len); | 315 | return btrfs_match_dir_item_name(root, path, name, name_len); |
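
Both lookup functions above drop the old "step back one slot and re-check the key" block: these are exact-key searches, so when btrfs_search_slot() returns greater than zero the item simply is not there and the previous slot can never match. A toy user-space model of the simplified contract; search_slot() and lookup() here are stand-ins, not the btrfs functions:

#include <stdio.h>

/* Toy model: exact-key search in a sorted table.  Returns 0 on an
 * exact hit, >0 if the key is absent, <0 on error -- the contract
 * btrfs_search_slot() has for an exact-match lookup. */
static int search_slot(const int *tab, int n, int key, int *slot)
{
        int i;

        for (i = 0; i < n; i++) {
                if (tab[i] == key) {
                        *slot = i;
                        return 0;
                }
                if (tab[i] > key)
                        break;
        }
        *slot = i;
        return 1;
}

/* Model of the simplified lookup: a positive return means "definitely
 * absent", so there is no need to inspect the previous slot. */
static const int *lookup(const int *tab, int n, int key)
{
        int slot;

        if (search_slot(tab, n, key, &slot) != 0)
                return NULL;
        return &tab[slot];
}

int main(void)
{
        int tab[] = { 3, 7, 9 };

        printf("7 -> %s\n", lookup(tab, 3, 7) ? "found" : "absent");
        printf("8 -> %s\n", lookup(tab, 3, 8) ? "found" : "absent");
        return 0;
}
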
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 1ac8db5dc0a..07b3ac662e1 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -100,38 +100,83 @@ struct async_submit_bio { | |||
100 | struct btrfs_work work; | 100 | struct btrfs_work work; |
101 | }; | 101 | }; |
102 | 102 | ||
103 | /* These are used to set the lockdep class on the extent buffer locks. | 103 | /* |
104 | * The class is set by the readpage_end_io_hook after the buffer has | 104 | * Lockdep class keys for extent_buffer->lock's in this root. For a given |
105 | * passed csum validation but before the pages are unlocked. | 105 | * eb, the lockdep key is determined by the btrfs_root it belongs to and |
106 | * the level the eb occupies in the tree. | ||
107 | * | ||
108 | * Different roots are used for different purposes and may nest inside each | ||
109 | * other, so they require separate keysets. As lockdep keys should be ||
110 | * static, assign keysets according to the purpose of the root as indicated | ||
111 | * by btrfs_root->objectid. This ensures that all special purpose roots | ||
112 | * have separate keysets. | ||
106 | * | 113 | * |
107 | * The lockdep class is also set by btrfs_init_new_buffer on freshly | 114 | * Lock-nesting across peer nodes is always done with the immediate parent |
108 | * allocated blocks. | 115 | * node locked thus preventing deadlock. As lockdep doesn't know this, use |
116 | * subclass to avoid triggering lockdep warning in such cases. | ||
109 | * | 117 | * |
110 | * The class is based on the level in the tree block, which allows lockdep | 118 | * The key is set by the readpage_end_io_hook after the buffer has passed |
111 | * to know that lower nodes nest inside the locks of higher nodes. | 119 | * csum validation but before the pages are unlocked. It is also set by |
120 | * btrfs_init_new_buffer on freshly allocated blocks. | ||
112 | * | 121 | * |
113 | * We also add a check to make sure the highest level of the tree is | 122 | * We also add a check to make sure the highest level of the tree is the |
114 | * the same as our lockdep setup here. If BTRFS_MAX_LEVEL changes, this | 123 | * same as our lockdep setup here. If BTRFS_MAX_LEVEL changes, this code |
115 | * code needs update as well. | 124 | * needs update as well. |
116 | */ | 125 | */ |
117 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 126 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
118 | # if BTRFS_MAX_LEVEL != 8 | 127 | # if BTRFS_MAX_LEVEL != 8 |
119 | # error | 128 | # error |
120 | # endif | 129 | # endif |
121 | static struct lock_class_key btrfs_eb_class[BTRFS_MAX_LEVEL + 1]; | 130 | |
122 | static const char *btrfs_eb_name[BTRFS_MAX_LEVEL + 1] = { | 131 | static struct btrfs_lockdep_keyset { |
123 | /* leaf */ | 132 | u64 id; /* root objectid */ |
124 | "btrfs-extent-00", | 133 | const char *name_stem; /* lock name stem */ |
125 | "btrfs-extent-01", | 134 | char names[BTRFS_MAX_LEVEL + 1][20]; |
126 | "btrfs-extent-02", | 135 | struct lock_class_key keys[BTRFS_MAX_LEVEL + 1]; |
127 | "btrfs-extent-03", | 136 | } btrfs_lockdep_keysets[] = { |
128 | "btrfs-extent-04", | 137 | { .id = BTRFS_ROOT_TREE_OBJECTID, .name_stem = "root" }, |
129 | "btrfs-extent-05", | 138 | { .id = BTRFS_EXTENT_TREE_OBJECTID, .name_stem = "extent" }, |
130 | "btrfs-extent-06", | 139 | { .id = BTRFS_CHUNK_TREE_OBJECTID, .name_stem = "chunk" }, |
131 | "btrfs-extent-07", | 140 | { .id = BTRFS_DEV_TREE_OBJECTID, .name_stem = "dev" }, |
132 | /* highest possible level */ | 141 | { .id = BTRFS_FS_TREE_OBJECTID, .name_stem = "fs" }, |
133 | "btrfs-extent-08", | 142 | { .id = BTRFS_CSUM_TREE_OBJECTID, .name_stem = "csum" }, |
143 | { .id = BTRFS_ORPHAN_OBJECTID, .name_stem = "orphan" }, | ||
144 | { .id = BTRFS_TREE_LOG_OBJECTID, .name_stem = "log" }, | ||
145 | { .id = BTRFS_TREE_RELOC_OBJECTID, .name_stem = "treloc" }, | ||
146 | { .id = BTRFS_DATA_RELOC_TREE_OBJECTID, .name_stem = "dreloc" }, | ||
147 | { .id = 0, .name_stem = "tree" }, | ||
134 | }; | 148 | }; |
149 | |||
150 | void __init btrfs_init_lockdep(void) | ||
151 | { | ||
152 | int i, j; | ||
153 | |||
154 | /* initialize lockdep class names */ | ||
155 | for (i = 0; i < ARRAY_SIZE(btrfs_lockdep_keysets); i++) { | ||
156 | struct btrfs_lockdep_keyset *ks = &btrfs_lockdep_keysets[i]; | ||
157 | |||
158 | for (j = 0; j < ARRAY_SIZE(ks->names); j++) | ||
159 | snprintf(ks->names[j], sizeof(ks->names[j]), | ||
160 | "btrfs-%s-%02d", ks->name_stem, j); | ||
161 | } | ||
162 | } | ||
163 | |||
164 | void btrfs_set_buffer_lockdep_class(u64 objectid, struct extent_buffer *eb, | ||
165 | int level) | ||
166 | { | ||
167 | struct btrfs_lockdep_keyset *ks; | ||
168 | |||
169 | BUG_ON(level >= ARRAY_SIZE(ks->keys)); | ||
170 | |||
171 | /* find the matching keyset, id 0 is the default entry */ | ||
172 | for (ks = btrfs_lockdep_keysets; ks->id; ks++) | ||
173 | if (ks->id == objectid) | ||
174 | break; | ||
175 | |||
176 | lockdep_set_class_and_name(&eb->lock, | ||
177 | &ks->keys[level], ks->names[level]); | ||
178 | } | ||
179 | |||
135 | #endif | 180 | #endif |
136 | 181 | ||
137 | /* | 182 | /* |
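
Because lockdep keys have to be static objects, the table above holds one pre-allocated key and one name per (root purpose, level) pair, and btrfs_init_lockdep() only fills in the names once at startup. The naming loop runs unchanged in user space; a trimmed model with a few stems (the keyset list is abbreviated here):

#include <stdio.h>

#define MAX_LEVEL 8
#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

/* Abbreviated keyset table; the kernel one has an entry per root type,
 * ending with the id-0 "tree" default. */
static struct keyset {
        const char *name_stem;
        char names[MAX_LEVEL + 1][20];
} keysets[] = {
        { .name_stem = "root" },
        { .name_stem = "extent" },
        { .name_stem = "tree" },
};

int main(void)
{
        unsigned i, j;

        /* Same loop as btrfs_init_lockdep(): fill the static names once. */
        for (i = 0; i < ARRAY_SIZE(keysets); i++)
                for (j = 0; j < ARRAY_SIZE(keysets[i].names); j++)
                        snprintf(keysets[i].names[j],
                                 sizeof(keysets[i].names[j]),
                                 "btrfs-%s-%02d", keysets[i].name_stem, j);

        printf("%s ... %s\n", keysets[0].names[0],
               keysets[2].names[MAX_LEVEL]);
        return 0;
}
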
@@ -217,7 +262,6 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, | |||
217 | unsigned long len; | 262 | unsigned long len; |
218 | unsigned long cur_len; | 263 | unsigned long cur_len; |
219 | unsigned long offset = BTRFS_CSUM_SIZE; | 264 | unsigned long offset = BTRFS_CSUM_SIZE; |
220 | char *map_token = NULL; | ||
221 | char *kaddr; | 265 | char *kaddr; |
222 | unsigned long map_start; | 266 | unsigned long map_start; |
223 | unsigned long map_len; | 267 | unsigned long map_len; |
@@ -228,8 +272,7 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, | |||
228 | len = buf->len - offset; | 272 | len = buf->len - offset; |
229 | while (len > 0) { | 273 | while (len > 0) { |
230 | err = map_private_extent_buffer(buf, offset, 32, | 274 | err = map_private_extent_buffer(buf, offset, 32, |
231 | &map_token, &kaddr, | 275 | &kaddr, &map_start, &map_len); |
232 | &map_start, &map_len, KM_USER0); | ||
233 | if (err) | 276 | if (err) |
234 | return 1; | 277 | return 1; |
235 | cur_len = min(len, map_len - (offset - map_start)); | 278 | cur_len = min(len, map_len - (offset - map_start)); |
@@ -237,7 +280,6 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, | |||
237 | crc, cur_len); | 280 | crc, cur_len); |
238 | len -= cur_len; | 281 | len -= cur_len; |
239 | offset += cur_len; | 282 | offset += cur_len; |
240 | unmap_extent_buffer(buf, map_token, KM_USER0); | ||
241 | } | 283 | } |
242 | if (csum_size > sizeof(inline_result)) { | 284 | if (csum_size > sizeof(inline_result)) { |
243 | result = kzalloc(csum_size * sizeof(char), GFP_NOFS); | 285 | result = kzalloc(csum_size * sizeof(char), GFP_NOFS); |
@@ -494,15 +536,6 @@ static noinline int check_leaf(struct btrfs_root *root, | |||
494 | return 0; | 536 | return 0; |
495 | } | 537 | } |
496 | 538 | ||
497 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | ||
498 | void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, int level) | ||
499 | { | ||
500 | lockdep_set_class_and_name(&eb->lock, | ||
501 | &btrfs_eb_class[level], | ||
502 | btrfs_eb_name[level]); | ||
503 | } | ||
504 | #endif | ||
505 | |||
506 | static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, | 539 | static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, |
507 | struct extent_state *state) | 540 | struct extent_state *state) |
508 | { | 541 | { |
@@ -553,7 +586,8 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, | |||
553 | } | 586 | } |
554 | found_level = btrfs_header_level(eb); | 587 | found_level = btrfs_header_level(eb); |
555 | 588 | ||
556 | btrfs_set_buffer_lockdep_class(eb, found_level); | 589 | btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb), |
590 | eb, found_level); | ||
557 | 591 | ||
558 | ret = csum_tree_block(root, eb, 1); | 592 | ret = csum_tree_block(root, eb, 1); |
559 | if (ret) { | 593 | if (ret) { |
@@ -1077,12 +1111,7 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, | |||
1077 | init_completion(&root->kobj_unregister); | 1111 | init_completion(&root->kobj_unregister); |
1078 | root->defrag_running = 0; | 1112 | root->defrag_running = 0; |
1079 | root->root_key.objectid = objectid; | 1113 | root->root_key.objectid = objectid; |
1080 | root->anon_super.s_root = NULL; | 1114 | root->anon_dev = 0; |
1081 | root->anon_super.s_dev = 0; | ||
1082 | INIT_LIST_HEAD(&root->anon_super.s_list); | ||
1083 | INIT_LIST_HEAD(&root->anon_super.s_instances); | ||
1084 | init_rwsem(&root->anon_super.s_umount); | ||
1085 | |||
1086 | return 0; | 1115 | return 0; |
1087 | } | 1116 | } |
1088 | 1117 | ||
@@ -1311,7 +1340,7 @@ again: | |||
1311 | spin_lock_init(&root->cache_lock); | 1340 | spin_lock_init(&root->cache_lock); |
1312 | init_waitqueue_head(&root->cache_wait); | 1341 | init_waitqueue_head(&root->cache_wait); |
1313 | 1342 | ||
1314 | ret = set_anon_super(&root->anon_super, NULL); | 1343 | ret = get_anon_bdev(&root->anon_dev); |
1315 | if (ret) | 1344 | if (ret) |
1316 | goto fail; | 1345 | goto fail; |
1317 | 1346 | ||
@@ -1603,7 +1632,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1603 | goto fail_bdi; | 1632 | goto fail_bdi; |
1604 | } | 1633 | } |
1605 | 1634 | ||
1606 | fs_info->btree_inode->i_mapping->flags &= ~__GFP_FS; | 1635 | mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS); |
1607 | 1636 | ||
1608 | INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC); | 1637 | INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC); |
1609 | INIT_LIST_HEAD(&fs_info->trans_list); | 1638 | INIT_LIST_HEAD(&fs_info->trans_list); |
@@ -1807,6 +1836,9 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1807 | fs_info->thread_pool_size), | 1836 | fs_info->thread_pool_size), |
1808 | &fs_info->generic_worker); | 1837 | &fs_info->generic_worker); |
1809 | 1838 | ||
1839 | btrfs_init_workers(&fs_info->caching_workers, "cache", | ||
1840 | 2, &fs_info->generic_worker); | ||
1841 | |||
1810 | /* a higher idle thresh on the submit workers makes it much more | 1842 | /* a higher idle thresh on the submit workers makes it much more |
1811 | * likely that bios will be sent down in a sane order to the | 1843 | * likely that bios will be sent down in a sane order to the
1812 | * devices | 1844 | * devices |
@@ -1860,6 +1892,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1860 | btrfs_start_workers(&fs_info->endio_write_workers, 1); | 1892 | btrfs_start_workers(&fs_info->endio_write_workers, 1); |
1861 | btrfs_start_workers(&fs_info->endio_freespace_worker, 1); | 1893 | btrfs_start_workers(&fs_info->endio_freespace_worker, 1); |
1862 | btrfs_start_workers(&fs_info->delayed_workers, 1); | 1894 | btrfs_start_workers(&fs_info->delayed_workers, 1); |
1895 | btrfs_start_workers(&fs_info->caching_workers, 1); | ||
1863 | 1896 | ||
1864 | fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); | 1897 | fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); |
1865 | fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, | 1898 | fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, |
@@ -2117,6 +2150,7 @@ fail_sb_buffer: | |||
2117 | btrfs_stop_workers(&fs_info->endio_freespace_worker); | 2150 | btrfs_stop_workers(&fs_info->endio_freespace_worker); |
2118 | btrfs_stop_workers(&fs_info->submit_workers); | 2151 | btrfs_stop_workers(&fs_info->submit_workers); |
2119 | btrfs_stop_workers(&fs_info->delayed_workers); | 2152 | btrfs_stop_workers(&fs_info->delayed_workers); |
2153 | btrfs_stop_workers(&fs_info->caching_workers); | ||
2120 | fail_alloc: | 2154 | fail_alloc: |
2121 | kfree(fs_info->delayed_root); | 2155 | kfree(fs_info->delayed_root); |
2122 | fail_iput: | 2156 | fail_iput: |
@@ -2393,10 +2427,8 @@ static void free_fs_root(struct btrfs_root *root) | |||
2393 | { | 2427 | { |
2394 | iput(root->cache_inode); | 2428 | iput(root->cache_inode); |
2395 | WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree)); | 2429 | WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree)); |
2396 | if (root->anon_super.s_dev) { | 2430 | if (root->anon_dev) |
2397 | down_write(&root->anon_super.s_umount); | 2431 | free_anon_bdev(root->anon_dev); |
2398 | kill_anon_super(&root->anon_super); | ||
2399 | } | ||
2400 | free_extent_buffer(root->node); | 2432 | free_extent_buffer(root->node); |
2401 | free_extent_buffer(root->commit_root); | 2433 | free_extent_buffer(root->commit_root); |
2402 | kfree(root->free_ino_ctl); | 2434 | kfree(root->free_ino_ctl); |
@@ -2584,6 +2616,7 @@ int close_ctree(struct btrfs_root *root) | |||
2584 | btrfs_stop_workers(&fs_info->endio_freespace_worker); | 2616 | btrfs_stop_workers(&fs_info->endio_freespace_worker); |
2585 | btrfs_stop_workers(&fs_info->submit_workers); | 2617 | btrfs_stop_workers(&fs_info->submit_workers); |
2586 | btrfs_stop_workers(&fs_info->delayed_workers); | 2618 | btrfs_stop_workers(&fs_info->delayed_workers); |
2619 | btrfs_stop_workers(&fs_info->caching_workers); | ||
2587 | 2620 | ||
2588 | btrfs_close_devices(fs_info->fs_devices); | 2621 | btrfs_close_devices(fs_info->fs_devices); |
2589 | btrfs_mapping_tree_free(&fs_info->mapping_tree); | 2622 | btrfs_mapping_tree_free(&fs_info->mapping_tree); |
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index a0b610a67aa..bec3ea4bd67 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h | |||
@@ -87,10 +87,14 @@ int btree_lock_page_hook(struct page *page); | |||
87 | 87 | ||
88 | 88 | ||
89 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 89 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
90 | void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, int level); | 90 | void btrfs_init_lockdep(void); |
91 | void btrfs_set_buffer_lockdep_class(u64 objectid, | ||
92 | struct extent_buffer *eb, int level); | ||
91 | #else | 93 | #else |
92 | static inline void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, | 94 | static inline void btrfs_init_lockdep(void) |
93 | int level) | 95 | { } |
96 | static inline void btrfs_set_buffer_lockdep_class(u64 objectid, | ||
97 | struct extent_buffer *eb, int level) | ||
94 | { | 98 | { |
95 | } | 99 | } |
96 | #endif | 100 | #endif |
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 71cd456fdb6..f5be06a2462 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -320,12 +320,12 @@ static u64 add_new_free_space(struct btrfs_block_group_cache *block_group, | |||
320 | return total_added; | 320 | return total_added; |
321 | } | 321 | } |
322 | 322 | ||
323 | static int caching_kthread(void *data) | 323 | static noinline void caching_thread(struct btrfs_work *work) |
324 | { | 324 | { |
325 | struct btrfs_block_group_cache *block_group = data; | 325 | struct btrfs_block_group_cache *block_group; |
326 | struct btrfs_fs_info *fs_info = block_group->fs_info; | 326 | struct btrfs_fs_info *fs_info; |
327 | struct btrfs_caching_control *caching_ctl = block_group->caching_ctl; | 327 | struct btrfs_caching_control *caching_ctl; |
328 | struct btrfs_root *extent_root = fs_info->extent_root; | 328 | struct btrfs_root *extent_root; |
329 | struct btrfs_path *path; | 329 | struct btrfs_path *path; |
330 | struct extent_buffer *leaf; | 330 | struct extent_buffer *leaf; |
331 | struct btrfs_key key; | 331 | struct btrfs_key key; |
@@ -334,9 +334,14 @@ static int caching_kthread(void *data) | |||
334 | u32 nritems; | 334 | u32 nritems; |
335 | int ret = 0; | 335 | int ret = 0; |
336 | 336 | ||
337 | caching_ctl = container_of(work, struct btrfs_caching_control, work); | ||
338 | block_group = caching_ctl->block_group; | ||
339 | fs_info = block_group->fs_info; | ||
340 | extent_root = fs_info->extent_root; | ||
341 | |||
337 | path = btrfs_alloc_path(); | 342 | path = btrfs_alloc_path(); |
338 | if (!path) | 343 | if (!path) |
339 | return -ENOMEM; | 344 | goto out; |
340 | 345 | ||
341 | last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); | 346 | last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); |
342 | 347 | ||
@@ -433,13 +438,11 @@ err: | |||
433 | free_excluded_extents(extent_root, block_group); | 438 | free_excluded_extents(extent_root, block_group); |
434 | 439 | ||
435 | mutex_unlock(&caching_ctl->mutex); | 440 | mutex_unlock(&caching_ctl->mutex); |
441 | out: | ||
436 | wake_up(&caching_ctl->wait); | 442 | wake_up(&caching_ctl->wait); |
437 | 443 | ||
438 | put_caching_control(caching_ctl); | 444 | put_caching_control(caching_ctl); |
439 | atomic_dec(&block_group->space_info->caching_threads); | ||
440 | btrfs_put_block_group(block_group); | 445 | btrfs_put_block_group(block_group); |
441 | |||
442 | return 0; | ||
443 | } | 446 | } |
444 | 447 | ||
445 | static int cache_block_group(struct btrfs_block_group_cache *cache, | 448 | static int cache_block_group(struct btrfs_block_group_cache *cache, |
@@ -449,7 +452,6 @@ static int cache_block_group(struct btrfs_block_group_cache *cache, | |||
449 | { | 452 | { |
450 | struct btrfs_fs_info *fs_info = cache->fs_info; | 453 | struct btrfs_fs_info *fs_info = cache->fs_info; |
451 | struct btrfs_caching_control *caching_ctl; | 454 | struct btrfs_caching_control *caching_ctl; |
452 | struct task_struct *tsk; | ||
453 | int ret = 0; | 455 | int ret = 0; |
454 | 456 | ||
455 | smp_mb(); | 457 | smp_mb(); |
@@ -501,6 +503,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache, | |||
501 | caching_ctl->progress = cache->key.objectid; | 503 | caching_ctl->progress = cache->key.objectid; |
502 | /* one for caching kthread, one for caching block group list */ | 504 | /* one for caching kthread, one for caching block group list */ |
503 | atomic_set(&caching_ctl->count, 2); | 505 | atomic_set(&caching_ctl->count, 2); |
506 | caching_ctl->work.func = caching_thread; | ||
504 | 507 | ||
505 | spin_lock(&cache->lock); | 508 | spin_lock(&cache->lock); |
506 | if (cache->cached != BTRFS_CACHE_NO) { | 509 | if (cache->cached != BTRFS_CACHE_NO) { |
@@ -516,16 +519,9 @@ static int cache_block_group(struct btrfs_block_group_cache *cache, | |||
516 | list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups); | 519 | list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups); |
517 | up_write(&fs_info->extent_commit_sem); | 520 | up_write(&fs_info->extent_commit_sem); |
518 | 521 | ||
519 | atomic_inc(&cache->space_info->caching_threads); | ||
520 | btrfs_get_block_group(cache); | 522 | btrfs_get_block_group(cache); |
521 | 523 | ||
522 | tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu\n", | 524 | btrfs_queue_worker(&fs_info->caching_workers, &caching_ctl->work); |
523 | cache->key.objectid); | ||
524 | if (IS_ERR(tsk)) { | ||
525 | ret = PTR_ERR(tsk); | ||
526 | printk(KERN_ERR "error running thread %d\n", ret); | ||
527 | BUG(); | ||
528 | } | ||
529 | 525 | ||
530 | return ret; | 526 | return ret; |
531 | } | 527 | } |
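
The kthread-to-worker conversion above relies on the usual embedded-work idiom: put a struct btrfs_work inside struct btrfs_caching_control, point work.func at the handler, queue it, and recover the owning object in the handler via container_of(). A self-contained user-space model of the idiom; queue_worker() here just calls the function directly, where a real pool would defer it:

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

struct work {
        void (*func)(struct work *w);
};

/* Model of btrfs_caching_control: the work item is embedded in the
 * object the handler needs. */
struct caching_control {
        const char *block_group;
        struct work work;
};

static void caching_thread(struct work *w)
{
        /* Recover the embedding object, as the real handler does. */
        struct caching_control *ctl =
                container_of(w, struct caching_control, work);

        printf("caching %s\n", ctl->block_group);
}

/* Stand-in for btrfs_queue_worker(); a real pool defers the call. */
static void queue_worker(struct work *w)
{
        w->func(w);
}

int main(void)
{
        struct caching_control ctl = { .block_group = "block group 12345" };

        ctl.work.func = caching_thread;
        queue_worker(&ctl.work);
        return 0;
}
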
@@ -667,7 +663,9 @@ int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len) | |||
667 | struct btrfs_path *path; | 663 | struct btrfs_path *path; |
668 | 664 | ||
669 | path = btrfs_alloc_path(); | 665 | path = btrfs_alloc_path(); |
670 | BUG_ON(!path); | 666 | if (!path) |
667 | return -ENOMEM; | ||
668 | |||
671 | key.objectid = start; | 669 | key.objectid = start; |
672 | key.offset = len; | 670 | key.offset = len; |
673 | btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); | 671 | btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); |
@@ -1784,6 +1782,9 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, | |||
1784 | 1782 | ||
1785 | 1783 | ||
1786 | for (i = 0; i < multi->num_stripes; i++, stripe++) { | 1784 | for (i = 0; i < multi->num_stripes; i++, stripe++) { |
1785 | if (!stripe->dev->can_discard) | ||
1786 | continue; | ||
1787 | |||
1787 | ret = btrfs_issue_discard(stripe->dev->bdev, | 1788 | ret = btrfs_issue_discard(stripe->dev->bdev, |
1788 | stripe->physical, | 1789 | stripe->physical, |
1789 | stripe->length); | 1790 | stripe->length); |
@@ -1791,11 +1792,16 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, | |||
1791 | discarded_bytes += stripe->length; | 1792 | discarded_bytes += stripe->length; |
1792 | else if (ret != -EOPNOTSUPP) | 1793 | else if (ret != -EOPNOTSUPP) |
1793 | break; | 1794 | break; |
1795 | |||
1796 | /* | ||
1797 | * Just in case we get back EOPNOTSUPP for some reason, | ||
1798 | * just ignore the return value so we don't screw up | ||
1799 | * people calling discard_extent. | ||
1800 | */ | ||
1801 | ret = 0; | ||
1794 | } | 1802 | } |
1795 | kfree(multi); | 1803 | kfree(multi); |
1796 | } | 1804 | } |
1797 | if (discarded_bytes && ret == -EOPNOTSUPP) | ||
1798 | ret = 0; | ||
1799 | 1805 | ||
1800 | if (actual_bytes) | 1806 | if (actual_bytes) |
1801 | *actual_bytes = discarded_bytes; | 1807 | *actual_bytes = discarded_bytes; |
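
After this hunk the stripe loop skips devices whose can_discard flag is clear and resets ret after every EOPNOTSUPP, so one non-discarding device in a mixed array no longer poisons the return value seen by callers of discard_extent. A user-space sketch of the resulting control flow; the stripe struct and the injected results are modeled, not the kernel types:

#include <errno.h>
#include <stdio.h>

struct stripe {
        int can_discard;        /* device advertises discard support */
        long length;
        int ret;                /* injected result of the discard */
};

/* Model of the fixed loop: skip devices without discard, ignore
 * EOPNOTSUPP per stripe, stop only on a real I/O error. */
static int discard_stripes(const struct stripe *s, int n, long *actual)
{
        long discarded = 0;
        int i, ret = 0;

        for (i = 0; i < n; i++) {
                if (!s[i].can_discard)
                        continue;
                ret = s[i].ret;
                if (ret == 0)
                        discarded += s[i].length;
                else if (ret != -EOPNOTSUPP)
                        break;
                ret = 0;        /* EOPNOTSUPP: ignore and keep going */
        }
        *actual = discarded;
        return ret;
}

int main(void)
{
        struct stripe s[] = {
                { 1, 4096, 0 },
                { 0, 4096, 0 },                 /* skipped */
                { 1, 4096, -EOPNOTSUPP },       /* ignored */
        };
        long actual;
        int ret = discard_stripes(s, 3, &actual);

        printf("ret=%d discarded=%ld\n", ret, actual);
        return 0;
}
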
@@ -2932,9 +2938,10 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, | |||
2932 | found->full = 0; | 2938 | found->full = 0; |
2933 | found->force_alloc = CHUNK_ALLOC_NO_FORCE; | 2939 | found->force_alloc = CHUNK_ALLOC_NO_FORCE; |
2934 | found->chunk_alloc = 0; | 2940 | found->chunk_alloc = 0; |
2941 | found->flush = 0; | ||
2942 | init_waitqueue_head(&found->wait); | ||
2935 | *space_info = found; | 2943 | *space_info = found; |
2936 | list_add_rcu(&found->list, &info->space_info); | 2944 | list_add_rcu(&found->list, &info->space_info); |
2937 | atomic_set(&found->caching_threads, 0); | ||
2938 | return 0; | 2945 | return 0; |
2939 | } | 2946 | } |
2940 | 2947 | ||
@@ -3275,6 +3282,9 @@ again: | |||
3275 | } | 3282 | } |
3276 | 3283 | ||
3277 | ret = btrfs_alloc_chunk(trans, extent_root, flags); | 3284 | ret = btrfs_alloc_chunk(trans, extent_root, flags); |
3285 | if (ret < 0 && ret != -ENOSPC) | ||
3286 | goto out; | ||
3287 | |||
3278 | spin_lock(&space_info->lock); | 3288 | spin_lock(&space_info->lock); |
3279 | if (ret) | 3289 | if (ret) |
3280 | space_info->full = 1; | 3290 | space_info->full = 1; |
@@ -3284,6 +3294,7 @@ again: | |||
3284 | space_info->force_alloc = CHUNK_ALLOC_NO_FORCE; | 3294 | space_info->force_alloc = CHUNK_ALLOC_NO_FORCE; |
3285 | space_info->chunk_alloc = 0; | 3295 | space_info->chunk_alloc = 0; |
3286 | spin_unlock(&space_info->lock); | 3296 | spin_unlock(&space_info->lock); |
3297 | out: | ||
3287 | mutex_unlock(&extent_root->fs_info->chunk_mutex); | 3298 | mutex_unlock(&extent_root->fs_info->chunk_mutex); |
3288 | return ret; | 3299 | return ret; |
3289 | } | 3300 | } |
@@ -3314,6 +3325,14 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans, | |||
3314 | if (reserved == 0) | 3325 | if (reserved == 0) |
3315 | return 0; | 3326 | return 0; |
3316 | 3327 | ||
3328 | smp_mb(); | ||
3329 | if (root->fs_info->delalloc_bytes == 0) { | ||
3330 | if (trans) | ||
3331 | return 0; | ||
3332 | btrfs_wait_ordered_extents(root, 0, 0); | ||
3333 | return 0; | ||
3334 | } | ||
3335 | |||
3317 | max_reclaim = min(reserved, to_reclaim); | 3336 | max_reclaim = min(reserved, to_reclaim); |
3318 | 3337 | ||
3319 | while (loops < 1024) { | 3338 | while (loops < 1024) { |
@@ -3356,6 +3375,8 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans, | |||
3356 | } | 3375 | } |
3357 | 3376 | ||
3358 | } | 3377 | } |
3378 | if (reclaimed >= to_reclaim && !trans) | ||
3379 | btrfs_wait_ordered_extents(root, 0, 0); | ||
3359 | return reclaimed >= to_reclaim; | 3380 | return reclaimed >= to_reclaim; |
3360 | } | 3381 | } |
3361 | 3382 | ||
@@ -3380,15 +3401,36 @@ static int reserve_metadata_bytes(struct btrfs_trans_handle *trans, | |||
3380 | u64 num_bytes = orig_bytes; | 3401 | u64 num_bytes = orig_bytes; |
3381 | int retries = 0; | 3402 | int retries = 0; |
3382 | int ret = 0; | 3403 | int ret = 0; |
3383 | bool reserved = false; | ||
3384 | bool committed = false; | 3404 | bool committed = false; |
3405 | bool flushing = false; | ||
3385 | 3406 | ||
3386 | again: | 3407 | again: |
3387 | ret = -ENOSPC; | 3408 | ret = 0; |
3388 | if (reserved) | ||
3389 | num_bytes = 0; | ||
3390 | |||
3391 | spin_lock(&space_info->lock); | 3409 | spin_lock(&space_info->lock); |
3410 | /* | ||
3411 | * We only want to wait if somebody other than us is flushing and we are | ||
3412 | * actually allowed to flush. ||
3413 | */ | ||
3414 | while (flush && !flushing && space_info->flush) { | ||
3415 | spin_unlock(&space_info->lock); | ||
3416 | /* | ||
3417 | * If we have a trans handle we can't wait because the flusher | ||
3418 | * may have to commit the transaction, which would mean we would | ||
3419 | * deadlock since we are waiting for the flusher to finish, but | ||
3420 | * hold the current transaction open. | ||
3421 | */ | ||
3422 | if (trans) | ||
3423 | return -EAGAIN; | ||
3424 | ret = wait_event_interruptible(space_info->wait, | ||
3425 | !space_info->flush); | ||
3426 | /* Must have been interrupted, return */ | ||
3427 | if (ret) | ||
3428 | return -EINTR; | ||
3429 | |||
3430 | spin_lock(&space_info->lock); | ||
3431 | } | ||
3432 | |||
3433 | ret = -ENOSPC; | ||
3392 | unused = space_info->bytes_used + space_info->bytes_reserved + | 3434 | unused = space_info->bytes_used + space_info->bytes_reserved + |
3393 | space_info->bytes_pinned + space_info->bytes_readonly + | 3435 | space_info->bytes_pinned + space_info->bytes_readonly + |
3394 | space_info->bytes_may_use; | 3436 | space_info->bytes_may_use; |
@@ -3403,8 +3445,7 @@ again: | |||
3403 | if (unused <= space_info->total_bytes) { | 3445 | if (unused <= space_info->total_bytes) { |
3404 | unused = space_info->total_bytes - unused; | 3446 | unused = space_info->total_bytes - unused; |
3405 | if (unused >= num_bytes) { | 3447 | if (unused >= num_bytes) { |
3406 | if (!reserved) | 3448 | space_info->bytes_reserved += orig_bytes; |
3407 | space_info->bytes_reserved += orig_bytes; | ||
3408 | ret = 0; | 3449 | ret = 0; |
3409 | } else { | 3450 | } else { |
3410 | /* | 3451 | /* |
@@ -3429,17 +3470,14 @@ again: | |||
3429 | * to reclaim space we can actually use it instead of somebody else | 3470 | * to reclaim space we can actually use it instead of somebody else |
3430 | * stealing it from us. | 3471 | * stealing it from us. |
3431 | */ | 3472 | */ |
3432 | if (ret && !reserved) { | 3473 | if (ret && flush) { |
3433 | space_info->bytes_reserved += orig_bytes; | 3474 | flushing = true; |
3434 | reserved = true; | 3475 | space_info->flush = 1; |
3435 | } | 3476 | } |
3436 | 3477 | ||
3437 | spin_unlock(&space_info->lock); | 3478 | spin_unlock(&space_info->lock); |
3438 | 3479 | ||
3439 | if (!ret) | 3480 | if (!ret || !flush) |
3440 | return 0; | ||
3441 | |||
3442 | if (!flush) | ||
3443 | goto out; | 3481 | goto out; |
3444 | 3482 | ||
3445 | /* | 3483 | /* |
@@ -3447,11 +3485,11 @@ again: | |||
3447 | * metadata until after the IO is completed. | 3485 | * metadata until after the IO is completed. |
3448 | */ | 3486 | */ |
3449 | ret = shrink_delalloc(trans, root, num_bytes, 1); | 3487 | ret = shrink_delalloc(trans, root, num_bytes, 1); |
3450 | if (ret > 0) | 3488 | if (ret < 0) |
3451 | return 0; | ||
3452 | else if (ret < 0) | ||
3453 | goto out; | 3489 | goto out; |
3454 | 3490 | ||
3491 | ret = 0; | ||
3492 | |||
3455 | /* | 3493 | /* |
3456 | * So if we were overcommitted it's possible that somebody else flushed | 3494 | * So if we were overcommitted it's possible that somebody else flushed |
3457 | * out enough space and we simply didn't have enough space to reclaim, | 3495 | * out enough space and we simply didn't have enough space to reclaim, |
@@ -3462,11 +3500,11 @@ again: | |||
3462 | goto again; | 3500 | goto again; |
3463 | } | 3501 | } |
3464 | 3502 | ||
3465 | spin_lock(&space_info->lock); | ||
3466 | /* | 3503 | /* |
3467 | * Not enough space to be reclaimed, don't bother committing the | 3504 | * Not enough space to be reclaimed, don't bother committing the |
3468 | * transaction. | 3505 | * transaction. |
3469 | */ | 3506 | */ |
3507 | spin_lock(&space_info->lock); | ||
3470 | if (space_info->bytes_pinned < orig_bytes) | 3508 | if (space_info->bytes_pinned < orig_bytes) |
3471 | ret = -ENOSPC; | 3509 | ret = -ENOSPC; |
3472 | spin_unlock(&space_info->lock); | 3510 | spin_unlock(&space_info->lock); |
@@ -3474,10 +3512,13 @@ again: | |||
3474 | goto out; | 3512 | goto out; |
3475 | 3513 | ||
3476 | ret = -EAGAIN; | 3514 | ret = -EAGAIN; |
3477 | if (trans || committed) | 3515 | if (trans) |
3478 | goto out; | 3516 | goto out; |
3479 | 3517 | ||
3480 | ret = -ENOSPC; | 3518 | ret = -ENOSPC; |
3519 | if (committed) | ||
3520 | goto out; | ||
3521 | |||
3481 | trans = btrfs_join_transaction(root); | 3522 | trans = btrfs_join_transaction(root); |
3482 | if (IS_ERR(trans)) | 3523 | if (IS_ERR(trans)) |
3483 | goto out; | 3524 | goto out; |
@@ -3489,12 +3530,12 @@ again: | |||
3489 | } | 3530 | } |
3490 | 3531 | ||
3491 | out: | 3532 | out: |
3492 | if (reserved) { | 3533 | if (flushing) { |
3493 | spin_lock(&space_info->lock); | 3534 | spin_lock(&space_info->lock); |
3494 | space_info->bytes_reserved -= orig_bytes; | 3535 | space_info->flush = 0; |
3536 | wake_up_all(&space_info->wait); | ||
3495 | spin_unlock(&space_info->lock); | 3537 | spin_unlock(&space_info->lock); |
3496 | } | 3538 | } |
3497 | |||
3498 | return ret; | 3539 | return ret; |
3499 | } | 3540 | } |
3500 | 3541 | ||
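
The wait loop added above makes flushing exclusive per space_info: one task sets space_info->flush, everyone else sleeps on space_info->wait, except that a task holding a transaction handle must bail with -EAGAIN because the flusher may need to commit that very transaction. A compact pthread model of the gate, with a condition variable standing in for the kernel waitqueue and have_trans for the trans-handle check:

#include <errno.h>
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t waitq = PTHREAD_COND_INITIALIZER;
static int flushing;                    /* model of space_info->flush */

/* Model of the gate in reserve_metadata_bytes(): a task holding a
 * transaction handle must not sleep behind another flusher. */
static int reserve(int have_trans)
{
        pthread_mutex_lock(&lock);
        while (flushing) {
                if (have_trans) {
                        pthread_mutex_unlock(&lock);
                        return -EAGAIN;
                }
                pthread_cond_wait(&waitq, &lock);
        }
        flushing = 1;                   /* we are the flusher now */
        pthread_mutex_unlock(&lock);

        /* ... shrink delalloc / commit transaction would go here ... */

        pthread_mutex_lock(&lock);
        flushing = 0;
        pthread_cond_broadcast(&waitq); /* model of wake_up_all() */
        pthread_mutex_unlock(&lock);
        return 0;
}

int main(void)
{
        printf("no trans: %d\n", reserve(0));
        printf("trans:    %d\n", reserve(1));
        return 0;
}
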
@@ -3704,7 +3745,6 @@ int btrfs_block_rsv_check(struct btrfs_trans_handle *trans, | |||
3704 | if (commit_trans) { | 3745 | if (commit_trans) { |
3705 | if (trans) | 3746 | if (trans) |
3706 | return -EAGAIN; | 3747 | return -EAGAIN; |
3707 | |||
3708 | trans = btrfs_join_transaction(root); | 3748 | trans = btrfs_join_transaction(root); |
3709 | BUG_ON(IS_ERR(trans)); | 3749 | BUG_ON(IS_ERR(trans)); |
3710 | ret = btrfs_commit_transaction(trans, root); | 3750 | ret = btrfs_commit_transaction(trans, root); |
@@ -3874,26 +3914,6 @@ int btrfs_truncate_reserve_metadata(struct btrfs_trans_handle *trans, | |||
3874 | return 0; | 3914 | return 0; |
3875 | } | 3915 | } |
3876 | 3916 | ||
3877 | int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans, | ||
3878 | struct btrfs_root *root, | ||
3879 | int num_items) | ||
3880 | { | ||
3881 | u64 num_bytes; | ||
3882 | int ret; | ||
3883 | |||
3884 | if (num_items == 0 || root->fs_info->chunk_root == root) | ||
3885 | return 0; | ||
3886 | |||
3887 | num_bytes = btrfs_calc_trans_metadata_size(root, num_items); | ||
3888 | ret = btrfs_block_rsv_add(trans, root, &root->fs_info->trans_block_rsv, | ||
3889 | num_bytes); | ||
3890 | if (!ret) { | ||
3891 | trans->bytes_reserved += num_bytes; | ||
3892 | trans->block_rsv = &root->fs_info->trans_block_rsv; | ||
3893 | } | ||
3894 | return ret; | ||
3895 | } | ||
3896 | |||
3897 | void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, | 3917 | void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, |
3898 | struct btrfs_root *root) | 3918 | struct btrfs_root *root) |
3899 | { | 3919 | { |
@@ -3944,6 +3964,30 @@ int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans, | |||
3944 | return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); | 3964 | return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); |
3945 | } | 3965 | } |
3946 | 3966 | ||
3967 | static unsigned drop_outstanding_extent(struct inode *inode) | ||
3968 | { | ||
3969 | unsigned dropped_extents = 0; | ||
3970 | |||
3971 | spin_lock(&BTRFS_I(inode)->lock); | ||
3972 | BUG_ON(!BTRFS_I(inode)->outstanding_extents); | ||
3973 | BTRFS_I(inode)->outstanding_extents--; | ||
3974 | |||
3975 | /* | ||
3976 | * If we have at least as many outstanding extents as we have ||
3977 | * reserved, then we need to leave the reserved extents count alone. ||
3978 | */ | ||
3979 | if (BTRFS_I(inode)->outstanding_extents >= | ||
3980 | BTRFS_I(inode)->reserved_extents) | ||
3981 | goto out; | ||
3982 | |||
3983 | dropped_extents = BTRFS_I(inode)->reserved_extents - | ||
3984 | BTRFS_I(inode)->outstanding_extents; | ||
3985 | BTRFS_I(inode)->reserved_extents -= dropped_extents; | ||
3986 | out: | ||
3987 | spin_unlock(&BTRFS_I(inode)->lock); | ||
3988 | return dropped_extents; | ||
3989 | } | ||
3990 | |||
3947 | static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes) | 3991 | static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes) |
3948 | { | 3992 | { |
3949 | return num_bytes >>= 3; | 3993 | return num_bytes >>= 3; |
@@ -3953,9 +3997,8 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) | |||
3953 | { | 3997 | { |
3954 | struct btrfs_root *root = BTRFS_I(inode)->root; | 3998 | struct btrfs_root *root = BTRFS_I(inode)->root; |
3955 | struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv; | 3999 | struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv; |
3956 | u64 to_reserve; | 4000 | u64 to_reserve = 0; |
3957 | int nr_extents; | 4001 | unsigned nr_extents = 0; |
3958 | int reserved_extents; | ||
3959 | int ret; | 4002 | int ret; |
3960 | 4003 | ||
3961 | if (btrfs_transaction_in_commit(root->fs_info)) | 4004 | if (btrfs_transaction_in_commit(root->fs_info)) |
@@ -3963,66 +4006,49 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) | |||
3963 | 4006 | ||
3964 | num_bytes = ALIGN(num_bytes, root->sectorsize); | 4007 | num_bytes = ALIGN(num_bytes, root->sectorsize); |
3965 | 4008 | ||
3966 | nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents) + 1; | 4009 | spin_lock(&BTRFS_I(inode)->lock); |
3967 | reserved_extents = atomic_read(&BTRFS_I(inode)->reserved_extents); | 4010 | BTRFS_I(inode)->outstanding_extents++; |
4011 | |||
4012 | if (BTRFS_I(inode)->outstanding_extents > | ||
4013 | BTRFS_I(inode)->reserved_extents) { | ||
4014 | nr_extents = BTRFS_I(inode)->outstanding_extents - | ||
4015 | BTRFS_I(inode)->reserved_extents; | ||
4016 | BTRFS_I(inode)->reserved_extents += nr_extents; | ||
3968 | 4017 | ||
3969 | if (nr_extents > reserved_extents) { | ||
3970 | nr_extents -= reserved_extents; | ||
3971 | to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents); | 4018 | to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents); |
3972 | } else { | ||
3973 | nr_extents = 0; | ||
3974 | to_reserve = 0; | ||
3975 | } | 4019 | } |
4020 | spin_unlock(&BTRFS_I(inode)->lock); | ||
3976 | 4021 | ||
3977 | to_reserve += calc_csum_metadata_size(inode, num_bytes); | 4022 | to_reserve += calc_csum_metadata_size(inode, num_bytes); |
3978 | ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1); | 4023 | ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1); |
3979 | if (ret) | 4024 | if (ret) { |
4025 | unsigned dropped; | ||
4026 | /* | ||
4027 | * We don't need the return value since our reservation failed, | ||
4028 | * we just need to clean up our counter. | ||
4029 | */ | ||
4030 | dropped = drop_outstanding_extent(inode); | ||
4031 | WARN_ON(dropped > 1); | ||
3980 | return ret; | 4032 | return ret; |
3981 | 4033 | } | |
3982 | atomic_add(nr_extents, &BTRFS_I(inode)->reserved_extents); | ||
3983 | atomic_inc(&BTRFS_I(inode)->outstanding_extents); | ||
3984 | 4034 | ||
3985 | block_rsv_add_bytes(block_rsv, to_reserve, 1); | 4035 | block_rsv_add_bytes(block_rsv, to_reserve, 1); |
3986 | 4036 | ||
3987 | if (block_rsv->size > 512 * 1024 * 1024) | ||
3988 | shrink_delalloc(NULL, root, to_reserve, 0); | ||
3989 | |||
3990 | return 0; | 4037 | return 0; |
3991 | } | 4038 | } |
3992 | 4039 | ||
3993 | void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes) | 4040 | void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes) |
3994 | { | 4041 | { |
3995 | struct btrfs_root *root = BTRFS_I(inode)->root; | 4042 | struct btrfs_root *root = BTRFS_I(inode)->root; |
3996 | u64 to_free; | 4043 | u64 to_free = 0; |
3997 | int nr_extents; | 4044 | unsigned dropped; |
3998 | int reserved_extents; | ||
3999 | 4045 | ||
4000 | num_bytes = ALIGN(num_bytes, root->sectorsize); | 4046 | num_bytes = ALIGN(num_bytes, root->sectorsize); |
4001 | atomic_dec(&BTRFS_I(inode)->outstanding_extents); | 4047 | dropped = drop_outstanding_extent(inode); |
4002 | WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents) < 0); | ||
4003 | |||
4004 | reserved_extents = atomic_read(&BTRFS_I(inode)->reserved_extents); | ||
4005 | do { | ||
4006 | int old, new; | ||
4007 | |||
4008 | nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents); | ||
4009 | if (nr_extents >= reserved_extents) { | ||
4010 | nr_extents = 0; | ||
4011 | break; | ||
4012 | } | ||
4013 | old = reserved_extents; | ||
4014 | nr_extents = reserved_extents - nr_extents; | ||
4015 | new = reserved_extents - nr_extents; | ||
4016 | old = atomic_cmpxchg(&BTRFS_I(inode)->reserved_extents, | ||
4017 | reserved_extents, new); | ||
4018 | if (likely(old == reserved_extents)) | ||
4019 | break; | ||
4020 | reserved_extents = old; | ||
4021 | } while (1); | ||
4022 | 4048 | ||
4023 | to_free = calc_csum_metadata_size(inode, num_bytes); | 4049 | to_free = calc_csum_metadata_size(inode, num_bytes); |
4024 | if (nr_extents > 0) | 4050 | if (dropped > 0) |
4025 | to_free += btrfs_calc_trans_metadata_size(root, nr_extents); | 4051 | to_free += btrfs_calc_trans_metadata_size(root, dropped); |
4026 | 4052 | ||
4027 | btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv, | 4053 | btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv, |
4028 | to_free); | 4054 | to_free); |
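
The rewritten reserve/release pair keeps the two per-inode counters under a plain spinlock instead of the old lock-free atomic_cmpxchg dance: outstanding_extents counts extents in flight, reserved_extents counts extents with metadata reserved, and only the surplus is ever reserved or handed back. A user-space model of the two updates, mirroring the new btrfs_delalloc_reserve_metadata() and drop_outstanding_extent() logic:

#include <pthread.h>
#include <stdio.h>

/* Model of the two per-inode counters now updated under a plain lock. */
struct inode_counters {
        pthread_mutex_t lock;
        unsigned outstanding;   /* extents in flight */
        unsigned reserved;      /* extents with metadata reserved */
};

/* Reserve side: bump outstanding, reserve only the uncovered surplus. */
static unsigned reserve_extents(struct inode_counters *i)
{
        unsigned to_reserve = 0;

        pthread_mutex_lock(&i->lock);
        i->outstanding++;
        if (i->outstanding > i->reserved) {
                to_reserve = i->outstanding - i->reserved;
                i->reserved += to_reserve;
        }
        pthread_mutex_unlock(&i->lock);
        return to_reserve;
}

/* Release side: model of drop_outstanding_extent(). */
static unsigned drop_extent(struct inode_counters *i)
{
        unsigned dropped = 0;

        pthread_mutex_lock(&i->lock);
        i->outstanding--;
        if (i->outstanding < i->reserved) {
                dropped = i->reserved - i->outstanding;
                i->reserved -= dropped;
        }
        pthread_mutex_unlock(&i->lock);
        return dropped;
}

int main(void)
{
        struct inode_counters ino = { PTHREAD_MUTEX_INITIALIZER, 0, 0 };

        printf("reserve: +%u\n", reserve_extents(&ino));
        printf("reserve: +%u\n", reserve_extents(&ino));
        printf("drop:    -%u\n", drop_extent(&ino));
        printf("drop:    -%u\n", drop_extent(&ino));
        return 0;
}
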
@@ -4444,7 +4470,9 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
4444 | printk(KERN_ERR "umm, got %d back from search" | 4470 | printk(KERN_ERR "umm, got %d back from search" |
4445 | ", was looking for %llu\n", ret, | 4471 | ", was looking for %llu\n", ret, |
4446 | (unsigned long long)bytenr); | 4472 | (unsigned long long)bytenr); |
4447 | btrfs_print_leaf(extent_root, path->nodes[0]); | 4473 | if (ret > 0) |
4474 | btrfs_print_leaf(extent_root, | ||
4475 | path->nodes[0]); | ||
4448 | } | 4476 | } |
4449 | BUG_ON(ret); | 4477 | BUG_ON(ret); |
4450 | extent_slot = path->slots[0]; | 4478 | extent_slot = path->slots[0]; |
@@ -4990,14 +5018,10 @@ have_block_group: | |||
4990 | } | 5018 | } |
4991 | 5019 | ||
4992 | /* | 5020 | /* |
4993 | * We only want to start kthread caching if we are at | 5021 | * The caching workers are limited to 2 threads, so we |
4994 | * the point where we will wait for caching to make | 5022 | * can queue as much work as we care to. |
4995 | * progress, or if our ideal search is over and we've | ||
4996 | * found somebody to start caching. | ||
4997 | */ | 5023 | */ |
4998 | if (loop > LOOP_CACHING_NOWAIT || | 5024 | if (loop > LOOP_FIND_IDEAL) { |
4999 | (loop > LOOP_FIND_IDEAL && | ||
5000 | atomic_read(&space_info->caching_threads) < 2)) { | ||
5001 | ret = cache_block_group(block_group, trans, | 5025 | ret = cache_block_group(block_group, trans, |
5002 | orig_root, 0); | 5026 | orig_root, 0); |
5003 | BUG_ON(ret); | 5027 | BUG_ON(ret); |
@@ -5065,7 +5089,9 @@ have_block_group: | |||
5065 | * group it does point to and try again | 5089 | * group it does point to and try again
5066 | */ | 5090 | */ |
5067 | if (!last_ptr_loop && last_ptr->block_group && | 5091 | if (!last_ptr_loop && last_ptr->block_group && |
5068 | last_ptr->block_group != block_group) { | 5092 | last_ptr->block_group != block_group && |
5093 | index <= | ||
5094 | get_block_group_index(last_ptr->block_group)) { | ||
5069 | 5095 | ||
5070 | btrfs_put_block_group(block_group); | 5096 | btrfs_put_block_group(block_group); |
5071 | block_group = last_ptr->block_group; | 5097 | block_group = last_ptr->block_group; |
@@ -5219,8 +5245,7 @@ loop: | |||
5219 | if (loop == LOOP_FIND_IDEAL && found_uncached_bg) { | 5245 | if (loop == LOOP_FIND_IDEAL && found_uncached_bg) { |
5220 | found_uncached_bg = false; | 5246 | found_uncached_bg = false; |
5221 | loop++; | 5247 | loop++; |
5222 | if (!ideal_cache_percent && | 5248 | if (!ideal_cache_percent) |
5223 | atomic_read(&space_info->caching_threads)) | ||
5224 | goto search; | 5249 | goto search; |
5225 | 5250 | ||
5226 | /* | 5251 | /* |
@@ -5494,7 +5519,8 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, | |||
5494 | u32 size = sizeof(*extent_item) + sizeof(*block_info) + sizeof(*iref); | 5519 | u32 size = sizeof(*extent_item) + sizeof(*block_info) + sizeof(*iref); |
5495 | 5520 | ||
5496 | path = btrfs_alloc_path(); | 5521 | path = btrfs_alloc_path(); |
5497 | BUG_ON(!path); | 5522 | if (!path) |
5523 | return -ENOMEM; | ||
5498 | 5524 | ||
5499 | path->leave_spinning = 1; | 5525 | path->leave_spinning = 1; |
5500 | ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path, | 5526 | ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path, |
@@ -5623,7 +5649,7 @@ struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, | |||
5623 | if (!buf) | 5649 | if (!buf) |
5624 | return ERR_PTR(-ENOMEM); | 5650 | return ERR_PTR(-ENOMEM); |
5625 | btrfs_set_header_generation(buf, trans->transid); | 5651 | btrfs_set_header_generation(buf, trans->transid); |
5626 | btrfs_set_buffer_lockdep_class(buf, level); | 5652 | btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level); |
5627 | btrfs_tree_lock(buf); | 5653 | btrfs_tree_lock(buf); |
5628 | clean_tree_block(trans, root, buf); | 5654 | clean_tree_block(trans, root, buf); |
5629 | 5655 | ||
@@ -5910,7 +5936,7 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans, | |||
5910 | return 1; | 5936 | return 1; |
5911 | 5937 | ||
5912 | if (path->locks[level] && !wc->keep_locks) { | 5938 | if (path->locks[level] && !wc->keep_locks) { |
5913 | btrfs_tree_unlock(eb); | 5939 | btrfs_tree_unlock_rw(eb, path->locks[level]); |
5914 | path->locks[level] = 0; | 5940 | path->locks[level] = 0; |
5915 | } | 5941 | } |
5916 | return 0; | 5942 | return 0; |
@@ -5934,7 +5960,7 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans, | |||
5934 | * keep the tree lock | 5960 | * keep the tree lock |
5935 | */ | 5961 | */ |
5936 | if (path->locks[level] && level > 0) { | 5962 | if (path->locks[level] && level > 0) { |
5937 | btrfs_tree_unlock(eb); | 5963 | btrfs_tree_unlock_rw(eb, path->locks[level]); |
5938 | path->locks[level] = 0; | 5964 | path->locks[level] = 0; |
5939 | } | 5965 | } |
5940 | return 0; | 5966 | return 0; |
@@ -6047,7 +6073,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, | |||
6047 | BUG_ON(level != btrfs_header_level(next)); | 6073 | BUG_ON(level != btrfs_header_level(next)); |
6048 | path->nodes[level] = next; | 6074 | path->nodes[level] = next; |
6049 | path->slots[level] = 0; | 6075 | path->slots[level] = 0; |
6050 | path->locks[level] = 1; | 6076 | path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING; |
6051 | wc->level = level; | 6077 | wc->level = level; |
6052 | if (wc->level == 1) | 6078 | if (wc->level == 1) |
6053 | wc->reada_slot = 0; | 6079 | wc->reada_slot = 0; |
@@ -6118,7 +6144,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, | |||
6118 | BUG_ON(level == 0); | 6144 | BUG_ON(level == 0); |
6119 | btrfs_tree_lock(eb); | 6145 | btrfs_tree_lock(eb); |
6120 | btrfs_set_lock_blocking(eb); | 6146 | btrfs_set_lock_blocking(eb); |
6121 | path->locks[level] = 1; | 6147 | path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING; |
6122 | 6148 | ||
6123 | ret = btrfs_lookup_extent_info(trans, root, | 6149 | ret = btrfs_lookup_extent_info(trans, root, |
6124 | eb->start, eb->len, | 6150 | eb->start, eb->len, |
@@ -6127,8 +6153,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, | |||
6127 | BUG_ON(ret); | 6153 | BUG_ON(ret); |
6128 | BUG_ON(wc->refs[level] == 0); | 6154 | BUG_ON(wc->refs[level] == 0); |
6129 | if (wc->refs[level] == 1) { | 6155 | if (wc->refs[level] == 1) { |
6130 | btrfs_tree_unlock(eb); | 6156 | btrfs_tree_unlock_rw(eb, path->locks[level]); |
6131 | path->locks[level] = 0; | ||
6132 | return 1; | 6157 | return 1; |
6133 | } | 6158 | } |
6134 | } | 6159 | } |
@@ -6150,7 +6175,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, | |||
6150 | btrfs_header_generation(eb) == trans->transid) { | 6175 | btrfs_header_generation(eb) == trans->transid) { |
6151 | btrfs_tree_lock(eb); | 6176 | btrfs_tree_lock(eb); |
6152 | btrfs_set_lock_blocking(eb); | 6177 | btrfs_set_lock_blocking(eb); |
6153 | path->locks[level] = 1; | 6178 | path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING; |
6154 | } | 6179 | } |
6155 | clean_tree_block(trans, root, eb); | 6180 | clean_tree_block(trans, root, eb); |
6156 | } | 6181 | } |
@@ -6229,7 +6254,8 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans, | |||
6229 | return 0; | 6254 | return 0; |
6230 | 6255 | ||
6231 | if (path->locks[level]) { | 6256 | if (path->locks[level]) { |
6232 | btrfs_tree_unlock(path->nodes[level]); | 6257 | btrfs_tree_unlock_rw(path->nodes[level], |
6258 | path->locks[level]); | ||
6233 | path->locks[level] = 0; | 6259 | path->locks[level] = 0; |
6234 | } | 6260 | } |
6235 | free_extent_buffer(path->nodes[level]); | 6261 | free_extent_buffer(path->nodes[level]); |
@@ -6251,8 +6277,8 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans, | |||
6251 | * also make sure backrefs for the shared block and all lower level | 6277 | * also make sure backrefs for the shared block and all lower level |
6252 | * blocks are properly updated. | 6278 | * blocks are properly updated. |
6253 | */ | 6279 | */ |
6254 | int btrfs_drop_snapshot(struct btrfs_root *root, | 6280 | void btrfs_drop_snapshot(struct btrfs_root *root, |
6255 | struct btrfs_block_rsv *block_rsv, int update_ref) | 6281 | struct btrfs_block_rsv *block_rsv, int update_ref) |
6256 | { | 6282 | { |
6257 | struct btrfs_path *path; | 6283 | struct btrfs_path *path; |
6258 | struct btrfs_trans_handle *trans; | 6284 | struct btrfs_trans_handle *trans; |
@@ -6265,10 +6291,17 @@ int btrfs_drop_snapshot(struct btrfs_root *root, | |||
6265 | int level; | 6291 | int level; |
6266 | 6292 | ||
6267 | path = btrfs_alloc_path(); | 6293 | path = btrfs_alloc_path(); |
6268 | BUG_ON(!path); | 6294 | if (!path) { |
6295 | err = -ENOMEM; | ||
6296 | goto out; | ||
6297 | } | ||
6269 | 6298 | ||
6270 | wc = kzalloc(sizeof(*wc), GFP_NOFS); | 6299 | wc = kzalloc(sizeof(*wc), GFP_NOFS); |
6271 | BUG_ON(!wc); | 6300 | if (!wc) { |
6301 | btrfs_free_path(path); | ||
6302 | err = -ENOMEM; | ||
6303 | goto out; | ||
6304 | } | ||
6272 | 6305 | ||
6273 | trans = btrfs_start_transaction(tree_root, 0); | 6306 | trans = btrfs_start_transaction(tree_root, 0); |
6274 | BUG_ON(IS_ERR(trans)); | 6307 | BUG_ON(IS_ERR(trans)); |
@@ -6281,7 +6314,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, | |||
6281 | path->nodes[level] = btrfs_lock_root_node(root); | 6314 | path->nodes[level] = btrfs_lock_root_node(root); |
6282 | btrfs_set_lock_blocking(path->nodes[level]); | 6315 | btrfs_set_lock_blocking(path->nodes[level]); |
6283 | path->slots[level] = 0; | 6316 | path->slots[level] = 0; |
6284 | path->locks[level] = 1; | 6317 | path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING; |
6285 | memset(&wc->update_progress, 0, | 6318 | memset(&wc->update_progress, 0, |
6286 | sizeof(wc->update_progress)); | 6319 | sizeof(wc->update_progress)); |
6287 | } else { | 6320 | } else { |
@@ -6296,7 +6329,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, | |||
6296 | path->lowest_level = 0; | 6329 | path->lowest_level = 0; |
6297 | if (ret < 0) { | 6330 | if (ret < 0) { |
6298 | err = ret; | 6331 | err = ret; |
6299 | goto out; | 6332 | goto out_free; |
6300 | } | 6333 | } |
6301 | WARN_ON(ret > 0); | 6334 | WARN_ON(ret > 0); |
6302 | 6335 | ||
@@ -6403,11 +6436,14 @@ int btrfs_drop_snapshot(struct btrfs_root *root, | |||
6403 | free_extent_buffer(root->commit_root); | 6436 | free_extent_buffer(root->commit_root); |
6404 | kfree(root); | 6437 | kfree(root); |
6405 | } | 6438 | } |
6406 | out: | 6439 | out_free: |
6407 | btrfs_end_transaction_throttle(trans, tree_root); | 6440 | btrfs_end_transaction_throttle(trans, tree_root); |
6408 | kfree(wc); | 6441 | kfree(wc); |
6409 | btrfs_free_path(path); | 6442 | btrfs_free_path(path); |
6410 | return err; | 6443 | out: |
6444 | if (err) | ||
6445 | btrfs_std_error(root->fs_info, err); | ||
6446 | return; | ||
6411 | } | 6447 | } |
6412 | 6448 | ||
6413 | /* | 6449 | /* |
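
With btrfs_drop_snapshot() now returning void, every failure is funneled through btrfs_std_error() instead of being handed back to callers that had no useful way to recover. An illustrative before/after at a call site (the call site itself is hypothetical):

	/* Before: an int return that callers could not act on usefully. */
	ret = btrfs_drop_snapshot(root, block_rsv, 0);
	if (ret)
		printk(KERN_ERR "btrfs: snapshot drop failed %d\n", ret);

	/* After: errors are logged and flagged centrally. */
	btrfs_drop_snapshot(root, block_rsv, 0);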
@@ -6449,7 +6485,7 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans, | |||
6449 | level = btrfs_header_level(node); | 6485 | level = btrfs_header_level(node); |
6450 | path->nodes[level] = node; | 6486 | path->nodes[level] = node; |
6451 | path->slots[level] = 0; | 6487 | path->slots[level] = 0; |
6452 | path->locks[level] = 1; | 6488 | path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING; |
6453 | 6489 | ||
6454 | wc->refs[parent_level] = 1; | 6490 | wc->refs[parent_level] = 1; |
6455 | wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF; | 6491 | wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF; |
@@ -6524,30 +6560,48 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags) | |||
6524 | return flags; | 6560 | return flags; |
6525 | } | 6561 | } |
6526 | 6562 | ||
6527 | static int set_block_group_ro(struct btrfs_block_group_cache *cache) | 6563 | static int set_block_group_ro(struct btrfs_block_group_cache *cache, int force) |
6528 | { | 6564 | { |
6529 | struct btrfs_space_info *sinfo = cache->space_info; | 6565 | struct btrfs_space_info *sinfo = cache->space_info; |
6530 | u64 num_bytes; | 6566 | u64 num_bytes; |
6567 | u64 min_allocable_bytes; | ||
6531 | int ret = -ENOSPC; | 6568 | int ret = -ENOSPC; |
6532 | 6569 | ||
6533 | if (cache->ro) | 6570 | |
6534 | return 0; | 6571 | /* |
6572 | * We need some metadata space and system metadata space for | ||
6573 | * allocating chunks in some corner cases, unless we are forced | ||
6574 | * to set it read-only. | ||
6575 | */ | ||
6576 | if ((sinfo->flags & | ||
6577 | (BTRFS_BLOCK_GROUP_SYSTEM | BTRFS_BLOCK_GROUP_METADATA)) && | ||
6578 | !force) | ||
6579 | min_allocable_bytes = 1 * 1024 * 1024; | ||
6580 | else | ||
6581 | min_allocable_bytes = 0; | ||
6535 | 6582 | ||
6536 | spin_lock(&sinfo->lock); | 6583 | spin_lock(&sinfo->lock); |
6537 | spin_lock(&cache->lock); | 6584 | spin_lock(&cache->lock); |
6585 | |||
6586 | if (cache->ro) { | ||
6587 | ret = 0; | ||
6588 | goto out; | ||
6589 | } | ||
6590 | |||
6538 | num_bytes = cache->key.offset - cache->reserved - cache->pinned - | 6591 | num_bytes = cache->key.offset - cache->reserved - cache->pinned - |
6539 | cache->bytes_super - btrfs_block_group_used(&cache->item); | 6592 | cache->bytes_super - btrfs_block_group_used(&cache->item); |
6540 | 6593 | ||
6541 | if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned + | 6594 | if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned + |
6542 | sinfo->bytes_may_use + sinfo->bytes_readonly + | 6595 | sinfo->bytes_may_use + sinfo->bytes_readonly + |
6543 | cache->reserved_pinned + num_bytes <= sinfo->total_bytes) { | 6596 | cache->reserved_pinned + num_bytes + min_allocable_bytes <= |
6597 | sinfo->total_bytes) { | ||
6544 | sinfo->bytes_readonly += num_bytes; | 6598 | sinfo->bytes_readonly += num_bytes; |
6545 | sinfo->bytes_reserved += cache->reserved_pinned; | 6599 | sinfo->bytes_reserved += cache->reserved_pinned; |
6546 | cache->reserved_pinned = 0; | 6600 | cache->reserved_pinned = 0; |
6547 | cache->ro = 1; | 6601 | cache->ro = 1; |
6548 | ret = 0; | 6602 | ret = 0; |
6549 | } | 6603 | } |
6550 | 6604 | out: | |
6551 | spin_unlock(&cache->lock); | 6605 | spin_unlock(&cache->lock); |
6552 | spin_unlock(&sinfo->lock); | 6606 | spin_unlock(&sinfo->lock); |
6553 | return ret; | 6607 | return ret; |
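
The new min_allocable_bytes term keeps a small allocation headroom for metadata and system chunks unless the caller forces the transition. A worked sketch of the admission test with illustrative numbers (all values hypothetical):

	u64 total_bytes   = 10ULL << 30;       /* space_info->total_bytes  */
	u64 committed     = 9ULL << 30;        /* used+reserved+pinned+... */
	u64 num_bytes     = 512ULL << 20;      /* free space in this group */
	u64 min_allocable = 1 * 1024 * 1024;   /* headroom, 0 when forced  */

	/* the group may go read-only only if everything still fits */
	int can_set_ro = committed + num_bytes + min_allocable <=
			 total_bytes;          /* true here, ~511 MiB spare */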
@@ -6571,7 +6625,7 @@ int btrfs_set_block_group_ro(struct btrfs_root *root, | |||
6571 | do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, | 6625 | do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, |
6572 | CHUNK_ALLOC_FORCE); | 6626 | CHUNK_ALLOC_FORCE); |
6573 | 6627 | ||
6574 | ret = set_block_group_ro(cache); | 6628 | ret = set_block_group_ro(cache, 0); |
6575 | if (!ret) | 6629 | if (!ret) |
6576 | goto out; | 6630 | goto out; |
6577 | alloc_flags = get_alloc_profile(root, cache->space_info->flags); | 6631 | alloc_flags = get_alloc_profile(root, cache->space_info->flags); |
@@ -6579,7 +6633,7 @@ int btrfs_set_block_group_ro(struct btrfs_root *root, | |||
6579 | CHUNK_ALLOC_FORCE); | 6633 | CHUNK_ALLOC_FORCE); |
6580 | if (ret < 0) | 6634 | if (ret < 0) |
6581 | goto out; | 6635 | goto out; |
6582 | ret = set_block_group_ro(cache); | 6636 | ret = set_block_group_ro(cache, 0); |
6583 | out: | 6637 | out: |
6584 | btrfs_end_transaction(trans, root); | 6638 | btrfs_end_transaction(trans, root); |
6585 | return ret; | 6639 | return ret; |
@@ -6680,6 +6734,10 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) | |||
6680 | struct btrfs_space_info *space_info; | 6734 | struct btrfs_space_info *space_info; |
6681 | struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; | 6735 | struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; |
6682 | struct btrfs_device *device; | 6736 | struct btrfs_device *device; |
6737 | u64 min_free; | ||
6738 | u64 dev_min = 1; | ||
6739 | u64 dev_nr = 0; | ||
6740 | int index; | ||
6683 | int full = 0; | 6741 | int full = 0; |
6684 | int ret = 0; | 6742 | int ret = 0; |
6685 | 6743 | ||
@@ -6689,8 +6747,10 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) | |||
6689 | if (!block_group) | 6747 | if (!block_group) |
6690 | return -1; | 6748 | return -1; |
6691 | 6749 | ||
6750 | min_free = btrfs_block_group_used(&block_group->item); | ||
6751 | |||
6692 | /* no bytes used, we're good */ | 6752 | /* no bytes used, we're good */ |
6693 | if (!btrfs_block_group_used(&block_group->item)) | 6753 | if (!min_free) |
6694 | goto out; | 6754 | goto out; |
6695 | 6755 | ||
6696 | space_info = block_group->space_info; | 6756 | space_info = block_group->space_info; |
@@ -6706,10 +6766,9 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) | |||
6706 | * all of the extents from this block group. If we can, we're good | 6766 | * all of the extents from this block group. If we can, we're good |
6707 | */ | 6767 | */ |
6708 | if ((space_info->total_bytes != block_group->key.offset) && | 6768 | if ((space_info->total_bytes != block_group->key.offset) && |
6709 | (space_info->bytes_used + space_info->bytes_reserved + | 6769 | (space_info->bytes_used + space_info->bytes_reserved + |
6710 | space_info->bytes_pinned + space_info->bytes_readonly + | 6770 | space_info->bytes_pinned + space_info->bytes_readonly + |
6711 | btrfs_block_group_used(&block_group->item) < | 6771 | min_free < space_info->total_bytes)) { |
6712 | space_info->total_bytes)) { | ||
6713 | spin_unlock(&space_info->lock); | 6772 | spin_unlock(&space_info->lock); |
6714 | goto out; | 6773 | goto out; |
6715 | } | 6774 | } |
@@ -6726,9 +6785,31 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) | |||
6726 | if (full) | 6785 | if (full) |
6727 | goto out; | 6786 | goto out; |
6728 | 6787 | ||
6788 | /* | ||
6789 | * index: | ||
6790 | * 0: raid10 | ||
6791 | * 1: raid1 | ||
6792 | * 2: dup | ||
6793 | * 3: raid0 | ||
6794 | * 4: single | ||
6795 | */ | ||
6796 | index = get_block_group_index(block_group); | ||
6797 | if (index == 0) { | ||
6798 | dev_min = 4; | ||
6799 | /* Divide by 2 */ | ||
6800 | min_free >>= 1; | ||
6801 | } else if (index == 1) { | ||
6802 | dev_min = 2; | ||
6803 | } else if (index == 2) { | ||
6804 | /* Multiply by 2 */ | ||
6805 | min_free <<= 1; | ||
6806 | } else if (index == 3) { | ||
6807 | dev_min = fs_devices->rw_devices; | ||
6808 | do_div(min_free, dev_min); | ||
6809 | } | ||
6810 | |||
6729 | mutex_lock(&root->fs_info->chunk_mutex); | 6811 | mutex_lock(&root->fs_info->chunk_mutex); |
6730 | list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) { | 6812 | list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) { |
6731 | u64 min_free = btrfs_block_group_used(&block_group->item); | ||
6732 | u64 dev_offset; | 6813 | u64 dev_offset; |
6733 | 6814 | ||
6734 | /* | 6815 | /* |
@@ -6739,7 +6820,11 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) | |||
6739 | ret = find_free_dev_extent(NULL, device, min_free, | 6820 | ret = find_free_dev_extent(NULL, device, min_free, |
6740 | &dev_offset, NULL); | 6821 | &dev_offset, NULL); |
6741 | if (!ret) | 6822 | if (!ret) |
6823 | dev_nr++; | ||
6824 | |||
6825 | if (dev_nr >= dev_min) | ||
6742 | break; | 6826 | break; |
6827 | |||
6743 | ret = -1; | 6828 | ret = -1; |
6744 | } | 6829 | } |
6745 | } | 6830 | } |
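
The profile-dependent setup above decides how much free space must be found on how many devices before relocation is considered possible. The same logic as a worked sketch, using the index mapping from the comment and a hypothetical block group with 600 MiB in use:

	u64 min_free = 600ULL << 20;            /* bytes used by the group */
	u64 dev_min  = 1;

	switch (get_block_group_index(block_group)) {
	case 0: /* raid10: striped mirrors, half the data per stripe */
		dev_min = 4;
		min_free >>= 1;                 /* 300 MiB on each of 4 devs */
		break;
	case 1: /* raid1: a full copy on two devices */
		dev_min = 2;                    /* 600 MiB on each of 2 devs */
		break;
	case 2: /* dup: two copies on a single device */
		min_free <<= 1;                 /* 1200 MiB on 1 device */
		break;
	case 3: /* raid0: spread evenly over all writable devices */
		dev_min = fs_devices->rw_devices;
		do_div(min_free, dev_min);
		break;
	default: /* single: 600 MiB on any one device */
		break;
	}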
@@ -7016,7 +7101,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
7016 | 7101 | ||
7017 | set_avail_alloc_bits(root->fs_info, cache->flags); | 7102 | set_avail_alloc_bits(root->fs_info, cache->flags); |
7018 | if (btrfs_chunk_readonly(root, cache->key.objectid)) | 7103 | if (btrfs_chunk_readonly(root, cache->key.objectid)) |
7019 | set_block_group_ro(cache); | 7104 | set_block_group_ro(cache, 1); |
7020 | } | 7105 | } |
7021 | 7106 | ||
7022 | list_for_each_entry_rcu(space_info, &root->fs_info->space_info, list) { | 7107 | list_for_each_entry_rcu(space_info, &root->fs_info->space_info, list) { |
@@ -7030,9 +7115,9 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
7030 | * mirrored block groups. | 7115 | * mirrored block groups. |
7031 | */ | 7116 | */ |
7032 | list_for_each_entry(cache, &space_info->block_groups[3], list) | 7117 | list_for_each_entry(cache, &space_info->block_groups[3], list) |
7033 | set_block_group_ro(cache); | 7118 | set_block_group_ro(cache, 1); |
7034 | list_for_each_entry(cache, &space_info->block_groups[4], list) | 7119 | list_for_each_entry(cache, &space_info->block_groups[4], list) |
7035 | set_block_group_ro(cache); | 7120 | set_block_group_ro(cache, 1); |
7036 | } | 7121 | } |
7037 | 7122 | ||
7038 | init_global_block_rsv(info); | 7123 | init_global_block_rsv(info); |
@@ -7162,11 +7247,15 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | |||
7162 | spin_unlock(&cluster->refill_lock); | 7247 | spin_unlock(&cluster->refill_lock); |
7163 | 7248 | ||
7164 | path = btrfs_alloc_path(); | 7249 | path = btrfs_alloc_path(); |
7165 | BUG_ON(!path); | 7250 | if (!path) { |
7251 | ret = -ENOMEM; | ||
7252 | goto out; | ||
7253 | } | ||
7166 | 7254 | ||
7167 | inode = lookup_free_space_inode(root, block_group, path); | 7255 | inode = lookup_free_space_inode(root, block_group, path); |
7168 | if (!IS_ERR(inode)) { | 7256 | if (!IS_ERR(inode)) { |
7169 | btrfs_orphan_add(trans, inode); | 7257 | ret = btrfs_orphan_add(trans, inode); |
7258 | BUG_ON(ret); | ||
7170 | clear_nlink(inode); | 7259 | clear_nlink(inode); |
7171 | /* One for the block groups ref */ | 7260 | /* One for the block groups ref */ |
7172 | spin_lock(&block_group->lock); | 7261 | spin_lock(&block_group->lock); |
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 7055d11c1ef..d418164a35f 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
@@ -254,14 +254,14 @@ static void merge_cb(struct extent_io_tree *tree, struct extent_state *new, | |||
254 | * | 254 | * |
255 | * This should be called with the tree lock held. | 255 | * This should be called with the tree lock held. |
256 | */ | 256 | */ |
257 | static int merge_state(struct extent_io_tree *tree, | 257 | static void merge_state(struct extent_io_tree *tree, |
258 | struct extent_state *state) | 258 | struct extent_state *state) |
259 | { | 259 | { |
260 | struct extent_state *other; | 260 | struct extent_state *other; |
261 | struct rb_node *other_node; | 261 | struct rb_node *other_node; |
262 | 262 | ||
263 | if (state->state & (EXTENT_IOBITS | EXTENT_BOUNDARY)) | 263 | if (state->state & (EXTENT_IOBITS | EXTENT_BOUNDARY)) |
264 | return 0; | 264 | return; |
265 | 265 | ||
266 | other_node = rb_prev(&state->rb_node); | 266 | other_node = rb_prev(&state->rb_node); |
267 | if (other_node) { | 267 | if (other_node) { |
@@ -281,26 +281,19 @@ static int merge_state(struct extent_io_tree *tree, | |||
281 | if (other->start == state->end + 1 && | 281 | if (other->start == state->end + 1 && |
282 | other->state == state->state) { | 282 | other->state == state->state) { |
283 | merge_cb(tree, state, other); | 283 | merge_cb(tree, state, other); |
284 | other->start = state->start; | 284 | state->end = other->end; |
285 | state->tree = NULL; | 285 | other->tree = NULL; |
286 | rb_erase(&state->rb_node, &tree->state); | 286 | rb_erase(&other->rb_node, &tree->state); |
287 | free_extent_state(state); | 287 | free_extent_state(other); |
288 | state = NULL; | ||
289 | } | 288 | } |
290 | } | 289 | } |
291 | |||
292 | return 0; | ||
293 | } | 290 | } |
294 | 291 | ||
295 | static int set_state_cb(struct extent_io_tree *tree, | 292 | static void set_state_cb(struct extent_io_tree *tree, |
296 | struct extent_state *state, int *bits) | 293 | struct extent_state *state, int *bits) |
297 | { | 294 | { |
298 | if (tree->ops && tree->ops->set_bit_hook) { | 295 | if (tree->ops && tree->ops->set_bit_hook) |
299 | return tree->ops->set_bit_hook(tree->mapping->host, | 296 | tree->ops->set_bit_hook(tree->mapping->host, state, bits); |
300 | state, bits); | ||
301 | } | ||
302 | |||
303 | return 0; | ||
304 | } | 297 | } |
305 | 298 | ||
306 | static void clear_state_cb(struct extent_io_tree *tree, | 299 | static void clear_state_cb(struct extent_io_tree *tree, |
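
Note the direction change in the forward merge above: the old code grew the successor downward and freed 'state', which would leave any cached_state pointer held by a caller dangling. Growing 'state' in place and freeing the absorbed neighbor keeps cached pointers valid:

	/* grow 'state' over its successor instead of freeing it */
	state->end = other->end;
	other->tree = NULL;
	rb_erase(&other->rb_node, &tree->state);
	free_extent_state(other);    /* the caller never holds 'other' */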
@@ -310,6 +303,9 @@ static void clear_state_cb(struct extent_io_tree *tree, | |||
310 | tree->ops->clear_bit_hook(tree->mapping->host, state, bits); | 303 | tree->ops->clear_bit_hook(tree->mapping->host, state, bits); |
311 | } | 304 | } |
312 | 305 | ||
306 | static void set_state_bits(struct extent_io_tree *tree, | ||
307 | struct extent_state *state, int *bits); | ||
308 | |||
313 | /* | 309 | /* |
314 | * insert an extent_state struct into the tree. 'bits' are set on the | 310 | * insert an extent_state struct into the tree. 'bits' are set on the |
315 | * struct before it is inserted. | 311 | * struct before it is inserted. |
@@ -325,8 +321,6 @@ static int insert_state(struct extent_io_tree *tree, | |||
325 | int *bits) | 321 | int *bits) |
326 | { | 322 | { |
327 | struct rb_node *node; | 323 | struct rb_node *node; |
328 | int bits_to_set = *bits & ~EXTENT_CTLBITS; | ||
329 | int ret; | ||
330 | 324 | ||
331 | if (end < start) { | 325 | if (end < start) { |
332 | printk(KERN_ERR "btrfs end < start %llu %llu\n", | 326 | printk(KERN_ERR "btrfs end < start %llu %llu\n", |
@@ -336,13 +330,9 @@ static int insert_state(struct extent_io_tree *tree, | |||
336 | } | 330 | } |
337 | state->start = start; | 331 | state->start = start; |
338 | state->end = end; | 332 | state->end = end; |
339 | ret = set_state_cb(tree, state, bits); | ||
340 | if (ret) | ||
341 | return ret; | ||
342 | 333 | ||
343 | if (bits_to_set & EXTENT_DIRTY) | 334 | set_state_bits(tree, state, bits); |
344 | tree->dirty_bytes += end - start + 1; | 335 | |
345 | state->state |= bits_to_set; | ||
346 | node = tree_insert(&tree->state, end, &state->rb_node); | 336 | node = tree_insert(&tree->state, end, &state->rb_node); |
347 | if (node) { | 337 | if (node) { |
348 | struct extent_state *found; | 338 | struct extent_state *found; |
@@ -351,7 +341,6 @@ static int insert_state(struct extent_io_tree *tree, | |||
351 | "%llu %llu\n", (unsigned long long)found->start, | 341 | "%llu %llu\n", (unsigned long long)found->start, |
352 | (unsigned long long)found->end, | 342 | (unsigned long long)found->end, |
353 | (unsigned long long)start, (unsigned long long)end); | 343 | (unsigned long long)start, (unsigned long long)end); |
354 | free_extent_state(state); | ||
355 | return -EEXIST; | 344 | return -EEXIST; |
356 | } | 345 | } |
357 | state->tree = tree; | 346 | state->tree = tree; |
@@ -359,13 +348,11 @@ static int insert_state(struct extent_io_tree *tree, | |||
359 | return 0; | 348 | return 0; |
360 | } | 349 | } |
361 | 350 | ||
362 | static int split_cb(struct extent_io_tree *tree, struct extent_state *orig, | 351 | static void split_cb(struct extent_io_tree *tree, struct extent_state *orig, |
363 | u64 split) | 352 | u64 split) |
364 | { | 353 | { |
365 | if (tree->ops && tree->ops->split_extent_hook) | 354 | if (tree->ops && tree->ops->split_extent_hook) |
366 | return tree->ops->split_extent_hook(tree->mapping->host, | 355 | tree->ops->split_extent_hook(tree->mapping->host, orig, split); |
367 | orig, split); | ||
368 | return 0; | ||
369 | } | 356 | } |
370 | 357 | ||
371 | /* | 358 | /* |
@@ -500,7 +487,8 @@ again: | |||
500 | cached_state = NULL; | 487 | cached_state = NULL; |
501 | } | 488 | } |
502 | 489 | ||
503 | if (cached && cached->tree && cached->start == start) { | 490 | if (cached && cached->tree && cached->start <= start && |
491 | cached->end > start) { | ||
504 | if (clear) | 492 | if (clear) |
505 | atomic_dec(&cached->refs); | 493 | atomic_dec(&cached->refs); |
506 | state = cached; | 494 | state = cached; |
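
Since merging can now widen a state while it sits in a caller's cache, an exact start match is too strict; the check accepts any cached state whose range still covers the offset. A minimal helper expressing the same test (the helper name is hypothetical; ends are inclusive in this tree):

	static inline int cached_state_covers(struct extent_state *cached,
					      u64 start)
	{
		/* still in the tree, and the state's range extends
		 * over the offset we begin clearing from */
		return cached && cached->tree &&
		       cached->start <= start && cached->end > start;
	}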
@@ -660,34 +648,25 @@ again: | |||
660 | if (start > end) | 648 | if (start > end) |
661 | break; | 649 | break; |
662 | 650 | ||
663 | if (need_resched()) { | 651 | cond_resched_lock(&tree->lock); |
664 | spin_unlock(&tree->lock); | ||
665 | cond_resched(); | ||
666 | spin_lock(&tree->lock); | ||
667 | } | ||
668 | } | 652 | } |
669 | out: | 653 | out: |
670 | spin_unlock(&tree->lock); | 654 | spin_unlock(&tree->lock); |
671 | return 0; | 655 | return 0; |
672 | } | 656 | } |
673 | 657 | ||
674 | static int set_state_bits(struct extent_io_tree *tree, | 658 | static void set_state_bits(struct extent_io_tree *tree, |
675 | struct extent_state *state, | 659 | struct extent_state *state, |
676 | int *bits) | 660 | int *bits) |
677 | { | 661 | { |
678 | int ret; | ||
679 | int bits_to_set = *bits & ~EXTENT_CTLBITS; | 662 | int bits_to_set = *bits & ~EXTENT_CTLBITS; |
680 | 663 | ||
681 | ret = set_state_cb(tree, state, bits); | 664 | set_state_cb(tree, state, bits); |
682 | if (ret) | ||
683 | return ret; | ||
684 | if ((bits_to_set & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) { | 665 | if ((bits_to_set & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) { |
685 | u64 range = state->end - state->start + 1; | 666 | u64 range = state->end - state->start + 1; |
686 | tree->dirty_bytes += range; | 667 | tree->dirty_bytes += range; |
687 | } | 668 | } |
688 | state->state |= bits_to_set; | 669 | state->state |= bits_to_set; |
689 | |||
690 | return 0; | ||
691 | } | 670 | } |
692 | 671 | ||
693 | static void cache_state(struct extent_state *state, | 672 | static void cache_state(struct extent_state *state, |
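
cond_resched_lock() collapses the open-coded unlock/resched/relock sequence: it drops the given spinlock and reschedules only when a reschedule is actually pending, then retakes the lock. The equivalence, side by side:

	/* before: hand-rolled reschedule point under tree->lock */
	if (need_resched()) {
		spin_unlock(&tree->lock);
		cond_resched();
		spin_lock(&tree->lock);
	}

	/* after: one helper, same behavior */
	cond_resched_lock(&tree->lock);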
@@ -742,7 +721,8 @@ again: | |||
742 | spin_lock(&tree->lock); | 721 | spin_lock(&tree->lock); |
743 | if (cached_state && *cached_state) { | 722 | if (cached_state && *cached_state) { |
744 | state = *cached_state; | 723 | state = *cached_state; |
745 | if (state->start == start && state->tree) { | 724 | if (state->start <= start && state->end > start && |
725 | state->tree) { | ||
746 | node = &state->rb_node; | 726 | node = &state->rb_node; |
747 | goto hit_next; | 727 | goto hit_next; |
748 | } | 728 | } |
@@ -779,17 +759,15 @@ hit_next: | |||
779 | goto out; | 759 | goto out; |
780 | } | 760 | } |
781 | 761 | ||
782 | err = set_state_bits(tree, state, &bits); | 762 | set_state_bits(tree, state, &bits); |
783 | if (err) | ||
784 | goto out; | ||
785 | 763 | ||
786 | next_node = rb_next(node); | ||
787 | cache_state(state, cached_state); | 764 | cache_state(state, cached_state); |
788 | merge_state(tree, state); | 765 | merge_state(tree, state); |
789 | if (last_end == (u64)-1) | 766 | if (last_end == (u64)-1) |
790 | goto out; | 767 | goto out; |
791 | 768 | ||
792 | start = last_end + 1; | 769 | start = last_end + 1; |
770 | next_node = rb_next(&state->rb_node); | ||
793 | if (next_node && start < end && prealloc && !need_resched()) { | 771 | if (next_node && start < end && prealloc && !need_resched()) { |
794 | state = rb_entry(next_node, struct extent_state, | 772 | state = rb_entry(next_node, struct extent_state, |
795 | rb_node); | 773 | rb_node); |
@@ -830,9 +808,7 @@ hit_next: | |||
830 | if (err) | 808 | if (err) |
831 | goto out; | 809 | goto out; |
832 | if (state->end <= end) { | 810 | if (state->end <= end) { |
833 | err = set_state_bits(tree, state, &bits); | 811 | set_state_bits(tree, state, &bits); |
834 | if (err) | ||
835 | goto out; | ||
836 | cache_state(state, cached_state); | 812 | cache_state(state, cached_state); |
837 | merge_state(tree, state); | 813 | merge_state(tree, state); |
838 | if (last_end == (u64)-1) | 814 | if (last_end == (u64)-1) |
@@ -862,7 +838,6 @@ hit_next: | |||
862 | * Avoid to free 'prealloc' if it can be merged with | 838 | * Avoid to free 'prealloc' if it can be merged with |
863 | * the later extent. | 839 | * the later extent. |
864 | */ | 840 | */ |
865 | atomic_inc(&prealloc->refs); | ||
866 | err = insert_state(tree, prealloc, start, this_end, | 841 | err = insert_state(tree, prealloc, start, this_end, |
867 | &bits); | 842 | &bits); |
868 | BUG_ON(err == -EEXIST); | 843 | BUG_ON(err == -EEXIST); |
@@ -872,7 +847,6 @@ hit_next: | |||
872 | goto out; | 847 | goto out; |
873 | } | 848 | } |
874 | cache_state(prealloc, cached_state); | 849 | cache_state(prealloc, cached_state); |
875 | free_extent_state(prealloc); | ||
876 | prealloc = NULL; | 850 | prealloc = NULL; |
877 | start = this_end + 1; | 851 | start = this_end + 1; |
878 | goto search_again; | 852 | goto search_again; |
@@ -895,11 +869,7 @@ hit_next: | |||
895 | err = split_state(tree, state, prealloc, end + 1); | 869 | err = split_state(tree, state, prealloc, end + 1); |
896 | BUG_ON(err == -EEXIST); | 870 | BUG_ON(err == -EEXIST); |
897 | 871 | ||
898 | err = set_state_bits(tree, prealloc, &bits); | 872 | set_state_bits(tree, prealloc, &bits); |
899 | if (err) { | ||
900 | prealloc = NULL; | ||
901 | goto out; | ||
902 | } | ||
903 | cache_state(prealloc, cached_state); | 873 | cache_state(prealloc, cached_state); |
904 | merge_state(tree, prealloc); | 874 | merge_state(tree, prealloc); |
905 | prealloc = NULL; | 875 | prealloc = NULL; |
@@ -1061,46 +1031,6 @@ static int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end) | |||
1061 | return 0; | 1031 | return 0; |
1062 | } | 1032 | } |
1063 | 1033 | ||
1064 | /* | ||
1065 | * find the first offset in the io tree with 'bits' set. zero is | ||
1066 | * returned if we find something, and *start_ret and *end_ret are | ||
1067 | * set to reflect the state struct that was found. | ||
1068 | * | ||
1069 | * If nothing was found, 1 is returned, < 0 on error | ||
1070 | */ | ||
1071 | int find_first_extent_bit(struct extent_io_tree *tree, u64 start, | ||
1072 | u64 *start_ret, u64 *end_ret, int bits) | ||
1073 | { | ||
1074 | struct rb_node *node; | ||
1075 | struct extent_state *state; | ||
1076 | int ret = 1; | ||
1077 | |||
1078 | spin_lock(&tree->lock); | ||
1079 | /* | ||
1080 | * this search will find all the extents that end after | ||
1081 | * our range starts. | ||
1082 | */ | ||
1083 | node = tree_search(tree, start); | ||
1084 | if (!node) | ||
1085 | goto out; | ||
1086 | |||
1087 | while (1) { | ||
1088 | state = rb_entry(node, struct extent_state, rb_node); | ||
1089 | if (state->end >= start && (state->state & bits)) { | ||
1090 | *start_ret = state->start; | ||
1091 | *end_ret = state->end; | ||
1092 | ret = 0; | ||
1093 | break; | ||
1094 | } | ||
1095 | node = rb_next(node); | ||
1096 | if (!node) | ||
1097 | break; | ||
1098 | } | ||
1099 | out: | ||
1100 | spin_unlock(&tree->lock); | ||
1101 | return ret; | ||
1102 | } | ||
1103 | |||
1104 | /* find the first state struct with 'bits' set after 'start', and | 1034 | /* find the first state struct with 'bits' set after 'start', and |
1105 | * return it. tree->lock must be held. NULL will be returned if | 1035 | * return it. tree->lock must be held. NULL will be returned if
1106 | * nothing was found after 'start' | 1036 | * nothing was found after 'start' |
@@ -1133,6 +1063,30 @@ out: | |||
1133 | } | 1063 | } |
1134 | 1064 | ||
1135 | /* | 1065 | /* |
1066 | * find the first offset in the io tree with 'bits' set. zero is | ||
1067 | * returned if we find something, and *start_ret and *end_ret are | ||
1068 | * set to reflect the state struct that was found. | ||
1069 | * | ||
1070 | * If nothing was found, 1 is returned, < 0 on error | ||
1071 | */ | ||
1072 | int find_first_extent_bit(struct extent_io_tree *tree, u64 start, | ||
1073 | u64 *start_ret, u64 *end_ret, int bits) | ||
1074 | { | ||
1075 | struct extent_state *state; | ||
1076 | int ret = 1; | ||
1077 | |||
1078 | spin_lock(&tree->lock); | ||
1079 | state = find_first_extent_bit_state(tree, start, bits); | ||
1080 | if (state) { | ||
1081 | *start_ret = state->start; | ||
1082 | *end_ret = state->end; | ||
1083 | ret = 0; | ||
1084 | } | ||
1085 | spin_unlock(&tree->lock); | ||
1086 | return ret; | ||
1087 | } | ||
1088 | |||
1089 | /* | ||
1136 | * find a contiguous range of bytes in the file marked as delalloc, not | 1090 | * find a contiguous range of bytes in the file marked as delalloc, not |
1137 | * more than 'max_bytes'. start and end are used to return the range, | 1091 | * more than 'max_bytes'. start and end are used to return the range, |
1138 | * | 1092 | * |
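
After the move, find_first_extent_bit() is a thin locked wrapper over find_first_extent_bit_state(), and new callers that already hold tree->lock can use the state-returning form directly. A hypothetical locked caller ('offset' is an assumed u64):

	struct extent_state *state;

	spin_lock(&tree->lock);
	state = find_first_extent_bit_state(tree, offset, EXTENT_DELALLOC);
	if (state) {
		/* state->start / state->end describe the first match */
	}
	spin_unlock(&tree->lock);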
@@ -1564,7 +1518,8 @@ int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, | |||
1564 | int bitset = 0; | 1518 | int bitset = 0; |
1565 | 1519 | ||
1566 | spin_lock(&tree->lock); | 1520 | spin_lock(&tree->lock); |
1567 | if (cached && cached->tree && cached->start == start) | 1521 | if (cached && cached->tree && cached->start <= start && |
1522 | cached->end > start) | ||
1568 | node = &cached->rb_node; | 1523 | node = &cached->rb_node; |
1569 | else | 1524 | else |
1570 | node = tree_search(tree, start); | 1525 | node = tree_search(tree, start); |
@@ -2432,6 +2387,7 @@ static int extent_write_cache_pages(struct extent_io_tree *tree, | |||
2432 | pgoff_t index; | 2387 | pgoff_t index; |
2433 | pgoff_t end; /* Inclusive */ | 2388 | pgoff_t end; /* Inclusive */ |
2434 | int scanned = 0; | 2389 | int scanned = 0; |
2390 | int tag; | ||
2435 | 2391 | ||
2436 | pagevec_init(&pvec, 0); | 2392 | pagevec_init(&pvec, 0); |
2437 | if (wbc->range_cyclic) { | 2393 | if (wbc->range_cyclic) { |
@@ -2442,11 +2398,16 @@ static int extent_write_cache_pages(struct extent_io_tree *tree, | |||
2442 | end = wbc->range_end >> PAGE_CACHE_SHIFT; | 2398 | end = wbc->range_end >> PAGE_CACHE_SHIFT; |
2443 | scanned = 1; | 2399 | scanned = 1; |
2444 | } | 2400 | } |
2401 | if (wbc->sync_mode == WB_SYNC_ALL) | ||
2402 | tag = PAGECACHE_TAG_TOWRITE; | ||
2403 | else | ||
2404 | tag = PAGECACHE_TAG_DIRTY; | ||
2445 | retry: | 2405 | retry: |
2406 | if (wbc->sync_mode == WB_SYNC_ALL) | ||
2407 | tag_pages_for_writeback(mapping, index, end); | ||
2446 | while (!done && !nr_to_write_done && (index <= end) && | 2408 | while (!done && !nr_to_write_done && (index <= end) && |
2447 | (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, | 2409 | (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag, |
2448 | PAGECACHE_TAG_DIRTY, min(end - index, | 2410 | min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) { |
2449 | (pgoff_t)PAGEVEC_SIZE-1) + 1))) { | ||
2450 | unsigned i; | 2411 | unsigned i; |
2451 | 2412 | ||
2452 | scanned = 1; | 2413 | scanned = 1; |
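
Tagging the range up front is what prevents data-integrity writeback from livelocking: tag_pages_for_writeback() converts the DIRTY tags present at that moment into TOWRITE tags, so pages dirtied while the scan runs are not revisited and the loop is guaranteed to terminate. The pattern in isolation (a condensed sketch of the hunk above):

	if (wbc->sync_mode == WB_SYNC_ALL)
		tag = PAGECACHE_TAG_TOWRITE;    /* fixed work set */
	else
		tag = PAGECACHE_TAG_DIRTY;      /* best-effort scan */
retry:
	if (wbc->sync_mode == WB_SYNC_ALL)
		tag_pages_for_writeback(mapping, index, end);
	while ((nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
					      PAGEVEC_SIZE))) {
		/* write pages; newly dirtied pages lack the TOWRITE tag */
	}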
@@ -2541,7 +2502,6 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page, | |||
2541 | struct writeback_control *wbc) | 2502 | struct writeback_control *wbc) |
2542 | { | 2503 | { |
2543 | int ret; | 2504 | int ret; |
2544 | struct address_space *mapping = page->mapping; | ||
2545 | struct extent_page_data epd = { | 2505 | struct extent_page_data epd = { |
2546 | .bio = NULL, | 2506 | .bio = NULL, |
2547 | .tree = tree, | 2507 | .tree = tree, |
@@ -2549,18 +2509,9 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page, | |||
2549 | .extent_locked = 0, | 2509 | .extent_locked = 0, |
2550 | .sync_io = wbc->sync_mode == WB_SYNC_ALL, | 2510 | .sync_io = wbc->sync_mode == WB_SYNC_ALL, |
2551 | }; | 2511 | }; |
2552 | struct writeback_control wbc_writepages = { | ||
2553 | .sync_mode = wbc->sync_mode, | ||
2554 | .older_than_this = NULL, | ||
2555 | .nr_to_write = 64, | ||
2556 | .range_start = page_offset(page) + PAGE_CACHE_SIZE, | ||
2557 | .range_end = (loff_t)-1, | ||
2558 | }; | ||
2559 | 2512 | ||
2560 | ret = __extent_writepage(page, wbc, &epd); | 2513 | ret = __extent_writepage(page, wbc, &epd); |
2561 | 2514 | ||
2562 | extent_write_cache_pages(tree, mapping, &wbc_writepages, | ||
2563 | __extent_writepage, &epd, flush_write_bio); | ||
2564 | flush_epd_write_bio(&epd); | 2515 | flush_epd_write_bio(&epd); |
2565 | return ret; | 2516 | return ret; |
2566 | } | 2517 | } |
@@ -2584,7 +2535,6 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode, | |||
2584 | }; | 2535 | }; |
2585 | struct writeback_control wbc_writepages = { | 2536 | struct writeback_control wbc_writepages = { |
2586 | .sync_mode = mode, | 2537 | .sync_mode = mode, |
2587 | .older_than_this = NULL, | ||
2588 | .nr_to_write = nr_pages * 2, | 2538 | .nr_to_write = nr_pages * 2, |
2589 | .range_start = start, | 2539 | .range_start = start, |
2590 | .range_end = end + 1, | 2540 | .range_end = end + 1, |
@@ -3022,8 +2972,15 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree, | |||
3022 | return NULL; | 2972 | return NULL; |
3023 | eb->start = start; | 2973 | eb->start = start; |
3024 | eb->len = len; | 2974 | eb->len = len; |
3025 | spin_lock_init(&eb->lock); | 2975 | rwlock_init(&eb->lock); |
3026 | init_waitqueue_head(&eb->lock_wq); | 2976 | atomic_set(&eb->write_locks, 0); |
2977 | atomic_set(&eb->read_locks, 0); | ||
2978 | atomic_set(&eb->blocking_readers, 0); | ||
2979 | atomic_set(&eb->blocking_writers, 0); | ||
2980 | atomic_set(&eb->spinning_readers, 0); | ||
2981 | atomic_set(&eb->spinning_writers, 0); | ||
2982 | init_waitqueue_head(&eb->write_lock_wq); | ||
2983 | init_waitqueue_head(&eb->read_lock_wq); | ||
3027 | 2984 | ||
3028 | #if LEAK_DEBUG | 2985 | #if LEAK_DEBUG |
3029 | spin_lock_irqsave(&leak_lock, flags); | 2986 | spin_lock_irqsave(&leak_lock, flags); |
@@ -3119,7 +3076,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, | |||
3119 | i = 0; | 3076 | i = 0; |
3120 | } | 3077 | } |
3121 | for (; i < num_pages; i++, index++) { | 3078 | for (; i < num_pages; i++, index++) { |
3122 | p = find_or_create_page(mapping, index, GFP_NOFS | __GFP_HIGHMEM); | 3079 | p = find_or_create_page(mapping, index, GFP_NOFS); |
3123 | if (!p) { | 3080 | if (!p) { |
3124 | WARN_ON(1); | 3081 | WARN_ON(1); |
3125 | goto free_eb; | 3082 | goto free_eb; |
@@ -3266,6 +3223,22 @@ int set_extent_buffer_dirty(struct extent_io_tree *tree, | |||
3266 | return was_dirty; | 3223 | return was_dirty; |
3267 | } | 3224 | } |
3268 | 3225 | ||
3226 | static int __eb_straddles_pages(u64 start, u64 len) | ||
3227 | { | ||
3228 | if (len < PAGE_CACHE_SIZE) | ||
3229 | return 1; | ||
3230 | if (start & (PAGE_CACHE_SIZE - 1)) | ||
3231 | return 1; | ||
3232 | if ((start + len) & (PAGE_CACHE_SIZE - 1)) | ||
3233 | return 1; | ||
3234 | return 0; | ||
3235 | } | ||
3236 | |||
3237 | static int eb_straddles_pages(struct extent_buffer *eb) | ||
3238 | { | ||
3239 | return __eb_straddles_pages(eb->start, eb->len); | ||
3240 | } | ||
3241 | |||
3269 | int clear_extent_buffer_uptodate(struct extent_io_tree *tree, | 3242 | int clear_extent_buffer_uptodate(struct extent_io_tree *tree, |
3270 | struct extent_buffer *eb, | 3243 | struct extent_buffer *eb, |
3271 | struct extent_state **cached_state) | 3244 | struct extent_state **cached_state) |
@@ -3277,8 +3250,10 @@ int clear_extent_buffer_uptodate(struct extent_io_tree *tree, | |||
3277 | num_pages = num_extent_pages(eb->start, eb->len); | 3250 | num_pages = num_extent_pages(eb->start, eb->len); |
3278 | clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); | 3251 | clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); |
3279 | 3252 | ||
3280 | clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, | 3253 | if (eb_straddles_pages(eb)) { |
3281 | cached_state, GFP_NOFS); | 3254 | clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, |
3255 | cached_state, GFP_NOFS); | ||
3256 | } | ||
3282 | for (i = 0; i < num_pages; i++) { | 3257 | for (i = 0; i < num_pages; i++) { |
3283 | page = extent_buffer_page(eb, i); | 3258 | page = extent_buffer_page(eb, i); |
3284 | if (page) | 3259 | if (page) |
@@ -3296,8 +3271,10 @@ int set_extent_buffer_uptodate(struct extent_io_tree *tree, | |||
3296 | 3271 | ||
3297 | num_pages = num_extent_pages(eb->start, eb->len); | 3272 | num_pages = num_extent_pages(eb->start, eb->len); |
3298 | 3273 | ||
3299 | set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, | 3274 | if (eb_straddles_pages(eb)) { |
3300 | NULL, GFP_NOFS); | 3275 | set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, |
3276 | NULL, GFP_NOFS); | ||
3277 | } | ||
3301 | for (i = 0; i < num_pages; i++) { | 3278 | for (i = 0; i < num_pages; i++) { |
3302 | page = extent_buffer_page(eb, i); | 3279 | page = extent_buffer_page(eb, i); |
3303 | if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) || | 3280 | if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) || |
@@ -3320,9 +3297,12 @@ int extent_range_uptodate(struct extent_io_tree *tree, | |||
3320 | int uptodate; | 3297 | int uptodate; |
3321 | unsigned long index; | 3298 | unsigned long index; |
3322 | 3299 | ||
3323 | ret = test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL); | 3300 | if (__eb_straddles_pages(start, end - start + 1)) { |
3324 | if (ret) | 3301 | ret = test_range_bit(tree, start, end, |
3325 | return 1; | 3302 | EXTENT_UPTODATE, 1, NULL); |
3303 | if (ret) | ||
3304 | return 1; | ||
3305 | } | ||
3326 | while (start <= end) { | 3306 | while (start <= end) { |
3327 | index = start >> PAGE_CACHE_SHIFT; | 3307 | index = start >> PAGE_CACHE_SHIFT; |
3328 | page = find_get_page(tree->mapping, index); | 3308 | page = find_get_page(tree->mapping, index); |
@@ -3350,10 +3330,12 @@ int extent_buffer_uptodate(struct extent_io_tree *tree, | |||
3350 | if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags)) | 3330 | if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags)) |
3351 | return 1; | 3331 | return 1; |
3352 | 3332 | ||
3353 | ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1, | 3333 | if (eb_straddles_pages(eb)) { |
3354 | EXTENT_UPTODATE, 1, cached_state); | 3334 | ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1, |
3355 | if (ret) | 3335 | EXTENT_UPTODATE, 1, cached_state); |
3356 | return ret; | 3336 | if (ret) |
3337 | return ret; | ||
3338 | } | ||
3357 | 3339 | ||
3358 | num_pages = num_extent_pages(eb->start, eb->len); | 3340 | num_pages = num_extent_pages(eb->start, eb->len); |
3359 | for (i = 0; i < num_pages; i++) { | 3341 | for (i = 0; i < num_pages; i++) { |
@@ -3386,9 +3368,11 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, | |||
3386 | if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags)) | 3368 | if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags)) |
3387 | return 0; | 3369 | return 0; |
3388 | 3370 | ||
3389 | if (test_range_bit(tree, eb->start, eb->start + eb->len - 1, | 3371 | if (eb_straddles_pages(eb)) { |
3390 | EXTENT_UPTODATE, 1, NULL)) { | 3372 | if (test_range_bit(tree, eb->start, eb->start + eb->len - 1, |
3391 | return 0; | 3373 | EXTENT_UPTODATE, 1, NULL)) { |
3374 | return 0; | ||
3375 | } | ||
3392 | } | 3376 | } |
3393 | 3377 | ||
3394 | if (start) { | 3378 | if (start) { |
@@ -3492,9 +3476,8 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv, | |||
3492 | page = extent_buffer_page(eb, i); | 3476 | page = extent_buffer_page(eb, i); |
3493 | 3477 | ||
3494 | cur = min(len, (PAGE_CACHE_SIZE - offset)); | 3478 | cur = min(len, (PAGE_CACHE_SIZE - offset)); |
3495 | kaddr = kmap_atomic(page, KM_USER1); | 3479 | kaddr = page_address(page); |
3496 | memcpy(dst, kaddr + offset, cur); | 3480 | memcpy(dst, kaddr + offset, cur); |
3497 | kunmap_atomic(kaddr, KM_USER1); | ||
3498 | 3481 | ||
3499 | dst += cur; | 3482 | dst += cur; |
3500 | len -= cur; | 3483 | len -= cur; |
@@ -3504,9 +3487,9 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv, | |||
3504 | } | 3487 | } |
3505 | 3488 | ||
3506 | int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, | 3489 | int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, |
3507 | unsigned long min_len, char **token, char **map, | 3490 | unsigned long min_len, char **map, |
3508 | unsigned long *map_start, | 3491 | unsigned long *map_start, |
3509 | unsigned long *map_len, int km) | 3492 | unsigned long *map_len) |
3510 | { | 3493 | { |
3511 | size_t offset = start & (PAGE_CACHE_SIZE - 1); | 3494 | size_t offset = start & (PAGE_CACHE_SIZE - 1); |
3512 | char *kaddr; | 3495 | char *kaddr; |
@@ -3536,42 +3519,12 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, | |||
3536 | } | 3519 | } |
3537 | 3520 | ||
3538 | p = extent_buffer_page(eb, i); | 3521 | p = extent_buffer_page(eb, i); |
3539 | kaddr = kmap_atomic(p, km); | 3522 | kaddr = page_address(p); |
3540 | *token = kaddr; | ||
3541 | *map = kaddr + offset; | 3523 | *map = kaddr + offset; |
3542 | *map_len = PAGE_CACHE_SIZE - offset; | 3524 | *map_len = PAGE_CACHE_SIZE - offset; |
3543 | return 0; | 3525 | return 0; |
3544 | } | 3526 | } |
3545 | 3527 | ||
3546 | int map_extent_buffer(struct extent_buffer *eb, unsigned long start, | ||
3547 | unsigned long min_len, | ||
3548 | char **token, char **map, | ||
3549 | unsigned long *map_start, | ||
3550 | unsigned long *map_len, int km) | ||
3551 | { | ||
3552 | int err; | ||
3553 | int save = 0; | ||
3554 | if (eb->map_token) { | ||
3555 | unmap_extent_buffer(eb, eb->map_token, km); | ||
3556 | eb->map_token = NULL; | ||
3557 | save = 1; | ||
3558 | } | ||
3559 | err = map_private_extent_buffer(eb, start, min_len, token, map, | ||
3560 | map_start, map_len, km); | ||
3561 | if (!err && save) { | ||
3562 | eb->map_token = *token; | ||
3563 | eb->kaddr = *map; | ||
3564 | eb->map_start = *map_start; | ||
3565 | eb->map_len = *map_len; | ||
3566 | } | ||
3567 | return err; | ||
3568 | } | ||
3569 | |||
3570 | void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km) | ||
3571 | { | ||
3572 | kunmap_atomic(token, km); | ||
3573 | } | ||
3574 | |||
3575 | int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, | 3528 | int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, |
3576 | unsigned long start, | 3529 | unsigned long start, |
3577 | unsigned long len) | 3530 | unsigned long len) |
@@ -3595,9 +3548,8 @@ int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, | |||
3595 | 3548 | ||
3596 | cur = min(len, (PAGE_CACHE_SIZE - offset)); | 3549 | cur = min(len, (PAGE_CACHE_SIZE - offset)); |
3597 | 3550 | ||
3598 | kaddr = kmap_atomic(page, KM_USER0); | 3551 | kaddr = page_address(page); |
3599 | ret = memcmp(ptr, kaddr + offset, cur); | 3552 | ret = memcmp(ptr, kaddr + offset, cur); |
3600 | kunmap_atomic(kaddr, KM_USER0); | ||
3601 | if (ret) | 3553 | if (ret) |
3602 | break; | 3554 | break; |
3603 | 3555 | ||
@@ -3630,9 +3582,8 @@ void write_extent_buffer(struct extent_buffer *eb, const void *srcv, | |||
3630 | WARN_ON(!PageUptodate(page)); | 3582 | WARN_ON(!PageUptodate(page)); |
3631 | 3583 | ||
3632 | cur = min(len, PAGE_CACHE_SIZE - offset); | 3584 | cur = min(len, PAGE_CACHE_SIZE - offset); |
3633 | kaddr = kmap_atomic(page, KM_USER1); | 3585 | kaddr = page_address(page); |
3634 | memcpy(kaddr + offset, src, cur); | 3586 | memcpy(kaddr + offset, src, cur); |
3635 | kunmap_atomic(kaddr, KM_USER1); | ||
3636 | 3587 | ||
3637 | src += cur; | 3588 | src += cur; |
3638 | len -= cur; | 3589 | len -= cur; |
@@ -3661,9 +3612,8 @@ void memset_extent_buffer(struct extent_buffer *eb, char c, | |||
3661 | WARN_ON(!PageUptodate(page)); | 3612 | WARN_ON(!PageUptodate(page)); |
3662 | 3613 | ||
3663 | cur = min(len, PAGE_CACHE_SIZE - offset); | 3614 | cur = min(len, PAGE_CACHE_SIZE - offset); |
3664 | kaddr = kmap_atomic(page, KM_USER0); | 3615 | kaddr = page_address(page); |
3665 | memset(kaddr + offset, c, cur); | 3616 | memset(kaddr + offset, c, cur); |
3666 | kunmap_atomic(kaddr, KM_USER0); | ||
3667 | 3617 | ||
3668 | len -= cur; | 3618 | len -= cur; |
3669 | offset = 0; | 3619 | offset = 0; |
@@ -3694,9 +3644,8 @@ void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src, | |||
3694 | 3644 | ||
3695 | cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - offset)); | 3645 | cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - offset)); |
3696 | 3646 | ||
3697 | kaddr = kmap_atomic(page, KM_USER0); | 3647 | kaddr = page_address(page); |
3698 | read_extent_buffer(src, kaddr + offset, src_offset, cur); | 3648 | read_extent_buffer(src, kaddr + offset, src_offset, cur); |
3699 | kunmap_atomic(kaddr, KM_USER0); | ||
3700 | 3649 | ||
3701 | src_offset += cur; | 3650 | src_offset += cur; |
3702 | len -= cur; | 3651 | len -= cur; |
@@ -3709,20 +3658,17 @@ static void move_pages(struct page *dst_page, struct page *src_page, | |||
3709 | unsigned long dst_off, unsigned long src_off, | 3658 | unsigned long dst_off, unsigned long src_off, |
3710 | unsigned long len) | 3659 | unsigned long len) |
3711 | { | 3660 | { |
3712 | char *dst_kaddr = kmap_atomic(dst_page, KM_USER0); | 3661 | char *dst_kaddr = page_address(dst_page); |
3713 | if (dst_page == src_page) { | 3662 | if (dst_page == src_page) { |
3714 | memmove(dst_kaddr + dst_off, dst_kaddr + src_off, len); | 3663 | memmove(dst_kaddr + dst_off, dst_kaddr + src_off, len); |
3715 | } else { | 3664 | } else { |
3716 | char *src_kaddr = kmap_atomic(src_page, KM_USER1); | 3665 | char *src_kaddr = page_address(src_page); |
3717 | char *p = dst_kaddr + dst_off + len; | 3666 | char *p = dst_kaddr + dst_off + len; |
3718 | char *s = src_kaddr + src_off + len; | 3667 | char *s = src_kaddr + src_off + len; |
3719 | 3668 | ||
3720 | while (len--) | 3669 | while (len--) |
3721 | *--p = *--s; | 3670 | *--p = *--s; |
3722 | |||
3723 | kunmap_atomic(src_kaddr, KM_USER1); | ||
3724 | } | 3671 | } |
3725 | kunmap_atomic(dst_kaddr, KM_USER0); | ||
3726 | } | 3672 | } |
3727 | 3673 | ||
3728 | static inline bool areas_overlap(unsigned long src, unsigned long dst, unsigned long len) | 3674 | static inline bool areas_overlap(unsigned long src, unsigned long dst, unsigned long len) |
@@ -3735,20 +3681,17 @@ static void copy_pages(struct page *dst_page, struct page *src_page, | |||
3735 | unsigned long dst_off, unsigned long src_off, | 3681 | unsigned long dst_off, unsigned long src_off, |
3736 | unsigned long len) | 3682 | unsigned long len) |
3737 | { | 3683 | { |
3738 | char *dst_kaddr = kmap_atomic(dst_page, KM_USER0); | 3684 | char *dst_kaddr = page_address(dst_page); |
3739 | char *src_kaddr; | 3685 | char *src_kaddr; |
3740 | 3686 | ||
3741 | if (dst_page != src_page) { | 3687 | if (dst_page != src_page) { |
3742 | src_kaddr = kmap_atomic(src_page, KM_USER1); | 3688 | src_kaddr = page_address(src_page); |
3743 | } else { | 3689 | } else { |
3744 | src_kaddr = dst_kaddr; | 3690 | src_kaddr = dst_kaddr; |
3745 | BUG_ON(areas_overlap(src_off, dst_off, len)); | 3691 | BUG_ON(areas_overlap(src_off, dst_off, len)); |
3746 | } | 3692 | } |
3747 | 3693 | ||
3748 | memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len); | 3694 | memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len); |
3749 | kunmap_atomic(dst_kaddr, KM_USER0); | ||
3750 | if (dst_page != src_page) | ||
3751 | kunmap_atomic(src_kaddr, KM_USER1); | ||
3752 | } | 3695 | } |
3753 | 3696 | ||
3754 | void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, | 3697 | void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, |
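
The kmap_atomic()/kunmap_atomic() pairs can be dropped because the same patch stops passing __GFP_HIGHMEM when extent-buffer pages are allocated (see the find_or_create_page() hunk above): lowmem pages have a permanent kernel mapping, so page_address() is always valid. The contrast, assuming a page from such an allocation:

	/* highmem-capable page: needs a temporary atomic mapping */
	kaddr = kmap_atomic(page, KM_USER0);
	memcpy(dst, kaddr + offset, cur);
	kunmap_atomic(kaddr, KM_USER0);

	/* lowmem-only page: already mapped, no pairing to get wrong */
	kaddr = page_address(page);
	memcpy(dst, kaddr + offset, cur);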
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index a11a92ee2d3..7b2f0c3e792 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h | |||
@@ -76,15 +76,15 @@ struct extent_io_ops { | |||
76 | struct extent_state *state); | 76 | struct extent_state *state); |
77 | int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end, | 77 | int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end, |
78 | struct extent_state *state, int uptodate); | 78 | struct extent_state *state, int uptodate); |
79 | int (*set_bit_hook)(struct inode *inode, struct extent_state *state, | 79 | void (*set_bit_hook)(struct inode *inode, struct extent_state *state, |
80 | int *bits); | 80 | int *bits); |
81 | int (*clear_bit_hook)(struct inode *inode, struct extent_state *state, | 81 | void (*clear_bit_hook)(struct inode *inode, struct extent_state *state, |
82 | int *bits); | 82 | int *bits); |
83 | int (*merge_extent_hook)(struct inode *inode, | 83 | void (*merge_extent_hook)(struct inode *inode, |
84 | struct extent_state *new, | 84 | struct extent_state *new, |
85 | struct extent_state *other); | 85 | struct extent_state *other); |
86 | int (*split_extent_hook)(struct inode *inode, | 86 | void (*split_extent_hook)(struct inode *inode, |
87 | struct extent_state *orig, u64 split); | 87 | struct extent_state *orig, u64 split); |
88 | int (*write_cache_pages_lock_hook)(struct page *page); | 88 | int (*write_cache_pages_lock_hook)(struct page *page); |
89 | }; | 89 | }; |
90 | 90 | ||
@@ -108,8 +108,6 @@ struct extent_state { | |||
108 | wait_queue_head_t wq; | 108 | wait_queue_head_t wq; |
109 | atomic_t refs; | 109 | atomic_t refs; |
110 | unsigned long state; | 110 | unsigned long state; |
111 | u64 split_start; | ||
112 | u64 split_end; | ||
113 | 111 | ||
114 | /* for use by the FS */ | 112 | /* for use by the FS */ |
115 | u64 private; | 113 | u64 private; |
@@ -120,8 +118,6 @@ struct extent_state { | |||
120 | struct extent_buffer { | 118 | struct extent_buffer { |
121 | u64 start; | 119 | u64 start; |
122 | unsigned long len; | 120 | unsigned long len; |
123 | char *map_token; | ||
124 | char *kaddr; | ||
125 | unsigned long map_start; | 121 | unsigned long map_start; |
126 | unsigned long map_len; | 122 | unsigned long map_len; |
127 | struct page *first_page; | 123 | struct page *first_page; |
@@ -130,14 +126,26 @@ struct extent_buffer { | |||
130 | struct rcu_head rcu_head; | 126 | struct rcu_head rcu_head; |
131 | atomic_t refs; | 127 | atomic_t refs; |
132 | 128 | ||
133 | /* the spinlock is used to protect most operations */ | 129 | /* counts of lock holders on the extent buffer */
134 | spinlock_t lock; | 130 | atomic_t write_locks; |
131 | atomic_t read_locks; | ||
132 | atomic_t blocking_writers; | ||
133 | atomic_t blocking_readers; | ||
134 | atomic_t spinning_readers; | ||
135 | atomic_t spinning_writers; | ||
136 | |||
137 | /* protects write locks */ | ||
138 | rwlock_t lock; | ||
135 | 139 | ||
136 | /* | 140 | /* readers use lock_wq while they wait for the write |
137 | * when we keep the lock held while blocking, waiters go onto | 141 | * lock holders to unlock |
138 | * the wq | ||
139 | */ | 142 | */ |
140 | wait_queue_head_t lock_wq; | 143 | wait_queue_head_t write_lock_wq; |
144 | |||
145 | /* writers use read_lock_wq while they wait for readers | ||
146 | * to unlock | ||
147 | */ | ||
148 | wait_queue_head_t read_lock_wq; | ||
141 | }; | 149 | }; |
142 | 150 | ||
143 | static inline void extent_set_compress_type(unsigned long *bio_flags, | 151 | static inline void extent_set_compress_type(unsigned long *bio_flags, |
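The single spinlock is replaced by an rwlock plus counters that let lock holders switch between spinning and blocking modes. A simplified sketch of how a blocking-aware read lock can be taken with these fields (an illustration of the scheme, not the locking.c code verbatim):

    static void eb_read_lock_sketch(struct extent_buffer *eb)
    {
    again:
            /* wait until no writer holds the lock in blocking mode */
            wait_event(eb->write_lock_wq,
                       atomic_read(&eb->blocking_writers) == 0);
            read_lock(&eb->lock);
            if (atomic_read(&eb->blocking_writers)) {
                    /* a writer went blocking while we raced in; retry */
                    read_unlock(&eb->lock);
                    goto again;
            }
            atomic_inc(&eb->read_locks);
            atomic_inc(&eb->spinning_readers);
    }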
@@ -279,15 +287,10 @@ int clear_extent_buffer_uptodate(struct extent_io_tree *tree, | |||
279 | int extent_buffer_uptodate(struct extent_io_tree *tree, | 287 | int extent_buffer_uptodate(struct extent_io_tree *tree, |
280 | struct extent_buffer *eb, | 288 | struct extent_buffer *eb, |
281 | struct extent_state *cached_state); | 289 | struct extent_state *cached_state); |
282 | int map_extent_buffer(struct extent_buffer *eb, unsigned long offset, | ||
283 | unsigned long min_len, char **token, char **map, | ||
284 | unsigned long *map_start, | ||
285 | unsigned long *map_len, int km); | ||
286 | int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset, | 290 | int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset, |
287 | unsigned long min_len, char **token, char **map, | 291 | unsigned long min_len, char **map, |
288 | unsigned long *map_start, | 292 | unsigned long *map_start, |
289 | unsigned long *map_len, int km); | 293 | unsigned long *map_len); |
290 | void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km); | ||
291 | int extent_range_uptodate(struct extent_io_tree *tree, | 294 | int extent_range_uptodate(struct extent_io_tree *tree, |
292 | u64 start, u64 end); | 295 | u64 start, u64 end); |
293 | int extent_clear_unlock_delalloc(struct inode *inode, | 296 | int extent_clear_unlock_delalloc(struct inode *inode, |
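With the token and KM-slot arguments gone, map_private_extent_buffer() hands back only a kernel address plus the start/length of the mapped run. A usage sketch (hypothetical caller; only the function's new signature is taken from the hunk above):

    char *kaddr;
    unsigned long map_start, map_len;
    u64 val;
    int err;

    err = map_private_extent_buffer(eb, offset, sizeof(u64),
                                    &kaddr, &map_start, &map_len);
    if (!err) {
            /* [map_start, map_start + map_len) is mapped at kaddr */
            memcpy(&val, kaddr + offset - map_start, sizeof(val));
            /* no unmap_extent_buffer() counterpart any more */
    }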
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 2d0410344ea..7c97b330145 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c | |||
@@ -183,22 +183,10 @@ static int mergable_maps(struct extent_map *prev, struct extent_map *next) | |||
183 | return 0; | 183 | return 0; |
184 | } | 184 | } |
185 | 185 | ||
186 | int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len) | 186 | static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em) |
187 | { | 187 | { |
188 | int ret = 0; | ||
189 | struct extent_map *merge = NULL; | 188 | struct extent_map *merge = NULL; |
190 | struct rb_node *rb; | 189 | struct rb_node *rb; |
191 | struct extent_map *em; | ||
192 | |||
193 | write_lock(&tree->lock); | ||
194 | em = lookup_extent_mapping(tree, start, len); | ||
195 | |||
196 | WARN_ON(!em || em->start != start); | ||
197 | |||
198 | if (!em) | ||
199 | goto out; | ||
200 | |||
201 | clear_bit(EXTENT_FLAG_PINNED, &em->flags); | ||
202 | 190 | ||
203 | if (em->start != 0) { | 191 | if (em->start != 0) { |
204 | rb = rb_prev(&em->rb_node); | 192 | rb = rb_prev(&em->rb_node); |
@@ -225,6 +213,24 @@ int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len) | |||
225 | merge->in_tree = 0; | 213 | merge->in_tree = 0; |
226 | free_extent_map(merge); | 214 | free_extent_map(merge); |
227 | } | 215 | } |
216 | } | ||
217 | |||
218 | int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len) | ||
219 | { | ||
220 | int ret = 0; | ||
221 | struct extent_map *em; | ||
222 | |||
223 | write_lock(&tree->lock); | ||
224 | em = lookup_extent_mapping(tree, start, len); | ||
225 | |||
226 | WARN_ON(!em || em->start != start); | ||
227 | |||
228 | if (!em) | ||
229 | goto out; | ||
230 | |||
231 | clear_bit(EXTENT_FLAG_PINNED, &em->flags); | ||
232 | |||
233 | try_merge_map(tree, em); | ||
228 | 234 | ||
229 | free_extent_map(em); | 235 | free_extent_map(em); |
230 | out: | 236 | out: |
@@ -247,7 +253,6 @@ int add_extent_mapping(struct extent_map_tree *tree, | |||
247 | struct extent_map *em) | 253 | struct extent_map *em) |
248 | { | 254 | { |
249 | int ret = 0; | 255 | int ret = 0; |
250 | struct extent_map *merge = NULL; | ||
251 | struct rb_node *rb; | 256 | struct rb_node *rb; |
252 | struct extent_map *exist; | 257 | struct extent_map *exist; |
253 | 258 | ||
@@ -263,30 +268,8 @@ int add_extent_mapping(struct extent_map_tree *tree, | |||
263 | goto out; | 268 | goto out; |
264 | } | 269 | } |
265 | atomic_inc(&em->refs); | 270 | atomic_inc(&em->refs); |
266 | if (em->start != 0) { | 271 | |
267 | rb = rb_prev(&em->rb_node); | 272 | try_merge_map(tree, em); |
268 | if (rb) | ||
269 | merge = rb_entry(rb, struct extent_map, rb_node); | ||
270 | if (rb && mergable_maps(merge, em)) { | ||
271 | em->start = merge->start; | ||
272 | em->len += merge->len; | ||
273 | em->block_len += merge->block_len; | ||
274 | em->block_start = merge->block_start; | ||
275 | merge->in_tree = 0; | ||
276 | rb_erase(&merge->rb_node, &tree->map); | ||
277 | free_extent_map(merge); | ||
278 | } | ||
279 | } | ||
280 | rb = rb_next(&em->rb_node); | ||
281 | if (rb) | ||
282 | merge = rb_entry(rb, struct extent_map, rb_node); | ||
283 | if (rb && mergable_maps(em, merge)) { | ||
284 | em->len += merge->len; | ||
285 | em->block_len += merge->len; | ||
286 | rb_erase(&merge->rb_node, &tree->map); | ||
287 | merge->in_tree = 0; | ||
288 | free_extent_map(merge); | ||
289 | } | ||
290 | out: | 273 | out: |
291 | return ret; | 274 | return ret; |
292 | } | 275 | } |
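The neighbour-merge logic duplicated between unpin_extent_cache() and add_extent_mapping() is now factored into try_merge_map(), so both call sites collapse to the same shape. Roughly (locking and error handling elided; an illustration, not either function verbatim):

    /* em is linked in the rb-tree and tree->lock is held for write */
    try_merge_map(tree, em);    /* absorb mergeable prev/next neighbours */
    free_extent_map(em);        /* the unpin path drops its lookup ref */

For example, adjacent maps covering [0, 4096) and [4096, 8192) with contiguous block addresses fold into one [0, 8192) map, and the absorbed node is erased and freed.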
@@ -299,19 +282,8 @@ static u64 range_end(u64 start, u64 len) | |||
299 | return start + len; | 282 | return start + len; |
300 | } | 283 | } |
301 | 284 | ||
302 | /** | 285 | struct extent_map *__lookup_extent_mapping(struct extent_map_tree *tree, |
303 | * lookup_extent_mapping - lookup extent_map | 286 | u64 start, u64 len, int strict) |
304 | * @tree: tree to lookup in | ||
305 | * @start: byte offset to start the search | ||
306 | * @len: length of the lookup range | ||
307 | * | ||
308 | * Find and return the first extent_map struct in @tree that intersects the | ||
309 | * [start, len] range. There may be additional objects in the tree that | ||
310 | * intersect, so check the object returned carefully to make sure that no | ||
311 | * additional lookups are needed. | ||
312 | */ | ||
313 | struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, | ||
314 | u64 start, u64 len) | ||
315 | { | 287 | { |
316 | struct extent_map *em; | 288 | struct extent_map *em; |
317 | struct rb_node *rb_node; | 289 | struct rb_node *rb_node; |
@@ -320,38 +292,42 @@ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, | |||
320 | u64 end = range_end(start, len); | 292 | u64 end = range_end(start, len); |
321 | 293 | ||
322 | rb_node = __tree_search(&tree->map, start, &prev, &next); | 294 | rb_node = __tree_search(&tree->map, start, &prev, &next); |
323 | if (!rb_node && prev) { | ||
324 | em = rb_entry(prev, struct extent_map, rb_node); | ||
325 | if (end > em->start && start < extent_map_end(em)) | ||
326 | goto found; | ||
327 | } | ||
328 | if (!rb_node && next) { | ||
329 | em = rb_entry(next, struct extent_map, rb_node); | ||
330 | if (end > em->start && start < extent_map_end(em)) | ||
331 | goto found; | ||
332 | } | ||
333 | if (!rb_node) { | 295 | if (!rb_node) { |
334 | em = NULL; | 296 | if (prev) |
335 | goto out; | 297 | rb_node = prev; |
336 | } | 298 | else if (next) |
337 | if (IS_ERR(rb_node)) { | 299 | rb_node = next; |
338 | em = ERR_CAST(rb_node); | 300 | else |
339 | goto out; | 301 | return NULL; |
340 | } | 302 | } |
303 | |||
341 | em = rb_entry(rb_node, struct extent_map, rb_node); | 304 | em = rb_entry(rb_node, struct extent_map, rb_node); |
342 | if (end > em->start && start < extent_map_end(em)) | ||
343 | goto found; | ||
344 | 305 | ||
345 | em = NULL; | 306 | if (strict && !(end > em->start && start < extent_map_end(em))) |
346 | goto out; | 307 | return NULL; |
347 | 308 | ||
348 | found: | ||
349 | atomic_inc(&em->refs); | 309 | atomic_inc(&em->refs); |
350 | out: | ||
351 | return em; | 310 | return em; |
352 | } | 311 | } |
353 | 312 | ||
354 | /** | 313 | /** |
314 | * lookup_extent_mapping - lookup extent_map | ||
315 | * @tree: tree to lookup in | ||
316 | * @start: byte offset to start the search | ||
317 | * @len: length of the lookup range | ||
318 | * | ||
319 | * Find and return the first extent_map struct in @tree that intersects the | ||
320 | * [start, len] range. There may be additional objects in the tree that | ||
321 | * intersect, so check the object returned carefully to make sure that no | ||
322 | * additional lookups are needed. | ||
323 | */ | ||
324 | struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, | ||
325 | u64 start, u64 len) | ||
326 | { | ||
327 | return __lookup_extent_mapping(tree, start, len, 1); | ||
328 | } | ||
329 | |||
330 | /** | ||
355 | * search_extent_mapping - find a nearby extent map | 331 | * search_extent_mapping - find a nearby extent map |
356 | * @tree: tree to lookup in | 332 | * @tree: tree to lookup in |
357 | * @start: byte offset to start the search | 333 | * @start: byte offset to start the search |
@@ -365,38 +341,7 @@ out: | |||
365 | struct extent_map *search_extent_mapping(struct extent_map_tree *tree, | 341 | struct extent_map *search_extent_mapping(struct extent_map_tree *tree, |
366 | u64 start, u64 len) | 342 | u64 start, u64 len) |
367 | { | 343 | { |
368 | struct extent_map *em; | 344 | return __lookup_extent_mapping(tree, start, len, 0); |
369 | struct rb_node *rb_node; | ||
370 | struct rb_node *prev = NULL; | ||
371 | struct rb_node *next = NULL; | ||
372 | |||
373 | rb_node = __tree_search(&tree->map, start, &prev, &next); | ||
374 | if (!rb_node && prev) { | ||
375 | em = rb_entry(prev, struct extent_map, rb_node); | ||
376 | goto found; | ||
377 | } | ||
378 | if (!rb_node && next) { | ||
379 | em = rb_entry(next, struct extent_map, rb_node); | ||
380 | goto found; | ||
381 | } | ||
382 | if (!rb_node) { | ||
383 | em = NULL; | ||
384 | goto out; | ||
385 | } | ||
386 | if (IS_ERR(rb_node)) { | ||
387 | em = ERR_CAST(rb_node); | ||
388 | goto out; | ||
389 | } | ||
390 | em = rb_entry(rb_node, struct extent_map, rb_node); | ||
391 | goto found; | ||
392 | |||
393 | em = NULL; | ||
394 | goto out; | ||
395 | |||
396 | found: | ||
397 | atomic_inc(&em->refs); | ||
398 | out: | ||
399 | return em; | ||
400 | } | 345 | } |
401 | 346 | ||
402 | /** | 347 | /** |
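Both lookups are now thin wrappers around one body, differing only in the strict flag. The behavioural contrast, sketched:

    em = lookup_extent_mapping(tree, start, len);
    /* strict=1: NULL unless em actually overlaps [start, start + len) */

    em = search_extent_mapping(tree, start, len);
    /* strict=0: the nearest map is acceptable even without overlap */

    if (em)
            free_extent_map(em);    /* both take a reference on success */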
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 90d4ee52cd4..a1cb7821bec 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c | |||
@@ -177,6 +177,17 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root, | |||
177 | 177 | ||
178 | WARN_ON(bio->bi_vcnt <= 0); | 178 | WARN_ON(bio->bi_vcnt <= 0); |
179 | 179 | ||
180 | /* | ||
181 | * the free space stuff is only read when it hasn't been | ||
182 | * updated in the current transaction. So, we can safely | ||
183 | * read from the commit root and sidestep a nasty deadlock | ||
184 | * between reading the free space cache and updating the csum tree. | ||
185 | */ | ||
186 | if (btrfs_is_free_space_inode(root, inode)) { | ||
187 | path->search_commit_root = 1; | ||
188 | path->skip_locking = 1; | ||
189 | } | ||
190 | |||
180 | disk_bytenr = (u64)bio->bi_sector << 9; | 191 | disk_bytenr = (u64)bio->bi_sector << 9; |
181 | if (dio) | 192 | if (dio) |
182 | offset = logical_offset; | 193 | offset = logical_offset; |
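The two flags set here form a recurring btrfs pattern for lock-free reads of the last committed tree; the pattern in isolation (illustrative helper, not an in-tree function):

    static void setup_commit_root_read(struct btrfs_path *path)
    {
            path->search_commit_root = 1;   /* walk the committed root */
            path->skip_locking = 1;         /* commit root is immutable */
    }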
@@ -282,7 +293,8 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, | |||
282 | u16 csum_size = btrfs_super_csum_size(&root->fs_info->super_copy); | 293 | u16 csum_size = btrfs_super_csum_size(&root->fs_info->super_copy); |
283 | 294 | ||
284 | path = btrfs_alloc_path(); | 295 | path = btrfs_alloc_path(); |
285 | BUG_ON(!path); | 296 | if (!path) |
297 | return -ENOMEM; | ||
286 | 298 | ||
287 | if (search_commit) { | 299 | if (search_commit) { |
288 | path->skip_locking = 1; | 300 | path->skip_locking = 1; |
@@ -664,15 +676,13 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, | |||
664 | struct btrfs_sector_sum *sector_sum; | 676 | struct btrfs_sector_sum *sector_sum; |
665 | u32 nritems; | 677 | u32 nritems; |
666 | u32 ins_size; | 678 | u32 ins_size; |
667 | char *eb_map; | ||
668 | char *eb_token; | ||
669 | unsigned long map_len; | ||
670 | unsigned long map_start; | ||
671 | u16 csum_size = | 679 | u16 csum_size = |
672 | btrfs_super_csum_size(&root->fs_info->super_copy); | 680 | btrfs_super_csum_size(&root->fs_info->super_copy); |
673 | 681 | ||
674 | path = btrfs_alloc_path(); | 682 | path = btrfs_alloc_path(); |
675 | BUG_ON(!path); | 683 | if (!path) |
684 | return -ENOMEM; | ||
685 | |||
676 | sector_sum = sums->sums; | 686 | sector_sum = sums->sums; |
677 | again: | 687 | again: |
678 | next_offset = (u64)-1; | 688 | next_offset = (u64)-1; |
@@ -814,30 +824,9 @@ found: | |||
814 | item_end = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item); | 824 | item_end = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item); |
815 | item_end = (struct btrfs_csum_item *)((unsigned char *)item_end + | 825 | item_end = (struct btrfs_csum_item *)((unsigned char *)item_end + |
816 | btrfs_item_size_nr(leaf, path->slots[0])); | 826 | btrfs_item_size_nr(leaf, path->slots[0])); |
817 | eb_token = NULL; | ||
818 | next_sector: | 827 | next_sector: |
819 | 828 | ||
820 | if (!eb_token || | 829 | write_extent_buffer(leaf, &sector_sum->sum, (unsigned long)item, csum_size); |
821 | (unsigned long)item + csum_size >= map_start + map_len) { | ||
822 | int err; | ||
823 | |||
824 | if (eb_token) | ||
825 | unmap_extent_buffer(leaf, eb_token, KM_USER1); | ||
826 | eb_token = NULL; | ||
827 | err = map_private_extent_buffer(leaf, (unsigned long)item, | ||
828 | csum_size, | ||
829 | &eb_token, &eb_map, | ||
830 | &map_start, &map_len, KM_USER1); | ||
831 | if (err) | ||
832 | eb_token = NULL; | ||
833 | } | ||
834 | if (eb_token) { | ||
835 | memcpy(eb_token + ((unsigned long)item & (PAGE_CACHE_SIZE - 1)), | ||
836 | &sector_sum->sum, csum_size); | ||
837 | } else { | ||
838 | write_extent_buffer(leaf, &sector_sum->sum, | ||
839 | (unsigned long)item, csum_size); | ||
840 | } | ||
841 | 830 | ||
842 | total_bytes += root->sectorsize; | 831 | total_bytes += root->sectorsize; |
843 | sector_sum++; | 832 | sector_sum++; |
@@ -850,10 +839,7 @@ next_sector: | |||
850 | goto next_sector; | 839 | goto next_sector; |
851 | } | 840 | } |
852 | } | 841 | } |
853 | if (eb_token) { | 842 | |
854 | unmap_extent_buffer(leaf, eb_token, KM_USER1); | ||
855 | eb_token = NULL; | ||
856 | } | ||
857 | btrfs_mark_buffer_dirty(path->nodes[0]); | 843 | btrfs_mark_buffer_dirty(path->nodes[0]); |
858 | if (total_bytes < sums->len) { | 844 | if (total_bytes < sums->len) { |
859 | btrfs_release_path(path); | 845 | btrfs_release_path(path); |
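With extent buffer pages permanently mapped, write_extent_buffer() is cheap enough to call once per sector, so the token bookkeeping above could simply be deleted. The helper hides the page-boundary handling the old code did by hand:

    /*
     * write_extent_buffer() finds which backing page(s) hold
     * [start, start + len) and memcpy()s into each in turn, so the
     * caller keeps no map_start/map_len/token state of its own.
     */
    write_extent_buffer(leaf, &sector_sum->sum,
                        (unsigned long)item, csum_size);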
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index fa4ef18b66b..e4e57d59edb 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
@@ -74,7 +74,7 @@ struct inode_defrag { | |||
74 | * If an existing record is found the defrag item you | 74 | * If an existing record is found the defrag item you |
75 | * pass in is freed | 75 | * pass in is freed |
76 | */ | 76 | */ |
77 | static int __btrfs_add_inode_defrag(struct inode *inode, | 77 | static void __btrfs_add_inode_defrag(struct inode *inode, |
78 | struct inode_defrag *defrag) | 78 | struct inode_defrag *defrag) |
79 | { | 79 | { |
80 | struct btrfs_root *root = BTRFS_I(inode)->root; | 80 | struct btrfs_root *root = BTRFS_I(inode)->root; |
@@ -106,11 +106,11 @@ static int __btrfs_add_inode_defrag(struct inode *inode, | |||
106 | BTRFS_I(inode)->in_defrag = 1; | 106 | BTRFS_I(inode)->in_defrag = 1; |
107 | rb_link_node(&defrag->rb_node, parent, p); | 107 | rb_link_node(&defrag->rb_node, parent, p); |
108 | rb_insert_color(&defrag->rb_node, &root->fs_info->defrag_inodes); | 108 | rb_insert_color(&defrag->rb_node, &root->fs_info->defrag_inodes); |
109 | return 0; | 109 | return; |
110 | 110 | ||
111 | exists: | 111 | exists: |
112 | kfree(defrag); | 112 | kfree(defrag); |
113 | return 0; | 113 | return; |
114 | 114 | ||
115 | } | 115 | } |
116 | 116 | ||
@@ -123,7 +123,6 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, | |||
123 | { | 123 | { |
124 | struct btrfs_root *root = BTRFS_I(inode)->root; | 124 | struct btrfs_root *root = BTRFS_I(inode)->root; |
125 | struct inode_defrag *defrag; | 125 | struct inode_defrag *defrag; |
126 | int ret = 0; | ||
127 | u64 transid; | 126 | u64 transid; |
128 | 127 | ||
129 | if (!btrfs_test_opt(root, AUTO_DEFRAG)) | 128 | if (!btrfs_test_opt(root, AUTO_DEFRAG)) |
@@ -150,9 +149,11 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, | |||
150 | 149 | ||
151 | spin_lock(&root->fs_info->defrag_inodes_lock); | 150 | spin_lock(&root->fs_info->defrag_inodes_lock); |
152 | if (!BTRFS_I(inode)->in_defrag) | 151 | if (!BTRFS_I(inode)->in_defrag) |
153 | ret = __btrfs_add_inode_defrag(inode, defrag); | 152 | __btrfs_add_inode_defrag(inode, defrag); |
153 | else | ||
154 | kfree(defrag); | ||
154 | spin_unlock(&root->fs_info->defrag_inodes_lock); | 155 | spin_unlock(&root->fs_info->defrag_inodes_lock); |
155 | return ret; | 156 | return 0; |
156 | } | 157 | } |
157 | 158 | ||
158 | /* | 159 | /* |
@@ -855,7 +856,8 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, | |||
855 | btrfs_drop_extent_cache(inode, start, end - 1, 0); | 856 | btrfs_drop_extent_cache(inode, start, end - 1, 0); |
856 | 857 | ||
857 | path = btrfs_alloc_path(); | 858 | path = btrfs_alloc_path(); |
858 | BUG_ON(!path); | 859 | if (!path) |
860 | return -ENOMEM; | ||
859 | again: | 861 | again: |
860 | recow = 0; | 862 | recow = 0; |
861 | split = start; | 863 | split = start; |
@@ -1034,11 +1036,13 @@ out: | |||
1034 | * on error we return an unlocked page and the error value | 1036 | * on error we return an unlocked page and the error value |
1035 | * on success we return a locked page and 0 | 1037 | * on success we return a locked page and 0 |
1036 | */ | 1038 | */ |
1037 | static int prepare_uptodate_page(struct page *page, u64 pos) | 1039 | static int prepare_uptodate_page(struct page *page, u64 pos, |
1040 | bool force_uptodate) | ||
1038 | { | 1041 | { |
1039 | int ret = 0; | 1042 | int ret = 0; |
1040 | 1043 | ||
1041 | if ((pos & (PAGE_CACHE_SIZE - 1)) && !PageUptodate(page)) { | 1044 | if (((pos & (PAGE_CACHE_SIZE - 1)) || force_uptodate) && |
1045 | !PageUptodate(page)) { | ||
1042 | ret = btrfs_readpage(NULL, page); | 1046 | ret = btrfs_readpage(NULL, page); |
1043 | if (ret) | 1047 | if (ret) |
1044 | return ret; | 1048 | return ret; |
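The new force_uptodate flag guards against a livelock in the buffered-write loop further down: if copy_from_user() faults and copies zero bytes while the target page is only partially uptodate, the partial-write test alone would skip the read forever. Condensed from the later hunks (a sketch, not the full loop):

    if (copied == 0) {
            force_page_uptodate = true;   /* next prepare_pages() pass
                                             reads page 0 in full */
            dirty_pages = 0;
    } else {
            force_page_uptodate = false;
            /* account the pages actually dirtied */
    }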
@@ -1059,7 +1063,7 @@ static int prepare_uptodate_page(struct page *page, u64 pos) | |||
1059 | static noinline int prepare_pages(struct btrfs_root *root, struct file *file, | 1063 | static noinline int prepare_pages(struct btrfs_root *root, struct file *file, |
1060 | struct page **pages, size_t num_pages, | 1064 | struct page **pages, size_t num_pages, |
1061 | loff_t pos, unsigned long first_index, | 1065 | loff_t pos, unsigned long first_index, |
1062 | unsigned long last_index, size_t write_bytes) | 1066 | size_t write_bytes, bool force_uptodate) |
1063 | { | 1067 | { |
1064 | struct extent_state *cached_state = NULL; | 1068 | struct extent_state *cached_state = NULL; |
1065 | int i; | 1069 | int i; |
@@ -1073,15 +1077,10 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file, | |||
1073 | start_pos = pos & ~((u64)root->sectorsize - 1); | 1077 | start_pos = pos & ~((u64)root->sectorsize - 1); |
1074 | last_pos = ((u64)index + num_pages) << PAGE_CACHE_SHIFT; | 1078 | last_pos = ((u64)index + num_pages) << PAGE_CACHE_SHIFT; |
1075 | 1079 | ||
1076 | if (start_pos > inode->i_size) { | ||
1077 | err = btrfs_cont_expand(inode, i_size_read(inode), start_pos); | ||
1078 | if (err) | ||
1079 | return err; | ||
1080 | } | ||
1081 | |||
1082 | again: | 1080 | again: |
1083 | for (i = 0; i < num_pages; i++) { | 1081 | for (i = 0; i < num_pages; i++) { |
1084 | pages[i] = grab_cache_page(inode->i_mapping, index + i); | 1082 | pages[i] = find_or_create_page(inode->i_mapping, index + i, |
1083 | GFP_NOFS); | ||
1085 | if (!pages[i]) { | 1084 | if (!pages[i]) { |
1086 | faili = i - 1; | 1085 | faili = i - 1; |
1087 | err = -ENOMEM; | 1086 | err = -ENOMEM; |
@@ -1089,10 +1088,11 @@ again: | |||
1089 | } | 1088 | } |
1090 | 1089 | ||
1091 | if (i == 0) | 1090 | if (i == 0) |
1092 | err = prepare_uptodate_page(pages[i], pos); | 1091 | err = prepare_uptodate_page(pages[i], pos, |
1092 | force_uptodate); | ||
1093 | if (i == num_pages - 1) | 1093 | if (i == num_pages - 1) |
1094 | err = prepare_uptodate_page(pages[i], | 1094 | err = prepare_uptodate_page(pages[i], |
1095 | pos + write_bytes); | 1095 | pos + write_bytes, false); |
1096 | if (err) { | 1096 | if (err) { |
1097 | page_cache_release(pages[i]); | 1097 | page_cache_release(pages[i]); |
1098 | faili = i - 1; | 1098 | faili = i - 1; |
@@ -1158,10 +1158,10 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, | |||
1158 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1158 | struct btrfs_root *root = BTRFS_I(inode)->root; |
1159 | struct page **pages = NULL; | 1159 | struct page **pages = NULL; |
1160 | unsigned long first_index; | 1160 | unsigned long first_index; |
1161 | unsigned long last_index; | ||
1162 | size_t num_written = 0; | 1161 | size_t num_written = 0; |
1163 | int nrptrs; | 1162 | int nrptrs; |
1164 | int ret = 0; | 1163 | int ret = 0; |
1164 | bool force_page_uptodate = false; | ||
1165 | 1165 | ||
1166 | nrptrs = min((iov_iter_count(i) + PAGE_CACHE_SIZE - 1) / | 1166 | nrptrs = min((iov_iter_count(i) + PAGE_CACHE_SIZE - 1) / |
1167 | PAGE_CACHE_SIZE, PAGE_CACHE_SIZE / | 1167 | PAGE_CACHE_SIZE, PAGE_CACHE_SIZE / |
@@ -1171,7 +1171,6 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, | |||
1171 | return -ENOMEM; | 1171 | return -ENOMEM; |
1172 | 1172 | ||
1173 | first_index = pos >> PAGE_CACHE_SHIFT; | 1173 | first_index = pos >> PAGE_CACHE_SHIFT; |
1174 | last_index = (pos + iov_iter_count(i)) >> PAGE_CACHE_SHIFT; | ||
1175 | 1174 | ||
1176 | while (iov_iter_count(i) > 0) { | 1175 | while (iov_iter_count(i) > 0) { |
1177 | size_t offset = pos & (PAGE_CACHE_SIZE - 1); | 1176 | size_t offset = pos & (PAGE_CACHE_SIZE - 1); |
@@ -1205,8 +1204,8 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, | |||
1205 | * contents of pages from loop to loop | 1204 | * contents of pages from loop to loop |
1206 | */ | 1205 | */ |
1207 | ret = prepare_pages(root, file, pages, num_pages, | 1206 | ret = prepare_pages(root, file, pages, num_pages, |
1208 | pos, first_index, last_index, | 1207 | pos, first_index, write_bytes, |
1209 | write_bytes); | 1208 | force_page_uptodate); |
1210 | if (ret) { | 1209 | if (ret) { |
1211 | btrfs_delalloc_release_space(inode, | 1210 | btrfs_delalloc_release_space(inode, |
1212 | num_pages << PAGE_CACHE_SHIFT); | 1211 | num_pages << PAGE_CACHE_SHIFT); |
@@ -1223,12 +1222,15 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, | |||
1223 | if (copied < write_bytes) | 1222 | if (copied < write_bytes) |
1224 | nrptrs = 1; | 1223 | nrptrs = 1; |
1225 | 1224 | ||
1226 | if (copied == 0) | 1225 | if (copied == 0) { |
1226 | force_page_uptodate = true; | ||
1227 | dirty_pages = 0; | 1227 | dirty_pages = 0; |
1228 | else | 1228 | } else { |
1229 | force_page_uptodate = false; | ||
1229 | dirty_pages = (copied + offset + | 1230 | dirty_pages = (copied + offset + |
1230 | PAGE_CACHE_SIZE - 1) >> | 1231 | PAGE_CACHE_SIZE - 1) >> |
1231 | PAGE_CACHE_SHIFT; | 1232 | PAGE_CACHE_SHIFT; |
1233 | } | ||
1232 | 1234 | ||
1233 | /* | 1235 | /* |
1234 | * If we had a short copy we need to release the excess delalloc | 1236 | * If we had a short copy we need to release the excess delalloc |
@@ -1238,9 +1240,11 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, | |||
1238 | * managed to copy. | 1240 | * managed to copy. |
1239 | */ | 1241 | */ |
1240 | if (num_pages > dirty_pages) { | 1242 | if (num_pages > dirty_pages) { |
1241 | if (copied > 0) | 1243 | if (copied > 0) { |
1242 | atomic_inc( | 1244 | spin_lock(&BTRFS_I(inode)->lock); |
1243 | &BTRFS_I(inode)->outstanding_extents); | 1245 | BTRFS_I(inode)->outstanding_extents++; |
1246 | spin_unlock(&BTRFS_I(inode)->lock); | ||
1247 | } | ||
1244 | btrfs_delalloc_release_space(inode, | 1248 | btrfs_delalloc_release_space(inode, |
1245 | (num_pages - dirty_pages) << | 1249 | (num_pages - dirty_pages) << |
1246 | PAGE_CACHE_SHIFT); | 1250 | PAGE_CACHE_SHIFT); |
@@ -1336,6 +1340,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, | |||
1336 | struct inode *inode = fdentry(file)->d_inode; | 1340 | struct inode *inode = fdentry(file)->d_inode; |
1337 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1341 | struct btrfs_root *root = BTRFS_I(inode)->root; |
1338 | loff_t *ppos = &iocb->ki_pos; | 1342 | loff_t *ppos = &iocb->ki_pos; |
1343 | u64 start_pos; | ||
1339 | ssize_t num_written = 0; | 1344 | ssize_t num_written = 0; |
1340 | ssize_t err = 0; | 1345 | ssize_t err = 0; |
1341 | size_t count, ocount; | 1346 | size_t count, ocount; |
@@ -1384,6 +1389,15 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, | |||
1384 | file_update_time(file); | 1389 | file_update_time(file); |
1385 | BTRFS_I(inode)->sequence++; | 1390 | BTRFS_I(inode)->sequence++; |
1386 | 1391 | ||
1392 | start_pos = round_down(pos, root->sectorsize); | ||
1393 | if (start_pos > i_size_read(inode)) { | ||
1394 | err = btrfs_cont_expand(inode, i_size_read(inode), start_pos); | ||
1395 | if (err) { | ||
1396 | mutex_unlock(&inode->i_mutex); | ||
1397 | goto out; | ||
1398 | } | ||
1399 | } | ||
1400 | |||
1387 | if (unlikely(file->f_flags & O_DIRECT)) { | 1401 | if (unlikely(file->f_flags & O_DIRECT)) { |
1388 | num_written = __btrfs_direct_write(iocb, iov, nr_segs, | 1402 | num_written = __btrfs_direct_write(iocb, iov, nr_segs, |
1389 | pos, ppos, count, ocount); | 1403 | pos, ppos, count, ocount); |
@@ -1452,7 +1466,7 @@ int btrfs_release_file(struct inode *inode, struct file *filp) | |||
1452 | * important optimization for directories because holding the mutex prevents | 1466 | * important optimization for directories because holding the mutex prevents |
1453 | * new operations on the dir while we write to disk. | 1467 | * new operations on the dir while we write to disk. |
1454 | */ | 1468 | */ |
1455 | int btrfs_sync_file(struct file *file, int datasync) | 1469 | int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) |
1456 | { | 1470 | { |
1457 | struct dentry *dentry = file->f_path.dentry; | 1471 | struct dentry *dentry = file->f_path.dentry; |
1458 | struct inode *inode = dentry->d_inode; | 1472 | struct inode *inode = dentry->d_inode; |
@@ -1462,9 +1476,13 @@ int btrfs_sync_file(struct file *file, int datasync) | |||
1462 | 1476 | ||
1463 | trace_btrfs_sync_file(file, datasync); | 1477 | trace_btrfs_sync_file(file, datasync); |
1464 | 1478 | ||
1479 | ret = filemap_write_and_wait_range(inode->i_mapping, start, end); | ||
1480 | if (ret) | ||
1481 | return ret; | ||
1482 | mutex_lock(&inode->i_mutex); | ||
1483 | |||
1465 | /* we wait first, since the writeback may change the inode */ | 1484 | /* we wait first, since the writeback may change the inode */ |
1466 | root->log_batch++; | 1485 | root->log_batch++; |
1467 | /* the VFS called filemap_fdatawrite for us */ | ||
1468 | btrfs_wait_ordered_range(inode, 0, (u64)-1); | 1486 | btrfs_wait_ordered_range(inode, 0, (u64)-1); |
1469 | root->log_batch++; | 1487 | root->log_batch++; |
1470 | 1488 | ||
@@ -1472,8 +1490,10 @@ int btrfs_sync_file(struct file *file, int datasync) | |||
1472 | * check the transaction that last modified this inode | 1490 | * check the transaction that last modified this inode |
1473 | * and see if its already been committed | 1491 | * and see if its already been committed |
1474 | */ | 1492 | */ |
1475 | if (!BTRFS_I(inode)->last_trans) | 1493 | if (!BTRFS_I(inode)->last_trans) { |
1494 | mutex_unlock(&inode->i_mutex); | ||
1476 | goto out; | 1495 | goto out; |
1496 | } | ||
1477 | 1497 | ||
1478 | /* | 1498 | /* |
1479 | * if the last transaction that changed this file was before | 1499 | * if the last transaction that changed this file was before |
@@ -1484,6 +1504,7 @@ int btrfs_sync_file(struct file *file, int datasync) | |||
1484 | if (BTRFS_I(inode)->last_trans <= | 1504 | if (BTRFS_I(inode)->last_trans <= |
1485 | root->fs_info->last_trans_committed) { | 1505 | root->fs_info->last_trans_committed) { |
1486 | BTRFS_I(inode)->last_trans = 0; | 1506 | BTRFS_I(inode)->last_trans = 0; |
1507 | mutex_unlock(&inode->i_mutex); | ||
1487 | goto out; | 1508 | goto out; |
1488 | } | 1509 | } |
1489 | 1510 | ||
@@ -1496,12 +1517,15 @@ int btrfs_sync_file(struct file *file, int datasync) | |||
1496 | trans = btrfs_start_transaction(root, 0); | 1517 | trans = btrfs_start_transaction(root, 0); |
1497 | if (IS_ERR(trans)) { | 1518 | if (IS_ERR(trans)) { |
1498 | ret = PTR_ERR(trans); | 1519 | ret = PTR_ERR(trans); |
1520 | mutex_unlock(&inode->i_mutex); | ||
1499 | goto out; | 1521 | goto out; |
1500 | } | 1522 | } |
1501 | 1523 | ||
1502 | ret = btrfs_log_dentry_safe(trans, root, dentry); | 1524 | ret = btrfs_log_dentry_safe(trans, root, dentry); |
1503 | if (ret < 0) | 1525 | if (ret < 0) { |
1526 | mutex_unlock(&inode->i_mutex); | ||
1504 | goto out; | 1527 | goto out; |
1528 | } | ||
1505 | 1529 | ||
1506 | /* we've logged all the items and now have a consistent | 1530 | /* we've logged all the items and now have a consistent |
1507 | * version of the file in the log. It is possible that | 1531 | * version of the file in the log. It is possible that |
@@ -1513,7 +1537,7 @@ int btrfs_sync_file(struct file *file, int datasync) | |||
1513 | * file again, but that will end up using the synchronization | 1537 | * file again, but that will end up using the synchronization |
1514 | * inside btrfs_sync_log to keep things safe. | 1538 | * inside btrfs_sync_log to keep things safe. |
1515 | */ | 1539 | */ |
1516 | mutex_unlock(&dentry->d_inode->i_mutex); | 1540 | mutex_unlock(&inode->i_mutex); |
1517 | 1541 | ||
1518 | if (ret != BTRFS_NO_LOG_SYNC) { | 1542 | if (ret != BTRFS_NO_LOG_SYNC) { |
1519 | if (ret > 0) { | 1543 | if (ret > 0) { |
@@ -1528,7 +1552,6 @@ int btrfs_sync_file(struct file *file, int datasync) | |||
1528 | } else { | 1552 | } else { |
1529 | ret = btrfs_end_transaction(trans, root); | 1553 | ret = btrfs_end_transaction(trans, root); |
1530 | } | 1554 | } |
1531 | mutex_lock(&dentry->d_inode->i_mutex); | ||
1532 | out: | 1555 | out: |
1533 | return ret > 0 ? -EIO : ret; | 1556 | return ret > 0 ? -EIO : ret; |
1534 | } | 1557 | } |
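btrfs_sync_file() now matches the ->fsync prototype that carries a byte range, and it must take i_mutex itself because the VFS no longer holds it across the call. The entry/exit discipline, condensed from the hunks above:

    ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
    if (ret)
            return ret;
    mutex_lock(&inode->i_mutex);
    /*
     * Every early return below must unlock i_mutex; the lock is
     * dropped before btrfs_sync_log() and is no longer retaken.
     */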
@@ -1629,11 +1652,15 @@ static long btrfs_fallocate(struct file *file, int mode, | |||
1629 | 1652 | ||
1630 | cur_offset = alloc_start; | 1653 | cur_offset = alloc_start; |
1631 | while (1) { | 1654 | while (1) { |
1655 | u64 actual_end; | ||
1656 | |||
1632 | em = btrfs_get_extent(inode, NULL, 0, cur_offset, | 1657 | em = btrfs_get_extent(inode, NULL, 0, cur_offset, |
1633 | alloc_end - cur_offset, 0); | 1658 | alloc_end - cur_offset, 0); |
1634 | BUG_ON(IS_ERR_OR_NULL(em)); | 1659 | BUG_ON(IS_ERR_OR_NULL(em)); |
1635 | last_byte = min(extent_map_end(em), alloc_end); | 1660 | last_byte = min(extent_map_end(em), alloc_end); |
1661 | actual_end = min_t(u64, extent_map_end(em), offset + len); | ||
1636 | last_byte = (last_byte + mask) & ~mask; | 1662 | last_byte = (last_byte + mask) & ~mask; |
1663 | |||
1637 | if (em->block_start == EXTENT_MAP_HOLE || | 1664 | if (em->block_start == EXTENT_MAP_HOLE || |
1638 | (cur_offset >= inode->i_size && | 1665 | (cur_offset >= inode->i_size && |
1639 | !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) { | 1666 | !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) { |
@@ -1646,6 +1673,16 @@ static long btrfs_fallocate(struct file *file, int mode, | |||
1646 | free_extent_map(em); | 1673 | free_extent_map(em); |
1647 | break; | 1674 | break; |
1648 | } | 1675 | } |
1676 | } else if (actual_end > inode->i_size && | ||
1677 | !(mode & FALLOC_FL_KEEP_SIZE)) { | ||
1678 | /* | ||
1679 | * We didn't need to allocate any more space, but we | ||
1680 | * still extended the size of the file so we need to | ||
1681 | * update i_size. | ||
1682 | */ | ||
1683 | inode->i_ctime = CURRENT_TIME; | ||
1684 | i_size_write(inode, actual_end); | ||
1685 | btrfs_ordered_update_i_size(inode, actual_end, NULL); | ||
1649 | } | 1686 | } |
1650 | free_extent_map(em); | 1687 | free_extent_map(em); |
1651 | 1688 | ||
@@ -1664,8 +1701,163 @@ out: | |||
1664 | return ret; | 1701 | return ret; |
1665 | } | 1702 | } |
1666 | 1703 | ||
1704 | static int find_desired_extent(struct inode *inode, loff_t *offset, int origin) | ||
1705 | { | ||
1706 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
1707 | struct extent_map *em; | ||
1708 | struct extent_state *cached_state = NULL; | ||
1709 | u64 lockstart = *offset; | ||
1710 | u64 lockend = i_size_read(inode); | ||
1711 | u64 start = *offset; | ||
1712 | u64 orig_start = *offset; | ||
1713 | u64 len = i_size_read(inode); | ||
1714 | u64 last_end = 0; | ||
1715 | int ret = 0; | ||
1716 | |||
1717 | lockend = max_t(u64, root->sectorsize, lockend); | ||
1718 | if (lockend <= lockstart) | ||
1719 | lockend = lockstart + root->sectorsize; | ||
1720 | |||
1721 | len = lockend - lockstart + 1; | ||
1722 | |||
1723 | len = max_t(u64, len, root->sectorsize); | ||
1724 | if (inode->i_size == 0) | ||
1725 | return -ENXIO; | ||
1726 | |||
1727 | lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend, 0, | ||
1728 | &cached_state, GFP_NOFS); | ||
1729 | |||
1730 | /* | ||
1731 | * Delalloc is such a pain. If we have a hole and we have pending | ||
1732 | * delalloc for a portion of the hole we will get back a hole that | ||
1733 | * exists for the entire range since it hasn't been actually written | ||
1734 | * yet. So to take care of this case we need to look for an extent just | ||
1735 | * before the position we want in case there is outstanding delalloc | ||
1736 | * going on here. | ||
1737 | */ | ||
1738 | if (origin == SEEK_HOLE && start != 0) { | ||
1739 | if (start <= root->sectorsize) | ||
1740 | em = btrfs_get_extent_fiemap(inode, NULL, 0, 0, | ||
1741 | root->sectorsize, 0); | ||
1742 | else | ||
1743 | em = btrfs_get_extent_fiemap(inode, NULL, 0, | ||
1744 | start - root->sectorsize, | ||
1745 | root->sectorsize, 0); | ||
1746 | if (IS_ERR(em)) { | ||
1747 | ret = -ENXIO; | ||
1748 | goto out; | ||
1749 | } | ||
1750 | last_end = em->start + em->len; | ||
1751 | if (em->block_start == EXTENT_MAP_DELALLOC) | ||
1752 | last_end = min_t(u64, last_end, inode->i_size); | ||
1753 | free_extent_map(em); | ||
1754 | } | ||
1755 | |||
1756 | while (1) { | ||
1757 | em = btrfs_get_extent_fiemap(inode, NULL, 0, start, len, 0); | ||
1758 | if (IS_ERR(em)) { | ||
1759 | ret = -ENXIO; | ||
1760 | break; | ||
1761 | } | ||
1762 | |||
1763 | if (em->block_start == EXTENT_MAP_HOLE) { | ||
1764 | if (test_bit(EXTENT_FLAG_VACANCY, &em->flags)) { | ||
1765 | if (last_end <= orig_start) { | ||
1766 | free_extent_map(em); | ||
1767 | ret = -ENXIO; | ||
1768 | break; | ||
1769 | } | ||
1770 | } | ||
1771 | |||
1772 | if (origin == SEEK_HOLE) { | ||
1773 | *offset = start; | ||
1774 | free_extent_map(em); | ||
1775 | break; | ||
1776 | } | ||
1777 | } else { | ||
1778 | if (origin == SEEK_DATA) { | ||
1779 | if (em->block_start == EXTENT_MAP_DELALLOC) { | ||
1780 | if (start >= inode->i_size) { | ||
1781 | free_extent_map(em); | ||
1782 | ret = -ENXIO; | ||
1783 | break; | ||
1784 | } | ||
1785 | } | ||
1786 | |||
1787 | *offset = start; | ||
1788 | free_extent_map(em); | ||
1789 | break; | ||
1790 | } | ||
1791 | } | ||
1792 | |||
1793 | start = em->start + em->len; | ||
1794 | last_end = em->start + em->len; | ||
1795 | |||
1796 | if (em->block_start == EXTENT_MAP_DELALLOC) | ||
1797 | last_end = min_t(u64, last_end, inode->i_size); | ||
1798 | |||
1799 | if (test_bit(EXTENT_FLAG_VACANCY, &em->flags)) { | ||
1800 | free_extent_map(em); | ||
1801 | ret = -ENXIO; | ||
1802 | break; | ||
1803 | } | ||
1804 | free_extent_map(em); | ||
1805 | cond_resched(); | ||
1806 | } | ||
1807 | if (!ret) | ||
1808 | *offset = min(*offset, inode->i_size); | ||
1809 | out: | ||
1810 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, | ||
1811 | &cached_state, GFP_NOFS); | ||
1812 | return ret; | ||
1813 | } | ||
1814 | |||
1815 | static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int origin) | ||
1816 | { | ||
1817 | struct inode *inode = file->f_mapping->host; | ||
1818 | int ret; | ||
1819 | |||
1820 | mutex_lock(&inode->i_mutex); | ||
1821 | switch (origin) { | ||
1822 | case SEEK_END: | ||
1823 | case SEEK_CUR: | ||
1824 | offset = generic_file_llseek_unlocked(file, offset, origin); | ||
1825 | goto out; | ||
1826 | case SEEK_DATA: | ||
1827 | case SEEK_HOLE: | ||
1828 | if (offset >= i_size_read(inode)) { | ||
1829 | mutex_unlock(&inode->i_mutex); | ||
1830 | return -ENXIO; | ||
1831 | } | ||
1832 | |||
1833 | ret = find_desired_extent(inode, &offset, origin); | ||
1834 | if (ret) { | ||
1835 | mutex_unlock(&inode->i_mutex); | ||
1836 | return ret; | ||
1837 | } | ||
1838 | } | ||
1839 | |||
1840 | if (offset < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET)) { | ||
1841 | offset = -EINVAL; | ||
1842 | goto out; | ||
1843 | } | ||
1844 | if (offset > inode->i_sb->s_maxbytes) { | ||
1845 | offset = -EINVAL; | ||
1846 | goto out; | ||
1847 | } | ||
1848 | |||
1849 | /* Special lock needed here? */ | ||
1850 | if (offset != file->f_pos) { | ||
1851 | file->f_pos = offset; | ||
1852 | file->f_version = 0; | ||
1853 | } | ||
1854 | out: | ||
1855 | mutex_unlock(&inode->i_mutex); | ||
1856 | return offset; | ||
1857 | } | ||
1858 | |||
1667 | const struct file_operations btrfs_file_operations = { | 1859 | const struct file_operations btrfs_file_operations = { |
1668 | .llseek = generic_file_llseek, | 1860 | .llseek = btrfs_file_llseek, |
1669 | .read = do_sync_read, | 1861 | .read = do_sync_read, |
1670 | .write = do_sync_write, | 1862 | .write = do_sync_write, |
1671 | .aio_read = generic_file_aio_read, | 1863 | .aio_read = generic_file_aio_read, |
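With .llseek wired to btrfs_file_llseek, userspace can walk a sparse file's layout. A minimal example, assuming a libc that defines SEEK_DATA/SEEK_HOLE (values 3 and 4 on Linux):

    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    #ifndef SEEK_DATA
    #define SEEK_DATA 3
    #define SEEK_HOLE 4
    #endif

    int main(int argc, char **argv)
    {
            int fd;
            off_t off = 0;

            if (argc < 2 || (fd = open(argv[1], O_RDONLY)) < 0)
                    return 1;

            /* alternate between data and hole until ENXIO past EOF */
            while ((off = lseek(fd, off, SEEK_DATA)) != (off_t)-1) {
                    off_t hole = lseek(fd, off, SEEK_HOLE);
                    printf("data: %lld..%lld\n",
                           (long long)off, (long long)hole);
                    off = hole;
            }
            close(fd);
            return 0;
    }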
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index bf0d61567f3..41ac927401d 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c | |||
@@ -98,6 +98,12 @@ struct inode *lookup_free_space_inode(struct btrfs_root *root, | |||
98 | return inode; | 98 | return inode; |
99 | 99 | ||
100 | spin_lock(&block_group->lock); | 100 | spin_lock(&block_group->lock); |
101 | if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) { | ||
102 | printk(KERN_INFO "Old style space inode found, converting.\n"); | ||
103 | BTRFS_I(inode)->flags &= ~BTRFS_INODE_NODATASUM; | ||
104 | block_group->disk_cache_state = BTRFS_DC_CLEAR; | ||
105 | } | ||
106 | |||
101 | if (!btrfs_fs_closing(root->fs_info)) { | 107 | if (!btrfs_fs_closing(root->fs_info)) { |
102 | block_group->inode = igrab(inode); | 108 | block_group->inode = igrab(inode); |
103 | block_group->iref = 1; | 109 | block_group->iref = 1; |
@@ -135,7 +141,7 @@ int __create_free_space_inode(struct btrfs_root *root, | |||
135 | btrfs_set_inode_gid(leaf, inode_item, 0); | 141 | btrfs_set_inode_gid(leaf, inode_item, 0); |
136 | btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600); | 142 | btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600); |
137 | btrfs_set_inode_flags(leaf, inode_item, BTRFS_INODE_NOCOMPRESS | | 143 | btrfs_set_inode_flags(leaf, inode_item, BTRFS_INODE_NOCOMPRESS | |
138 | BTRFS_INODE_PREALLOC | BTRFS_INODE_NODATASUM); | 144 | BTRFS_INODE_PREALLOC); |
139 | btrfs_set_inode_nlink(leaf, inode_item, 1); | 145 | btrfs_set_inode_nlink(leaf, inode_item, 1); |
140 | btrfs_set_inode_transid(leaf, inode_item, trans->transid); | 146 | btrfs_set_inode_transid(leaf, inode_item, trans->transid); |
141 | btrfs_set_inode_block_group(leaf, inode_item, offset); | 147 | btrfs_set_inode_block_group(leaf, inode_item, offset); |
@@ -184,9 +190,11 @@ int btrfs_truncate_free_space_cache(struct btrfs_root *root, | |||
184 | struct btrfs_path *path, | 190 | struct btrfs_path *path, |
185 | struct inode *inode) | 191 | struct inode *inode) |
186 | { | 192 | { |
193 | struct btrfs_block_rsv *rsv; | ||
187 | loff_t oldsize; | 194 | loff_t oldsize; |
188 | int ret = 0; | 195 | int ret = 0; |
189 | 196 | ||
197 | rsv = trans->block_rsv; | ||
190 | trans->block_rsv = root->orphan_block_rsv; | 198 | trans->block_rsv = root->orphan_block_rsv; |
191 | ret = btrfs_block_rsv_check(trans, root, | 199 | ret = btrfs_block_rsv_check(trans, root, |
192 | root->orphan_block_rsv, | 200 | root->orphan_block_rsv, |
@@ -204,6 +212,8 @@ int btrfs_truncate_free_space_cache(struct btrfs_root *root, | |||
204 | */ | 212 | */ |
205 | ret = btrfs_truncate_inode_items(trans, root, inode, | 213 | ret = btrfs_truncate_inode_items(trans, root, inode, |
206 | 0, BTRFS_EXTENT_DATA_KEY); | 214 | 0, BTRFS_EXTENT_DATA_KEY); |
215 | |||
216 | trans->block_rsv = rsv; | ||
207 | if (ret) { | 217 | if (ret) { |
208 | WARN_ON(1); | 218 | WARN_ON(1); |
209 | return ret; | 219 | return ret; |
@@ -239,17 +249,12 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, | |||
239 | struct btrfs_free_space_header *header; | 249 | struct btrfs_free_space_header *header; |
240 | struct extent_buffer *leaf; | 250 | struct extent_buffer *leaf; |
241 | struct page *page; | 251 | struct page *page; |
242 | u32 *checksums = NULL, *crc; | ||
243 | char *disk_crcs = NULL; | ||
244 | struct btrfs_key key; | 252 | struct btrfs_key key; |
245 | struct list_head bitmaps; | 253 | struct list_head bitmaps; |
246 | u64 num_entries; | 254 | u64 num_entries; |
247 | u64 num_bitmaps; | 255 | u64 num_bitmaps; |
248 | u64 generation; | 256 | u64 generation; |
249 | u32 cur_crc = ~(u32)0; | ||
250 | pgoff_t index = 0; | 257 | pgoff_t index = 0; |
251 | unsigned long first_page_offset; | ||
252 | int num_checksums; | ||
253 | int ret = 0; | 258 | int ret = 0; |
254 | 259 | ||
255 | INIT_LIST_HEAD(&bitmaps); | 260 | INIT_LIST_HEAD(&bitmaps); |
@@ -292,16 +297,6 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, | |||
292 | if (!num_entries) | 297 | if (!num_entries) |
293 | goto out; | 298 | goto out; |
294 | 299 | ||
295 | /* Setup everything for doing checksumming */ | ||
296 | num_checksums = i_size_read(inode) / PAGE_CACHE_SIZE; | ||
297 | checksums = crc = kzalloc(sizeof(u32) * num_checksums, GFP_NOFS); | ||
298 | if (!checksums) | ||
299 | goto out; | ||
300 | first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64); | ||
301 | disk_crcs = kzalloc(first_page_offset, GFP_NOFS); | ||
302 | if (!disk_crcs) | ||
303 | goto out; | ||
304 | |||
305 | ret = readahead_cache(inode); | 300 | ret = readahead_cache(inode); |
306 | if (ret) | 301 | if (ret) |
307 | goto out; | 302 | goto out; |
@@ -311,18 +306,12 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, | |||
311 | struct btrfs_free_space *e; | 306 | struct btrfs_free_space *e; |
312 | void *addr; | 307 | void *addr; |
313 | unsigned long offset = 0; | 308 | unsigned long offset = 0; |
314 | unsigned long start_offset = 0; | ||
315 | int need_loop = 0; | 309 | int need_loop = 0; |
316 | 310 | ||
317 | if (!num_entries && !num_bitmaps) | 311 | if (!num_entries && !num_bitmaps) |
318 | break; | 312 | break; |
319 | 313 | ||
320 | if (index == 0) { | 314 | page = find_or_create_page(inode->i_mapping, index, GFP_NOFS); |
321 | start_offset = first_page_offset; | ||
322 | offset = start_offset; | ||
323 | } | ||
324 | |||
325 | page = grab_cache_page(inode->i_mapping, index); | ||
326 | if (!page) | 315 | if (!page) |
327 | goto free_cache; | 316 | goto free_cache; |
328 | 317 | ||
@@ -342,8 +331,15 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, | |||
342 | if (index == 0) { | 331 | if (index == 0) { |
343 | u64 *gen; | 332 | u64 *gen; |
344 | 333 | ||
345 | memcpy(disk_crcs, addr, first_page_offset); | 334 | /* |
346 | gen = addr + (sizeof(u32) * num_checksums); | 335 | * We put a bogus crc in the front of the first page in |
336 | * case old kernels try to mount a fs with the new | ||
337 | * format to make sure they discard the cache. | ||
338 | */ | ||
339 | addr += sizeof(u64); | ||
340 | offset += sizeof(u64); | ||
341 | |||
342 | gen = addr; | ||
347 | if (*gen != BTRFS_I(inode)->generation) { | 343 | if (*gen != BTRFS_I(inode)->generation) { |
348 | printk(KERN_ERR "btrfs: space cache generation" | 344 | printk(KERN_ERR "btrfs: space cache generation" |
349 | " (%llu) does not match inode (%llu)\n", | 345 | " (%llu) does not match inode (%llu)\n", |
@@ -355,24 +351,10 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, | |||
355 | page_cache_release(page); | 351 | page_cache_release(page); |
356 | goto free_cache; | 352 | goto free_cache; |
357 | } | 353 | } |
358 | crc = (u32 *)disk_crcs; | 354 | addr += sizeof(u64); |
355 | offset += sizeof(u64); | ||
359 | } | 356 | } |
360 | entry = addr + start_offset; | 357 | entry = addr; |
361 | |||
362 | /* First lets check our crc before we do anything fun */ | ||
363 | cur_crc = ~(u32)0; | ||
364 | cur_crc = btrfs_csum_data(root, addr + start_offset, cur_crc, | ||
365 | PAGE_CACHE_SIZE - start_offset); | ||
366 | btrfs_csum_final(cur_crc, (char *)&cur_crc); | ||
367 | if (cur_crc != *crc) { | ||
368 | printk(KERN_ERR "btrfs: crc mismatch for page %lu\n", | ||
369 | index); | ||
370 | kunmap(page); | ||
371 | unlock_page(page); | ||
372 | page_cache_release(page); | ||
373 | goto free_cache; | ||
374 | } | ||
375 | crc++; | ||
376 | 358 | ||
377 | while (1) { | 359 | while (1) { |
378 | if (!num_entries) | 360 | if (!num_entries) |
@@ -470,8 +452,6 @@ next: | |||
470 | 452 | ||
471 | ret = 1; | 453 | ret = 1; |
472 | out: | 454 | out: |
473 | kfree(checksums); | ||
474 | kfree(disk_crcs); | ||
475 | return ret; | 455 | return ret; |
476 | free_cache: | 456 | free_cache: |
477 | __btrfs_remove_free_space_cache(ctl); | 457 | __btrfs_remove_free_space_cache(ctl); |
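Taken together with the writer below, the new cache format gives page 0 a fixed prefix: a deliberately wrong checksum (the csum of the page remainder, incremented by one) so old kernels' per-page crc check always fails and the cache is discarded, then the generation. Reader's-eye sketch of the layout:

    /*
     * Page 0 of the new free-space-cache format:
     *
     *   u64  crc of bytes [8, PAGE_SIZE) + 1   <- never matches; poison
     *                                             for old kernels
     *   u64  generation (transid)
     *   struct btrfs_free_space_entry ...      <- entries start here
     *
     * Later pages hold entries or a full-page bitmap only.
     */
    addr += sizeof(u64);    /* skip the poisoned crc   */
    gen = addr;             /* validate the generation */
    addr += sizeof(u64);    /* first entry             */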
@@ -569,8 +549,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
569 | struct btrfs_key key; | 549 | struct btrfs_key key; |
570 | u64 start, end, len; | 550 | u64 start, end, len; |
571 | u64 bytes = 0; | 551 | u64 bytes = 0; |
572 | u32 *crc, *checksums; | 552 | u32 crc = ~(u32)0; |
573 | unsigned long first_page_offset; | ||
574 | int index = 0, num_pages = 0; | 553 | int index = 0, num_pages = 0; |
575 | int entries = 0; | 554 | int entries = 0; |
576 | int bitmaps = 0; | 555 | int bitmaps = 0; |
@@ -590,34 +569,13 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
590 | num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> | 569 | num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> |
591 | PAGE_CACHE_SHIFT; | 570 | PAGE_CACHE_SHIFT; |
592 | 571 | ||
593 | /* Since the first page has all of our checksums and our generation we | ||
594 | * need to calculate the offset into the page that we can start writing | ||
595 | * our entries. | ||
596 | */ | ||
597 | first_page_offset = (sizeof(u32) * num_pages) + sizeof(u64); | ||
598 | |||
599 | filemap_write_and_wait(inode->i_mapping); | 572 | filemap_write_and_wait(inode->i_mapping); |
600 | btrfs_wait_ordered_range(inode, inode->i_size & | 573 | btrfs_wait_ordered_range(inode, inode->i_size & |
601 | ~(root->sectorsize - 1), (u64)-1); | 574 | ~(root->sectorsize - 1), (u64)-1); |
602 | 575 | ||
603 | /* make sure we don't overflow that first page */ | ||
604 | if (first_page_offset + sizeof(struct btrfs_free_space_entry) >= PAGE_CACHE_SIZE) { | ||
605 | /* this is really the same as running out of space, where we also return 0 */ | ||
606 | printk(KERN_CRIT "Btrfs: free space cache was too big for the crc page\n"); | ||
607 | ret = 0; | ||
608 | goto out_update; | ||
609 | } | ||
610 | |||
611 | /* We need a checksum per page. */ | ||
612 | crc = checksums = kzalloc(sizeof(u32) * num_pages, GFP_NOFS); | ||
613 | if (!crc) | ||
614 | return -1; | ||
615 | |||
616 | pages = kzalloc(sizeof(struct page *) * num_pages, GFP_NOFS); | 576 | pages = kzalloc(sizeof(struct page *) * num_pages, GFP_NOFS); |
617 | if (!pages) { | 577 | if (!pages) |
618 | kfree(crc); | ||
619 | return -1; | 578 | return -1; |
620 | } | ||
621 | 579 | ||
622 | /* Get the cluster for this block_group if it exists */ | 580 | /* Get the cluster for this block_group if it exists */ |
623 | if (block_group && !list_empty(&block_group->cluster_list)) | 581 | if (block_group && !list_empty(&block_group->cluster_list)) |
@@ -640,7 +598,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
640 | * know and don't freak out. | 598 | * know and don't freak out. |
641 | */ | 599 | */ |
642 | while (index < num_pages) { | 600 | while (index < num_pages) { |
643 | page = grab_cache_page(inode->i_mapping, index); | 601 | page = find_or_create_page(inode->i_mapping, index, GFP_NOFS); |
644 | if (!page) { | 602 | if (!page) { |
645 | int i; | 603 | int i; |
646 | 604 | ||
@@ -648,7 +606,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
648 | unlock_page(pages[i]); | 606 | unlock_page(pages[i]); |
649 | page_cache_release(pages[i]); | 607 | page_cache_release(pages[i]); |
650 | } | 608 | } |
651 | goto out_free; | 609 | goto out; |
652 | } | 610 | } |
653 | pages[index] = page; | 611 | pages[index] = page; |
654 | index++; | 612 | index++; |
@@ -668,17 +626,11 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
668 | /* Write out the extent entries */ | 626 | /* Write out the extent entries */ |
669 | do { | 627 | do { |
670 | struct btrfs_free_space_entry *entry; | 628 | struct btrfs_free_space_entry *entry; |
671 | void *addr; | 629 | void *addr, *orig; |
672 | unsigned long offset = 0; | 630 | unsigned long offset = 0; |
673 | unsigned long start_offset = 0; | ||
674 | 631 | ||
675 | next_page = false; | 632 | next_page = false; |
676 | 633 | ||
677 | if (index == 0) { | ||
678 | start_offset = first_page_offset; | ||
679 | offset = start_offset; | ||
680 | } | ||
681 | |||
682 | if (index >= num_pages) { | 634 | if (index >= num_pages) { |
683 | out_of_space = true; | 635 | out_of_space = true; |
684 | break; | 636 | break; |
@@ -686,10 +638,26 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
686 | 638 | ||
687 | page = pages[index]; | 639 | page = pages[index]; |
688 | 640 | ||
689 | addr = kmap(page); | 641 | orig = addr = kmap(page); |
690 | entry = addr + start_offset; | 642 | if (index == 0) { |
643 | u64 *gen; | ||
691 | 644 | ||
692 | memset(addr, 0, PAGE_CACHE_SIZE); | 645 | /* |
646 | * We're going to put in a bogus crc for this page to | ||
647 | * make sure that old kernels who aren't aware of this | ||
648 | * format will be sure to discard the cache. | ||
649 | */ | ||
650 | addr += sizeof(u64); | ||
651 | offset += sizeof(u64); | ||
652 | |||
653 | gen = addr; | ||
654 | *gen = trans->transid; | ||
655 | addr += sizeof(u64); | ||
656 | offset += sizeof(u64); | ||
657 | } | ||
658 | entry = addr; | ||
659 | |||
660 | memset(addr, 0, PAGE_CACHE_SIZE - offset); | ||
693 | while (node && !next_page) { | 661 | while (node && !next_page) { |
694 | struct btrfs_free_space *e; | 662 | struct btrfs_free_space *e; |
695 | 663 | ||
@@ -752,13 +720,19 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
752 | next_page = true; | 720 | next_page = true; |
753 | entry++; | 721 | entry++; |
754 | } | 722 | } |
755 | *crc = ~(u32)0; | ||
756 | *crc = btrfs_csum_data(root, addr + start_offset, *crc, | ||
757 | PAGE_CACHE_SIZE - start_offset); | ||
758 | kunmap(page); | ||
759 | 723 | ||
760 | btrfs_csum_final(*crc, (char *)crc); | 724 | /* Generate bogus crc value */ |
761 | crc++; | 725 | if (index == 0) { |
726 | u32 *tmp; | ||
727 | crc = btrfs_csum_data(root, orig + sizeof(u64), crc, | ||
728 | PAGE_CACHE_SIZE - sizeof(u64)); | ||
729 | btrfs_csum_final(crc, (char *)&crc); | ||
730 | crc++; | ||
731 | tmp = orig; | ||
732 | *tmp = crc; | ||
733 | } | ||
734 | |||
735 | kunmap(page); | ||
762 | 736 | ||
763 | bytes += PAGE_CACHE_SIZE; | 737 | bytes += PAGE_CACHE_SIZE; |
764 | 738 | ||
@@ -779,11 +753,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
779 | 753 | ||
780 | addr = kmap(page); | 754 | addr = kmap(page); |
781 | memcpy(addr, entry->bitmap, PAGE_CACHE_SIZE); | 755 | memcpy(addr, entry->bitmap, PAGE_CACHE_SIZE); |
782 | *crc = ~(u32)0; | ||
783 | *crc = btrfs_csum_data(root, addr, *crc, PAGE_CACHE_SIZE); | ||
784 | kunmap(page); | 756 | kunmap(page); |
785 | btrfs_csum_final(*crc, (char *)crc); | ||
786 | crc++; | ||
787 | bytes += PAGE_CACHE_SIZE; | 757 | bytes += PAGE_CACHE_SIZE; |
788 | 758 | ||
789 | list_del_init(&entry->list); | 759 | list_del_init(&entry->list); |
@@ -796,7 +766,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
796 | i_size_read(inode) - 1, &cached_state, | 766 | i_size_read(inode) - 1, &cached_state, |
797 | GFP_NOFS); | 767 | GFP_NOFS); |
798 | ret = 0; | 768 | ret = 0; |
799 | goto out_free; | 769 | goto out; |
800 | } | 770 | } |
801 | 771 | ||
802 | /* Zero out the rest of the pages just to make sure */ | 772 | /* Zero out the rest of the pages just to make sure */ |
@@ -811,20 +781,6 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
811 | index++; | 781 | index++; |
812 | } | 782 | } |
813 | 783 | ||
814 | /* Write the checksums and trans id to the first page */ | ||
815 | { | ||
816 | void *addr; | ||
817 | u64 *gen; | ||
818 | |||
819 | page = pages[0]; | ||
820 | |||
821 | addr = kmap(page); | ||
822 | memcpy(addr, checksums, sizeof(u32) * num_pages); | ||
823 | gen = addr + (sizeof(u32) * num_pages); | ||
824 | *gen = trans->transid; | ||
825 | kunmap(page); | ||
826 | } | ||
827 | |||
828 | ret = btrfs_dirty_pages(root, inode, pages, num_pages, 0, | 784 | ret = btrfs_dirty_pages(root, inode, pages, num_pages, 0, |
829 | bytes, &cached_state); | 785 | bytes, &cached_state); |
830 | btrfs_drop_pages(pages, num_pages); | 786 | btrfs_drop_pages(pages, num_pages); |
@@ -833,7 +789,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
833 | 789 | ||
834 | if (ret) { | 790 | if (ret) { |
835 | ret = 0; | 791 | ret = 0; |
836 | goto out_free; | 792 | goto out; |
837 | } | 793 | } |
838 | 794 | ||
839 | BTRFS_I(inode)->generation = trans->transid; | 795 | BTRFS_I(inode)->generation = trans->transid; |
@@ -850,7 +806,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
850 | clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1, | 806 | clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1, |
851 | EXTENT_DIRTY | EXTENT_DELALLOC | | 807 | EXTENT_DIRTY | EXTENT_DELALLOC | |
852 | EXTENT_DO_ACCOUNTING, 0, 0, NULL, GFP_NOFS); | 808 | EXTENT_DO_ACCOUNTING, 0, 0, NULL, GFP_NOFS); |
853 | goto out_free; | 809 | goto out; |
854 | } | 810 | } |
855 | leaf = path->nodes[0]; | 811 | leaf = path->nodes[0]; |
856 | if (ret > 0) { | 812 | if (ret > 0) { |
@@ -866,7 +822,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
866 | EXTENT_DO_ACCOUNTING, 0, 0, NULL, | 822 | EXTENT_DO_ACCOUNTING, 0, 0, NULL, |
867 | GFP_NOFS); | 823 | GFP_NOFS); |
868 | btrfs_release_path(path); | 824 | btrfs_release_path(path); |
869 | goto out_free; | 825 | goto out; |
870 | } | 826 | } |
871 | } | 827 | } |
872 | header = btrfs_item_ptr(leaf, path->slots[0], | 828 | header = btrfs_item_ptr(leaf, path->slots[0], |
@@ -879,11 +835,8 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
879 | 835 | ||
880 | ret = 1; | 836 | ret = 1; |
881 | 837 | ||
882 | out_free: | 838 | out: |
883 | kfree(checksums); | ||
884 | kfree(pages); | 839 | kfree(pages); |
885 | |||
886 | out_update: | ||
887 | if (ret != 1) { | 840 | if (ret != 1) { |
888 | invalidate_inode_pages2_range(inode->i_mapping, 0, index); | 841 | invalidate_inode_pages2_range(inode->i_mapping, 0, index); |
889 | BTRFS_I(inode)->generation = 0; | 842 | BTRFS_I(inode)->generation = 0; |
@@ -1219,9 +1172,9 @@ static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl) | |||
1219 | div64_u64(extent_bytes, (sizeof(struct btrfs_free_space))); | 1172 | div64_u64(extent_bytes, (sizeof(struct btrfs_free_space))); |
1220 | } | 1173 | } |
1221 | 1174 | ||
1222 | static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, | 1175 | static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, |
1223 | struct btrfs_free_space *info, u64 offset, | 1176 | struct btrfs_free_space *info, |
1224 | u64 bytes) | 1177 | u64 offset, u64 bytes) |
1225 | { | 1178 | { |
1226 | unsigned long start, count; | 1179 | unsigned long start, count; |
1227 | 1180 | ||
@@ -1232,6 +1185,13 @@ static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, | |||
1232 | bitmap_clear(info->bitmap, start, count); | 1185 | bitmap_clear(info->bitmap, start, count); |
1233 | 1186 | ||
1234 | info->bytes -= bytes; | 1187 | info->bytes -= bytes; |
1188 | } | ||
1189 | |||
1190 | static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, | ||
1191 | struct btrfs_free_space *info, u64 offset, | ||
1192 | u64 bytes) | ||
1193 | { | ||
1194 | __bitmap_clear_bits(ctl, info, offset, bytes); | ||
1235 | ctl->free_space -= bytes; | 1195 | ctl->free_space -= bytes; |
1236 | } | 1196 | } |
1237 | 1197 | ||
@@ -2035,7 +1995,7 @@ static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group, | |||
2035 | return 0; | 1995 | return 0; |
2036 | 1996 | ||
2037 | ret = search_start; | 1997 | ret = search_start; |
2038 | bitmap_clear_bits(ctl, entry, ret, bytes); | 1998 | __bitmap_clear_bits(ctl, entry, ret, bytes); |
2039 | 1999 | ||
2040 | return ret; | 2000 | return ret; |
2041 | } | 2001 | } |
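
This split separates bit clearing from global accounting: bitmap_clear_bits() still charges ctl->free_space, while the new __bitmap_clear_bits() worker only shrinks the entry itself, and btrfs_alloc_from_bitmap() switches to the worker, apparently because space handed to a cluster was already taken out of the ctl's count when the cluster was set up. A minimal model of the wrapper/worker shape (types and names invented for the sketch):

    #include <stdint.h>

    struct space_ctl {
            uint64_t free_space;
    };

    struct space_entry {
            uint64_t bytes;
            unsigned long bitmap[64];
    };

    static void clear_range(unsigned long *map, unsigned int start,
                            unsigned int count)
    {
            unsigned int i;

            for (i = start; i < start + count; i++)
                    map[i / (8 * sizeof(long))] &=
                            ~(1UL << (i % (8 * sizeof(long))));
    }

    /* worker: clear the bits and shrink the entry, nothing else */
    static void __entry_clear_bits(struct space_entry *e, unsigned int start,
                                   unsigned int count, uint64_t bytes)
    {
            clear_range(e->bitmap, start, count);
            e->bytes -= bytes;
    }

    /* wrapper: normal path, also charges the global free-space counter */
    static void entry_clear_bits(struct space_ctl *ctl, struct space_entry *e,
                                 unsigned int start, unsigned int count,
                                 uint64_t bytes)
    {
            __entry_clear_bits(e, start, count, bytes);
            ctl->free_space -= bytes;
    }
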
@@ -2090,7 +2050,6 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group, | |||
2090 | continue; | 2050 | continue; |
2091 | } | 2051 | } |
2092 | } else { | 2052 | } else { |
2093 | |||
2094 | ret = entry->offset; | 2053 | ret = entry->offset; |
2095 | 2054 | ||
2096 | entry->offset += bytes; | 2055 | entry->offset += bytes; |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 3601f0aebdd..b2d004ad66a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -750,15 +750,6 @@ static u64 get_extent_allocation_hint(struct inode *inode, u64 start, | |||
750 | return alloc_hint; | 750 | return alloc_hint; |
751 | } | 751 | } |
752 | 752 | ||
753 | static inline bool is_free_space_inode(struct btrfs_root *root, | ||
754 | struct inode *inode) | ||
755 | { | ||
756 | if (root == root->fs_info->tree_root || | ||
757 | BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID) | ||
758 | return true; | ||
759 | return false; | ||
760 | } | ||
761 | |||
762 | /* | 753 | /* |
763 | * when extent_io.c finds a delayed allocation range in the file, | 754 | * when extent_io.c finds a delayed allocation range in the file, |
764 | * the call backs end up in this code. The basic idea is to | 755 | * the call backs end up in this code. The basic idea is to |
@@ -791,7 +782,7 @@ static noinline int cow_file_range(struct inode *inode, | |||
791 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | 782 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; |
792 | int ret = 0; | 783 | int ret = 0; |
793 | 784 | ||
794 | BUG_ON(is_free_space_inode(root, inode)); | 785 | BUG_ON(btrfs_is_free_space_inode(root, inode)); |
795 | trans = btrfs_join_transaction(root); | 786 | trans = btrfs_join_transaction(root); |
796 | BUG_ON(IS_ERR(trans)); | 787 | BUG_ON(IS_ERR(trans)); |
797 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | 788 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; |
@@ -1070,9 +1061,10 @@ static noinline int run_delalloc_nocow(struct inode *inode, | |||
1070 | u64 ino = btrfs_ino(inode); | 1061 | u64 ino = btrfs_ino(inode); |
1071 | 1062 | ||
1072 | path = btrfs_alloc_path(); | 1063 | path = btrfs_alloc_path(); |
1073 | BUG_ON(!path); | 1064 | if (!path) |
1065 | return -ENOMEM; | ||
1074 | 1066 | ||
1075 | nolock = is_free_space_inode(root, inode); | 1067 | nolock = btrfs_is_free_space_inode(root, inode); |
1076 | 1068 | ||
1077 | if (nolock) | 1069 | if (nolock) |
1078 | trans = btrfs_join_transaction_nolock(root); | 1070 | trans = btrfs_join_transaction_nolock(root); |
@@ -1291,15 +1283,16 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page, | |||
1291 | return ret; | 1283 | return ret; |
1292 | } | 1284 | } |
1293 | 1285 | ||
1294 | static int btrfs_split_extent_hook(struct inode *inode, | 1286 | static void btrfs_split_extent_hook(struct inode *inode, |
1295 | struct extent_state *orig, u64 split) | 1287 | struct extent_state *orig, u64 split) |
1296 | { | 1288 | { |
1297 | /* not delalloc, ignore it */ | 1289 | /* not delalloc, ignore it */ |
1298 | if (!(orig->state & EXTENT_DELALLOC)) | 1290 | if (!(orig->state & EXTENT_DELALLOC)) |
1299 | return 0; | 1291 | return; |
1300 | 1292 | ||
1301 | atomic_inc(&BTRFS_I(inode)->outstanding_extents); | 1293 | spin_lock(&BTRFS_I(inode)->lock); |
1302 | return 0; | 1294 | BTRFS_I(inode)->outstanding_extents++; |
1295 | spin_unlock(&BTRFS_I(inode)->lock); | ||
1303 | } | 1296 | } |
1304 | 1297 | ||
1305 | /* | 1298 | /* |
@@ -1308,16 +1301,17 @@ static int btrfs_split_extent_hook(struct inode *inode, | |||
1308 | * extents, such as when we are doing sequential writes, so we can properly | 1301 | * extents, such as when we are doing sequential writes, so we can properly |
1309 | * account for the metadata space we'll need. | 1302 | * account for the metadata space we'll need. |
1310 | */ | 1303 | */ |
1311 | static int btrfs_merge_extent_hook(struct inode *inode, | 1304 | static void btrfs_merge_extent_hook(struct inode *inode, |
1312 | struct extent_state *new, | 1305 | struct extent_state *new, |
1313 | struct extent_state *other) | 1306 | struct extent_state *other) |
1314 | { | 1307 | { |
1315 | /* not delalloc, ignore it */ | 1308 | /* not delalloc, ignore it */ |
1316 | if (!(other->state & EXTENT_DELALLOC)) | 1309 | if (!(other->state & EXTENT_DELALLOC)) |
1317 | return 0; | 1310 | return; |
1318 | 1311 | ||
1319 | atomic_dec(&BTRFS_I(inode)->outstanding_extents); | 1312 | spin_lock(&BTRFS_I(inode)->lock); |
1320 | return 0; | 1313 | BTRFS_I(inode)->outstanding_extents--; |
1314 | spin_unlock(&BTRFS_I(inode)->lock); | ||
1321 | } | 1315 | } |
1322 | 1316 | ||
1323 | /* | 1317 | /* |
@@ -1325,8 +1319,8 @@ static int btrfs_merge_extent_hook(struct inode *inode, | |||
1325 | * bytes in this file, and to maintain the list of inodes that | 1319 | * bytes in this file, and to maintain the list of inodes that |
1326 | * have pending delalloc work to be done. | 1320 | * have pending delalloc work to be done. |
1327 | */ | 1321 | */ |
1328 | static int btrfs_set_bit_hook(struct inode *inode, | 1322 | static void btrfs_set_bit_hook(struct inode *inode, |
1329 | struct extent_state *state, int *bits) | 1323 | struct extent_state *state, int *bits) |
1330 | { | 1324 | { |
1331 | 1325 | ||
1332 | /* | 1326 | /* |
@@ -1337,12 +1331,15 @@ static int btrfs_set_bit_hook(struct inode *inode, | |||
1337 | if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { | 1331 | if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { |
1338 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1332 | struct btrfs_root *root = BTRFS_I(inode)->root; |
1339 | u64 len = state->end + 1 - state->start; | 1333 | u64 len = state->end + 1 - state->start; |
1340 | bool do_list = !is_free_space_inode(root, inode); | 1334 | bool do_list = !btrfs_is_free_space_inode(root, inode); |
1341 | 1335 | ||
1342 | if (*bits & EXTENT_FIRST_DELALLOC) | 1336 | if (*bits & EXTENT_FIRST_DELALLOC) { |
1343 | *bits &= ~EXTENT_FIRST_DELALLOC; | 1337 | *bits &= ~EXTENT_FIRST_DELALLOC; |
1344 | else | 1338 | } else { |
1345 | atomic_inc(&BTRFS_I(inode)->outstanding_extents); | 1339 | spin_lock(&BTRFS_I(inode)->lock); |
1340 | BTRFS_I(inode)->outstanding_extents++; | ||
1341 | spin_unlock(&BTRFS_I(inode)->lock); | ||
1342 | } | ||
1346 | 1343 | ||
1347 | spin_lock(&root->fs_info->delalloc_lock); | 1344 | spin_lock(&root->fs_info->delalloc_lock); |
1348 | BTRFS_I(inode)->delalloc_bytes += len; | 1345 | BTRFS_I(inode)->delalloc_bytes += len; |
@@ -1353,14 +1350,13 @@ static int btrfs_set_bit_hook(struct inode *inode, | |||
1353 | } | 1350 | } |
1354 | spin_unlock(&root->fs_info->delalloc_lock); | 1351 | spin_unlock(&root->fs_info->delalloc_lock); |
1355 | } | 1352 | } |
1356 | return 0; | ||
1357 | } | 1353 | } |
1358 | 1354 | ||
1359 | /* | 1355 | /* |
1360 | * extent_io.c clear_bit_hook, see set_bit_hook for why | 1356 | * extent_io.c clear_bit_hook, see set_bit_hook for why |
1361 | */ | 1357 | */ |
1362 | static int btrfs_clear_bit_hook(struct inode *inode, | 1358 | static void btrfs_clear_bit_hook(struct inode *inode, |
1363 | struct extent_state *state, int *bits) | 1359 | struct extent_state *state, int *bits) |
1364 | { | 1360 | { |
1365 | /* | 1361 | /* |
1366 | * set_bit and clear bit hooks normally require _irqsave/restore | 1362 | * set_bit and clear bit hooks normally require _irqsave/restore |
@@ -1370,12 +1366,15 @@ static int btrfs_clear_bit_hook(struct inode *inode, | |||
1370 | if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { | 1366 | if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { |
1371 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1367 | struct btrfs_root *root = BTRFS_I(inode)->root; |
1372 | u64 len = state->end + 1 - state->start; | 1368 | u64 len = state->end + 1 - state->start; |
1373 | bool do_list = !is_free_space_inode(root, inode); | 1369 | bool do_list = !btrfs_is_free_space_inode(root, inode); |
1374 | 1370 | ||
1375 | if (*bits & EXTENT_FIRST_DELALLOC) | 1371 | if (*bits & EXTENT_FIRST_DELALLOC) { |
1376 | *bits &= ~EXTENT_FIRST_DELALLOC; | 1372 | *bits &= ~EXTENT_FIRST_DELALLOC; |
1377 | else if (!(*bits & EXTENT_DO_ACCOUNTING)) | 1373 | } else if (!(*bits & EXTENT_DO_ACCOUNTING)) { |
1378 | atomic_dec(&BTRFS_I(inode)->outstanding_extents); | 1374 | spin_lock(&BTRFS_I(inode)->lock); |
1375 | BTRFS_I(inode)->outstanding_extents--; | ||
1376 | spin_unlock(&BTRFS_I(inode)->lock); | ||
1377 | } | ||
1379 | 1378 | ||
1380 | if (*bits & EXTENT_DO_ACCOUNTING) | 1379 | if (*bits & EXTENT_DO_ACCOUNTING) |
1381 | btrfs_delalloc_release_metadata(inode, len); | 1380 | btrfs_delalloc_release_metadata(inode, len); |
@@ -1394,7 +1393,6 @@ static int btrfs_clear_bit_hook(struct inode *inode, | |||
1394 | } | 1393 | } |
1395 | spin_unlock(&root->fs_info->delalloc_lock); | 1394 | spin_unlock(&root->fs_info->delalloc_lock); |
1396 | } | 1395 | } |
1397 | return 0; | ||
1398 | } | 1396 | } |
1399 | 1397 | ||
1400 | /* | 1398 | /* |
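
Throughout these hooks, atomic_inc()/atomic_dec() on outstanding_extents gives way to a plain counter guarded by the new BTRFS_I(inode)->lock spinlock (initialised in btrfs_alloc_inode() further down). A lock costs slightly more than a lone atomic, but it lets the counter be updated together with other per-inode reservation state in one critical section. A hedged userspace model of the pattern, with pthreads standing in for the kernel spinlock (names invented):

    #include <pthread.h>

    /* model of the per-inode state that moved under one lock */
    struct inode_state {
            pthread_spinlock_t lock;
            int outstanding_extents;
            int reserved_extents;
    };

    static void state_init(struct inode_state *s)
    {
            pthread_spin_init(&s->lock, PTHREAD_PROCESS_PRIVATE);
            s->outstanding_extents = 0;
            s->reserved_extents = 0;
    }

    static void split_hook(struct inode_state *s)
    {
            pthread_spin_lock(&s->lock);
            s->outstanding_extents++;       /* one extent became two */
            pthread_spin_unlock(&s->lock);
    }

    static void merge_hook(struct inode_state *s)
    {
            pthread_spin_lock(&s->lock);
            s->outstanding_extents--;       /* two extents became one */
            pthread_spin_unlock(&s->lock);
    }
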
@@ -1477,7 +1475,7 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | |||
1477 | 1475 | ||
1478 | skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; | 1476 | skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; |
1479 | 1477 | ||
1480 | if (is_free_space_inode(root, inode)) | 1478 | if (btrfs_is_free_space_inode(root, inode)) |
1481 | ret = btrfs_bio_wq_end_io(root->fs_info, bio, 2); | 1479 | ret = btrfs_bio_wq_end_io(root->fs_info, bio, 2); |
1482 | else | 1480 | else |
1483 | ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); | 1481 | ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); |
@@ -1644,7 +1642,8 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, | |||
1644 | int ret; | 1642 | int ret; |
1645 | 1643 | ||
1646 | path = btrfs_alloc_path(); | 1644 | path = btrfs_alloc_path(); |
1647 | BUG_ON(!path); | 1645 | if (!path) |
1646 | return -ENOMEM; | ||
1648 | 1647 | ||
1649 | path->leave_spinning = 1; | 1648 | path->leave_spinning = 1; |
1650 | 1649 | ||
@@ -1726,7 +1725,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | |||
1726 | return 0; | 1725 | return 0; |
1727 | BUG_ON(!ordered_extent); | 1726 | BUG_ON(!ordered_extent); |
1728 | 1727 | ||
1729 | nolock = is_free_space_inode(root, inode); | 1728 | nolock = btrfs_is_free_space_inode(root, inode); |
1730 | 1729 | ||
1731 | if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) { | 1730 | if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) { |
1732 | BUG_ON(!list_empty(&ordered_extent->list)); | 1731 | BUG_ON(!list_empty(&ordered_extent->list)); |
@@ -1787,7 +1786,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | |||
1787 | &ordered_extent->list); | 1786 | &ordered_extent->list); |
1788 | 1787 | ||
1789 | ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent); | 1788 | ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent); |
1790 | if (!ret) { | 1789 | if (!ret || !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) { |
1791 | ret = btrfs_update_inode(trans, root, inode); | 1790 | ret = btrfs_update_inode(trans, root, inode); |
1792 | BUG_ON(ret); | 1791 | BUG_ON(ret); |
1793 | } | 1792 | } |
@@ -2214,7 +2213,8 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) | |||
2214 | 2213 | ||
2215 | if (!root->orphan_block_rsv) { | 2214 | if (!root->orphan_block_rsv) { |
2216 | block_rsv = btrfs_alloc_block_rsv(root); | 2215 | block_rsv = btrfs_alloc_block_rsv(root); |
2217 | BUG_ON(!block_rsv); | 2216 | if (!block_rsv) |
2217 | return -ENOMEM; | ||
2218 | } | 2218 | } |
2219 | 2219 | ||
2220 | spin_lock(&root->orphan_lock); | 2220 | spin_lock(&root->orphan_lock); |
@@ -2516,7 +2516,9 @@ static void btrfs_read_locked_inode(struct inode *inode) | |||
2516 | filled = true; | 2516 | filled = true; |
2517 | 2517 | ||
2518 | path = btrfs_alloc_path(); | 2518 | path = btrfs_alloc_path(); |
2519 | BUG_ON(!path); | 2519 | if (!path) |
2520 | goto make_bad; | ||
2521 | |||
2520 | path->leave_spinning = 1; | 2522 | path->leave_spinning = 1; |
2521 | memcpy(&location, &BTRFS_I(inode)->location, sizeof(location)); | 2523 | memcpy(&location, &BTRFS_I(inode)->location, sizeof(location)); |
2522 | 2524 | ||
@@ -2531,13 +2533,6 @@ static void btrfs_read_locked_inode(struct inode *inode) | |||
2531 | 2533 | ||
2532 | inode_item = btrfs_item_ptr(leaf, path->slots[0], | 2534 | inode_item = btrfs_item_ptr(leaf, path->slots[0], |
2533 | struct btrfs_inode_item); | 2535 | struct btrfs_inode_item); |
2534 | if (!leaf->map_token) | ||
2535 | map_private_extent_buffer(leaf, (unsigned long)inode_item, | ||
2536 | sizeof(struct btrfs_inode_item), | ||
2537 | &leaf->map_token, &leaf->kaddr, | ||
2538 | &leaf->map_start, &leaf->map_len, | ||
2539 | KM_USER1); | ||
2540 | |||
2541 | inode->i_mode = btrfs_inode_mode(leaf, inode_item); | 2536 | inode->i_mode = btrfs_inode_mode(leaf, inode_item); |
2542 | inode->i_nlink = btrfs_inode_nlink(leaf, inode_item); | 2537 | inode->i_nlink = btrfs_inode_nlink(leaf, inode_item); |
2543 | inode->i_uid = btrfs_inode_uid(leaf, inode_item); | 2538 | inode->i_uid = btrfs_inode_uid(leaf, inode_item); |
@@ -2575,11 +2570,6 @@ cache_acl: | |||
2575 | if (!maybe_acls) | 2570 | if (!maybe_acls) |
2576 | cache_no_acl(inode); | 2571 | cache_no_acl(inode); |
2577 | 2572 | ||
2578 | if (leaf->map_token) { | ||
2579 | unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); | ||
2580 | leaf->map_token = NULL; | ||
2581 | } | ||
2582 | |||
2583 | btrfs_free_path(path); | 2573 | btrfs_free_path(path); |
2584 | 2574 | ||
2585 | switch (inode->i_mode & S_IFMT) { | 2575 | switch (inode->i_mode & S_IFMT) { |
@@ -2624,13 +2614,6 @@ static void fill_inode_item(struct btrfs_trans_handle *trans, | |||
2624 | struct btrfs_inode_item *item, | 2614 | struct btrfs_inode_item *item, |
2625 | struct inode *inode) | 2615 | struct inode *inode) |
2626 | { | 2616 | { |
2627 | if (!leaf->map_token) | ||
2628 | map_private_extent_buffer(leaf, (unsigned long)item, | ||
2629 | sizeof(struct btrfs_inode_item), | ||
2630 | &leaf->map_token, &leaf->kaddr, | ||
2631 | &leaf->map_start, &leaf->map_len, | ||
2632 | KM_USER1); | ||
2633 | |||
2634 | btrfs_set_inode_uid(leaf, item, inode->i_uid); | 2617 | btrfs_set_inode_uid(leaf, item, inode->i_uid); |
2635 | btrfs_set_inode_gid(leaf, item, inode->i_gid); | 2618 | btrfs_set_inode_gid(leaf, item, inode->i_gid); |
2636 | btrfs_set_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size); | 2619 | btrfs_set_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size); |
@@ -2659,11 +2642,6 @@ static void fill_inode_item(struct btrfs_trans_handle *trans, | |||
2659 | btrfs_set_inode_rdev(leaf, item, inode->i_rdev); | 2642 | btrfs_set_inode_rdev(leaf, item, inode->i_rdev); |
2660 | btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags); | 2643 | btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags); |
2661 | btrfs_set_inode_block_group(leaf, item, 0); | 2644 | btrfs_set_inode_block_group(leaf, item, 0); |
2662 | |||
2663 | if (leaf->map_token) { | ||
2664 | unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); | ||
2665 | leaf->map_token = NULL; | ||
2666 | } | ||
2667 | } | 2645 | } |
2668 | 2646 | ||
2669 | /* | 2647 | /* |
@@ -2684,7 +2662,7 @@ noinline int btrfs_update_inode(struct btrfs_trans_handle *trans, | |||
2684 | * The data relocation inode should also be directly updated | 2662 | * The data relocation inode should also be directly updated |
2685 | * without delay | 2663 | * without delay |
2686 | */ | 2664 | */ |
2687 | if (!is_free_space_inode(root, inode) | 2665 | if (!btrfs_is_free_space_inode(root, inode) |
2688 | && root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) { | 2666 | && root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) { |
2689 | ret = btrfs_delayed_update_inode(trans, root, inode); | 2667 | ret = btrfs_delayed_update_inode(trans, root, inode); |
2690 | if (!ret) | 2668 | if (!ret) |
@@ -3021,13 +2999,16 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) | |||
3021 | 2999 | ||
3022 | ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode, | 3000 | ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode, |
3023 | dentry->d_name.name, dentry->d_name.len); | 3001 | dentry->d_name.name, dentry->d_name.len); |
3024 | BUG_ON(ret); | 3002 | if (ret) |
3003 | goto out; | ||
3025 | 3004 | ||
3026 | if (inode->i_nlink == 0) { | 3005 | if (inode->i_nlink == 0) { |
3027 | ret = btrfs_orphan_add(trans, inode); | 3006 | ret = btrfs_orphan_add(trans, inode); |
3028 | BUG_ON(ret); | 3007 | if (ret) |
3008 | goto out; | ||
3029 | } | 3009 | } |
3030 | 3010 | ||
3011 | out: | ||
3031 | nr = trans->blocks_used; | 3012 | nr = trans->blocks_used; |
3032 | __unlink_end_trans(trans, root); | 3013 | __unlink_end_trans(trans, root); |
3033 | btrfs_btree_balance_dirty(root, nr); | 3014 | btrfs_btree_balance_dirty(root, nr); |
@@ -3170,6 +3151,11 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, | |||
3170 | 3151 | ||
3171 | BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY); | 3152 | BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY); |
3172 | 3153 | ||
3154 | path = btrfs_alloc_path(); | ||
3155 | if (!path) | ||
3156 | return -ENOMEM; | ||
3157 | path->reada = -1; | ||
3158 | |||
3173 | if (root->ref_cows || root == root->fs_info->tree_root) | 3159 | if (root->ref_cows || root == root->fs_info->tree_root) |
3174 | btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0); | 3160 | btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0); |
3175 | 3161 | ||
@@ -3182,10 +3168,6 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, | |||
3182 | if (min_type == 0 && root == BTRFS_I(inode)->root) | 3168 | if (min_type == 0 && root == BTRFS_I(inode)->root) |
3183 | btrfs_kill_delayed_inode_items(inode); | 3169 | btrfs_kill_delayed_inode_items(inode); |
3184 | 3170 | ||
3185 | path = btrfs_alloc_path(); | ||
3186 | BUG_ON(!path); | ||
3187 | path->reada = -1; | ||
3188 | |||
3189 | key.objectid = ino; | 3171 | key.objectid = ino; |
3190 | key.offset = (u64)-1; | 3172 | key.offset = (u64)-1; |
3191 | key.type = (u8)-1; | 3173 | key.type = (u8)-1; |
@@ -3398,7 +3380,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) | |||
3398 | 3380 | ||
3399 | ret = -ENOMEM; | 3381 | ret = -ENOMEM; |
3400 | again: | 3382 | again: |
3401 | page = grab_cache_page(mapping, index); | 3383 | page = find_or_create_page(mapping, index, GFP_NOFS); |
3402 | if (!page) { | 3384 | if (!page) { |
3403 | btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE); | 3385 | btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE); |
3404 | goto out; | 3386 | goto out; |
@@ -3528,15 +3510,19 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size) | |||
3528 | err = btrfs_drop_extents(trans, inode, cur_offset, | 3510 | err = btrfs_drop_extents(trans, inode, cur_offset, |
3529 | cur_offset + hole_size, | 3511 | cur_offset + hole_size, |
3530 | &hint_byte, 1); | 3512 | &hint_byte, 1); |
3531 | if (err) | 3513 | if (err) { |
3514 | btrfs_end_transaction(trans, root); | ||
3532 | break; | 3515 | break; |
3516 | } | ||
3533 | 3517 | ||
3534 | err = btrfs_insert_file_extent(trans, root, | 3518 | err = btrfs_insert_file_extent(trans, root, |
3535 | btrfs_ino(inode), cur_offset, 0, | 3519 | btrfs_ino(inode), cur_offset, 0, |
3536 | 0, hole_size, 0, hole_size, | 3520 | 0, hole_size, 0, hole_size, |
3537 | 0, 0, 0); | 3521 | 0, 0, 0); |
3538 | if (err) | 3522 | if (err) { |
3523 | btrfs_end_transaction(trans, root); | ||
3539 | break; | 3524 | break; |
3525 | } | ||
3540 | 3526 | ||
3541 | btrfs_drop_extent_cache(inode, hole_start, | 3527 | btrfs_drop_extent_cache(inode, hole_start, |
3542 | last_byte - 1, 0); | 3528 | last_byte - 1, 0); |
@@ -3634,7 +3620,7 @@ void btrfs_evict_inode(struct inode *inode) | |||
3634 | 3620 | ||
3635 | truncate_inode_pages(&inode->i_data, 0); | 3621 | truncate_inode_pages(&inode->i_data, 0); |
3636 | if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 || | 3622 | if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 || |
3637 | is_free_space_inode(root, inode))) | 3623 | btrfs_is_free_space_inode(root, inode))) |
3638 | goto no_delete; | 3624 | goto no_delete; |
3639 | 3625 | ||
3640 | if (is_bad_inode(inode)) { | 3626 | if (is_bad_inode(inode)) { |
@@ -3713,7 +3699,8 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry, | |||
3713 | int ret = 0; | 3699 | int ret = 0; |
3714 | 3700 | ||
3715 | path = btrfs_alloc_path(); | 3701 | path = btrfs_alloc_path(); |
3716 | BUG_ON(!path); | 3702 | if (!path) |
3703 | return -ENOMEM; | ||
3717 | 3704 | ||
3718 | di = btrfs_lookup_dir_item(NULL, root, path, btrfs_ino(dir), name, | 3705 | di = btrfs_lookup_dir_item(NULL, root, path, btrfs_ino(dir), name, |
3719 | namelen, 0); | 3706 | namelen, 0); |
@@ -3978,10 +3965,16 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, | |||
3978 | BTRFS_I(inode)->root = root; | 3965 | BTRFS_I(inode)->root = root; |
3979 | memcpy(&BTRFS_I(inode)->location, location, sizeof(*location)); | 3966 | memcpy(&BTRFS_I(inode)->location, location, sizeof(*location)); |
3980 | btrfs_read_locked_inode(inode); | 3967 | btrfs_read_locked_inode(inode); |
3981 | inode_tree_add(inode); | 3968 | if (!is_bad_inode(inode)) { |
3982 | unlock_new_inode(inode); | 3969 | inode_tree_add(inode); |
3983 | if (new) | 3970 | unlock_new_inode(inode); |
3984 | *new = 1; | 3971 | if (new) |
3972 | *new = 1; | ||
3973 | } else { | ||
3974 | unlock_new_inode(inode); | ||
3975 | iput(inode); | ||
3976 | inode = ERR_PTR(-ESTALE); | ||
3977 | } | ||
3985 | } | 3978 | } |
3986 | 3979 | ||
3987 | return inode; | 3980 | return inode; |
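
btrfs_iget() now validates the freshly read inode before publishing it: a bad inode is unlocked, dropped with iput(), and the caller gets -ESTALE rather than a half-initialised inode in the tree. Roughly this shape, sketched in userspace with errno standing in for ERR_PTR() (all names invented):

    #include <errno.h>
    #include <stdlib.h>

    struct obj { int bad; };

    static int read_obj(struct obj *o) { o->bad = 0; return 0; /* stub */ }

    /* publish only a fully valid object; on a bad read, free it and
     * fail with ESTALE, mirroring the new btrfs_iget() error path */
    static struct obj *get_obj(void)
    {
            struct obj *o = malloc(sizeof(*o));

            if (!o) {
                    errno = ENOMEM;
                    return NULL;
            }
            if (read_obj(o) != 0 || o->bad) {
                    free(o);        /* like unlock_new_inode() + iput() */
                    errno = ESTALE;
                    return NULL;
            }
            return o;               /* like inode_tree_add() + unlock */
    }
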
@@ -4016,12 +4009,20 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) | |||
4016 | struct btrfs_root *sub_root = root; | 4009 | struct btrfs_root *sub_root = root; |
4017 | struct btrfs_key location; | 4010 | struct btrfs_key location; |
4018 | int index; | 4011 | int index; |
4019 | int ret; | 4012 | int ret = 0; |
4020 | 4013 | ||
4021 | if (dentry->d_name.len > BTRFS_NAME_LEN) | 4014 | if (dentry->d_name.len > BTRFS_NAME_LEN) |
4022 | return ERR_PTR(-ENAMETOOLONG); | 4015 | return ERR_PTR(-ENAMETOOLONG); |
4023 | 4016 | ||
4024 | ret = btrfs_inode_by_name(dir, dentry, &location); | 4017 | if (unlikely(d_need_lookup(dentry))) { |
4018 | memcpy(&location, dentry->d_fsdata, sizeof(struct btrfs_key)); | ||
4019 | kfree(dentry->d_fsdata); | ||
4020 | dentry->d_fsdata = NULL; | ||
4021 | /* This thing is hashed, drop it for now */ | ||
4022 | d_drop(dentry); | ||
4023 | } else { | ||
4024 | ret = btrfs_inode_by_name(dir, dentry, &location); | ||
4025 | } | ||
4025 | 4026 | ||
4026 | if (ret < 0) | 4027 | if (ret < 0) |
4027 | return ERR_PTR(ret); | 4028 | return ERR_PTR(ret); |
@@ -4076,16 +4077,24 @@ static int btrfs_dentry_delete(const struct dentry *dentry) | |||
4076 | return 0; | 4077 | return 0; |
4077 | } | 4078 | } |
4078 | 4079 | ||
4080 | static void btrfs_dentry_release(struct dentry *dentry) | ||
4081 | { | ||
4082 | if (dentry->d_fsdata) | ||
4083 | kfree(dentry->d_fsdata); | ||
4084 | } | ||
4085 | |||
4079 | static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, | 4086 | static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, |
4080 | struct nameidata *nd) | 4087 | struct nameidata *nd) |
4081 | { | 4088 | { |
4082 | struct inode *inode; | 4089 | struct dentry *ret; |
4083 | |||
4084 | inode = btrfs_lookup_dentry(dir, dentry); | ||
4085 | if (IS_ERR(inode)) | ||
4086 | return ERR_CAST(inode); | ||
4087 | 4090 | ||
4088 | return d_splice_alias(inode, dentry); | 4091 | ret = d_splice_alias(btrfs_lookup_dentry(dir, dentry), dentry); |
4092 | if (unlikely(d_need_lookup(dentry))) { | ||
4093 | spin_lock(&dentry->d_lock); | ||
4094 | dentry->d_flags &= ~DCACHE_NEED_LOOKUP; | ||
4095 | spin_unlock(&dentry->d_lock); | ||
4096 | } | ||
4097 | return ret; | ||
4089 | } | 4098 | } |
4090 | 4099 | ||
4091 | unsigned char btrfs_filetype_table[] = { | 4100 | unsigned char btrfs_filetype_table[] = { |
@@ -4104,6 +4113,7 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, | |||
4104 | struct btrfs_path *path; | 4113 | struct btrfs_path *path; |
4105 | struct list_head ins_list; | 4114 | struct list_head ins_list; |
4106 | struct list_head del_list; | 4115 | struct list_head del_list; |
4116 | struct qstr q; | ||
4107 | int ret; | 4117 | int ret; |
4108 | struct extent_buffer *leaf; | 4118 | struct extent_buffer *leaf; |
4109 | int slot; | 4119 | int slot; |
@@ -4124,7 +4134,8 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, | |||
4124 | 4134 | ||
4125 | /* special case for "." */ | 4135 | /* special case for "." */ |
4126 | if (filp->f_pos == 0) { | 4136 | if (filp->f_pos == 0) { |
4127 | over = filldir(dirent, ".", 1, 1, btrfs_ino(inode), DT_DIR); | 4137 | over = filldir(dirent, ".", 1, |
4138 | filp->f_pos, btrfs_ino(inode), DT_DIR); | ||
4128 | if (over) | 4139 | if (over) |
4129 | return 0; | 4140 | return 0; |
4130 | filp->f_pos = 1; | 4141 | filp->f_pos = 1; |
@@ -4133,7 +4144,7 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, | |||
4133 | if (filp->f_pos == 1) { | 4144 | if (filp->f_pos == 1) { |
4134 | u64 pino = parent_ino(filp->f_path.dentry); | 4145 | u64 pino = parent_ino(filp->f_path.dentry); |
4135 | over = filldir(dirent, "..", 2, | 4146 | over = filldir(dirent, "..", 2, |
4136 | 2, pino, DT_DIR); | 4147 | filp->f_pos, pino, DT_DIR); |
4137 | if (over) | 4148 | if (over) |
4138 | return 0; | 4149 | return 0; |
4139 | filp->f_pos = 2; | 4150 | filp->f_pos = 2; |
@@ -4193,6 +4204,7 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, | |||
4193 | 4204 | ||
4194 | while (di_cur < di_total) { | 4205 | while (di_cur < di_total) { |
4195 | struct btrfs_key location; | 4206 | struct btrfs_key location; |
4207 | struct dentry *tmp; | ||
4196 | 4208 | ||
4197 | if (verify_dir_item(root, leaf, di)) | 4209 | if (verify_dir_item(root, leaf, di)) |
4198 | break; | 4210 | break; |
@@ -4213,6 +4225,33 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, | |||
4213 | d_type = btrfs_filetype_table[btrfs_dir_type(leaf, di)]; | 4225 | d_type = btrfs_filetype_table[btrfs_dir_type(leaf, di)]; |
4214 | btrfs_dir_item_key_to_cpu(leaf, di, &location); | 4226 | btrfs_dir_item_key_to_cpu(leaf, di, &location); |
4215 | 4227 | ||
4228 | q.name = name_ptr; | ||
4229 | q.len = name_len; | ||
4230 | q.hash = full_name_hash(q.name, q.len); | ||
4231 | tmp = d_lookup(filp->f_dentry, &q); | ||
4232 | if (!tmp) { | ||
4233 | struct btrfs_key *newkey; | ||
4234 | |||
4235 | newkey = kzalloc(sizeof(struct btrfs_key), | ||
4236 | GFP_NOFS); | ||
4237 | if (!newkey) | ||
4238 | goto no_dentry; | ||
4239 | tmp = d_alloc(filp->f_dentry, &q); | ||
4240 | if (!tmp) { | ||
4241 | kfree(newkey); | ||
4242 | dput(tmp); | ||
4243 | goto no_dentry; | ||
4244 | } | ||
4245 | memcpy(newkey, &location, | ||
4246 | sizeof(struct btrfs_key)); | ||
4247 | tmp->d_fsdata = newkey; | ||
4248 | tmp->d_flags |= DCACHE_NEED_LOOKUP; | ||
4249 | d_rehash(tmp); | ||
4250 | dput(tmp); | ||
4251 | } else { | ||
4252 | dput(tmp); | ||
4253 | } | ||
4254 | no_dentry: | ||
4216 | /* is this a reference to our own snapshot? If so | 4255 | /* is this a reference to our own snapshot? If so |
4217 | * skip it | 4256 | * skip it |
4218 | */ | 4257 | */ |
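
The readdir hunk pre-populates the dcache as a side effect of listing: for each name that has no dentry yet, it allocates one, stashes the btrfs key in d_fsdata, flags it DCACHE_NEED_LOOKUP and rehashes it, so the d_need_lookup() branch added to btrfs_lookup_dentry() above can copy the key out of d_fsdata instead of searching the tree. A rough userspace analogue of that scan-time prefill, with a toy table in place of the dcache (everything here is invented for the sketch):

    #include <stdio.h>
    #include <string.h>

    struct key { unsigned long long objectid, offset; };

    struct cache_ent {
            char name[64];
            struct key key;
            int need_lookup;        /* analogue of DCACHE_NEED_LOOKUP */
    };

    #define CACHE_SLOTS 128
    static struct cache_ent cache[CACHE_SLOTS];
    static int cache_used;

    /* called for each name while scanning the directory */
    static void readdir_prefill(const char *name, struct key k)
    {
            struct cache_ent *e;

            if (cache_used >= CACHE_SLOTS)
                    return;         /* cache full: skip, like the ENOMEM path */
            e = &cache[cache_used++];
            snprintf(e->name, sizeof(e->name), "%s", name);
            e->key = k;
            e->need_lookup = 1;
    }

    /* later lookup: use the stashed key if one is waiting */
    static int lookup(const char *name, struct key *out)
    {
            int i;

            for (i = 0; i < cache_used; i++) {
                    if (cache[i].need_lookup && !strcmp(cache[i].name, name)) {
                            *out = cache[i].key;
                            cache[i].need_lookup = 0;  /* consumed once */
                            return 0;
                    }
            }
            return -1;              /* miss: would search the tree */
    }
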
@@ -4277,7 +4316,7 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc) | |||
4277 | if (BTRFS_I(inode)->dummy_inode) | 4316 | if (BTRFS_I(inode)->dummy_inode) |
4278 | return 0; | 4317 | return 0; |
4279 | 4318 | ||
4280 | if (btrfs_fs_closing(root->fs_info) && is_free_space_inode(root, inode)) | 4319 | if (btrfs_fs_closing(root->fs_info) && btrfs_is_free_space_inode(root, inode)) |
4281 | nolock = true; | 4320 | nolock = true; |
4282 | 4321 | ||
4283 | if (wbc->sync_mode == WB_SYNC_ALL) { | 4322 | if (wbc->sync_mode == WB_SYNC_ALL) { |
@@ -4438,7 +4477,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | |||
4438 | int owner; | 4477 | int owner; |
4439 | 4478 | ||
4440 | path = btrfs_alloc_path(); | 4479 | path = btrfs_alloc_path(); |
4441 | BUG_ON(!path); | 4480 | if (!path) |
4481 | return ERR_PTR(-ENOMEM); | ||
4442 | 4482 | ||
4443 | inode = new_inode(root->fs_info->sb); | 4483 | inode = new_inode(root->fs_info->sb); |
4444 | if (!inode) { | 4484 | if (!inode) { |
@@ -4473,7 +4513,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | |||
4473 | inode->i_generation = BTRFS_I(inode)->generation; | 4513 | inode->i_generation = BTRFS_I(inode)->generation; |
4474 | btrfs_set_inode_space_info(root, inode); | 4514 | btrfs_set_inode_space_info(root, inode); |
4475 | 4515 | ||
4476 | if (mode & S_IFDIR) | 4516 | if (S_ISDIR(mode)) |
4477 | owner = 0; | 4517 | owner = 0; |
4478 | else | 4518 | else |
4479 | owner = 1; | 4519 | owner = 1; |
@@ -4518,7 +4558,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | |||
4518 | 4558 | ||
4519 | btrfs_inherit_iflags(inode, dir); | 4559 | btrfs_inherit_iflags(inode, dir); |
4520 | 4560 | ||
4521 | if ((mode & S_IFREG)) { | 4561 | if (S_ISREG(mode)) { |
4522 | if (btrfs_test_opt(root, NODATASUM)) | 4562 | if (btrfs_test_opt(root, NODATASUM)) |
4523 | BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM; | 4563 | BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM; |
4524 | if (btrfs_test_opt(root, NODATACOW) || | 4564 | if (btrfs_test_opt(root, NODATACOW) || |
@@ -4772,11 +4812,10 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, | |||
4772 | if (err) { | 4812 | if (err) { |
4773 | drop_inode = 1; | 4813 | drop_inode = 1; |
4774 | } else { | 4814 | } else { |
4775 | struct dentry *parent = dget_parent(dentry); | 4815 | struct dentry *parent = dentry->d_parent; |
4776 | err = btrfs_update_inode(trans, root, inode); | 4816 | err = btrfs_update_inode(trans, root, inode); |
4777 | BUG_ON(err); | 4817 | BUG_ON(err); |
4778 | btrfs_log_new_name(trans, inode, NULL, parent); | 4818 | btrfs_log_new_name(trans, inode, NULL, parent); |
4779 | dput(parent); | ||
4780 | } | 4819 | } |
4781 | 4820 | ||
4782 | nr = trans->blocks_used; | 4821 | nr = trans->blocks_used; |
@@ -5794,7 +5833,7 @@ again: | |||
5794 | 5833 | ||
5795 | add_pending_csums(trans, inode, ordered->file_offset, &ordered->list); | 5834 | add_pending_csums(trans, inode, ordered->file_offset, &ordered->list); |
5796 | ret = btrfs_ordered_update_i_size(inode, 0, ordered); | 5835 | ret = btrfs_ordered_update_i_size(inode, 0, ordered); |
5797 | if (!ret) | 5836 | if (!ret || !test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags)) |
5798 | btrfs_update_inode(trans, root, inode); | 5837 | btrfs_update_inode(trans, root, inode); |
5799 | ret = 0; | 5838 | ret = 0; |
5800 | out_unlock: | 5839 | out_unlock: |
@@ -6699,19 +6738,6 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, | |||
6699 | return 0; | 6738 | return 0; |
6700 | } | 6739 | } |
6701 | 6740 | ||
6702 | /* helper function for file defrag and space balancing. This | ||
6703 | * forces readahead on a given range of bytes in an inode | ||
6704 | */ | ||
6705 | unsigned long btrfs_force_ra(struct address_space *mapping, | ||
6706 | struct file_ra_state *ra, struct file *file, | ||
6707 | pgoff_t offset, pgoff_t last_index) | ||
6708 | { | ||
6709 | pgoff_t req_size = last_index - offset + 1; | ||
6710 | |||
6711 | page_cache_sync_readahead(mapping, ra, file, offset, req_size); | ||
6712 | return offset + req_size; | ||
6713 | } | ||
6714 | |||
6715 | struct inode *btrfs_alloc_inode(struct super_block *sb) | 6741 | struct inode *btrfs_alloc_inode(struct super_block *sb) |
6716 | { | 6742 | { |
6717 | struct btrfs_inode *ei; | 6743 | struct btrfs_inode *ei; |
@@ -6735,8 +6761,9 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) | |||
6735 | ei->index_cnt = (u64)-1; | 6761 | ei->index_cnt = (u64)-1; |
6736 | ei->last_unlink_trans = 0; | 6762 | ei->last_unlink_trans = 0; |
6737 | 6763 | ||
6738 | atomic_set(&ei->outstanding_extents, 0); | 6764 | spin_lock_init(&ei->lock); |
6739 | atomic_set(&ei->reserved_extents, 0); | 6765 | ei->outstanding_extents = 0; |
6766 | ei->reserved_extents = 0; | ||
6740 | 6767 | ||
6741 | ei->ordered_data_close = 0; | 6768 | ei->ordered_data_close = 0; |
6742 | ei->orphan_meta_reserved = 0; | 6769 | ei->orphan_meta_reserved = 0; |
@@ -6774,8 +6801,8 @@ void btrfs_destroy_inode(struct inode *inode) | |||
6774 | 6801 | ||
6775 | WARN_ON(!list_empty(&inode->i_dentry)); | 6802 | WARN_ON(!list_empty(&inode->i_dentry)); |
6776 | WARN_ON(inode->i_data.nrpages); | 6803 | WARN_ON(inode->i_data.nrpages); |
6777 | WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents)); | 6804 | WARN_ON(BTRFS_I(inode)->outstanding_extents); |
6778 | WARN_ON(atomic_read(&BTRFS_I(inode)->reserved_extents)); | 6805 | WARN_ON(BTRFS_I(inode)->reserved_extents); |
6779 | 6806 | ||
6780 | /* | 6807 | /* |
6781 | * This can happen where we create an inode, but somebody else also | 6808 | * This can happen where we create an inode, but somebody else also |
@@ -6830,7 +6857,7 @@ int btrfs_drop_inode(struct inode *inode) | |||
6830 | struct btrfs_root *root = BTRFS_I(inode)->root; | 6857 | struct btrfs_root *root = BTRFS_I(inode)->root; |
6831 | 6858 | ||
6832 | if (btrfs_root_refs(&root->root_item) == 0 && | 6859 | if (btrfs_root_refs(&root->root_item) == 0 && |
6833 | !is_free_space_inode(root, inode)) | 6860 | !btrfs_is_free_space_inode(root, inode)) |
6834 | return 1; | 6861 | return 1; |
6835 | else | 6862 | else |
6836 | return generic_drop_inode(inode); | 6863 | return generic_drop_inode(inode); |
@@ -6900,7 +6927,7 @@ static int btrfs_getattr(struct vfsmount *mnt, | |||
6900 | { | 6927 | { |
6901 | struct inode *inode = dentry->d_inode; | 6928 | struct inode *inode = dentry->d_inode; |
6902 | generic_fillattr(inode, stat); | 6929 | generic_fillattr(inode, stat); |
6903 | stat->dev = BTRFS_I(inode)->root->anon_super.s_dev; | 6930 | stat->dev = BTRFS_I(inode)->root->anon_dev; |
6904 | stat->blksize = PAGE_CACHE_SIZE; | 6931 | stat->blksize = PAGE_CACHE_SIZE; |
6905 | stat->blocks = (inode_get_bytes(inode) + | 6932 | stat->blocks = (inode_get_bytes(inode) + |
6906 | BTRFS_I(inode)->delalloc_bytes) >> 9; | 6933 | BTRFS_I(inode)->delalloc_bytes) >> 9; |
@@ -7068,9 +7095,8 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
7068 | BUG_ON(ret); | 7095 | BUG_ON(ret); |
7069 | 7096 | ||
7070 | if (old_ino != BTRFS_FIRST_FREE_OBJECTID) { | 7097 | if (old_ino != BTRFS_FIRST_FREE_OBJECTID) { |
7071 | struct dentry *parent = dget_parent(new_dentry); | 7098 | struct dentry *parent = new_dentry->d_parent; |
7072 | btrfs_log_new_name(trans, old_inode, old_dir, parent); | 7099 | btrfs_log_new_name(trans, old_inode, old_dir, parent); |
7073 | dput(parent); | ||
7074 | btrfs_end_log_trans(root); | 7100 | btrfs_end_log_trans(root); |
7075 | } | 7101 | } |
7076 | out_fail: | 7102 | out_fail: |
@@ -7194,7 +7220,11 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, | |||
7194 | goto out_unlock; | 7220 | goto out_unlock; |
7195 | 7221 | ||
7196 | path = btrfs_alloc_path(); | 7222 | path = btrfs_alloc_path(); |
7197 | BUG_ON(!path); | 7223 | if (!path) { |
7224 | err = -ENOMEM; | ||
7225 | drop_inode = 1; | ||
7226 | goto out_unlock; | ||
7227 | } | ||
7198 | key.objectid = btrfs_ino(inode); | 7228 | key.objectid = btrfs_ino(inode); |
7199 | key.offset = 0; | 7229 | key.offset = 0; |
7200 | btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); | 7230 | btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); |
@@ -7331,15 +7361,19 @@ static int btrfs_set_page_dirty(struct page *page) | |||
7331 | return __set_page_dirty_nobuffers(page); | 7361 | return __set_page_dirty_nobuffers(page); |
7332 | } | 7362 | } |
7333 | 7363 | ||
7334 | static int btrfs_permission(struct inode *inode, int mask, unsigned int flags) | 7364 | static int btrfs_permission(struct inode *inode, int mask) |
7335 | { | 7365 | { |
7336 | struct btrfs_root *root = BTRFS_I(inode)->root; | 7366 | struct btrfs_root *root = BTRFS_I(inode)->root; |
7367 | umode_t mode = inode->i_mode; | ||
7337 | 7368 | ||
7338 | if (btrfs_root_readonly(root) && (mask & MAY_WRITE)) | 7369 | if (mask & MAY_WRITE && |
7339 | return -EROFS; | 7370 | (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) { |
7340 | if ((BTRFS_I(inode)->flags & BTRFS_INODE_READONLY) && (mask & MAY_WRITE)) | 7371 | if (btrfs_root_readonly(root)) |
7341 | return -EACCES; | 7372 | return -EROFS; |
7342 | return generic_permission(inode, mask, flags, btrfs_check_acl); | 7373 | if (BTRFS_I(inode)->flags & BTRFS_INODE_READONLY) |
7374 | return -EACCES; | ||
7375 | } | ||
7376 | return generic_permission(inode, mask); | ||
7343 | } | 7377 | } |
7344 | 7378 | ||
7345 | static const struct inode_operations btrfs_dir_inode_operations = { | 7379 | static const struct inode_operations btrfs_dir_inode_operations = { |
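
btrfs_permission() changes in two ways: ACL checking moves out of it entirely (the old call passed btrfs_check_acl into generic_permission(); the VFS now fetches ACLs itself through the ->get_acl methods added to the inode_operations tables below), and the read-only checks now apply only when writing a regular file, directory or symlink, so a write aimed at, say, a device node living on a read-only root is no longer refused by the filesystem layer. A hedged sketch of that mode gate (MAY_WRITE redefined locally for the sketch):

    #include <sys/types.h>
    #include <sys/stat.h>
    #include <errno.h>

    #define MAY_WRITE 0x2

    /* returns 0 if the fs-level read-only checks pass; the generic
     * permission logic (mode bits, ACLs) would run after this */
    static int fs_permission(mode_t mode, int mask, int fs_readonly,
                             int inode_readonly)
    {
            if ((mask & MAY_WRITE) &&
                (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) {
                    if (fs_readonly)
                            return -EROFS;
                    if (inode_readonly)
                            return -EACCES;
            }
            return 0;
    }
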
@@ -7359,10 +7393,12 @@ static const struct inode_operations btrfs_dir_inode_operations = { | |||
7359 | .listxattr = btrfs_listxattr, | 7393 | .listxattr = btrfs_listxattr, |
7360 | .removexattr = btrfs_removexattr, | 7394 | .removexattr = btrfs_removexattr, |
7361 | .permission = btrfs_permission, | 7395 | .permission = btrfs_permission, |
7396 | .get_acl = btrfs_get_acl, | ||
7362 | }; | 7397 | }; |
7363 | static const struct inode_operations btrfs_dir_ro_inode_operations = { | 7398 | static const struct inode_operations btrfs_dir_ro_inode_operations = { |
7364 | .lookup = btrfs_lookup, | 7399 | .lookup = btrfs_lookup, |
7365 | .permission = btrfs_permission, | 7400 | .permission = btrfs_permission, |
7401 | .get_acl = btrfs_get_acl, | ||
7366 | }; | 7402 | }; |
7367 | 7403 | ||
7368 | static const struct file_operations btrfs_dir_file_operations = { | 7404 | static const struct file_operations btrfs_dir_file_operations = { |
@@ -7431,6 +7467,7 @@ static const struct inode_operations btrfs_file_inode_operations = { | |||
7431 | .removexattr = btrfs_removexattr, | 7467 | .removexattr = btrfs_removexattr, |
7432 | .permission = btrfs_permission, | 7468 | .permission = btrfs_permission, |
7433 | .fiemap = btrfs_fiemap, | 7469 | .fiemap = btrfs_fiemap, |
7470 | .get_acl = btrfs_get_acl, | ||
7434 | }; | 7471 | }; |
7435 | static const struct inode_operations btrfs_special_inode_operations = { | 7472 | static const struct inode_operations btrfs_special_inode_operations = { |
7436 | .getattr = btrfs_getattr, | 7473 | .getattr = btrfs_getattr, |
@@ -7440,6 +7477,7 @@ static const struct inode_operations btrfs_special_inode_operations = { | |||
7440 | .getxattr = btrfs_getxattr, | 7477 | .getxattr = btrfs_getxattr, |
7441 | .listxattr = btrfs_listxattr, | 7478 | .listxattr = btrfs_listxattr, |
7442 | .removexattr = btrfs_removexattr, | 7479 | .removexattr = btrfs_removexattr, |
7480 | .get_acl = btrfs_get_acl, | ||
7443 | }; | 7481 | }; |
7444 | static const struct inode_operations btrfs_symlink_inode_operations = { | 7482 | static const struct inode_operations btrfs_symlink_inode_operations = { |
7445 | .readlink = generic_readlink, | 7483 | .readlink = generic_readlink, |
@@ -7451,8 +7489,10 @@ static const struct inode_operations btrfs_symlink_inode_operations = { | |||
7451 | .getxattr = btrfs_getxattr, | 7489 | .getxattr = btrfs_getxattr, |
7452 | .listxattr = btrfs_listxattr, | 7490 | .listxattr = btrfs_listxattr, |
7453 | .removexattr = btrfs_removexattr, | 7491 | .removexattr = btrfs_removexattr, |
7492 | .get_acl = btrfs_get_acl, | ||
7454 | }; | 7493 | }; |
7455 | 7494 | ||
7456 | const struct dentry_operations btrfs_dentry_operations = { | 7495 | const struct dentry_operations btrfs_dentry_operations = { |
7457 | .d_delete = btrfs_dentry_delete, | 7496 | .d_delete = btrfs_dentry_delete, |
7497 | .d_release = btrfs_dentry_release, | ||
7458 | }; | 7498 | }; |
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index a3c4751e07d..dae5dfe41ba 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
@@ -323,7 +323,7 @@ static noinline int create_subvol(struct btrfs_root *root, | |||
323 | struct btrfs_inode_item *inode_item; | 323 | struct btrfs_inode_item *inode_item; |
324 | struct extent_buffer *leaf; | 324 | struct extent_buffer *leaf; |
325 | struct btrfs_root *new_root; | 325 | struct btrfs_root *new_root; |
326 | struct dentry *parent = dget_parent(dentry); | 326 | struct dentry *parent = dentry->d_parent; |
327 | struct inode *dir; | 327 | struct inode *dir; |
328 | int ret; | 328 | int ret; |
329 | int err; | 329 | int err; |
@@ -332,10 +332,8 @@ static noinline int create_subvol(struct btrfs_root *root, | |||
332 | u64 index = 0; | 332 | u64 index = 0; |
333 | 333 | ||
334 | ret = btrfs_find_free_objectid(root->fs_info->tree_root, &objectid); | 334 | ret = btrfs_find_free_objectid(root->fs_info->tree_root, &objectid); |
335 | if (ret) { | 335 | if (ret) |
336 | dput(parent); | ||
337 | return ret; | 336 | return ret; |
338 | } | ||
339 | 337 | ||
340 | dir = parent->d_inode; | 338 | dir = parent->d_inode; |
341 | 339 | ||
@@ -346,10 +344,8 @@ static noinline int create_subvol(struct btrfs_root *root, | |||
346 | * 2 - dir items | 344 | * 2 - dir items |
347 | */ | 345 | */ |
348 | trans = btrfs_start_transaction(root, 6); | 346 | trans = btrfs_start_transaction(root, 6); |
349 | if (IS_ERR(trans)) { | 347 | if (IS_ERR(trans)) |
350 | dput(parent); | ||
351 | return PTR_ERR(trans); | 348 | return PTR_ERR(trans); |
352 | } | ||
353 | 349 | ||
354 | leaf = btrfs_alloc_free_block(trans, root, root->leafsize, | 350 | leaf = btrfs_alloc_free_block(trans, root, root->leafsize, |
355 | 0, objectid, NULL, 0, 0, 0); | 351 | 0, objectid, NULL, 0, 0, 0); |
@@ -439,7 +435,6 @@ static noinline int create_subvol(struct btrfs_root *root, | |||
439 | 435 | ||
440 | d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry)); | 436 | d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry)); |
441 | fail: | 437 | fail: |
442 | dput(parent); | ||
443 | if (async_transid) { | 438 | if (async_transid) { |
444 | *async_transid = trans->transid; | 439 | *async_transid = trans->transid; |
445 | err = btrfs_commit_transaction_async(trans, root, 1); | 440 | err = btrfs_commit_transaction_async(trans, root, 1); |
@@ -456,7 +451,6 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, | |||
456 | bool readonly) | 451 | bool readonly) |
457 | { | 452 | { |
458 | struct inode *inode; | 453 | struct inode *inode; |
459 | struct dentry *parent; | ||
460 | struct btrfs_pending_snapshot *pending_snapshot; | 454 | struct btrfs_pending_snapshot *pending_snapshot; |
461 | struct btrfs_trans_handle *trans; | 455 | struct btrfs_trans_handle *trans; |
462 | int ret; | 456 | int ret; |
@@ -504,9 +498,7 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, | |||
504 | if (ret) | 498 | if (ret) |
505 | goto fail; | 499 | goto fail; |
506 | 500 | ||
507 | parent = dget_parent(dentry); | 501 | inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry); |
508 | inode = btrfs_lookup_dentry(parent->d_inode, dentry); | ||
509 | dput(parent); | ||
510 | if (IS_ERR(inode)) { | 502 | if (IS_ERR(inode)) { |
511 | ret = PTR_ERR(inode); | 503 | ret = PTR_ERR(inode); |
512 | goto fail; | 504 | goto fail; |
@@ -867,8 +859,8 @@ again: | |||
867 | /* step one, lock all the pages */ | 859 | /* step one, lock all the pages */ |
868 | for (i = 0; i < num_pages; i++) { | 860 | for (i = 0; i < num_pages; i++) { |
869 | struct page *page; | 861 | struct page *page; |
870 | page = grab_cache_page(inode->i_mapping, | 862 | page = find_or_create_page(inode->i_mapping, |
871 | start_index + i); | 863 | start_index + i, GFP_NOFS); |
872 | if (!page) | 864 | if (!page) |
873 | break; | 865 | break; |
874 | 866 | ||
@@ -938,7 +930,9 @@ again: | |||
938 | GFP_NOFS); | 930 | GFP_NOFS); |
939 | 931 | ||
940 | if (i_done != num_pages) { | 932 | if (i_done != num_pages) { |
941 | atomic_inc(&BTRFS_I(inode)->outstanding_extents); | 933 | spin_lock(&BTRFS_I(inode)->lock); |
934 | BTRFS_I(inode)->outstanding_extents++; | ||
935 | spin_unlock(&BTRFS_I(inode)->lock); | ||
942 | btrfs_delalloc_release_space(inode, | 936 | btrfs_delalloc_release_space(inode, |
943 | (num_pages - i_done) << PAGE_CACHE_SHIFT); | 937 | (num_pages - i_done) << PAGE_CACHE_SHIFT); |
944 | } | 938 | } |
@@ -1053,7 +1047,16 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, | |||
1053 | if (!max_to_defrag) | 1047 | if (!max_to_defrag) |
1054 | max_to_defrag = last_index - 1; | 1048 | max_to_defrag = last_index - 1; |
1055 | 1049 | ||
1056 | while (i <= last_index && defrag_count < max_to_defrag) { | 1050 | /* |
1051 | * make writeback start from i, so the defrag range can be | ||
1052 | * written sequentially. | ||
1053 | */ | ||
1054 | if (i < inode->i_mapping->writeback_index) | ||
1055 | inode->i_mapping->writeback_index = i; | ||
1056 | |||
1057 | while (i <= last_index && defrag_count < max_to_defrag && | ||
1058 | (i < (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> | ||
1059 | PAGE_CACHE_SHIFT)) { | ||
1057 | /* | 1060 | /* |
1058 | * make sure we stop running if someone unmounts | 1061 | * make sure we stop running if someone unmounts |
1059 | * the FS | 1062 | * the FS |
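
Besides rewinding writeback_index so flushing starts at the defrag cursor, the loop now also stops once i passes the last page that i_size actually covers, using the usual round-up-then-shift idiom. A small worked check of that arithmetic (page size fixed at 4K for the sketch):

    #include <assert.h>
    #include <stdint.h>

    #define PAGE_SHIFT 12
    #define PAGE_SIZE  (1UL << PAGE_SHIFT)

    /* number of pages needed to cover size bytes: round up, then shift */
    static uint64_t pages_covering(uint64_t size)
    {
            return (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
    }

    int main(void)
    {
            assert(pages_covering(0) == 0);           /* empty file */
            assert(pages_covering(1) == 1);
            assert(pages_covering(PAGE_SIZE) == 1);
            assert(pages_covering(PAGE_SIZE + 1) == 2);
            return 0;
    }
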
@@ -1755,11 +1758,10 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info, | |||
1755 | key.objectid = key.offset; | 1758 | key.objectid = key.offset; |
1756 | key.offset = (u64)-1; | 1759 | key.offset = (u64)-1; |
1757 | dirid = key.objectid; | 1760 | dirid = key.objectid; |
1758 | |||
1759 | } | 1761 | } |
1760 | if (ptr < name) | 1762 | if (ptr < name) |
1761 | goto out; | 1763 | goto out; |
1762 | memcpy(name, ptr, total_len); | 1764 | memmove(name, ptr, total_len); |
1763 | name[total_len]='\0'; | 1765 | name[total_len]='\0'; |
1764 | ret = 0; | 1766 | ret = 0; |
1765 | out: | 1767 | out: |
@@ -2184,6 +2186,11 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
2184 | if (!(src_file->f_mode & FMODE_READ)) | 2186 | if (!(src_file->f_mode & FMODE_READ)) |
2185 | goto out_fput; | 2187 | goto out_fput; |
2186 | 2188 | ||
2189 | /* don't make the dst file partly checksummed */ | ||
2190 | if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) != | ||
2191 | (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) | ||
2192 | goto out_fput; | ||
2193 | |||
2187 | ret = -EISDIR; | 2194 | ret = -EISDIR; |
2188 | if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode)) | 2195 | if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode)) |
2189 | goto out_fput; | 2196 | goto out_fput; |
@@ -2227,6 +2234,16 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
2227 | !IS_ALIGNED(destoff, bs)) | 2234 | !IS_ALIGNED(destoff, bs)) |
2228 | goto out_unlock; | 2235 | goto out_unlock; |
2229 | 2236 | ||
2237 | if (destoff > inode->i_size) { | ||
2238 | ret = btrfs_cont_expand(inode, inode->i_size, destoff); | ||
2239 | if (ret) | ||
2240 | goto out_unlock; | ||
2241 | } | ||
2242 | |||
2243 | /* truncate page cache pages from target inode range */ | ||
2244 | truncate_inode_pages_range(&inode->i_data, destoff, | ||
2245 | PAGE_CACHE_ALIGN(destoff + len) - 1); | ||
2246 | |||
2230 | /* do any pending delalloc/csum calc on src, one way or | 2247 | /* do any pending delalloc/csum calc on src, one way or |
2231 | another, and lock file content */ | 2248 | another, and lock file content */ |
2232 | while (1) { | 2249 | while (1) { |
@@ -2320,7 +2337,12 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
2320 | else | 2337 | else |
2321 | new_key.offset = destoff; | 2338 | new_key.offset = destoff; |
2322 | 2339 | ||
2323 | trans = btrfs_start_transaction(root, 1); | 2340 | /* |
2341 | * 1 - adjusting old extent (we may have to split it) | ||
2342 | * 1 - add new extent | ||
2343 | * 1 - inode update | ||
2344 | */ | ||
2345 | trans = btrfs_start_transaction(root, 3); | ||
2324 | if (IS_ERR(trans)) { | 2346 | if (IS_ERR(trans)) { |
2325 | ret = PTR_ERR(trans); | 2347 | ret = PTR_ERR(trans); |
2326 | goto out; | 2348 | goto out; |
@@ -2328,14 +2350,21 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
2328 | 2350 | ||
2329 | if (type == BTRFS_FILE_EXTENT_REG || | 2351 | if (type == BTRFS_FILE_EXTENT_REG || |
2330 | type == BTRFS_FILE_EXTENT_PREALLOC) { | 2352 | type == BTRFS_FILE_EXTENT_PREALLOC) { |
2353 | /* | ||
2354 | * a | --- range to clone ---| b | ||
2355 | * | ------------- extent ------------- | | ||
2356 | */ | ||
2357 | |||
2358 | /* subtract range b */ | ||
2359 | if (key.offset + datal > off + len) | ||
2360 | datal = off + len - key.offset; | ||
2361 | |||
2362 | /* subtract range a */ | ||
2331 | if (off > key.offset) { | 2363 | if (off > key.offset) { |
2332 | datao += off - key.offset; | 2364 | datao += off - key.offset; |
2333 | datal -= off - key.offset; | 2365 | datal -= off - key.offset; |
2334 | } | 2366 | } |
2335 | 2367 | ||
2336 | if (key.offset + datal > off + len) | ||
2337 | datal = off + len - key.offset; | ||
2338 | |||
2339 | ret = btrfs_drop_extents(trans, inode, | 2368 | ret = btrfs_drop_extents(trans, inode, |
2340 | new_key.offset, | 2369 | new_key.offset, |
2341 | new_key.offset + datal, | 2370 | new_key.offset + datal, |
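
Re-ordering the two clips matters: both tests are written against key.offset, so the tail (range b) has to be trimmed before the head trim (range a) mutates datal; otherwise a clone range cutting into both ends of one extent ends up with the wrong length. A self-contained check of the new order (variable names mirror the diff, the harness is invented):

    #include <assert.h>
    #include <stdint.h>

    typedef uint64_t u64;

    /* clip extent [koff, koff+datal) against clone range [off, off+len),
     * in the order the patched code uses: tail first, then head */
    static void clip(u64 koff, u64 *datao, u64 *datal, u64 off, u64 len)
    {
            /* subtract range b: drop anything past the end of the range */
            if (koff + *datal > off + len)
                    *datal = off + len - koff;

            /* subtract range a: drop anything before the range starts */
            if (off > koff) {
                    *datao += off - koff;
                    *datal -= off - koff;
            }
    }

    int main(void)
    {
            /* extent at offset 0 covering 100 bytes, cloning [10, 30) */
            u64 datao = 0, datal = 100;

            clip(0, &datao, &datal, 10, 20);
            assert(datao == 10 && datal == 20);  /* exactly the overlap */
            return 0;
    }
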
@@ -2432,7 +2461,6 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
2432 | if (endoff > inode->i_size) | 2461 | if (endoff > inode->i_size) |
2433 | btrfs_i_size_write(inode, endoff); | 2462 | btrfs_i_size_write(inode, endoff); |
2434 | 2463 | ||
2435 | BTRFS_I(inode)->flags = BTRFS_I(src)->flags; | ||
2436 | ret = btrfs_update_inode(trans, root, inode); | 2464 | ret = btrfs_update_inode(trans, root, inode); |
2437 | BUG_ON(ret); | 2465 | BUG_ON(ret); |
2438 | btrfs_end_transaction(trans, root); | 2466 | btrfs_end_transaction(trans, root); |
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c index 66fa43dc3f0..d77b67c4b27 100644 --- a/fs/btrfs/locking.c +++ b/fs/btrfs/locking.c | |||
@@ -24,185 +24,197 @@ | |||
24 | #include "extent_io.h" | 24 | #include "extent_io.h" |
25 | #include "locking.h" | 25 | #include "locking.h" |
26 | 26 | ||
27 | static inline void spin_nested(struct extent_buffer *eb) | 27 | void btrfs_assert_tree_read_locked(struct extent_buffer *eb); |
28 | { | ||
29 | spin_lock(&eb->lock); | ||
30 | } | ||
31 | 28 | ||
32 | /* | 29 | /* |
33 | * Setting a lock to blocking will drop the spinlock and set the | 30 | * if we currently have a spinning reader or writer lock |
34 | * flag that forces other procs who want the lock to wait. After | 31 | * (indicated by the rw flag) this will bump the count |
35 | * this you can safely schedule with the lock held. | 32 | * of blocking holders and drop the spinlock. |
36 | */ | 33 | */ |
37 | void btrfs_set_lock_blocking(struct extent_buffer *eb) | 34 | void btrfs_set_lock_blocking_rw(struct extent_buffer *eb, int rw) |
38 | { | 35 | { |
39 | if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) { | 36 | if (rw == BTRFS_WRITE_LOCK) { |
40 | set_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags); | 37 | if (atomic_read(&eb->blocking_writers) == 0) { |
41 | spin_unlock(&eb->lock); | 38 | WARN_ON(atomic_read(&eb->spinning_writers) != 1); |
39 | atomic_dec(&eb->spinning_writers); | ||
40 | btrfs_assert_tree_locked(eb); | ||
41 | atomic_inc(&eb->blocking_writers); | ||
42 | write_unlock(&eb->lock); | ||
43 | } | ||
44 | } else if (rw == BTRFS_READ_LOCK) { | ||
45 | btrfs_assert_tree_read_locked(eb); | ||
46 | atomic_inc(&eb->blocking_readers); | ||
47 | WARN_ON(atomic_read(&eb->spinning_readers) == 0); | ||
48 | atomic_dec(&eb->spinning_readers); | ||
49 | read_unlock(&eb->lock); | ||
42 | } | 50 | } |
43 | /* exit with the spin lock released and the bit set */ | 51 | return; |
44 | } | 52 | } |
45 | 53 | ||
46 | /* | 54 | /* |
47 | * clearing the blocking flag will take the spinlock again. | 55 | * if we currently have a blocking lock, take the spinlock |
48 | * After this you can't safely schedule | 56 | * and drop our blocking count |
49 | */ | 57 | */ |
50 | void btrfs_clear_lock_blocking(struct extent_buffer *eb) | 58 | void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw) |
51 | { | 59 | { |
52 | if (test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) { | 60 | if (rw == BTRFS_WRITE_LOCK_BLOCKING) { |
53 | spin_nested(eb); | 61 | BUG_ON(atomic_read(&eb->blocking_writers) != 1); |
54 | clear_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags); | 62 | write_lock(&eb->lock); |
55 | smp_mb__after_clear_bit(); | 63 | WARN_ON(atomic_read(&eb->spinning_writers)); |
64 | atomic_inc(&eb->spinning_writers); | ||
65 | if (atomic_dec_and_test(&eb->blocking_writers)) | ||
66 | wake_up(&eb->write_lock_wq); | ||
67 | } else if (rw == BTRFS_READ_LOCK_BLOCKING) { | ||
68 | BUG_ON(atomic_read(&eb->blocking_readers) == 0); | ||
69 | read_lock(&eb->lock); | ||
70 | atomic_inc(&eb->spinning_readers); | ||
71 | if (atomic_dec_and_test(&eb->blocking_readers)) | ||
72 | wake_up(&eb->read_lock_wq); | ||
56 | } | 73 | } |
57 | /* exit with the spin lock held */ | 74 | return; |
58 | } | 75 | } |
59 | 76 | ||
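
The new scheme swaps the old bit-flag spinlock for a rwlock plus per-buffer counters: spinning_readers/spinning_writers count holders inside the rwlock, blocking_readers/blocking_writers count holders who dropped it to sleep, and the two wait queues let the last blocking holder wake the next taker. A compressed userspace model of the read side, hedged (pthread primitives stand in for the kernel rwlock and waitqueues; error handling and counter bookkeeping omitted):

    #include <pthread.h>
    #include <stdatomic.h>

    struct eb_lock {
            pthread_rwlock_t lock;
            atomic_int blocking_writers;
            pthread_mutex_t wq_mutex;   /* stands in for write_lock_wq */
            pthread_cond_t wq;
    };

    /* take a spinning read lock, waiting out any blocking writer,
     * mirroring btrfs_tree_read_lock() */
    static void tree_read_lock(struct eb_lock *eb)
    {
    again:
            pthread_mutex_lock(&eb->wq_mutex);
            while (atomic_load(&eb->blocking_writers))
                    pthread_cond_wait(&eb->wq, &eb->wq_mutex);
            pthread_mutex_unlock(&eb->wq_mutex);

            pthread_rwlock_rdlock(&eb->lock);
            if (atomic_load(&eb->blocking_writers)) {
                    /* a writer went blocking between the check and the
                     * lock: back off and wait, like the goto in btrfs */
                    pthread_rwlock_unlock(&eb->lock);
                    goto again;
            }
    }

    /* trylock variant: never waits, mirroring btrfs_try_tree_read_lock() */
    static int try_tree_read_lock(struct eb_lock *eb)
    {
            if (atomic_load(&eb->blocking_writers))
                    return 0;
            if (pthread_rwlock_tryrdlock(&eb->lock) != 0)
                    return 0;
            if (atomic_load(&eb->blocking_writers)) {
                    pthread_rwlock_unlock(&eb->lock);
                    return 0;
            }
            return 1;
    }
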
60 | /* | 77 | /* |
61 | * unfortunately, many of the places that currently set a lock to blocking | 78 | * take a spinning read lock. This will wait for any blocking |
62 | * don't end up blocking for very long, and often they don't block | 79 | * writers |
63 | * at all. For a dbench 50 run, if we don't spin on the blocking bit | ||
64 | * at all, the context switch rate can jump up to 400,000/sec or more. | ||
65 | * | ||
66 | * So, we're still stuck with this crummy spin on the blocking bit, | ||
67 | * at least until the most common causes of the short blocks | ||
68 | * can be dealt with. | ||
69 | */ | 80 | */ |
70 | static int btrfs_spin_on_block(struct extent_buffer *eb) | 81 | void btrfs_tree_read_lock(struct extent_buffer *eb) |
71 | { | 82 | { |
72 | int i; | 83 | again: |
73 | 84 | wait_event(eb->write_lock_wq, atomic_read(&eb->blocking_writers) == 0); | |
74 | for (i = 0; i < 512; i++) { | 85 | read_lock(&eb->lock); |
75 | if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) | 86 | if (atomic_read(&eb->blocking_writers)) { |
76 | return 1; | 87 | read_unlock(&eb->lock); |
77 | if (need_resched()) | 88 | wait_event(eb->write_lock_wq, |
78 | break; | 89 | atomic_read(&eb->blocking_writers) == 0); |
79 | cpu_relax(); | 90 | goto again; |
80 | } | 91 | } |
81 | return 0; | 92 | atomic_inc(&eb->read_locks); |
93 | atomic_inc(&eb->spinning_readers); | ||
82 | } | 94 | } |
83 | 95 | ||
84 | /* | 96 | /* |
85 | * This is somewhat different from trylock. It will take the | 97 | * returns 1 if we get the read lock and 0 if we don't |
86 | * spinlock but if it finds the lock is set to blocking, it will | 98 | * this won't wait for blocking writers |
87 | * return without the lock held. | ||
88 | * | ||
89 | * returns 1 if it was able to take the lock and zero otherwise | ||
90 | * | ||
91 | * After this call, scheduling is not safe without first calling | ||
92 | * btrfs_set_lock_blocking() | ||
93 | */ | 99 | */ |
94 | int btrfs_try_spin_lock(struct extent_buffer *eb) | 100 | int btrfs_try_tree_read_lock(struct extent_buffer *eb) |
95 | { | 101 | { |
96 | int i; | 102 | if (atomic_read(&eb->blocking_writers)) |
103 | return 0; | ||
97 | 104 | ||
98 | if (btrfs_spin_on_block(eb)) { | 105 | read_lock(&eb->lock); |
99 | spin_nested(eb); | 106 | if (atomic_read(&eb->blocking_writers)) { |
100 | if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) | 107 | read_unlock(&eb->lock); |
101 | return 1; | 108 | return 0; |
102 | spin_unlock(&eb->lock); | ||
103 | } | 109 | } |
104 | /* spin for a bit on the BLOCKING flag */ | 110 | atomic_inc(&eb->read_locks); |
105 | for (i = 0; i < 2; i++) { | 111 | atomic_inc(&eb->spinning_readers); |
106 | cpu_relax(); | 112 | return 1; |
107 | if (!btrfs_spin_on_block(eb)) | ||
108 | break; | ||
109 | |||
110 | spin_nested(eb); | ||
111 | if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) | ||
112 | return 1; | ||
113 | spin_unlock(&eb->lock); | ||
114 | } | ||
115 | return 0; | ||
116 | } | 113 | } |
117 | 114 | ||
118 | /* | 115 | /* |
119 | * the autoremove wake function will return 0 if it tried to wake up | 116 | * returns 1 if we get the write lock and 0 if we don't |
120 | * a process that was already awake, which means that process won't | 117 | * this won't wait for blocking writers or readers |
121 | * count as an exclusive wakeup. The waitq code will continue waking | ||
122 | * procs until it finds one that was actually sleeping. | ||
123 | * | ||
124 | * For btrfs, this isn't quite what we want. We want a single proc | ||
125 | * to be notified that the lock is ready for taking. If that proc | ||
126 | * already happen to be awake, great, it will loop around and try for | ||
127 | * the lock. | ||
128 | * | ||
129 | * So, btrfs_wake_function always returns 1, even when the proc that we | ||
130 | * tried to wake up was already awake. | ||
131 | */ | 118 | */ |
132 | static int btrfs_wake_function(wait_queue_t *wait, unsigned mode, | 119 | int btrfs_try_tree_write_lock(struct extent_buffer *eb) |
133 | int sync, void *key) | ||
134 | { | 120 | { |
135 | autoremove_wake_function(wait, mode, sync, key); | 121 | if (atomic_read(&eb->blocking_writers) || |
122 | atomic_read(&eb->blocking_readers)) | ||
123 | return 0; | ||
124 | write_lock(&eb->lock); | ||
125 | if (atomic_read(&eb->blocking_writers) || | ||
126 | atomic_read(&eb->blocking_readers)) { | ||
127 | write_unlock(&eb->lock); | ||
128 | return 0; | ||
129 | } | ||
130 | atomic_inc(&eb->write_locks); | ||
131 | atomic_inc(&eb->spinning_writers); | ||
136 | return 1; | 132 | return 1; |
137 | } | 133 | } |
138 | 134 | ||
139 | /* | 135 | /* |
140 | * returns with the extent buffer spinlocked. | 136 | * drop a spinning read lock |
141 | * | 137 | */ |
142 | * This will spin and/or wait as required to take the lock, and then | 138 | void btrfs_tree_read_unlock(struct extent_buffer *eb) |
143 | * return with the spinlock held. | 139 | { |
144 | * | 140 | btrfs_assert_tree_read_locked(eb); |
145 | * After this call, scheduling is not safe without first calling | 141 | WARN_ON(atomic_read(&eb->spinning_readers) == 0); |
146 | * btrfs_set_lock_blocking() | 142 | atomic_dec(&eb->spinning_readers); |
143 | atomic_dec(&eb->read_locks); | ||
144 | read_unlock(&eb->lock); | ||
145 | } | ||
146 | |||
147 | /* | ||
148 | * drop a blocking read lock | ||
149 | */ | ||
150 | void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb) | ||
151 | { | ||
152 | btrfs_assert_tree_read_locked(eb); | ||
153 | WARN_ON(atomic_read(&eb->blocking_readers) == 0); | ||
154 | if (atomic_dec_and_test(&eb->blocking_readers)) | ||
155 | wake_up(&eb->read_lock_wq); | ||
156 | atomic_dec(&eb->read_locks); | ||
157 | } | ||
158 | |||
159 | /* | ||
160 | * take a spinning write lock. This will wait for both | ||
161 | * blocking readers and writers | ||
147 | */ | 162 | */ |
148 | int btrfs_tree_lock(struct extent_buffer *eb) | 163 | int btrfs_tree_lock(struct extent_buffer *eb) |
149 | { | 164 | { |
150 | DEFINE_WAIT(wait); | 165 | again: |
151 | wait.func = btrfs_wake_function; | 166 | wait_event(eb->read_lock_wq, atomic_read(&eb->blocking_readers) == 0); |
152 | 167 | wait_event(eb->write_lock_wq, atomic_read(&eb->blocking_writers) == 0); | |
153 | if (!btrfs_spin_on_block(eb)) | 168 | write_lock(&eb->lock); |
154 | goto sleep; | 169 | if (atomic_read(&eb->blocking_readers)) { |
155 | 170 | write_unlock(&eb->lock); | |
156 | while(1) { | 171 | wait_event(eb->read_lock_wq, |
157 | spin_nested(eb); | 172 | atomic_read(&eb->blocking_readers) == 0); |
158 | 173 | goto again; | |
159 | /* nobody is blocking, exit with the spinlock held */ | ||
160 | if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) | ||
161 | return 0; | ||
162 | |||
163 | /* | ||
164 | * we have the spinlock, but the real owner is blocking. | ||
165 | * wait for them | ||
166 | */ | ||
167 | spin_unlock(&eb->lock); | ||
168 | |||
169 | /* | ||
170 | * spin for a bit, and if the blocking flag goes away, | ||
171 | * loop around | ||
172 | */ | ||
173 | cpu_relax(); | ||
174 | if (btrfs_spin_on_block(eb)) | ||
175 | continue; | ||
176 | sleep: | ||
177 | prepare_to_wait_exclusive(&eb->lock_wq, &wait, | ||
178 | TASK_UNINTERRUPTIBLE); | ||
179 | |||
180 | if (test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) | ||
181 | schedule(); | ||
182 | |||
183 | finish_wait(&eb->lock_wq, &wait); | ||
184 | } | 174 | } |
175 | if (atomic_read(&eb->blocking_writers)) { | ||
176 | write_unlock(&eb->lock); | ||
177 | wait_event(eb->write_lock_wq, | ||
178 | atomic_read(&eb->blocking_writers) == 0); | ||
179 | goto again; | ||
180 | } | ||
181 | WARN_ON(atomic_read(&eb->spinning_writers)); | ||
182 | atomic_inc(&eb->spinning_writers); | ||
183 | atomic_inc(&eb->write_locks); | ||
185 | return 0; | 184 | return 0; |
186 | } | 185 | } |
187 | 186 | ||
187 | /* | ||
188 | * drop a spinning or a blocking write lock. | ||
189 | */ | ||
188 | int btrfs_tree_unlock(struct extent_buffer *eb) | 190 | int btrfs_tree_unlock(struct extent_buffer *eb) |
189 | { | 191 | { |
190 | /* | 192 | int blockers = atomic_read(&eb->blocking_writers); |
191 | * if we were a blocking owner, we don't have the spinlock held | 193 | |
192 | * just clear the bit and look for waiters | 194 | BUG_ON(blockers > 1); |
193 | */ | 195 | |
194 | if (test_and_clear_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) | 196 | btrfs_assert_tree_locked(eb); |
195 | smp_mb__after_clear_bit(); | 197 | atomic_dec(&eb->write_locks); |
196 | else | 198 | |
197 | spin_unlock(&eb->lock); | 199 | if (blockers) { |
198 | 200 | WARN_ON(atomic_read(&eb->spinning_writers)); | |
199 | if (waitqueue_active(&eb->lock_wq)) | 201 | atomic_dec(&eb->blocking_writers); |
200 | wake_up(&eb->lock_wq); | 202 | smp_wmb(); |
203 | wake_up(&eb->write_lock_wq); | ||
204 | } else { | ||
205 | WARN_ON(atomic_read(&eb->spinning_writers) != 1); | ||
206 | atomic_dec(&eb->spinning_writers); | ||
207 | write_unlock(&eb->lock); | ||
208 | } | ||
201 | return 0; | 209 | return 0; |
202 | } | 210 | } |
203 | 211 | ||
204 | void btrfs_assert_tree_locked(struct extent_buffer *eb) | 212 | void btrfs_assert_tree_locked(struct extent_buffer *eb) |
205 | { | 213 | { |
206 | if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) | 214 | BUG_ON(!atomic_read(&eb->write_locks)); |
207 | assert_spin_locked(&eb->lock); | 215 | } |
216 | |||
217 | void btrfs_assert_tree_read_locked(struct extent_buffer *eb) | ||
218 | { | ||
219 | BUG_ON(!atomic_read(&eb->read_locks)); | ||
208 | } | 220 | } |
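The rewrite of locking.c above trades the old spinlock-plus-EXTENT_BUFFER_BLOCKING scheme for an rwlock gated by per-buffer atomic counters (blocking_readers/blocking_writers, spinning_readers/spinning_writers, read_locks/write_locks) and two wait queues. A minimal userspace model of the reader-side gate, with pthreads and C11 atomics standing in for eb->lock, eb->blocking_writers and eb->write_lock_wq; an illustrative sketch only, not kernel code:

#include <pthread.h>
#include <stdatomic.h>

struct eb_model {
    pthread_rwlock_t lock;          /* stands in for eb->lock */
    atomic_int blocking_writers;    /* stands in for eb->blocking_writers */
    pthread_mutex_t wq_mutex;       /* wait-queue replacement */
    pthread_cond_t write_lock_wq;   /* stands in for eb->write_lock_wq;
                                     * the writer side broadcasts on it when
                                     * blocking_writers drops to zero, like
                                     * wake_up(&eb->write_lock_wq) above */
};

/* models btrfs_tree_read_lock(): readers must not slip past a writer
 * that has gone blocking */
static void model_tree_read_lock(struct eb_model *eb)
{
    for (;;) {
        /* wait until no writer holds the buffer in blocking mode */
        pthread_mutex_lock(&eb->wq_mutex);
        while (atomic_load(&eb->blocking_writers))
            pthread_cond_wait(&eb->write_lock_wq, &eb->wq_mutex);
        pthread_mutex_unlock(&eb->wq_mutex);

        pthread_rwlock_rdlock(&eb->lock);
        /* recheck under the lock: a writer may have gone blocking
         * between the wait and the rdlock; this is the race the
         * kernel code above handles with its "goto again" */
        if (!atomic_load(&eb->blocking_writers))
            return;
        pthread_rwlock_unlock(&eb->lock);
    }
}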
diff --git a/fs/btrfs/locking.h b/fs/btrfs/locking.h index 5c33a560a2f..17247ddb81a 100644 --- a/fs/btrfs/locking.h +++ b/fs/btrfs/locking.h | |||
@@ -19,11 +19,43 @@ | |||
19 | #ifndef __BTRFS_LOCKING_ | 19 | #ifndef __BTRFS_LOCKING_ |
20 | #define __BTRFS_LOCKING_ | 20 | #define __BTRFS_LOCKING_ |
21 | 21 | ||
22 | #define BTRFS_WRITE_LOCK 1 | ||
23 | #define BTRFS_READ_LOCK 2 | ||
24 | #define BTRFS_WRITE_LOCK_BLOCKING 3 | ||
25 | #define BTRFS_READ_LOCK_BLOCKING 4 | ||
26 | |||
22 | int btrfs_tree_lock(struct extent_buffer *eb); | 27 | int btrfs_tree_lock(struct extent_buffer *eb); |
23 | int btrfs_tree_unlock(struct extent_buffer *eb); | 28 | int btrfs_tree_unlock(struct extent_buffer *eb); |
24 | int btrfs_try_spin_lock(struct extent_buffer *eb); | 29 | int btrfs_try_spin_lock(struct extent_buffer *eb); |
25 | 30 | ||
26 | void btrfs_set_lock_blocking(struct extent_buffer *eb); | 31 | void btrfs_tree_read_lock(struct extent_buffer *eb); |
27 | void btrfs_clear_lock_blocking(struct extent_buffer *eb); | 32 | void btrfs_tree_read_unlock(struct extent_buffer *eb); |
33 | void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb); | ||
34 | void btrfs_set_lock_blocking_rw(struct extent_buffer *eb, int rw); | ||
35 | void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw); | ||
28 | void btrfs_assert_tree_locked(struct extent_buffer *eb); | 36 | void btrfs_assert_tree_locked(struct extent_buffer *eb); |
37 | int btrfs_try_tree_read_lock(struct extent_buffer *eb); | ||
38 | int btrfs_try_tree_write_lock(struct extent_buffer *eb); | ||
39 | |||
40 | static inline void btrfs_tree_unlock_rw(struct extent_buffer *eb, int rw) | ||
41 | { | ||
42 | if (rw == BTRFS_WRITE_LOCK || rw == BTRFS_WRITE_LOCK_BLOCKING) | ||
43 | btrfs_tree_unlock(eb); | ||
44 | else if (rw == BTRFS_READ_LOCK_BLOCKING) | ||
45 | btrfs_tree_read_unlock_blocking(eb); | ||
46 | else if (rw == BTRFS_READ_LOCK) | ||
47 | btrfs_tree_read_unlock(eb); | ||
48 | else | ||
49 | BUG(); | ||
50 | } | ||
51 | |||
52 | static inline void btrfs_set_lock_blocking(struct extent_buffer *eb) | ||
53 | { | ||
54 | btrfs_set_lock_blocking_rw(eb, BTRFS_WRITE_LOCK); | ||
55 | } | ||
56 | |||
57 | static inline void btrfs_clear_lock_blocking(struct extent_buffer *eb) | ||
58 | { | ||
59 | btrfs_clear_lock_blocking_rw(eb, BTRFS_WRITE_LOCK_BLOCKING); | ||
60 | } | ||
29 | #endif | 61 | #endif |
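Taken together, the new constants and helpers in locking.h let a caller track which flavor of lock it currently holds and hand that state back when unlocking. A hedged sketch of the intended lifecycle; do_something_that_sleeps() is a hypothetical stand-in for any work that may schedule:

void do_something_that_sleeps(struct extent_buffer *eb);  /* hypothetical */

static void walk_one_buffer(struct extent_buffer *eb)
{
    int lock_state;

    btrfs_tree_read_lock(eb);               /* spinning read lock */
    lock_state = BTRFS_READ_LOCK;

    /* about to do work that may schedule: convert to blocking */
    btrfs_set_lock_blocking_rw(eb, lock_state);
    lock_state = BTRFS_READ_LOCK_BLOCKING;

    do_something_that_sleeps(eb);

    /* drops whichever flavor is currently held */
    btrfs_tree_unlock_rw(eb, lock_state);
}

Carrying the state in a local like this mirrors how callers can record per-level lock state and defer the choice of unlock path to btrfs_tree_unlock_rw().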
diff --git a/fs/btrfs/ref-cache.c b/fs/btrfs/ref-cache.c deleted file mode 100644 index 82d569cb626..00000000000 --- a/fs/btrfs/ref-cache.c +++ /dev/null | |||
@@ -1,68 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2008 Oracle. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public | ||
6 | * License v2 as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
11 | * General Public License for more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public | ||
14 | * License along with this program; if not, write to the | ||
15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
16 | * Boston, MA 021110-1307, USA. | ||
17 | */ | ||
18 | |||
19 | #include <linux/sched.h> | ||
20 | #include <linux/slab.h> | ||
21 | #include <linux/sort.h> | ||
22 | #include "ctree.h" | ||
23 | #include "ref-cache.h" | ||
24 | #include "transaction.h" | ||
25 | |||
26 | static struct rb_node *tree_insert(struct rb_root *root, u64 bytenr, | ||
27 | struct rb_node *node) | ||
28 | { | ||
29 | struct rb_node **p = &root->rb_node; | ||
30 | struct rb_node *parent = NULL; | ||
31 | struct btrfs_leaf_ref *entry; | ||
32 | |||
33 | while (*p) { | ||
34 | parent = *p; | ||
35 | entry = rb_entry(parent, struct btrfs_leaf_ref, rb_node); | ||
36 | |||
37 | if (bytenr < entry->bytenr) | ||
38 | p = &(*p)->rb_left; | ||
39 | else if (bytenr > entry->bytenr) | ||
40 | p = &(*p)->rb_right; | ||
41 | else | ||
42 | return parent; | ||
43 | } | ||
44 | |||
45 | entry = rb_entry(node, struct btrfs_leaf_ref, rb_node); | ||
46 | rb_link_node(node, parent, p); | ||
47 | rb_insert_color(node, root); | ||
48 | return NULL; | ||
49 | } | ||
50 | |||
51 | static struct rb_node *tree_search(struct rb_root *root, u64 bytenr) | ||
52 | { | ||
53 | struct rb_node *n = root->rb_node; | ||
54 | struct btrfs_leaf_ref *entry; | ||
55 | |||
56 | while (n) { | ||
57 | entry = rb_entry(n, struct btrfs_leaf_ref, rb_node); | ||
58 | WARN_ON(!entry->in_tree); | ||
59 | |||
60 | if (bytenr < entry->bytenr) | ||
61 | n = n->rb_left; | ||
62 | else if (bytenr > entry->bytenr) | ||
63 | n = n->rb_right; | ||
64 | else | ||
65 | return n; | ||
66 | } | ||
67 | return NULL; | ||
68 | } | ||
diff --git a/fs/btrfs/ref-cache.h b/fs/btrfs/ref-cache.h deleted file mode 100644 index 24f7001f638..00000000000 --- a/fs/btrfs/ref-cache.h +++ /dev/null | |||
@@ -1,52 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2008 Oracle. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public | ||
6 | * License v2 as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
11 | * General Public License for more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public | ||
14 | * License along with this program; if not, write to the | ||
15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
16 | * Boston, MA 021110-1307, USA. | ||
17 | */ | ||
18 | #ifndef __REFCACHE__ | ||
19 | #define __REFCACHE__ | ||
20 | |||
21 | struct btrfs_extent_info { | ||
22 | /* bytenr and num_bytes find the extent in the extent allocation tree */ | ||
23 | u64 bytenr; | ||
24 | u64 num_bytes; | ||
25 | |||
26 | /* objectid and offset find the back reference for the file */ | ||
27 | u64 objectid; | ||
28 | u64 offset; | ||
29 | }; | ||
30 | |||
31 | struct btrfs_leaf_ref { | ||
32 | struct rb_node rb_node; | ||
33 | struct btrfs_leaf_ref_tree *tree; | ||
34 | int in_tree; | ||
35 | atomic_t usage; | ||
36 | |||
37 | u64 root_gen; | ||
38 | u64 bytenr; | ||
39 | u64 owner; | ||
40 | u64 generation; | ||
41 | int nritems; | ||
42 | |||
43 | struct list_head list; | ||
44 | struct btrfs_extent_info extents[]; | ||
45 | }; | ||
46 | |||
47 | static inline size_t btrfs_leaf_ref_size(int nr_extents) | ||
48 | { | ||
49 | return sizeof(struct btrfs_leaf_ref) + | ||
50 | sizeof(struct btrfs_extent_info) * nr_extents; | ||
51 | } | ||
52 | #endif | ||
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 5e0a3dc79a4..59bb1764273 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c | |||
@@ -2955,7 +2955,8 @@ static int relocate_file_extent_cluster(struct inode *inode, | |||
2955 | page_cache_sync_readahead(inode->i_mapping, | 2955 | page_cache_sync_readahead(inode->i_mapping, |
2956 | ra, NULL, index, | 2956 | ra, NULL, index, |
2957 | last_index + 1 - index); | 2957 | last_index + 1 - index); |
2958 | page = grab_cache_page(inode->i_mapping, index); | 2958 | page = find_or_create_page(inode->i_mapping, index, |
2959 | GFP_NOFS); | ||
2959 | if (!page) { | 2960 | if (!page) { |
2960 | btrfs_delalloc_release_metadata(inode, | 2961 | btrfs_delalloc_release_metadata(inode, |
2961 | PAGE_CACHE_SIZE); | 2962 | PAGE_CACHE_SIZE); |
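The relocation.c hunk is a latent-deadlock fix rather than a cleanup: grab_cache_page() allocates with the mapping's gfp mask, which can include __GFP_FS and therefore re-enter the filesystem from memory reclaim while relocation holds btrfs locks. find_or_create_page(..., GFP_NOFS) is the same lookup with the allocation context pinned, as this sketch of the two forms shows:

#include <linux/pagemap.h>

/* grab_cache_page() is find_or_create_page() with the mapping's own mask: */
static inline struct page *grab_cache_page_equiv(struct address_space *mapping,
                                                 pgoff_t index)
{
    return find_or_create_page(mapping, index, mapping_gfp_mask(mapping));
}

/* what the hunk above switches to: same lookup, but reclaim can no
 * longer recurse into the filesystem */
static inline struct page *grab_cache_page_nofs(struct address_space *mapping,
                                                pgoff_t index)
{
    return find_or_create_page(mapping, index, GFP_NOFS);
}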
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index ebe45443de0..f4099904565 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c | |||
@@ -71,13 +71,12 @@ out: | |||
71 | return ret; | 71 | return ret; |
72 | } | 72 | } |
73 | 73 | ||
74 | int btrfs_set_root_node(struct btrfs_root_item *item, | 74 | void btrfs_set_root_node(struct btrfs_root_item *item, |
75 | struct extent_buffer *node) | 75 | struct extent_buffer *node) |
76 | { | 76 | { |
77 | btrfs_set_root_bytenr(item, node->start); | 77 | btrfs_set_root_bytenr(item, node->start); |
78 | btrfs_set_root_level(item, btrfs_header_level(node)); | 78 | btrfs_set_root_level(item, btrfs_header_level(node)); |
79 | btrfs_set_root_generation(item, btrfs_header_generation(node)); | 79 | btrfs_set_root_generation(item, btrfs_header_generation(node)); |
80 | return 0; | ||
81 | } | 80 | } |
82 | 81 | ||
83 | /* | 82 | /* |
diff --git a/fs/btrfs/struct-funcs.c b/fs/btrfs/struct-funcs.c index c0f7ecaf1e7..bc1f6ad1844 100644 --- a/fs/btrfs/struct-funcs.c +++ b/fs/btrfs/struct-funcs.c | |||
@@ -50,36 +50,22 @@ u##bits btrfs_##name(struct extent_buffer *eb, \ | |||
50 | unsigned long part_offset = (unsigned long)s; \ | 50 | unsigned long part_offset = (unsigned long)s; \ |
51 | unsigned long offset = part_offset + offsetof(type, member); \ | 51 | unsigned long offset = part_offset + offsetof(type, member); \ |
52 | type *p; \ | 52 | type *p; \ |
53 | /* ugly, but we want the fast path here */ \ | 53 | int err; \ |
54 | if (eb->map_token && offset >= eb->map_start && \ | 54 | char *kaddr; \ |
55 | offset + sizeof(((type *)0)->member) <= eb->map_start + \ | 55 | unsigned long map_start; \ |
56 | eb->map_len) { \ | 56 | unsigned long map_len; \ |
57 | p = (type *)(eb->kaddr + part_offset - eb->map_start); \ | 57 | u##bits res; \ |
58 | return le##bits##_to_cpu(p->member); \ | 58 | err = map_private_extent_buffer(eb, offset, \ |
59 | } \ | 59 | sizeof(((type *)0)->member), \ |
60 | { \ | 60 | &kaddr, &map_start, &map_len); \ |
61 | int err; \ | 61 | if (err) { \ |
62 | char *map_token; \ | 62 | __le##bits leres; \ |
63 | char *kaddr; \ | 63 | read_eb_member(eb, s, type, member, &leres); \ |
64 | int unmap_on_exit = (eb->map_token == NULL); \ | 64 | return le##bits##_to_cpu(leres); \ |
65 | unsigned long map_start; \ | 65 | } \ |
66 | unsigned long map_len; \ | 66 | p = (type *)(kaddr + part_offset - map_start); \ |
67 | u##bits res; \ | 67 | res = le##bits##_to_cpu(p->member); \ |
68 | err = map_extent_buffer(eb, offset, \ | 68 | return res; \ |
69 | sizeof(((type *)0)->member), \ | ||
70 | &map_token, &kaddr, \ | ||
71 | &map_start, &map_len, KM_USER1); \ | ||
72 | if (err) { \ | ||
73 | __le##bits leres; \ | ||
74 | read_eb_member(eb, s, type, member, &leres); \ | ||
75 | return le##bits##_to_cpu(leres); \ | ||
76 | } \ | ||
77 | p = (type *)(kaddr + part_offset - map_start); \ | ||
78 | res = le##bits##_to_cpu(p->member); \ | ||
79 | if (unmap_on_exit) \ | ||
80 | unmap_extent_buffer(eb, map_token, KM_USER1); \ | ||
81 | return res; \ | ||
82 | } \ | ||
83 | } \ | 69 | } \ |
84 | void btrfs_set_##name(struct extent_buffer *eb, \ | 70 | void btrfs_set_##name(struct extent_buffer *eb, \ |
85 | type *s, u##bits val) \ | 71 | type *s, u##bits val) \ |
@@ -87,36 +73,21 @@ void btrfs_set_##name(struct extent_buffer *eb, \ | |||
87 | unsigned long part_offset = (unsigned long)s; \ | 73 | unsigned long part_offset = (unsigned long)s; \ |
88 | unsigned long offset = part_offset + offsetof(type, member); \ | 74 | unsigned long offset = part_offset + offsetof(type, member); \ |
89 | type *p; \ | 75 | type *p; \ |
90 | /* ugly, but we want the fast path here */ \ | 76 | int err; \ |
91 | if (eb->map_token && offset >= eb->map_start && \ | 77 | char *kaddr; \ |
92 | offset + sizeof(((type *)0)->member) <= eb->map_start + \ | 78 | unsigned long map_start; \ |
93 | eb->map_len) { \ | 79 | unsigned long map_len; \ |
94 | p = (type *)(eb->kaddr + part_offset - eb->map_start); \ | 80 | err = map_private_extent_buffer(eb, offset, \ |
95 | p->member = cpu_to_le##bits(val); \ | 81 | sizeof(((type *)0)->member), \ |
96 | return; \ | 82 | &kaddr, &map_start, &map_len); \ |
97 | } \ | 83 | if (err) { \ |
98 | { \ | 84 | __le##bits val2; \ |
99 | int err; \ | 85 | val2 = cpu_to_le##bits(val); \ |
100 | char *map_token; \ | 86 | write_eb_member(eb, s, type, member, &val2); \ |
101 | char *kaddr; \ | 87 | return; \ |
102 | int unmap_on_exit = (eb->map_token == NULL); \ | 88 | } \ |
103 | unsigned long map_start; \ | 89 | p = (type *)(kaddr + part_offset - map_start); \ |
104 | unsigned long map_len; \ | 90 | p->member = cpu_to_le##bits(val); \ |
105 | err = map_extent_buffer(eb, offset, \ | ||
106 | sizeof(((type *)0)->member), \ | ||
107 | &map_token, &kaddr, \ | ||
108 | &map_start, &map_len, KM_USER1); \ | ||
109 | if (err) { \ | ||
110 | __le##bits val2; \ | ||
111 | val2 = cpu_to_le##bits(val); \ | ||
112 | write_eb_member(eb, s, type, member, &val2); \ | ||
113 | return; \ | ||
114 | } \ | ||
115 | p = (type *)(kaddr + part_offset - map_start); \ | ||
116 | p->member = cpu_to_le##bits(val); \ | ||
117 | if (unmap_on_exit) \ | ||
118 | unmap_extent_buffer(eb, map_token, KM_USER1); \ | ||
119 | } \ | ||
120 | } | 91 | } |
121 | 92 | ||
122 | #include "ctree.h" | 93 | #include "ctree.h" |
@@ -125,15 +96,6 @@ void btrfs_node_key(struct extent_buffer *eb, | |||
125 | struct btrfs_disk_key *disk_key, int nr) | 96 | struct btrfs_disk_key *disk_key, int nr) |
126 | { | 97 | { |
127 | unsigned long ptr = btrfs_node_key_ptr_offset(nr); | 98 | unsigned long ptr = btrfs_node_key_ptr_offset(nr); |
128 | if (eb->map_token && ptr >= eb->map_start && | ||
129 | ptr + sizeof(*disk_key) <= eb->map_start + eb->map_len) { | ||
130 | memcpy(disk_key, eb->kaddr + ptr - eb->map_start, | ||
131 | sizeof(*disk_key)); | ||
132 | return; | ||
133 | } else if (eb->map_token) { | ||
134 | unmap_extent_buffer(eb, eb->map_token, KM_USER1); | ||
135 | eb->map_token = NULL; | ||
136 | } | ||
137 | read_eb_member(eb, (struct btrfs_key_ptr *)ptr, | 99 | read_eb_member(eb, (struct btrfs_key_ptr *)ptr, |
138 | struct btrfs_key_ptr, key, disk_key); | 100 | struct btrfs_key_ptr, key, disk_key); |
139 | } | 101 | } |
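With the cached map_token fields gone from struct extent_buffer, every generated accessor now maps its page on each call through map_private_extent_buffer(). Expanded by hand (and slightly simplified), the getter half of the macro produces roughly this for the real btrfs_item_offset() accessor:

u32 btrfs_item_offset(struct extent_buffer *eb, struct btrfs_item *s)
{
    unsigned long part_offset = (unsigned long)s;
    unsigned long offset = part_offset + offsetof(struct btrfs_item, offset);
    unsigned long map_start;
    unsigned long map_len;
    char *kaddr;
    int err;

    err = map_private_extent_buffer(eb, offset, sizeof(__le32),
                                    &kaddr, &map_start, &map_len);
    if (err) {
        /* member straddles a page boundary: take the slow copy path */
        __le32 leres;
        read_eb_member(eb, s, struct btrfs_item, offset, &leres);
        return le32_to_cpu(leres);
    }
    return le32_to_cpu(((struct btrfs_item *)(kaddr + part_offset -
                                              map_start))->offset);
}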
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 51dcec86757..e24b7964a15 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
@@ -216,17 +216,11 @@ static void wait_current_trans(struct btrfs_root *root) | |||
216 | spin_lock(&root->fs_info->trans_lock); | 216 | spin_lock(&root->fs_info->trans_lock); |
217 | cur_trans = root->fs_info->running_transaction; | 217 | cur_trans = root->fs_info->running_transaction; |
218 | if (cur_trans && cur_trans->blocked) { | 218 | if (cur_trans && cur_trans->blocked) { |
219 | DEFINE_WAIT(wait); | ||
220 | atomic_inc(&cur_trans->use_count); | 219 | atomic_inc(&cur_trans->use_count); |
221 | spin_unlock(&root->fs_info->trans_lock); | 220 | spin_unlock(&root->fs_info->trans_lock); |
222 | while (1) { | 221 | |
223 | prepare_to_wait(&root->fs_info->transaction_wait, &wait, | 222 | wait_event(root->fs_info->transaction_wait, |
224 | TASK_UNINTERRUPTIBLE); | 223 | !cur_trans->blocked); |
225 | if (!cur_trans->blocked) | ||
226 | break; | ||
227 | schedule(); | ||
228 | } | ||
229 | finish_wait(&root->fs_info->transaction_wait, &wait); | ||
230 | put_transaction(cur_trans); | 224 | put_transaction(cur_trans); |
231 | } else { | 225 | } else { |
232 | spin_unlock(&root->fs_info->trans_lock); | 226 | spin_unlock(&root->fs_info->trans_lock); |
@@ -260,7 +254,7 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, | |||
260 | { | 254 | { |
261 | struct btrfs_trans_handle *h; | 255 | struct btrfs_trans_handle *h; |
262 | struct btrfs_transaction *cur_trans; | 256 | struct btrfs_transaction *cur_trans; |
263 | int retries = 0; | 257 | u64 num_bytes = 0; |
264 | int ret; | 258 | int ret; |
265 | 259 | ||
266 | if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) | 260 | if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) |
@@ -274,6 +268,19 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, | |||
274 | h->block_rsv = NULL; | 268 | h->block_rsv = NULL; |
275 | goto got_it; | 269 | goto got_it; |
276 | } | 270 | } |
271 | |||
272 | /* | ||
273 | * Do the reservation before we join the transaction so we can do all | ||
274 | * the appropriate flushing if need be. | ||
275 | */ | ||
276 | if (num_items > 0 && root != root->fs_info->chunk_root) { | ||
277 | num_bytes = btrfs_calc_trans_metadata_size(root, num_items); | ||
278 | ret = btrfs_block_rsv_add(NULL, root, | ||
279 | &root->fs_info->trans_block_rsv, | ||
280 | num_bytes); | ||
281 | if (ret) | ||
282 | return ERR_PTR(ret); | ||
283 | } | ||
277 | again: | 284 | again: |
278 | h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); | 285 | h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); |
279 | if (!h) | 286 | if (!h) |
@@ -310,24 +317,9 @@ again: | |||
310 | goto again; | 317 | goto again; |
311 | } | 318 | } |
312 | 319 | ||
313 | if (num_items > 0) { | 320 | if (num_bytes) { |
314 | ret = btrfs_trans_reserve_metadata(h, root, num_items); | 321 | h->block_rsv = &root->fs_info->trans_block_rsv; |
315 | if (ret == -EAGAIN && !retries) { | 322 | h->bytes_reserved = num_bytes; |
316 | retries++; | ||
317 | btrfs_commit_transaction(h, root); | ||
318 | goto again; | ||
319 | } else if (ret == -EAGAIN) { | ||
320 | /* | ||
321 | * We have already retried and got EAGAIN, so really we | ||
322 | * don't have space, so set ret to -ENOSPC. | ||
323 | */ | ||
324 | ret = -ENOSPC; | ||
325 | } | ||
326 | |||
327 | if (ret < 0) { | ||
328 | btrfs_end_transaction(h, root); | ||
329 | return ERR_PTR(ret); | ||
330 | } | ||
331 | } | 323 | } |
332 | 324 | ||
333 | got_it: | 325 | got_it: |
@@ -359,19 +351,10 @@ struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root | |||
359 | } | 351 | } |
360 | 352 | ||
361 | /* wait for a transaction commit to be fully complete */ | 353 | /* wait for a transaction commit to be fully complete */ |
362 | static noinline int wait_for_commit(struct btrfs_root *root, | 354 | static noinline void wait_for_commit(struct btrfs_root *root, |
363 | struct btrfs_transaction *commit) | 355 | struct btrfs_transaction *commit) |
364 | { | 356 | { |
365 | DEFINE_WAIT(wait); | 357 | wait_event(commit->commit_wait, commit->commit_done); |
366 | while (!commit->commit_done) { | ||
367 | prepare_to_wait(&commit->commit_wait, &wait, | ||
368 | TASK_UNINTERRUPTIBLE); | ||
369 | if (commit->commit_done) | ||
370 | break; | ||
371 | schedule(); | ||
372 | } | ||
373 | finish_wait(&commit->commit_wait, &wait); | ||
374 | return 0; | ||
375 | } | 358 | } |
376 | 359 | ||
377 | int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid) | 360 | int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid) |
@@ -499,10 +482,17 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
499 | } | 482 | } |
500 | 483 | ||
501 | if (lock && cur_trans->blocked && !cur_trans->in_commit) { | 484 | if (lock && cur_trans->blocked && !cur_trans->in_commit) { |
502 | if (throttle) | 485 | if (throttle) { |
486 | /* | ||
487 | * We may race with somebody else here and end up having | ||
488 | * to call end_transaction on ourselves again, so inc | ||
489 | * our use_count. | ||
490 | */ | ||
491 | trans->use_count++; | ||
503 | return btrfs_commit_transaction(trans, root); | 492 | return btrfs_commit_transaction(trans, root); |
504 | else | 493 | } else { |
505 | wake_up_process(info->transaction_kthread); | 494 | wake_up_process(info->transaction_kthread); |
495 | } | ||
506 | } | 496 | } |
507 | 497 | ||
508 | WARN_ON(cur_trans != info->running_transaction); | 498 | WARN_ON(cur_trans != info->running_transaction); |
@@ -894,6 +884,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
894 | struct btrfs_root *tree_root = fs_info->tree_root; | 884 | struct btrfs_root *tree_root = fs_info->tree_root; |
895 | struct btrfs_root *root = pending->root; | 885 | struct btrfs_root *root = pending->root; |
896 | struct btrfs_root *parent_root; | 886 | struct btrfs_root *parent_root; |
887 | struct btrfs_block_rsv *rsv; | ||
897 | struct inode *parent_inode; | 888 | struct inode *parent_inode; |
898 | struct dentry *parent; | 889 | struct dentry *parent; |
899 | struct dentry *dentry; | 890 | struct dentry *dentry; |
@@ -905,6 +896,8 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
905 | u64 objectid; | 896 | u64 objectid; |
906 | u64 root_flags; | 897 | u64 root_flags; |
907 | 898 | ||
899 | rsv = trans->block_rsv; | ||
900 | |||
908 | new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS); | 901 | new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS); |
909 | if (!new_root_item) { | 902 | if (!new_root_item) { |
910 | pending->error = -ENOMEM; | 903 | pending->error = -ENOMEM; |
@@ -1012,6 +1005,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
1012 | btrfs_orphan_post_snapshot(trans, pending); | 1005 | btrfs_orphan_post_snapshot(trans, pending); |
1013 | fail: | 1006 | fail: |
1014 | kfree(new_root_item); | 1007 | kfree(new_root_item); |
1008 | trans->block_rsv = rsv; | ||
1015 | btrfs_block_rsv_release(root, &pending->block_rsv, (u64)-1); | 1009 | btrfs_block_rsv_release(root, &pending->block_rsv, (u64)-1); |
1016 | return 0; | 1010 | return 0; |
1017 | } | 1011 | } |
@@ -1080,22 +1074,7 @@ int btrfs_transaction_blocked(struct btrfs_fs_info *info) | |||
1080 | static void wait_current_trans_commit_start(struct btrfs_root *root, | 1074 | static void wait_current_trans_commit_start(struct btrfs_root *root, |
1081 | struct btrfs_transaction *trans) | 1075 | struct btrfs_transaction *trans) |
1082 | { | 1076 | { |
1083 | DEFINE_WAIT(wait); | 1077 | wait_event(root->fs_info->transaction_blocked_wait, trans->in_commit); |
1084 | |||
1085 | if (trans->in_commit) | ||
1086 | return; | ||
1087 | |||
1088 | while (1) { | ||
1089 | prepare_to_wait(&root->fs_info->transaction_blocked_wait, &wait, | ||
1090 | TASK_UNINTERRUPTIBLE); | ||
1091 | if (trans->in_commit) { | ||
1092 | finish_wait(&root->fs_info->transaction_blocked_wait, | ||
1093 | &wait); | ||
1094 | break; | ||
1095 | } | ||
1096 | schedule(); | ||
1097 | finish_wait(&root->fs_info->transaction_blocked_wait, &wait); | ||
1098 | } | ||
1099 | } | 1078 | } |
1100 | 1079 | ||
1101 | /* | 1080 | /* |
@@ -1105,24 +1084,8 @@ static void wait_current_trans_commit_start(struct btrfs_root *root, | |||
1105 | static void wait_current_trans_commit_start_and_unblock(struct btrfs_root *root, | 1084 | static void wait_current_trans_commit_start_and_unblock(struct btrfs_root *root, |
1106 | struct btrfs_transaction *trans) | 1085 | struct btrfs_transaction *trans) |
1107 | { | 1086 | { |
1108 | DEFINE_WAIT(wait); | 1087 | wait_event(root->fs_info->transaction_wait, |
1109 | 1088 | trans->commit_done || (trans->in_commit && !trans->blocked)); | |
1110 | if (trans->commit_done || (trans->in_commit && !trans->blocked)) | ||
1111 | return; | ||
1112 | |||
1113 | while (1) { | ||
1114 | prepare_to_wait(&root->fs_info->transaction_wait, &wait, | ||
1115 | TASK_UNINTERRUPTIBLE); | ||
1116 | if (trans->commit_done || | ||
1117 | (trans->in_commit && !trans->blocked)) { | ||
1118 | finish_wait(&root->fs_info->transaction_wait, | ||
1119 | &wait); | ||
1120 | break; | ||
1121 | } | ||
1122 | schedule(); | ||
1123 | finish_wait(&root->fs_info->transaction_wait, | ||
1124 | &wait); | ||
1125 | } | ||
1126 | } | 1089 | } |
1127 | 1090 | ||
1128 | /* | 1091 | /* |
@@ -1229,8 +1192,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1229 | atomic_inc(&cur_trans->use_count); | 1192 | atomic_inc(&cur_trans->use_count); |
1230 | btrfs_end_transaction(trans, root); | 1193 | btrfs_end_transaction(trans, root); |
1231 | 1194 | ||
1232 | ret = wait_for_commit(root, cur_trans); | 1195 | wait_for_commit(root, cur_trans); |
1233 | BUG_ON(ret); | ||
1234 | 1196 | ||
1235 | put_transaction(cur_trans); | 1197 | put_transaction(cur_trans); |
1236 | 1198 | ||
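Each of the open-coded prepare_to_wait()/schedule()/finish_wait() loops removed from transaction.c is behaviorally identical to the wait_event() call that replaces it: wait_event() sleeps in TASK_UNINTERRUPTIBLE and re-tests the condition after every wakeup. Roughly, and glossing over the helper macros in include/linux/wait.h, wait_event(wq, condition) expands to the loop it replaces:

#define sketch_wait_event(wq, condition)                            \
do {                                                                \
    DEFINE_WAIT(__wait);                                            \
    if (condition)                                                  \
        break;          /* fast path: nothing to wait for */        \
    for (;;) {                                                      \
        prepare_to_wait(&wq, &__wait, TASK_UNINTERRUPTIBLE);        \
        if (condition)                                              \
            break;                                                  \
        schedule();                                                 \
    }                                                               \
    finish_wait(&wq, &__wait);                                      \
} while (0)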
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 4ce8a9f41d1..786639fca06 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
@@ -799,14 +799,15 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, | |||
799 | struct extent_buffer *eb, int slot, | 799 | struct extent_buffer *eb, int slot, |
800 | struct btrfs_key *key) | 800 | struct btrfs_key *key) |
801 | { | 801 | { |
802 | struct inode *dir; | ||
803 | int ret; | ||
804 | struct btrfs_inode_ref *ref; | 802 | struct btrfs_inode_ref *ref; |
803 | struct btrfs_dir_item *di; | ||
804 | struct inode *dir; | ||
805 | struct inode *inode; | 805 | struct inode *inode; |
806 | char *name; | ||
807 | int namelen; | ||
808 | unsigned long ref_ptr; | 806 | unsigned long ref_ptr; |
809 | unsigned long ref_end; | 807 | unsigned long ref_end; |
808 | char *name; | ||
809 | int namelen; | ||
810 | int ret; | ||
810 | int search_done = 0; | 811 | int search_done = 0; |
811 | 812 | ||
812 | /* | 813 | /* |
@@ -909,6 +910,25 @@ again: | |||
909 | } | 910 | } |
910 | btrfs_release_path(path); | 911 | btrfs_release_path(path); |
911 | 912 | ||
913 | /* look for a conflicting sequence number */ | ||
914 | di = btrfs_lookup_dir_index_item(trans, root, path, btrfs_ino(dir), | ||
915 | btrfs_inode_ref_index(eb, ref), | ||
916 | name, namelen, 0); | ||
917 | if (di && !IS_ERR(di)) { | ||
918 | ret = drop_one_dir_item(trans, root, path, dir, di); | ||
919 | BUG_ON(ret); | ||
920 | } | ||
921 | btrfs_release_path(path); | ||
922 | |||
923 | /* look for a conflicting name */ | ||
924 | di = btrfs_lookup_dir_item(trans, root, path, btrfs_ino(dir), | ||
925 | name, namelen, 0); | ||
926 | if (di && !IS_ERR(di)) { | ||
927 | ret = drop_one_dir_item(trans, root, path, dir, di); | ||
928 | BUG_ON(ret); | ||
929 | } | ||
930 | btrfs_release_path(path); | ||
931 | |||
912 | insert: | 932 | insert: |
913 | /* insert our name */ | 933 | /* insert our name */ |
914 | ret = btrfs_add_link(trans, dir, inode, name, namelen, 0, | 934 | ret = btrfs_add_link(trans, dir, inode, name, namelen, 0, |
@@ -1617,7 +1637,8 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, | |||
1617 | return 0; | 1637 | return 0; |
1618 | 1638 | ||
1619 | path = btrfs_alloc_path(); | 1639 | path = btrfs_alloc_path(); |
1620 | BUG_ON(!path); | 1640 | if (!path) |
1641 | return -ENOMEM; | ||
1621 | 1642 | ||
1622 | nritems = btrfs_header_nritems(eb); | 1643 | nritems = btrfs_header_nritems(eb); |
1623 | for (i = 0; i < nritems; i++) { | 1644 | for (i = 0; i < nritems; i++) { |
@@ -1723,15 +1744,17 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, | |||
1723 | return -ENOMEM; | 1744 | return -ENOMEM; |
1724 | 1745 | ||
1725 | if (*level == 1) { | 1746 | if (*level == 1) { |
1726 | wc->process_func(root, next, wc, ptr_gen); | 1747 | ret = wc->process_func(root, next, wc, ptr_gen); |
1748 | if (ret) | ||
1749 | return ret; | ||
1727 | 1750 | ||
1728 | path->slots[*level]++; | 1751 | path->slots[*level]++; |
1729 | if (wc->free) { | 1752 | if (wc->free) { |
1730 | btrfs_read_buffer(next, ptr_gen); | 1753 | btrfs_read_buffer(next, ptr_gen); |
1731 | 1754 | ||
1732 | btrfs_tree_lock(next); | 1755 | btrfs_tree_lock(next); |
1733 | clean_tree_block(trans, root, next); | ||
1734 | btrfs_set_lock_blocking(next); | 1756 | btrfs_set_lock_blocking(next); |
1757 | clean_tree_block(trans, root, next); | ||
1735 | btrfs_wait_tree_block_writeback(next); | 1758 | btrfs_wait_tree_block_writeback(next); |
1736 | btrfs_tree_unlock(next); | 1759 | btrfs_tree_unlock(next); |
1737 | 1760 | ||
@@ -1788,16 +1811,19 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans, | |||
1788 | parent = path->nodes[*level + 1]; | 1811 | parent = path->nodes[*level + 1]; |
1789 | 1812 | ||
1790 | root_owner = btrfs_header_owner(parent); | 1813 | root_owner = btrfs_header_owner(parent); |
1791 | wc->process_func(root, path->nodes[*level], wc, | 1814 | ret = wc->process_func(root, path->nodes[*level], wc, |
1792 | btrfs_header_generation(path->nodes[*level])); | 1815 | btrfs_header_generation(path->nodes[*level])); |
1816 | if (ret) | ||
1817 | return ret; | ||
1818 | |||
1793 | if (wc->free) { | 1819 | if (wc->free) { |
1794 | struct extent_buffer *next; | 1820 | struct extent_buffer *next; |
1795 | 1821 | ||
1796 | next = path->nodes[*level]; | 1822 | next = path->nodes[*level]; |
1797 | 1823 | ||
1798 | btrfs_tree_lock(next); | 1824 | btrfs_tree_lock(next); |
1799 | clean_tree_block(trans, root, next); | ||
1800 | btrfs_set_lock_blocking(next); | 1825 | btrfs_set_lock_blocking(next); |
1826 | clean_tree_block(trans, root, next); | ||
1801 | btrfs_wait_tree_block_writeback(next); | 1827 | btrfs_wait_tree_block_writeback(next); |
1802 | btrfs_tree_unlock(next); | 1828 | btrfs_tree_unlock(next); |
1803 | 1829 | ||
@@ -1864,8 +1890,8 @@ static int walk_log_tree(struct btrfs_trans_handle *trans, | |||
1864 | next = path->nodes[orig_level]; | 1890 | next = path->nodes[orig_level]; |
1865 | 1891 | ||
1866 | btrfs_tree_lock(next); | 1892 | btrfs_tree_lock(next); |
1867 | clean_tree_block(trans, log, next); | ||
1868 | btrfs_set_lock_blocking(next); | 1893 | btrfs_set_lock_blocking(next); |
1894 | clean_tree_block(trans, log, next); | ||
1869 | btrfs_wait_tree_block_writeback(next); | 1895 | btrfs_wait_tree_block_writeback(next); |
1870 | btrfs_tree_unlock(next); | 1896 | btrfs_tree_unlock(next); |
1871 | 1897 | ||
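Besides propagating errors from wc->process_func, the tree-log hunks swap clean_tree_block() and btrfs_set_lock_blocking(): under the new scheme a freshly taken tree lock spins, so nothing that can schedule may run before the lock has been flipped to blocking. The ordering the hunks enforce, sketched with the calls they touch:

btrfs_tree_lock(next);                  /* spinning write lock: no sleeping */
btrfs_set_lock_blocking(next);          /* scheduling is safe from here on */
clean_tree_block(trans, root, next);    /* may block */
btrfs_wait_tree_block_writeback(next);  /* sleeps waiting for writeback */
btrfs_tree_unlock(next);                /* drops spinning or blocking flavor */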
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 19450bc5363..f2a4cc79da6 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
@@ -142,6 +142,7 @@ static noinline int run_scheduled_bios(struct btrfs_device *device) | |||
142 | unsigned long limit; | 142 | unsigned long limit; |
143 | unsigned long last_waited = 0; | 143 | unsigned long last_waited = 0; |
144 | int force_reg = 0; | 144 | int force_reg = 0; |
145 | int sync_pending = 0; | ||
145 | struct blk_plug plug; | 146 | struct blk_plug plug; |
146 | 147 | ||
147 | /* | 148 | /* |
@@ -229,6 +230,22 @@ loop_lock: | |||
229 | 230 | ||
230 | BUG_ON(atomic_read(&cur->bi_cnt) == 0); | 231 | BUG_ON(atomic_read(&cur->bi_cnt) == 0); |
231 | 232 | ||
233 | /* | ||
234 | * if we're doing the sync list, record that our | ||
235 | * plug has some sync requests on it | ||
236 | * | ||
237 | * If we're doing the regular list and there are | ||
238 | * sync requests sitting around, unplug before | ||
239 | * we add more | ||
240 | */ | ||
241 | if (pending_bios == &device->pending_sync_bios) { | ||
242 | sync_pending = 1; | ||
243 | } else if (sync_pending) { | ||
244 | blk_finish_plug(&plug); | ||
245 | blk_start_plug(&plug); | ||
246 | sync_pending = 0; | ||
247 | } | ||
248 | |||
232 | submit_bio(cur->bi_rw, cur); | 249 | submit_bio(cur->bi_rw, cur); |
233 | num_run++; | 250 | num_run++; |
234 | batch_run++; | 251 | batch_run++; |
@@ -500,6 +517,9 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices) | |||
500 | fs_devices->rw_devices--; | 517 | fs_devices->rw_devices--; |
501 | } | 518 | } |
502 | 519 | ||
520 | if (device->can_discard) | ||
521 | fs_devices->num_can_discard--; | ||
522 | |||
503 | new_device = kmalloc(sizeof(*new_device), GFP_NOFS); | 523 | new_device = kmalloc(sizeof(*new_device), GFP_NOFS); |
504 | BUG_ON(!new_device); | 524 | BUG_ON(!new_device); |
505 | memcpy(new_device, device, sizeof(*new_device)); | 525 | memcpy(new_device, device, sizeof(*new_device)); |
@@ -508,6 +528,7 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices) | |||
508 | new_device->bdev = NULL; | 528 | new_device->bdev = NULL; |
509 | new_device->writeable = 0; | 529 | new_device->writeable = 0; |
510 | new_device->in_fs_metadata = 0; | 530 | new_device->in_fs_metadata = 0; |
531 | new_device->can_discard = 0; | ||
511 | list_replace_rcu(&device->dev_list, &new_device->dev_list); | 532 | list_replace_rcu(&device->dev_list, &new_device->dev_list); |
512 | 533 | ||
513 | call_rcu(&device->rcu, free_device); | 534 | call_rcu(&device->rcu, free_device); |
@@ -547,6 +568,7 @@ int btrfs_close_devices(struct btrfs_fs_devices *fs_devices) | |||
547 | static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, | 568 | static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, |
548 | fmode_t flags, void *holder) | 569 | fmode_t flags, void *holder) |
549 | { | 570 | { |
571 | struct request_queue *q; | ||
550 | struct block_device *bdev; | 572 | struct block_device *bdev; |
551 | struct list_head *head = &fs_devices->devices; | 573 | struct list_head *head = &fs_devices->devices; |
552 | struct btrfs_device *device; | 574 | struct btrfs_device *device; |
@@ -603,6 +625,12 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, | |||
603 | seeding = 0; | 625 | seeding = 0; |
604 | } | 626 | } |
605 | 627 | ||
628 | q = bdev_get_queue(bdev); | ||
629 | if (blk_queue_discard(q)) { | ||
630 | device->can_discard = 1; | ||
631 | fs_devices->num_can_discard++; | ||
632 | } | ||
633 | |||
606 | device->bdev = bdev; | 634 | device->bdev = bdev; |
607 | device->in_fs_metadata = 0; | 635 | device->in_fs_metadata = 0; |
608 | device->mode = flags; | 636 | device->mode = flags; |
@@ -835,6 +863,7 @@ int find_free_dev_extent(struct btrfs_trans_handle *trans, | |||
835 | 863 | ||
836 | max_hole_start = search_start; | 864 | max_hole_start = search_start; |
837 | max_hole_size = 0; | 865 | max_hole_size = 0; |
866 | hole_size = 0; | ||
838 | 867 | ||
839 | if (search_start >= search_end) { | 868 | if (search_start >= search_end) { |
840 | ret = -ENOSPC; | 869 | ret = -ENOSPC; |
@@ -917,7 +946,14 @@ next: | |||
917 | cond_resched(); | 946 | cond_resched(); |
918 | } | 947 | } |
919 | 948 | ||
920 | hole_size = search_end- search_start; | 949 | /* |
950 | * At this point, search_start should be the end of | ||
951 | * allocated dev extents, and when shrinking the device, | ||
952 | * search_end may be smaller than search_start. | ||
953 | */ | ||
954 | if (search_end > search_start) | ||
955 | hole_size = search_end - search_start; | ||
956 | |||
921 | if (hole_size > max_hole_size) { | 957 | if (hole_size > max_hole_size) { |
922 | max_hole_start = search_start; | 958 | max_hole_start = search_start; |
923 | max_hole_size = hole_size; | 959 | max_hole_size = hole_size; |
@@ -1037,7 +1073,8 @@ static noinline int find_next_chunk(struct btrfs_root *root, | |||
1037 | struct btrfs_key found_key; | 1073 | struct btrfs_key found_key; |
1038 | 1074 | ||
1039 | path = btrfs_alloc_path(); | 1075 | path = btrfs_alloc_path(); |
1040 | BUG_ON(!path); | 1076 | if (!path) |
1077 | return -ENOMEM; | ||
1041 | 1078 | ||
1042 | key.objectid = objectid; | 1079 | key.objectid = objectid; |
1043 | key.offset = (u64)-1; | 1080 | key.offset = (u64)-1; |
@@ -1542,6 +1579,7 @@ error: | |||
1542 | 1579 | ||
1543 | int btrfs_init_new_device(struct btrfs_root *root, char *device_path) | 1580 | int btrfs_init_new_device(struct btrfs_root *root, char *device_path) |
1544 | { | 1581 | { |
1582 | struct request_queue *q; | ||
1545 | struct btrfs_trans_handle *trans; | 1583 | struct btrfs_trans_handle *trans; |
1546 | struct btrfs_device *device; | 1584 | struct btrfs_device *device; |
1547 | struct block_device *bdev; | 1585 | struct block_device *bdev; |
@@ -1611,6 +1649,9 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) | |||
1611 | 1649 | ||
1612 | lock_chunks(root); | 1650 | lock_chunks(root); |
1613 | 1651 | ||
1652 | q = bdev_get_queue(bdev); | ||
1653 | if (blk_queue_discard(q)) | ||
1654 | device->can_discard = 1; | ||
1614 | device->writeable = 1; | 1655 | device->writeable = 1; |
1615 | device->work.func = pending_bios_fn; | 1656 | device->work.func = pending_bios_fn; |
1616 | generate_random_uuid(device->uuid); | 1657 | generate_random_uuid(device->uuid); |
@@ -1646,6 +1687,8 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) | |||
1646 | root->fs_info->fs_devices->num_devices++; | 1687 | root->fs_info->fs_devices->num_devices++; |
1647 | root->fs_info->fs_devices->open_devices++; | 1688 | root->fs_info->fs_devices->open_devices++; |
1648 | root->fs_info->fs_devices->rw_devices++; | 1689 | root->fs_info->fs_devices->rw_devices++; |
1690 | if (device->can_discard) | ||
1691 | root->fs_info->fs_devices->num_can_discard++; | ||
1649 | root->fs_info->fs_devices->total_rw_bytes += device->total_bytes; | 1692 | root->fs_info->fs_devices->total_rw_bytes += device->total_bytes; |
1650 | 1693 | ||
1651 | if (!blk_queue_nonrot(bdev_get_queue(bdev))) | 1694 | if (!blk_queue_nonrot(bdev_get_queue(bdev))) |
@@ -2061,8 +2104,10 @@ int btrfs_balance(struct btrfs_root *dev_root) | |||
2061 | 2104 | ||
2062 | /* step two, relocate all the chunks */ | 2105 | /* step two, relocate all the chunks */ |
2063 | path = btrfs_alloc_path(); | 2106 | path = btrfs_alloc_path(); |
2064 | BUG_ON(!path); | 2107 | if (!path) { |
2065 | 2108 | ret = -ENOMEM; | |
2109 | goto error; | ||
2110 | } | ||
2066 | key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; | 2111 | key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; |
2067 | key.offset = (u64)-1; | 2112 | key.offset = (u64)-1; |
2068 | key.type = BTRFS_CHUNK_ITEM_KEY; | 2113 | key.type = BTRFS_CHUNK_ITEM_KEY; |
@@ -2410,9 +2455,10 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, | |||
2410 | total_avail = device->total_bytes - device->bytes_used; | 2455 | total_avail = device->total_bytes - device->bytes_used; |
2411 | else | 2456 | else |
2412 | total_avail = 0; | 2457 | total_avail = 0; |
2413 | /* avail is off by max(alloc_start, 1MB), but that is the same | 2458 | |
2414 | * for all devices, so it doesn't hurt the sorting later on | 2459 | /* If there is no space on this device, skip it. */ |
2415 | */ | 2460 | if (total_avail == 0) |
2461 | continue; | ||
2416 | 2462 | ||
2417 | ret = find_free_dev_extent(trans, device, | 2463 | ret = find_free_dev_extent(trans, device, |
2418 | max_stripe_size * dev_stripes, | 2464 | max_stripe_size * dev_stripes, |
@@ -2661,7 +2707,8 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans, | |||
2661 | 2707 | ||
2662 | ret = find_next_chunk(fs_info->chunk_root, | 2708 | ret = find_next_chunk(fs_info->chunk_root, |
2663 | BTRFS_FIRST_CHUNK_TREE_OBJECTID, &chunk_offset); | 2709 | BTRFS_FIRST_CHUNK_TREE_OBJECTID, &chunk_offset); |
2664 | BUG_ON(ret); | 2710 | if (ret) |
2711 | return ret; | ||
2665 | 2712 | ||
2666 | alloc_profile = BTRFS_BLOCK_GROUP_METADATA | | 2713 | alloc_profile = BTRFS_BLOCK_GROUP_METADATA | |
2667 | (fs_info->metadata_alloc_profile & | 2714 | (fs_info->metadata_alloc_profile & |
@@ -3595,7 +3642,7 @@ int btrfs_read_sys_array(struct btrfs_root *root) | |||
3595 | if (!sb) | 3642 | if (!sb) |
3596 | return -ENOMEM; | 3643 | return -ENOMEM; |
3597 | btrfs_set_buffer_uptodate(sb); | 3644 | btrfs_set_buffer_uptodate(sb); |
3598 | btrfs_set_buffer_lockdep_class(sb, 0); | 3645 | btrfs_set_buffer_lockdep_class(root->root_key.objectid, sb, 0); |
3599 | 3646 | ||
3600 | write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE); | 3647 | write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE); |
3601 | array_size = btrfs_super_sys_array_size(super_copy); | 3648 | array_size = btrfs_super_sys_array_size(super_copy); |
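The sync_pending logic added to run_scheduled_bios() keeps sync bios from idling in the plug behind a newly started async batch: once the loop moves from the sync list back to the regular list, the plug is flushed and restarted. The same rule, factored into a hypothetical helper for illustration:

static void maybe_flush_plug(struct blk_plug *plug,
                             struct btrfs_pending_bios *pending_bios,
                             struct btrfs_device *device,
                             int *sync_pending)
{
    if (pending_bios == &device->pending_sync_bios) {
        /* remember that this plug now holds sync requests */
        *sync_pending = 1;
    } else if (*sync_pending) {
        /* async bios are about to pile in: push the pending sync
         * requests to the device first, then re-plug */
        blk_finish_plug(plug);
        blk_start_plug(plug);
        *sync_pending = 0;
    }
}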
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 7c12d61ae7a..6d866db4e17 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h | |||
@@ -48,6 +48,7 @@ struct btrfs_device { | |||
48 | int writeable; | 48 | int writeable; |
49 | int in_fs_metadata; | 49 | int in_fs_metadata; |
50 | int missing; | 50 | int missing; |
51 | int can_discard; | ||
51 | 52 | ||
52 | spinlock_t io_lock; | 53 | spinlock_t io_lock; |
53 | 54 | ||
@@ -104,6 +105,7 @@ struct btrfs_fs_devices { | |||
104 | u64 rw_devices; | 105 | u64 rw_devices; |
105 | u64 missing_devices; | 106 | u64 missing_devices; |
106 | u64 total_rw_bytes; | 107 | u64 total_rw_bytes; |
108 | u64 num_can_discard; | ||
107 | struct block_device *latest_bdev; | 109 | struct block_device *latest_bdev; |
108 | 110 | ||
109 | /* all of the devices in the FS, protected by a mutex | 111 | /* all of the devices in the FS, protected by a mutex |
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 5366fe452ab..69565e5fc6a 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c | |||
@@ -102,48 +102,71 @@ static int do_setxattr(struct btrfs_trans_handle *trans, | |||
102 | if (!path) | 102 | if (!path) |
103 | return -ENOMEM; | 103 | return -ENOMEM; |
104 | 104 | ||
105 | /* first lets see if we already have this xattr */ | 105 | if (flags & XATTR_REPLACE) { |
106 | di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode), name, | 106 | di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode), name, |
107 | strlen(name), -1); | 107 | name_len, -1); |
108 | if (IS_ERR(di)) { | 108 | if (IS_ERR(di)) { |
109 | ret = PTR_ERR(di); | 109 | ret = PTR_ERR(di); |
110 | goto out; | 110 | goto out; |
111 | } | 111 | } else if (!di) { |
112 | 112 | ret = -ENODATA; | |
113 | /* ok we already have this xattr, lets remove it */ | ||
114 | if (di) { | ||
115 | /* if we want create only exit */ | ||
116 | if (flags & XATTR_CREATE) { | ||
117 | ret = -EEXIST; | ||
118 | goto out; | 113 | goto out; |
119 | } | 114 | } |
120 | |||
121 | ret = btrfs_delete_one_dir_name(trans, root, path, di); | 115 | ret = btrfs_delete_one_dir_name(trans, root, path, di); |
122 | BUG_ON(ret); | 116 | if (ret) |
117 | goto out; | ||
123 | btrfs_release_path(path); | 118 | btrfs_release_path(path); |
124 | 119 | ||
125 | /* if we don't have a value then we are removing the xattr */ | 120 | /* |
121 | * remove the attribute | ||
122 | */ | ||
126 | if (!value) | 123 | if (!value) |
127 | goto out; | 124 | goto out; |
128 | } else { | 125 | } |
126 | |||
127 | again: | ||
128 | ret = btrfs_insert_xattr_item(trans, root, path, btrfs_ino(inode), | ||
129 | name, name_len, value, size); | ||
130 | if (ret == -EEXIST) { | ||
131 | if (flags & XATTR_CREATE) | ||
132 | goto out; | ||
133 | /* | ||
134 | * We can't use the path we already have since we won't have the | ||
135 | * proper locking for a delete, so release the path and | ||
136 | * re-lookup to delete the thing. | ||
137 | */ | ||
129 | btrfs_release_path(path); | 138 | btrfs_release_path(path); |
139 | di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode), | ||
140 | name, name_len, -1); | ||
141 | if (IS_ERR(di)) { | ||
142 | ret = PTR_ERR(di); | ||
143 | goto out; | ||
144 | } else if (!di) { | ||
145 | /* Shouldn't happen but just in case... */ | ||
146 | btrfs_release_path(path); | ||
147 | goto again; | ||
148 | } | ||
130 | 149 | ||
131 | if (flags & XATTR_REPLACE) { | 150 | ret = btrfs_delete_one_dir_name(trans, root, path, di); |
132 | /* we couldn't find the attr to replace */ | 151 | if (ret) |
133 | ret = -ENODATA; | ||
134 | goto out; | 152 | goto out; |
153 | |||
154 | /* | ||
155 | * We have a value to set, so go back and try to insert it now. | ||
156 | */ | ||
157 | if (value) { | ||
158 | btrfs_release_path(path); | ||
159 | goto again; | ||
135 | } | 160 | } |
136 | } | 161 | } |
137 | |||
138 | /* ok we have to create a completely new xattr */ | ||
139 | ret = btrfs_insert_xattr_item(trans, root, path, btrfs_ino(inode), | ||
140 | name, name_len, value, size); | ||
141 | BUG_ON(ret); | ||
142 | out: | 162 | out: |
143 | btrfs_free_path(path); | 163 | btrfs_free_path(path); |
144 | return ret; | 164 | return ret; |
145 | } | 165 | } |
146 | 166 | ||
167 | /* | ||
168 | * @value: "" sets the attribute to an empty value, NULL removes it | ||
169 | */ | ||
147 | int __btrfs_setxattr(struct btrfs_trans_handle *trans, | 170 | int __btrfs_setxattr(struct btrfs_trans_handle *trans, |
148 | struct inode *inode, const char *name, | 171 | struct inode *inode, const char *name, |
149 | const void *value, size_t size, int flags) | 172 | const void *value, size_t size, int flags) |