Diffstat (limited to 'fs/btrfs')
35 files changed, 4248 insertions, 2303 deletions
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index d2cf5a54a4b8..9adf5e4f7e96 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -8,7 +8,7 @@ btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
 	   extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \
 	   extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
 	   ref-cache.o export.o tree-log.o acl.o free-space-cache.o zlib.o \
-	   compression.o
+	   compression.o delayed-ref.o
 else

 # Normal Makefile
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 1d53b62dbba5..7fdd184a528d 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -256,7 +256,7 @@ int btrfs_init_acl(struct inode *inode, struct inode *dir)
 		}

 		if (!acl)
-			inode->i_mode &= ~current->fs->umask;
+			inode->i_mode &= ~current_umask();
 	}

 	if (IS_POSIXACL(dir) && acl) {
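For context on the hunk above: current_umask() is the VFS accessor for the process umask (it reads current->fs->umask internally), so this is an API cleanup rather than a behavior change. A minimal userspace sketch of the same masking arithmetic, with an assumed umask of 022:

	#include <stdio.h>

	/* stand-in for the kernel's current_umask(); 022 is illustrative */
	static unsigned int current_umask(void) { return 022; }

	int main(void)
	{
		unsigned int i_mode = 0666;	/* mode requested by the caller */

		/* no ACL to inherit, so strip the umask bits as the hunk does */
		i_mode &= ~current_umask();
		printf("%04o\n", i_mode);	/* prints 0644 */
		return 0;
	}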
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index c84ca1f5259a..51bfdfc8fcda 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -20,7 +20,6 @@
 #include <linux/list.h>
 #include <linux/spinlock.h>
 #include <linux/freezer.h>
-#include <linux/ftrace.h>
 #include "async-thread.h"

 #define WORK_QUEUED_BIT 0
@@ -195,6 +194,9 @@ again_locked:
 			if (!list_empty(&worker->pending))
 				continue;

+			if (kthread_should_stop())
+				break;
+
 			/* still no more work?, sleep for real */
 			spin_lock_irq(&worker->lock);
 			set_current_state(TASK_INTERRUPTIBLE);
@@ -208,7 +210,8 @@ again_locked:
 			worker->working = 0;
 			spin_unlock_irq(&worker->lock);

-			schedule();
+			if (!kthread_should_stop())
+				schedule();
 		}
 		__set_current_state(TASK_RUNNING);
 	}
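The two hunks above follow the standard kthread shutdown discipline: re-check kthread_should_stop() after setting the task state and skip the schedule() if a stop was requested, so a stop that races with the sleep preparation cannot leave the worker asleep. A generic sketch of that pattern, with the btrfs-specific pending-list handling elided:

	static int worker_thread(void *arg)
	{
		while (!kthread_should_stop()) {
			/* ... drain queued work here ... */

			set_current_state(TASK_INTERRUPTIBLE);
			if (kthread_should_stop()) {
				__set_current_state(TASK_RUNNING);
				break;
			}
			schedule();
		}
		return 0;
	}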
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index a8c9693b75ac..b30986f00b9d 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -66,6 +66,15 @@ struct btrfs_inode {
 	 */
 	struct list_head delalloc_inodes;

+	/*
+	 * list for tracking inodes that must be sent to disk before a
+	 * rename or truncate commit
+	 */
+	struct list_head ordered_operations;
+
+	/* the space_info for where this inode's data allocations are done */
+	struct btrfs_space_info *space_info;
+
 	/* full 64 bit generation number, struct vfs_inode doesn't have a big
 	 * enough field for this.
 	 */
@@ -83,17 +92,16 @@ struct btrfs_inode {
 	 */
 	u64 logged_trans;

-	/*
-	 * trans that last made a change that should be fully fsync'd.  This
-	 * gets reset to zero each time the inode is logged
-	 */
-	u64 log_dirty_trans;
-
 	/* total number of bytes pending delalloc, used by stat to calc the
 	 * real block usage of the file
 	 */
 	u64 delalloc_bytes;

+	/* total number of bytes that may be used for this inode for
+	 * delalloc
+	 */
+	u64 reserved_bytes;
+
 	/*
 	 * the size of the file stored in the metadata on disk.  data=ordered
 	 * means the in-memory i_size might be larger than the size on disk
@@ -113,6 +121,25 @@ struct btrfs_inode {
 	/* the start of block group preferred for allocations. */
 	u64 block_group;

+	/* the fsync log has some corner cases that mean we have to check
+	 * directories to see if any unlinks have been done before
+	 * the directory was logged.  See tree-log.c for all the
+	 * details
+	 */
+	u64 last_unlink_trans;
+
+	/*
+	 * ordered_data_close is set by truncate when a file that used
+	 * to have good data has been truncated to zero.  When it is set
+	 * the btrfs file release call will add this inode to the
+	 * ordered operations list so that we make sure to flush out any
+	 * new data the application may have written before commit.
+	 *
+	 * yes, its silly to have a single bitflag, but we might grow more
+	 * of these.
+	 */
+	unsigned ordered_data_close:1;
+
 	struct inode vfs_inode;
 };

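Because struct btrfs_inode embeds the VFS inode as its final member (vfs_inode above), btrfs recovers its private inode state from the struct inode the VFS passes around via container_of(); this is the existing BTRFS_I() helper from this header, reproduced for orientation:

	static inline struct btrfs_inode *BTRFS_I(struct inode *inode)
	{
		return container_of(inode, struct btrfs_inode, vfs_inode);
	}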
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 35443cc4b9a9..e5b2533b691a 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -38,19 +38,12 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
 static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 		   struct btrfs_path *path, int level, int slot);

-inline void btrfs_init_path(struct btrfs_path *p)
-{
-	memset(p, 0, sizeof(*p));
-}
-
 struct btrfs_path *btrfs_alloc_path(void)
 {
 	struct btrfs_path *path;
-	path = kmem_cache_alloc(btrfs_path_cachep, GFP_NOFS);
-	if (path) {
-		btrfs_init_path(path);
+	path = kmem_cache_zalloc(btrfs_path_cachep, GFP_NOFS);
+	if (path)
 		path->reada = 1;
-	}
 	return path;
 }

@@ -69,14 +62,38 @@ noinline void btrfs_set_path_blocking(struct btrfs_path *p)

 /*
  * reset all the locked nodes in the patch to spinning locks.
+ *
+ * held is used to keep lockdep happy, when lockdep is enabled
+ * we set held to a blocking lock before we go around and
+ * retake all the spinlocks in the path.  You can safely use NULL
+ * for held
  */
-noinline void btrfs_clear_path_blocking(struct btrfs_path *p)
+noinline void btrfs_clear_path_blocking(struct btrfs_path *p,
+					struct extent_buffer *held)
 {
 	int i;
-	for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	/* lockdep really cares that we take all of these spinlocks
+	 * in the right order.  If any of the locks in the path are not
+	 * currently blocking, it is going to complain.  So, make really
+	 * really sure by forcing the path to blocking before we clear
+	 * the path blocking.
+	 */
+	if (held)
+		btrfs_set_lock_blocking(held);
+	btrfs_set_path_blocking(p);
+#endif
+
+	for (i = BTRFS_MAX_LEVEL - 1; i >= 0; i--) {
 		if (p->nodes[i] && p->locks[i])
 			btrfs_clear_lock_blocking(p->nodes[i]);
 	}
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	if (held)
+		btrfs_clear_lock_blocking(held);
+#endif
 }

 /* this also releases the path */
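The new held argument only matters with CONFIG_DEBUG_LOCK_ALLOC enabled: callers pass the one extent buffer whose lock they just took so lockdep sees the whole path forced blocking and retaken in a consistent order; NULL is safe whenever nothing beyond the path is locked. A usage sketch mirroring the btrfs_search_slot call sites later in this patch:

	btrfs_clear_path_blocking(p, NULL);	/* only the path is locked */

	btrfs_tree_lock(b);			/* b was just locked blocking... */
	btrfs_clear_path_blocking(p, b);	/* ...so pass it in as 'held' */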
@@ -237,18 +254,13 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
  * empty_size -- a hint that you plan on doing more cow.  This is the size in
  * bytes the allocator should try to find free next to the block it returns.
  * This is just a hint and may be ignored by the allocator.
- *
- * prealloc_dest -- if you have already reserved a destination for the cow,
- * this uses that block instead of allocating a new one.
- * btrfs_alloc_reserved_extent is used to finish the allocation.
  */
 static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
 			     struct btrfs_root *root,
 			     struct extent_buffer *buf,
 			     struct extent_buffer *parent, int parent_slot,
 			     struct extent_buffer **cow_ret,
-			     u64 search_start, u64 empty_size,
-			     u64 prealloc_dest)
+			     u64 search_start, u64 empty_size)
 {
 	u64 parent_start;
 	struct extent_buffer *cow;
@@ -260,7 +272,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
 	if (*cow_ret == buf)
 		unlock_orig = 1;

-	WARN_ON(!btrfs_tree_locked(buf));
+	btrfs_assert_tree_locked(buf);

 	if (parent)
 		parent_start = parent->start;
@@ -274,26 +286,10 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
 	level = btrfs_header_level(buf);
 	nritems = btrfs_header_nritems(buf);

-	if (prealloc_dest) {
-		struct btrfs_key ins;
-
-		ins.objectid = prealloc_dest;
-		ins.offset = buf->len;
-		ins.type = BTRFS_EXTENT_ITEM_KEY;
-
-		ret = btrfs_alloc_reserved_extent(trans, root, parent_start,
-						  root->root_key.objectid,
-						  trans->transid, level, &ins);
-		BUG_ON(ret);
-		cow = btrfs_init_new_buffer(trans, root, prealloc_dest,
-					    buf->len);
-	} else {
-		cow = btrfs_alloc_free_block(trans, root, buf->len,
-					     parent_start,
-					     root->root_key.objectid,
-					     trans->transid, level,
-					     search_start, empty_size);
-	}
+	cow = btrfs_alloc_free_block(trans, root, buf->len,
+				     parent_start, root->root_key.objectid,
+				     trans->transid, level,
+				     search_start, empty_size);
 	if (IS_ERR(cow))
 		return PTR_ERR(cow);

@@ -396,7 +392,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
 noinline int btrfs_cow_block(struct btrfs_trans_handle *trans,
 		    struct btrfs_root *root, struct extent_buffer *buf,
 		    struct extent_buffer *parent, int parent_slot,
-		    struct extent_buffer **cow_ret, u64 prealloc_dest)
+		    struct extent_buffer **cow_ret)
 {
 	u64 search_start;
 	int ret;
@@ -419,7 +415,6 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans,
 	    btrfs_header_owner(buf) == root->root_key.objectid &&
 	    !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
 		*cow_ret = buf;
-		WARN_ON(prealloc_dest);
 		return 0;
 	}

@@ -430,8 +425,7 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans,
 	btrfs_set_lock_blocking(buf);

 	ret = __btrfs_cow_block(trans, root, buf, parent,
-				 parent_slot, cow_ret, search_start, 0,
-				 prealloc_dest);
+				 parent_slot, cow_ret, search_start, 0);
 	return ret;
 }

@@ -600,7 +594,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
 		err = __btrfs_cow_block(trans, root, cur, parent, i,
 					&cur, search_start,
 					min(16 * blocksize,
-					    (end_slot - i) * blocksize), 0);
+					    (end_slot - i) * blocksize));
 		if (err) {
 			btrfs_tree_unlock(cur);
 			free_extent_buffer(cur);
@@ -917,10 +911,10 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,

 		/* promote the child to a root */
 		child = read_node_slot(root, mid, 0);
+		BUG_ON(!child);
 		btrfs_tree_lock(child);
 		btrfs_set_lock_blocking(child);
-		BUG_ON(!child);
-		ret = btrfs_cow_block(trans, root, child, mid, 0, &child, 0);
+		ret = btrfs_cow_block(trans, root, child, mid, 0, &child);
 		BUG_ON(ret);

 		spin_lock(&root->node_lock);
@@ -928,6 +922,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
 		spin_unlock(&root->node_lock);

 		ret = btrfs_update_extent_ref(trans, root, child->start,
+					      child->len,
 					      mid->start, child->start,
 					      root->root_key.objectid,
 					      trans->transid, level - 1);
@@ -954,6 +949,10 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
 	    BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
 		return 0;

+	if (trans->transaction->delayed_refs.flushing &&
+	    btrfs_header_nritems(mid) > 2)
+		return 0;
+
 	if (btrfs_header_nritems(mid) < 2)
 		err_on_enospc = 1;

@@ -962,7 +961,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
 		btrfs_tree_lock(left);
 		btrfs_set_lock_blocking(left);
 		wret = btrfs_cow_block(trans, root, left,
-				       parent, pslot - 1, &left, 0);
+				       parent, pslot - 1, &left);
 		if (wret) {
 			ret = wret;
 			goto enospc;
@@ -973,7 +972,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
 		btrfs_tree_lock(right);
 		btrfs_set_lock_blocking(right);
 		wret = btrfs_cow_block(trans, root, right,
-				       parent, pslot + 1, &right, 0);
+				       parent, pslot + 1, &right);
 		if (wret) {
 			ret = wret;
 			goto enospc;
@@ -1154,7 +1153,7 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
 			wret = 1;
 		} else {
 			ret = btrfs_cow_block(trans, root, left, parent,
-					      pslot - 1, &left, 0);
+					      pslot - 1, &left);
 			if (ret)
 				wret = 1;
 			else {
@@ -1205,7 +1204,7 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
 		} else {
 			ret = btrfs_cow_block(trans, root, right,
 					      parent, pslot + 1,
-					      &right, 0);
+					      &right);
 			if (ret)
 				wret = 1;
 			else {
@@ -1245,9 +1244,9 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
  * readahead one full node of leaves, finding things that are close
  * to the block in 'slot', and triggering ra on them.
  */
-static noinline void reada_for_search(struct btrfs_root *root,
-				      struct btrfs_path *path,
-				      int level, int slot, u64 objectid)
+static void reada_for_search(struct btrfs_root *root,
+			     struct btrfs_path *path,
+			     int level, int slot, u64 objectid)
 {
 	struct extent_buffer *node;
 	struct btrfs_disk_key disk_key;
@@ -1448,6 +1447,117 @@ noinline void btrfs_unlock_up_safe(struct btrfs_path *path, int level)
 }

 /*
+ * helper function for btrfs_search_slot.  The goal is to find a block
+ * in cache without setting the path to blocking.  If we find the block
+ * we return zero and the path is unchanged.
+ *
+ * If we can't find the block, we set the path blocking and do some
+ * reada.  -EAGAIN is returned and the search must be repeated.
+ */
+static int
+read_block_for_search(struct btrfs_trans_handle *trans,
+		       struct btrfs_root *root, struct btrfs_path *p,
+		       struct extent_buffer **eb_ret, int level, int slot,
+		       struct btrfs_key *key)
+{
+	u64 blocknr;
+	u64 gen;
+	u32 blocksize;
+	struct extent_buffer *b = *eb_ret;
+	struct extent_buffer *tmp;
+
+	blocknr = btrfs_node_blockptr(b, slot);
+	gen = btrfs_node_ptr_generation(b, slot);
+	blocksize = btrfs_level_size(root, level - 1);
+
+	tmp = btrfs_find_tree_block(root, blocknr, blocksize);
+	if (tmp && btrfs_buffer_uptodate(tmp, gen)) {
+		*eb_ret = tmp;
+		return 0;
+	}
+
+	/*
+	 * reduce lock contention at high levels
+	 * of the btree by dropping locks before
+	 * we read.
+	 */
+	btrfs_release_path(NULL, p);
+	if (tmp)
+		free_extent_buffer(tmp);
+	if (p->reada)
+		reada_for_search(root, p, level, slot, key->objectid);
+
+	tmp = read_tree_block(root, blocknr, blocksize, gen);
+	if (tmp)
+		free_extent_buffer(tmp);
+	return -EAGAIN;
+}
+
+/*
+ * helper function for btrfs_search_slot.  This does all of the checks
+ * for node-level blocks and does any balancing required based on
+ * the ins_len.
+ *
+ * If no extra work was required, zero is returned.  If we had to
+ * drop the path, -EAGAIN is returned and btrfs_search_slot must
+ * start over
+ */
+static int
+setup_nodes_for_search(struct btrfs_trans_handle *trans,
+		       struct btrfs_root *root, struct btrfs_path *p,
+		       struct extent_buffer *b, int level, int ins_len)
+{
+	int ret;
+	if ((p->search_for_split || ins_len > 0) && btrfs_header_nritems(b) >=
+	    BTRFS_NODEPTRS_PER_BLOCK(root) - 3) {
+		int sret;
+
+		sret = reada_for_balance(root, p, level);
+		if (sret)
+			goto again;
+
+		btrfs_set_path_blocking(p);
+		sret = split_node(trans, root, p, level);
+		btrfs_clear_path_blocking(p, NULL);
+
+		BUG_ON(sret > 0);
+		if (sret) {
+			ret = sret;
+			goto done;
+		}
+		b = p->nodes[level];
+	} else if (ins_len < 0 && btrfs_header_nritems(b) <
+		   BTRFS_NODEPTRS_PER_BLOCK(root) / 4) {
+		int sret;
+
+		sret = reada_for_balance(root, p, level);
+		if (sret)
+			goto again;
+
+		btrfs_set_path_blocking(p);
+		sret = balance_level(trans, root, p, level);
+		btrfs_clear_path_blocking(p, NULL);
+
+		if (sret) {
+			ret = sret;
+			goto done;
+		}
+		b = p->nodes[level];
+		if (!b) {
+			btrfs_release_path(NULL, p);
+			goto again;
+		}
+		BUG_ON(btrfs_header_nritems(b) == 1);
+	}
+	return 0;
+
+again:
+	ret = -EAGAIN;
+done:
+	return ret;
+}
+
+/*
  * look for key in the tree.  path is filled in with nodes along the way
  * if key is found, we return zero and you can find the item in the leaf
  * level of the path (level 0)
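Both helpers hand back -EAGAIN only after releasing the path, so the caller can simply restart the descent; a schematic of the retry loop that btrfs_search_slot (below) builds around them:

	/* schematic only; the real loop is the again:/done: labels below */
	again:
		b = btrfs_root_node(root);
		/* ... walk down to 'level', find 'slot' ... */
		ret = setup_nodes_for_search(trans, root, p, b, level, ins_len);
		if (ret == -EAGAIN)
			goto again;	/* path was dropped, start over */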
@@ -1465,17 +1575,11 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
 		      ins_len, int cow)
 {
 	struct extent_buffer *b;
-	struct extent_buffer *tmp;
 	int slot;
 	int ret;
 	int level;
-	int should_reada = p->reada;
 	int lowest_unlock = 1;
-	int blocksize;
 	u8 lowest_level = 0;
-	u64 blocknr;
-	u64 gen;
-	struct btrfs_key prealloc_block;

 	lowest_level = p->lowest_level;
 	WARN_ON(lowest_level && ins_len > 0);
@@ -1484,8 +1588,6 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
 	if (ins_len < 0)
 		lowest_unlock = 2;

-	prealloc_block.objectid = 0;
-
 again:
 	if (p->skip_locking)
 		b = btrfs_root_node(root);
@@ -1506,50 +1608,21 @@ again:
 		if (cow) {
 			int wret;

-			/* is a cow on this block not required */
+			/*
+			 * if we don't really need to cow this block
+			 * then we don't want to set the path blocking,
+			 * so we test it here
+			 */
 			if (btrfs_header_generation(b) == trans->transid &&
 			    btrfs_header_owner(b) == root->root_key.objectid &&
 			    !btrfs_header_flag(b, BTRFS_HEADER_FLAG_WRITTEN)) {
 				goto cow_done;
 			}
-
-			/* ok, we have to cow, is our old prealloc the right
-			 * size?
-			 */
-			if (prealloc_block.objectid &&
-			    prealloc_block.offset != b->len) {
-				btrfs_release_path(root, p);
-				btrfs_free_reserved_extent(root,
-					   prealloc_block.objectid,
-					   prealloc_block.offset);
-				prealloc_block.objectid = 0;
-				goto again;
-			}
-
-			/*
-			 * for higher level blocks, try not to allocate blocks
-			 * with the block and the parent locks held.
-			 */
-			if (level > 0 && !prealloc_block.objectid) {
-				u32 size = b->len;
-				u64 hint = b->start;
-
-				btrfs_release_path(root, p);
-				ret = btrfs_reserve_extent(trans, root,
-							   size, size, 0,
-							   hint, (u64)-1,
-							   &prealloc_block, 0);
-				BUG_ON(ret);
-				goto again;
-			}
-
 			btrfs_set_path_blocking(p);

 			wret = btrfs_cow_block(trans, root, b,
 					       p->nodes[level + 1],
-					       p->slots[level + 1],
-					       &b, prealloc_block.objectid);
-			prealloc_block.objectid = 0;
+					       p->slots[level + 1], &b);
 			if (wret) {
 				free_extent_buffer(b);
 				ret = wret;
@@ -1566,7 +1639,7 @@ cow_done:
 		if (!p->skip_locking)
 			p->locks[level] = 1;

-		btrfs_clear_path_blocking(p);
+		btrfs_clear_path_blocking(p, NULL);

 		/*
 		 * we have a lock on b and as long as we aren't changing
@@ -1594,51 +1667,15 @@ cow_done:
 			if (ret && slot > 0)
 				slot -= 1;
 			p->slots[level] = slot;
-			if ((p->search_for_split || ins_len > 0) &&
-			    btrfs_header_nritems(b) >=
-			    BTRFS_NODEPTRS_PER_BLOCK(root) - 3) {
-				int sret;
-
-				sret = reada_for_balance(root, p, level);
-				if (sret)
-					goto again;
-
-				btrfs_set_path_blocking(p);
-				sret = split_node(trans, root, p, level);
-				btrfs_clear_path_blocking(p);
-
-				BUG_ON(sret > 0);
-				if (sret) {
-					ret = sret;
-					goto done;
-				}
-				b = p->nodes[level];
-				slot = p->slots[level];
-			} else if (ins_len < 0 &&
-				   btrfs_header_nritems(b) <
-				   BTRFS_NODEPTRS_PER_BLOCK(root) / 4) {
-				int sret;
-
-				sret = reada_for_balance(root, p, level);
-				if (sret)
-					goto again;
-
-				btrfs_set_path_blocking(p);
-				sret = balance_level(trans, root, p, level);
-				btrfs_clear_path_blocking(p);
+			ret = setup_nodes_for_search(trans, root, p, b, level,
+						     ins_len);
+			if (ret == -EAGAIN)
+				goto again;
+			else if (ret)
+				goto done;
+			b = p->nodes[level];
+			slot = p->slots[level];

-				if (sret) {
-					ret = sret;
-					goto done;
-				}
-				b = p->nodes[level];
-				if (!b) {
-					btrfs_release_path(NULL, p);
-					goto again;
-				}
-				slot = p->slots[level];
-				BUG_ON(btrfs_header_nritems(b) == 1);
-			}
 			unlock_up(p, level, lowest_unlock);

 			/* this is only true while dropping a snapshot */
@@ -1647,54 +1684,21 @@ cow_done:
 				goto done;
 			}

-			blocknr = btrfs_node_blockptr(b, slot);
-			gen = btrfs_node_ptr_generation(b, slot);
-			blocksize = btrfs_level_size(root, level - 1);
+			ret = read_block_for_search(trans, root, p,
+						    &b, level, slot, key);
+			if (ret == -EAGAIN)
+				goto again;

-			tmp = btrfs_find_tree_block(root, blocknr, blocksize);
-			if (tmp && btrfs_buffer_uptodate(tmp, gen)) {
-				b = tmp;
-			} else {
-				/*
-				 * reduce lock contention at high levels
-				 * of the btree by dropping locks before
-				 * we read.
-				 */
-				if (level > 0) {
-					btrfs_release_path(NULL, p);
-					if (tmp)
-						free_extent_buffer(tmp);
-					if (should_reada)
-						reada_for_search(root, p,
-								 level, slot,
-								 key->objectid);
-
-					tmp = read_tree_block(root, blocknr,
-							  blocksize, gen);
-					if (tmp)
-						free_extent_buffer(tmp);
-					goto again;
-				} else {
-					btrfs_set_path_blocking(p);
-					if (tmp)
-						free_extent_buffer(tmp);
-					if (should_reada)
-						reada_for_search(root, p,
-								 level, slot,
-								 key->objectid);
-					b = read_node_slot(root, b, slot);
-				}
-			}
 			if (!p->skip_locking) {
 				int lret;

-				btrfs_clear_path_blocking(p);
+				btrfs_clear_path_blocking(p, NULL);
 				lret = btrfs_try_spin_lock(b);

 				if (!lret) {
 					btrfs_set_path_blocking(p);
 					btrfs_tree_lock(b);
-					btrfs_clear_path_blocking(p);
+					btrfs_clear_path_blocking(p, b);
 				}
 			}
 		} else {
@@ -1706,7 +1710,7 @@ cow_done:
 				btrfs_set_path_blocking(p);
 				sret = split_leaf(trans, root, key,
 						  p, ins_len, ret == 0);
-				btrfs_clear_path_blocking(p);
+				btrfs_clear_path_blocking(p, NULL);

 				BUG_ON(sret > 0);
 				if (sret) {
@@ -1725,12 +1729,8 @@ done:
 	 * we don't really know what they plan on doing with the path
 	 * from here on, so for now just mark it as blocking
 	 */
-	btrfs_set_path_blocking(p);
-	if (prealloc_block.objectid) {
-		btrfs_free_reserved_extent(root,
-			   prealloc_block.objectid,
-			   prealloc_block.offset);
-	}
+	if (!p->leave_spinning)
+		btrfs_set_path_blocking(p);
 	return ret;
 }

@@ -1751,7 +1751,7 @@ int btrfs_merge_path(struct btrfs_trans_handle *trans,
 	int ret;

 	eb = btrfs_lock_root_node(root);
-	ret = btrfs_cow_block(trans, root, eb, NULL, 0, &eb, 0);
+	ret = btrfs_cow_block(trans, root, eb, NULL, 0, &eb);
 	BUG_ON(ret);

 	btrfs_set_lock_blocking(eb);
@@ -1809,7 +1809,7 @@ int btrfs_merge_path(struct btrfs_trans_handle *trans,
 		}

 		ret = btrfs_cow_block(trans, root, eb, parent, slot,
-				      &eb, 0);
+				      &eb);
 		BUG_ON(ret);

 		if (root->root_key.objectid ==
@@ -2122,7 +2122,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
 	spin_unlock(&root->node_lock);

 	ret = btrfs_update_extent_ref(trans, root, lower->start,
-				      lower->start, c->start,
+				      lower->len, lower->start, c->start,
 				      root->root_key.objectid,
 				      trans->transid, level - 1);
 	BUG_ON(ret);
@@ -2157,8 +2157,7 @@ static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root
 	BUG_ON(!path->nodes[level]);
 	lower = path->nodes[level];
 	nritems = btrfs_header_nritems(lower);
-	if (slot > nritems)
-		BUG();
+	BUG_ON(slot > nritems);
 	if (nritems == BTRFS_NODEPTRS_PER_BLOCK(root))
 		BUG();
 	if (slot != nritems) {
@@ -2204,7 +2203,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
 		ret = insert_new_root(trans, root, path, level + 1);
 		if (ret)
 			return ret;
-	} else {
+	} else if (!trans->transaction->delayed_refs.flushing) {
 		ret = push_nodes_for_insert(trans, root, path, level);
 		c = path->nodes[level];
 		if (!ret && btrfs_header_nritems(c) <
@@ -2312,66 +2311,27 @@ noinline int btrfs_leaf_free_space(struct btrfs_root *root,
 	return ret;
 }

-/*
- * push some data in the path leaf to the right, trying to free up at
- * least data_size bytes.  returns zero if the push worked, nonzero otherwise
- *
- * returns 1 if the push failed because the other node didn't have enough
- * room, 0 if everything worked out and < 0 if there were major errors.
- */
-static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
-			   *root, struct btrfs_path *path, int data_size,
-			   int empty)
+static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
+				      struct btrfs_root *root,
+				      struct btrfs_path *path,
+				      int data_size, int empty,
+				      struct extent_buffer *right,
+				      int free_space, u32 left_nritems)
 {
 	struct extent_buffer *left = path->nodes[0];
-	struct extent_buffer *right;
-	struct extent_buffer *upper;
+	struct extent_buffer *upper = path->nodes[1];
 	struct btrfs_disk_key disk_key;
 	int slot;
 	u32 i;
-	int free_space;
 	int push_space = 0;
 	int push_items = 0;
 	struct btrfs_item *item;
-	u32 left_nritems;
 	u32 nr;
 	u32 right_nritems;
 	u32 data_end;
 	u32 this_item_size;
 	int ret;

-	slot = path->slots[1];
-	if (!path->nodes[1])
-		return 1;
-
-	upper = path->nodes[1];
-	if (slot >= btrfs_header_nritems(upper) - 1)
-		return 1;
-
-	WARN_ON(!btrfs_tree_locked(path->nodes[1]));
-
-	right = read_node_slot(root, upper, slot + 1);
-	btrfs_tree_lock(right);
-	btrfs_set_lock_blocking(right);
-
-	free_space = btrfs_leaf_free_space(root, right);
-	if (free_space < data_size)
-		goto out_unlock;
-
-	/* cow and double check */
-	ret = btrfs_cow_block(trans, root, right, upper,
-			      slot + 1, &right, 0);
-	if (ret)
-		goto out_unlock;
-
-	free_space = btrfs_leaf_free_space(root, right);
-	if (free_space < data_size)
-		goto out_unlock;
-
-	left_nritems = btrfs_header_nritems(left);
-	if (left_nritems == 0)
-		goto out_unlock;
-
 	if (empty)
 		nr = 0;
 	else
@@ -2380,6 +2340,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
 	if (path->slots[0] >= left_nritems)
 		push_space += data_size;

+	slot = path->slots[1];
 	i = left_nritems - 1;
 	while (i >= nr) {
 		item = btrfs_item_nr(left, i);
@@ -2511,24 +2472,82 @@ out_unlock:
 }

 /*
+ * push some data in the path leaf to the right, trying to free up at
+ * least data_size bytes.  returns zero if the push worked, nonzero otherwise
+ *
+ * returns 1 if the push failed because the other node didn't have enough
+ * room, 0 if everything worked out and < 0 if there were major errors.
+ */
+static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
+			   *root, struct btrfs_path *path, int data_size,
+			   int empty)
+{
+	struct extent_buffer *left = path->nodes[0];
+	struct extent_buffer *right;
+	struct extent_buffer *upper;
+	int slot;
+	int free_space;
+	u32 left_nritems;
+	int ret;
+
+	if (!path->nodes[1])
+		return 1;
+
+	slot = path->slots[1];
+	upper = path->nodes[1];
+	if (slot >= btrfs_header_nritems(upper) - 1)
+		return 1;
+
+	btrfs_assert_tree_locked(path->nodes[1]);
+
+	right = read_node_slot(root, upper, slot + 1);
+	btrfs_tree_lock(right);
+	btrfs_set_lock_blocking(right);
+
+	free_space = btrfs_leaf_free_space(root, right);
+	if (free_space < data_size)
+		goto out_unlock;
+
+	/* cow and double check */
+	ret = btrfs_cow_block(trans, root, right, upper,
+			      slot + 1, &right);
+	if (ret)
+		goto out_unlock;
+
+	free_space = btrfs_leaf_free_space(root, right);
+	if (free_space < data_size)
+		goto out_unlock;
+
+	left_nritems = btrfs_header_nritems(left);
+	if (left_nritems == 0)
+		goto out_unlock;
+
+	return __push_leaf_right(trans, root, path, data_size, empty,
+				 right, free_space, left_nritems);
+out_unlock:
+	btrfs_tree_unlock(right);
+	free_extent_buffer(right);
+	return 1;
+}
+
+/*
  * push some data in the path leaf to the left, trying to free up at
  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
  */
-static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
-			  *root, struct btrfs_path *path, int data_size,
-			  int empty)
+static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
+				     struct btrfs_root *root,
+				     struct btrfs_path *path, int data_size,
+				     int empty, struct extent_buffer *left,
+				     int free_space, int right_nritems)
 {
 	struct btrfs_disk_key disk_key;
 	struct extent_buffer *right = path->nodes[0];
-	struct extent_buffer *left;
 	int slot;
 	int i;
-	int free_space;
 	int push_space = 0;
 	int push_items = 0;
 	struct btrfs_item *item;
 	u32 old_left_nritems;
-	u32 right_nritems;
 	u32 nr;
 	int ret = 0;
 	int wret;
@@ -2536,41 +2555,6 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
 	u32 old_left_item_size;

 	slot = path->slots[1];
-	if (slot == 0)
-		return 1;
-	if (!path->nodes[1])
-		return 1;
-
-	right_nritems = btrfs_header_nritems(right);
-	if (right_nritems == 0)
-		return 1;
-
-	WARN_ON(!btrfs_tree_locked(path->nodes[1]));
-
-	left = read_node_slot(root, path->nodes[1], slot - 1);
-	btrfs_tree_lock(left);
-	btrfs_set_lock_blocking(left);
-
-	free_space = btrfs_leaf_free_space(root, left);
-	if (free_space < data_size) {
-		ret = 1;
-		goto out;
-	}
-
-	/* cow and double check */
-	ret = btrfs_cow_block(trans, root, left,
-			      path->nodes[1], slot - 1, &left, 0);
-	if (ret) {
-		/* we hit -ENOSPC, but it isn't fatal here */
-		ret = 1;
-		goto out;
-	}
-
-	free_space = btrfs_leaf_free_space(root, left);
-	if (free_space < data_size) {
-		ret = 1;
-		goto out;
-	}
-
 	if (empty)
 		nr = right_nritems;
@@ -2738,6 +2722,154 @@ out:
 }

 /*
+ * push some data in the path leaf to the left, trying to free up at
+ * least data_size bytes.  returns zero if the push worked, nonzero otherwise
+ */
+static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
+			  *root, struct btrfs_path *path, int data_size,
+			  int empty)
+{
+	struct extent_buffer *right = path->nodes[0];
+	struct extent_buffer *left;
+	int slot;
+	int free_space;
+	u32 right_nritems;
+	int ret = 0;
+
+	slot = path->slots[1];
+	if (slot == 0)
+		return 1;
+	if (!path->nodes[1])
+		return 1;
+
+	right_nritems = btrfs_header_nritems(right);
+	if (right_nritems == 0)
+		return 1;
+
+	btrfs_assert_tree_locked(path->nodes[1]);
+
+	left = read_node_slot(root, path->nodes[1], slot - 1);
+	btrfs_tree_lock(left);
+	btrfs_set_lock_blocking(left);
+
+	free_space = btrfs_leaf_free_space(root, left);
+	if (free_space < data_size) {
+		ret = 1;
+		goto out;
+	}
+
+	/* cow and double check */
+	ret = btrfs_cow_block(trans, root, left,
+			      path->nodes[1], slot - 1, &left);
+	if (ret) {
+		/* we hit -ENOSPC, but it isn't fatal here */
+		ret = 1;
+		goto out;
+	}
+
+	free_space = btrfs_leaf_free_space(root, left);
+	if (free_space < data_size) {
+		ret = 1;
+		goto out;
+	}
+
+	return __push_leaf_left(trans, root, path, data_size,
+				empty, left, free_space, right_nritems);
+out:
+	btrfs_tree_unlock(left);
+	free_extent_buffer(left);
+	return ret;
+}
+
+/*
+ * split the path's leaf in two, making sure there is at least data_size
+ * available for the resulting leaf level of the path.
+ *
+ * returns 0 if all went well and < 0 on failure.
+ */
+static noinline int copy_for_split(struct btrfs_trans_handle *trans,
+			       struct btrfs_root *root,
+			       struct btrfs_path *path,
+			       struct extent_buffer *l,
+			       struct extent_buffer *right,
+			       int slot, int mid, int nritems)
+{
+	int data_copy_size;
+	int rt_data_off;
+	int i;
+	int ret = 0;
+	int wret;
+	struct btrfs_disk_key disk_key;
+
+	nritems = nritems - mid;
+	btrfs_set_header_nritems(right, nritems);
+	data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(root, l);
+
+	copy_extent_buffer(right, l, btrfs_item_nr_offset(0),
+			   btrfs_item_nr_offset(mid),
+			   nritems * sizeof(struct btrfs_item));
+
+	copy_extent_buffer(right, l,
+		     btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) -
+		     data_copy_size, btrfs_leaf_data(l) +
+		     leaf_data_end(root, l), data_copy_size);
+
+	rt_data_off = BTRFS_LEAF_DATA_SIZE(root) -
+		      btrfs_item_end_nr(l, mid);
+
+	for (i = 0; i < nritems; i++) {
+		struct btrfs_item *item = btrfs_item_nr(right, i);
+		u32 ioff;
+
+		if (!right->map_token) {
+			map_extent_buffer(right, (unsigned long)item,
+					sizeof(struct btrfs_item),
+					&right->map_token, &right->kaddr,
+					&right->map_start, &right->map_len,
+					KM_USER1);
+		}
+
+		ioff = btrfs_item_offset(right, item);
+		btrfs_set_item_offset(right, item, ioff + rt_data_off);
+	}
+
+	if (right->map_token) {
+		unmap_extent_buffer(right, right->map_token, KM_USER1);
+		right->map_token = NULL;
+	}
+
+	btrfs_set_header_nritems(l, mid);
+	ret = 0;
+	btrfs_item_key(right, &disk_key, 0);
+	wret = insert_ptr(trans, root, path, &disk_key, right->start,
+			  path->slots[1] + 1, 1);
+	if (wret)
+		ret = wret;
+
+	btrfs_mark_buffer_dirty(right);
+	btrfs_mark_buffer_dirty(l);
+	BUG_ON(path->slots[0] != slot);
+
+	ret = btrfs_update_ref(trans, root, l, right, 0, nritems);
+	BUG_ON(ret);
+
+	if (mid <= slot) {
+		btrfs_tree_unlock(path->nodes[0]);
+		free_extent_buffer(path->nodes[0]);
+		path->nodes[0] = right;
+		path->slots[0] -= mid;
+		path->slots[1] += 1;
+	} else {
+		btrfs_tree_unlock(right);
+		free_extent_buffer(right);
+	}
+
+	BUG_ON(path->slots[0] < 0);
+
+	return ret;
+}
+
+/*
  * split the path's leaf in two, making sure there is at least data_size
  * available for the resulting leaf level of the path.
  *
@@ -2754,17 +2886,14 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
 	int mid;
 	int slot;
 	struct extent_buffer *right;
-	int data_copy_size;
-	int rt_data_off;
-	int i;
 	int ret = 0;
 	int wret;
 	int double_split;
 	int num_doubles = 0;
-	struct btrfs_disk_key disk_key;

 	/* first try to make some room by pushing left and right */
-	if (data_size && ins_key->type != BTRFS_DIR_ITEM_KEY) {
+	if (data_size && ins_key->type != BTRFS_DIR_ITEM_KEY &&
+	    !trans->transaction->delayed_refs.flushing) {
 		wret = push_leaf_right(trans, root, path, data_size, 0);
 		if (wret < 0)
 			return wret;
@@ -2813,11 +2942,14 @@ again:
 	write_extent_buffer(right, root->fs_info->chunk_tree_uuid,
 			    (unsigned long)btrfs_header_chunk_tree_uuid(right),
 			    BTRFS_UUID_SIZE);
+
 	if (mid <= slot) {
 		if (nritems == 1 ||
 		    leaf_space_used(l, mid, nritems - mid) + data_size >
 		    BTRFS_LEAF_DATA_SIZE(root)) {
 			if (slot >= nritems) {
+				struct btrfs_disk_key disk_key;
+
 				btrfs_cpu_key_to_disk(&disk_key, ins_key);
 				btrfs_set_header_nritems(right, 0);
 				wret = insert_ptr(trans, root, path,
@@ -2845,6 +2977,8 @@ again:
 		if (leaf_space_used(l, 0, mid) + data_size >
 		    BTRFS_LEAF_DATA_SIZE(root)) {
 			if (!extend && data_size && slot == 0) {
+				struct btrfs_disk_key disk_key;
+
 				btrfs_cpu_key_to_disk(&disk_key, ins_key);
 				btrfs_set_header_nritems(right, 0);
 				wret = insert_ptr(trans, root, path,
@@ -2877,76 +3011,16 @@ again:
 			}
 		}
 	}
-	nritems = nritems - mid;
-	btrfs_set_header_nritems(right, nritems);
-	data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(root, l);
-
-	copy_extent_buffer(right, l, btrfs_item_nr_offset(0),
-			   btrfs_item_nr_offset(mid),
-			   nritems * sizeof(struct btrfs_item));
-
-	copy_extent_buffer(right, l,
-		     btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) -
-		     data_copy_size, btrfs_leaf_data(l) +
-		     leaf_data_end(root, l), data_copy_size);
-
-	rt_data_off = BTRFS_LEAF_DATA_SIZE(root) -
-		      btrfs_item_end_nr(l, mid);
-
-	for (i = 0; i < nritems; i++) {
-		struct btrfs_item *item = btrfs_item_nr(right, i);
-		u32 ioff;
-
-		if (!right->map_token) {
-			map_extent_buffer(right, (unsigned long)item,
-					sizeof(struct btrfs_item),
-					&right->map_token, &right->kaddr,
-					&right->map_start, &right->map_len,
-					KM_USER1);
-		}
-
-		ioff = btrfs_item_offset(right, item);
-		btrfs_set_item_offset(right, item, ioff + rt_data_off);
-	}
-
-	if (right->map_token) {
-		unmap_extent_buffer(right, right->map_token, KM_USER1);
-		right->map_token = NULL;
-	}

-	btrfs_set_header_nritems(l, mid);
-	ret = 0;
-	btrfs_item_key(right, &disk_key, 0);
-	wret = insert_ptr(trans, root, path, &disk_key, right->start,
-			  path->slots[1] + 1, 1);
-	if (wret)
-		ret = wret;
-
-	btrfs_mark_buffer_dirty(right);
-	btrfs_mark_buffer_dirty(l);
-	BUG_ON(path->slots[0] != slot);
-
-	ret = btrfs_update_ref(trans, root, l, right, 0, nritems);
+	ret = copy_for_split(trans, root, path, l, right, slot, mid, nritems);
 	BUG_ON(ret);

-	if (mid <= slot) {
-		btrfs_tree_unlock(path->nodes[0]);
-		free_extent_buffer(path->nodes[0]);
-		path->nodes[0] = right;
-		path->slots[0] -= mid;
-		path->slots[1] += 1;
-	} else {
-		btrfs_tree_unlock(right);
-		free_extent_buffer(right);
-	}
-
-	BUG_ON(path->slots[0] < 0);
-
 	if (double_split) {
 		BUG_ON(num_doubles != 0);
 		num_doubles++;
 		goto again;
 	}
+
 	return ret;
 }

@@ -3004,26 +3078,27 @@ int btrfs_split_item(struct btrfs_trans_handle *trans, | |||
3004 | return -EAGAIN; | 3078 | return -EAGAIN; |
3005 | } | 3079 | } |
3006 | 3080 | ||
3081 | btrfs_set_path_blocking(path); | ||
3007 | ret = split_leaf(trans, root, &orig_key, path, | 3082 | ret = split_leaf(trans, root, &orig_key, path, |
3008 | sizeof(struct btrfs_item), 1); | 3083 | sizeof(struct btrfs_item), 1); |
3009 | path->keep_locks = 0; | 3084 | path->keep_locks = 0; |
3010 | BUG_ON(ret); | 3085 | BUG_ON(ret); |
3011 | 3086 | ||
3087 | btrfs_unlock_up_safe(path, 1); | ||
3088 | leaf = path->nodes[0]; | ||
3089 | BUG_ON(btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item)); | ||
3090 | |||
3091 | split: | ||
3012 | /* | 3092 | /* |
3013 | * make sure any changes to the path from split_leaf leave it | 3093 | * make sure any changes to the path from split_leaf leave it |
3014 | * in a blocking state | 3094 | * in a blocking state |
3015 | */ | 3095 | */ |
3016 | btrfs_set_path_blocking(path); | 3096 | btrfs_set_path_blocking(path); |
3017 | 3097 | ||
3018 | leaf = path->nodes[0]; | ||
3019 | BUG_ON(btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item)); | ||
3020 | |||
3021 | split: | ||
3022 | item = btrfs_item_nr(leaf, path->slots[0]); | 3098 | item = btrfs_item_nr(leaf, path->slots[0]); |
3023 | orig_offset = btrfs_item_offset(leaf, item); | 3099 | orig_offset = btrfs_item_offset(leaf, item); |
3024 | item_size = btrfs_item_size(leaf, item); | 3100 | item_size = btrfs_item_size(leaf, item); |
3025 | 3101 | ||
3026 | |||
3027 | buf = kmalloc(item_size, GFP_NOFS); | 3102 | buf = kmalloc(item_size, GFP_NOFS); |
3028 | read_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf, | 3103 | read_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf, |
3029 | path->slots[0]), item_size); | 3104 | path->slots[0]), item_size); |
@@ -3428,39 +3503,27 @@ out: | |||
3428 | } | 3503 | } |
3429 | 3504 | ||
3430 | /* | 3505 | /* |
3431 | * Given a key and some data, insert items into the tree. | 3506 | * this is a helper for btrfs_insert_empty_items; the main goal here is |
3432 | * This does all the path init required, making room in the tree if needed. | 3507 | * to save stack depth by doing the bulk of the work in a function |
3508 | * that doesn't call btrfs_search_slot | ||
3433 | */ | 3509 | */ |
3434 | int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, | 3510 | static noinline_for_stack int |
3435 | struct btrfs_root *root, | 3511 | setup_items_for_insert(struct btrfs_trans_handle *trans, |
3436 | struct btrfs_path *path, | 3512 | struct btrfs_root *root, struct btrfs_path *path, |
3437 | struct btrfs_key *cpu_key, u32 *data_size, | 3513 | struct btrfs_key *cpu_key, u32 *data_size, |
3438 | int nr) | 3514 | u32 total_data, u32 total_size, int nr) |
3439 | { | 3515 | { |
3440 | struct extent_buffer *leaf; | ||
3441 | struct btrfs_item *item; | 3516 | struct btrfs_item *item; |
3442 | int ret = 0; | ||
3443 | int slot; | ||
3444 | int slot_orig; | ||
3445 | int i; | 3517 | int i; |
3446 | u32 nritems; | 3518 | u32 nritems; |
3447 | u32 total_size = 0; | ||
3448 | u32 total_data = 0; | ||
3449 | unsigned int data_end; | 3519 | unsigned int data_end; |
3450 | struct btrfs_disk_key disk_key; | 3520 | struct btrfs_disk_key disk_key; |
3521 | int ret; | ||
3522 | struct extent_buffer *leaf; | ||
3523 | int slot; | ||
3451 | 3524 | ||
3452 | for (i = 0; i < nr; i++) | ||
3453 | total_data += data_size[i]; | ||
3454 | |||
3455 | total_size = total_data + (nr * sizeof(struct btrfs_item)); | ||
3456 | ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1); | ||
3457 | if (ret == 0) | ||
3458 | return -EEXIST; | ||
3459 | if (ret < 0) | ||
3460 | goto out; | ||
3461 | |||
3462 | slot_orig = path->slots[0]; | ||
3463 | leaf = path->nodes[0]; | 3525 | leaf = path->nodes[0]; |
3526 | slot = path->slots[0]; | ||
3464 | 3527 | ||
3465 | nritems = btrfs_header_nritems(leaf); | 3528 | nritems = btrfs_header_nritems(leaf); |
3466 | data_end = leaf_data_end(root, leaf); | 3529 | data_end = leaf_data_end(root, leaf); |
@@ -3472,9 +3535,6 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, | |||
3472 | BUG(); | 3535 | BUG(); |
3473 | } | 3536 | } |
3474 | 3537 | ||
3475 | slot = path->slots[0]; | ||
3476 | BUG_ON(slot < 0); | ||
3477 | |||
3478 | if (slot != nritems) { | 3538 | if (slot != nritems) { |
3479 | unsigned int old_data = btrfs_item_end_nr(leaf, slot); | 3539 | unsigned int old_data = btrfs_item_end_nr(leaf, slot); |
3480 | 3540 | ||
@@ -3530,21 +3590,60 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, | |||
3530 | data_end -= data_size[i]; | 3590 | data_end -= data_size[i]; |
3531 | btrfs_set_item_size(leaf, item, data_size[i]); | 3591 | btrfs_set_item_size(leaf, item, data_size[i]); |
3532 | } | 3592 | } |
3593 | |||
3533 | btrfs_set_header_nritems(leaf, nritems + nr); | 3594 | btrfs_set_header_nritems(leaf, nritems + nr); |
3534 | btrfs_mark_buffer_dirty(leaf); | ||
3535 | 3595 | ||
3536 | ret = 0; | 3596 | ret = 0; |
3537 | if (slot == 0) { | 3597 | if (slot == 0) { |
3598 | struct btrfs_disk_key disk_key; | ||
3538 | btrfs_cpu_key_to_disk(&disk_key, cpu_key); | 3599 | btrfs_cpu_key_to_disk(&disk_key, cpu_key); |
3539 | ret = fixup_low_keys(trans, root, path, &disk_key, 1); | 3600 | ret = fixup_low_keys(trans, root, path, &disk_key, 1); |
3540 | } | 3601 | } |
3602 | btrfs_unlock_up_safe(path, 1); | ||
3603 | btrfs_mark_buffer_dirty(leaf); | ||
3541 | 3604 | ||
3542 | if (btrfs_leaf_free_space(root, leaf) < 0) { | 3605 | if (btrfs_leaf_free_space(root, leaf) < 0) { |
3543 | btrfs_print_leaf(root, leaf); | 3606 | btrfs_print_leaf(root, leaf); |
3544 | BUG(); | 3607 | BUG(); |
3545 | } | 3608 | } |
3609 | return ret; | ||
3610 | } | ||
3611 | |||
3612 | /* | ||
3613 | * Given a key and some data, insert items into the tree. | ||
3614 | * This does all the path init required, making room in the tree if needed. | ||
3615 | */ | ||
3616 | int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, | ||
3617 | struct btrfs_root *root, | ||
3618 | struct btrfs_path *path, | ||
3619 | struct btrfs_key *cpu_key, u32 *data_size, | ||
3620 | int nr) | ||
3621 | { | ||
3622 | struct extent_buffer *leaf; | ||
3623 | int ret = 0; | ||
3624 | int slot; | ||
3625 | int i; | ||
3626 | u32 total_size = 0; | ||
3627 | u32 total_data = 0; | ||
3628 | |||
3629 | for (i = 0; i < nr; i++) | ||
3630 | total_data += data_size[i]; | ||
3631 | |||
3632 | total_size = total_data + (nr * sizeof(struct btrfs_item)); | ||
3633 | ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1); | ||
3634 | if (ret == 0) | ||
3635 | return -EEXIST; | ||
3636 | if (ret < 0) | ||
3637 | goto out; | ||
3638 | |||
3639 | leaf = path->nodes[0]; | ||
3640 | slot = path->slots[0]; | ||
3641 | BUG_ON(slot < 0); | ||
3642 | |||
3643 | ret = setup_items_for_insert(trans, root, path, cpu_key, data_size, | ||
3644 | total_data, total_size, nr); | ||
3645 | |||
3546 | out: | 3646 | out: |
3547 | btrfs_unlock_up_safe(path, 1); | ||
3548 | return ret; | 3647 | return ret; |
3549 | } | 3648 | } |
3550 | 3649 | ||
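To see how this refactor fits together from a caller's point of view, here is a hedged sketch: btrfs_insert_empty_items() does the search and any leaf splitting, then hands off to setup_items_for_insert() for the shallow-stack item shuffling. The key setup and the fill_item_body() helper below are illustrative assumptions, not part of this patch.

	struct btrfs_path *path;
	struct btrfs_key key;
	u32 size = item_size;		/* bytes of item body to reserve */
	int ret;

	key.objectid = objectid;
	key.type = BTRFS_INODE_ITEM_KEY;	/* example item type */
	key.offset = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
	/* searches, splits leaves if needed, then calls setup_items_for_insert */
	ret = btrfs_insert_empty_items(trans, root, path, &key, &size, 1);
	if (ret == 0)
		fill_item_body(path);	/* hypothetical: write the reserved bytes */
	btrfs_free_path(path);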
@@ -3732,7 +3831,8 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
3732 | } | 3831 | } |
3733 | 3832 | ||
3734 | /* delete the leaf if it is mostly empty */ | 3833 | /* delete the leaf if it is mostly empty */ |
3735 | if (used < BTRFS_LEAF_DATA_SIZE(root) / 4) { | 3834 | if (used < BTRFS_LEAF_DATA_SIZE(root) / 4 && |
3835 | !trans->transaction->delayed_refs.flushing) { | ||
3736 | /* push_leaf_left fixes the path. | 3836 | /* push_leaf_left fixes the path. |
3737 | * make sure the path still points to our leaf | 3837 | * make sure the path still points to our leaf |
3738 | * for possible call to del_ptr below | 3838 | * for possible call to del_ptr below |
@@ -3740,6 +3840,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
3740 | slot = path->slots[1]; | 3840 | slot = path->slots[1]; |
3741 | extent_buffer_get(leaf); | 3841 | extent_buffer_get(leaf); |
3742 | 3842 | ||
3843 | btrfs_set_path_blocking(path); | ||
3743 | wret = push_leaf_left(trans, root, path, 1, 1); | 3844 | wret = push_leaf_left(trans, root, path, 1, 1); |
3744 | if (wret < 0 && wret != -ENOSPC) | 3845 | if (wret < 0 && wret != -ENOSPC) |
3745 | ret = wret; | 3846 | ret = wret; |
@@ -3926,7 +4027,6 @@ find_next_key: | |||
3926 | btrfs_release_path(root, path); | 4027 | btrfs_release_path(root, path); |
3927 | goto again; | 4028 | goto again; |
3928 | } else { | 4029 | } else { |
3929 | btrfs_clear_path_blocking(path); | ||
3930 | goto out; | 4030 | goto out; |
3931 | } | 4031 | } |
3932 | } | 4032 | } |
@@ -3946,7 +4046,7 @@ find_next_key: | |||
3946 | path->locks[level - 1] = 1; | 4046 | path->locks[level - 1] = 1; |
3947 | path->nodes[level - 1] = cur; | 4047 | path->nodes[level - 1] = cur; |
3948 | unlock_up(path, level, 1); | 4048 | unlock_up(path, level, 1); |
3949 | btrfs_clear_path_blocking(path); | 4049 | btrfs_clear_path_blocking(path, NULL); |
3950 | } | 4050 | } |
3951 | out: | 4051 | out: |
3952 | if (ret == 0) | 4052 | if (ret == 0) |
@@ -4026,28 +4126,44 @@ next: | |||
4026 | int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) | 4126 | int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) |
4027 | { | 4127 | { |
4028 | int slot; | 4128 | int slot; |
4029 | int level = 1; | 4129 | int level; |
4030 | struct extent_buffer *c; | 4130 | struct extent_buffer *c; |
4031 | struct extent_buffer *next = NULL; | 4131 | struct extent_buffer *next; |
4032 | struct btrfs_key key; | 4132 | struct btrfs_key key; |
4033 | u32 nritems; | 4133 | u32 nritems; |
4034 | int ret; | 4134 | int ret; |
4135 | int old_spinning = path->leave_spinning; | ||
4136 | int force_blocking = 0; | ||
4035 | 4137 | ||
4036 | nritems = btrfs_header_nritems(path->nodes[0]); | 4138 | nritems = btrfs_header_nritems(path->nodes[0]); |
4037 | if (nritems == 0) | 4139 | if (nritems == 0) |
4038 | return 1; | 4140 | return 1; |
4039 | 4141 | ||
4040 | btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1); | 4142 | /* |
4143 | * we take the blocks in an order that upsets lockdep. Using | ||
4144 | * blocking mode is the only way around it. | ||
4145 | */ | ||
4146 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | ||
4147 | force_blocking = 1; | ||
4148 | #endif | ||
4041 | 4149 | ||
4150 | btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1); | ||
4151 | again: | ||
4152 | level = 1; | ||
4153 | next = NULL; | ||
4042 | btrfs_release_path(root, path); | 4154 | btrfs_release_path(root, path); |
4155 | |||
4043 | path->keep_locks = 1; | 4156 | path->keep_locks = 1; |
4157 | |||
4158 | if (!force_blocking) | ||
4159 | path->leave_spinning = 1; | ||
4160 | |||
4044 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | 4161 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); |
4045 | path->keep_locks = 0; | 4162 | path->keep_locks = 0; |
4046 | 4163 | ||
4047 | if (ret < 0) | 4164 | if (ret < 0) |
4048 | return ret; | 4165 | return ret; |
4049 | 4166 | ||
4050 | btrfs_set_path_blocking(path); | ||
4051 | nritems = btrfs_header_nritems(path->nodes[0]); | 4167 | nritems = btrfs_header_nritems(path->nodes[0]); |
4052 | /* | 4168 | /* |
4053 | * by releasing the path above we dropped all our locks. A balance | 4169 | * by releasing the path above we dropped all our locks. A balance |
@@ -4057,19 +4173,24 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) | |||
4057 | */ | 4173 | */ |
4058 | if (nritems > 0 && path->slots[0] < nritems - 1) { | 4174 | if (nritems > 0 && path->slots[0] < nritems - 1) { |
4059 | path->slots[0]++; | 4175 | path->slots[0]++; |
4176 | ret = 0; | ||
4060 | goto done; | 4177 | goto done; |
4061 | } | 4178 | } |
4062 | 4179 | ||
4063 | while (level < BTRFS_MAX_LEVEL) { | 4180 | while (level < BTRFS_MAX_LEVEL) { |
4064 | if (!path->nodes[level]) | 4181 | if (!path->nodes[level]) { |
4065 | return 1; | 4182 | ret = 1; |
4183 | goto done; | ||
4184 | } | ||
4066 | 4185 | ||
4067 | slot = path->slots[level] + 1; | 4186 | slot = path->slots[level] + 1; |
4068 | c = path->nodes[level]; | 4187 | c = path->nodes[level]; |
4069 | if (slot >= btrfs_header_nritems(c)) { | 4188 | if (slot >= btrfs_header_nritems(c)) { |
4070 | level++; | 4189 | level++; |
4071 | if (level == BTRFS_MAX_LEVEL) | 4190 | if (level == BTRFS_MAX_LEVEL) { |
4072 | return 1; | 4191 | ret = 1; |
4192 | goto done; | ||
4193 | } | ||
4073 | continue; | 4194 | continue; |
4074 | } | 4195 | } |
4075 | 4196 | ||
@@ -4078,16 +4199,22 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) | |||
4078 | free_extent_buffer(next); | 4199 | free_extent_buffer(next); |
4079 | } | 4200 | } |
4080 | 4201 | ||
4081 | /* the path was set to blocking above */ | 4202 | next = c; |
4082 | if (level == 1 && (path->locks[1] || path->skip_locking) && | 4203 | ret = read_block_for_search(NULL, root, path, &next, level, |
4083 | path->reada) | 4204 | slot, &key); |
4084 | reada_for_search(root, path, level, slot, 0); | 4205 | if (ret == -EAGAIN) |
4206 | goto again; | ||
4085 | 4207 | ||
4086 | next = read_node_slot(root, c, slot); | ||
4087 | if (!path->skip_locking) { | 4208 | if (!path->skip_locking) { |
4088 | WARN_ON(!btrfs_tree_locked(c)); | 4209 | ret = btrfs_try_spin_lock(next); |
4089 | btrfs_tree_lock(next); | 4210 | if (!ret) { |
4090 | btrfs_set_lock_blocking(next); | 4211 | btrfs_set_path_blocking(path); |
4212 | btrfs_tree_lock(next); | ||
4213 | if (!force_blocking) | ||
4214 | btrfs_clear_path_blocking(path, next); | ||
4215 | } | ||
4216 | if (force_blocking) | ||
4217 | btrfs_set_lock_blocking(next); | ||
4091 | } | 4218 | } |
4092 | break; | 4219 | break; |
4093 | } | 4220 | } |
@@ -4097,27 +4224,42 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) | |||
4097 | c = path->nodes[level]; | 4224 | c = path->nodes[level]; |
4098 | if (path->locks[level]) | 4225 | if (path->locks[level]) |
4099 | btrfs_tree_unlock(c); | 4226 | btrfs_tree_unlock(c); |
4227 | |||
4100 | free_extent_buffer(c); | 4228 | free_extent_buffer(c); |
4101 | path->nodes[level] = next; | 4229 | path->nodes[level] = next; |
4102 | path->slots[level] = 0; | 4230 | path->slots[level] = 0; |
4103 | if (!path->skip_locking) | 4231 | if (!path->skip_locking) |
4104 | path->locks[level] = 1; | 4232 | path->locks[level] = 1; |
4233 | |||
4105 | if (!level) | 4234 | if (!level) |
4106 | break; | 4235 | break; |
4107 | 4236 | ||
4108 | btrfs_set_path_blocking(path); | 4237 | ret = read_block_for_search(NULL, root, path, &next, level, |
4109 | if (level == 1 && path->locks[1] && path->reada) | 4238 | 0, &key); |
4110 | reada_for_search(root, path, level, slot, 0); | 4239 | if (ret == -EAGAIN) |
4111 | next = read_node_slot(root, next, 0); | 4240 | goto again; |
4241 | |||
4112 | if (!path->skip_locking) { | 4242 | if (!path->skip_locking) { |
4113 | WARN_ON(!btrfs_tree_locked(path->nodes[level])); | 4243 | btrfs_assert_tree_locked(path->nodes[level]); |
4114 | btrfs_tree_lock(next); | 4244 | ret = btrfs_try_spin_lock(next); |
4115 | btrfs_set_lock_blocking(next); | 4245 | if (!ret) { |
4246 | btrfs_set_path_blocking(path); | ||
4247 | btrfs_tree_lock(next); | ||
4248 | if (!force_blocking) | ||
4249 | btrfs_clear_path_blocking(path, next); | ||
4250 | } | ||
4251 | if (force_blocking) | ||
4252 | btrfs_set_lock_blocking(next); | ||
4116 | } | 4253 | } |
4117 | } | 4254 | } |
4255 | ret = 0; | ||
4118 | done: | 4256 | done: |
4119 | unlock_up(path, 0, 1); | 4257 | unlock_up(path, 0, 1); |
4120 | return 0; | 4258 | path->leave_spinning = old_spinning; |
4259 | if (!old_spinning) | ||
4260 | btrfs_set_path_blocking(path); | ||
4261 | |||
4262 | return ret; | ||
4121 | } | 4263 | } |
4122 | 4264 | ||
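The spin-then-block sequence above appears twice in btrfs_next_leaf(), so a condensed restatement of the pattern may help; this paraphrases the hunk and assumes only the locking calls already used there.

	/* fast path: take the spinning lock without sleeping */
	ret = btrfs_try_spin_lock(next);
	if (!ret) {
		/* contended: switch the path to blocking and sleep for the lock */
		btrfs_set_path_blocking(path);
		btrfs_tree_lock(next);
		if (!force_blocking)
			btrfs_clear_path_blocking(path, next);
	}
	if (force_blocking)	/* lockdep builds always end up blocking */
		btrfs_set_lock_blocking(next);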
4123 | /* | 4265 | /* |
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 531db112c8bd..ad96495dedc5 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
@@ -43,11 +43,14 @@ struct btrfs_ordered_sum; | |||
43 | 43 | ||
44 | #define BTRFS_ACL_NOT_CACHED ((void *)-1) | 44 | #define BTRFS_ACL_NOT_CACHED ((void *)-1) |
45 | 45 | ||
46 | #ifdef CONFIG_LOCKDEP | 46 | #define BTRFS_MAX_LEVEL 8 |
47 | # define BTRFS_MAX_LEVEL 7 | 47 | |
48 | #else | 48 | /* |
49 | # define BTRFS_MAX_LEVEL 8 | 49 | * files bigger than this get some pre-flushing when they are added |
50 | #endif | 50 | * to the ordered operations list. That way we limit the total |
51 | * work done by the commit | ||
52 | */ | ||
53 | #define BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT (8 * 1024 * 1024) | ||
51 | 54 | ||
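As a hedged illustration of how this limit is meant to be applied (a sketch, not code from this patch): files above the threshold get a non-blocking flush when they join the ordered operations list, so the eventual commit only has to wait on the tail of their dirty data.

	/* sketch: pre-flush big files as they are queued */
	if (inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT)
		filemap_flush(inode->i_mapping);	/* start writeback, do not wait */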
52 | /* holds pointers to all of the tree roots */ | 55 | /* holds pointers to all of the tree roots */ |
53 | #define BTRFS_ROOT_TREE_OBJECTID 1ULL | 56 | #define BTRFS_ROOT_TREE_OBJECTID 1ULL |
@@ -140,12 +143,15 @@ static int btrfs_csum_sizes[] = { 4, 0 }; | |||
140 | #define BTRFS_FT_MAX 9 | 143 | #define BTRFS_FT_MAX 9 |
141 | 144 | ||
142 | /* | 145 | /* |
143 | * the key defines the order in the tree, and so it also defines (optimal) | 146 | * The key defines the order in the tree, and so it also defines (optimal) |
144 | * block layout. objectid corresonds to the inode number. The flags | 147 | * block layout. |
145 | * tells us things about the object, and is a kind of stream selector. | 148 | * |
146 | * so for a given inode, keys with flags of 1 might refer to the inode | 149 | * objectid corresponds to the inode number. |
147 | * data, flags of 2 may point to file data in the btree and flags == 3 | 150 | * |
148 | * may point to extents. | 151 | * type tells us things about the object, and is a kind of stream selector. |
152 | * so for a given inode, keys with type of 1 might refer to the inode data, | ||
153 | * type of 2 may point to file data in the btree and type == 3 may point to | ||
154 | * extents. | ||
149 | * | 155 | * |
150 | * offset is the starting byte offset for this key in the stream. | 156 | * offset is the starting byte offset for this key in the stream. |
151 | * | 157 | * |
@@ -197,7 +203,7 @@ struct btrfs_dev_item { | |||
197 | 203 | ||
198 | /* | 204 | /* |
199 | * starting byte of this partition on the device, | 205 | * starting byte of this partition on the device, |
200 | * to allowr for stripe alignment in the future | 206 | * to allow for stripe alignment in the future |
201 | */ | 207 | */ |
202 | __le64 start_offset; | 208 | __le64 start_offset; |
203 | 209 | ||
@@ -405,15 +411,16 @@ struct btrfs_path { | |||
405 | int locks[BTRFS_MAX_LEVEL]; | 411 | int locks[BTRFS_MAX_LEVEL]; |
406 | int reada; | 412 | int reada; |
407 | /* keep some upper locks as we walk down */ | 413 | /* keep some upper locks as we walk down */ |
408 | int keep_locks; | ||
409 | int skip_locking; | ||
410 | int lowest_level; | 414 | int lowest_level; |
411 | 415 | ||
412 | /* | 416 | /* |
413 | * set by btrfs_split_item, tells search_slot to keep all locks | 417 | * set by btrfs_split_item, tells search_slot to keep all locks |
414 | * and to force calls to keep space in the nodes | 418 | * and to force calls to keep space in the nodes |
415 | */ | 419 | */ |
416 | int search_for_split; | 420 | unsigned int search_for_split:1; |
421 | unsigned int keep_locks:1; | ||
422 | unsigned int skip_locking:1; | ||
423 | unsigned int leave_spinning:1; | ||
417 | }; | 424 | }; |
418 | 425 | ||
419 | /* | 426 | /* |
@@ -600,13 +607,27 @@ struct btrfs_block_group_item { | |||
600 | 607 | ||
601 | struct btrfs_space_info { | 608 | struct btrfs_space_info { |
602 | u64 flags; | 609 | u64 flags; |
603 | u64 total_bytes; | 610 | |
604 | u64 bytes_used; | 611 | u64 total_bytes; /* total bytes in the space */ |
605 | u64 bytes_pinned; | 612 | u64 bytes_used; /* total bytes used on disk */ |
606 | u64 bytes_reserved; | 613 | u64 bytes_pinned; /* total bytes pinned, will be freed when the |
607 | u64 bytes_readonly; | 614 | transaction finishes */ |
608 | int full; | 615 | u64 bytes_reserved; /* total bytes the allocator has reserved for |
609 | int force_alloc; | 616 | current allocations */ |
617 | u64 bytes_readonly; /* total bytes that are read only */ | ||
618 | |||
619 | /* delalloc accounting */ | ||
620 | u64 bytes_delalloc; /* number of bytes reserved for allocation, | ||
621 | this space is not necessarily reserved yet | ||
622 | by the allocator */ | ||
623 | u64 bytes_may_use; /* number of bytes that may be used for | ||
624 | delalloc */ | ||
625 | |||
626 | int full; /* indicates that we cannot allocate any more | ||
627 | chunks for this space */ | ||
628 | int force_alloc; /* set if we need to force a chunk alloc for | ||
629 | this space */ | ||
630 | |||
610 | struct list_head list; | 631 | struct list_head list; |
611 | 632 | ||
612 | /* for block groups in our same type */ | 633 | /* for block groups in our same type */ |
@@ -615,18 +636,35 @@ struct btrfs_space_info { | |||
615 | struct rw_semaphore groups_sem; | 636 | struct rw_semaphore groups_sem; |
616 | }; | 637 | }; |
617 | 638 | ||
618 | struct btrfs_free_space { | 639 | /* |
619 | struct rb_node bytes_index; | 640 | * free clusters are used to claim free space in relatively large chunks, |
620 | struct rb_node offset_index; | 641 | * allowing us to do less seeky writes. They are used for all metadata |
621 | u64 offset; | 642 | * allocations and data allocations in ssd mode. |
622 | u64 bytes; | 643 | */ |
644 | struct btrfs_free_cluster { | ||
645 | spinlock_t lock; | ||
646 | spinlock_t refill_lock; | ||
647 | struct rb_root root; | ||
648 | |||
649 | /* largest extent in this cluster */ | ||
650 | u64 max_size; | ||
651 | |||
652 | /* first extent starting offset */ | ||
653 | u64 window_start; | ||
654 | |||
655 | struct btrfs_block_group_cache *block_group; | ||
656 | /* | ||
657 | * when a cluster is allocated from a block group, we put the | ||
658 | * cluster onto a list in the block group so that it can | ||
659 | * be freed before the block group is freed. | ||
660 | */ | ||
661 | struct list_head block_group_list; | ||
623 | }; | 662 | }; |
624 | 663 | ||
625 | struct btrfs_block_group_cache { | 664 | struct btrfs_block_group_cache { |
626 | struct btrfs_key key; | 665 | struct btrfs_key key; |
627 | struct btrfs_block_group_item item; | 666 | struct btrfs_block_group_item item; |
628 | spinlock_t lock; | 667 | spinlock_t lock; |
629 | struct mutex alloc_mutex; | ||
630 | struct mutex cache_mutex; | 668 | struct mutex cache_mutex; |
631 | u64 pinned; | 669 | u64 pinned; |
632 | u64 reserved; | 670 | u64 reserved; |
@@ -638,6 +676,7 @@ struct btrfs_block_group_cache { | |||
638 | struct btrfs_space_info *space_info; | 676 | struct btrfs_space_info *space_info; |
639 | 677 | ||
640 | /* free space cache stuff */ | 678 | /* free space cache stuff */ |
679 | spinlock_t tree_lock; | ||
641 | struct rb_root free_space_bytes; | 680 | struct rb_root free_space_bytes; |
642 | struct rb_root free_space_offset; | 681 | struct rb_root free_space_offset; |
643 | 682 | ||
@@ -649,6 +688,11 @@ struct btrfs_block_group_cache { | |||
649 | 688 | ||
650 | /* usage count */ | 689 | /* usage count */ |
651 | atomic_t count; | 690 | atomic_t count; |
691 | |||
692 | /* List of struct btrfs_free_clusters for this block group. | ||
693 | * Today it will only have one thing on it, but that may change | ||
694 | */ | ||
695 | struct list_head cluster_list; | ||
652 | }; | 696 | }; |
653 | 697 | ||
654 | struct btrfs_leaf_ref_tree { | 698 | struct btrfs_leaf_ref_tree { |
@@ -678,15 +722,18 @@ struct btrfs_fs_info { | |||
678 | struct rb_root block_group_cache_tree; | 722 | struct rb_root block_group_cache_tree; |
679 | 723 | ||
680 | struct extent_io_tree pinned_extents; | 724 | struct extent_io_tree pinned_extents; |
681 | struct extent_io_tree pending_del; | ||
682 | struct extent_io_tree extent_ins; | ||
683 | 725 | ||
684 | /* logical->physical extent mapping */ | 726 | /* logical->physical extent mapping */ |
685 | struct btrfs_mapping_tree mapping_tree; | 727 | struct btrfs_mapping_tree mapping_tree; |
686 | 728 | ||
687 | u64 generation; | 729 | u64 generation; |
688 | u64 last_trans_committed; | 730 | u64 last_trans_committed; |
689 | u64 last_trans_new_blockgroup; | 731 | |
732 | /* | ||
733 | * this is updated to the current trans every time a full commit | ||
734 | * is required instead of the faster short fsync log commits | ||
735 | */ | ||
736 | u64 last_trans_log_full_commit; | ||
690 | u64 open_ioctl_trans; | 737 | u64 open_ioctl_trans; |
691 | unsigned long mount_opt; | 738 | unsigned long mount_opt; |
692 | u64 max_extent; | 739 | u64 max_extent; |
@@ -707,12 +754,20 @@ struct btrfs_fs_info { | |||
707 | struct mutex tree_log_mutex; | 754 | struct mutex tree_log_mutex; |
708 | struct mutex transaction_kthread_mutex; | 755 | struct mutex transaction_kthread_mutex; |
709 | struct mutex cleaner_mutex; | 756 | struct mutex cleaner_mutex; |
710 | struct mutex extent_ins_mutex; | ||
711 | struct mutex pinned_mutex; | ||
712 | struct mutex chunk_mutex; | 757 | struct mutex chunk_mutex; |
713 | struct mutex drop_mutex; | 758 | struct mutex drop_mutex; |
714 | struct mutex volume_mutex; | 759 | struct mutex volume_mutex; |
715 | struct mutex tree_reloc_mutex; | 760 | struct mutex tree_reloc_mutex; |
761 | |||
762 | /* | ||
763 | * this protects the ordered operations list only while we are | ||
764 | * processing all of the entries on it. This way we make | ||
765 | * sure the commit code doesn't find the list temporarily empty | ||
766 | * because another function happens to be doing a non-waiting preflush | ||
767 | * before jumping into the main commit. | ||
768 | */ | ||
769 | struct mutex ordered_operations_mutex; | ||
770 | |||
716 | struct list_head trans_list; | 771 | struct list_head trans_list; |
717 | struct list_head hashers; | 772 | struct list_head hashers; |
718 | struct list_head dead_roots; | 773 | struct list_head dead_roots; |
@@ -727,10 +782,29 @@ struct btrfs_fs_info { | |||
727 | * ordered extents | 782 | * ordered extents |
728 | */ | 783 | */ |
729 | spinlock_t ordered_extent_lock; | 784 | spinlock_t ordered_extent_lock; |
785 | |||
786 | /* | ||
787 | * all of the data=ordered extents pending writeback | ||
788 | * these can span multiple transactions and basically include | ||
789 | * every dirty data page that isn't from nodatacow | ||
790 | */ | ||
730 | struct list_head ordered_extents; | 791 | struct list_head ordered_extents; |
792 | |||
793 | /* | ||
794 | * all of the inodes that have delalloc bytes. It is possible for | ||
795 | * this list to be empty even when there is still dirty data=ordered | ||
796 | * extents waiting to finish IO. | ||
797 | */ | ||
731 | struct list_head delalloc_inodes; | 798 | struct list_head delalloc_inodes; |
732 | 799 | ||
733 | /* | 800 | /* |
801 | * special rename and truncate targets that must be on disk before | ||
802 | * we're allowed to commit. This is basically the ext3 style | ||
803 | * data=ordered list. | ||
804 | */ | ||
805 | struct list_head ordered_operations; | ||
806 | |||
807 | /* | ||
734 | * there is a pool of worker threads for checksumming during writes | 808 | * there is a pool of worker threads for checksumming during writes |
735 | * and a pool for checksumming after reads. This is because readers | 809 | * and a pool for checksumming after reads. This is because readers |
736 | * can run with FS locks held, and the writers may be waiting for | 810 | * can run with FS locks held, and the writers may be waiting for |
@@ -771,15 +845,31 @@ struct btrfs_fs_info { | |||
771 | atomic_t throttle_gen; | 845 | atomic_t throttle_gen; |
772 | 846 | ||
773 | u64 total_pinned; | 847 | u64 total_pinned; |
848 | |||
849 | /* protected by the delalloc lock, used to keep from writing | ||
850 | * metadata until there is a nice batch | ||
851 | */ | ||
852 | u64 dirty_metadata_bytes; | ||
774 | struct list_head dirty_cowonly_roots; | 853 | struct list_head dirty_cowonly_roots; |
775 | 854 | ||
776 | struct btrfs_fs_devices *fs_devices; | 855 | struct btrfs_fs_devices *fs_devices; |
856 | |||
857 | /* | ||
858 | * the space_info list is almost entirely read only. It only changes | ||
859 | * when we add a new raid type to the FS, and that happens | ||
860 | * very rarely. RCU is used to protect it. | ||
861 | */ | ||
777 | struct list_head space_info; | 862 | struct list_head space_info; |
863 | |||
778 | spinlock_t delalloc_lock; | 864 | spinlock_t delalloc_lock; |
779 | spinlock_t new_trans_lock; | 865 | spinlock_t new_trans_lock; |
780 | u64 delalloc_bytes; | 866 | u64 delalloc_bytes; |
781 | u64 last_alloc; | 867 | |
782 | u64 last_data_alloc; | 868 | /* data_alloc_cluster is only used in ssd mode */ |
869 | struct btrfs_free_cluster data_alloc_cluster; | ||
870 | |||
871 | /* all metadata allocations go through this cluster */ | ||
872 | struct btrfs_free_cluster meta_alloc_cluster; | ||
783 | 873 | ||
784 | spinlock_t ref_cache_lock; | 874 | spinlock_t ref_cache_lock; |
785 | u64 total_ref_cache_size; | 875 | u64 total_ref_cache_size; |
@@ -871,7 +961,6 @@ struct btrfs_root { | |||
871 | }; | 961 | }; |
872 | 962 | ||
873 | /* | 963 | /* |
874 | |||
875 | * inode items have the data typically returned from stat and store other | 964 | * inode items have the data typically returned from stat and store other |
876 | * info about object characteristics. There is one for every file and dir in | 965 | * info about object characteristics. There is one for every file and dir in |
877 | * the FS | 966 | * the FS |
@@ -902,7 +991,7 @@ struct btrfs_root { | |||
902 | #define BTRFS_EXTENT_CSUM_KEY 128 | 991 | #define BTRFS_EXTENT_CSUM_KEY 128 |
903 | 992 | ||
904 | /* | 993 | /* |
905 | * root items point to tree roots. There are typically in the root | 994 | * root items point to tree roots. They are typically in the root |
906 | * tree used by the super block to find all the other trees | 995 | * tree used by the super block to find all the other trees |
907 | */ | 996 | */ |
908 | #define BTRFS_ROOT_ITEM_KEY 132 | 997 | #define BTRFS_ROOT_ITEM_KEY 132 |
@@ -949,6 +1038,8 @@ struct btrfs_root { | |||
949 | #define BTRFS_MOUNT_SSD (1 << 3) | 1038 | #define BTRFS_MOUNT_SSD (1 << 3) |
950 | #define BTRFS_MOUNT_DEGRADED (1 << 4) | 1039 | #define BTRFS_MOUNT_DEGRADED (1 << 4) |
951 | #define BTRFS_MOUNT_COMPRESS (1 << 5) | 1040 | #define BTRFS_MOUNT_COMPRESS (1 << 5) |
1041 | #define BTRFS_MOUNT_NOTREELOG (1 << 6) | ||
1042 | #define BTRFS_MOUNT_FLUSHONCOMMIT (1 << 7) | ||
952 | 1043 | ||
953 | #define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) | 1044 | #define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) |
954 | #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) | 1045 | #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) |
@@ -1687,18 +1778,16 @@ static inline struct dentry *fdentry(struct file *file) | |||
1687 | } | 1778 | } |
1688 | 1779 | ||
1689 | /* extent-tree.c */ | 1780 | /* extent-tree.c */ |
1781 | void btrfs_put_block_group(struct btrfs_block_group_cache *cache); | ||
1782 | int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, | ||
1783 | struct btrfs_root *root, unsigned long count); | ||
1690 | int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len); | 1784 | int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len); |
1691 | int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans, | ||
1692 | struct btrfs_root *root, u64 bytenr, | ||
1693 | u64 num_bytes, u32 *refs); | ||
1694 | int btrfs_update_pinned_extents(struct btrfs_root *root, | 1785 | int btrfs_update_pinned_extents(struct btrfs_root *root, |
1695 | u64 bytenr, u64 num, int pin); | 1786 | u64 bytenr, u64 num, int pin); |
1696 | int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, | 1787 | int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, |
1697 | struct btrfs_root *root, struct extent_buffer *leaf); | 1788 | struct btrfs_root *root, struct extent_buffer *leaf); |
1698 | int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, | 1789 | int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, |
1699 | struct btrfs_root *root, u64 objectid, u64 bytenr); | 1790 | struct btrfs_root *root, u64 objectid, u64 bytenr); |
1700 | int btrfs_extent_post_op(struct btrfs_trans_handle *trans, | ||
1701 | struct btrfs_root *root); | ||
1702 | int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy); | 1791 | int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy); |
1703 | struct btrfs_block_group_cache *btrfs_lookup_block_group( | 1792 | struct btrfs_block_group_cache *btrfs_lookup_block_group( |
1704 | struct btrfs_fs_info *info, | 1793 | struct btrfs_fs_info *info, |
@@ -1715,7 +1804,8 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, | |||
1715 | u64 empty_size); | 1804 | u64 empty_size); |
1716 | struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, | 1805 | struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, |
1717 | struct btrfs_root *root, | 1806 | struct btrfs_root *root, |
1718 | u64 bytenr, u32 blocksize); | 1807 | u64 bytenr, u32 blocksize, |
1808 | int level); | ||
1719 | int btrfs_alloc_extent(struct btrfs_trans_handle *trans, | 1809 | int btrfs_alloc_extent(struct btrfs_trans_handle *trans, |
1720 | struct btrfs_root *root, | 1810 | struct btrfs_root *root, |
1721 | u64 num_bytes, u64 parent, u64 min_bytes, | 1811 | u64 num_bytes, u64 parent, u64 min_bytes, |
@@ -1759,7 +1849,7 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, | |||
1759 | u64 root_objectid, u64 ref_generation, | 1849 | u64 root_objectid, u64 ref_generation, |
1760 | u64 owner_objectid); | 1850 | u64 owner_objectid); |
1761 | int btrfs_update_extent_ref(struct btrfs_trans_handle *trans, | 1851 | int btrfs_update_extent_ref(struct btrfs_trans_handle *trans, |
1762 | struct btrfs_root *root, u64 bytenr, | 1852 | struct btrfs_root *root, u64 bytenr, u64 num_bytes, |
1763 | u64 orig_parent, u64 parent, | 1853 | u64 orig_parent, u64 parent, |
1764 | u64 root_objectid, u64 ref_generation, | 1854 | u64 root_objectid, u64 ref_generation, |
1765 | u64 owner_objectid); | 1855 | u64 owner_objectid); |
@@ -1785,6 +1875,18 @@ int btrfs_add_dead_reloc_root(struct btrfs_root *root); | |||
1785 | int btrfs_cleanup_reloc_trees(struct btrfs_root *root); | 1875 | int btrfs_cleanup_reloc_trees(struct btrfs_root *root); |
1786 | int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len); | 1876 | int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len); |
1787 | u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags); | 1877 | u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags); |
1878 | void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode); | ||
1879 | void btrfs_clear_space_info_full(struct btrfs_fs_info *info); | ||
1880 | |||
1881 | int btrfs_check_metadata_free_space(struct btrfs_root *root); | ||
1882 | int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode, | ||
1883 | u64 bytes); | ||
1884 | void btrfs_free_reserved_data_space(struct btrfs_root *root, | ||
1885 | struct inode *inode, u64 bytes); | ||
1886 | void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode, | ||
1887 | u64 bytes); | ||
1888 | void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode, | ||
1889 | u64 bytes); | ||
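The names suggest the following reservation lifecycle; treat the ordering as an assumption drawn from these declarations, not a verified call sequence from the patch.

	/* before dirtying pages: fail early if the data space cannot exist */
	ret = btrfs_check_data_free_space(root, inode, bytes);
	if (ret)
		return ret;
	/* ... dirty the pages ... */

	/* on error paths, give the reservation back */
	btrfs_free_reserved_data_space(root, inode, bytes);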
1788 | /* ctree.c */ | 1890 | /* ctree.c */ |
1789 | int btrfs_previous_item(struct btrfs_root *root, | 1891 | int btrfs_previous_item(struct btrfs_root *root, |
1790 | struct btrfs_path *path, u64 min_objectid, | 1892 | struct btrfs_path *path, u64 min_objectid, |
@@ -1808,7 +1910,7 @@ int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key, | |||
1808 | int btrfs_cow_block(struct btrfs_trans_handle *trans, | 1910 | int btrfs_cow_block(struct btrfs_trans_handle *trans, |
1809 | struct btrfs_root *root, struct extent_buffer *buf, | 1911 | struct btrfs_root *root, struct extent_buffer *buf, |
1810 | struct extent_buffer *parent, int parent_slot, | 1912 | struct extent_buffer *parent, int parent_slot, |
1811 | struct extent_buffer **cow_ret, u64 prealloc_dest); | 1913 | struct extent_buffer **cow_ret); |
1812 | int btrfs_copy_root(struct btrfs_trans_handle *trans, | 1914 | int btrfs_copy_root(struct btrfs_trans_handle *trans, |
1813 | struct btrfs_root *root, | 1915 | struct btrfs_root *root, |
1814 | struct extent_buffer *buf, | 1916 | struct extent_buffer *buf, |
@@ -1834,9 +1936,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, | |||
1834 | void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p); | 1936 | void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p); |
1835 | struct btrfs_path *btrfs_alloc_path(void); | 1937 | struct btrfs_path *btrfs_alloc_path(void); |
1836 | void btrfs_free_path(struct btrfs_path *p); | 1938 | void btrfs_free_path(struct btrfs_path *p); |
1837 | void btrfs_init_path(struct btrfs_path *p); | ||
1838 | void btrfs_set_path_blocking(struct btrfs_path *p); | 1939 | void btrfs_set_path_blocking(struct btrfs_path *p); |
1839 | void btrfs_clear_path_blocking(struct btrfs_path *p); | ||
1840 | void btrfs_unlock_up_safe(struct btrfs_path *p, int level); | 1940 | void btrfs_unlock_up_safe(struct btrfs_path *p, int level); |
1841 | 1941 | ||
1842 | int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, | 1942 | int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, |
@@ -2032,9 +2132,7 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset, | |||
2032 | unsigned long btrfs_force_ra(struct address_space *mapping, | 2132 | unsigned long btrfs_force_ra(struct address_space *mapping, |
2033 | struct file_ra_state *ra, struct file *file, | 2133 | struct file_ra_state *ra, struct file *file, |
2034 | pgoff_t offset, pgoff_t last_index); | 2134 | pgoff_t offset, pgoff_t last_index); |
2035 | int btrfs_check_free_space(struct btrfs_root *root, u64 num_required, | 2135 | int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); |
2036 | int for_del); | ||
2037 | int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page); | ||
2038 | int btrfs_readpage(struct file *file, struct page *page); | 2136 | int btrfs_readpage(struct file *file, struct page *page); |
2039 | void btrfs_delete_inode(struct inode *inode); | 2137 | void btrfs_delete_inode(struct inode *inode); |
2040 | void btrfs_put_inode(struct inode *inode); | 2138 | void btrfs_put_inode(struct inode *inode); |
@@ -2107,21 +2205,4 @@ int btrfs_check_acl(struct inode *inode, int mask); | |||
2107 | int btrfs_init_acl(struct inode *inode, struct inode *dir); | 2205 | int btrfs_init_acl(struct inode *inode, struct inode *dir); |
2108 | int btrfs_acl_chmod(struct inode *inode); | 2206 | int btrfs_acl_chmod(struct inode *inode); |
2109 | 2207 | ||
2110 | /* free-space-cache.c */ | ||
2111 | int btrfs_add_free_space(struct btrfs_block_group_cache *block_group, | ||
2112 | u64 bytenr, u64 size); | ||
2113 | int btrfs_add_free_space_lock(struct btrfs_block_group_cache *block_group, | ||
2114 | u64 offset, u64 bytes); | ||
2115 | int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, | ||
2116 | u64 bytenr, u64 size); | ||
2117 | int btrfs_remove_free_space_lock(struct btrfs_block_group_cache *block_group, | ||
2118 | u64 offset, u64 bytes); | ||
2119 | void btrfs_remove_free_space_cache(struct btrfs_block_group_cache | ||
2120 | *block_group); | ||
2121 | struct btrfs_free_space *btrfs_find_free_space(struct btrfs_block_group_cache | ||
2122 | *block_group, u64 offset, | ||
2123 | u64 bytes); | ||
2124 | void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, | ||
2125 | u64 bytes); | ||
2126 | u64 btrfs_block_group_free_space(struct btrfs_block_group_cache *block_group); | ||
2127 | #endif | 2208 | #endif |
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c new file mode 100644 index 000000000000..d6c01c096a40 --- /dev/null +++ b/fs/btrfs/delayed-ref.c | |||
@@ -0,0 +1,668 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2009 Oracle. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public | ||
6 | * License v2 as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
11 | * General Public License for more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public | ||
14 | * License along with this program; if not, write to the | ||
15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
16 | * Boston, MA 021110-1307, USA. | ||
17 | */ | ||
18 | |||
19 | #include <linux/sched.h> | ||
20 | #include <linux/sort.h> | ||
21 | #include "ctree.h" | ||
22 | #include "delayed-ref.h" | ||
23 | #include "transaction.h" | ||
24 | |||
25 | /* | ||
26 | * delayed back reference update tracking. For subvolume trees | ||
27 | * we queue up extent allocations and backref maintenance for | ||
28 | * delayed processing. This avoids deep call chains where we | ||
29 | * add extents in the middle of btrfs_search_slot, and it allows | ||
30 | * us to buffer up frequently modified backrefs in an rb tree instead | ||
31 | * of hammering updates on the extent allocation tree. | ||
32 | * | ||
33 | * Right now this code is only used for reference counted trees, but | ||
34 | * the long term goal is to get rid of the similar code for delayed | ||
35 | * extent tree modifications. | ||
36 | */ | ||
37 | |||
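In rough terms, the flow this file enables looks like the sketch below; btrfs_run_delayed_refs() is declared in ctree.h by this patch, and placing the drain at commit time is an assumption for illustration.

	/* deep call chains only queue work (btrfs_add_delayed_ref, below);
	 * the transaction code later drains the rb tree with a shallow stack */
	ret = btrfs_run_delayed_refs(trans, root, 0);
	BUG_ON(ret);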
38 | /* | ||
39 | * entries in the rb tree are ordered by the byte number of the extent | ||
40 | * and by the byte number of the parent block. | ||
41 | */ | ||
42 | static int comp_entry(struct btrfs_delayed_ref_node *ref, | ||
43 | u64 bytenr, u64 parent) | ||
44 | { | ||
45 | if (bytenr < ref->bytenr) | ||
46 | return -1; | ||
47 | if (bytenr > ref->bytenr) | ||
48 | return 1; | ||
49 | if (parent < ref->parent) | ||
50 | return -1; | ||
51 | if (parent > ref->parent) | ||
52 | return 1; | ||
53 | return 0; | ||
54 | } | ||
55 | |||
56 | /* | ||
57 | * insert a new ref into the rbtree. This returns the existing ref | ||
58 | * for the same (bytenr,parent) tuple, or NULL if the new node was properly | ||
59 | * inserted. | ||
60 | */ | ||
61 | static struct btrfs_delayed_ref_node *tree_insert(struct rb_root *root, | ||
62 | u64 bytenr, u64 parent, | ||
63 | struct rb_node *node) | ||
64 | { | ||
65 | struct rb_node **p = &root->rb_node; | ||
66 | struct rb_node *parent_node = NULL; | ||
67 | struct btrfs_delayed_ref_node *entry; | ||
68 | int cmp; | ||
69 | |||
70 | while (*p) { | ||
71 | parent_node = *p; | ||
72 | entry = rb_entry(parent_node, struct btrfs_delayed_ref_node, | ||
73 | rb_node); | ||
74 | |||
75 | cmp = comp_entry(entry, bytenr, parent); | ||
76 | if (cmp < 0) | ||
77 | p = &(*p)->rb_left; | ||
78 | else if (cmp > 0) | ||
79 | p = &(*p)->rb_right; | ||
80 | else | ||
81 | return entry; | ||
82 | } | ||
83 | |||
84 | entry = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); | ||
85 | rb_link_node(node, parent_node, p); | ||
86 | rb_insert_color(node, root); | ||
87 | return NULL; | ||
88 | } | ||
89 | |||
90 | /* | ||
91 | * find an entry based on (bytenr,parent). This returns the delayed | ||
92 | * ref if it was able to find one, or NULL if nothing was in that spot | ||
93 | */ | ||
94 | static struct btrfs_delayed_ref_node *tree_search(struct rb_root *root, | ||
95 | u64 bytenr, u64 parent, | ||
96 | struct btrfs_delayed_ref_node **last) | ||
97 | { | ||
98 | struct rb_node *n = root->rb_node; | ||
99 | struct btrfs_delayed_ref_node *entry; | ||
100 | int cmp; | ||
101 | |||
102 | while (n) { | ||
103 | entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node); | ||
104 | WARN_ON(!entry->in_tree); | ||
105 | if (last) | ||
106 | *last = entry; | ||
107 | |||
108 | cmp = comp_entry(entry, bytenr, parent); | ||
109 | if (cmp < 0) | ||
110 | n = n->rb_left; | ||
111 | else if (cmp > 0) | ||
112 | n = n->rb_right; | ||
113 | else | ||
114 | return entry; | ||
115 | } | ||
116 | return NULL; | ||
117 | } | ||
118 | |||
119 | int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans, | ||
120 | struct btrfs_delayed_ref_head *head) | ||
121 | { | ||
122 | struct btrfs_delayed_ref_root *delayed_refs; | ||
123 | |||
124 | delayed_refs = &trans->transaction->delayed_refs; | ||
125 | assert_spin_locked(&delayed_refs->lock); | ||
126 | if (mutex_trylock(&head->mutex)) | ||
127 | return 0; | ||
128 | |||
129 | atomic_inc(&head->node.refs); | ||
130 | spin_unlock(&delayed_refs->lock); | ||
131 | |||
132 | mutex_lock(&head->mutex); | ||
133 | spin_lock(&delayed_refs->lock); | ||
134 | if (!head->node.in_tree) { | ||
135 | mutex_unlock(&head->mutex); | ||
136 | btrfs_put_delayed_ref(&head->node); | ||
137 | return -EAGAIN; | ||
138 | } | ||
139 | btrfs_put_delayed_ref(&head->node); | ||
140 | return 0; | ||
141 | } | ||
142 | |||
143 | int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans, | ||
144 | struct list_head *cluster, u64 start) | ||
145 | { | ||
146 | int count = 0; | ||
147 | struct btrfs_delayed_ref_root *delayed_refs; | ||
148 | struct rb_node *node; | ||
149 | struct btrfs_delayed_ref_node *ref; | ||
150 | struct btrfs_delayed_ref_head *head; | ||
151 | |||
152 | delayed_refs = &trans->transaction->delayed_refs; | ||
153 | if (start == 0) { | ||
154 | node = rb_first(&delayed_refs->root); | ||
155 | } else { | ||
156 | ref = NULL; | ||
157 | tree_search(&delayed_refs->root, start, (u64)-1, &ref); | ||
158 | if (ref) { | ||
159 | struct btrfs_delayed_ref_node *tmp; | ||
160 | |||
161 | node = rb_prev(&ref->rb_node); | ||
162 | while (node) { | ||
163 | tmp = rb_entry(node, | ||
164 | struct btrfs_delayed_ref_node, | ||
165 | rb_node); | ||
166 | if (tmp->bytenr < start) | ||
167 | break; | ||
168 | ref = tmp; | ||
169 | node = rb_prev(&ref->rb_node); | ||
170 | } | ||
171 | node = &ref->rb_node; | ||
172 | } else | ||
173 | node = rb_first(&delayed_refs->root); | ||
174 | } | ||
175 | again: | ||
176 | while (node && count < 32) { | ||
177 | ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); | ||
178 | if (btrfs_delayed_ref_is_head(ref)) { | ||
179 | head = btrfs_delayed_node_to_head(ref); | ||
180 | if (list_empty(&head->cluster)) { | ||
181 | list_add_tail(&head->cluster, cluster); | ||
182 | delayed_refs->run_delayed_start = | ||
183 | head->node.bytenr; | ||
184 | count++; | ||
185 | |||
186 | WARN_ON(delayed_refs->num_heads_ready == 0); | ||
187 | delayed_refs->num_heads_ready--; | ||
188 | } else if (count) { | ||
189 | /* the goal of the clustering is to find extents | ||
190 | * that are likely to end up in the same extent | ||
191 | * leaf on disk. So, we don't want them spread | ||
192 | * all over the tree. Stop now if we've hit | ||
193 | * a head that was already in use | ||
194 | */ | ||
195 | break; | ||
196 | } | ||
197 | } | ||
198 | node = rb_next(node); | ||
199 | } | ||
200 | if (count) { | ||
201 | return 0; | ||
202 | } else if (start) { | ||
203 | /* | ||
204 | * we've gone to the end of the rbtree without finding any | ||
205 | * clusters. start from the beginning and try again | ||
206 | */ | ||
207 | start = 0; | ||
208 | node = rb_first(&delayed_refs->root); | ||
209 | goto again; | ||
210 | } | ||
211 | return 1; | ||
212 | } | ||
213 | |||
214 | /* | ||
215 | * This checks to see if there are any delayed refs in the | ||
216 | * rbtree for a given bytenr. It returns one if it finds any | ||
217 | * and zero otherwise. | ||
218 | * | ||
219 | * If it only finds a head node, it returns 0. | ||
220 | * | ||
221 | * The idea is to use this when deciding if you can safely delete an | ||
222 | * extent from the extent allocation tree. There may be a pending | ||
223 | * ref in the rbtree that adds or removes references, so as long as this | ||
224 | * returns one you need to leave the BTRFS_EXTENT_ITEM in the extent | ||
225 | * allocation tree. | ||
226 | */ | ||
227 | int btrfs_delayed_ref_pending(struct btrfs_trans_handle *trans, u64 bytenr) | ||
228 | { | ||
229 | struct btrfs_delayed_ref_node *ref; | ||
230 | struct btrfs_delayed_ref_root *delayed_refs; | ||
231 | struct rb_node *prev_node; | ||
232 | int ret = 0; | ||
233 | |||
234 | delayed_refs = &trans->transaction->delayed_refs; | ||
235 | spin_lock(&delayed_refs->lock); | ||
236 | |||
237 | ref = tree_search(&delayed_refs->root, bytenr, (u64)-1, NULL); | ||
238 | if (ref) { | ||
239 | prev_node = rb_prev(&ref->rb_node); | ||
240 | if (!prev_node) | ||
241 | goto out; | ||
242 | ref = rb_entry(prev_node, struct btrfs_delayed_ref_node, | ||
243 | rb_node); | ||
244 | if (ref->bytenr == bytenr) | ||
245 | ret = 1; | ||
246 | } | ||
247 | out: | ||
248 | spin_unlock(&delayed_refs->lock); | ||
249 | return ret; | ||
250 | } | ||
251 | |||
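Hedged caller sketch for the check above; remove_extent_item() is a made-up stand-in for the extent-tree deletion path.

	/* only safe to drop the EXTENT_ITEM once nothing is queued for it */
	if (!btrfs_delayed_ref_pending(trans, bytenr))
		ret = remove_extent_item(trans, extent_root, bytenr); /* hypothetical */
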
252 | /* | ||
253 | * helper function to lookup reference count | ||
254 | * | ||
255 | * the head node for delayed ref is used to store the sum of all the | ||
256 | * reference count modifications queued up in the rbtree. This way you | ||
257 | * can check to see what the reference count would be if all of the | ||
258 | * delayed refs are processed. | ||
259 | */ | ||
260 | int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans, | ||
261 | struct btrfs_root *root, u64 bytenr, | ||
262 | u64 num_bytes, u32 *refs) | ||
263 | { | ||
264 | struct btrfs_delayed_ref_node *ref; | ||
265 | struct btrfs_delayed_ref_head *head; | ||
266 | struct btrfs_delayed_ref_root *delayed_refs; | ||
267 | struct btrfs_path *path; | ||
268 | struct extent_buffer *leaf; | ||
269 | struct btrfs_extent_item *ei; | ||
270 | struct btrfs_key key; | ||
271 | u32 num_refs; | ||
272 | int ret; | ||
273 | |||
274 | path = btrfs_alloc_path(); | ||
275 | if (!path) | ||
276 | return -ENOMEM; | ||
277 | |||
278 | key.objectid = bytenr; | ||
279 | key.type = BTRFS_EXTENT_ITEM_KEY; | ||
280 | key.offset = num_bytes; | ||
281 | delayed_refs = &trans->transaction->delayed_refs; | ||
282 | again: | ||
283 | ret = btrfs_search_slot(trans, root->fs_info->extent_root, | ||
284 | &key, path, 0, 0); | ||
285 | if (ret < 0) | ||
286 | goto out; | ||
287 | |||
288 | if (ret == 0) { | ||
289 | leaf = path->nodes[0]; | ||
290 | ei = btrfs_item_ptr(leaf, path->slots[0], | ||
291 | struct btrfs_extent_item); | ||
292 | num_refs = btrfs_extent_refs(leaf, ei); | ||
293 | } else { | ||
294 | num_refs = 0; | ||
295 | ret = 0; | ||
296 | } | ||
297 | |||
298 | spin_lock(&delayed_refs->lock); | ||
299 | ref = tree_search(&delayed_refs->root, bytenr, (u64)-1, NULL); | ||
300 | if (ref) { | ||
301 | head = btrfs_delayed_node_to_head(ref); | ||
302 | if (mutex_trylock(&head->mutex)) { | ||
303 | num_refs += ref->ref_mod; | ||
304 | mutex_unlock(&head->mutex); | ||
305 | *refs = num_refs; | ||
306 | goto out; | ||
307 | } | ||
308 | |||
309 | atomic_inc(&ref->refs); | ||
310 | spin_unlock(&delayed_refs->lock); | ||
311 | |||
312 | btrfs_release_path(root->fs_info->extent_root, path); | ||
313 | |||
314 | mutex_lock(&head->mutex); | ||
315 | mutex_unlock(&head->mutex); | ||
316 | btrfs_put_delayed_ref(ref); | ||
317 | goto again; | ||
318 | } else { | ||
319 | *refs = num_refs; | ||
320 | } | ||
321 | out: | ||
322 | spin_unlock(&delayed_refs->lock); | ||
323 | btrfs_free_path(path); | ||
324 | return ret; | ||
325 | } | ||
326 | |||
327 | /* | ||
328 | * helper function to update an extent delayed ref in the | ||
329 | * rbtree. existing and update must both have the same | ||
330 | * bytenr and parent | ||
331 | * | ||
332 | * This may free existing if the update cancels out whatever | ||
333 | * operation it was doing. | ||
334 | */ | ||
335 | static noinline void | ||
336 | update_existing_ref(struct btrfs_trans_handle *trans, | ||
337 | struct btrfs_delayed_ref_root *delayed_refs, | ||
338 | struct btrfs_delayed_ref_node *existing, | ||
339 | struct btrfs_delayed_ref_node *update) | ||
340 | { | ||
341 | struct btrfs_delayed_ref *existing_ref; | ||
342 | struct btrfs_delayed_ref *ref; | ||
343 | |||
344 | existing_ref = btrfs_delayed_node_to_ref(existing); | ||
345 | ref = btrfs_delayed_node_to_ref(update); | ||
346 | |||
347 | if (ref->pin) | ||
348 | existing_ref->pin = 1; | ||
349 | |||
350 | if (ref->action != existing_ref->action) { | ||
351 | /* | ||
352 | * this is effectively undoing either an add or a | ||
353 | * drop. We decrement the ref_mod, and if it goes | ||
354 | * down to zero we just delete the entry without | ||
355 | * every changing the extent allocation tree. | ||
356 | */ | ||
357 | existing->ref_mod--; | ||
358 | if (existing->ref_mod == 0) { | ||
359 | rb_erase(&existing->rb_node, | ||
360 | &delayed_refs->root); | ||
361 | existing->in_tree = 0; | ||
362 | btrfs_put_delayed_ref(existing); | ||
363 | delayed_refs->num_entries--; | ||
364 | if (trans->delayed_ref_updates) | ||
365 | trans->delayed_ref_updates--; | ||
366 | } | ||
367 | } else { | ||
368 | if (existing_ref->action == BTRFS_ADD_DELAYED_REF) { | ||
369 | /* if we're adding refs, make sure all the | ||
370 | * details match up. The extent could | ||
371 | * have been totally freed and reallocated | ||
372 | * by a different owner before the delayed | ||
373 | * ref entries were removed. | ||
374 | */ | ||
375 | existing_ref->owner_objectid = ref->owner_objectid; | ||
376 | existing_ref->generation = ref->generation; | ||
377 | existing_ref->root = ref->root; | ||
378 | existing->num_bytes = update->num_bytes; | ||
379 | } | ||
380 | /* | ||
381 | * the action on the existing ref matches | ||
382 | * the action on the ref we're trying to add. | ||
383 | * Bump the ref_mod by one so the backref that | ||
384 | * is eventually added/removed has the correct | ||
385 | * reference count | ||
386 | */ | ||
387 | existing->ref_mod += update->ref_mod; | ||
388 | } | ||
389 | } | ||
390 | |||
391 | /* | ||
392 | * helper function to update the accounting in the head ref | ||
393 | * existing and update must have the same bytenr | ||
394 | */ | ||
395 | static noinline void | ||
396 | update_existing_head_ref(struct btrfs_delayed_ref_node *existing, | ||
397 | struct btrfs_delayed_ref_node *update) | ||
398 | { | ||
399 | struct btrfs_delayed_ref_head *existing_ref; | ||
400 | struct btrfs_delayed_ref_head *ref; | ||
401 | |||
402 | existing_ref = btrfs_delayed_node_to_head(existing); | ||
403 | ref = btrfs_delayed_node_to_head(update); | ||
404 | |||
405 | if (ref->must_insert_reserved) { | ||
406 | /* if the extent was freed and then | ||
407 | * reallocated before the delayed ref | ||
408 | * entries were processed, we can end up | ||
409 | * with an existing head ref without | ||
410 | * the must_insert_reserved flag set. | ||
411 | * Set it again here | ||
412 | */ | ||
413 | existing_ref->must_insert_reserved = ref->must_insert_reserved; | ||
414 | |||
415 | /* | ||
416 | * update the num_bytes so we make sure the accounting | ||
417 | * is done correctly | ||
418 | */ | ||
419 | existing->num_bytes = update->num_bytes; | ||
420 | |||
421 | } | ||
422 | |||
423 | /* | ||
424 | * update the reference mod on the head to reflect this new operation | ||
425 | */ | ||
426 | existing->ref_mod += update->ref_mod; | ||
427 | } | ||
428 | |||
429 | /* | ||
430 | * helper function to actually insert a delayed ref into the rbtree. | ||
431 | * this does all the dirty work in terms of maintaining the correct | ||
432 | * overall modification count in the head node and properly dealing | ||
433 | * with updating existing nodes as new modifications are queued. | ||
434 | */ | ||
435 | static noinline int __btrfs_add_delayed_ref(struct btrfs_trans_handle *trans, | ||
436 | struct btrfs_delayed_ref_node *ref, | ||
437 | u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root, | ||
438 | u64 ref_generation, u64 owner_objectid, int action, | ||
439 | int pin) | ||
440 | { | ||
441 | struct btrfs_delayed_ref_node *existing; | ||
442 | struct btrfs_delayed_ref *full_ref; | ||
443 | struct btrfs_delayed_ref_head *head_ref = NULL; | ||
444 | struct btrfs_delayed_ref_root *delayed_refs; | ||
445 | int count_mod = 1; | ||
446 | int must_insert_reserved = 0; | ||
447 | |||
448 | /* | ||
449 | * the head node stores the sum of all the mods, so dropping a ref | ||
450 | * should drop the sum in the head node by one. | ||
451 | */ | ||
452 | if (parent == (u64)-1) { | ||
453 | if (action == BTRFS_DROP_DELAYED_REF) | ||
454 | count_mod = -1; | ||
455 | else if (action == BTRFS_UPDATE_DELAYED_HEAD) | ||
456 | count_mod = 0; | ||
457 | } | ||
458 | |||
459 | /* | ||
460 | * BTRFS_ADD_DELAYED_EXTENT means that we need to update | ||
461 | * the reserved accounting when the extent is finally added, or | ||
462 | * if a later modification deletes the delayed ref without ever | ||
463 | * inserting the extent into the extent allocation tree. | ||
464 | * ref->must_insert_reserved is the flag used to record | ||
465 | * that accounting mods are required. | ||
466 | * | ||
467 | * Once we record must_insert_reserved, switch the action to | ||
468 | * BTRFS_ADD_DELAYED_REF because other special casing is not required. | ||
469 | */ | ||
470 | if (action == BTRFS_ADD_DELAYED_EXTENT) { | ||
471 | must_insert_reserved = 1; | ||
472 | action = BTRFS_ADD_DELAYED_REF; | ||
473 | } else { | ||
474 | must_insert_reserved = 0; | ||
475 | } | ||
476 | |||
477 | |||
478 | delayed_refs = &trans->transaction->delayed_refs; | ||
479 | |||
480 | /* first set the basic ref node struct up */ | ||
481 | atomic_set(&ref->refs, 1); | ||
482 | ref->bytenr = bytenr; | ||
483 | ref->parent = parent; | ||
484 | ref->ref_mod = count_mod; | ||
485 | ref->in_tree = 1; | ||
486 | ref->num_bytes = num_bytes; | ||
487 | |||
488 | if (btrfs_delayed_ref_is_head(ref)) { | ||
489 | head_ref = btrfs_delayed_node_to_head(ref); | ||
490 | head_ref->must_insert_reserved = must_insert_reserved; | ||
491 | INIT_LIST_HEAD(&head_ref->cluster); | ||
492 | mutex_init(&head_ref->mutex); | ||
493 | } else { | ||
494 | full_ref = btrfs_delayed_node_to_ref(ref); | ||
495 | full_ref->root = ref_root; | ||
496 | full_ref->generation = ref_generation; | ||
497 | full_ref->owner_objectid = owner_objectid; | ||
498 | full_ref->pin = pin; | ||
499 | full_ref->action = action; | ||
500 | } | ||
501 | |||
502 | existing = tree_insert(&delayed_refs->root, bytenr, | ||
503 | parent, &ref->rb_node); | ||
504 | |||
505 | if (existing) { | ||
506 | if (btrfs_delayed_ref_is_head(ref)) | ||
507 | update_existing_head_ref(existing, ref); | ||
508 | else | ||
509 | update_existing_ref(trans, delayed_refs, existing, ref); | ||
510 | |||
511 | /* | ||
512 | * we've updated the existing ref, free the newly | ||
513 | * allocated ref | ||
514 | */ | ||
515 | kfree(ref); | ||
516 | } else { | ||
517 | if (btrfs_delayed_ref_is_head(ref)) { | ||
518 | delayed_refs->num_heads++; | ||
519 | delayed_refs->num_heads_ready++; | ||
520 | } | ||
521 | delayed_refs->num_entries++; | ||
522 | trans->delayed_ref_updates++; | ||
523 | } | ||
524 | return 0; | ||
525 | } | ||
526 | |||
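The function above follows a common insert-or-merge rbtree idiom: optimistically insert, and when a node with the same (bytenr, parent) key already exists, fold the update into it and free the freshly allocated node. A self-contained userspace sketch of that control flow, with a trivial table standing in for the rbtree (everything here is illustrative):

#include <stdio.h>
#include <stdlib.h>

struct node { unsigned long long bytenr, parent; int ref_mod; };

#define MAX_NODES 16
static struct node *table[MAX_NODES];

/* returns the colliding node on a duplicate key, NULL after inserting */
static struct node *tree_insert(struct node *n)
{
        int i, free_slot = -1;

        for (i = 0; i < MAX_NODES; i++) {
                if (!table[i]) {
                        if (free_slot < 0)
                                free_slot = i;
                } else if (table[i]->bytenr == n->bytenr &&
                           table[i]->parent == n->parent) {
                        return table[i];
                }
        }
        if (free_slot < 0)
                abort();        /* table full; fine for a sketch */
        table[free_slot] = n;
        return NULL;
}

static void queue_mod(unsigned long long bytenr,
                      unsigned long long parent, int mod)
{
        struct node *n = calloc(1, sizeof(*n));
        struct node *existing;

        n->bytenr = bytenr;
        n->parent = parent;
        n->ref_mod = mod;

        existing = tree_insert(n);
        if (existing) {
                existing->ref_mod += mod;       /* merge the update */
                free(n);                        /* drop the duplicate */
        }
}

int main(void)
{
        queue_mod(4096, 0, +1);
        queue_mod(4096, 0, +1);         /* same key: merged, not inserted */
        printf("ref_mod = %d\n", table[0]->ref_mod);    /* prints 2 */
        return 0;
}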
527 | /* | ||
528 | * add a delayed ref to the tree. This does all of the accounting required | ||
529 | * to make sure the delayed ref is eventually processed before this | ||
530 | * transaction commits. | ||
531 | */ | ||
532 | int btrfs_add_delayed_ref(struct btrfs_trans_handle *trans, | ||
533 | u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root, | ||
534 | u64 ref_generation, u64 owner_objectid, int action, | ||
535 | int pin) | ||
536 | { | ||
537 | struct btrfs_delayed_ref *ref; | ||
538 | struct btrfs_delayed_ref_head *head_ref; | ||
539 | struct btrfs_delayed_ref_root *delayed_refs; | ||
540 | int ret; | ||
541 | |||
542 | ref = kmalloc(sizeof(*ref), GFP_NOFS); | ||
543 | if (!ref) | ||
544 | return -ENOMEM; | ||
545 | |||
546 | /* | ||
547 | * the parent = 0 case comes from callers that don't actually | ||
548 | * know the parent yet. It will get updated later via an add/drop | ||
549 | * pair. | ||
550 | */ | ||
551 | if (parent == 0) | ||
552 | parent = bytenr; | ||
553 | |||
554 | head_ref = kmalloc(sizeof(*head_ref), GFP_NOFS); | ||
555 | if (!head_ref) { | ||
556 | kfree(ref); | ||
557 | return -ENOMEM; | ||
558 | } | ||
559 | delayed_refs = &trans->transaction->delayed_refs; | ||
560 | spin_lock(&delayed_refs->lock); | ||
561 | |||
562 | /* | ||
563 | * insert both the head node and the new ref without dropping | ||
564 | * the spin lock | ||
565 | */ | ||
566 | ret = __btrfs_add_delayed_ref(trans, &head_ref->node, bytenr, num_bytes, | ||
567 | (u64)-1, 0, 0, 0, action, pin); | ||
568 | BUG_ON(ret); | ||
569 | |||
570 | ret = __btrfs_add_delayed_ref(trans, &ref->node, bytenr, num_bytes, | ||
571 | parent, ref_root, ref_generation, | ||
572 | owner_objectid, action, pin); | ||
573 | BUG_ON(ret); | ||
574 | spin_unlock(&delayed_refs->lock); | ||
575 | return 0; | ||
576 | } | ||
577 | |||
578 | /* | ||
579 | * this does a simple search for the head node of a given extent. | ||
580 | * It must be called with the delayed ref spinlock held, and it returns | ||
581 | * the head node if one was found, or NULL if not. | ||
582 | */ | ||
583 | struct btrfs_delayed_ref_head * | ||
584 | btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr) | ||
585 | { | ||
586 | struct btrfs_delayed_ref_node *ref; | ||
587 | struct btrfs_delayed_ref_root *delayed_refs; | ||
588 | |||
589 | delayed_refs = &trans->transaction->delayed_refs; | ||
590 | ref = tree_search(&delayed_refs->root, bytenr, (u64)-1, NULL); | ||
591 | if (ref) | ||
592 | return btrfs_delayed_node_to_head(ref); | ||
593 | return NULL; | ||
594 | } | ||
595 | |||
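A hedged sketch of the expected calling convention: the search happens under delayed_refs->lock, and a caller that wants to keep using the head after dropping the lock takes its own reference first (the atomic_inc pairing with btrfs_put_delayed_ref is an assumption based on the refcounting in this patch, not a line from it):

        struct btrfs_delayed_ref_head *head;
        struct btrfs_delayed_ref_root *delayed_refs;

        delayed_refs = &trans->transaction->delayed_refs;
        spin_lock(&delayed_refs->lock);
        head = btrfs_find_delayed_ref_head(trans, bytenr);
        if (head)
                atomic_inc(&head->node.refs);   /* pin before unlocking */
        spin_unlock(&delayed_refs->lock);

        if (head) {
                /* ... inspect head->node.ref_mod and friends ... */
                btrfs_put_delayed_ref(&head->node);
        }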
596 | /* | ||
597 | * add a delayed ref to the tree. This does all of the accounting required | ||
598 | * to make sure the delayed ref is eventually processed before this | ||
599 | * transaction commits. | ||
600 | * | ||
601 | * The main point of this call is to add and remove a backreference in a single | ||
602 | * shot, taking the lock only once, and only searching for the head node once. | ||
603 | * | ||
604 | * It is the same as doing a ref add and delete in two separate calls. | ||
605 | */ | ||
606 | int btrfs_update_delayed_ref(struct btrfs_trans_handle *trans, | ||
607 | u64 bytenr, u64 num_bytes, u64 orig_parent, | ||
608 | u64 parent, u64 orig_ref_root, u64 ref_root, | ||
609 | u64 orig_ref_generation, u64 ref_generation, | ||
610 | u64 owner_objectid, int pin) | ||
611 | { | ||
612 | struct btrfs_delayed_ref *ref; | ||
613 | struct btrfs_delayed_ref *old_ref; | ||
614 | struct btrfs_delayed_ref_head *head_ref; | ||
615 | struct btrfs_delayed_ref_root *delayed_refs; | ||
616 | int ret; | ||
617 | |||
618 | ref = kmalloc(sizeof(*ref), GFP_NOFS); | ||
619 | if (!ref) | ||
620 | return -ENOMEM; | ||
621 | |||
622 | old_ref = kmalloc(sizeof(*old_ref), GFP_NOFS); | ||
623 | if (!old_ref) { | ||
624 | kfree(ref); | ||
625 | return -ENOMEM; | ||
626 | } | ||
627 | |||
628 | /* | ||
629 | * the parent = 0 case comes from callers that don't actually | ||
630 | * know the parent yet. It will get updated later via an add/drop | ||
631 | * pair. | ||
632 | */ | ||
633 | if (parent == 0) | ||
634 | parent = bytenr; | ||
635 | if (orig_parent == 0) | ||
636 | orig_parent = bytenr; | ||
637 | |||
638 | head_ref = kmalloc(sizeof(*head_ref), GFP_NOFS); | ||
639 | if (!head_ref) { | ||
640 | kfree(ref); | ||
641 | kfree(old_ref); | ||
642 | return -ENOMEM; | ||
643 | } | ||
644 | delayed_refs = &trans->transaction->delayed_refs; | ||
645 | spin_lock(&delayed_refs->lock); | ||
646 | |||
647 | /* | ||
648 | * insert both the head node and the new ref without dropping | ||
649 | * the spin lock | ||
650 | */ | ||
651 | ret = __btrfs_add_delayed_ref(trans, &head_ref->node, bytenr, num_bytes, | ||
652 | (u64)-1, 0, 0, 0, | ||
653 | BTRFS_UPDATE_DELAYED_HEAD, 0); | ||
654 | BUG_ON(ret); | ||
655 | |||
656 | ret = __btrfs_add_delayed_ref(trans, &ref->node, bytenr, num_bytes, | ||
657 | parent, ref_root, ref_generation, | ||
658 | owner_objectid, BTRFS_ADD_DELAYED_REF, 0); | ||
659 | BUG_ON(ret); | ||
660 | |||
661 | ret = __btrfs_add_delayed_ref(trans, &old_ref->node, bytenr, num_bytes, | ||
662 | orig_parent, orig_ref_root, | ||
663 | orig_ref_generation, owner_objectid, | ||
664 | BTRFS_DROP_DELAYED_REF, pin); | ||
665 | BUG_ON(ret); | ||
666 | spin_unlock(&delayed_refs->lock); | ||
667 | return 0; | ||
668 | } | ||
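As the comment says, this is semantically identical to a separate add and drop; the combined form just takes the spinlock and finds the head once. The equivalent two-call version would look like this (illustrative, mirroring the arguments above):

        /* same net effect, but two lock acquisitions and two searches */
        ret = btrfs_add_delayed_ref(trans, bytenr, num_bytes, parent,
                                    ref_root, ref_generation,
                                    owner_objectid,
                                    BTRFS_ADD_DELAYED_REF, 0);
        if (!ret)
                ret = btrfs_add_delayed_ref(trans, bytenr, num_bytes,
                                            orig_parent, orig_ref_root,
                                            orig_ref_generation,
                                            owner_objectid,
                                            BTRFS_DROP_DELAYED_REF, pin);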
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h new file mode 100644 index 000000000000..3bec2ff0b15c --- /dev/null +++ b/fs/btrfs/delayed-ref.h | |||
@@ -0,0 +1,193 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2008 Oracle. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public | ||
6 | * License v2 as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
11 | * General Public License for more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public | ||
14 | * License along with this program; if not, write to the | ||
15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
16 | * Boston, MA 021110-1307, USA. | ||
17 | */ | ||
18 | #ifndef __DELAYED_REF__ | ||
19 | #define __DELAYED_REF__ | ||
20 | |||
21 | /* these are the possible values of struct btrfs_delayed_ref->action */ | ||
22 | #define BTRFS_ADD_DELAYED_REF 1 /* add one backref to the tree */ | ||
23 | #define BTRFS_DROP_DELAYED_REF 2 /* delete one backref from the tree */ | ||
24 | #define BTRFS_ADD_DELAYED_EXTENT 3 /* record a full extent allocation */ | ||
25 | #define BTRFS_UPDATE_DELAYED_HEAD 4 /* not changing ref count on head ref */ | ||
26 | |||
27 | struct btrfs_delayed_ref_node { | ||
28 | struct rb_node rb_node; | ||
29 | |||
30 | /* the starting bytenr of the extent */ | ||
31 | u64 bytenr; | ||
32 | |||
33 | /* the parent our backref will point to */ | ||
34 | u64 parent; | ||
35 | |||
36 | /* the size of the extent */ | ||
37 | u64 num_bytes; | ||
38 | |||
39 | /* ref count on this data structure */ | ||
40 | atomic_t refs; | ||
41 | |||
42 | /* | ||
43 | * how many refs this entry is adding or deleting. For | ||
44 | * head refs, this may be a negative number because it is keeping | ||
45 | * track of the total mods done to the reference count. | ||
46 | * For individual refs, this will always be a positive number. | ||
47 | * | ||
48 | * It may be more than one, since it is possible for a single | ||
49 | * parent to have more than one ref on an extent | ||
50 | */ | ||
51 | int ref_mod; | ||
52 | |||
53 | /* is this node still in the rbtree? */ | ||
54 | unsigned int in_tree:1; | ||
55 | }; | ||
56 | |||
57 | /* | ||
58 | * the head refs are used to hold a lock on a given extent, which allows us | ||
59 | * to make sure that only one process is running the delayed refs | ||
60 | * at a time for a single extent. They also store the sum of all the | ||
61 | * reference count modifications we've queued up. | ||
62 | */ | ||
63 | struct btrfs_delayed_ref_head { | ||
64 | struct btrfs_delayed_ref_node node; | ||
65 | |||
66 | /* | ||
67 | * the mutex is held while running the refs, and it is also | ||
68 | * held when checking the sum of reference modifications. | ||
69 | */ | ||
70 | struct mutex mutex; | ||
71 | |||
72 | struct list_head cluster; | ||
73 | |||
74 | /* | ||
75 | * when a new extent is allocated, it is just reserved in memory. | ||
76 | * The actual extent isn't inserted into the extent allocation tree | ||
77 | * until the delayed ref is processed. must_insert_reserved is | ||
78 | * used to flag a delayed ref so the accounting can be updated | ||
79 | * when a full insert is done. | ||
80 | * | ||
81 | * It is possible the extent will be freed before it is ever | ||
82 | * inserted into the extent allocation tree. In this case | ||
83 | * we need to update the in-ram accounting to reflect that | ||
84 | * the free has happened. | ||
85 | */ | ||
86 | unsigned int must_insert_reserved:1; | ||
87 | }; | ||
88 | |||
89 | struct btrfs_delayed_ref { | ||
90 | struct btrfs_delayed_ref_node node; | ||
91 | |||
92 | /* the root objectid our ref will point to */ | ||
93 | u64 root; | ||
94 | |||
95 | /* the generation for the backref */ | ||
96 | u64 generation; | ||
97 | |||
98 | /* owner_objectid of the backref */ | ||
99 | u64 owner_objectid; | ||
100 | |||
101 | /* operation done by this entry in the rbtree */ | ||
102 | u8 action; | ||
103 | |||
104 | /* if pin == 1, when the extent is freed it will be pinned until | ||
105 | * transaction commit | ||
106 | */ | ||
107 | unsigned int pin:1; | ||
108 | }; | ||
109 | |||
110 | struct btrfs_delayed_ref_root { | ||
111 | struct rb_root root; | ||
112 | |||
113 | /* this spin lock protects the rbtree and the entries inside */ | ||
114 | spinlock_t lock; | ||
115 | |||
116 | /* how many delayed ref updates we've queued, used by the | ||
117 | * throttling code | ||
118 | */ | ||
119 | unsigned long num_entries; | ||
120 | |||
121 | /* total number of head nodes in tree */ | ||
122 | unsigned long num_heads; | ||
123 | |||
124 | /* total number of head nodes ready for processing */ | ||
125 | unsigned long num_heads_ready; | ||
126 | |||
127 | /* | ||
128 | * set when the tree is flushing before a transaction commit, | ||
129 | * used by the throttling code to decide if new updates need | ||
130 | * to be run right away | ||
131 | */ | ||
132 | int flushing; | ||
133 | |||
134 | u64 run_delayed_start; | ||
135 | }; | ||
136 | |||
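For orientation, a hedged sketch of how this structure would be set up when a transaction is opened (the real initialization lives in the transaction code, outside this patch hunk):

        static void init_delayed_refs(struct btrfs_delayed_ref_root *delayed_refs)
        {
                delayed_refs->root = RB_ROOT;           /* empty rbtree */
                spin_lock_init(&delayed_refs->lock);
                delayed_refs->num_entries = 0;
                delayed_refs->num_heads = 0;
                delayed_refs->num_heads_ready = 0;
                delayed_refs->flushing = 0;
                delayed_refs->run_delayed_start = 0;
        }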
137 | static inline void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref) | ||
138 | { | ||
139 | WARN_ON(atomic_read(&ref->refs) == 0); | ||
140 | if (atomic_dec_and_test(&ref->refs)) { | ||
141 | WARN_ON(ref->in_tree); | ||
142 | kfree(ref); | ||
143 | } | ||
144 | } | ||
145 | |||
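Two invariants are encoded in the WARN_ONs above: a put on a zero refcount is a bug, and a node must be unlinked from the rbtree before its last reference is dropped. A hedged sketch of the teardown order a caller is expected to follow (with delayed_refs->lock held for the rbtree part):

        rb_erase(&ref->rb_node, &delayed_refs->root);   /* unlink first */
        ref->in_tree = 0;
        delayed_refs->num_entries--;
        btrfs_put_delayed_ref(ref);     /* now the final free is safe */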
146 | int btrfs_add_delayed_ref(struct btrfs_trans_handle *trans, | ||
147 | u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root, | ||
148 | u64 ref_generation, u64 owner_objectid, int action, | ||
149 | int pin); | ||
150 | |||
151 | struct btrfs_delayed_ref_head * | ||
152 | btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr); | ||
153 | int btrfs_delayed_ref_pending(struct btrfs_trans_handle *trans, u64 bytenr); | ||
154 | int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans, | ||
155 | struct btrfs_root *root, u64 bytenr, | ||
156 | u64 num_bytes, u32 *refs); | ||
157 | int btrfs_update_delayed_ref(struct btrfs_trans_handle *trans, | ||
158 | u64 bytenr, u64 num_bytes, u64 orig_parent, | ||
159 | u64 parent, u64 orig_ref_root, u64 ref_root, | ||
160 | u64 orig_ref_generation, u64 ref_generation, | ||
161 | u64 owner_objectid, int pin); | ||
162 | int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans, | ||
163 | struct btrfs_delayed_ref_head *head); | ||
164 | int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans, | ||
165 | struct list_head *cluster, u64 search_start); | ||
166 | /* | ||
167 | * a node might live in a head or a regular ref; this lets you | ||
168 | * test for the proper type to use. | ||
169 | */ | ||
170 | static inline int btrfs_delayed_ref_is_head(struct btrfs_delayed_ref_node *node) | ||
171 | { | ||
172 | return node->parent == (u64)-1; | ||
173 | } | ||
174 | |||
175 | /* | ||
176 | * helper functions to cast a node into its container | ||
177 | */ | ||
178 | static inline struct btrfs_delayed_ref * | ||
179 | btrfs_delayed_node_to_ref(struct btrfs_delayed_ref_node *node) | ||
180 | { | ||
181 | WARN_ON(btrfs_delayed_ref_is_head(node)); | ||
182 | return container_of(node, struct btrfs_delayed_ref, node); | ||
183 | |||
184 | } | ||
185 | |||
186 | static inline struct btrfs_delayed_ref_head * | ||
187 | btrfs_delayed_node_to_head(struct btrfs_delayed_ref_node *node) | ||
188 | { | ||
189 | WARN_ON(!btrfs_delayed_ref_is_head(node)); | ||
190 | return container_of(node, struct btrfs_delayed_ref_head, node); | ||
191 | |||
192 | } | ||
193 | #endif | ||
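Putting the helpers together, the intended consumption pattern is check-then-cast. A sketch of a consumer (not code from this patch):

        static void process_node(struct btrfs_delayed_ref_node *node)
        {
                if (btrfs_delayed_ref_is_head(node)) {
                        struct btrfs_delayed_ref_head *head;

                        head = btrfs_delayed_node_to_head(node);
                        /* head: apply the summed ref_mod and honor
                         * must_insert_reserved */
                } else {
                        struct btrfs_delayed_ref *ref;

                        ref = btrfs_delayed_node_to_ref(node);
                        /* backref: carry out ref->action for ref->root */
                }
        }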
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index 926a0b287a7d..1d70236ba00c 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c | |||
@@ -145,7 +145,10 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root | |||
145 | key.objectid = dir; | 145 | key.objectid = dir; |
146 | btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); | 146 | btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); |
147 | key.offset = btrfs_name_hash(name, name_len); | 147 | key.offset = btrfs_name_hash(name, name_len); |
148 | |||
148 | path = btrfs_alloc_path(); | 149 | path = btrfs_alloc_path(); |
150 | path->leave_spinning = 1; | ||
151 | |||
149 | data_size = sizeof(*dir_item) + name_len; | 152 | data_size = sizeof(*dir_item) + name_len; |
150 | dir_item = insert_with_overflow(trans, root, path, &key, data_size, | 153 | dir_item = insert_with_overflow(trans, root, path, &key, data_size, |
151 | name, name_len); | 154 | name, name_len); |
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 5aebddd71193..92caa8035f36 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -38,6 +38,7 @@ | |||
38 | #include "locking.h" | 38 | #include "locking.h" |
39 | #include "ref-cache.h" | 39 | #include "ref-cache.h" |
40 | #include "tree-log.h" | 40 | #include "tree-log.h" |
41 | #include "free-space-cache.h" | ||
41 | 42 | ||
42 | static struct extent_io_ops btree_extent_io_ops; | 43 | static struct extent_io_ops btree_extent_io_ops; |
43 | static void end_workqueue_fn(struct btrfs_work *work); | 44 | static void end_workqueue_fn(struct btrfs_work *work); |
@@ -75,6 +76,40 @@ struct async_submit_bio { | |||
75 | struct btrfs_work work; | 76 | struct btrfs_work work; |
76 | }; | 77 | }; |
77 | 78 | ||
79 | /* These are used to set the lockdep class on the extent buffer locks. | ||
80 | * The class is set by the readpage_end_io_hook after the buffer has | ||
81 | * passed csum validation but before the pages are unlocked. | ||
82 | * | ||
83 | * The lockdep class is also set by btrfs_init_new_buffer on freshly | ||
84 | * allocated blocks. | ||
85 | * | ||
86 | * The class is based on the level in the tree block, which allows lockdep | ||
87 | * to know that lower nodes nest inside the locks of higher nodes. | ||
88 | * | ||
89 | * We also add a check to make sure the highest level of the tree is | ||
90 | * the same as our lockdep setup here. If BTRFS_MAX_LEVEL changes, this | ||
91 | * code needs update as well. | ||
92 | */ | ||
93 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | ||
94 | # if BTRFS_MAX_LEVEL != 8 | ||
95 | # error | ||
96 | # endif | ||
97 | static struct lock_class_key btrfs_eb_class[BTRFS_MAX_LEVEL + 1]; | ||
98 | static const char *btrfs_eb_name[BTRFS_MAX_LEVEL + 1] = { | ||
99 | /* leaf */ | ||
100 | "btrfs-extent-00", | ||
101 | "btrfs-extent-01", | ||
102 | "btrfs-extent-02", | ||
103 | "btrfs-extent-03", | ||
104 | "btrfs-extent-04", | ||
105 | "btrfs-extent-05", | ||
106 | "btrfs-extent-06", | ||
107 | "btrfs-extent-07", | ||
108 | /* highest possible level */ | ||
109 | "btrfs-extent-08", | ||
110 | }; | ||
111 | #endif | ||
112 | |||
78 | /* | 113 | /* |
79 | * extents on the btree inode are pretty simple, there's one extent | 114 | * extents on the btree inode are pretty simple, there's one extent |
80 | * that covers the entire device | 115 | * that covers the entire device |
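The per-level classes matter because btree walks lock a parent before its child; with a single class, lockdep would report that as recursive locking. A hedged illustration of the nesting the class array makes expressible (btrfs_tree_lock/unlock are real calls used elsewhere in this patch; the sequence itself is illustrative):

        /* descending one level: the child's lock (btrfs-extent-00 for a
         * leaf) nests inside the parent's (btrfs-extent-01), so lockdep
         * sees two distinct classes instead of one class taken twice */
        btrfs_tree_lock(parent);
        btrfs_set_buffer_lockdep_class(child, 0);       /* leaf level */
        btrfs_tree_lock(child);
        /* ... modify the child block ... */
        btrfs_tree_unlock(child);
        btrfs_tree_unlock(parent);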
@@ -347,6 +382,15 @@ static int check_tree_block_fsid(struct btrfs_root *root, | |||
347 | return ret; | 382 | return ret; |
348 | } | 383 | } |
349 | 384 | ||
385 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | ||
386 | void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, int level) | ||
387 | { | ||
388 | lockdep_set_class_and_name(&eb->lock, | ||
389 | &btrfs_eb_class[level], | ||
390 | btrfs_eb_name[level]); | ||
391 | } | ||
392 | #endif | ||
393 | |||
350 | static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, | 394 | static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, |
351 | struct extent_state *state) | 395 | struct extent_state *state) |
352 | { | 396 | { |
@@ -392,6 +436,8 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, | |||
392 | } | 436 | } |
393 | found_level = btrfs_header_level(eb); | 437 | found_level = btrfs_header_level(eb); |
394 | 438 | ||
439 | btrfs_set_buffer_lockdep_class(eb, found_level); | ||
440 | |||
395 | ret = csum_tree_block(root, eb, 1); | 441 | ret = csum_tree_block(root, eb, 1); |
396 | if (ret) | 442 | if (ret) |
397 | ret = -EIO; | 443 | ret = -EIO; |
@@ -623,14 +669,31 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | |||
623 | static int btree_writepage(struct page *page, struct writeback_control *wbc) | 669 | static int btree_writepage(struct page *page, struct writeback_control *wbc) |
624 | { | 670 | { |
625 | struct extent_io_tree *tree; | 671 | struct extent_io_tree *tree; |
672 | struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; | ||
673 | struct extent_buffer *eb; | ||
674 | int was_dirty; | ||
675 | |||
626 | tree = &BTRFS_I(page->mapping->host)->io_tree; | 676 | tree = &BTRFS_I(page->mapping->host)->io_tree; |
677 | if (!(current->flags & PF_MEMALLOC)) { | ||
678 | return extent_write_full_page(tree, page, | ||
679 | btree_get_extent, wbc); | ||
680 | } | ||
627 | 681 | ||
628 | if (current->flags & PF_MEMALLOC) { | 682 | redirty_page_for_writepage(wbc, page); |
629 | redirty_page_for_writepage(wbc, page); | 683 | eb = btrfs_find_tree_block(root, page_offset(page), |
630 | unlock_page(page); | 684 | PAGE_CACHE_SIZE); |
631 | return 0; | 685 | WARN_ON(!eb); |
686 | |||
687 | was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags); | ||
688 | if (!was_dirty) { | ||
689 | spin_lock(&root->fs_info->delalloc_lock); | ||
690 | root->fs_info->dirty_metadata_bytes += PAGE_CACHE_SIZE; | ||
691 | spin_unlock(&root->fs_info->delalloc_lock); | ||
632 | } | 692 | } |
633 | return extent_write_full_page(tree, page, btree_get_extent, wbc); | 693 | free_extent_buffer(eb); |
694 | |||
695 | unlock_page(page); | ||
696 | return 0; | ||
634 | } | 697 | } |
635 | 698 | ||
636 | static int btree_writepages(struct address_space *mapping, | 699 | static int btree_writepages(struct address_space *mapping, |
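The same idiom recurs in btrfs_mark_buffer_dirty and clean_tree_block later in this patch: the per-buffer EXTENT_BUFFER_DIRTY bit gates the shared counter, so racing set and clear paths cannot double-count. Distilled into helpers (hedged names; the real updates are inlined at each call site):

        static void account_dirty(struct btrfs_fs_info *fs_info,
                                  struct extent_buffer *eb)
        {
                /* only the clean->dirty transition adds to the counter */
                if (test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags))
                        return;
                spin_lock(&fs_info->delalloc_lock);
                fs_info->dirty_metadata_bytes += eb->len;
                spin_unlock(&fs_info->delalloc_lock);
        }

        static void account_clean(struct btrfs_fs_info *fs_info,
                                  struct extent_buffer *eb)
        {
                /* only the dirty->clean transition subtracts */
                if (!test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->bflags))
                        return;
                spin_lock(&fs_info->delalloc_lock);
                if (fs_info->dirty_metadata_bytes >= eb->len)
                        fs_info->dirty_metadata_bytes -= eb->len;
                else
                        WARN_ON(1);     /* underflow: bookkeeping bug */
                spin_unlock(&fs_info->delalloc_lock);
        }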
@@ -639,15 +702,15 @@ static int btree_writepages(struct address_space *mapping, | |||
639 | struct extent_io_tree *tree; | 702 | struct extent_io_tree *tree; |
640 | tree = &BTRFS_I(mapping->host)->io_tree; | 703 | tree = &BTRFS_I(mapping->host)->io_tree; |
641 | if (wbc->sync_mode == WB_SYNC_NONE) { | 704 | if (wbc->sync_mode == WB_SYNC_NONE) { |
705 | struct btrfs_root *root = BTRFS_I(mapping->host)->root; | ||
642 | u64 num_dirty; | 706 | u64 num_dirty; |
643 | u64 start = 0; | ||
644 | unsigned long thresh = 32 * 1024 * 1024; | 707 | unsigned long thresh = 32 * 1024 * 1024; |
645 | 708 | ||
646 | if (wbc->for_kupdate) | 709 | if (wbc->for_kupdate) |
647 | return 0; | 710 | return 0; |
648 | 711 | ||
649 | num_dirty = count_range_bits(tree, &start, (u64)-1, | 712 | /* this is a bit racy, but that's ok */ |
650 | thresh, EXTENT_DIRTY); | 713 | num_dirty = root->fs_info->dirty_metadata_bytes; |
651 | if (num_dirty < thresh) | 714 | if (num_dirty < thresh) |
652 | return 0; | 715 | return 0; |
653 | } | 716 | } |
@@ -812,11 +875,19 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
812 | struct inode *btree_inode = root->fs_info->btree_inode; | 875 | struct inode *btree_inode = root->fs_info->btree_inode; |
813 | if (btrfs_header_generation(buf) == | 876 | if (btrfs_header_generation(buf) == |
814 | root->fs_info->running_transaction->transid) { | 877 | root->fs_info->running_transaction->transid) { |
815 | WARN_ON(!btrfs_tree_locked(buf)); | 878 | btrfs_assert_tree_locked(buf); |
879 | |||
880 | if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)) { | ||
881 | spin_lock(&root->fs_info->delalloc_lock); | ||
882 | if (root->fs_info->dirty_metadata_bytes >= buf->len) | ||
883 | root->fs_info->dirty_metadata_bytes -= buf->len; | ||
884 | else | ||
885 | WARN_ON(1); | ||
886 | spin_unlock(&root->fs_info->delalloc_lock); | ||
887 | } | ||
816 | 888 | ||
817 | /* ugh, clear_extent_buffer_dirty can be expensive */ | 889 | /* ugh, clear_extent_buffer_dirty needs to lock the page */ |
818 | btrfs_set_lock_blocking(buf); | 890 | btrfs_set_lock_blocking(buf); |
819 | |||
820 | clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, | 891 | clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, |
821 | buf); | 892 | buf); |
822 | } | 893 | } |
@@ -1342,8 +1413,6 @@ static int bio_ready_for_csum(struct bio *bio) | |||
1342 | 1413 | ||
1343 | ret = extent_range_uptodate(io_tree, start + length, | 1414 | ret = extent_range_uptodate(io_tree, start + length, |
1344 | start + buf_len - 1); | 1415 | start + buf_len - 1); |
1345 | if (ret == 1) | ||
1346 | return ret; | ||
1347 | return ret; | 1416 | return ret; |
1348 | } | 1417 | } |
1349 | 1418 | ||
@@ -1426,12 +1495,6 @@ static int transaction_kthread(void *arg) | |||
1426 | vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); | 1495 | vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); |
1427 | mutex_lock(&root->fs_info->transaction_kthread_mutex); | 1496 | mutex_lock(&root->fs_info->transaction_kthread_mutex); |
1428 | 1497 | ||
1429 | if (root->fs_info->total_ref_cache_size > 20 * 1024 * 1024) { | ||
1430 | printk(KERN_INFO "btrfs: total reference cache " | ||
1431 | "size %llu\n", | ||
1432 | root->fs_info->total_ref_cache_size); | ||
1433 | } | ||
1434 | |||
1435 | mutex_lock(&root->fs_info->trans_mutex); | 1498 | mutex_lock(&root->fs_info->trans_mutex); |
1436 | cur = root->fs_info->running_transaction; | 1499 | cur = root->fs_info->running_transaction; |
1437 | if (!cur) { | 1500 | if (!cur) { |
@@ -1448,6 +1511,7 @@ static int transaction_kthread(void *arg) | |||
1448 | mutex_unlock(&root->fs_info->trans_mutex); | 1511 | mutex_unlock(&root->fs_info->trans_mutex); |
1449 | trans = btrfs_start_transaction(root, 1); | 1512 | trans = btrfs_start_transaction(root, 1); |
1450 | ret = btrfs_commit_transaction(trans, root); | 1513 | ret = btrfs_commit_transaction(trans, root); |
1514 | |||
1451 | sleep: | 1515 | sleep: |
1452 | wake_up_process(root->fs_info->cleaner_kthread); | 1516 | wake_up_process(root->fs_info->cleaner_kthread); |
1453 | mutex_unlock(&root->fs_info->transaction_kthread_mutex); | 1517 | mutex_unlock(&root->fs_info->transaction_kthread_mutex); |
@@ -1507,6 +1571,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1507 | INIT_LIST_HEAD(&fs_info->dead_roots); | 1571 | INIT_LIST_HEAD(&fs_info->dead_roots); |
1508 | INIT_LIST_HEAD(&fs_info->hashers); | 1572 | INIT_LIST_HEAD(&fs_info->hashers); |
1509 | INIT_LIST_HEAD(&fs_info->delalloc_inodes); | 1573 | INIT_LIST_HEAD(&fs_info->delalloc_inodes); |
1574 | INIT_LIST_HEAD(&fs_info->ordered_operations); | ||
1510 | spin_lock_init(&fs_info->delalloc_lock); | 1575 | spin_lock_init(&fs_info->delalloc_lock); |
1511 | spin_lock_init(&fs_info->new_trans_lock); | 1576 | spin_lock_init(&fs_info->new_trans_lock); |
1512 | spin_lock_init(&fs_info->ref_cache_lock); | 1577 | spin_lock_init(&fs_info->ref_cache_lock); |
@@ -1566,10 +1631,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1566 | 1631 | ||
1567 | extent_io_tree_init(&fs_info->pinned_extents, | 1632 | extent_io_tree_init(&fs_info->pinned_extents, |
1568 | fs_info->btree_inode->i_mapping, GFP_NOFS); | 1633 | fs_info->btree_inode->i_mapping, GFP_NOFS); |
1569 | extent_io_tree_init(&fs_info->pending_del, | ||
1570 | fs_info->btree_inode->i_mapping, GFP_NOFS); | ||
1571 | extent_io_tree_init(&fs_info->extent_ins, | ||
1572 | fs_info->btree_inode->i_mapping, GFP_NOFS); | ||
1573 | fs_info->do_barriers = 1; | 1634 | fs_info->do_barriers = 1; |
1574 | 1635 | ||
1575 | INIT_LIST_HEAD(&fs_info->dead_reloc_roots); | 1636 | INIT_LIST_HEAD(&fs_info->dead_reloc_roots); |
@@ -1582,15 +1643,18 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1582 | insert_inode_hash(fs_info->btree_inode); | 1643 | insert_inode_hash(fs_info->btree_inode); |
1583 | 1644 | ||
1584 | mutex_init(&fs_info->trans_mutex); | 1645 | mutex_init(&fs_info->trans_mutex); |
1646 | mutex_init(&fs_info->ordered_operations_mutex); | ||
1585 | mutex_init(&fs_info->tree_log_mutex); | 1647 | mutex_init(&fs_info->tree_log_mutex); |
1586 | mutex_init(&fs_info->drop_mutex); | 1648 | mutex_init(&fs_info->drop_mutex); |
1587 | mutex_init(&fs_info->extent_ins_mutex); | ||
1588 | mutex_init(&fs_info->pinned_mutex); | ||
1589 | mutex_init(&fs_info->chunk_mutex); | 1649 | mutex_init(&fs_info->chunk_mutex); |
1590 | mutex_init(&fs_info->transaction_kthread_mutex); | 1650 | mutex_init(&fs_info->transaction_kthread_mutex); |
1591 | mutex_init(&fs_info->cleaner_mutex); | 1651 | mutex_init(&fs_info->cleaner_mutex); |
1592 | mutex_init(&fs_info->volume_mutex); | 1652 | mutex_init(&fs_info->volume_mutex); |
1593 | mutex_init(&fs_info->tree_reloc_mutex); | 1653 | mutex_init(&fs_info->tree_reloc_mutex); |
1654 | |||
1655 | btrfs_init_free_cluster(&fs_info->meta_alloc_cluster); | ||
1656 | btrfs_init_free_cluster(&fs_info->data_alloc_cluster); | ||
1657 | |||
1594 | init_waitqueue_head(&fs_info->transaction_throttle); | 1658 | init_waitqueue_head(&fs_info->transaction_throttle); |
1595 | init_waitqueue_head(&fs_info->transaction_wait); | 1659 | init_waitqueue_head(&fs_info->transaction_wait); |
1596 | init_waitqueue_head(&fs_info->async_submit_wait); | 1660 | init_waitqueue_head(&fs_info->async_submit_wait); |
@@ -1777,7 +1841,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1777 | ret = find_and_setup_root(tree_root, fs_info, | 1841 | ret = find_and_setup_root(tree_root, fs_info, |
1778 | BTRFS_DEV_TREE_OBJECTID, dev_root); | 1842 | BTRFS_DEV_TREE_OBJECTID, dev_root); |
1779 | dev_root->track_dirty = 1; | 1843 | dev_root->track_dirty = 1; |
1780 | |||
1781 | if (ret) | 1844 | if (ret) |
1782 | goto fail_extent_root; | 1845 | goto fail_extent_root; |
1783 | 1846 | ||
@@ -2314,10 +2377,9 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) | |||
2314 | struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; | 2377 | struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; |
2315 | u64 transid = btrfs_header_generation(buf); | 2378 | u64 transid = btrfs_header_generation(buf); |
2316 | struct inode *btree_inode = root->fs_info->btree_inode; | 2379 | struct inode *btree_inode = root->fs_info->btree_inode; |
2380 | int was_dirty; | ||
2317 | 2381 | ||
2318 | btrfs_set_lock_blocking(buf); | 2382 | btrfs_assert_tree_locked(buf); |
2319 | |||
2320 | WARN_ON(!btrfs_tree_locked(buf)); | ||
2321 | if (transid != root->fs_info->generation) { | 2383 | if (transid != root->fs_info->generation) { |
2322 | printk(KERN_CRIT "btrfs transid mismatch buffer %llu, " | 2384 | printk(KERN_CRIT "btrfs transid mismatch buffer %llu, " |
2323 | "found %llu running %llu\n", | 2385 | "found %llu running %llu\n", |
@@ -2326,7 +2388,13 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) | |||
2326 | (unsigned long long)root->fs_info->generation); | 2388 | (unsigned long long)root->fs_info->generation); |
2327 | WARN_ON(1); | 2389 | WARN_ON(1); |
2328 | } | 2390 | } |
2329 | set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, buf); | 2391 | was_dirty = set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, |
2392 | buf); | ||
2393 | if (!was_dirty) { | ||
2394 | spin_lock(&root->fs_info->delalloc_lock); | ||
2395 | root->fs_info->dirty_metadata_bytes += buf->len; | ||
2396 | spin_unlock(&root->fs_info->delalloc_lock); | ||
2397 | } | ||
2330 | } | 2398 | } |
2331 | 2399 | ||
2332 | void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) | 2400 | void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) |
@@ -2341,7 +2409,7 @@ void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) | |||
2341 | unsigned long thresh = 32 * 1024 * 1024; | 2409 | unsigned long thresh = 32 * 1024 * 1024; |
2342 | tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree; | 2410 | tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree; |
2343 | 2411 | ||
2344 | if (current_is_pdflush() || current->flags & PF_MEMALLOC) | 2412 | if (current->flags & PF_MEMALLOC) |
2345 | return; | 2413 | return; |
2346 | 2414 | ||
2347 | num_dirty = count_range_bits(tree, &start, (u64)-1, | 2415 | num_dirty = count_range_bits(tree, &start, (u64)-1, |
@@ -2366,6 +2434,7 @@ int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid) | |||
2366 | int btree_lock_page_hook(struct page *page) | 2434 | int btree_lock_page_hook(struct page *page) |
2367 | { | 2435 | { |
2368 | struct inode *inode = page->mapping->host; | 2436 | struct inode *inode = page->mapping->host; |
2437 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
2369 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | 2438 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; |
2370 | struct extent_buffer *eb; | 2439 | struct extent_buffer *eb; |
2371 | unsigned long len; | 2440 | unsigned long len; |
@@ -2381,6 +2450,16 @@ int btree_lock_page_hook(struct page *page) | |||
2381 | 2450 | ||
2382 | btrfs_tree_lock(eb); | 2451 | btrfs_tree_lock(eb); |
2383 | btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN); | 2452 | btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN); |
2453 | |||
2454 | if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) { | ||
2455 | spin_lock(&root->fs_info->delalloc_lock); | ||
2456 | if (root->fs_info->dirty_metadata_bytes >= eb->len) | ||
2457 | root->fs_info->dirty_metadata_bytes -= eb->len; | ||
2458 | else | ||
2459 | WARN_ON(1); | ||
2460 | spin_unlock(&root->fs_info->delalloc_lock); | ||
2461 | } | ||
2462 | |||
2384 | btrfs_tree_unlock(eb); | 2463 | btrfs_tree_unlock(eb); |
2385 | free_extent_buffer(eb); | 2464 | free_extent_buffer(eb); |
2386 | out: | 2465 | out: |
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 494a56eb2986..c958ecbc1916 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h | |||
@@ -72,6 +72,7 @@ int btrfs_insert_dev_radix(struct btrfs_root *root, | |||
72 | void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr); | 72 | void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr); |
73 | int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root); | 73 | int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root); |
74 | void btrfs_mark_buffer_dirty(struct extent_buffer *buf); | 74 | void btrfs_mark_buffer_dirty(struct extent_buffer *buf); |
75 | void btrfs_mark_buffer_dirty_nonblocking(struct extent_buffer *buf); | ||
75 | int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid); | 76 | int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid); |
76 | int btrfs_set_buffer_uptodate(struct extent_buffer *buf); | 77 | int btrfs_set_buffer_uptodate(struct extent_buffer *buf); |
77 | int wait_on_tree_block_writeback(struct btrfs_root *root, | 78 | int wait_on_tree_block_writeback(struct btrfs_root *root, |
@@ -101,4 +102,14 @@ int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans, | |||
101 | int btrfs_add_log_tree(struct btrfs_trans_handle *trans, | 102 | int btrfs_add_log_tree(struct btrfs_trans_handle *trans, |
102 | struct btrfs_root *root); | 103 | struct btrfs_root *root); |
103 | int btree_lock_page_hook(struct page *page); | 104 | int btree_lock_page_hook(struct page *page); |
105 | |||
106 | |||
107 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | ||
108 | void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, int level); | ||
109 | #else | ||
110 | static inline void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, | ||
111 | int level) | ||
112 | { | ||
113 | } | ||
114 | #endif | ||
104 | #endif | 115 | #endif |
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 7527523c2d2d..178df4c67de4 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <linux/writeback.h> | 20 | #include <linux/writeback.h> |
21 | #include <linux/blkdev.h> | 21 | #include <linux/blkdev.h> |
22 | #include <linux/sort.h> | 22 | #include <linux/sort.h> |
23 | #include <linux/rcupdate.h> | ||
23 | #include "compat.h" | 24 | #include "compat.h" |
24 | #include "hash.h" | 25 | #include "hash.h" |
25 | #include "crc32c.h" | 26 | #include "crc32c.h" |
@@ -30,6 +31,7 @@ | |||
30 | #include "volumes.h" | 31 | #include "volumes.h" |
31 | #include "locking.h" | 32 | #include "locking.h" |
32 | #include "ref-cache.h" | 33 | #include "ref-cache.h" |
34 | #include "free-space-cache.h" | ||
33 | 35 | ||
34 | #define PENDING_EXTENT_INSERT 0 | 36 | #define PENDING_EXTENT_INSERT 0 |
35 | #define PENDING_EXTENT_DELETE 1 | 37 | #define PENDING_EXTENT_DELETE 1 |
@@ -48,17 +50,27 @@ struct pending_extent_op { | |||
48 | int del; | 50 | int del; |
49 | }; | 51 | }; |
50 | 52 | ||
51 | static int finish_current_insert(struct btrfs_trans_handle *trans, | 53 | static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, |
52 | struct btrfs_root *extent_root, int all); | 54 | struct btrfs_root *root, u64 parent, |
53 | static int del_pending_extents(struct btrfs_trans_handle *trans, | 55 | u64 root_objectid, u64 ref_generation, |
54 | struct btrfs_root *extent_root, int all); | 56 | u64 owner, struct btrfs_key *ins, |
55 | static int pin_down_bytes(struct btrfs_trans_handle *trans, | 57 | int ref_mod); |
56 | struct btrfs_root *root, | 58 | static int update_reserved_extents(struct btrfs_root *root, |
57 | u64 bytenr, u64 num_bytes, int is_data); | 59 | u64 bytenr, u64 num, int reserve); |
58 | static int update_block_group(struct btrfs_trans_handle *trans, | 60 | static int update_block_group(struct btrfs_trans_handle *trans, |
59 | struct btrfs_root *root, | 61 | struct btrfs_root *root, |
60 | u64 bytenr, u64 num_bytes, int alloc, | 62 | u64 bytenr, u64 num_bytes, int alloc, |
61 | int mark_free); | 63 | int mark_free); |
64 | static noinline int __btrfs_free_extent(struct btrfs_trans_handle *trans, | ||
65 | struct btrfs_root *root, | ||
66 | u64 bytenr, u64 num_bytes, u64 parent, | ||
67 | u64 root_objectid, u64 ref_generation, | ||
68 | u64 owner_objectid, int pin, | ||
69 | int ref_to_drop); | ||
70 | |||
71 | static int do_chunk_alloc(struct btrfs_trans_handle *trans, | ||
72 | struct btrfs_root *extent_root, u64 alloc_bytes, | ||
73 | u64 flags, int force); | ||
62 | 74 | ||
63 | static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits) | 75 | static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits) |
64 | { | 76 | { |
@@ -155,7 +167,6 @@ static int add_new_free_space(struct btrfs_block_group_cache *block_group, | |||
155 | u64 extent_start, extent_end, size; | 167 | u64 extent_start, extent_end, size; |
156 | int ret; | 168 | int ret; |
157 | 169 | ||
158 | mutex_lock(&info->pinned_mutex); | ||
159 | while (start < end) { | 170 | while (start < end) { |
160 | ret = find_first_extent_bit(&info->pinned_extents, start, | 171 | ret = find_first_extent_bit(&info->pinned_extents, start, |
161 | &extent_start, &extent_end, | 172 | &extent_start, &extent_end, |
@@ -181,7 +192,6 @@ static int add_new_free_space(struct btrfs_block_group_cache *block_group, | |||
181 | ret = btrfs_add_free_space(block_group, start, size); | 192 | ret = btrfs_add_free_space(block_group, start, size); |
182 | BUG_ON(ret); | 193 | BUG_ON(ret); |
183 | } | 194 | } |
184 | mutex_unlock(&info->pinned_mutex); | ||
185 | 195 | ||
186 | return 0; | 196 | return 0; |
187 | } | 197 | } |
@@ -280,8 +290,8 @@ next: | |||
280 | block_group->key.objectid + | 290 | block_group->key.objectid + |
281 | block_group->key.offset); | 291 | block_group->key.offset); |
282 | 292 | ||
283 | remove_sb_from_cache(root, block_group); | ||
284 | block_group->cached = 1; | 293 | block_group->cached = 1; |
294 | remove_sb_from_cache(root, block_group); | ||
285 | ret = 0; | 295 | ret = 0; |
286 | err: | 296 | err: |
287 | btrfs_free_path(path); | 297 | btrfs_free_path(path); |
@@ -315,7 +325,7 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group( | |||
315 | return cache; | 325 | return cache; |
316 | } | 326 | } |
317 | 327 | ||
318 | static inline void put_block_group(struct btrfs_block_group_cache *cache) | 328 | void btrfs_put_block_group(struct btrfs_block_group_cache *cache) |
319 | { | 329 | { |
320 | if (atomic_dec_and_test(&cache->count)) | 330 | if (atomic_dec_and_test(&cache->count)) |
321 | kfree(cache); | 331 | kfree(cache); |
@@ -326,13 +336,33 @@ static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info, | |||
326 | { | 336 | { |
327 | struct list_head *head = &info->space_info; | 337 | struct list_head *head = &info->space_info; |
328 | struct btrfs_space_info *found; | 338 | struct btrfs_space_info *found; |
329 | list_for_each_entry(found, head, list) { | 339 | |
330 | if (found->flags == flags) | 340 | rcu_read_lock(); |
341 | list_for_each_entry_rcu(found, head, list) { | ||
342 | if (found->flags == flags) { | ||
343 | rcu_read_unlock(); | ||
331 | return found; | 344 | return found; |
345 | } | ||
332 | } | 346 | } |
347 | rcu_read_unlock(); | ||
333 | return NULL; | 348 | return NULL; |
334 | } | 349 | } |
335 | 350 | ||
351 | /* | ||
352 | * after adding space to the filesystem, we need to clear the full flags | ||
353 | * on all the space infos. | ||
354 | */ | ||
355 | void btrfs_clear_space_info_full(struct btrfs_fs_info *info) | ||
356 | { | ||
357 | struct list_head *head = &info->space_info; | ||
358 | struct btrfs_space_info *found; | ||
359 | |||
360 | rcu_read_lock(); | ||
361 | list_for_each_entry_rcu(found, head, list) | ||
362 | found->full = 0; | ||
363 | rcu_read_unlock(); | ||
364 | } | ||
365 | |||
336 | static u64 div_factor(u64 num, int factor) | 366 | static u64 div_factor(u64 num, int factor) |
337 | { | 367 | { |
338 | if (factor == 10) | 368 | if (factor == 10) |
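The read side above is lockless; the matching write side (not shown in this hunk) must publish new entries with the RCU list primitives so concurrent readers always see a consistent list. A hedged sketch of what that publisher would look like:

        /* assumed shape of the writer; the actual list_add_rcu() call
         * sits in the space-info setup code elsewhere in this series */
        static void publish_space_info(struct btrfs_fs_info *info,
                                       struct btrfs_space_info *found)
        {
                INIT_LIST_HEAD(&found->list);
                list_add_rcu(&found->list, &info->space_info);
        }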
@@ -368,12 +398,12 @@ again: | |||
368 | div_factor(cache->key.offset, factor)) { | 398 | div_factor(cache->key.offset, factor)) { |
369 | group_start = cache->key.objectid; | 399 | group_start = cache->key.objectid; |
370 | spin_unlock(&cache->lock); | 400 | spin_unlock(&cache->lock); |
371 | put_block_group(cache); | 401 | btrfs_put_block_group(cache); |
372 | goto found; | 402 | goto found; |
373 | } | 403 | } |
374 | } | 404 | } |
375 | spin_unlock(&cache->lock); | 405 | spin_unlock(&cache->lock); |
376 | put_block_group(cache); | 406 | btrfs_put_block_group(cache); |
377 | cond_resched(); | 407 | cond_resched(); |
378 | } | 408 | } |
379 | if (!wrapped) { | 409 | if (!wrapped) { |
@@ -529,262 +559,13 @@ out: | |||
529 | return ret; | 559 | return ret; |
530 | } | 560 | } |
531 | 561 | ||
532 | /* | ||
533 | * updates all the backrefs that are pending on update_list for the | ||
534 | * extent_root | ||
535 | */ | ||
536 | static noinline int update_backrefs(struct btrfs_trans_handle *trans, | ||
537 | struct btrfs_root *extent_root, | ||
538 | struct btrfs_path *path, | ||
539 | struct list_head *update_list) | ||
540 | { | ||
541 | struct btrfs_key key; | ||
542 | struct btrfs_extent_ref *ref; | ||
543 | struct btrfs_fs_info *info = extent_root->fs_info; | ||
544 | struct pending_extent_op *op; | ||
545 | struct extent_buffer *leaf; | ||
546 | int ret = 0; | ||
547 | struct list_head *cur = update_list->next; | ||
548 | u64 ref_objectid; | ||
549 | u64 ref_root = extent_root->root_key.objectid; | ||
550 | |||
551 | op = list_entry(cur, struct pending_extent_op, list); | ||
552 | |||
553 | search: | ||
554 | key.objectid = op->bytenr; | ||
555 | key.type = BTRFS_EXTENT_REF_KEY; | ||
556 | key.offset = op->orig_parent; | ||
557 | |||
558 | ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 1); | ||
559 | BUG_ON(ret); | ||
560 | |||
561 | leaf = path->nodes[0]; | ||
562 | |||
563 | loop: | ||
564 | ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref); | ||
565 | |||
566 | ref_objectid = btrfs_ref_objectid(leaf, ref); | ||
567 | |||
568 | if (btrfs_ref_root(leaf, ref) != ref_root || | ||
569 | btrfs_ref_generation(leaf, ref) != op->orig_generation || | ||
570 | (ref_objectid != op->level && | ||
571 | ref_objectid != BTRFS_MULTIPLE_OBJECTIDS)) { | ||
572 | printk(KERN_ERR "btrfs couldn't find %llu, parent %llu, " | ||
573 | "root %llu, owner %u\n", | ||
574 | (unsigned long long)op->bytenr, | ||
575 | (unsigned long long)op->orig_parent, | ||
576 | (unsigned long long)ref_root, op->level); | ||
577 | btrfs_print_leaf(extent_root, leaf); | ||
578 | BUG(); | ||
579 | } | ||
580 | |||
581 | key.objectid = op->bytenr; | ||
582 | key.offset = op->parent; | ||
583 | key.type = BTRFS_EXTENT_REF_KEY; | ||
584 | ret = btrfs_set_item_key_safe(trans, extent_root, path, &key); | ||
585 | BUG_ON(ret); | ||
586 | ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref); | ||
587 | btrfs_set_ref_generation(leaf, ref, op->generation); | ||
588 | |||
589 | cur = cur->next; | ||
590 | |||
591 | list_del_init(&op->list); | ||
592 | unlock_extent(&info->extent_ins, op->bytenr, | ||
593 | op->bytenr + op->num_bytes - 1, GFP_NOFS); | ||
594 | kfree(op); | ||
595 | |||
596 | if (cur == update_list) { | ||
597 | btrfs_mark_buffer_dirty(path->nodes[0]); | ||
598 | btrfs_release_path(extent_root, path); | ||
599 | goto out; | ||
600 | } | ||
601 | |||
602 | op = list_entry(cur, struct pending_extent_op, list); | ||
603 | |||
604 | path->slots[0]++; | ||
605 | while (path->slots[0] < btrfs_header_nritems(leaf)) { | ||
606 | btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); | ||
607 | if (key.objectid == op->bytenr && | ||
608 | key.type == BTRFS_EXTENT_REF_KEY) | ||
609 | goto loop; | ||
610 | path->slots[0]++; | ||
611 | } | ||
612 | |||
613 | btrfs_mark_buffer_dirty(path->nodes[0]); | ||
614 | btrfs_release_path(extent_root, path); | ||
615 | goto search; | ||
616 | |||
617 | out: | ||
618 | return 0; | ||
619 | } | ||
620 | |||
621 | static noinline int insert_extents(struct btrfs_trans_handle *trans, | ||
622 | struct btrfs_root *extent_root, | ||
623 | struct btrfs_path *path, | ||
624 | struct list_head *insert_list, int nr) | ||
625 | { | ||
626 | struct btrfs_key *keys; | ||
627 | u32 *data_size; | ||
628 | struct pending_extent_op *op; | ||
629 | struct extent_buffer *leaf; | ||
630 | struct list_head *cur = insert_list->next; | ||
631 | struct btrfs_fs_info *info = extent_root->fs_info; | ||
632 | u64 ref_root = extent_root->root_key.objectid; | ||
633 | int i = 0, last = 0, ret; | ||
634 | int total = nr * 2; | ||
635 | |||
636 | if (!nr) | ||
637 | return 0; | ||
638 | |||
639 | keys = kzalloc(total * sizeof(struct btrfs_key), GFP_NOFS); | ||
640 | if (!keys) | ||
641 | return -ENOMEM; | ||
642 | |||
643 | data_size = kzalloc(total * sizeof(u32), GFP_NOFS); | ||
644 | if (!data_size) { | ||
645 | kfree(keys); | ||
646 | return -ENOMEM; | ||
647 | } | ||
648 | |||
649 | list_for_each_entry(op, insert_list, list) { | ||
650 | keys[i].objectid = op->bytenr; | ||
651 | keys[i].offset = op->num_bytes; | ||
652 | keys[i].type = BTRFS_EXTENT_ITEM_KEY; | ||
653 | data_size[i] = sizeof(struct btrfs_extent_item); | ||
654 | i++; | ||
655 | |||
656 | keys[i].objectid = op->bytenr; | ||
657 | keys[i].offset = op->parent; | ||
658 | keys[i].type = BTRFS_EXTENT_REF_KEY; | ||
659 | data_size[i] = sizeof(struct btrfs_extent_ref); | ||
660 | i++; | ||
661 | } | ||
662 | |||
663 | op = list_entry(cur, struct pending_extent_op, list); | ||
664 | i = 0; | ||
665 | while (i < total) { | ||
666 | int c; | ||
667 | ret = btrfs_insert_some_items(trans, extent_root, path, | ||
668 | keys+i, data_size+i, total-i); | ||
669 | BUG_ON(ret < 0); | ||
670 | |||
671 | if (last && ret > 1) | ||
672 | BUG(); | ||
673 | |||
674 | leaf = path->nodes[0]; | ||
675 | for (c = 0; c < ret; c++) { | ||
676 | int ref_first = keys[i].type == BTRFS_EXTENT_REF_KEY; | ||
677 | |||
678 | /* | ||
679 | * if the first item we inserted was a backref, then | ||
680 | * the EXTENT_ITEM will be the odd c's, else it will | ||
681 | * be the even c's | ||
682 | */ | ||
683 | if ((ref_first && (c % 2)) || | ||
684 | (!ref_first && !(c % 2))) { | ||
685 | struct btrfs_extent_item *itm; | ||
686 | |||
687 | itm = btrfs_item_ptr(leaf, path->slots[0] + c, | ||
688 | struct btrfs_extent_item); | ||
689 | btrfs_set_extent_refs(path->nodes[0], itm, 1); | ||
690 | op->del++; | ||
691 | } else { | ||
692 | struct btrfs_extent_ref *ref; | ||
693 | |||
694 | ref = btrfs_item_ptr(leaf, path->slots[0] + c, | ||
695 | struct btrfs_extent_ref); | ||
696 | btrfs_set_ref_root(leaf, ref, ref_root); | ||
697 | btrfs_set_ref_generation(leaf, ref, | ||
698 | op->generation); | ||
699 | btrfs_set_ref_objectid(leaf, ref, op->level); | ||
700 | btrfs_set_ref_num_refs(leaf, ref, 1); | ||
701 | op->del++; | ||
702 | } | ||
703 | |||
704 | /* | ||
705 | * using del to see when its ok to free up the | ||
706 | * pending_extent_op. In the case where we insert the | ||
707 | * last item on the list in order to help do batching | ||
708 | * we need to not free the extent op until we actually | ||
709 | * insert the extent_item | ||
710 | */ | ||
711 | if (op->del == 2) { | ||
712 | unlock_extent(&info->extent_ins, op->bytenr, | ||
713 | op->bytenr + op->num_bytes - 1, | ||
714 | GFP_NOFS); | ||
715 | cur = cur->next; | ||
716 | list_del_init(&op->list); | ||
717 | kfree(op); | ||
718 | if (cur != insert_list) | ||
719 | op = list_entry(cur, | ||
720 | struct pending_extent_op, | ||
721 | list); | ||
722 | } | ||
723 | } | ||
724 | btrfs_mark_buffer_dirty(leaf); | ||
725 | btrfs_release_path(extent_root, path); | ||
726 | |||
727 | /* | ||
728 | * Ok, backrefs and items usually go right next to each other, | ||
729 | * but if we could only insert 1 item that means that we | ||
730 | * inserted on the end of a leaf, and we have no idea what may | ||
731 | * be on the next leaf so we just play it safe. In order to | ||
732 | * try and help this case we insert the last thing on our | ||
733 | * insert list so hopefully it will end up being the last | ||
734 | * thing on the leaf and everything else will be before it, | ||
735 | * which will let us insert a whole bunch of items at the same | ||
736 | * time. | ||
737 | */ | ||
738 | if (ret == 1 && !last && (i + ret < total)) { | ||
739 | /* | ||
740 | * last: where we will pick up the next time around | ||
741 | * i: our current key to insert, will be total - 1 | ||
742 | * cur: the current op we are screwing with | ||
743 | * op: duh | ||
744 | */ | ||
745 | last = i + ret; | ||
746 | i = total - 1; | ||
747 | cur = insert_list->prev; | ||
748 | op = list_entry(cur, struct pending_extent_op, list); | ||
749 | } else if (last) { | ||
750 | /* | ||
751 | * ok we successfully inserted the last item on the | ||
752 | * list, lets reset everything | ||
753 | * | ||
754 | * i: our current key to insert, so where we left off | ||
755 | * last time | ||
756 | * last: done with this | ||
757 | * cur: the op we are messing with | ||
758 | * op: duh | ||
759 | * total: since we inserted the last key, we need to | ||
760 | * decrement total so we dont overflow | ||
761 | */ | ||
762 | i = last; | ||
763 | last = 0; | ||
764 | total--; | ||
765 | if (i < total) { | ||
766 | cur = insert_list->next; | ||
767 | op = list_entry(cur, struct pending_extent_op, | ||
768 | list); | ||
769 | } | ||
770 | } else { | ||
771 | i += ret; | ||
772 | } | ||
773 | |||
774 | cond_resched(); | ||
775 | } | ||
776 | ret = 0; | ||
777 | kfree(keys); | ||
778 | kfree(data_size); | ||
779 | return ret; | ||
780 | } | ||
781 | |||
782 | static noinline int insert_extent_backref(struct btrfs_trans_handle *trans, | 562 | static noinline int insert_extent_backref(struct btrfs_trans_handle *trans, |
783 | struct btrfs_root *root, | 563 | struct btrfs_root *root, |
784 | struct btrfs_path *path, | 564 | struct btrfs_path *path, |
785 | u64 bytenr, u64 parent, | 565 | u64 bytenr, u64 parent, |
786 | u64 ref_root, u64 ref_generation, | 566 | u64 ref_root, u64 ref_generation, |
787 | u64 owner_objectid) | 567 | u64 owner_objectid, |
568 | int refs_to_add) | ||
788 | { | 569 | { |
789 | struct btrfs_key key; | 570 | struct btrfs_key key; |
790 | struct extent_buffer *leaf; | 571 | struct extent_buffer *leaf; |
@@ -804,9 +585,10 @@ static noinline int insert_extent_backref(struct btrfs_trans_handle *trans, | |||
804 | btrfs_set_ref_root(leaf, ref, ref_root); | 585 | btrfs_set_ref_root(leaf, ref, ref_root); |
805 | btrfs_set_ref_generation(leaf, ref, ref_generation); | 586 | btrfs_set_ref_generation(leaf, ref, ref_generation); |
806 | btrfs_set_ref_objectid(leaf, ref, owner_objectid); | 587 | btrfs_set_ref_objectid(leaf, ref, owner_objectid); |
807 | btrfs_set_ref_num_refs(leaf, ref, 1); | 588 | btrfs_set_ref_num_refs(leaf, ref, refs_to_add); |
808 | } else if (ret == -EEXIST) { | 589 | } else if (ret == -EEXIST) { |
809 | u64 existing_owner; | 590 | u64 existing_owner; |
591 | |||
810 | BUG_ON(owner_objectid < BTRFS_FIRST_FREE_OBJECTID); | 592 | BUG_ON(owner_objectid < BTRFS_FIRST_FREE_OBJECTID); |
811 | leaf = path->nodes[0]; | 593 | leaf = path->nodes[0]; |
812 | ref = btrfs_item_ptr(leaf, path->slots[0], | 594 | ref = btrfs_item_ptr(leaf, path->slots[0], |
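The new refs_to_add parameter (and refs_to_drop in the next hunk) exists because the delayed-ref code can coalesce several identical queued operations into a single pass over the extent tree. An illustrative call, assuming three merged adds:

        /* one leaf update where the old code would have done three */
        ret = insert_extent_backref(trans, root, path, bytenr, parent,
                                    ref_root, ref_generation,
                                    owner_objectid, 3 /* refs_to_add */);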
@@ -820,7 +602,7 @@ static noinline int insert_extent_backref(struct btrfs_trans_handle *trans, | |||
820 | 602 | ||
821 | num_refs = btrfs_ref_num_refs(leaf, ref); | 603 | num_refs = btrfs_ref_num_refs(leaf, ref); |
822 | BUG_ON(num_refs == 0); | 604 | BUG_ON(num_refs == 0); |
823 | btrfs_set_ref_num_refs(leaf, ref, num_refs + 1); | 605 | btrfs_set_ref_num_refs(leaf, ref, num_refs + refs_to_add); |
824 | 606 | ||
825 | existing_owner = btrfs_ref_objectid(leaf, ref); | 607 | existing_owner = btrfs_ref_objectid(leaf, ref); |
826 | if (existing_owner != owner_objectid && | 608 | if (existing_owner != owner_objectid && |
@@ -832,6 +614,7 @@ static noinline int insert_extent_backref(struct btrfs_trans_handle *trans, | |||
832 | } else { | 614 | } else { |
833 | goto out; | 615 | goto out; |
834 | } | 616 | } |
617 | btrfs_unlock_up_safe(path, 1); | ||
835 | btrfs_mark_buffer_dirty(path->nodes[0]); | 618 | btrfs_mark_buffer_dirty(path->nodes[0]); |
836 | out: | 619 | out: |
837 | btrfs_release_path(root, path); | 620 | btrfs_release_path(root, path); |
@@ -840,7 +623,8 @@ out: | |||
840 | 623 | ||
841 | static noinline int remove_extent_backref(struct btrfs_trans_handle *trans, | 624 | static noinline int remove_extent_backref(struct btrfs_trans_handle *trans, |
842 | struct btrfs_root *root, | 625 | struct btrfs_root *root, |
843 | struct btrfs_path *path) | 626 | struct btrfs_path *path, |
627 | int refs_to_drop) | ||
844 | { | 628 | { |
845 | struct extent_buffer *leaf; | 629 | struct extent_buffer *leaf; |
846 | struct btrfs_extent_ref *ref; | 630 | struct btrfs_extent_ref *ref; |
@@ -850,8 +634,8 @@ static noinline int remove_extent_backref(struct btrfs_trans_handle *trans, | |||
850 | leaf = path->nodes[0]; | 634 | leaf = path->nodes[0]; |
851 | ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref); | 635 | ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref); |
852 | num_refs = btrfs_ref_num_refs(leaf, ref); | 636 | num_refs = btrfs_ref_num_refs(leaf, ref); |
853 | BUG_ON(num_refs == 0); | 637 | BUG_ON(num_refs < refs_to_drop); |
854 | num_refs -= 1; | 638 | num_refs -= refs_to_drop; |
855 | if (num_refs == 0) { | 639 | if (num_refs == 0) { |
856 | ret = btrfs_del_item(trans, root, path); | 640 | ret = btrfs_del_item(trans, root, path); |
857 | } else { | 641 | } else { |
@@ -902,332 +686,28 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, | |||
902 | #endif | 686 | #endif |
903 | } | 687 | } |
904 | 688 | ||
905 | static noinline int free_extents(struct btrfs_trans_handle *trans, | ||
906 | struct btrfs_root *extent_root, | ||
907 | struct list_head *del_list) | ||
908 | { | ||
909 | struct btrfs_fs_info *info = extent_root->fs_info; | ||
910 | struct btrfs_path *path; | ||
911 | struct btrfs_key key, found_key; | ||
912 | struct extent_buffer *leaf; | ||
913 | struct list_head *cur; | ||
914 | struct pending_extent_op *op; | ||
915 | struct btrfs_extent_item *ei; | ||
916 | int ret, num_to_del, extent_slot = 0, found_extent = 0; | ||
917 | u32 refs; | ||
918 | u64 bytes_freed = 0; | ||
919 | |||
920 | path = btrfs_alloc_path(); | ||
921 | if (!path) | ||
922 | return -ENOMEM; | ||
923 | path->reada = 1; | ||
924 | |||
925 | search: | ||
926 | /* search for the backref for the current ref we want to delete */ | ||
927 | cur = del_list->next; | ||
928 | op = list_entry(cur, struct pending_extent_op, list); | ||
929 | ret = lookup_extent_backref(trans, extent_root, path, op->bytenr, | ||
930 | op->orig_parent, | ||
931 | extent_root->root_key.objectid, | ||
932 | op->orig_generation, op->level, 1); | ||
933 | if (ret) { | ||
934 | printk(KERN_ERR "btrfs unable to find backref byte nr %llu " | ||
935 | "root %llu gen %llu owner %u\n", | ||
936 | (unsigned long long)op->bytenr, | ||
937 | (unsigned long long)extent_root->root_key.objectid, | ||
938 | (unsigned long long)op->orig_generation, op->level); | ||
939 | btrfs_print_leaf(extent_root, path->nodes[0]); | ||
940 | WARN_ON(1); | ||
941 | goto out; | ||
942 | } | ||
943 | |||
944 | extent_slot = path->slots[0]; | ||
945 | num_to_del = 1; | ||
946 | found_extent = 0; | ||
947 | |||
948 | /* | ||
949 | * if we aren't the first item on the leaf we can move back one and see | ||
950 | * if our ref is right next to our extent item | ||
951 | */ | ||
952 | if (likely(extent_slot)) { | ||
953 | extent_slot--; | ||
954 | btrfs_item_key_to_cpu(path->nodes[0], &found_key, | ||
955 | extent_slot); | ||
956 | if (found_key.objectid == op->bytenr && | ||
957 | found_key.type == BTRFS_EXTENT_ITEM_KEY && | ||
958 | found_key.offset == op->num_bytes) { | ||
959 | num_to_del++; | ||
960 | found_extent = 1; | ||
961 | } | ||
962 | } | ||
963 | |||
964 | /* | ||
965 | * if we didn't find the extent we need to delete the backref and then | ||
966 | * search for the extent item key so we can update its ref count | ||
967 | */ | ||
968 | if (!found_extent) { | ||
969 | key.objectid = op->bytenr; | ||
970 | key.type = BTRFS_EXTENT_ITEM_KEY; | ||
971 | key.offset = op->num_bytes; | ||
972 | |||
973 | ret = remove_extent_backref(trans, extent_root, path); | ||
974 | BUG_ON(ret); | ||
975 | btrfs_release_path(extent_root, path); | ||
976 | ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1); | ||
977 | BUG_ON(ret); | ||
978 | extent_slot = path->slots[0]; | ||
979 | } | ||
980 | |||
981 | /* this is where we update the ref count for the extent */ | ||
982 | leaf = path->nodes[0]; | ||
983 | ei = btrfs_item_ptr(leaf, extent_slot, struct btrfs_extent_item); | ||
984 | refs = btrfs_extent_refs(leaf, ei); | ||
985 | BUG_ON(refs == 0); | ||
986 | refs--; | ||
987 | btrfs_set_extent_refs(leaf, ei, refs); | ||
988 | |||
989 | btrfs_mark_buffer_dirty(leaf); | ||
990 | |||
991 | /* | ||
992 | * This extent needs deleting. The reason cur_slot is extent_slot + | ||
993 | * num_to_del is that extent_slot points to the slot where the extent | ||
994 | * is, and if the backref was not right next to the extent we will be | ||
995 | * deleting at least 1 item, and will want to start searching at the | ||
996 | * slot directly next to extent_slot. However, if we did find the | ||
997 | * backref next to the extent item then we will be deleting at least 2 | ||
998 | * items and will want to start searching directly after the ref slot | ||
999 | */ | ||
1000 | if (!refs) { | ||
1001 | struct list_head *pos, *n, *end; | ||
1002 | int cur_slot = extent_slot+num_to_del; | ||
1003 | u64 super_used; | ||
1004 | u64 root_used; | ||
1005 | |||
1006 | path->slots[0] = extent_slot; | ||
1007 | bytes_freed = op->num_bytes; | ||
1008 | |||
1009 | mutex_lock(&info->pinned_mutex); | ||
1010 | ret = pin_down_bytes(trans, extent_root, op->bytenr, | ||
1011 | op->num_bytes, op->level >= | ||
1012 | BTRFS_FIRST_FREE_OBJECTID); | ||
1013 | mutex_unlock(&info->pinned_mutex); | ||
1014 | BUG_ON(ret < 0); | ||
1015 | op->del = ret; | ||
1016 | |||
1017 | /* | ||
1018 | * we need to see if we can delete multiple things at once, so | ||
1019 | * start looping through the list of extents we are wanting to | ||
1020 | * delete and see if their extent/backrefs are right next to | ||
1021 | * each other and the extents only have 1 ref | ||
1022 | */ | ||
1023 | for (pos = cur->next; pos != del_list; pos = pos->next) { | ||
1024 | struct pending_extent_op *tmp; | ||
1025 | |||
1026 | tmp = list_entry(pos, struct pending_extent_op, list); | ||
1027 | |||
1028 | /* we only want to delete extent+ref at this stage */ | ||
1029 | if (cur_slot >= btrfs_header_nritems(leaf) - 1) | ||
1030 | break; | ||
1031 | |||
1032 | btrfs_item_key_to_cpu(leaf, &found_key, cur_slot); | ||
1033 | if (found_key.objectid != tmp->bytenr || | ||
1034 | found_key.type != BTRFS_EXTENT_ITEM_KEY || | ||
1035 | found_key.offset != tmp->num_bytes) | ||
1036 | break; | ||
1037 | |||
1038 | /* check to make sure this extent only has one ref */ | ||
1039 | ei = btrfs_item_ptr(leaf, cur_slot, | ||
1040 | struct btrfs_extent_item); | ||
1041 | if (btrfs_extent_refs(leaf, ei) != 1) | ||
1042 | break; | ||
1043 | |||
1044 | btrfs_item_key_to_cpu(leaf, &found_key, cur_slot+1); | ||
1045 | if (found_key.objectid != tmp->bytenr || | ||
1046 | found_key.type != BTRFS_EXTENT_REF_KEY || | ||
1047 | found_key.offset != tmp->orig_parent) | ||
1048 | break; | ||
1049 | |||
1050 | /* | ||
1051 | * the ref is right next to the extent, we can set the | ||
1052 | * ref count to 0 since we will delete them both now | ||
1053 | */ | ||
1054 | btrfs_set_extent_refs(leaf, ei, 0); | ||
1055 | |||
1056 | /* pin down the bytes for this extent */ | ||
1057 | mutex_lock(&info->pinned_mutex); | ||
1058 | ret = pin_down_bytes(trans, extent_root, tmp->bytenr, | ||
1059 | tmp->num_bytes, tmp->level >= | ||
1060 | BTRFS_FIRST_FREE_OBJECTID); | ||
1061 | mutex_unlock(&info->pinned_mutex); | ||
1062 | BUG_ON(ret < 0); | ||
1063 | |||
1064 | /* | ||
1065 | * use the del field to tell if we need to go ahead and | ||
1066 | * free up the extent when we delete the item or not. | ||
1067 | */ | ||
1068 | tmp->del = ret; | ||
1069 | bytes_freed += tmp->num_bytes; | ||
1070 | |||
1071 | num_to_del += 2; | ||
1072 | cur_slot += 2; | ||
1073 | } | ||
1074 | end = pos; | ||
1075 | |||
1076 | /* update the free space counters */ | ||
1077 | spin_lock(&info->delalloc_lock); | ||
1078 | super_used = btrfs_super_bytes_used(&info->super_copy); | ||
1079 | btrfs_set_super_bytes_used(&info->super_copy, | ||
1080 | super_used - bytes_freed); | ||
1081 | |||
1082 | root_used = btrfs_root_used(&extent_root->root_item); | ||
1083 | btrfs_set_root_used(&extent_root->root_item, | ||
1084 | root_used - bytes_freed); | ||
1085 | spin_unlock(&info->delalloc_lock); | ||
1086 | |||
1087 | /* delete the items */ | ||
1088 | ret = btrfs_del_items(trans, extent_root, path, | ||
1089 | path->slots[0], num_to_del); | ||
1090 | BUG_ON(ret); | ||
1091 | |||
1092 | /* | ||
1093 | * loop through the extents we deleted and do the cleanup work | ||
1094 | * on them | ||
1095 | */ | ||
1096 | for (pos = cur, n = pos->next; pos != end; | ||
1097 | pos = n, n = pos->next) { | ||
1098 | struct pending_extent_op *tmp; | ||
1099 | tmp = list_entry(pos, struct pending_extent_op, list); | ||
1100 | |||
1101 | /* | ||
1102 | * remember tmp->del tells us whether or not we pinned | ||
1103 | * down the extent | ||
1104 | */ | ||
1105 | ret = update_block_group(trans, extent_root, | ||
1106 | tmp->bytenr, tmp->num_bytes, 0, | ||
1107 | tmp->del); | ||
1108 | BUG_ON(ret); | ||
1109 | |||
1110 | list_del_init(&tmp->list); | ||
1111 | unlock_extent(&info->extent_ins, tmp->bytenr, | ||
1112 | tmp->bytenr + tmp->num_bytes - 1, | ||
1113 | GFP_NOFS); | ||
1114 | kfree(tmp); | ||
1115 | } | ||
1116 | } else if (refs && found_extent) { | ||
1117 | /* | ||
1118 | * the ref and extent were right next to each other, but the | ||
1119 | * extent still has a ref, so just free the backref and keep | ||
1120 | * going | ||
1121 | */ | ||
1122 | ret = remove_extent_backref(trans, extent_root, path); | ||
1123 | BUG_ON(ret); | ||
1124 | |||
1125 | list_del_init(&op->list); | ||
1126 | unlock_extent(&info->extent_ins, op->bytenr, | ||
1127 | op->bytenr + op->num_bytes - 1, GFP_NOFS); | ||
1128 | kfree(op); | ||
1129 | } else { | ||
1130 | /* | ||
1131 | * the extent has multiple refs and the backref we were looking | ||
1132 | * for was not right next to it, so just unlock and go next, | ||
1133 | * we're good to go | ||
1134 | */ | ||
1135 | list_del_init(&op->list); | ||
1136 | unlock_extent(&info->extent_ins, op->bytenr, | ||
1137 | op->bytenr + op->num_bytes - 1, GFP_NOFS); | ||
1138 | kfree(op); | ||
1139 | } | ||
1140 | |||
1141 | btrfs_release_path(extent_root, path); | ||
1142 | if (!list_empty(del_list)) | ||
1143 | goto search; | ||
1144 | |||
1145 | out: | ||
1146 | btrfs_free_path(path); | ||
1147 | return ret; | ||
1148 | } | ||
1149 | |||
1150 | static int __btrfs_update_extent_ref(struct btrfs_trans_handle *trans, | 689 | static int __btrfs_update_extent_ref(struct btrfs_trans_handle *trans, |
1151 | struct btrfs_root *root, u64 bytenr, | 690 | struct btrfs_root *root, u64 bytenr, |
691 | u64 num_bytes, | ||
1152 | u64 orig_parent, u64 parent, | 692 | u64 orig_parent, u64 parent, |
1153 | u64 orig_root, u64 ref_root, | 693 | u64 orig_root, u64 ref_root, |
1154 | u64 orig_generation, u64 ref_generation, | 694 | u64 orig_generation, u64 ref_generation, |
1155 | u64 owner_objectid) | 695 | u64 owner_objectid) |
1156 | { | 696 | { |
1157 | int ret; | 697 | int ret; |
1158 | struct btrfs_root *extent_root = root->fs_info->extent_root; | 698 | int pin = owner_objectid < BTRFS_FIRST_FREE_OBJECTID; |
1159 | struct btrfs_path *path; | ||
1160 | |||
1161 | if (root == root->fs_info->extent_root) { | ||
1162 | struct pending_extent_op *extent_op; | ||
1163 | u64 num_bytes; | ||
1164 | |||
1165 | BUG_ON(owner_objectid >= BTRFS_MAX_LEVEL); | ||
1166 | num_bytes = btrfs_level_size(root, (int)owner_objectid); | ||
1167 | mutex_lock(&root->fs_info->extent_ins_mutex); | ||
1168 | if (test_range_bit(&root->fs_info->extent_ins, bytenr, | ||
1169 | bytenr + num_bytes - 1, EXTENT_WRITEBACK, 0)) { | ||
1170 | u64 priv; | ||
1171 | ret = get_state_private(&root->fs_info->extent_ins, | ||
1172 | bytenr, &priv); | ||
1173 | BUG_ON(ret); | ||
1174 | extent_op = (struct pending_extent_op *) | ||
1175 | (unsigned long)priv; | ||
1176 | BUG_ON(extent_op->parent != orig_parent); | ||
1177 | BUG_ON(extent_op->generation != orig_generation); | ||
1178 | 699 | ||
1179 | extent_op->parent = parent; | 700 | ret = btrfs_update_delayed_ref(trans, bytenr, num_bytes, |
1180 | extent_op->generation = ref_generation; | 701 | orig_parent, parent, orig_root, |
1181 | } else { | 702 | ref_root, orig_generation, |
1182 | extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); | 703 | ref_generation, owner_objectid, pin); |
1183 | BUG_ON(!extent_op); | ||
1184 | |||
1185 | extent_op->type = PENDING_BACKREF_UPDATE; | ||
1186 | extent_op->bytenr = bytenr; | ||
1187 | extent_op->num_bytes = num_bytes; | ||
1188 | extent_op->parent = parent; | ||
1189 | extent_op->orig_parent = orig_parent; | ||
1190 | extent_op->generation = ref_generation; | ||
1191 | extent_op->orig_generation = orig_generation; | ||
1192 | extent_op->level = (int)owner_objectid; | ||
1193 | INIT_LIST_HEAD(&extent_op->list); | ||
1194 | extent_op->del = 0; | ||
1195 | |||
1196 | set_extent_bits(&root->fs_info->extent_ins, | ||
1197 | bytenr, bytenr + num_bytes - 1, | ||
1198 | EXTENT_WRITEBACK, GFP_NOFS); | ||
1199 | set_state_private(&root->fs_info->extent_ins, | ||
1200 | bytenr, (unsigned long)extent_op); | ||
1201 | } | ||
1202 | mutex_unlock(&root->fs_info->extent_ins_mutex); | ||
1203 | return 0; | ||
1204 | } | ||
1205 | |||
1206 | path = btrfs_alloc_path(); | ||
1207 | if (!path) | ||
1208 | return -ENOMEM; | ||
1209 | ret = lookup_extent_backref(trans, extent_root, path, | ||
1210 | bytenr, orig_parent, orig_root, | ||
1211 | orig_generation, owner_objectid, 1); | ||
1212 | if (ret) | ||
1213 | goto out; | ||
1214 | ret = remove_extent_backref(trans, extent_root, path); | ||
1215 | if (ret) | ||
1216 | goto out; | ||
1217 | ret = insert_extent_backref(trans, extent_root, path, bytenr, | ||
1218 | parent, ref_root, ref_generation, | ||
1219 | owner_objectid); | ||
1220 | BUG_ON(ret); | 704 | BUG_ON(ret); |
1221 | finish_current_insert(trans, extent_root, 0); | ||
1222 | del_pending_extents(trans, extent_root, 0); | ||
1223 | out: | ||
1224 | btrfs_free_path(path); | ||
1225 | return ret; | 705 | return ret; |
1226 | } | 706 | } |
1227 | 707 | ||
1228 | int btrfs_update_extent_ref(struct btrfs_trans_handle *trans, | 708 | int btrfs_update_extent_ref(struct btrfs_trans_handle *trans, |
1229 | struct btrfs_root *root, u64 bytenr, | 709 | struct btrfs_root *root, u64 bytenr, |
1230 | u64 orig_parent, u64 parent, | 710 | u64 num_bytes, u64 orig_parent, u64 parent, |
1231 | u64 ref_root, u64 ref_generation, | 711 | u64 ref_root, u64 ref_generation, |
1232 | u64 owner_objectid) | 712 | u64 owner_objectid) |
1233 | { | 713 | { |
@@ -1235,20 +715,36 @@ int btrfs_update_extent_ref(struct btrfs_trans_handle *trans, | |||
1235 | if (ref_root == BTRFS_TREE_LOG_OBJECTID && | 715 | if (ref_root == BTRFS_TREE_LOG_OBJECTID && |
1236 | owner_objectid < BTRFS_FIRST_FREE_OBJECTID) | 716 | owner_objectid < BTRFS_FIRST_FREE_OBJECTID) |
1237 | return 0; | 717 | return 0; |
1238 | ret = __btrfs_update_extent_ref(trans, root, bytenr, orig_parent, | 718 | |
1239 | parent, ref_root, ref_root, | 719 | ret = __btrfs_update_extent_ref(trans, root, bytenr, num_bytes, |
1240 | ref_generation, ref_generation, | 720 | orig_parent, parent, ref_root, |
1241 | owner_objectid); | 721 | ref_root, ref_generation, |
722 | ref_generation, owner_objectid); | ||
1242 | return ret; | 723 | return ret; |
1243 | } | 724 | } |
1244 | |||
1245 | static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, | 725 | static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, |
1246 | struct btrfs_root *root, u64 bytenr, | 726 | struct btrfs_root *root, u64 bytenr, |
727 | u64 num_bytes, | ||
1247 | u64 orig_parent, u64 parent, | 728 | u64 orig_parent, u64 parent, |
1248 | u64 orig_root, u64 ref_root, | 729 | u64 orig_root, u64 ref_root, |
1249 | u64 orig_generation, u64 ref_generation, | 730 | u64 orig_generation, u64 ref_generation, |
1250 | u64 owner_objectid) | 731 | u64 owner_objectid) |
1251 | { | 732 | { |
733 | int ret; | ||
734 | |||
735 | ret = btrfs_add_delayed_ref(trans, bytenr, num_bytes, parent, ref_root, | ||
736 | ref_generation, owner_objectid, | ||
737 | BTRFS_ADD_DELAYED_REF, 0); | ||
738 | BUG_ON(ret); | ||
739 | return ret; | ||
740 | } | ||
741 | |||
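With the delayed-ref path above, a ref increment no longer edits the extent tree inline; it queues a BTRFS_ADD_DELAYED_REF entry that is merged with any other pending modifications of the same extent and applied later as one net update. A minimal user-space sketch of that net ref_mod bookkeeping (the names and helpers here are illustrative, not the kernel's):

#include <stdio.h>

/* stand-ins for BTRFS_ADD_DELAYED_REF / BTRFS_DROP_DELAYED_REF */
enum ref_action { ADD_REF, DROP_REF };

/*
 * each queued update nudges a per-extent ref_mod; only the net value
 * is applied to the extent tree when the delayed refs are run
 */
static int apply_mod(int ref_mod, enum ref_action action, int count)
{
        return action == ADD_REF ? ref_mod + count : ref_mod - count;
}

int main(void)
{
        int ref_mod = 0;

        ref_mod = apply_mod(ref_mod, ADD_REF, 1);  /* cow added a ref */
        ref_mod = apply_mod(ref_mod, ADD_REF, 1);  /* snapshot added one */
        ref_mod = apply_mod(ref_mod, DROP_REF, 1); /* original went away */

        /* one tree update instead of three */
        printf("net ref_mod = %d\n", ref_mod);
        return 0;
}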
742 | static noinline_for_stack int add_extent_ref(struct btrfs_trans_handle *trans, | ||
743 | struct btrfs_root *root, u64 bytenr, | ||
744 | u64 num_bytes, u64 parent, u64 ref_root, | ||
745 | u64 ref_generation, u64 owner_objectid, | ||
746 | int refs_to_add) | ||
747 | { | ||
1252 | struct btrfs_path *path; | 748 | struct btrfs_path *path; |
1253 | int ret; | 749 | int ret; |
1254 | struct btrfs_key key; | 750 | struct btrfs_key key; |
@@ -1261,17 +757,24 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, | |||
1261 | return -ENOMEM; | 757 | return -ENOMEM; |
1262 | 758 | ||
1263 | path->reada = 1; | 759 | path->reada = 1; |
760 | path->leave_spinning = 1; | ||
1264 | key.objectid = bytenr; | 761 | key.objectid = bytenr; |
1265 | key.type = BTRFS_EXTENT_ITEM_KEY; | 762 | key.type = BTRFS_EXTENT_ITEM_KEY; |
1266 | key.offset = (u64)-1; | 763 | key.offset = num_bytes; |
1267 | 764 | ||
1268 | ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path, | 765 | /* first find the extent item and update its reference count */ |
1269 | 0, 1); | 766 | ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, |
1270 | if (ret < 0) | 767 | path, 0, 1); |
768 | if (ret < 0) { | ||
769 | btrfs_set_path_blocking(path); | ||
1271 | return ret; | 770 | return ret; |
1272 | BUG_ON(ret == 0 || path->slots[0] == 0); | 771 | } |
1273 | 772 | ||
1274 | path->slots[0]--; | 773 | if (ret > 0) { |
774 | WARN_ON(1); | ||
775 | btrfs_free_path(path); | ||
776 | return -EIO; | ||
777 | } | ||
1275 | l = path->nodes[0]; | 778 | l = path->nodes[0]; |
1276 | 779 | ||
1277 | btrfs_item_key_to_cpu(l, &key, path->slots[0]); | 780 | btrfs_item_key_to_cpu(l, &key, path->slots[0]); |
@@ -1285,21 +788,24 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, | |||
1285 | BUG_ON(key.type != BTRFS_EXTENT_ITEM_KEY); | 788 | BUG_ON(key.type != BTRFS_EXTENT_ITEM_KEY); |
1286 | 789 | ||
1287 | item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item); | 790 | item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item); |
791 | |||
1288 | refs = btrfs_extent_refs(l, item); | 792 | refs = btrfs_extent_refs(l, item); |
1289 | btrfs_set_extent_refs(l, item, refs + 1); | 793 | btrfs_set_extent_refs(l, item, refs + refs_to_add); |
794 | btrfs_unlock_up_safe(path, 1); | ||
795 | |||
1290 | btrfs_mark_buffer_dirty(path->nodes[0]); | 796 | btrfs_mark_buffer_dirty(path->nodes[0]); |
1291 | 797 | ||
1292 | btrfs_release_path(root->fs_info->extent_root, path); | 798 | btrfs_release_path(root->fs_info->extent_root, path); |
1293 | 799 | ||
1294 | path->reada = 1; | 800 | path->reada = 1; |
801 | path->leave_spinning = 1; | ||
802 | |||
803 | /* now insert the actual backref */ | ||
1295 | ret = insert_extent_backref(trans, root->fs_info->extent_root, | 804 | ret = insert_extent_backref(trans, root->fs_info->extent_root, |
1296 | path, bytenr, parent, | 805 | path, bytenr, parent, |
1297 | ref_root, ref_generation, | 806 | ref_root, ref_generation, |
1298 | owner_objectid); | 807 | owner_objectid, refs_to_add); |
1299 | BUG_ON(ret); | 808 | BUG_ON(ret); |
1300 | finish_current_insert(trans, root->fs_info->extent_root, 0); | ||
1301 | del_pending_extents(trans, root->fs_info->extent_root, 0); | ||
1302 | |||
1303 | btrfs_free_path(path); | 809 | btrfs_free_path(path); |
1304 | return 0; | 810 | return 0; |
1305 | } | 811 | } |
@@ -1314,51 +820,278 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, | |||
1314 | if (ref_root == BTRFS_TREE_LOG_OBJECTID && | 820 | if (ref_root == BTRFS_TREE_LOG_OBJECTID && |
1315 | owner_objectid < BTRFS_FIRST_FREE_OBJECTID) | 821 | owner_objectid < BTRFS_FIRST_FREE_OBJECTID) |
1316 | return 0; | 822 | return 0; |
1317 | ret = __btrfs_inc_extent_ref(trans, root, bytenr, 0, parent, | 823 | |
824 | ret = __btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0, parent, | ||
1318 | 0, ref_root, 0, ref_generation, | 825 | 0, ref_root, 0, ref_generation, |
1319 | owner_objectid); | 826 | owner_objectid); |
1320 | return ret; | 827 | return ret; |
1321 | } | 828 | } |
1322 | 829 | ||
1323 | int btrfs_extent_post_op(struct btrfs_trans_handle *trans, | 830 | static int drop_delayed_ref(struct btrfs_trans_handle *trans, |
1324 | struct btrfs_root *root) | 831 | struct btrfs_root *root, |
832 | struct btrfs_delayed_ref_node *node) | ||
833 | { | ||
834 | int ret = 0; | ||
835 | struct btrfs_delayed_ref *ref = btrfs_delayed_node_to_ref(node); | ||
836 | |||
837 | BUG_ON(node->ref_mod == 0); | ||
838 | ret = __btrfs_free_extent(trans, root, node->bytenr, node->num_bytes, | ||
839 | node->parent, ref->root, ref->generation, | ||
840 | ref->owner_objectid, ref->pin, node->ref_mod); | ||
841 | |||
842 | return ret; | ||
843 | } | ||
844 | |||
845 | /* helper function to actually process a single delayed ref entry */ | ||
846 | static noinline int run_one_delayed_ref(struct btrfs_trans_handle *trans, | ||
847 | struct btrfs_root *root, | ||
848 | struct btrfs_delayed_ref_node *node, | ||
849 | int insert_reserved) | ||
1325 | { | 850 | { |
1326 | finish_current_insert(trans, root->fs_info->extent_root, 1); | 851 | int ret; |
1327 | del_pending_extents(trans, root->fs_info->extent_root, 1); | 852 | struct btrfs_delayed_ref *ref; |
853 | |||
854 | if (node->parent == (u64)-1) { | ||
855 | struct btrfs_delayed_ref_head *head; | ||
856 | /* | ||
857 | * we've hit the end of the chain and we were supposed | ||
858 | * to insert this extent into the tree. But it got | ||
859 | * deleted before we ever needed to insert it, so all | ||
860 | * we have to do is clean up the accounting | ||
861 | */ | ||
862 | if (insert_reserved) { | ||
863 | update_reserved_extents(root, node->bytenr, | ||
864 | node->num_bytes, 0); | ||
865 | } | ||
866 | head = btrfs_delayed_node_to_head(node); | ||
867 | mutex_unlock(&head->mutex); | ||
868 | return 0; | ||
869 | } | ||
870 | |||
871 | ref = btrfs_delayed_node_to_ref(node); | ||
872 | if (ref->action == BTRFS_ADD_DELAYED_REF) { | ||
873 | if (insert_reserved) { | ||
874 | struct btrfs_key ins; | ||
875 | |||
876 | ins.objectid = node->bytenr; | ||
877 | ins.offset = node->num_bytes; | ||
878 | ins.type = BTRFS_EXTENT_ITEM_KEY; | ||
879 | |||
880 | /* record the full extent allocation */ | ||
881 | ret = __btrfs_alloc_reserved_extent(trans, root, | ||
882 | node->parent, ref->root, | ||
883 | ref->generation, ref->owner_objectid, | ||
884 | &ins, node->ref_mod); | ||
885 | update_reserved_extents(root, node->bytenr, | ||
886 | node->num_bytes, 0); | ||
887 | } else { | ||
888 | /* just add one backref */ | ||
889 | ret = add_extent_ref(trans, root, node->bytenr, | ||
890 | node->num_bytes, | ||
891 | node->parent, ref->root, ref->generation, | ||
892 | ref->owner_objectid, node->ref_mod); | ||
893 | } | ||
894 | BUG_ON(ret); | ||
895 | } else if (ref->action == BTRFS_DROP_DELAYED_REF) { | ||
896 | WARN_ON(insert_reserved); | ||
897 | ret = drop_delayed_ref(trans, root, node); | ||
898 | } | ||
1328 | return 0; | 899 | return 0; |
1329 | } | 900 | } |
1330 | 901 | ||
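run_one_delayed_ref above has three outcomes: a head node (parent == (u64)-1) whose extent died before it was ever inserted, so only the reserved-space accounting gets fixed up; an ADD that either records the full allocation or just bumps a backref, depending on insert_reserved; and a DROP that frees the ref. A rough stand-alone model of that dispatch, where the printfs stand in for the real extent-tree helpers:

#include <stdio.h>

enum node_kind { HEAD_NODE, ADD_DELAYED_REF, DROP_DELAYED_REF };

/* placeholder dispatch mirroring run_one_delayed_ref's three branches */
static void run_one(enum node_kind kind, int insert_reserved)
{
        switch (kind) {
        case HEAD_NODE:
                /* extent died before insertion: fix accounting only */
                if (insert_reserved)
                        printf("release reserved bytes\n");
                break;
        case ADD_DELAYED_REF:
                if (insert_reserved)
                        printf("insert extent item + backref\n");
                else
                        printf("add one backref\n");
                break;
        case DROP_DELAYED_REF:
                printf("drop backref / free extent\n");
                break;
        }
}

int main(void)
{
        run_one(ADD_DELAYED_REF, 1);
        run_one(DROP_DELAYED_REF, 0);
        run_one(HEAD_NODE, 1);
        return 0;
}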
1331 | int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans, | 902 | static noinline struct btrfs_delayed_ref_node * |
1332 | struct btrfs_root *root, u64 bytenr, | 903 | select_delayed_ref(struct btrfs_delayed_ref_head *head) |
1333 | u64 num_bytes, u32 *refs) | ||
1334 | { | 904 | { |
1335 | struct btrfs_path *path; | 905 | struct rb_node *node; |
906 | struct btrfs_delayed_ref_node *ref; | ||
907 | int action = BTRFS_ADD_DELAYED_REF; | ||
908 | again: | ||
909 | /* | ||
910 | * select delayed ref of type BTRFS_ADD_DELAYED_REF first. | ||
911 | * this prevents the ref count from going down to zero when | ||
912 | * there are still pending delayed refs. | ||
913 | */ | ||
914 | node = rb_prev(&head->node.rb_node); | ||
915 | while (1) { | ||
916 | if (!node) | ||
917 | break; | ||
918 | ref = rb_entry(node, struct btrfs_delayed_ref_node, | ||
919 | rb_node); | ||
920 | if (ref->bytenr != head->node.bytenr) | ||
921 | break; | ||
922 | if (btrfs_delayed_node_to_ref(ref)->action == action) | ||
923 | return ref; | ||
924 | node = rb_prev(node); | ||
925 | } | ||
926 | if (action == BTRFS_ADD_DELAYED_REF) { | ||
927 | action = BTRFS_DROP_DELAYED_REF; | ||
928 | goto again; | ||
929 | } | ||
930 | return NULL; | ||
931 | } | ||
932 | |||
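select_delayed_ref scans the pending updates for one extent twice, handing out every BTRFS_ADD_DELAYED_REF before the first BTRFS_DROP_DELAYED_REF so the on-disk ref count never transiently reaches zero. The same two-pass preference over a plain array, as a hypothetical simplification of the rbtree walk:

#include <stdio.h>

enum ref_action { ADD, DROP };

/*
 * two-pass scan: return every ADD before the first DROP, mirroring
 * the action preference in select_delayed_ref
 */
static int select_ref(const enum ref_action *refs, int n)
{
        enum ref_action want = ADD;
        int i;
again:
        for (i = 0; i < n; i++) {
                if (refs[i] == want)
                        return i;
        }
        if (want == ADD) {
                want = DROP;
                goto again;
        }
        return -1; /* nothing pending */
}

int main(void)
{
        enum ref_action pending[] = { DROP, ADD, DROP };
        int slot = select_ref(pending, 3);

        if (slot >= 0)
                printf("picked slot %d (%s)\n", slot,
                       pending[slot] == ADD ? "ADD" : "DROP");
        return 0;
}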
933 | static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, | ||
934 | struct btrfs_root *root, | ||
935 | struct list_head *cluster) | ||
936 | { | ||
937 | struct btrfs_delayed_ref_root *delayed_refs; | ||
938 | struct btrfs_delayed_ref_node *ref; | ||
939 | struct btrfs_delayed_ref_head *locked_ref = NULL; | ||
1336 | int ret; | 940 | int ret; |
1337 | struct btrfs_key key; | 941 | int count = 0; |
1338 | struct extent_buffer *l; | 942 | int must_insert_reserved = 0; |
1339 | struct btrfs_extent_item *item; | ||
1340 | 943 | ||
1341 | WARN_ON(num_bytes < root->sectorsize); | 944 | delayed_refs = &trans->transaction->delayed_refs; |
1342 | path = btrfs_alloc_path(); | 945 | while (1) { |
1343 | path->reada = 1; | 946 | if (!locked_ref) { |
1344 | key.objectid = bytenr; | 947 | /* pick a new head ref from the cluster list */ |
1345 | key.offset = num_bytes; | 948 | if (list_empty(cluster)) |
1346 | btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); | 949 | break; |
1347 | ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path, | 950 | |
1348 | 0, 0); | 951 | locked_ref = list_entry(cluster->next, |
1349 | if (ret < 0) | 952 | struct btrfs_delayed_ref_head, cluster); |
1350 | goto out; | 953 | |
1351 | if (ret != 0) { | 954 | /* grab the lock that says we are going to process |
1352 | btrfs_print_leaf(root, path->nodes[0]); | 955 | * all the refs for this head */ |
1353 | printk(KERN_INFO "btrfs failed to find block number %llu\n", | 956 | ret = btrfs_delayed_ref_lock(trans, locked_ref); |
1354 | (unsigned long long)bytenr); | 957 | |
1355 | BUG(); | 958 | /* |
959 | * we may have dropped the spin lock to get the head | ||
960 | * mutex lock, and that might have given someone else | ||
961 | * time to free the head. If that's true, it has been | ||
962 | * removed from our list and we can move on. | ||
963 | */ | ||
964 | if (ret == -EAGAIN) { | ||
965 | locked_ref = NULL; | ||
966 | count++; | ||
967 | continue; | ||
968 | } | ||
969 | } | ||
970 | |||
971 | /* | ||
972 | * record the must insert reserved flag before we | ||
973 | * drop the spin lock. | ||
974 | */ | ||
975 | must_insert_reserved = locked_ref->must_insert_reserved; | ||
976 | locked_ref->must_insert_reserved = 0; | ||
977 | |||
978 | /* | ||
979 | * locked_ref is the head node, so we have to go one | ||
980 | * node back for any delayed ref updates | ||
981 | */ | ||
982 | ref = select_delayed_ref(locked_ref); | ||
983 | if (!ref) { | ||
984 | /* All delayed refs have been processed, go ahead | ||
985 | * and send the head node to run_one_delayed_ref, | ||
986 | * so that any accounting fixes can happen | ||
987 | */ | ||
988 | ref = &locked_ref->node; | ||
989 | list_del_init(&locked_ref->cluster); | ||
990 | locked_ref = NULL; | ||
991 | } | ||
992 | |||
993 | ref->in_tree = 0; | ||
994 | rb_erase(&ref->rb_node, &delayed_refs->root); | ||
995 | delayed_refs->num_entries--; | ||
996 | spin_unlock(&delayed_refs->lock); | ||
997 | |||
998 | ret = run_one_delayed_ref(trans, root, ref, | ||
999 | must_insert_reserved); | ||
1000 | BUG_ON(ret); | ||
1001 | btrfs_put_delayed_ref(ref); | ||
1002 | |||
1003 | count++; | ||
1004 | cond_resched(); | ||
1005 | spin_lock(&delayed_refs->lock); | ||
1006 | } | ||
1007 | return count; | ||
1008 | } | ||
1009 | |||
1010 | /* | ||
1011 | * this starts processing the delayed reference count updates and | ||
1012 | * extent insertions we have queued up so far. count can be | ||
1013 | * 0, which means to process everything in the tree at the start | ||
1014 | * of the run (but not newly added entries), or it can be some target | ||
1015 | * number you'd like to process. | ||
1016 | */ | ||
1017 | int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, | ||
1018 | struct btrfs_root *root, unsigned long count) | ||
1019 | { | ||
1020 | struct rb_node *node; | ||
1021 | struct btrfs_delayed_ref_root *delayed_refs; | ||
1022 | struct btrfs_delayed_ref_node *ref; | ||
1023 | struct list_head cluster; | ||
1024 | int ret; | ||
1025 | int run_all = count == (unsigned long)-1; | ||
1026 | int run_most = 0; | ||
1027 | |||
1028 | if (root == root->fs_info->extent_root) | ||
1029 | root = root->fs_info->tree_root; | ||
1030 | |||
1031 | delayed_refs = &trans->transaction->delayed_refs; | ||
1032 | INIT_LIST_HEAD(&cluster); | ||
1033 | again: | ||
1034 | spin_lock(&delayed_refs->lock); | ||
1035 | if (count == 0) { | ||
1036 | count = delayed_refs->num_entries * 2; | ||
1037 | run_most = 1; | ||
1038 | } | ||
1039 | while (1) { | ||
1040 | if (!(run_all || run_most) && | ||
1041 | delayed_refs->num_heads_ready < 64) | ||
1042 | break; | ||
1043 | |||
1044 | /* | ||
1045 | * go find something we can process in the rbtree. We start at | ||
1046 | * the beginning of the tree, and then build a cluster | ||
1047 | * of refs to process starting at the first one we are able to | ||
1048 | * lock | ||
1049 | */ | ||
1050 | ret = btrfs_find_ref_cluster(trans, &cluster, | ||
1051 | delayed_refs->run_delayed_start); | ||
1052 | if (ret) | ||
1053 | break; | ||
1054 | |||
1055 | ret = run_clustered_refs(trans, root, &cluster); | ||
1056 | BUG_ON(ret < 0); | ||
1057 | |||
1058 | count -= min_t(unsigned long, ret, count); | ||
1059 | |||
1060 | if (count == 0) | ||
1061 | break; | ||
1062 | } | ||
1063 | |||
1064 | if (run_all) { | ||
1065 | node = rb_first(&delayed_refs->root); | ||
1066 | if (!node) | ||
1067 | goto out; | ||
1068 | count = (unsigned long)-1; | ||
1069 | |||
1070 | while (node) { | ||
1071 | ref = rb_entry(node, struct btrfs_delayed_ref_node, | ||
1072 | rb_node); | ||
1073 | if (btrfs_delayed_ref_is_head(ref)) { | ||
1074 | struct btrfs_delayed_ref_head *head; | ||
1075 | |||
1076 | head = btrfs_delayed_node_to_head(ref); | ||
1077 | atomic_inc(&ref->refs); | ||
1078 | |||
1079 | spin_unlock(&delayed_refs->lock); | ||
1080 | mutex_lock(&head->mutex); | ||
1081 | mutex_unlock(&head->mutex); | ||
1082 | |||
1083 | btrfs_put_delayed_ref(ref); | ||
1084 | cond_resched(); | ||
1085 | goto again; | ||
1086 | } | ||
1087 | node = rb_next(node); | ||
1088 | } | ||
1089 | spin_unlock(&delayed_refs->lock); | ||
1090 | schedule_timeout(1); | ||
1091 | goto again; | ||
1356 | } | 1092 | } |
1357 | l = path->nodes[0]; | ||
1358 | item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item); | ||
1359 | *refs = btrfs_extent_refs(l, item); | ||
1360 | out: | 1093 | out: |
1361 | btrfs_free_path(path); | 1094 | spin_unlock(&delayed_refs->lock); |
1362 | return 0; | 1095 | return 0; |
1363 | } | 1096 | } |
1364 | 1097 | ||
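One detail worth noting in btrfs_run_delayed_refs: a cluster can process more refs than the caller asked for, so the remaining count is reduced with min_t to keep the unsigned counter from wrapping past zero. A worked example of the clamp:

#include <stdio.h>

#define min_t(type, a, b) ((type)(a) < (type)(b) ? (type)(a) : (type)(b))

int main(void)
{
        unsigned long count = 100; /* refs the caller asked us to run */
        unsigned long ret = 130;   /* a cluster may process more */

        /* without the clamp, count would wrap to a huge value */
        count -= min_t(unsigned long, ret, count);
        printf("remaining = %lu\n", count); /* 0, so the loop exits */
        return 0;
}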
@@ -1582,7 +1315,7 @@ noinline int btrfs_inc_ref(struct btrfs_trans_handle *trans, | |||
1582 | int refi = 0; | 1315 | int refi = 0; |
1583 | int slot; | 1316 | int slot; |
1584 | int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *, | 1317 | int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *, |
1585 | u64, u64, u64, u64, u64, u64, u64, u64); | 1318 | u64, u64, u64, u64, u64, u64, u64, u64, u64); |
1586 | 1319 | ||
1587 | ref_root = btrfs_header_owner(buf); | 1320 | ref_root = btrfs_header_owner(buf); |
1588 | ref_generation = btrfs_header_generation(buf); | 1321 | ref_generation = btrfs_header_generation(buf); |
@@ -1654,12 +1387,19 @@ noinline int btrfs_inc_ref(struct btrfs_trans_handle *trans, | |||
1654 | 1387 | ||
1655 | if (level == 0) { | 1388 | if (level == 0) { |
1656 | btrfs_item_key_to_cpu(buf, &key, slot); | 1389 | btrfs_item_key_to_cpu(buf, &key, slot); |
1390 | fi = btrfs_item_ptr(buf, slot, | ||
1391 | struct btrfs_file_extent_item); | ||
1392 | |||
1393 | bytenr = btrfs_file_extent_disk_bytenr(buf, fi); | ||
1394 | if (bytenr == 0) | ||
1395 | continue; | ||
1657 | 1396 | ||
1658 | ret = process_func(trans, root, bytenr, | 1397 | ret = process_func(trans, root, bytenr, |
1659 | orig_buf->start, buf->start, | 1398 | btrfs_file_extent_disk_num_bytes(buf, fi), |
1660 | orig_root, ref_root, | 1399 | orig_buf->start, buf->start, |
1661 | orig_generation, ref_generation, | 1400 | orig_root, ref_root, |
1662 | key.objectid); | 1401 | orig_generation, ref_generation, |
1402 | key.objectid); | ||
1663 | 1403 | ||
1664 | if (ret) { | 1404 | if (ret) { |
1665 | faili = slot; | 1405 | faili = slot; |
@@ -1667,7 +1407,7 @@ noinline int btrfs_inc_ref(struct btrfs_trans_handle *trans, | |||
1667 | goto fail; | 1407 | goto fail; |
1668 | } | 1408 | } |
1669 | } else { | 1409 | } else { |
1670 | ret = process_func(trans, root, bytenr, | 1410 | ret = process_func(trans, root, bytenr, buf->len, |
1671 | orig_buf->start, buf->start, | 1411 | orig_buf->start, buf->start, |
1672 | orig_root, ref_root, | 1412 | orig_root, ref_root, |
1673 | orig_generation, ref_generation, | 1413 | orig_generation, ref_generation, |
@@ -1744,17 +1484,17 @@ int btrfs_update_ref(struct btrfs_trans_handle *trans, | |||
1744 | if (bytenr == 0) | 1484 | if (bytenr == 0) |
1745 | continue; | 1485 | continue; |
1746 | ret = __btrfs_update_extent_ref(trans, root, bytenr, | 1486 | ret = __btrfs_update_extent_ref(trans, root, bytenr, |
1747 | orig_buf->start, buf->start, | 1487 | btrfs_file_extent_disk_num_bytes(buf, fi), |
1748 | orig_root, ref_root, | 1488 | orig_buf->start, buf->start, |
1749 | orig_generation, ref_generation, | 1489 | orig_root, ref_root, orig_generation, |
1750 | key.objectid); | 1490 | ref_generation, key.objectid); |
1751 | if (ret) | 1491 | if (ret) |
1752 | goto fail; | 1492 | goto fail; |
1753 | } else { | 1493 | } else { |
1754 | bytenr = btrfs_node_blockptr(buf, slot); | 1494 | bytenr = btrfs_node_blockptr(buf, slot); |
1755 | ret = __btrfs_update_extent_ref(trans, root, bytenr, | 1495 | ret = __btrfs_update_extent_ref(trans, root, bytenr, |
1756 | orig_buf->start, buf->start, | 1496 | buf->len, orig_buf->start, |
1757 | orig_root, ref_root, | 1497 | buf->start, orig_root, ref_root, |
1758 | orig_generation, ref_generation, | 1498 | orig_generation, ref_generation, |
1759 | level - 1); | 1499 | level - 1); |
1760 | if (ret) | 1500 | if (ret) |
@@ -1773,7 +1513,6 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans, | |||
1773 | struct btrfs_block_group_cache *cache) | 1513 | struct btrfs_block_group_cache *cache) |
1774 | { | 1514 | { |
1775 | int ret; | 1515 | int ret; |
1776 | int pending_ret; | ||
1777 | struct btrfs_root *extent_root = root->fs_info->extent_root; | 1516 | struct btrfs_root *extent_root = root->fs_info->extent_root; |
1778 | unsigned long bi; | 1517 | unsigned long bi; |
1779 | struct extent_buffer *leaf; | 1518 | struct extent_buffer *leaf; |
@@ -1789,12 +1528,8 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans, | |||
1789 | btrfs_mark_buffer_dirty(leaf); | 1528 | btrfs_mark_buffer_dirty(leaf); |
1790 | btrfs_release_path(extent_root, path); | 1529 | btrfs_release_path(extent_root, path); |
1791 | fail: | 1530 | fail: |
1792 | finish_current_insert(trans, extent_root, 0); | ||
1793 | pending_ret = del_pending_extents(trans, extent_root, 0); | ||
1794 | if (ret) | 1531 | if (ret) |
1795 | return ret; | 1532 | return ret; |
1796 | if (pending_ret) | ||
1797 | return pending_ret; | ||
1798 | return 0; | 1533 | return 0; |
1799 | 1534 | ||
1800 | } | 1535 | } |
@@ -1858,7 +1593,7 @@ int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr) | |||
1858 | if (!block_group || block_group->ro) | 1593 | if (!block_group || block_group->ro) |
1859 | readonly = 1; | 1594 | readonly = 1; |
1860 | if (block_group) | 1595 | if (block_group) |
1861 | put_block_group(block_group); | 1596 | btrfs_put_block_group(block_group); |
1862 | return readonly; | 1597 | return readonly; |
1863 | } | 1598 | } |
1864 | 1599 | ||
@@ -1882,7 +1617,6 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, | |||
1882 | if (!found) | 1617 | if (!found) |
1883 | return -ENOMEM; | 1618 | return -ENOMEM; |
1884 | 1619 | ||
1885 | list_add(&found->list, &info->space_info); | ||
1886 | INIT_LIST_HEAD(&found->block_groups); | 1620 | INIT_LIST_HEAD(&found->block_groups); |
1887 | init_rwsem(&found->groups_sem); | 1621 | init_rwsem(&found->groups_sem); |
1888 | spin_lock_init(&found->lock); | 1622 | spin_lock_init(&found->lock); |
@@ -1892,9 +1626,11 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, | |||
1892 | found->bytes_pinned = 0; | 1626 | found->bytes_pinned = 0; |
1893 | found->bytes_reserved = 0; | 1627 | found->bytes_reserved = 0; |
1894 | found->bytes_readonly = 0; | 1628 | found->bytes_readonly = 0; |
1629 | found->bytes_delalloc = 0; | ||
1895 | found->full = 0; | 1630 | found->full = 0; |
1896 | found->force_alloc = 0; | 1631 | found->force_alloc = 0; |
1897 | *space_info = found; | 1632 | *space_info = found; |
1633 | list_add_rcu(&found->list, &info->space_info); | ||
1898 | return 0; | 1634 | return 0; |
1899 | } | 1635 | } |
1900 | 1636 | ||
@@ -1955,6 +1691,233 @@ u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags) | |||
1955 | return flags; | 1691 | return flags; |
1956 | } | 1692 | } |
1957 | 1693 | ||
1694 | static u64 btrfs_get_alloc_profile(struct btrfs_root *root, u64 data) | ||
1695 | { | ||
1696 | struct btrfs_fs_info *info = root->fs_info; | ||
1697 | u64 alloc_profile; | ||
1698 | |||
1699 | if (data) { | ||
1700 | alloc_profile = info->avail_data_alloc_bits & | ||
1701 | info->data_alloc_profile; | ||
1702 | data = BTRFS_BLOCK_GROUP_DATA | alloc_profile; | ||
1703 | } else if (root == root->fs_info->chunk_root) { | ||
1704 | alloc_profile = info->avail_system_alloc_bits & | ||
1705 | info->system_alloc_profile; | ||
1706 | data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile; | ||
1707 | } else { | ||
1708 | alloc_profile = info->avail_metadata_alloc_bits & | ||
1709 | info->metadata_alloc_profile; | ||
1710 | data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile; | ||
1711 | } | ||
1712 | |||
1713 | return btrfs_reduce_alloc_profile(root, data); | ||
1714 | } | ||
1715 | |||
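btrfs_get_alloc_profile boils down to two bitmask operations: the RAID bits the filesystem can currently provide, ANDed with the configured profile, then ORed into the block-group type. A worked example with illustrative flag values (the real BTRFS_BLOCK_GROUP_* constants live in ctree.h):

#include <stdio.h>

/* illustrative values only; the real flags live in ctree.h */
#define BLOCK_GROUP_DATA  (1ULL << 0)
#define BLOCK_GROUP_RAID0 (1ULL << 3)
#define BLOCK_GROUP_RAID1 (1ULL << 4)
#define BLOCK_GROUP_DUP   (1ULL << 5)

int main(void)
{
        unsigned long long avail = BLOCK_GROUP_RAID0 | BLOCK_GROUP_RAID1;
        unsigned long long asked = BLOCK_GROUP_RAID1 | BLOCK_GROUP_DUP;
        unsigned long long profile = avail & asked; /* RAID1 survives */

        printf("data alloc flags = 0x%llx\n", BLOCK_GROUP_DATA | profile);
        return 0;
}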
1716 | void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode) | ||
1717 | { | ||
1718 | u64 alloc_target; | ||
1719 | |||
1720 | alloc_target = btrfs_get_alloc_profile(root, 1); | ||
1721 | BTRFS_I(inode)->space_info = __find_space_info(root->fs_info, | ||
1722 | alloc_target); | ||
1723 | } | ||
1724 | |||
1725 | /* | ||
1726 | * for now this just makes sure we have at least 5% of our metadata space free | ||
1727 | * for use. | ||
1728 | */ | ||
1729 | int btrfs_check_metadata_free_space(struct btrfs_root *root) | ||
1730 | { | ||
1731 | struct btrfs_fs_info *info = root->fs_info; | ||
1732 | struct btrfs_space_info *meta_sinfo; | ||
1733 | u64 alloc_target, thresh; | ||
1734 | int committed = 0, ret; | ||
1735 | |||
1736 | /* get the space info for where the metadata will live */ | ||
1737 | alloc_target = btrfs_get_alloc_profile(root, 0); | ||
1738 | meta_sinfo = __find_space_info(info, alloc_target); | ||
1739 | |||
1740 | again: | ||
1741 | spin_lock(&meta_sinfo->lock); | ||
1742 | if (!meta_sinfo->full) | ||
1743 | thresh = meta_sinfo->total_bytes * 80; | ||
1744 | else | ||
1745 | thresh = meta_sinfo->total_bytes * 95; | ||
1746 | |||
1747 | do_div(thresh, 100); | ||
1748 | |||
1749 | if (meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + | ||
1750 | meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly > thresh) { | ||
1751 | struct btrfs_trans_handle *trans; | ||
1752 | if (!meta_sinfo->full) { | ||
1753 | meta_sinfo->force_alloc = 1; | ||
1754 | spin_unlock(&meta_sinfo->lock); | ||
1755 | |||
1756 | trans = btrfs_start_transaction(root, 1); | ||
1757 | if (!trans) | ||
1758 | return -ENOMEM; | ||
1759 | |||
1760 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, | ||
1761 | 2 * 1024 * 1024, alloc_target, 0); | ||
1762 | btrfs_end_transaction(trans, root); | ||
1763 | goto again; | ||
1764 | } | ||
1765 | spin_unlock(&meta_sinfo->lock); | ||
1766 | |||
1767 | if (!committed) { | ||
1768 | committed = 1; | ||
1769 | trans = btrfs_join_transaction(root, 1); | ||
1770 | if (!trans) | ||
1771 | return -ENOMEM; | ||
1772 | ret = btrfs_commit_transaction(trans, root); | ||
1773 | if (ret) | ||
1774 | return ret; | ||
1775 | goto again; | ||
1776 | } | ||
1777 | return -ENOSPC; | ||
1778 | } | ||
1779 | spin_unlock(&meta_sinfo->lock); | ||
1780 | |||
1781 | return 0; | ||
1782 | } | ||
1783 | |||
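The 80%/95% thresholds above are pure integer math; do_div(thresh, 100) is the kernel's 64-bit division helper. For example, with 1 GiB of metadata space in the not-yet-full case:

#include <stdio.h>

int main(void)
{
        unsigned long long total = 1ULL << 30; /* 1 GiB metadata space */
        unsigned long long thresh;

        thresh = total * 80; /* not yet full: keep 20% headroom */
        thresh /= 100;       /* what do_div(thresh, 100) leaves behind */

        printf("allocate a chunk once usage passes %llu bytes\n", thresh);
        return 0;
}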
1784 | /* | ||
1785 | * This will check the space that the inode allocates from to make sure we have | ||
1786 | * enough space for bytes. | ||
1787 | */ | ||
1788 | int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode, | ||
1789 | u64 bytes) | ||
1790 | { | ||
1791 | struct btrfs_space_info *data_sinfo; | ||
1792 | int ret = 0, committed = 0; | ||
1793 | |||
1794 | /* make sure bytes are sectorsize aligned */ | ||
1795 | bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); | ||
1796 | |||
1797 | data_sinfo = BTRFS_I(inode)->space_info; | ||
1798 | again: | ||
1799 | /* make sure we have enough space to handle the data first */ | ||
1800 | spin_lock(&data_sinfo->lock); | ||
1801 | if (data_sinfo->total_bytes - data_sinfo->bytes_used - | ||
1802 | data_sinfo->bytes_delalloc - data_sinfo->bytes_reserved - | ||
1803 | data_sinfo->bytes_pinned - data_sinfo->bytes_readonly - | ||
1804 | data_sinfo->bytes_may_use < bytes) { | ||
1805 | struct btrfs_trans_handle *trans; | ||
1806 | |||
1807 | /* | ||
1808 | * if we don't have enough free bytes in this space then we need | ||
1809 | * to alloc a new chunk. | ||
1810 | */ | ||
1811 | if (!data_sinfo->full) { | ||
1812 | u64 alloc_target; | ||
1813 | |||
1814 | data_sinfo->force_alloc = 1; | ||
1815 | spin_unlock(&data_sinfo->lock); | ||
1816 | |||
1817 | alloc_target = btrfs_get_alloc_profile(root, 1); | ||
1818 | trans = btrfs_start_transaction(root, 1); | ||
1819 | if (!trans) | ||
1820 | return -ENOMEM; | ||
1821 | |||
1822 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, | ||
1823 | bytes + 2 * 1024 * 1024, | ||
1824 | alloc_target, 0); | ||
1825 | btrfs_end_transaction(trans, root); | ||
1826 | if (ret) | ||
1827 | return ret; | ||
1828 | goto again; | ||
1829 | } | ||
1830 | spin_unlock(&data_sinfo->lock); | ||
1831 | |||
1832 | /* commit the current transaction and try again */ | ||
1833 | if (!committed) { | ||
1834 | committed = 1; | ||
1835 | trans = btrfs_join_transaction(root, 1); | ||
1836 | if (!trans) | ||
1837 | return -ENOMEM; | ||
1838 | ret = btrfs_commit_transaction(trans, root); | ||
1839 | if (ret) | ||
1840 | return ret; | ||
1841 | goto again; | ||
1842 | } | ||
1843 | |||
1844 | printk(KERN_ERR "no space left, need %llu, %llu delalloc bytes" | ||
1845 | ", %llu bytes_used, %llu bytes_reserved, " | ||
1846 | "%llu bytes_pinned, %llu bytes_readonly, %llu may use" | ||
1847 | "%llu total\n", bytes, data_sinfo->bytes_delalloc, | ||
1848 | data_sinfo->bytes_used, data_sinfo->bytes_reserved, | ||
1849 | data_sinfo->bytes_pinned, data_sinfo->bytes_readonly, | ||
1850 | data_sinfo->bytes_may_use, data_sinfo->total_bytes); | ||
1851 | return -ENOSPC; | ||
1852 | } | ||
1853 | data_sinfo->bytes_may_use += bytes; | ||
1854 | BTRFS_I(inode)->reserved_bytes += bytes; | ||
1855 | spin_unlock(&data_sinfo->lock); | ||
1856 | |||
1857 | return btrfs_check_metadata_free_space(root); | ||
1858 | } | ||
1859 | |||
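The sectorsize round-up in btrfs_check_data_free_space is bit math that assumes a power-of-two sector size. A small demonstration of the expression:

#include <stdio.h>

/* round up to the next multiple of a power-of-two sector size */
static unsigned long long align_up(unsigned long long bytes,
                                   unsigned long long sectorsize)
{
        return (bytes + sectorsize - 1) & ~(sectorsize - 1);
}

int main(void)
{
        printf("%llu\n", align_up(1, 4096));    /* 4096 */
        printf("%llu\n", align_up(4096, 4096)); /* 4096 */
        printf("%llu\n", align_up(4097, 4096)); /* 8192 */
        return 0;
}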
1860 | /* | ||
1861 | * if there was an error for whatever reason after calling | ||
1862 | * btrfs_check_data_free_space, call this so we can clean up the counters. | ||
1863 | */ | ||
1864 | void btrfs_free_reserved_data_space(struct btrfs_root *root, | ||
1865 | struct inode *inode, u64 bytes) | ||
1866 | { | ||
1867 | struct btrfs_space_info *data_sinfo; | ||
1868 | |||
1869 | /* make sure bytes are sectorsize aligned */ | ||
1870 | bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); | ||
1871 | |||
1872 | data_sinfo = BTRFS_I(inode)->space_info; | ||
1873 | spin_lock(&data_sinfo->lock); | ||
1874 | data_sinfo->bytes_may_use -= bytes; | ||
1875 | BTRFS_I(inode)->reserved_bytes -= bytes; | ||
1876 | spin_unlock(&data_sinfo->lock); | ||
1877 | } | ||
1878 | |||
1879 | /* called when we are adding a delalloc extent to the inode's io_tree */ | ||
1880 | void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode, | ||
1881 | u64 bytes) | ||
1882 | { | ||
1883 | struct btrfs_space_info *data_sinfo; | ||
1884 | |||
1885 | /* get the space info for where this inode will be storing its data */ | ||
1886 | data_sinfo = BTRFS_I(inode)->space_info; | ||
1887 | |||
1888 | /* make sure we have enough space to handle the data first */ | ||
1889 | spin_lock(&data_sinfo->lock); | ||
1890 | data_sinfo->bytes_delalloc += bytes; | ||
1891 | |||
1892 | /* | ||
1893 | * we are adding a delalloc extent without calling | ||
1894 | * btrfs_check_data_free_space first. This happens on a weird | ||
1895 | * writepage condition, but shouldn't hurt our accounting | ||
1896 | */ | ||
1897 | if (unlikely(bytes > BTRFS_I(inode)->reserved_bytes)) { | ||
1898 | data_sinfo->bytes_may_use -= BTRFS_I(inode)->reserved_bytes; | ||
1899 | BTRFS_I(inode)->reserved_bytes = 0; | ||
1900 | } else { | ||
1901 | data_sinfo->bytes_may_use -= bytes; | ||
1902 | BTRFS_I(inode)->reserved_bytes -= bytes; | ||
1903 | } | ||
1904 | |||
1905 | spin_unlock(&data_sinfo->lock); | ||
1906 | } | ||
1907 | |||
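btrfs_delalloc_reserve_space moves bytes out of the speculative bytes_may_use pool into bytes_delalloc, clamping at the inode's reservation for the writepage path that never reserved. A compact model of that transfer (the struct and names are stand-ins, not the kernel's):

#include <stdio.h>

struct space_info { unsigned long long may_use, delalloc; };

/*
 * move a reservation into the delalloc counter; if the caller never
 * reserved (the writepage corner case), only release what was held
 */
static void reserve_delalloc(struct space_info *s,
                             unsigned long long *reserved,
                             unsigned long long bytes)
{
        s->delalloc += bytes;
        if (bytes > *reserved) {
                s->may_use -= *reserved;
                *reserved = 0;
        } else {
                s->may_use -= bytes;
                *reserved -= bytes;
        }
}

int main(void)
{
        struct space_info si = { 8192, 0 };
        unsigned long long inode_reserved = 8192;

        reserve_delalloc(&si, &inode_reserved, 4096);
        printf("may_use=%llu delalloc=%llu\n", si.may_use, si.delalloc);
        return 0;
}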
1908 | /* called when we are clearing a delalloc extent from the inode's io_tree */ | ||
1909 | void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode, | ||
1910 | u64 bytes) | ||
1911 | { | ||
1912 | struct btrfs_space_info *info; | ||
1913 | |||
1914 | info = BTRFS_I(inode)->space_info; | ||
1915 | |||
1916 | spin_lock(&info->lock); | ||
1917 | info->bytes_delalloc -= bytes; | ||
1918 | spin_unlock(&info->lock); | ||
1919 | } | ||
1920 | |||
1958 | static int do_chunk_alloc(struct btrfs_trans_handle *trans, | 1921 | static int do_chunk_alloc(struct btrfs_trans_handle *trans, |
1959 | struct btrfs_root *extent_root, u64 alloc_bytes, | 1922 | struct btrfs_root *extent_root, u64 alloc_bytes, |
1960 | u64 flags, int force) | 1923 | u64 flags, int force) |
@@ -2054,7 +2017,7 @@ static int update_block_group(struct btrfs_trans_handle *trans, | |||
2054 | WARN_ON(ret); | 2017 | WARN_ON(ret); |
2055 | } | 2018 | } |
2056 | } | 2019 | } |
2057 | put_block_group(cache); | 2020 | btrfs_put_block_group(cache); |
2058 | total -= num_bytes; | 2021 | total -= num_bytes; |
2059 | bytenr += num_bytes; | 2022 | bytenr += num_bytes; |
2060 | } | 2023 | } |
@@ -2071,7 +2034,7 @@ static u64 first_logical_byte(struct btrfs_root *root, u64 search_start) | |||
2071 | return 0; | 2034 | return 0; |
2072 | 2035 | ||
2073 | bytenr = cache->key.objectid; | 2036 | bytenr = cache->key.objectid; |
2074 | put_block_group(cache); | 2037 | btrfs_put_block_group(cache); |
2075 | 2038 | ||
2076 | return bytenr; | 2039 | return bytenr; |
2077 | } | 2040 | } |
@@ -2083,7 +2046,6 @@ int btrfs_update_pinned_extents(struct btrfs_root *root, | |||
2083 | struct btrfs_block_group_cache *cache; | 2046 | struct btrfs_block_group_cache *cache; |
2084 | struct btrfs_fs_info *fs_info = root->fs_info; | 2047 | struct btrfs_fs_info *fs_info = root->fs_info; |
2085 | 2048 | ||
2086 | WARN_ON(!mutex_is_locked(&root->fs_info->pinned_mutex)); | ||
2087 | if (pin) { | 2049 | if (pin) { |
2088 | set_extent_dirty(&fs_info->pinned_extents, | 2050 | set_extent_dirty(&fs_info->pinned_extents, |
2089 | bytenr, bytenr + num - 1, GFP_NOFS); | 2051 | bytenr, bytenr + num - 1, GFP_NOFS); |
@@ -2091,6 +2053,7 @@ int btrfs_update_pinned_extents(struct btrfs_root *root, | |||
2091 | clear_extent_dirty(&fs_info->pinned_extents, | 2053 | clear_extent_dirty(&fs_info->pinned_extents, |
2092 | bytenr, bytenr + num - 1, GFP_NOFS); | 2054 | bytenr, bytenr + num - 1, GFP_NOFS); |
2093 | } | 2055 | } |
2056 | |||
2094 | while (num > 0) { | 2057 | while (num > 0) { |
2095 | cache = btrfs_lookup_block_group(fs_info, bytenr); | 2058 | cache = btrfs_lookup_block_group(fs_info, bytenr); |
2096 | BUG_ON(!cache); | 2059 | BUG_ON(!cache); |
@@ -2115,7 +2078,7 @@ int btrfs_update_pinned_extents(struct btrfs_root *root, | |||
2115 | if (cache->cached) | 2078 | if (cache->cached) |
2116 | btrfs_add_free_space(cache, bytenr, len); | 2079 | btrfs_add_free_space(cache, bytenr, len); |
2117 | } | 2080 | } |
2118 | put_block_group(cache); | 2081 | btrfs_put_block_group(cache); |
2119 | bytenr += len; | 2082 | bytenr += len; |
2120 | num -= len; | 2083 | num -= len; |
2121 | } | 2084 | } |
@@ -2146,7 +2109,7 @@ static int update_reserved_extents(struct btrfs_root *root, | |||
2146 | } | 2109 | } |
2147 | spin_unlock(&cache->lock); | 2110 | spin_unlock(&cache->lock); |
2148 | spin_unlock(&cache->space_info->lock); | 2111 | spin_unlock(&cache->space_info->lock); |
2149 | put_block_group(cache); | 2112 | btrfs_put_block_group(cache); |
2150 | bytenr += len; | 2113 | bytenr += len; |
2151 | num -= len; | 2114 | num -= len; |
2152 | } | 2115 | } |
@@ -2161,7 +2124,6 @@ int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy) | |||
2161 | struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents; | 2124 | struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents; |
2162 | int ret; | 2125 | int ret; |
2163 | 2126 | ||
2164 | mutex_lock(&root->fs_info->pinned_mutex); | ||
2165 | while (1) { | 2127 | while (1) { |
2166 | ret = find_first_extent_bit(pinned_extents, last, | 2128 | ret = find_first_extent_bit(pinned_extents, last, |
2167 | &start, &end, EXTENT_DIRTY); | 2129 | &start, &end, EXTENT_DIRTY); |
@@ -2170,7 +2132,6 @@ int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy) | |||
2170 | set_extent_dirty(copy, start, end, GFP_NOFS); | 2132 | set_extent_dirty(copy, start, end, GFP_NOFS); |
2171 | last = end + 1; | 2133 | last = end + 1; |
2172 | } | 2134 | } |
2173 | mutex_unlock(&root->fs_info->pinned_mutex); | ||
2174 | return 0; | 2135 | return 0; |
2175 | } | 2136 | } |
2176 | 2137 | ||
@@ -2182,7 +2143,6 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, | |||
2182 | u64 end; | 2143 | u64 end; |
2183 | int ret; | 2144 | int ret; |
2184 | 2145 | ||
2185 | mutex_lock(&root->fs_info->pinned_mutex); | ||
2186 | while (1) { | 2146 | while (1) { |
2187 | ret = find_first_extent_bit(unpin, 0, &start, &end, | 2147 | ret = find_first_extent_bit(unpin, 0, &start, &end, |
2188 | EXTENT_DIRTY); | 2148 | EXTENT_DIRTY); |
@@ -2191,215 +2151,20 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, | |||
2191 | 2151 | ||
2192 | ret = btrfs_discard_extent(root, start, end + 1 - start); | 2152 | ret = btrfs_discard_extent(root, start, end + 1 - start); |
2193 | 2153 | ||
2154 | /* unlocks the pinned mutex */ | ||
2194 | btrfs_update_pinned_extents(root, start, end + 1 - start, 0); | 2155 | btrfs_update_pinned_extents(root, start, end + 1 - start, 0); |
2195 | clear_extent_dirty(unpin, start, end, GFP_NOFS); | 2156 | clear_extent_dirty(unpin, start, end, GFP_NOFS); |
2196 | 2157 | ||
2197 | if (need_resched()) { | 2158 | cond_resched(); |
2198 | mutex_unlock(&root->fs_info->pinned_mutex); | ||
2199 | cond_resched(); | ||
2200 | mutex_lock(&root->fs_info->pinned_mutex); | ||
2201 | } | ||
2202 | } | 2159 | } |
2203 | mutex_unlock(&root->fs_info->pinned_mutex); | ||
2204 | return ret; | 2160 | return ret; |
2205 | } | 2161 | } |
2206 | 2162 | ||
2207 | static int finish_current_insert(struct btrfs_trans_handle *trans, | ||
2208 | struct btrfs_root *extent_root, int all) | ||
2209 | { | ||
2210 | u64 start; | ||
2211 | u64 end; | ||
2212 | u64 priv; | ||
2213 | u64 search = 0; | ||
2214 | u64 skipped = 0; | ||
2215 | struct btrfs_fs_info *info = extent_root->fs_info; | ||
2216 | struct btrfs_path *path; | ||
2217 | struct pending_extent_op *extent_op, *tmp; | ||
2218 | struct list_head insert_list, update_list; | ||
2219 | int ret; | ||
2220 | int num_inserts = 0, max_inserts; | ||
2221 | |||
2222 | path = btrfs_alloc_path(); | ||
2223 | INIT_LIST_HEAD(&insert_list); | ||
2224 | INIT_LIST_HEAD(&update_list); | ||
2225 | |||
2226 | max_inserts = extent_root->leafsize / | ||
2227 | (2 * sizeof(struct btrfs_key) + 2 * sizeof(struct btrfs_item) + | ||
2228 | sizeof(struct btrfs_extent_ref) + | ||
2229 | sizeof(struct btrfs_extent_item)); | ||
2230 | again: | ||
2231 | mutex_lock(&info->extent_ins_mutex); | ||
2232 | while (1) { | ||
2233 | ret = find_first_extent_bit(&info->extent_ins, search, &start, | ||
2234 | &end, EXTENT_WRITEBACK); | ||
2235 | if (ret) { | ||
2236 | if (skipped && all && !num_inserts && | ||
2237 | list_empty(&update_list)) { | ||
2238 | skipped = 0; | ||
2239 | search = 0; | ||
2240 | continue; | ||
2241 | } | ||
2242 | mutex_unlock(&info->extent_ins_mutex); | ||
2243 | break; | ||
2244 | } | ||
2245 | |||
2246 | ret = try_lock_extent(&info->extent_ins, start, end, GFP_NOFS); | ||
2247 | if (!ret) { | ||
2248 | skipped = 1; | ||
2249 | search = end + 1; | ||
2250 | if (need_resched()) { | ||
2251 | mutex_unlock(&info->extent_ins_mutex); | ||
2252 | cond_resched(); | ||
2253 | mutex_lock(&info->extent_ins_mutex); | ||
2254 | } | ||
2255 | continue; | ||
2256 | } | ||
2257 | |||
2258 | ret = get_state_private(&info->extent_ins, start, &priv); | ||
2259 | BUG_ON(ret); | ||
2260 | extent_op = (struct pending_extent_op *)(unsigned long) priv; | ||
2261 | |||
2262 | if (extent_op->type == PENDING_EXTENT_INSERT) { | ||
2263 | num_inserts++; | ||
2264 | list_add_tail(&extent_op->list, &insert_list); | ||
2265 | search = end + 1; | ||
2266 | if (num_inserts == max_inserts) { | ||
2267 | mutex_unlock(&info->extent_ins_mutex); | ||
2268 | break; | ||
2269 | } | ||
2270 | } else if (extent_op->type == PENDING_BACKREF_UPDATE) { | ||
2271 | list_add_tail(&extent_op->list, &update_list); | ||
2272 | search = end + 1; | ||
2273 | } else { | ||
2274 | BUG(); | ||
2275 | } | ||
2276 | } | ||
2277 | |||
2278 | /* | ||
2279 | * process the update list, clear the writeback bit for it, and if | ||
2280 | * somebody marked this thing for deletion then just unlock it and be | ||
2281 | * done, the free_extents will handle it | ||
2282 | */ | ||
2283 | mutex_lock(&info->extent_ins_mutex); | ||
2284 | list_for_each_entry_safe(extent_op, tmp, &update_list, list) { | ||
2285 | clear_extent_bits(&info->extent_ins, extent_op->bytenr, | ||
2286 | extent_op->bytenr + extent_op->num_bytes - 1, | ||
2287 | EXTENT_WRITEBACK, GFP_NOFS); | ||
2288 | if (extent_op->del) { | ||
2289 | list_del_init(&extent_op->list); | ||
2290 | unlock_extent(&info->extent_ins, extent_op->bytenr, | ||
2291 | extent_op->bytenr + extent_op->num_bytes | ||
2292 | - 1, GFP_NOFS); | ||
2293 | kfree(extent_op); | ||
2294 | } | ||
2295 | } | ||
2296 | mutex_unlock(&info->extent_ins_mutex); | ||
2297 | |||
2298 | /* | ||
2299 | * still have things left on the update list, go ahead and update | ||
2300 | * everything | ||
2301 | */ | ||
2302 | if (!list_empty(&update_list)) { | ||
2303 | ret = update_backrefs(trans, extent_root, path, &update_list); | ||
2304 | BUG_ON(ret); | ||
2305 | } | ||
2306 | |||
2307 | /* | ||
2308 | * if no inserts need to be done, but we skipped some extents and we | ||
2309 | * need to make sure everything is cleaned, then reset everything and | ||
2310 | * go back to the beginning | ||
2311 | */ | ||
2312 | if (!num_inserts && all && skipped) { | ||
2313 | search = 0; | ||
2314 | skipped = 0; | ||
2315 | INIT_LIST_HEAD(&update_list); | ||
2316 | INIT_LIST_HEAD(&insert_list); | ||
2317 | goto again; | ||
2318 | } else if (!num_inserts) { | ||
2319 | goto out; | ||
2320 | } | ||
2321 | |||
2322 | /* | ||
2323 | * process the insert extents list. Again, if we are deleting this | ||
2324 | * extent, then just unlock it, pin down the bytes if need be, and be | ||
2325 | * done with it. Saves us from having to actually insert the extent | ||
2326 | * into the tree and then subsequently come along and delete it | ||
2327 | */ | ||
2328 | mutex_lock(&info->extent_ins_mutex); | ||
2329 | list_for_each_entry_safe(extent_op, tmp, &insert_list, list) { | ||
2330 | clear_extent_bits(&info->extent_ins, extent_op->bytenr, | ||
2331 | extent_op->bytenr + extent_op->num_bytes - 1, | ||
2332 | EXTENT_WRITEBACK, GFP_NOFS); | ||
2333 | if (extent_op->del) { | ||
2334 | u64 used; | ||
2335 | list_del_init(&extent_op->list); | ||
2336 | unlock_extent(&info->extent_ins, extent_op->bytenr, | ||
2337 | extent_op->bytenr + extent_op->num_bytes | ||
2338 | - 1, GFP_NOFS); | ||
2339 | |||
2340 | mutex_lock(&extent_root->fs_info->pinned_mutex); | ||
2341 | ret = pin_down_bytes(trans, extent_root, | ||
2342 | extent_op->bytenr, | ||
2343 | extent_op->num_bytes, 0); | ||
2344 | mutex_unlock(&extent_root->fs_info->pinned_mutex); | ||
2345 | |||
2346 | spin_lock(&info->delalloc_lock); | ||
2347 | used = btrfs_super_bytes_used(&info->super_copy); | ||
2348 | btrfs_set_super_bytes_used(&info->super_copy, | ||
2349 | used - extent_op->num_bytes); | ||
2350 | used = btrfs_root_used(&extent_root->root_item); | ||
2351 | btrfs_set_root_used(&extent_root->root_item, | ||
2352 | used - extent_op->num_bytes); | ||
2353 | spin_unlock(&info->delalloc_lock); | ||
2354 | |||
2355 | ret = update_block_group(trans, extent_root, | ||
2356 | extent_op->bytenr, | ||
2357 | extent_op->num_bytes, | ||
2358 | 0, ret > 0); | ||
2359 | BUG_ON(ret); | ||
2360 | kfree(extent_op); | ||
2361 | num_inserts--; | ||
2362 | } | ||
2363 | } | ||
2364 | mutex_unlock(&info->extent_ins_mutex); | ||
2365 | |||
2366 | ret = insert_extents(trans, extent_root, path, &insert_list, | ||
2367 | num_inserts); | ||
2368 | BUG_ON(ret); | ||
2369 | |||
2370 | /* | ||
2371 | * if we broke out of the loop in order to insert stuff because we hit | ||
2372 | * the maximum number of inserts at a time we can handle, then loop | ||
2373 | * back and pick up where we left off | ||
2374 | */ | ||
2375 | if (num_inserts == max_inserts) { | ||
2376 | INIT_LIST_HEAD(&insert_list); | ||
2377 | INIT_LIST_HEAD(&update_list); | ||
2378 | num_inserts = 0; | ||
2379 | goto again; | ||
2380 | } | ||
2381 | |||
2382 | /* | ||
2383 | * again, if we need to make absolutely sure there are no more pending | ||
2384 | * extent operations left and we know that we skipped some, go back to | ||
2385 | * the beginning and do it all again | ||
2386 | */ | ||
2387 | if (all && skipped) { | ||
2388 | INIT_LIST_HEAD(&insert_list); | ||
2389 | INIT_LIST_HEAD(&update_list); | ||
2390 | search = 0; | ||
2391 | skipped = 0; | ||
2392 | num_inserts = 0; | ||
2393 | goto again; | ||
2394 | } | ||
2395 | out: | ||
2396 | btrfs_free_path(path); | ||
2397 | return 0; | ||
2398 | } | ||
2399 | |||
2400 | static int pin_down_bytes(struct btrfs_trans_handle *trans, | 2163 | static int pin_down_bytes(struct btrfs_trans_handle *trans, |
2401 | struct btrfs_root *root, | 2164 | struct btrfs_root *root, |
2402 | u64 bytenr, u64 num_bytes, int is_data) | 2165 | struct btrfs_path *path, |
2166 | u64 bytenr, u64 num_bytes, int is_data, | ||
2167 | struct extent_buffer **must_clean) | ||
2403 | { | 2168 | { |
2404 | int err = 0; | 2169 | int err = 0; |
2405 | struct extent_buffer *buf; | 2170 | struct extent_buffer *buf; |
@@ -2422,17 +2187,18 @@ static int pin_down_bytes(struct btrfs_trans_handle *trans, | |||
2422 | u64 header_transid = btrfs_header_generation(buf); | 2187 | u64 header_transid = btrfs_header_generation(buf); |
2423 | if (header_owner != BTRFS_TREE_LOG_OBJECTID && | 2188 | if (header_owner != BTRFS_TREE_LOG_OBJECTID && |
2424 | header_owner != BTRFS_TREE_RELOC_OBJECTID && | 2189 | header_owner != BTRFS_TREE_RELOC_OBJECTID && |
2190 | header_owner != BTRFS_DATA_RELOC_TREE_OBJECTID && | ||
2425 | header_transid == trans->transid && | 2191 | header_transid == trans->transid && |
2426 | !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { | 2192 | !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { |
2427 | clean_tree_block(NULL, root, buf); | 2193 | *must_clean = buf; |
2428 | btrfs_tree_unlock(buf); | ||
2429 | free_extent_buffer(buf); | ||
2430 | return 1; | 2194 | return 1; |
2431 | } | 2195 | } |
2432 | btrfs_tree_unlock(buf); | 2196 | btrfs_tree_unlock(buf); |
2433 | } | 2197 | } |
2434 | free_extent_buffer(buf); | 2198 | free_extent_buffer(buf); |
2435 | pinit: | 2199 | pinit: |
2200 | btrfs_set_path_blocking(path); | ||
2201 | /* unlocks the pinned mutex */ | ||
2436 | btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); | 2202 | btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); |
2437 | 2203 | ||
2438 | BUG_ON(err < 0); | 2204 | BUG_ON(err < 0); |
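
The reworked pin_down_bytes() above no longer calls clean_tree_block() itself; because the caller may still hold spinning locks, it hands the buffer back through the new must_clean out-parameter and __free_extent() cleans it later, after forcing the lock to blocking. A minimal user-space model of that deferred-cleanup hand-off (the types and the reuse check are stand-ins, not the kernel's):

    #include <stdio.h>

    struct buffer { int id; };

    /* callee: decide the buffer needs cleaning, but hand it back through
     * must_clean instead of cleaning it under the spinning lock */
    static int pin_bytes(struct buffer *buf, struct buffer **must_clean)
    {
        if (buf->id % 2 == 0) {      /* stand-in for the reuse check */
            *must_clean = buf;
            return 1;
        }
        return 0;
    }

    int main(void)
    {
        struct buffer b = { .id = 4 };
        struct buffer *must_clean = NULL;

        if (pin_bytes(&b, &must_clean) > 0 && must_clean)
            printf("cleaning buffer %d after switching the lock to blocking\n",
                   must_clean->id);
        return 0;
    }
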
@@ -2446,7 +2212,8 @@ static int __free_extent(struct btrfs_trans_handle *trans, | |||
2446 | struct btrfs_root *root, | 2212 | struct btrfs_root *root, |
2447 | u64 bytenr, u64 num_bytes, u64 parent, | 2213 | u64 bytenr, u64 num_bytes, u64 parent, |
2448 | u64 root_objectid, u64 ref_generation, | 2214 | u64 root_objectid, u64 ref_generation, |
2449 | u64 owner_objectid, int pin, int mark_free) | 2215 | u64 owner_objectid, int pin, int mark_free, |
2216 | int refs_to_drop) | ||
2450 | { | 2217 | { |
2451 | struct btrfs_path *path; | 2218 | struct btrfs_path *path; |
2452 | struct btrfs_key key; | 2219 | struct btrfs_key key; |
@@ -2468,6 +2235,7 @@ static int __free_extent(struct btrfs_trans_handle *trans, | |||
2468 | return -ENOMEM; | 2235 | return -ENOMEM; |
2469 | 2236 | ||
2470 | path->reada = 1; | 2237 | path->reada = 1; |
2238 | path->leave_spinning = 1; | ||
2471 | ret = lookup_extent_backref(trans, extent_root, path, | 2239 | ret = lookup_extent_backref(trans, extent_root, path, |
2472 | bytenr, parent, root_objectid, | 2240 | bytenr, parent, root_objectid, |
2473 | ref_generation, owner_objectid, 1); | 2241 | ref_generation, owner_objectid, 1); |
@@ -2489,9 +2257,11 @@ static int __free_extent(struct btrfs_trans_handle *trans, | |||
2489 | break; | 2257 | break; |
2490 | } | 2258 | } |
2491 | if (!found_extent) { | 2259 | if (!found_extent) { |
2492 | ret = remove_extent_backref(trans, extent_root, path); | 2260 | ret = remove_extent_backref(trans, extent_root, path, |
2261 | refs_to_drop); | ||
2493 | BUG_ON(ret); | 2262 | BUG_ON(ret); |
2494 | btrfs_release_path(extent_root, path); | 2263 | btrfs_release_path(extent_root, path); |
2264 | path->leave_spinning = 1; | ||
2495 | ret = btrfs_search_slot(trans, extent_root, | 2265 | ret = btrfs_search_slot(trans, extent_root, |
2496 | &key, path, -1, 1); | 2266 | &key, path, -1, 1); |
2497 | if (ret) { | 2267 | if (ret) { |
@@ -2507,8 +2277,9 @@ static int __free_extent(struct btrfs_trans_handle *trans, | |||
2507 | btrfs_print_leaf(extent_root, path->nodes[0]); | 2277 | btrfs_print_leaf(extent_root, path->nodes[0]); |
2508 | WARN_ON(1); | 2278 | WARN_ON(1); |
2509 | printk(KERN_ERR "btrfs unable to find ref byte nr %llu " | 2279 | printk(KERN_ERR "btrfs unable to find ref byte nr %llu " |
2510 | "root %llu gen %llu owner %llu\n", | 2280 | "parent %llu root %llu gen %llu owner %llu\n", |
2511 | (unsigned long long)bytenr, | 2281 | (unsigned long long)bytenr, |
2282 | (unsigned long long)parent, | ||
2512 | (unsigned long long)root_objectid, | 2283 | (unsigned long long)root_objectid, |
2513 | (unsigned long long)ref_generation, | 2284 | (unsigned long long)ref_generation, |
2514 | (unsigned long long)owner_objectid); | 2285 | (unsigned long long)owner_objectid); |
@@ -2518,17 +2289,23 @@ static int __free_extent(struct btrfs_trans_handle *trans, | |||
2518 | ei = btrfs_item_ptr(leaf, extent_slot, | 2289 | ei = btrfs_item_ptr(leaf, extent_slot, |
2519 | struct btrfs_extent_item); | 2290 | struct btrfs_extent_item); |
2520 | refs = btrfs_extent_refs(leaf, ei); | 2291 | refs = btrfs_extent_refs(leaf, ei); |
2521 | BUG_ON(refs == 0); | ||
2522 | refs -= 1; | ||
2523 | btrfs_set_extent_refs(leaf, ei, refs); | ||
2524 | 2292 | ||
2293 | /* | ||
2294 | * we're not allowed to delete the extent item if there | ||
2295 | * are other delayed ref updates pending | ||
2296 | */ | ||
2297 | |||
2298 | BUG_ON(refs < refs_to_drop); | ||
2299 | refs -= refs_to_drop; | ||
2300 | btrfs_set_extent_refs(leaf, ei, refs); | ||
2525 | btrfs_mark_buffer_dirty(leaf); | 2301 | btrfs_mark_buffer_dirty(leaf); |
2526 | 2302 | ||
2527 | if (refs == 0 && found_extent && path->slots[0] == extent_slot + 1) { | 2303 | if (refs == 0 && found_extent && |
2304 | path->slots[0] == extent_slot + 1) { | ||
2528 | struct btrfs_extent_ref *ref; | 2305 | struct btrfs_extent_ref *ref; |
2529 | ref = btrfs_item_ptr(leaf, path->slots[0], | 2306 | ref = btrfs_item_ptr(leaf, path->slots[0], |
2530 | struct btrfs_extent_ref); | 2307 | struct btrfs_extent_ref); |
2531 | BUG_ON(btrfs_ref_num_refs(leaf, ref) != 1); | 2308 | BUG_ON(btrfs_ref_num_refs(leaf, ref) != refs_to_drop); |
2532 | /* if the back ref and the extent are next to each other | 2309 | /* if the back ref and the extent are next to each other |
2533 | * they get deleted below in one shot | 2310 | * they get deleted below in one shot |
2534 | */ | 2311 | */ |
@@ -2536,11 +2313,13 @@ static int __free_extent(struct btrfs_trans_handle *trans, | |||
2536 | num_to_del = 2; | 2313 | num_to_del = 2; |
2537 | } else if (found_extent) { | 2314 | } else if (found_extent) { |
2538 | /* otherwise delete the extent back ref */ | 2315 | /* otherwise delete the extent back ref */ |
2539 | ret = remove_extent_backref(trans, extent_root, path); | 2316 | ret = remove_extent_backref(trans, extent_root, path, |
2317 | refs_to_drop); | ||
2540 | BUG_ON(ret); | 2318 | BUG_ON(ret); |
2541 | /* if refs are 0, we need to setup the path for deletion */ | 2319 | /* if refs are 0, we need to setup the path for deletion */ |
2542 | if (refs == 0) { | 2320 | if (refs == 0) { |
2543 | btrfs_release_path(extent_root, path); | 2321 | btrfs_release_path(extent_root, path); |
2322 | path->leave_spinning = 1; | ||
2544 | ret = btrfs_search_slot(trans, extent_root, &key, path, | 2323 | ret = btrfs_search_slot(trans, extent_root, &key, path, |
2545 | -1, 1); | 2324 | -1, 1); |
2546 | BUG_ON(ret); | 2325 | BUG_ON(ret); |
@@ -2550,16 +2329,18 @@ static int __free_extent(struct btrfs_trans_handle *trans, | |||
2550 | if (refs == 0) { | 2329 | if (refs == 0) { |
2551 | u64 super_used; | 2330 | u64 super_used; |
2552 | u64 root_used; | 2331 | u64 root_used; |
2332 | struct extent_buffer *must_clean = NULL; | ||
2553 | 2333 | ||
2554 | if (pin) { | 2334 | if (pin) { |
2555 | mutex_lock(&root->fs_info->pinned_mutex); | 2335 | ret = pin_down_bytes(trans, root, path, |
2556 | ret = pin_down_bytes(trans, root, bytenr, num_bytes, | 2336 | bytenr, num_bytes, |
2557 | owner_objectid >= BTRFS_FIRST_FREE_OBJECTID); | 2337 | owner_objectid >= BTRFS_FIRST_FREE_OBJECTID, |
2558 | mutex_unlock(&root->fs_info->pinned_mutex); | 2338 | &must_clean); |
2559 | if (ret > 0) | 2339 | if (ret > 0) |
2560 | mark_free = 1; | 2340 | mark_free = 1; |
2561 | BUG_ON(ret < 0); | 2341 | BUG_ON(ret < 0); |
2562 | } | 2342 | } |
2343 | |||
2563 | /* block accounting for super block */ | 2344 | /* block accounting for super block */ |
2564 | spin_lock(&info->delalloc_lock); | 2345 | spin_lock(&info->delalloc_lock); |
2565 | super_used = btrfs_super_bytes_used(&info->super_copy); | 2346 | super_used = btrfs_super_bytes_used(&info->super_copy); |
@@ -2571,14 +2352,34 @@ static int __free_extent(struct btrfs_trans_handle *trans, | |||
2571 | btrfs_set_root_used(&root->root_item, | 2352 | btrfs_set_root_used(&root->root_item, |
2572 | root_used - num_bytes); | 2353 | root_used - num_bytes); |
2573 | spin_unlock(&info->delalloc_lock); | 2354 | spin_unlock(&info->delalloc_lock); |
2355 | |||
2356 | /* | ||
2357 | * it is going to be very rare for someone to be waiting | ||
2358 | * on the block we're freeing. del_items might need to | ||
2359 | * schedule, so rather than get fancy, just force it | ||
2360 | * to blocking here | ||
2361 | */ | ||
2362 | if (must_clean) | ||
2363 | btrfs_set_lock_blocking(must_clean); | ||
2364 | |||
2574 | ret = btrfs_del_items(trans, extent_root, path, path->slots[0], | 2365 | ret = btrfs_del_items(trans, extent_root, path, path->slots[0], |
2575 | num_to_del); | 2366 | num_to_del); |
2576 | BUG_ON(ret); | 2367 | BUG_ON(ret); |
2577 | btrfs_release_path(extent_root, path); | 2368 | btrfs_release_path(extent_root, path); |
2578 | 2369 | ||
2370 | if (must_clean) { | ||
2371 | clean_tree_block(NULL, root, must_clean); | ||
2372 | btrfs_tree_unlock(must_clean); | ||
2373 | free_extent_buffer(must_clean); | ||
2374 | } | ||
2375 | |||
2579 | if (owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { | 2376 | if (owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { |
2580 | ret = btrfs_del_csums(trans, root, bytenr, num_bytes); | 2377 | ret = btrfs_del_csums(trans, root, bytenr, num_bytes); |
2581 | BUG_ON(ret); | 2378 | BUG_ON(ret); |
2379 | } else { | ||
2380 | invalidate_mapping_pages(info->btree_inode->i_mapping, | ||
2381 | bytenr >> PAGE_CACHE_SHIFT, | ||
2382 | (bytenr + num_bytes - 1) >> PAGE_CACHE_SHIFT); | ||
2582 | } | 2383 | } |
2583 | 2384 | ||
2584 | ret = update_block_group(trans, root, bytenr, num_bytes, 0, | 2385 | ret = update_block_group(trans, root, bytenr, num_bytes, 0, |
@@ -2586,216 +2387,103 @@ static int __free_extent(struct btrfs_trans_handle *trans, | |||
2586 | BUG_ON(ret); | 2387 | BUG_ON(ret); |
2587 | } | 2388 | } |
2588 | btrfs_free_path(path); | 2389 | btrfs_free_path(path); |
2589 | finish_current_insert(trans, extent_root, 0); | ||
2590 | return ret; | 2390 | return ret; |
2591 | } | 2391 | } |
2592 | 2392 | ||
2593 | /* | 2393 | /* |
2594 | * find all the blocks marked as pending in the radix tree and remove | 2394 | * remove an extent from the root, returns 0 on success |
2595 | * them from the extent map | ||
2596 | */ | 2395 | */ |
2597 | static int del_pending_extents(struct btrfs_trans_handle *trans, | 2396 | static int __btrfs_free_extent(struct btrfs_trans_handle *trans, |
2598 | struct btrfs_root *extent_root, int all) | 2397 | struct btrfs_root *root, |
2398 | u64 bytenr, u64 num_bytes, u64 parent, | ||
2399 | u64 root_objectid, u64 ref_generation, | ||
2400 | u64 owner_objectid, int pin, | ||
2401 | int refs_to_drop) | ||
2599 | { | 2402 | { |
2600 | int ret; | 2403 | WARN_ON(num_bytes < root->sectorsize); |
2601 | int err = 0; | ||
2602 | u64 start; | ||
2603 | u64 end; | ||
2604 | u64 priv; | ||
2605 | u64 search = 0; | ||
2606 | int nr = 0, skipped = 0; | ||
2607 | struct extent_io_tree *pending_del; | ||
2608 | struct extent_io_tree *extent_ins; | ||
2609 | struct pending_extent_op *extent_op; | ||
2610 | struct btrfs_fs_info *info = extent_root->fs_info; | ||
2611 | struct list_head delete_list; | ||
2612 | |||
2613 | INIT_LIST_HEAD(&delete_list); | ||
2614 | extent_ins = &extent_root->fs_info->extent_ins; | ||
2615 | pending_del = &extent_root->fs_info->pending_del; | ||
2616 | |||
2617 | again: | ||
2618 | mutex_lock(&info->extent_ins_mutex); | ||
2619 | while (1) { | ||
2620 | ret = find_first_extent_bit(pending_del, search, &start, &end, | ||
2621 | EXTENT_WRITEBACK); | ||
2622 | if (ret) { | ||
2623 | if (all && skipped && !nr) { | ||
2624 | search = 0; | ||
2625 | skipped = 0; | ||
2626 | continue; | ||
2627 | } | ||
2628 | mutex_unlock(&info->extent_ins_mutex); | ||
2629 | break; | ||
2630 | } | ||
2631 | |||
2632 | ret = try_lock_extent(extent_ins, start, end, GFP_NOFS); | ||
2633 | if (!ret) { | ||
2634 | search = end+1; | ||
2635 | skipped = 1; | ||
2636 | |||
2637 | if (need_resched()) { | ||
2638 | mutex_unlock(&info->extent_ins_mutex); | ||
2639 | cond_resched(); | ||
2640 | mutex_lock(&info->extent_ins_mutex); | ||
2641 | } | ||
2642 | |||
2643 | continue; | ||
2644 | } | ||
2645 | BUG_ON(ret < 0); | ||
2646 | |||
2647 | ret = get_state_private(pending_del, start, &priv); | ||
2648 | BUG_ON(ret); | ||
2649 | extent_op = (struct pending_extent_op *)(unsigned long)priv; | ||
2650 | |||
2651 | clear_extent_bits(pending_del, start, end, EXTENT_WRITEBACK, | ||
2652 | GFP_NOFS); | ||
2653 | if (!test_range_bit(extent_ins, start, end, | ||
2654 | EXTENT_WRITEBACK, 0)) { | ||
2655 | list_add_tail(&extent_op->list, &delete_list); | ||
2656 | nr++; | ||
2657 | } else { | ||
2658 | kfree(extent_op); | ||
2659 | |||
2660 | ret = get_state_private(&info->extent_ins, start, | ||
2661 | &priv); | ||
2662 | BUG_ON(ret); | ||
2663 | extent_op = (struct pending_extent_op *) | ||
2664 | (unsigned long)priv; | ||
2665 | |||
2666 | clear_extent_bits(&info->extent_ins, start, end, | ||
2667 | EXTENT_WRITEBACK, GFP_NOFS); | ||
2668 | |||
2669 | if (extent_op->type == PENDING_BACKREF_UPDATE) { | ||
2670 | list_add_tail(&extent_op->list, &delete_list); | ||
2671 | search = end + 1; | ||
2672 | nr++; | ||
2673 | continue; | ||
2674 | } | ||
2675 | |||
2676 | mutex_lock(&extent_root->fs_info->pinned_mutex); | ||
2677 | ret = pin_down_bytes(trans, extent_root, start, | ||
2678 | end + 1 - start, 0); | ||
2679 | mutex_unlock(&extent_root->fs_info->pinned_mutex); | ||
2680 | |||
2681 | ret = update_block_group(trans, extent_root, start, | ||
2682 | end + 1 - start, 0, ret > 0); | ||
2683 | |||
2684 | unlock_extent(extent_ins, start, end, GFP_NOFS); | ||
2685 | BUG_ON(ret); | ||
2686 | kfree(extent_op); | ||
2687 | } | ||
2688 | if (ret) | ||
2689 | err = ret; | ||
2690 | |||
2691 | search = end + 1; | ||
2692 | |||
2693 | if (need_resched()) { | ||
2694 | mutex_unlock(&info->extent_ins_mutex); | ||
2695 | cond_resched(); | ||
2696 | mutex_lock(&info->extent_ins_mutex); | ||
2697 | } | ||
2698 | } | ||
2699 | 2404 | ||
2700 | if (nr) { | 2405 | /* |
2701 | ret = free_extents(trans, extent_root, &delete_list); | 2406 | * if metadata always pin |
2702 | BUG_ON(ret); | 2407 | * if data pin when any transaction has committed this |
2703 | } | 2408 | */ |
2409 | if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID || | ||
2410 | ref_generation != trans->transid) | ||
2411 | pin = 1; | ||
2704 | 2412 | ||
2705 | if (all && skipped) { | 2413 | if (ref_generation != trans->transid) |
2706 | INIT_LIST_HEAD(&delete_list); | 2414 | pin = 1; |
2707 | search = 0; | ||
2708 | nr = 0; | ||
2709 | goto again; | ||
2710 | } | ||
2711 | 2415 | ||
2712 | return err; | 2416 | return __free_extent(trans, root, bytenr, num_bytes, parent, |
2417 | root_objectid, ref_generation, | ||
2418 | owner_objectid, pin, pin == 0, refs_to_drop); | ||
2713 | } | 2419 | } |
2714 | 2420 | ||
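
The new __btrfs_free_extent() above reduces the pin decision to two rules: metadata is always pinned, and data is pinned whenever the freeing transaction is not the one that allocated it, since a committed transaction may still reference the blocks. A standalone restatement of that predicate (the objectid cutoff is assumed here rather than taken from ctree.h):

    #include <stdbool.h>
    #include <stdio.h>

    /* first objectid used for regular files; metadata owners sit below it
     * (value assumed for illustration) */
    #define FIRST_FREE_OBJECTID 256ULL

    static bool must_pin(unsigned long long owner_objectid,
                         unsigned long long ref_generation,
                         unsigned long long transid)
    {
        if (owner_objectid < FIRST_FREE_OBJECTID)  /* metadata: always pin */
            return true;
        return ref_generation != transid;          /* data from an older trans */
    }

    int main(void)
    {
        printf("%d\n", must_pin(5, 100, 100));   /* metadata -> 1 */
        printf("%d\n", must_pin(300, 99, 100));  /* old data  -> 1 */
        printf("%d\n", must_pin(300, 100, 100)); /* fresh data -> 0 */
        return 0;
    }
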
2715 | /* | 2421 | /* |
2716 | * remove an extent from the root, returns 0 on success | 2422 | * when we free an extent, it is possible (and likely) that we free the last |
2423 | * delayed ref for that extent as well. This searches the delayed ref tree for | ||
2424 | * a given extent, and if there are no other delayed refs to be processed, it | ||
2425 | * removes it from the tree. | ||
2717 | */ | 2426 | */ |
2718 | static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | 2427 | static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans, |
2719 | struct btrfs_root *root, | 2428 | struct btrfs_root *root, u64 bytenr) |
2720 | u64 bytenr, u64 num_bytes, u64 parent, | ||
2721 | u64 root_objectid, u64 ref_generation, | ||
2722 | u64 owner_objectid, int pin) | ||
2723 | { | 2429 | { |
2724 | struct btrfs_root *extent_root = root->fs_info->extent_root; | 2430 | struct btrfs_delayed_ref_head *head; |
2725 | int pending_ret; | 2431 | struct btrfs_delayed_ref_root *delayed_refs; |
2432 | struct btrfs_delayed_ref_node *ref; | ||
2433 | struct rb_node *node; | ||
2726 | int ret; | 2434 | int ret; |
2727 | 2435 | ||
2728 | WARN_ON(num_bytes < root->sectorsize); | 2436 | delayed_refs = &trans->transaction->delayed_refs; |
2729 | if (root == extent_root) { | 2437 | spin_lock(&delayed_refs->lock); |
2730 | struct pending_extent_op *extent_op = NULL; | 2438 | head = btrfs_find_delayed_ref_head(trans, bytenr); |
2731 | 2439 | if (!head) | |
2732 | mutex_lock(&root->fs_info->extent_ins_mutex); | 2440 | goto out; |
2733 | if (test_range_bit(&root->fs_info->extent_ins, bytenr, | ||
2734 | bytenr + num_bytes - 1, EXTENT_WRITEBACK, 0)) { | ||
2735 | u64 priv; | ||
2736 | ret = get_state_private(&root->fs_info->extent_ins, | ||
2737 | bytenr, &priv); | ||
2738 | BUG_ON(ret); | ||
2739 | extent_op = (struct pending_extent_op *) | ||
2740 | (unsigned long)priv; | ||
2741 | 2441 | ||
2742 | extent_op->del = 1; | 2442 | node = rb_prev(&head->node.rb_node); |
2743 | if (extent_op->type == PENDING_EXTENT_INSERT) { | 2443 | if (!node) |
2744 | mutex_unlock(&root->fs_info->extent_ins_mutex); | 2444 | goto out; |
2745 | return 0; | ||
2746 | } | ||
2747 | } | ||
2748 | 2445 | ||
2749 | if (extent_op) { | 2446 | ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); |
2750 | ref_generation = extent_op->orig_generation; | ||
2751 | parent = extent_op->orig_parent; | ||
2752 | } | ||
2753 | 2447 | ||
2754 | extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); | 2448 | /* there are still entries for this ref, we can't drop it */ |
2755 | BUG_ON(!extent_op); | 2449 | if (ref->bytenr == bytenr) |
2756 | 2450 | goto out; | |
2757 | extent_op->type = PENDING_EXTENT_DELETE; | ||
2758 | extent_op->bytenr = bytenr; | ||
2759 | extent_op->num_bytes = num_bytes; | ||
2760 | extent_op->parent = parent; | ||
2761 | extent_op->orig_parent = parent; | ||
2762 | extent_op->generation = ref_generation; | ||
2763 | extent_op->orig_generation = ref_generation; | ||
2764 | extent_op->level = (int)owner_objectid; | ||
2765 | INIT_LIST_HEAD(&extent_op->list); | ||
2766 | extent_op->del = 0; | ||
2767 | |||
2768 | set_extent_bits(&root->fs_info->pending_del, | ||
2769 | bytenr, bytenr + num_bytes - 1, | ||
2770 | EXTENT_WRITEBACK, GFP_NOFS); | ||
2771 | set_state_private(&root->fs_info->pending_del, | ||
2772 | bytenr, (unsigned long)extent_op); | ||
2773 | mutex_unlock(&root->fs_info->extent_ins_mutex); | ||
2774 | return 0; | ||
2775 | } | ||
2776 | /* if metadata always pin */ | ||
2777 | if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID) { | ||
2778 | if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { | ||
2779 | mutex_lock(&root->fs_info->pinned_mutex); | ||
2780 | btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); | ||
2781 | mutex_unlock(&root->fs_info->pinned_mutex); | ||
2782 | update_reserved_extents(root, bytenr, num_bytes, 0); | ||
2783 | return 0; | ||
2784 | } | ||
2785 | pin = 1; | ||
2786 | } | ||
2787 | 2451 | ||
2788 | /* if data pin when any transaction has committed this */ | 2452 | /* |
2789 | if (ref_generation != trans->transid) | 2453 | * waiting for the lock here would deadlock. If someone else has it |
2790 | pin = 1; | 2454 | * locked they are already in the process of dropping it anyway |
2455 | */ | ||
2456 | if (!mutex_trylock(&head->mutex)) | ||
2457 | goto out; | ||
2791 | 2458 | ||
2792 | ret = __free_extent(trans, root, bytenr, num_bytes, parent, | 2459 | /* |
2793 | root_objectid, ref_generation, | 2460 | * at this point we have a head with no other entries. Go |
2794 | owner_objectid, pin, pin == 0); | 2461 | * ahead and process it. |
2462 | */ | ||
2463 | head->node.in_tree = 0; | ||
2464 | rb_erase(&head->node.rb_node, &delayed_refs->root); | ||
2795 | 2465 | ||
2796 | finish_current_insert(trans, root->fs_info->extent_root, 0); | 2466 | delayed_refs->num_entries--; |
2797 | pending_ret = del_pending_extents(trans, root->fs_info->extent_root, 0); | 2467 | |
2798 | return ret ? ret : pending_ret; | 2468 | /* |
2469 | * we don't take a ref on the node because we're removing it from the | ||
2470 | * tree, so we just steal the ref the tree was holding. | ||
2471 | */ | ||
2472 | delayed_refs->num_heads--; | ||
2473 | if (list_empty(&head->cluster)) | ||
2474 | delayed_refs->num_heads_ready--; | ||
2475 | |||
2476 | list_del_init(&head->cluster); | ||
2477 | spin_unlock(&delayed_refs->lock); | ||
2478 | |||
2479 | ret = run_one_delayed_ref(trans, root->fs_info->tree_root, | ||
2480 | &head->node, head->must_insert_reserved); | ||
2481 | BUG_ON(ret); | ||
2482 | btrfs_put_delayed_ref(&head->node); | ||
2483 | return 0; | ||
2484 | out: | ||
2485 | spin_unlock(&delayed_refs->lock); | ||
2486 | return 0; | ||
2799 | } | 2487 | } |
2800 | 2488 | ||
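
Two details of check_ref_cleanup() above deserve a note: rb_prev() from the head is enough to spot remaining refs, because the delayed refs for one bytenr sort together with the head last, and mutex_trylock() is used instead of mutex_lock() because waiting would deadlock against whoever already holds the head and is dropping it. A sketch of the trylock-or-skip idiom, with POSIX threads standing in for the kernel mutex:

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t head_mutex = PTHREAD_MUTEX_INITIALIZER;

    /* try to finish the last ref ourselves; if someone else holds the
     * head they are already in the process of dropping it, so back off */
    static void try_ref_cleanup(void)
    {
        if (pthread_mutex_trylock(&head_mutex) != 0) {
            printf("head busy, skipping cleanup\n");
            return;
        }
        printf("no other refs pending, running final drop\n");
        pthread_mutex_unlock(&head_mutex);
    }

    int main(void)
    {
        try_ref_cleanup();
        return 0;
    }
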
2801 | int btrfs_free_extent(struct btrfs_trans_handle *trans, | 2489 | int btrfs_free_extent(struct btrfs_trans_handle *trans, |
@@ -2806,9 +2494,28 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
2806 | { | 2494 | { |
2807 | int ret; | 2495 | int ret; |
2808 | 2496 | ||
2809 | ret = __btrfs_free_extent(trans, root, bytenr, num_bytes, parent, | 2497 | /* |
2810 | root_objectid, ref_generation, | 2498 | * tree log blocks never actually go into the extent allocation |
2811 | owner_objectid, pin); | 2499 | * tree, just update pinning info and exit early. |
2500 | * | ||
2501 | * data extents referenced by the tree log do need to have | ||
2502 | * their reference counts bumped. | ||
2503 | */ | ||
2504 | if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID && | ||
2505 | owner_objectid < BTRFS_FIRST_FREE_OBJECTID) { | ||
2506 | /* unlocks the pinned mutex */ | ||
2507 | btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); | ||
2508 | update_reserved_extents(root, bytenr, num_bytes, 0); | ||
2509 | ret = 0; | ||
2510 | } else { | ||
2511 | ret = btrfs_add_delayed_ref(trans, bytenr, num_bytes, parent, | ||
2512 | root_objectid, ref_generation, | ||
2513 | owner_objectid, | ||
2514 | BTRFS_DROP_DELAYED_REF, 1); | ||
2515 | BUG_ON(ret); | ||
2516 | ret = check_ref_cleanup(trans, root, bytenr); | ||
2517 | BUG_ON(ret); | ||
2518 | } | ||
2812 | return ret; | 2519 | return ret; |
2813 | } | 2520 | } |
2814 | 2521 | ||
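
After this hunk a free no longer edits the extent tree inline: apart from tree-log metadata, btrfs_free_extent() records a BTRFS_DROP_DELAYED_REF and the actual tree update happens when the delayed refs are run, so an add and a drop for the same extent can cancel out before ever touching the tree. A toy model of that merging (plain counters instead of the rb-tree of ref heads):

    #include <stdio.h>

    #define MAX_EXTENTS 16

    static int ref_mod[MAX_EXTENTS];   /* pending +N/-N per extent */

    static void queue_ref(int slot, int delta) { ref_mod[slot] += delta; }

    int main(void)
    {
        queue_ref(3, +1);   /* allocation queued */
        queue_ref(3, -1);   /* freed again in the same transaction */
        queue_ref(7, -1);   /* plain drop */

        for (int i = 0; i < MAX_EXTENTS; i++)
            if (ref_mod[i])   /* only surviving mods reach the extent tree */
                printf("extent %d: apply %+d\n", i, ref_mod[i]);
        return 0;
    }
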
@@ -2837,227 +2544,237 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, | |||
2837 | { | 2544 | { |
2838 | int ret = 0; | 2545 | int ret = 0; |
2839 | struct btrfs_root *root = orig_root->fs_info->extent_root; | 2546 | struct btrfs_root *root = orig_root->fs_info->extent_root; |
2840 | u64 total_needed = num_bytes; | 2547 | struct btrfs_free_cluster *last_ptr = NULL; |
2841 | u64 *last_ptr = NULL; | ||
2842 | u64 last_wanted = 0; | ||
2843 | struct btrfs_block_group_cache *block_group = NULL; | 2548 | struct btrfs_block_group_cache *block_group = NULL; |
2844 | int chunk_alloc_done = 0; | ||
2845 | int empty_cluster = 2 * 1024 * 1024; | 2549 | int empty_cluster = 2 * 1024 * 1024; |
2846 | int allowed_chunk_alloc = 0; | 2550 | int allowed_chunk_alloc = 0; |
2847 | struct list_head *head = NULL, *cur = NULL; | ||
2848 | int loop = 0; | ||
2849 | int extra_loop = 0; | ||
2850 | struct btrfs_space_info *space_info; | 2551 | struct btrfs_space_info *space_info; |
2552 | int last_ptr_loop = 0; | ||
2553 | int loop = 0; | ||
2851 | 2554 | ||
2852 | WARN_ON(num_bytes < root->sectorsize); | 2555 | WARN_ON(num_bytes < root->sectorsize); |
2853 | btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); | 2556 | btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); |
2854 | ins->objectid = 0; | 2557 | ins->objectid = 0; |
2855 | ins->offset = 0; | 2558 | ins->offset = 0; |
2856 | 2559 | ||
2560 | space_info = __find_space_info(root->fs_info, data); | ||
2561 | |||
2857 | if (orig_root->ref_cows || empty_size) | 2562 | if (orig_root->ref_cows || empty_size) |
2858 | allowed_chunk_alloc = 1; | 2563 | allowed_chunk_alloc = 1; |
2859 | 2564 | ||
2860 | if (data & BTRFS_BLOCK_GROUP_METADATA) { | 2565 | if (data & BTRFS_BLOCK_GROUP_METADATA) { |
2861 | last_ptr = &root->fs_info->last_alloc; | 2566 | last_ptr = &root->fs_info->meta_alloc_cluster; |
2862 | empty_cluster = 64 * 1024; | 2567 | if (!btrfs_test_opt(root, SSD)) |
2568 | empty_cluster = 64 * 1024; | ||
2863 | } | 2569 | } |
2864 | 2570 | ||
2865 | if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) | 2571 | if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) { |
2866 | last_ptr = &root->fs_info->last_data_alloc; | 2572 | last_ptr = &root->fs_info->data_alloc_cluster; |
2573 | } | ||
2867 | 2574 | ||
2868 | if (last_ptr) { | 2575 | if (last_ptr) { |
2869 | if (*last_ptr) { | 2576 | spin_lock(&last_ptr->lock); |
2870 | hint_byte = *last_ptr; | 2577 | if (last_ptr->block_group) |
2871 | last_wanted = *last_ptr; | 2578 | hint_byte = last_ptr->window_start; |
2872 | } else | 2579 | spin_unlock(&last_ptr->lock); |
2873 | empty_size += empty_cluster; | ||
2874 | } else { | ||
2875 | empty_cluster = 0; | ||
2876 | } | 2580 | } |
2581 | |||
2877 | search_start = max(search_start, first_logical_byte(root, 0)); | 2582 | search_start = max(search_start, first_logical_byte(root, 0)); |
2878 | search_start = max(search_start, hint_byte); | 2583 | search_start = max(search_start, hint_byte); |
2879 | 2584 | ||
2880 | if (last_wanted && search_start != last_wanted) { | 2585 | if (!last_ptr) { |
2881 | last_wanted = 0; | 2586 | empty_cluster = 0; |
2882 | empty_size += empty_cluster; | 2587 | loop = 1; |
2883 | } | 2588 | } |
2884 | 2589 | ||
2885 | total_needed += empty_size; | 2590 | if (search_start == hint_byte) { |
2886 | block_group = btrfs_lookup_block_group(root->fs_info, search_start); | 2591 | block_group = btrfs_lookup_block_group(root->fs_info, |
2887 | if (!block_group) | 2592 | search_start); |
2888 | block_group = btrfs_lookup_first_block_group(root->fs_info, | 2593 | if (block_group && block_group_bits(block_group, data)) { |
2889 | search_start); | 2594 | down_read(&space_info->groups_sem); |
2890 | space_info = __find_space_info(root->fs_info, data); | 2595 | goto have_block_group; |
2596 | } else if (block_group) { | ||
2597 | btrfs_put_block_group(block_group); | ||
2598 | } | ||
2599 | } | ||
2891 | 2600 | ||
2601 | search: | ||
2892 | down_read(&space_info->groups_sem); | 2602 | down_read(&space_info->groups_sem); |
2893 | while (1) { | 2603 | list_for_each_entry(block_group, &space_info->block_groups, list) { |
2894 | struct btrfs_free_space *free_space; | 2604 | u64 offset; |
2895 | /* | ||
2896 | * the only way this happens is if our hint points to a block | ||
2897 | * group that's not of the proper type; while looping this | ||
2898 | * should never happen | ||
2899 | */ | ||
2900 | if (empty_size) | ||
2901 | extra_loop = 1; | ||
2902 | 2605 | ||
2903 | if (!block_group) | 2606 | atomic_inc(&block_group->count); |
2904 | goto new_group_no_lock; | 2607 | search_start = block_group->key.objectid; |
2905 | 2608 | ||
2609 | have_block_group: | ||
2906 | if (unlikely(!block_group->cached)) { | 2610 | if (unlikely(!block_group->cached)) { |
2907 | mutex_lock(&block_group->cache_mutex); | 2611 | mutex_lock(&block_group->cache_mutex); |
2908 | ret = cache_block_group(root, block_group); | 2612 | ret = cache_block_group(root, block_group); |
2909 | mutex_unlock(&block_group->cache_mutex); | 2613 | mutex_unlock(&block_group->cache_mutex); |
2910 | if (ret) | 2614 | if (ret) { |
2615 | btrfs_put_block_group(block_group); | ||
2911 | break; | 2616 | break; |
2617 | } | ||
2912 | } | 2618 | } |
2913 | 2619 | ||
2914 | mutex_lock(&block_group->alloc_mutex); | ||
2915 | if (unlikely(!block_group_bits(block_group, data))) | ||
2916 | goto new_group; | ||
2917 | |||
2918 | if (unlikely(block_group->ro)) | 2620 | if (unlikely(block_group->ro)) |
2919 | goto new_group; | 2621 | goto loop; |
2920 | 2622 | ||
2921 | free_space = btrfs_find_free_space(block_group, search_start, | 2623 | if (last_ptr) { |
2922 | total_needed); | 2624 | /* |
2923 | if (free_space) { | 2625 | * the refill lock keeps out other |
2924 | u64 start = block_group->key.objectid; | 2626 | * people trying to start a new cluster |
2925 | u64 end = block_group->key.objectid + | 2627 | */ |
2926 | block_group->key.offset; | 2628 | spin_lock(&last_ptr->refill_lock); |
2629 | offset = btrfs_alloc_from_cluster(block_group, last_ptr, | ||
2630 | num_bytes, search_start); | ||
2631 | if (offset) { | ||
2632 | /* we have a block, we're done */ | ||
2633 | spin_unlock(&last_ptr->refill_lock); | ||
2634 | goto checks; | ||
2635 | } | ||
2927 | 2636 | ||
2928 | search_start = stripe_align(root, free_space->offset); | 2637 | spin_lock(&last_ptr->lock); |
2638 | /* | ||
2639 | * whoops, this cluster doesn't actually point to | ||
2640 | * this block group. Get a ref on the block | ||
2641 | * group it does point to and try again | ||
2642 | */ | ||
2643 | if (!last_ptr_loop && last_ptr->block_group && | ||
2644 | last_ptr->block_group != block_group) { | ||
2645 | |||
2646 | btrfs_put_block_group(block_group); | ||
2647 | block_group = last_ptr->block_group; | ||
2648 | atomic_inc(&block_group->count); | ||
2649 | spin_unlock(&last_ptr->lock); | ||
2650 | spin_unlock(&last_ptr->refill_lock); | ||
2651 | |||
2652 | last_ptr_loop = 1; | ||
2653 | search_start = block_group->key.objectid; | ||
2654 | goto have_block_group; | ||
2655 | } | ||
2656 | spin_unlock(&last_ptr->lock); | ||
2929 | 2657 | ||
2930 | /* move on to the next group */ | 2658 | /* |
2931 | if (search_start + num_bytes >= search_end) | 2659 | * this cluster didn't work out, free it and |
2932 | goto new_group; | 2660 | * start over |
2661 | */ | ||
2662 | btrfs_return_cluster_to_free_space(NULL, last_ptr); | ||
2933 | 2663 | ||
2934 | /* move on to the next group */ | 2664 | last_ptr_loop = 0; |
2935 | if (search_start + num_bytes > end) | ||
2936 | goto new_group; | ||
2937 | 2665 | ||
2938 | if (last_wanted && search_start != last_wanted) { | 2666 | /* allocate a cluster in this block group */ |
2939 | total_needed += empty_cluster; | 2667 | ret = btrfs_find_space_cluster(trans, |
2940 | empty_size += empty_cluster; | 2668 | block_group, last_ptr, |
2941 | last_wanted = 0; | 2669 | offset, num_bytes, |
2670 | empty_cluster + empty_size); | ||
2671 | if (ret == 0) { | ||
2942 | /* | 2672 | /* |
2943 | * if search_start is still in this block group | 2673 | * now pull our allocation out of this |
2944 | * then we just re-search this block group | 2674 | * cluster |
2945 | */ | 2675 | */ |
2946 | if (search_start >= start && | 2676 | offset = btrfs_alloc_from_cluster(block_group, |
2947 | search_start < end) { | 2677 | last_ptr, num_bytes, |
2948 | mutex_unlock(&block_group->alloc_mutex); | 2678 | search_start); |
2949 | continue; | 2679 | if (offset) { |
2680 | /* we found one, proceed */ | ||
2681 | spin_unlock(&last_ptr->refill_lock); | ||
2682 | goto checks; | ||
2950 | } | 2683 | } |
2951 | |||
2952 | /* else we go to the next block group */ | ||
2953 | goto new_group; | ||
2954 | } | 2684 | } |
2955 | 2685 | /* | |
2956 | if (exclude_nr > 0 && | 2686 | * at this point we either didn't find a cluster |
2957 | (search_start + num_bytes > exclude_start && | 2687 | * or we weren't able to allocate a block from our |
2958 | search_start < exclude_start + exclude_nr)) { | 2688 | * cluster. Free the cluster we've been trying |
2959 | search_start = exclude_start + exclude_nr; | 2689 | * to use, and go to the next block group |
2960 | /* | 2690 | */ |
2961 | * if search_start is still in this block group | 2691 | if (loop < 2) { |
2962 | * then we just re-search this block group | 2692 | btrfs_return_cluster_to_free_space(NULL, |
2963 | */ | 2693 | last_ptr); |
2964 | if (search_start >= start && | 2694 | spin_unlock(&last_ptr->refill_lock); |
2965 | search_start < end) { | 2695 | goto loop; |
2966 | mutex_unlock(&block_group->alloc_mutex); | ||
2967 | last_wanted = 0; | ||
2968 | continue; | ||
2969 | } | ||
2970 | |||
2971 | /* else we go to the next block group */ | ||
2972 | goto new_group; | ||
2973 | } | 2696 | } |
2697 | spin_unlock(&last_ptr->refill_lock); | ||
2698 | } | ||
2974 | 2699 | ||
2975 | ins->objectid = search_start; | 2700 | offset = btrfs_find_space_for_alloc(block_group, search_start, |
2976 | ins->offset = num_bytes; | 2701 | num_bytes, empty_size); |
2702 | if (!offset) | ||
2703 | goto loop; | ||
2704 | checks: | ||
2705 | search_start = stripe_align(root, offset); | ||
2977 | 2706 | ||
2978 | btrfs_remove_free_space_lock(block_group, search_start, | 2707 | /* move on to the next group */ |
2979 | num_bytes); | 2708 | if (search_start + num_bytes >= search_end) { |
2980 | /* we are all good, lets return */ | 2709 | btrfs_add_free_space(block_group, offset, num_bytes); |
2981 | mutex_unlock(&block_group->alloc_mutex); | 2710 | goto loop; |
2982 | break; | ||
2983 | } | 2711 | } |
2984 | new_group: | ||
2985 | mutex_unlock(&block_group->alloc_mutex); | ||
2986 | put_block_group(block_group); | ||
2987 | block_group = NULL; | ||
2988 | new_group_no_lock: | ||
2989 | /* don't try to compare new allocations against the | ||
2990 | * last allocation any more | ||
2991 | */ | ||
2992 | last_wanted = 0; | ||
2993 | 2712 | ||
2994 | /* | 2713 | /* move on to the next group */ |
2995 | * Here's how this works. | 2714 | if (search_start + num_bytes > |
2996 | * loop == 0: we were searching a block group via a hint | 2715 | block_group->key.objectid + block_group->key.offset) { |
2997 | * and didn't find anything, so we start at | 2716 | btrfs_add_free_space(block_group, offset, num_bytes); |
2998 | * the head of the block groups and keep searching | 2717 | goto loop; |
2999 | * loop == 1: we're searching through all of the block groups | 2718 | } |
3000 | * if we hit the head again we have searched | 2719 | |
3001 | * all of the block groups for this space and we | 2720 | if (exclude_nr > 0 && |
3002 | * need to try and allocate, if we cant error out. | 2721 | (search_start + num_bytes > exclude_start && |
3003 | * loop == 2: we allocated more space and are looping through | 2722 | search_start < exclude_start + exclude_nr)) { |
3004 | * all of the block groups again. | 2723 | search_start = exclude_start + exclude_nr; |
3005 | */ | 2724 | |
3006 | if (loop == 0) { | 2725 | btrfs_add_free_space(block_group, offset, num_bytes); |
3007 | head = &space_info->block_groups; | 2726 | /* |
3008 | cur = head->next; | 2727 | * if search_start is still in this block group |
3009 | loop++; | 2728 | * then we just re-search this block group |
3010 | } else if (loop == 1 && cur == head) { | ||
3011 | int keep_going; | ||
3012 | |||
3013 | /* at this point we give up on the empty_size | ||
3014 | * allocations and just try to allocate the min | ||
3015 | * space. | ||
3016 | * | ||
3017 | * The extra_loop field was set if an empty_size | ||
3018 | * allocation was attempted above, and if this | ||
3019 | * is set we need to try the loop again without | ||
3020 | * the additional empty_size. | ||
3021 | */ | 2729 | */ |
3022 | total_needed -= empty_size; | 2730 | if (search_start >= block_group->key.objectid && |
3023 | empty_size = 0; | 2731 | search_start < (block_group->key.objectid + |
3024 | keep_going = extra_loop; | 2732 | block_group->key.offset)) |
3025 | loop++; | 2733 | goto have_block_group; |
2734 | goto loop; | ||
2735 | } | ||
3026 | 2736 | ||
3027 | if (allowed_chunk_alloc && !chunk_alloc_done) { | 2737 | ins->objectid = search_start; |
3028 | up_read(&space_info->groups_sem); | 2738 | ins->offset = num_bytes; |
3029 | ret = do_chunk_alloc(trans, root, num_bytes + | 2739 | |
3030 | 2 * 1024 * 1024, data, 1); | 2740 | if (offset < search_start) |
3031 | down_read(&space_info->groups_sem); | 2741 | btrfs_add_free_space(block_group, offset, |
3032 | if (ret < 0) | 2742 | search_start - offset); |
3033 | goto loop_check; | 2743 | BUG_ON(offset > search_start); |
3034 | head = &space_info->block_groups; | 2744 | |
3035 | /* | 2745 | /* we are all good, lets return */ |
3036 | * we've allocated a new chunk, keep | 2746 | break; |
3037 | * trying | 2747 | loop: |
3038 | */ | 2748 | btrfs_put_block_group(block_group); |
3039 | keep_going = 1; | 2749 | } |
3040 | chunk_alloc_done = 1; | 2750 | up_read(&space_info->groups_sem); |
3041 | } else if (!allowed_chunk_alloc) { | 2751 | |
3042 | space_info->force_alloc = 1; | 2752 | /* loop == 0, try to find a clustered alloc in every block group |
3043 | } | 2753 | * loop == 1, try again after forcing a chunk allocation |
3044 | loop_check: | 2754 | * loop == 2, set empty_size and empty_cluster to 0 and try again |
3045 | if (keep_going) { | 2755 | */ |
3046 | cur = head->next; | 2756 | if (!ins->objectid && loop < 3 && |
3047 | extra_loop = 0; | 2757 | (empty_size || empty_cluster || allowed_chunk_alloc)) { |
3048 | } else { | 2758 | if (loop >= 2) { |
3049 | break; | 2759 | empty_size = 0; |
3050 | } | 2760 | empty_cluster = 0; |
3051 | } else if (cur == head) { | ||
3052 | break; | ||
3053 | } | 2761 | } |
3054 | 2762 | ||
3055 | block_group = list_entry(cur, struct btrfs_block_group_cache, | 2763 | if (allowed_chunk_alloc) { |
3056 | list); | 2764 | ret = do_chunk_alloc(trans, root, num_bytes + |
3057 | atomic_inc(&block_group->count); | 2765 | 2 * 1024 * 1024, data, 1); |
2766 | allowed_chunk_alloc = 0; | ||
2767 | } else { | ||
2768 | space_info->force_alloc = 1; | ||
2769 | } | ||
3058 | 2770 | ||
3059 | search_start = block_group->key.objectid; | 2771 | if (loop < 3) { |
3060 | cur = cur->next; | 2772 | loop++; |
2773 | goto search; | ||
2774 | } | ||
2775 | ret = -ENOSPC; | ||
2776 | } else if (!ins->objectid) { | ||
2777 | ret = -ENOSPC; | ||
3061 | } | 2778 | } |
3062 | 2779 | ||
3063 | /* we found what we needed */ | 2780 | /* we found what we needed */ |
@@ -3065,21 +2782,10 @@ loop_check: | |||
3065 | if (!(data & BTRFS_BLOCK_GROUP_DATA)) | 2782 | if (!(data & BTRFS_BLOCK_GROUP_DATA)) |
3066 | trans->block_group = block_group->key.objectid; | 2783 | trans->block_group = block_group->key.objectid; |
3067 | 2784 | ||
3068 | if (last_ptr) | 2785 | btrfs_put_block_group(block_group); |
3069 | *last_ptr = ins->objectid + ins->offset; | ||
3070 | ret = 0; | 2786 | ret = 0; |
3071 | } else if (!ret) { | ||
3072 | printk(KERN_ERR "btrfs searching for %llu bytes, " | ||
3073 | "num_bytes %llu, loop %d, allowed_alloc %d\n", | ||
3074 | (unsigned long long)total_needed, | ||
3075 | (unsigned long long)num_bytes, | ||
3076 | loop, allowed_chunk_alloc); | ||
3077 | ret = -ENOSPC; | ||
3078 | } | 2787 | } |
3079 | if (block_group) | ||
3080 | put_block_group(block_group); | ||
3081 | 2788 | ||
3082 | up_read(&space_info->groups_sem); | ||
3083 | return ret; | 2789 | return ret; |
3084 | } | 2790 | } |
3085 | 2791 | ||
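
The rewritten find_free_extent() above swaps the old head/cur pointer bookkeeping for the escalation counter its closing comment describes: pass 0 tries clustered allocation in every block group, pass 1 retries after forcing a chunk allocation, pass 2 retries with empty_size and empty_cluster zeroed, and only then does it return -ENOSPC. A skeleton of that control flow (scan_groups() is a stand-in for the block-group walk):

    #include <errno.h>
    #include <stdio.h>
    #include <stdbool.h>

    static bool scan_groups(int pass) { (void)pass; return false; }

    static int find_free(void)
    {
        int empty_size = 4096, empty_cluster = 4096;

        for (int loop = 0; loop < 3; loop++) {
            if (scan_groups(loop))
                return 0;                 /* found space */
            if (loop == 0) {
                printf("forcing chunk allocation\n");
            } else {
                empty_size = empty_cluster = 0;  /* last try: bare minimum */
                printf("retrying with empty_size=%d empty_cluster=%d\n",
                       empty_size, empty_cluster);
            }
        }
        return -ENOSPC;
    }

    int main(void)
    {
        printf("find_free -> %d\n", find_free());
        return 0;
    }
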
@@ -3091,6 +2797,10 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes) | |||
3091 | (unsigned long long)(info->total_bytes - info->bytes_used - | 2797 | (unsigned long long)(info->total_bytes - info->bytes_used - |
3092 | info->bytes_pinned - info->bytes_reserved), | 2798 | info->bytes_pinned - info->bytes_reserved), |
3093 | (info->full) ? "" : "not "); | 2799 | (info->full) ? "" : "not "); |
2800 | printk(KERN_INFO "space_info total=%llu, pinned=%llu, delalloc=%llu," | ||
2801 | " may_use=%llu, used=%llu\n", info->total_bytes, | ||
2802 | info->bytes_pinned, info->bytes_delalloc, info->bytes_may_use, | ||
2803 | info->bytes_used); | ||
3094 | 2804 | ||
3095 | down_read(&info->groups_sem); | 2805 | down_read(&info->groups_sem); |
3096 | list_for_each_entry(cache, &info->block_groups, list) { | 2806 | list_for_each_entry(cache, &info->block_groups, list) { |
@@ -3117,24 +2827,10 @@ static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans, | |||
3117 | { | 2827 | { |
3118 | int ret; | 2828 | int ret; |
3119 | u64 search_start = 0; | 2829 | u64 search_start = 0; |
3120 | u64 alloc_profile; | ||
3121 | struct btrfs_fs_info *info = root->fs_info; | 2830 | struct btrfs_fs_info *info = root->fs_info; |
3122 | 2831 | ||
3123 | if (data) { | 2832 | data = btrfs_get_alloc_profile(root, data); |
3124 | alloc_profile = info->avail_data_alloc_bits & | ||
3125 | info->data_alloc_profile; | ||
3126 | data = BTRFS_BLOCK_GROUP_DATA | alloc_profile; | ||
3127 | } else if (root == root->fs_info->chunk_root) { | ||
3128 | alloc_profile = info->avail_system_alloc_bits & | ||
3129 | info->system_alloc_profile; | ||
3130 | data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile; | ||
3131 | } else { | ||
3132 | alloc_profile = info->avail_metadata_alloc_bits & | ||
3133 | info->metadata_alloc_profile; | ||
3134 | data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile; | ||
3135 | } | ||
3136 | again: | 2833 | again: |
3137 | data = btrfs_reduce_alloc_profile(root, data); | ||
3138 | /* | 2834 | /* |
3139 | * the only place that sets empty_size is btrfs_realloc_node, which | 2835 | * the only place that sets empty_size is btrfs_realloc_node, which |
3140 | * is not called recursively on allocations | 2836 | * is not called recursively on allocations |
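
The open-coded profile selection deleted above moves behind btrfs_get_alloc_profile(); judging only from the removed lines, it presumably still classifies the request as DATA, SYSTEM (for the chunk root), or METADATA and masks in the matching avail_*_alloc_bits. A restatement of the deleted logic under that assumption (flag values and struct layout invented for the sketch):

    #include <stdio.h>

    #define GROUP_DATA     0x1ULL
    #define GROUP_SYSTEM   0x2ULL
    #define GROUP_METADATA 0x4ULL

    struct fs_info {
        unsigned long long avail_data, avail_system, avail_metadata;
        unsigned long long data_profile, system_profile, metadata_profile;
        int is_chunk_root;
    };

    static unsigned long long get_alloc_profile(struct fs_info *fs, int data)
    {
        if (data)
            return GROUP_DATA | (fs->avail_data & fs->data_profile);
        if (fs->is_chunk_root)
            return GROUP_SYSTEM | (fs->avail_system & fs->system_profile);
        return GROUP_METADATA | (fs->avail_metadata & fs->metadata_profile);
    }

    int main(void)
    {
        struct fs_info fs = { .avail_data = 0xf0, .data_profile = 0x30 };
        printf("profile=%#llx\n", get_alloc_profile(&fs, 1));
        return 0;
    }
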
@@ -3194,7 +2890,7 @@ int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len) | |||
3194 | ret = btrfs_discard_extent(root, start, len); | 2890 | ret = btrfs_discard_extent(root, start, len); |
3195 | 2891 | ||
3196 | btrfs_add_free_space(cache, start, len); | 2892 | btrfs_add_free_space(cache, start, len); |
3197 | put_block_group(cache); | 2893 | btrfs_put_block_group(cache); |
3198 | update_reserved_extents(root, start, len, 0); | 2894 | update_reserved_extents(root, start, len, 0); |
3199 | 2895 | ||
3200 | return ret; | 2896 | return ret; |
@@ -3218,10 +2914,10 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans, | |||
3218 | static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, | 2914 | static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, |
3219 | struct btrfs_root *root, u64 parent, | 2915 | struct btrfs_root *root, u64 parent, |
3220 | u64 root_objectid, u64 ref_generation, | 2916 | u64 root_objectid, u64 ref_generation, |
3221 | u64 owner, struct btrfs_key *ins) | 2917 | u64 owner, struct btrfs_key *ins, |
2918 | int ref_mod) | ||
3222 | { | 2919 | { |
3223 | int ret; | 2920 | int ret; |
3224 | int pending_ret; | ||
3225 | u64 super_used; | 2921 | u64 super_used; |
3226 | u64 root_used; | 2922 | u64 root_used; |
3227 | u64 num_bytes = ins->offset; | 2923 | u64 num_bytes = ins->offset; |
@@ -3246,33 +2942,6 @@ static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, | |||
3246 | btrfs_set_root_used(&root->root_item, root_used + num_bytes); | 2942 | btrfs_set_root_used(&root->root_item, root_used + num_bytes); |
3247 | spin_unlock(&info->delalloc_lock); | 2943 | spin_unlock(&info->delalloc_lock); |
3248 | 2944 | ||
3249 | if (root == extent_root) { | ||
3250 | struct pending_extent_op *extent_op; | ||
3251 | |||
3252 | extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); | ||
3253 | BUG_ON(!extent_op); | ||
3254 | |||
3255 | extent_op->type = PENDING_EXTENT_INSERT; | ||
3256 | extent_op->bytenr = ins->objectid; | ||
3257 | extent_op->num_bytes = ins->offset; | ||
3258 | extent_op->parent = parent; | ||
3259 | extent_op->orig_parent = 0; | ||
3260 | extent_op->generation = ref_generation; | ||
3261 | extent_op->orig_generation = 0; | ||
3262 | extent_op->level = (int)owner; | ||
3263 | INIT_LIST_HEAD(&extent_op->list); | ||
3264 | extent_op->del = 0; | ||
3265 | |||
3266 | mutex_lock(&root->fs_info->extent_ins_mutex); | ||
3267 | set_extent_bits(&root->fs_info->extent_ins, ins->objectid, | ||
3268 | ins->objectid + ins->offset - 1, | ||
3269 | EXTENT_WRITEBACK, GFP_NOFS); | ||
3270 | set_state_private(&root->fs_info->extent_ins, | ||
3271 | ins->objectid, (unsigned long)extent_op); | ||
3272 | mutex_unlock(&root->fs_info->extent_ins_mutex); | ||
3273 | goto update_block; | ||
3274 | } | ||
3275 | |||
3276 | memcpy(&keys[0], ins, sizeof(*ins)); | 2945 | memcpy(&keys[0], ins, sizeof(*ins)); |
3277 | keys[1].objectid = ins->objectid; | 2946 | keys[1].objectid = ins->objectid; |
3278 | keys[1].type = BTRFS_EXTENT_REF_KEY; | 2947 | keys[1].type = BTRFS_EXTENT_REF_KEY; |
@@ -3283,37 +2952,31 @@ static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, | |||
3283 | path = btrfs_alloc_path(); | 2952 | path = btrfs_alloc_path(); |
3284 | BUG_ON(!path); | 2953 | BUG_ON(!path); |
3285 | 2954 | ||
2955 | path->leave_spinning = 1; | ||
3286 | ret = btrfs_insert_empty_items(trans, extent_root, path, keys, | 2956 | ret = btrfs_insert_empty_items(trans, extent_root, path, keys, |
3287 | sizes, 2); | 2957 | sizes, 2); |
3288 | BUG_ON(ret); | 2958 | BUG_ON(ret); |
3289 | 2959 | ||
3290 | extent_item = btrfs_item_ptr(path->nodes[0], path->slots[0], | 2960 | extent_item = btrfs_item_ptr(path->nodes[0], path->slots[0], |
3291 | struct btrfs_extent_item); | 2961 | struct btrfs_extent_item); |
3292 | btrfs_set_extent_refs(path->nodes[0], extent_item, 1); | 2962 | btrfs_set_extent_refs(path->nodes[0], extent_item, ref_mod); |
3293 | ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1, | 2963 | ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1, |
3294 | struct btrfs_extent_ref); | 2964 | struct btrfs_extent_ref); |
3295 | 2965 | ||
3296 | btrfs_set_ref_root(path->nodes[0], ref, root_objectid); | 2966 | btrfs_set_ref_root(path->nodes[0], ref, root_objectid); |
3297 | btrfs_set_ref_generation(path->nodes[0], ref, ref_generation); | 2967 | btrfs_set_ref_generation(path->nodes[0], ref, ref_generation); |
3298 | btrfs_set_ref_objectid(path->nodes[0], ref, owner); | 2968 | btrfs_set_ref_objectid(path->nodes[0], ref, owner); |
3299 | btrfs_set_ref_num_refs(path->nodes[0], ref, 1); | 2969 | btrfs_set_ref_num_refs(path->nodes[0], ref, ref_mod); |
3300 | 2970 | ||
3301 | btrfs_mark_buffer_dirty(path->nodes[0]); | 2971 | btrfs_mark_buffer_dirty(path->nodes[0]); |
3302 | 2972 | ||
3303 | trans->alloc_exclude_start = 0; | 2973 | trans->alloc_exclude_start = 0; |
3304 | trans->alloc_exclude_nr = 0; | 2974 | trans->alloc_exclude_nr = 0; |
3305 | btrfs_free_path(path); | 2975 | btrfs_free_path(path); |
3306 | finish_current_insert(trans, extent_root, 0); | ||
3307 | pending_ret = del_pending_extents(trans, extent_root, 0); | ||
3308 | 2976 | ||
3309 | if (ret) | 2977 | if (ret) |
3310 | goto out; | 2978 | goto out; |
3311 | if (pending_ret) { | ||
3312 | ret = pending_ret; | ||
3313 | goto out; | ||
3314 | } | ||
3315 | 2979 | ||
3316 | update_block: | ||
3317 | ret = update_block_group(trans, root, ins->objectid, | 2980 | ret = update_block_group(trans, root, ins->objectid, |
3318 | ins->offset, 1, 0); | 2981 | ins->offset, 1, 0); |
3319 | if (ret) { | 2982 | if (ret) { |
@@ -3335,9 +2998,12 @@ int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, | |||
3335 | 2998 | ||
3336 | if (root_objectid == BTRFS_TREE_LOG_OBJECTID) | 2999 | if (root_objectid == BTRFS_TREE_LOG_OBJECTID) |
3337 | return 0; | 3000 | return 0; |
3338 | ret = __btrfs_alloc_reserved_extent(trans, root, parent, root_objectid, | 3001 | |
3339 | ref_generation, owner, ins); | 3002 | ret = btrfs_add_delayed_ref(trans, ins->objectid, |
3340 | update_reserved_extents(root, ins->objectid, ins->offset, 0); | 3003 | ins->offset, parent, root_objectid, |
3004 | ref_generation, owner, | ||
3005 | BTRFS_ADD_DELAYED_EXTENT, 0); | ||
3006 | BUG_ON(ret); | ||
3341 | return ret; | 3007 | return ret; |
3342 | } | 3008 | } |
3343 | 3009 | ||
@@ -3362,9 +3028,9 @@ int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans, | |||
3362 | ret = btrfs_remove_free_space(block_group, ins->objectid, | 3028 | ret = btrfs_remove_free_space(block_group, ins->objectid, |
3363 | ins->offset); | 3029 | ins->offset); |
3364 | BUG_ON(ret); | 3030 | BUG_ON(ret); |
3365 | put_block_group(block_group); | 3031 | btrfs_put_block_group(block_group); |
3366 | ret = __btrfs_alloc_reserved_extent(trans, root, parent, root_objectid, | 3032 | ret = __btrfs_alloc_reserved_extent(trans, root, parent, root_objectid, |
3367 | ref_generation, owner, ins); | 3033 | ref_generation, owner, ins, 1); |
3368 | return ret; | 3034 | return ret; |
3369 | } | 3035 | } |
3370 | 3036 | ||
@@ -3383,26 +3049,25 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, | |||
3383 | u64 search_end, struct btrfs_key *ins, u64 data) | 3049 | u64 search_end, struct btrfs_key *ins, u64 data) |
3384 | { | 3050 | { |
3385 | int ret; | 3051 | int ret; |
3386 | |||
3387 | ret = __btrfs_reserve_extent(trans, root, num_bytes, | 3052 | ret = __btrfs_reserve_extent(trans, root, num_bytes, |
3388 | min_alloc_size, empty_size, hint_byte, | 3053 | min_alloc_size, empty_size, hint_byte, |
3389 | search_end, ins, data); | 3054 | search_end, ins, data); |
3390 | BUG_ON(ret); | 3055 | BUG_ON(ret); |
3391 | if (root_objectid != BTRFS_TREE_LOG_OBJECTID) { | 3056 | if (root_objectid != BTRFS_TREE_LOG_OBJECTID) { |
3392 | ret = __btrfs_alloc_reserved_extent(trans, root, parent, | 3057 | ret = btrfs_add_delayed_ref(trans, ins->objectid, |
3393 | root_objectid, ref_generation, | 3058 | ins->offset, parent, root_objectid, |
3394 | owner_objectid, ins); | 3059 | ref_generation, owner_objectid, |
3060 | BTRFS_ADD_DELAYED_EXTENT, 0); | ||
3395 | BUG_ON(ret); | 3061 | BUG_ON(ret); |
3396 | |||
3397 | } else { | ||
3398 | update_reserved_extents(root, ins->objectid, ins->offset, 1); | ||
3399 | } | 3062 | } |
3063 | update_reserved_extents(root, ins->objectid, ins->offset, 1); | ||
3400 | return ret; | 3064 | return ret; |
3401 | } | 3065 | } |
3402 | 3066 | ||
3403 | struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, | 3067 | struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, |
3404 | struct btrfs_root *root, | 3068 | struct btrfs_root *root, |
3405 | u64 bytenr, u32 blocksize) | 3069 | u64 bytenr, u32 blocksize, |
3070 | int level) | ||
3406 | { | 3071 | { |
3407 | struct extent_buffer *buf; | 3072 | struct extent_buffer *buf; |
3408 | 3073 | ||
@@ -3410,6 +3075,7 @@ struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, | |||
3410 | if (!buf) | 3075 | if (!buf) |
3411 | return ERR_PTR(-ENOMEM); | 3076 | return ERR_PTR(-ENOMEM); |
3412 | btrfs_set_header_generation(buf, trans->transid); | 3077 | btrfs_set_header_generation(buf, trans->transid); |
3078 | btrfs_set_buffer_lockdep_class(buf, level); | ||
3413 | btrfs_tree_lock(buf); | 3079 | btrfs_tree_lock(buf); |
3414 | clean_tree_block(trans, root, buf); | 3080 | clean_tree_block(trans, root, buf); |
3415 | 3081 | ||
@@ -3453,7 +3119,8 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, | |||
3453 | return ERR_PTR(ret); | 3119 | return ERR_PTR(ret); |
3454 | } | 3120 | } |
3455 | 3121 | ||
3456 | buf = btrfs_init_new_buffer(trans, root, ins.objectid, blocksize); | 3122 | buf = btrfs_init_new_buffer(trans, root, ins.objectid, |
3123 | blocksize, level); | ||
3457 | return buf; | 3124 | return buf; |
3458 | } | 3125 | } |
3459 | 3126 | ||
@@ -3529,7 +3196,7 @@ int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, | |||
3529 | 3196 | ||
3530 | fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); | 3197 | fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); |
3531 | 3198 | ||
3532 | ret = __btrfs_free_extent(trans, root, disk_bytenr, | 3199 | ret = btrfs_free_extent(trans, root, disk_bytenr, |
3533 | btrfs_file_extent_disk_num_bytes(leaf, fi), | 3200 | btrfs_file_extent_disk_num_bytes(leaf, fi), |
3534 | leaf->start, leaf_owner, leaf_generation, | 3201 | leaf->start, leaf_owner, leaf_generation, |
3535 | key.objectid, 0); | 3202 | key.objectid, 0); |
@@ -3569,7 +3236,7 @@ static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans, | |||
3569 | */ | 3236 | */ |
3570 | for (i = 0; i < ref->nritems; i++) { | 3237 | for (i = 0; i < ref->nritems; i++) { |
3571 | info = ref->extents + sorted[i].slot; | 3238 | info = ref->extents + sorted[i].slot; |
3572 | ret = __btrfs_free_extent(trans, root, info->bytenr, | 3239 | ret = btrfs_free_extent(trans, root, info->bytenr, |
3573 | info->num_bytes, ref->bytenr, | 3240 | info->num_bytes, ref->bytenr, |
3574 | ref->owner, ref->generation, | 3241 | ref->owner, ref->generation, |
3575 | info->objectid, 0); | 3242 | info->objectid, 0); |
@@ -3586,12 +3253,13 @@ static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans, | |||
3586 | return 0; | 3253 | return 0; |
3587 | } | 3254 | } |
3588 | 3255 | ||
3589 | static int drop_snap_lookup_refcount(struct btrfs_root *root, u64 start, | 3256 | static int drop_snap_lookup_refcount(struct btrfs_trans_handle *trans, |
3257 | struct btrfs_root *root, u64 start, | ||
3590 | u64 len, u32 *refs) | 3258 | u64 len, u32 *refs) |
3591 | { | 3259 | { |
3592 | int ret; | 3260 | int ret; |
3593 | 3261 | ||
3594 | ret = btrfs_lookup_extent_ref(NULL, root, start, len, refs); | 3262 | ret = btrfs_lookup_extent_ref(trans, root, start, len, refs); |
3595 | BUG_ON(ret); | 3263 | BUG_ON(ret); |
3596 | 3264 | ||
3597 | #if 0 /* some debugging code in case we see problems here */ | 3265 | #if 0 /* some debugging code in case we see problems here */ |
@@ -3699,7 +3367,8 @@ static noinline int drop_level_one_refs(struct btrfs_trans_handle *trans, | |||
3699 | * we just decrement it below and don't update any | 3367 | * we just decrement it below and don't update any |
3700 | * of the refs the leaf points to. | 3368 | * of the refs the leaf points to. |
3701 | */ | 3369 | */ |
3702 | ret = drop_snap_lookup_refcount(root, bytenr, blocksize, &refs); | 3370 | ret = drop_snap_lookup_refcount(trans, root, bytenr, |
3371 | blocksize, &refs); | ||
3703 | BUG_ON(ret); | 3372 | BUG_ON(ret); |
3704 | if (refs != 1) | 3373 | if (refs != 1) |
3705 | continue; | 3374 | continue; |
@@ -3750,7 +3419,7 @@ static noinline int drop_level_one_refs(struct btrfs_trans_handle *trans, | |||
3750 | */ | 3419 | */ |
3751 | for (i = 0; i < refi; i++) { | 3420 | for (i = 0; i < refi; i++) { |
3752 | bytenr = sorted[i].bytenr; | 3421 | bytenr = sorted[i].bytenr; |
3753 | ret = __btrfs_free_extent(trans, root, bytenr, | 3422 | ret = btrfs_free_extent(trans, root, bytenr, |
3754 | blocksize, eb->start, | 3423 | blocksize, eb->start, |
3755 | root_owner, root_gen, 0, 1); | 3424 | root_owner, root_gen, 0, 1); |
3756 | BUG_ON(ret); | 3425 | BUG_ON(ret); |
@@ -3793,7 +3462,7 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans, | |||
3793 | 3462 | ||
3794 | WARN_ON(*level < 0); | 3463 | WARN_ON(*level < 0); |
3795 | WARN_ON(*level >= BTRFS_MAX_LEVEL); | 3464 | WARN_ON(*level >= BTRFS_MAX_LEVEL); |
3796 | ret = drop_snap_lookup_refcount(root, path->nodes[*level]->start, | 3465 | ret = drop_snap_lookup_refcount(trans, root, path->nodes[*level]->start, |
3797 | path->nodes[*level]->len, &refs); | 3466 | path->nodes[*level]->len, &refs); |
3798 | BUG_ON(ret); | 3467 | BUG_ON(ret); |
3799 | if (refs > 1) | 3468 | if (refs > 1) |
@@ -3844,7 +3513,8 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans, | |||
3844 | ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); | 3513 | ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); |
3845 | blocksize = btrfs_level_size(root, *level - 1); | 3514 | blocksize = btrfs_level_size(root, *level - 1); |
3846 | 3515 | ||
3847 | ret = drop_snap_lookup_refcount(root, bytenr, blocksize, &refs); | 3516 | ret = drop_snap_lookup_refcount(trans, root, bytenr, |
3517 | blocksize, &refs); | ||
3848 | BUG_ON(ret); | 3518 | BUG_ON(ret); |
3849 | 3519 | ||
3850 | /* | 3520 | /* |
@@ -3859,7 +3529,7 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans, | |||
3859 | root_gen = btrfs_header_generation(parent); | 3529 | root_gen = btrfs_header_generation(parent); |
3860 | path->slots[*level]++; | 3530 | path->slots[*level]++; |
3861 | 3531 | ||
3862 | ret = __btrfs_free_extent(trans, root, bytenr, | 3532 | ret = btrfs_free_extent(trans, root, bytenr, |
3863 | blocksize, parent->start, | 3533 | blocksize, parent->start, |
3864 | root_owner, root_gen, | 3534 | root_owner, root_gen, |
3865 | *level - 1, 1); | 3535 | *level - 1, 1); |
@@ -3905,7 +3575,7 @@ out: | |||
3905 | * cleanup and free the reference on the last node | 3575 | * cleanup and free the reference on the last node |
3906 | * we processed | 3576 | * we processed |
3907 | */ | 3577 | */ |
3908 | ret = __btrfs_free_extent(trans, root, bytenr, blocksize, | 3578 | ret = btrfs_free_extent(trans, root, bytenr, blocksize, |
3909 | parent->start, root_owner, root_gen, | 3579 | parent->start, root_owner, root_gen, |
3910 | *level, 1); | 3580 | *level, 1); |
3911 | free_extent_buffer(path->nodes[*level]); | 3581 | free_extent_buffer(path->nodes[*level]); |
@@ -4094,6 +3764,7 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root | |||
4094 | struct btrfs_path *path; | 3764 | struct btrfs_path *path; |
4095 | int i; | 3765 | int i; |
4096 | int orig_level; | 3766 | int orig_level; |
3767 | int update_count; | ||
4097 | struct btrfs_root_item *root_item = &root->root_item; | 3768 | struct btrfs_root_item *root_item = &root->root_item; |
4098 | 3769 | ||
4099 | WARN_ON(!mutex_is_locked(&root->fs_info->drop_mutex)); | 3770 | WARN_ON(!mutex_is_locked(&root->fs_info->drop_mutex)); |
@@ -4135,6 +3806,7 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root | |||
4135 | } | 3806 | } |
4136 | } | 3807 | } |
4137 | while (1) { | 3808 | while (1) { |
3809 | unsigned long update; | ||
4138 | wret = walk_down_tree(trans, root, path, &level); | 3810 | wret = walk_down_tree(trans, root, path, &level); |
4139 | if (wret > 0) | 3811 | if (wret > 0) |
4140 | break; | 3812 | break; |
@@ -4147,12 +3819,21 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root | |||
4147 | break; | 3819 | break; |
4148 | if (wret < 0) | 3820 | if (wret < 0) |
4149 | ret = wret; | 3821 | ret = wret; |
4150 | if (trans->transaction->in_commit) { | 3822 | if (trans->transaction->in_commit || |
3823 | trans->transaction->delayed_refs.flushing) { | ||
4151 | ret = -EAGAIN; | 3824 | ret = -EAGAIN; |
4152 | break; | 3825 | break; |
4153 | } | 3826 | } |
4154 | atomic_inc(&root->fs_info->throttle_gen); | 3827 | atomic_inc(&root->fs_info->throttle_gen); |
4155 | wake_up(&root->fs_info->transaction_throttle); | 3828 | wake_up(&root->fs_info->transaction_throttle); |
3829 | for (update_count = 0; update_count < 16; update_count++) { | ||
3830 | update = trans->delayed_ref_updates; | ||
3831 | trans->delayed_ref_updates = 0; | ||
3832 | if (update) | ||
3833 | btrfs_run_delayed_refs(trans, root, update); | ||
3834 | else | ||
3835 | break; | ||
3836 | } | ||
4156 | } | 3837 | } |
4157 | for (i = 0; i <= orig_level; i++) { | 3838 | for (i = 0; i <= orig_level; i++) { |
4158 | if (path->nodes[i]) { | 3839 | if (path->nodes[i]) { |
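The loop added above is worth dwelling on: each pass claims the number of ref updates queued so far on the transaction, zeroes the counter, and runs exactly that many; running refs can queue more, so the pass count is capped at 16 to keep the snapshot drop from stalling behind its own bookkeeping. A minimal sketch of the same pattern, pulled out into a hypothetical helper (the helper name is not from the patch):

        static void throttle_delayed_refs(struct btrfs_trans_handle *trans,
                                          struct btrfs_root *root)
        {
                unsigned long update;
                int pass;

                for (pass = 0; pass < 16; pass++) {
                        /* claim whatever was queued since the last pass */
                        update = trans->delayed_ref_updates;
                        trans->delayed_ref_updates = 0;
                        if (!update)
                                break;          /* queue fully drained */
                        btrfs_run_delayed_refs(trans, root, update);
                }
        }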
@@ -4179,13 +3860,13 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans, | |||
4179 | path = btrfs_alloc_path(); | 3860 | path = btrfs_alloc_path(); |
4180 | BUG_ON(!path); | 3861 | BUG_ON(!path); |
4181 | 3862 | ||
4182 | BUG_ON(!btrfs_tree_locked(parent)); | 3863 | btrfs_assert_tree_locked(parent); |
4183 | parent_level = btrfs_header_level(parent); | 3864 | parent_level = btrfs_header_level(parent); |
4184 | extent_buffer_get(parent); | 3865 | extent_buffer_get(parent); |
4185 | path->nodes[parent_level] = parent; | 3866 | path->nodes[parent_level] = parent; |
4186 | path->slots[parent_level] = btrfs_header_nritems(parent); | 3867 | path->slots[parent_level] = btrfs_header_nritems(parent); |
4187 | 3868 | ||
4188 | BUG_ON(!btrfs_tree_locked(node)); | 3869 | btrfs_assert_tree_locked(node); |
4189 | level = btrfs_header_level(node); | 3870 | level = btrfs_header_level(node); |
4190 | extent_buffer_get(node); | 3871 | extent_buffer_get(node); |
4191 | path->nodes[level] = node; | 3872 | path->nodes[level] = node; |
@@ -5197,6 +4878,7 @@ static noinline int replace_extents_in_leaf(struct btrfs_trans_handle *trans, | |||
5197 | root->root_key.objectid, | 4878 | root->root_key.objectid, |
5198 | trans->transid, key.objectid); | 4879 | trans->transid, key.objectid); |
5199 | BUG_ON(ret); | 4880 | BUG_ON(ret); |
4881 | |||
5200 | ret = btrfs_free_extent(trans, root, | 4882 | ret = btrfs_free_extent(trans, root, |
5201 | bytenr, num_bytes, leaf->start, | 4883 | bytenr, num_bytes, leaf->start, |
5202 | btrfs_header_owner(leaf), | 4884 | btrfs_header_owner(leaf), |
@@ -5508,9 +5190,6 @@ static noinline int relocate_tree_block(struct btrfs_trans_handle *trans, | |||
5508 | ref_path, NULL, NULL); | 5190 | ref_path, NULL, NULL); |
5509 | BUG_ON(ret); | 5191 | BUG_ON(ret); |
5510 | 5192 | ||
5511 | if (root == root->fs_info->extent_root) | ||
5512 | btrfs_extent_post_op(trans, root); | ||
5513 | |||
5514 | return 0; | 5193 | return 0; |
5515 | } | 5194 | } |
5516 | 5195 | ||
@@ -5641,7 +5320,9 @@ static noinline int relocate_one_extent(struct btrfs_root *extent_root, | |||
5641 | prev_block = block_start; | 5320 | prev_block = block_start; |
5642 | } | 5321 | } |
5643 | 5322 | ||
5323 | mutex_lock(&extent_root->fs_info->trans_mutex); | ||
5644 | btrfs_record_root_in_trans(found_root); | 5324 | btrfs_record_root_in_trans(found_root); |
5325 | mutex_unlock(&extent_root->fs_info->trans_mutex); | ||
5645 | if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { | 5326 | if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { |
5646 | /* | 5327 | /* |
5647 | * try to update data extent references while | 5328 | * try to update data extent references while |
@@ -5776,6 +5457,7 @@ static int __insert_orphan_inode(struct btrfs_trans_handle *trans, | |||
5776 | if (!path) | 5457 | if (!path) |
5777 | return -ENOMEM; | 5458 | return -ENOMEM; |
5778 | 5459 | ||
5460 | path->leave_spinning = 1; | ||
5779 | ret = btrfs_insert_empty_inode(trans, root, path, objectid); | 5461 | ret = btrfs_insert_empty_inode(trans, root, path, objectid); |
5780 | if (ret) | 5462 | if (ret) |
5781 | goto out; | 5463 | goto out; |
@@ -5946,6 +5628,9 @@ again: | |||
5946 | btrfs_remove_leaf_refs(info->tree_root, (u64)-1, 1); | 5628 | btrfs_remove_leaf_refs(info->tree_root, (u64)-1, 1); |
5947 | mutex_unlock(&root->fs_info->cleaner_mutex); | 5629 | mutex_unlock(&root->fs_info->cleaner_mutex); |
5948 | 5630 | ||
5631 | trans = btrfs_start_transaction(info->tree_root, 1); | ||
5632 | btrfs_commit_transaction(trans, info->tree_root); | ||
5633 | |||
5949 | while (1) { | 5634 | while (1) { |
5950 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | 5635 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); |
5951 | if (ret < 0) | 5636 | if (ret < 0) |
@@ -6032,7 +5717,7 @@ next: | |||
6032 | WARN_ON(block_group->reserved > 0); | 5717 | WARN_ON(block_group->reserved > 0); |
6033 | WARN_ON(btrfs_block_group_used(&block_group->item) > 0); | 5718 | WARN_ON(btrfs_block_group_used(&block_group->item) > 0); |
6034 | spin_unlock(&block_group->lock); | 5719 | spin_unlock(&block_group->lock); |
6035 | put_block_group(block_group); | 5720 | btrfs_put_block_group(block_group); |
6036 | ret = 0; | 5721 | ret = 0; |
6037 | out: | 5722 | out: |
6038 | btrfs_free_path(path); | 5723 | btrfs_free_path(path); |
@@ -6079,6 +5764,7 @@ out: | |||
6079 | int btrfs_free_block_groups(struct btrfs_fs_info *info) | 5764 | int btrfs_free_block_groups(struct btrfs_fs_info *info) |
6080 | { | 5765 | { |
6081 | struct btrfs_block_group_cache *block_group; | 5766 | struct btrfs_block_group_cache *block_group; |
5767 | struct btrfs_space_info *space_info; | ||
6082 | struct rb_node *n; | 5768 | struct rb_node *n; |
6083 | 5769 | ||
6084 | spin_lock(&info->block_group_cache_lock); | 5770 | spin_lock(&info->block_group_cache_lock); |
@@ -6100,6 +5786,23 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) | |||
6100 | spin_lock(&info->block_group_cache_lock); | 5786 | spin_lock(&info->block_group_cache_lock); |
6101 | } | 5787 | } |
6102 | spin_unlock(&info->block_group_cache_lock); | 5788 | spin_unlock(&info->block_group_cache_lock); |
5789 | |||
5790 | /* now that all the block groups are freed, go through and | ||
5791 | * free all the space_info structs. This is only called during | ||
5792 | * the final stages of unmount, and so we know nobody is | ||
5793 | * using them. We call synchronize_rcu() once before we start, | ||
5794 | * just to be on the safe side. | ||
5795 | */ | ||
5796 | synchronize_rcu(); | ||
5797 | |||
5798 | while(!list_empty(&info->space_info)) { | ||
5799 | space_info = list_entry(info->space_info.next, | ||
5800 | struct btrfs_space_info, | ||
5801 | list); | ||
5802 | |||
5803 | list_del(&space_info->list); | ||
5804 | kfree(space_info); | ||
5805 | } | ||
6103 | return 0; | 5806 | return 0; |
6104 | } | 5807 | } |
6105 | 5808 | ||
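The comment in the hunk above carries the reasoning: by this point in unmount nothing can be allocating, so the space_info structs can simply be drained off the list, with one synchronize_rcu() up front to cover any lockless reader that still holds a pointer. The same drain as a hypothetical standalone helper (name and factoring are illustrative, not from the patch):

        static void free_space_infos(struct btrfs_fs_info *info)
        {
                struct btrfs_space_info *space_info;

                /* one grace period before the drain; see the comment above */
                synchronize_rcu();

                while (!list_empty(&info->space_info)) {
                        space_info = list_entry(info->space_info.next,
                                                struct btrfs_space_info,
                                                list);
                        list_del(&space_info->list);
                        kfree(space_info);
                }
        }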
@@ -6141,9 +5844,10 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
6141 | 5844 | ||
6142 | atomic_set(&cache->count, 1); | 5845 | atomic_set(&cache->count, 1); |
6143 | spin_lock_init(&cache->lock); | 5846 | spin_lock_init(&cache->lock); |
6144 | mutex_init(&cache->alloc_mutex); | 5847 | spin_lock_init(&cache->tree_lock); |
6145 | mutex_init(&cache->cache_mutex); | 5848 | mutex_init(&cache->cache_mutex); |
6146 | INIT_LIST_HEAD(&cache->list); | 5849 | INIT_LIST_HEAD(&cache->list); |
5850 | INIT_LIST_HEAD(&cache->cluster_list); | ||
6147 | read_extent_buffer(leaf, &cache->item, | 5851 | read_extent_buffer(leaf, &cache->item, |
6148 | btrfs_item_ptr_offset(leaf, path->slots[0]), | 5852 | btrfs_item_ptr_offset(leaf, path->slots[0]), |
6149 | sizeof(cache->item)); | 5853 | sizeof(cache->item)); |
@@ -6186,7 +5890,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, | |||
6186 | 5890 | ||
6187 | extent_root = root->fs_info->extent_root; | 5891 | extent_root = root->fs_info->extent_root; |
6188 | 5892 | ||
6189 | root->fs_info->last_trans_new_blockgroup = trans->transid; | 5893 | root->fs_info->last_trans_log_full_commit = trans->transid; |
6190 | 5894 | ||
6191 | cache = kzalloc(sizeof(*cache), GFP_NOFS); | 5895 | cache = kzalloc(sizeof(*cache), GFP_NOFS); |
6192 | if (!cache) | 5896 | if (!cache) |
@@ -6197,9 +5901,10 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, | |||
6197 | cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; | 5901 | cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; |
6198 | atomic_set(&cache->count, 1); | 5902 | atomic_set(&cache->count, 1); |
6199 | spin_lock_init(&cache->lock); | 5903 | spin_lock_init(&cache->lock); |
6200 | mutex_init(&cache->alloc_mutex); | 5904 | spin_lock_init(&cache->tree_lock); |
6201 | mutex_init(&cache->cache_mutex); | 5905 | mutex_init(&cache->cache_mutex); |
6202 | INIT_LIST_HEAD(&cache->list); | 5906 | INIT_LIST_HEAD(&cache->list); |
5907 | INIT_LIST_HEAD(&cache->cluster_list); | ||
6203 | 5908 | ||
6204 | btrfs_set_block_group_used(&cache->item, bytes_used); | 5909 | btrfs_set_block_group_used(&cache->item, bytes_used); |
6205 | btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid); | 5910 | btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid); |
@@ -6220,9 +5925,6 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, | |||
6220 | sizeof(cache->item)); | 5925 | sizeof(cache->item)); |
6221 | BUG_ON(ret); | 5926 | BUG_ON(ret); |
6222 | 5927 | ||
6223 | finish_current_insert(trans, extent_root, 0); | ||
6224 | ret = del_pending_extents(trans, extent_root, 0); | ||
6225 | BUG_ON(ret); | ||
6226 | set_avail_alloc_bits(extent_root->fs_info, type); | 5928 | set_avail_alloc_bits(extent_root->fs_info, type); |
6227 | 5929 | ||
6228 | return 0; | 5930 | return 0; |
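The two deleted calls are the old eager model, where btrfs_make_block_group had to push its own pending extent-tree inserts and deletes before returning. Under the delayed-ref scheme this series introduces, that work sits queued on the transaction until a batch run or the commit flushes it. A sketch of what an explicit full flush now looks like, assuming a count of 0 asks btrfs_run_delayed_refs to drain everything queued (an assumption; the convention is not visible in this hunk):

        static int flush_extent_tree_updates(struct btrfs_trans_handle *trans,
                                             struct btrfs_root *extent_root)
        {
                /* count == 0: drain the whole delayed-ref queue (assumed) */
                return btrfs_run_delayed_refs(trans, extent_root, 0);
        }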
@@ -6262,8 +5964,8 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | |||
6262 | spin_unlock(&block_group->space_info->lock); | 5964 | spin_unlock(&block_group->space_info->lock); |
6263 | block_group->space_info->full = 0; | 5965 | block_group->space_info->full = 0; |
6264 | 5966 | ||
6265 | put_block_group(block_group); | 5967 | btrfs_put_block_group(block_group); |
6266 | put_block_group(block_group); | 5968 | btrfs_put_block_group(block_group); |
6267 | 5969 | ||
6268 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); | 5970 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); |
6269 | if (ret > 0) | 5971 | if (ret > 0) |
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 37d43b516b79..eb2bee8b7fbf 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
@@ -415,8 +415,6 @@ static int split_state(struct extent_io_tree *tree, struct extent_state *orig, | |||
415 | 415 | ||
416 | node = tree_insert(&tree->state, prealloc->end, &prealloc->rb_node); | 416 | node = tree_insert(&tree->state, prealloc->end, &prealloc->rb_node); |
417 | if (node) { | 417 | if (node) { |
418 | struct extent_state *found; | ||
419 | found = rb_entry(node, struct extent_state, rb_node); | ||
420 | free_extent_state(prealloc); | 418 | free_extent_state(prealloc); |
421 | return -EEXIST; | 419 | return -EEXIST; |
422 | } | 420 | } |
@@ -2886,25 +2884,19 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | |||
2886 | disko = 0; | 2884 | disko = 0; |
2887 | flags = 0; | 2885 | flags = 0; |
2888 | 2886 | ||
2889 | switch (em->block_start) { | 2887 | if (em->block_start == EXTENT_MAP_LAST_BYTE) { |
2890 | case EXTENT_MAP_LAST_BYTE: | ||
2891 | end = 1; | 2888 | end = 1; |
2892 | flags |= FIEMAP_EXTENT_LAST; | 2889 | flags |= FIEMAP_EXTENT_LAST; |
2893 | break; | 2890 | } else if (em->block_start == EXTENT_MAP_HOLE) { |
2894 | case EXTENT_MAP_HOLE: | ||
2895 | flags |= FIEMAP_EXTENT_UNWRITTEN; | 2891 | flags |= FIEMAP_EXTENT_UNWRITTEN; |
2896 | break; | 2892 | } else if (em->block_start == EXTENT_MAP_INLINE) { |
2897 | case EXTENT_MAP_INLINE: | ||
2898 | flags |= (FIEMAP_EXTENT_DATA_INLINE | | 2893 | flags |= (FIEMAP_EXTENT_DATA_INLINE | |
2899 | FIEMAP_EXTENT_NOT_ALIGNED); | 2894 | FIEMAP_EXTENT_NOT_ALIGNED); |
2900 | break; | 2895 | } else if (em->block_start == EXTENT_MAP_DELALLOC) { |
2901 | case EXTENT_MAP_DELALLOC: | ||
2902 | flags |= (FIEMAP_EXTENT_DELALLOC | | 2896 | flags |= (FIEMAP_EXTENT_DELALLOC | |
2903 | FIEMAP_EXTENT_UNKNOWN); | 2897 | FIEMAP_EXTENT_UNKNOWN); |
2904 | break; | 2898 | } else { |
2905 | default: | ||
2906 | disko = em->block_start; | 2899 | disko = em->block_start; |
2907 | break; | ||
2908 | } | 2900 | } |
2909 | if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) | 2901 | if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) |
2910 | flags |= FIEMAP_EXTENT_ENCODED; | 2902 | flags |= FIEMAP_EXTENT_ENCODED; |
@@ -3126,20 +3118,15 @@ void free_extent_buffer(struct extent_buffer *eb) | |||
3126 | int clear_extent_buffer_dirty(struct extent_io_tree *tree, | 3118 | int clear_extent_buffer_dirty(struct extent_io_tree *tree, |
3127 | struct extent_buffer *eb) | 3119 | struct extent_buffer *eb) |
3128 | { | 3120 | { |
3129 | int set; | ||
3130 | unsigned long i; | 3121 | unsigned long i; |
3131 | unsigned long num_pages; | 3122 | unsigned long num_pages; |
3132 | struct page *page; | 3123 | struct page *page; |
3133 | 3124 | ||
3134 | u64 start = eb->start; | ||
3135 | u64 end = start + eb->len - 1; | ||
3136 | |||
3137 | set = clear_extent_dirty(tree, start, end, GFP_NOFS); | ||
3138 | num_pages = num_extent_pages(eb->start, eb->len); | 3125 | num_pages = num_extent_pages(eb->start, eb->len); |
3139 | 3126 | ||
3140 | for (i = 0; i < num_pages; i++) { | 3127 | for (i = 0; i < num_pages; i++) { |
3141 | page = extent_buffer_page(eb, i); | 3128 | page = extent_buffer_page(eb, i); |
3142 | if (!set && !PageDirty(page)) | 3129 | if (!PageDirty(page)) |
3143 | continue; | 3130 | continue; |
3144 | 3131 | ||
3145 | lock_page(page); | 3132 | lock_page(page); |
@@ -3148,22 +3135,6 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree, | |||
3148 | else | 3135 | else |
3149 | set_page_private(page, EXTENT_PAGE_PRIVATE); | 3136 | set_page_private(page, EXTENT_PAGE_PRIVATE); |
3150 | 3137 | ||
3151 | /* | ||
3152 | * if we're on the last page or the first page and the | ||
3153 | * block isn't aligned on a page boundary, do extra checks | ||
3154 | * to make sure we don't clean page that is partially dirty | ||
3155 | */ | ||
3156 | if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) || | ||
3157 | ((i == num_pages - 1) && | ||
3158 | ((eb->start + eb->len) & (PAGE_CACHE_SIZE - 1)))) { | ||
3159 | start = (u64)page->index << PAGE_CACHE_SHIFT; | ||
3160 | end = start + PAGE_CACHE_SIZE - 1; | ||
3161 | if (test_range_bit(tree, start, end, | ||
3162 | EXTENT_DIRTY, 0)) { | ||
3163 | unlock_page(page); | ||
3164 | continue; | ||
3165 | } | ||
3166 | } | ||
3167 | clear_page_dirty_for_io(page); | 3138 | clear_page_dirty_for_io(page); |
3168 | spin_lock_irq(&page->mapping->tree_lock); | 3139 | spin_lock_irq(&page->mapping->tree_lock); |
3169 | if (!PageDirty(page)) { | 3140 | if (!PageDirty(page)) { |
@@ -3189,29 +3160,13 @@ int set_extent_buffer_dirty(struct extent_io_tree *tree, | |||
3189 | { | 3160 | { |
3190 | unsigned long i; | 3161 | unsigned long i; |
3191 | unsigned long num_pages; | 3162 | unsigned long num_pages; |
3163 | int was_dirty = 0; | ||
3192 | 3164 | ||
3165 | was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags); | ||
3193 | num_pages = num_extent_pages(eb->start, eb->len); | 3166 | num_pages = num_extent_pages(eb->start, eb->len); |
3194 | for (i = 0; i < num_pages; i++) { | 3167 | for (i = 0; i < num_pages; i++) |
3195 | struct page *page = extent_buffer_page(eb, i); | ||
3196 | /* writepage may need to do something special for the | ||
3197 | * first page, we have to make sure page->private is | ||
3198 | * properly set. releasepage may drop page->private | ||
3199 | * on us if the page isn't already dirty. | ||
3200 | */ | ||
3201 | lock_page(page); | ||
3202 | if (i == 0) { | ||
3203 | set_page_extent_head(page, eb->len); | ||
3204 | } else if (PagePrivate(page) && | ||
3205 | page->private != EXTENT_PAGE_PRIVATE) { | ||
3206 | set_page_extent_mapped(page); | ||
3207 | } | ||
3208 | __set_page_dirty_nobuffers(extent_buffer_page(eb, i)); | 3168 | __set_page_dirty_nobuffers(extent_buffer_page(eb, i)); |
3209 | set_extent_dirty(tree, page_offset(page), | 3169 | return was_dirty; |
3210 | page_offset(page) + PAGE_CACHE_SIZE - 1, | ||
3211 | GFP_NOFS); | ||
3212 | unlock_page(page); | ||
3213 | } | ||
3214 | return 0; | ||
3215 | } | 3170 | } |
3216 | 3171 | ||
3217 | int clear_extent_buffer_uptodate(struct extent_io_tree *tree, | 3172 | int clear_extent_buffer_uptodate(struct extent_io_tree *tree, |
@@ -3791,6 +3746,10 @@ int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page) | |||
3791 | ret = 0; | 3746 | ret = 0; |
3792 | goto out; | 3747 | goto out; |
3793 | } | 3748 | } |
3749 | if (test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) { | ||
3750 | ret = 0; | ||
3751 | goto out; | ||
3752 | } | ||
3794 | /* at this point we can safely release the extent buffer */ | 3753 | /* at this point we can safely release the extent buffer */ |
3795 | num_pages = num_extent_pages(eb->start, eb->len); | 3754 | num_pages = num_extent_pages(eb->start, eb->len); |
3796 | for (i = 0; i < num_pages; i++) | 3755 | for (i = 0; i < num_pages; i++) |
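The dirty-tracking rewrite above replaces per-range EXTENT_DIRTY state in the io tree with a single EXTENT_BUFFER_DIRTY bit in eb->bflags: test_and_set_bit makes the transition atomic and reports in one step whether the buffer was already dirty, which is also what the new check in try_release_extent_buffer keys off. A sketch of a caller using the new return value (the accounting hook is hypothetical):

        static void mark_eb_dirty_once(struct extent_io_tree *tree,
                                       struct extent_buffer *eb)
        {
                /* nonzero return means it was already dirty */
                if (!set_extent_buffer_dirty(tree, eb))
                        account_new_dirty_buffer(eb);   /* hypothetical hook */
        }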
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 1f9df88afbf6..5bc20abf3f3d 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h | |||
@@ -25,6 +25,7 @@ | |||
25 | /* these are bit numbers for test/set bit */ | 25 | /* these are bit numbers for test/set bit */ |
26 | #define EXTENT_BUFFER_UPTODATE 0 | 26 | #define EXTENT_BUFFER_UPTODATE 0 |
27 | #define EXTENT_BUFFER_BLOCKING 1 | 27 | #define EXTENT_BUFFER_BLOCKING 1 |
28 | #define EXTENT_BUFFER_DIRTY 2 | ||
28 | 29 | ||
29 | /* | 30 | /* |
30 | * page->private values. Every page that is controlled by the extent | 31 | * page->private values. Every page that is controlled by the extent |
@@ -254,6 +255,8 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree, | |||
254 | struct extent_buffer *eb); | 255 | struct extent_buffer *eb); |
255 | int set_extent_buffer_dirty(struct extent_io_tree *tree, | 256 | int set_extent_buffer_dirty(struct extent_io_tree *tree, |
256 | struct extent_buffer *eb); | 257 | struct extent_buffer *eb); |
258 | int test_extent_buffer_dirty(struct extent_io_tree *tree, | ||
259 | struct extent_buffer *eb); | ||
257 | int set_extent_buffer_uptodate(struct extent_io_tree *tree, | 260 | int set_extent_buffer_uptodate(struct extent_io_tree *tree, |
258 | struct extent_buffer *eb); | 261 | struct extent_buffer *eb); |
259 | int clear_extent_buffer_uptodate(struct extent_io_tree *tree, | 262 | int clear_extent_buffer_uptodate(struct extent_io_tree *tree, |
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 50da69da20ce..b187917b36fa 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c | |||
@@ -234,7 +234,6 @@ int add_extent_mapping(struct extent_map_tree *tree, | |||
234 | rb = tree_insert(&tree->map, em->start, &em->rb_node); | 234 | rb = tree_insert(&tree->map, em->start, &em->rb_node); |
235 | if (rb) { | 235 | if (rb) { |
236 | ret = -EEXIST; | 236 | ret = -EEXIST; |
237 | free_extent_map(merge); | ||
238 | goto out; | 237 | goto out; |
239 | } | 238 | } |
240 | atomic_inc(&em->refs); | 239 | atomic_inc(&em->refs); |
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 964652435fd1..9b99886562d0 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c | |||
@@ -52,6 +52,7 @@ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, | |||
52 | file_key.offset = pos; | 52 | file_key.offset = pos; |
53 | btrfs_set_key_type(&file_key, BTRFS_EXTENT_DATA_KEY); | 53 | btrfs_set_key_type(&file_key, BTRFS_EXTENT_DATA_KEY); |
54 | 54 | ||
55 | path->leave_spinning = 1; | ||
55 | ret = btrfs_insert_empty_item(trans, root, path, &file_key, | 56 | ret = btrfs_insert_empty_item(trans, root, path, &file_key, |
56 | sizeof(*item)); | 57 | sizeof(*item)); |
57 | if (ret < 0) | 58 | if (ret < 0) |
@@ -523,6 +524,7 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans, | |||
523 | key.offset = end_byte - 1; | 524 | key.offset = end_byte - 1; |
524 | key.type = BTRFS_EXTENT_CSUM_KEY; | 525 | key.type = BTRFS_EXTENT_CSUM_KEY; |
525 | 526 | ||
527 | path->leave_spinning = 1; | ||
526 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); | 528 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); |
527 | if (ret > 0) { | 529 | if (ret > 0) { |
528 | if (path->slots[0] == 0) | 530 | if (path->slots[0] == 0) |
@@ -757,8 +759,10 @@ insert: | |||
757 | } else { | 759 | } else { |
758 | ins_size = csum_size; | 760 | ins_size = csum_size; |
759 | } | 761 | } |
762 | path->leave_spinning = 1; | ||
760 | ret = btrfs_insert_empty_item(trans, root, path, &file_key, | 763 | ret = btrfs_insert_empty_item(trans, root, path, &file_key, |
761 | ins_size); | 764 | ins_size); |
765 | path->leave_spinning = 0; | ||
762 | if (ret < 0) | 766 | if (ret < 0) |
763 | goto fail_unlock; | 767 | goto fail_unlock; |
764 | if (ret != 0) { | 768 | if (ret != 0) { |
@@ -776,7 +780,6 @@ found: | |||
776 | item_end = (struct btrfs_csum_item *)((unsigned char *)item_end + | 780 | item_end = (struct btrfs_csum_item *)((unsigned char *)item_end + |
777 | btrfs_item_size_nr(leaf, path->slots[0])); | 781 | btrfs_item_size_nr(leaf, path->slots[0])); |
778 | eb_token = NULL; | 782 | eb_token = NULL; |
779 | cond_resched(); | ||
780 | next_sector: | 783 | next_sector: |
781 | 784 | ||
782 | if (!eb_token || | 785 | if (!eb_token || |
@@ -817,9 +820,9 @@ next_sector: | |||
817 | eb_token = NULL; | 820 | eb_token = NULL; |
818 | } | 821 | } |
819 | btrfs_mark_buffer_dirty(path->nodes[0]); | 822 | btrfs_mark_buffer_dirty(path->nodes[0]); |
820 | cond_resched(); | ||
821 | if (total_bytes < sums->len) { | 823 | if (total_bytes < sums->len) { |
822 | btrfs_release_path(root, path); | 824 | btrfs_release_path(root, path); |
825 | cond_resched(); | ||
823 | goto again; | 826 | goto again; |
824 | } | 827 | } |
825 | out: | 828 | out: |
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 3e8023efaff7..9c9fb46ccd08 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
@@ -606,6 +606,7 @@ next_slot: | |||
606 | btrfs_set_key_type(&ins, BTRFS_EXTENT_DATA_KEY); | 606 | btrfs_set_key_type(&ins, BTRFS_EXTENT_DATA_KEY); |
607 | 607 | ||
608 | btrfs_release_path(root, path); | 608 | btrfs_release_path(root, path); |
609 | path->leave_spinning = 1; | ||
609 | ret = btrfs_insert_empty_item(trans, root, path, &ins, | 610 | ret = btrfs_insert_empty_item(trans, root, path, &ins, |
610 | sizeof(*extent)); | 611 | sizeof(*extent)); |
611 | BUG_ON(ret); | 612 | BUG_ON(ret); |
@@ -639,17 +640,22 @@ next_slot: | |||
639 | ram_bytes); | 640 | ram_bytes); |
640 | btrfs_set_file_extent_type(leaf, extent, found_type); | 641 | btrfs_set_file_extent_type(leaf, extent, found_type); |
641 | 642 | ||
643 | btrfs_unlock_up_safe(path, 1); | ||
642 | btrfs_mark_buffer_dirty(path->nodes[0]); | 644 | btrfs_mark_buffer_dirty(path->nodes[0]); |
645 | btrfs_set_lock_blocking(path->nodes[0]); | ||
643 | 646 | ||
644 | if (disk_bytenr != 0) { | 647 | if (disk_bytenr != 0) { |
645 | ret = btrfs_update_extent_ref(trans, root, | 648 | ret = btrfs_update_extent_ref(trans, root, |
646 | disk_bytenr, orig_parent, | 649 | disk_bytenr, |
650 | le64_to_cpu(old.disk_num_bytes), | ||
651 | orig_parent, | ||
647 | leaf->start, | 652 | leaf->start, |
648 | root->root_key.objectid, | 653 | root->root_key.objectid, |
649 | trans->transid, ins.objectid); | 654 | trans->transid, ins.objectid); |
650 | 655 | ||
651 | BUG_ON(ret); | 656 | BUG_ON(ret); |
652 | } | 657 | } |
658 | path->leave_spinning = 0; | ||
653 | btrfs_release_path(root, path); | 659 | btrfs_release_path(root, path); |
654 | if (disk_bytenr != 0) | 660 | if (disk_bytenr != 0) |
655 | inode_add_bytes(inode, extent_end - end); | 661 | inode_add_bytes(inode, extent_end - end); |
@@ -912,7 +918,7 @@ again: | |||
912 | btrfs_set_file_extent_other_encoding(leaf, fi, 0); | 918 | btrfs_set_file_extent_other_encoding(leaf, fi, 0); |
913 | 919 | ||
914 | if (orig_parent != leaf->start) { | 920 | if (orig_parent != leaf->start) { |
915 | ret = btrfs_update_extent_ref(trans, root, bytenr, | 921 | ret = btrfs_update_extent_ref(trans, root, bytenr, num_bytes, |
916 | orig_parent, leaf->start, | 922 | orig_parent, leaf->start, |
917 | root->root_key.objectid, | 923 | root->root_key.objectid, |
918 | trans->transid, inode->i_ino); | 924 | trans->transid, inode->i_ino); |
@@ -1091,19 +1097,24 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | |||
1091 | WARN_ON(num_pages > nrptrs); | 1097 | WARN_ON(num_pages > nrptrs); |
1092 | memset(pages, 0, sizeof(struct page *) * nrptrs); | 1098 | memset(pages, 0, sizeof(struct page *) * nrptrs); |
1093 | 1099 | ||
1094 | ret = btrfs_check_free_space(root, write_bytes, 0); | 1100 | ret = btrfs_check_data_free_space(root, inode, write_bytes); |
1095 | if (ret) | 1101 | if (ret) |
1096 | goto out; | 1102 | goto out; |
1097 | 1103 | ||
1098 | ret = prepare_pages(root, file, pages, num_pages, | 1104 | ret = prepare_pages(root, file, pages, num_pages, |
1099 | pos, first_index, last_index, | 1105 | pos, first_index, last_index, |
1100 | write_bytes); | 1106 | write_bytes); |
1101 | if (ret) | 1107 | if (ret) { |
1108 | btrfs_free_reserved_data_space(root, inode, | ||
1109 | write_bytes); | ||
1102 | goto out; | 1110 | goto out; |
1111 | } | ||
1103 | 1112 | ||
1104 | ret = btrfs_copy_from_user(pos, num_pages, | 1113 | ret = btrfs_copy_from_user(pos, num_pages, |
1105 | write_bytes, pages, buf); | 1114 | write_bytes, pages, buf); |
1106 | if (ret) { | 1115 | if (ret) { |
1116 | btrfs_free_reserved_data_space(root, inode, | ||
1117 | write_bytes); | ||
1107 | btrfs_drop_pages(pages, num_pages); | 1118 | btrfs_drop_pages(pages, num_pages); |
1108 | goto out; | 1119 | goto out; |
1109 | } | 1120 | } |
@@ -1111,8 +1122,11 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | |||
1111 | ret = dirty_and_release_pages(NULL, root, file, pages, | 1122 | ret = dirty_and_release_pages(NULL, root, file, pages, |
1112 | num_pages, pos, write_bytes); | 1123 | num_pages, pos, write_bytes); |
1113 | btrfs_drop_pages(pages, num_pages); | 1124 | btrfs_drop_pages(pages, num_pages); |
1114 | if (ret) | 1125 | if (ret) { |
1126 | btrfs_free_reserved_data_space(root, inode, | ||
1127 | write_bytes); | ||
1115 | goto out; | 1128 | goto out; |
1129 | } | ||
1116 | 1130 | ||
1117 | if (will_write) { | 1131 | if (will_write) { |
1118 | btrfs_fdatawrite_range(inode->i_mapping, pos, | 1132 | btrfs_fdatawrite_range(inode->i_mapping, pos, |
@@ -1136,6 +1150,8 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | |||
1136 | } | 1150 | } |
1137 | out: | 1151 | out: |
1138 | mutex_unlock(&inode->i_mutex); | 1152 | mutex_unlock(&inode->i_mutex); |
1153 | if (ret) | ||
1154 | err = ret; | ||
1139 | 1155 | ||
1140 | out_nolock: | 1156 | out_nolock: |
1141 | kfree(pages); | 1157 | kfree(pages); |
@@ -1145,6 +1161,20 @@ out_nolock: | |||
1145 | page_cache_release(pinned[1]); | 1161 | page_cache_release(pinned[1]); |
1146 | *ppos = pos; | 1162 | *ppos = pos; |
1147 | 1163 | ||
1164 | /* | ||
1165 | * we want to make sure fsync finds this change | ||
1166 | * but we haven't joined a transaction running right now. | ||
1167 | * | ||
1168 | * Later on, someone is sure to update the inode and get the | ||
1169 | * real transid recorded. | ||
1170 | * | ||
1171 | * We set last_trans now to the fs_info generation + 1, | ||
1172 | * this will either be one more than the running transaction | ||
1173 | * or the generation used for the next transaction if there isn't | ||
1174 | * one running right now. | ||
1175 | */ | ||
1176 | BTRFS_I(inode)->last_trans = root->fs_info->generation + 1; | ||
1177 | |||
1148 | if (num_written > 0 && will_write) { | 1178 | if (num_written > 0 && will_write) { |
1149 | struct btrfs_trans_handle *trans; | 1179 | struct btrfs_trans_handle *trans; |
1150 | 1180 | ||
@@ -1157,8 +1187,11 @@ out_nolock: | |||
1157 | ret = btrfs_log_dentry_safe(trans, root, | 1187 | ret = btrfs_log_dentry_safe(trans, root, |
1158 | file->f_dentry); | 1188 | file->f_dentry); |
1159 | if (ret == 0) { | 1189 | if (ret == 0) { |
1160 | btrfs_sync_log(trans, root); | 1190 | ret = btrfs_sync_log(trans, root); |
1161 | btrfs_end_transaction(trans, root); | 1191 | if (ret == 0) |
1192 | btrfs_end_transaction(trans, root); | ||
1193 | else | ||
1194 | btrfs_commit_transaction(trans, root); | ||
1162 | } else { | 1195 | } else { |
1163 | btrfs_commit_transaction(trans, root); | 1196 | btrfs_commit_transaction(trans, root); |
1164 | } | 1197 | } |
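The long comment in the hunk above explains the trick: the write path has not joined a transaction, so it stamps the inode with generation + 1, a transid that cannot possibly have been committed yet. That keeps fsync from wrongly concluding the inode is clean. Roughly how the consuming side can use the stamp, as a hypothetical helper (assuming fs_info->last_trans_committed tracks the last finished commit):

        static int inode_needs_fsync(struct inode *inode,
                                     struct btrfs_root *root)
        {
                /* generation + 1 compares greater than any committed
                 * transid until a real commit records the true value */
                return BTRFS_I(inode)->last_trans >
                       root->fs_info->last_trans_committed;
        }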
@@ -1175,6 +1208,18 @@ out_nolock: | |||
1175 | 1208 | ||
1176 | int btrfs_release_file(struct inode *inode, struct file *filp) | 1209 | int btrfs_release_file(struct inode *inode, struct file *filp) |
1177 | { | 1210 | { |
1211 | /* | ||
1212 | * ordered_data_close is set by setattr when we are about to truncate | ||
1213 | * a file from a non-zero size to a zero size. This tries to | ||
1214 | * flush down new bytes that may have been written if the | ||
1215 | * application were using truncate to replace a file in place. | ||
1216 | */ | ||
1217 | if (BTRFS_I(inode)->ordered_data_close) { | ||
1218 | BTRFS_I(inode)->ordered_data_close = 0; | ||
1219 | btrfs_add_ordered_operation(NULL, BTRFS_I(inode)->root, inode); | ||
1220 | if (inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT) | ||
1221 | filemap_flush(inode->i_mapping); | ||
1222 | } | ||
1178 | if (filp->private_data) | 1223 | if (filp->private_data) |
1179 | btrfs_ioctl_trans_end(filp); | 1224 | btrfs_ioctl_trans_end(filp); |
1180 | return 0; | 1225 | return 0; |
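The new release hook targets one workload in particular: applications that replace a file in place by truncating and rewriting it rather than writing a temp file and renaming over it. Flushing on the final close shrinks the crash window in which the file exists at size zero. The pattern being defended against, as a plain userspace sketch:

        #include <fcntl.h>
        #include <unistd.h>

        /* rewrite a file in place; without help from the filesystem, a
         * crash after the truncate can leave a zero-length file behind */
        int replace_in_place(const char *path, const void *buf, size_t len)
        {
                int fd = open(path, O_WRONLY | O_TRUNC);

                if (fd < 0)
                        return -1;
                if (write(fd, buf, len) != (ssize_t)len) {
                        close(fd);
                        return -1;
                }
                return close(fd);       /* final close kicks the flush above */
        }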
@@ -1222,7 +1267,7 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync) | |||
1222 | /* | 1267 | /* |
1223 | * ok we haven't committed the transaction yet, lets do a commit | 1268 | * ok we haven't committed the transaction yet, lets do a commit |
1224 | */ | 1269 | */ |
1225 | if (file->private_data) | 1270 | if (file && file->private_data) |
1226 | btrfs_ioctl_trans_end(file); | 1271 | btrfs_ioctl_trans_end(file); |
1227 | 1272 | ||
1228 | trans = btrfs_start_transaction(root, 1); | 1273 | trans = btrfs_start_transaction(root, 1); |
@@ -1231,7 +1276,7 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync) | |||
1231 | goto out; | 1276 | goto out; |
1232 | } | 1277 | } |
1233 | 1278 | ||
1234 | ret = btrfs_log_dentry_safe(trans, root, file->f_dentry); | 1279 | ret = btrfs_log_dentry_safe(trans, root, dentry); |
1235 | if (ret < 0) | 1280 | if (ret < 0) |
1236 | goto out; | 1281 | goto out; |
1237 | 1282 | ||
@@ -1245,15 +1290,18 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync) | |||
1245 | * file again, but that will end up using the synchronization | 1290 | * file again, but that will end up using the synchronization |
1246 | * inside btrfs_sync_log to keep things safe. | 1291 | * inside btrfs_sync_log to keep things safe. |
1247 | */ | 1292 | */ |
1248 | mutex_unlock(&file->f_dentry->d_inode->i_mutex); | 1293 | mutex_unlock(&dentry->d_inode->i_mutex); |
1249 | 1294 | ||
1250 | if (ret > 0) { | 1295 | if (ret > 0) { |
1251 | ret = btrfs_commit_transaction(trans, root); | 1296 | ret = btrfs_commit_transaction(trans, root); |
1252 | } else { | 1297 | } else { |
1253 | btrfs_sync_log(trans, root); | 1298 | ret = btrfs_sync_log(trans, root); |
1254 | ret = btrfs_end_transaction(trans, root); | 1299 | if (ret == 0) |
1300 | ret = btrfs_end_transaction(trans, root); | ||
1301 | else | ||
1302 | ret = btrfs_commit_transaction(trans, root); | ||
1255 | } | 1303 | } |
1256 | mutex_lock(&file->f_dentry->d_inode->i_mutex); | 1304 | mutex_lock(&dentry->d_inode->i_mutex); |
1257 | out: | 1305 | out: |
1258 | return ret > 0 ? EIO : ret; | 1306 | return ret > 0 ? EIO : ret; |
1259 | } | 1307 | } |
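Both fsync-path call sites in this file now treat btrfs_sync_log as fallible: on success the cheap tree-log write plus btrfs_end_transaction suffices, and on failure the caller escalates to a full btrfs_commit_transaction so the durability promise still holds. The shared shape, as a hypothetical helper:

        static int sync_log_or_commit(struct btrfs_trans_handle *trans,
                                      struct btrfs_root *root)
        {
                int ret = btrfs_sync_log(trans, root);

                if (ret == 0)           /* log written, cheap path */
                        return btrfs_end_transaction(trans, root);
                /* log failed: fall back to a full commit */
                return btrfs_commit_transaction(trans, root);
        }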
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index d1e5f0e84c58..768b9523662d 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c | |||
@@ -18,6 +18,15 @@ | |||
18 | 18 | ||
19 | #include <linux/sched.h> | 19 | #include <linux/sched.h> |
20 | #include "ctree.h" | 20 | #include "ctree.h" |
21 | #include "free-space-cache.h" | ||
22 | #include "transaction.h" | ||
23 | |||
24 | struct btrfs_free_space { | ||
25 | struct rb_node bytes_index; | ||
26 | struct rb_node offset_index; | ||
27 | u64 offset; | ||
28 | u64 bytes; | ||
29 | }; | ||
21 | 30 | ||
22 | static int tree_insert_offset(struct rb_root *root, u64 offset, | 31 | static int tree_insert_offset(struct rb_root *root, u64 offset, |
23 | struct rb_node *node) | 32 | struct rb_node *node) |
@@ -68,14 +77,24 @@ static int tree_insert_bytes(struct rb_root *root, u64 bytes, | |||
68 | } | 77 | } |
69 | 78 | ||
70 | /* | 79 | /* |
71 | * searches the tree for the given offset. If contains is set we will return | 80 | * searches the tree for the given offset. |
72 | * the free space that contains the given offset. If contains is not set we | 81 | * |
73 | * will return the free space that starts at or after the given offset and is | 82 | * fuzzy == 1: this is used for allocations where we are given a hint of where |
74 | * at least bytes long. | 83 | * to look for free space. Because the hint may not be completely on an offset |
84 | * mark, or the hint may no longer point to free space, we need to fudge our | ||
85 | * results a bit. So we look for free space starting at or after offset with at | ||
86 | * least bytes size. We prefer to find as close to the given offset as we can. | ||
87 | * Also if the offset is within a free space range, then we will return the free | ||
88 | * space that contains the given offset, which means we can return a free space | ||
89 | * chunk with an offset before the provided offset. | ||
90 | * | ||
91 | * fuzzy == 0: this is just a normal tree search. Give us the free space that | ||
92 | * starts at the given offset and is at least bytes in size; if it's not there, | ||
93 | * return NULL. | ||
75 | */ | 94 | */ |
76 | static struct btrfs_free_space *tree_search_offset(struct rb_root *root, | 95 | static struct btrfs_free_space *tree_search_offset(struct rb_root *root, |
77 | u64 offset, u64 bytes, | 96 | u64 offset, u64 bytes, |
78 | int contains) | 97 | int fuzzy) |
79 | { | 98 | { |
80 | struct rb_node *n = root->rb_node; | 99 | struct rb_node *n = root->rb_node; |
81 | struct btrfs_free_space *entry, *ret = NULL; | 100 | struct btrfs_free_space *entry, *ret = NULL; |
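The rewritten comment above pins down the two lookup modes, and the btrfs_add_free_space changes later in this file lean on the distinction: a right-hand neighbor must start exactly at offset + bytes (fuzzy == 0), while a left-hand neighbor only has to contain offset - 1 (fuzzy == 1), with an explicit adjacency check afterwards. A side-by-side sketch (tree_search_offset is static to this file, so this caller is illustrative only):

        static struct btrfs_free_space *
        lookup_example(struct btrfs_block_group_cache *bg, u64 offset,
                       u64 bytes)
        {
                struct btrfs_free_space *entry;

                /* exact: only an entry starting precisely at offset */
                entry = tree_search_offset(&bg->free_space_offset,
                                           offset, bytes, 0);
                if (entry)
                        return entry;

                /* fuzzy: also accept the entry containing offset, or
                 * the closest suitable entry after it */
                return tree_search_offset(&bg->free_space_offset,
                                          offset, bytes, 1);
        }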
@@ -84,13 +103,14 @@ static struct btrfs_free_space *tree_search_offset(struct rb_root *root, | |||
84 | entry = rb_entry(n, struct btrfs_free_space, offset_index); | 103 | entry = rb_entry(n, struct btrfs_free_space, offset_index); |
85 | 104 | ||
86 | if (offset < entry->offset) { | 105 | if (offset < entry->offset) { |
87 | if (!contains && | 106 | if (fuzzy && |
88 | (!ret || entry->offset < ret->offset) && | 107 | (!ret || entry->offset < ret->offset) && |
89 | (bytes <= entry->bytes)) | 108 | (bytes <= entry->bytes)) |
90 | ret = entry; | 109 | ret = entry; |
91 | n = n->rb_left; | 110 | n = n->rb_left; |
92 | } else if (offset > entry->offset) { | 111 | } else if (offset > entry->offset) { |
93 | if ((entry->offset + entry->bytes - 1) >= offset && | 112 | if (fuzzy && |
113 | (entry->offset + entry->bytes - 1) >= offset && | ||
94 | bytes <= entry->bytes) { | 114 | bytes <= entry->bytes) { |
95 | ret = entry; | 115 | ret = entry; |
96 | break; | 116 | break; |
@@ -171,6 +191,7 @@ static int link_free_space(struct btrfs_block_group_cache *block_group, | |||
171 | int ret = 0; | 191 | int ret = 0; |
172 | 192 | ||
173 | 193 | ||
194 | BUG_ON(!info->bytes); | ||
174 | ret = tree_insert_offset(&block_group->free_space_offset, info->offset, | 195 | ret = tree_insert_offset(&block_group->free_space_offset, info->offset, |
175 | &info->offset_index); | 196 | &info->offset_index); |
176 | if (ret) | 197 | if (ret) |
@@ -184,108 +205,70 @@ static int link_free_space(struct btrfs_block_group_cache *block_group, | |||
184 | return ret; | 205 | return ret; |
185 | } | 206 | } |
186 | 207 | ||
187 | static int __btrfs_add_free_space(struct btrfs_block_group_cache *block_group, | 208 | int btrfs_add_free_space(struct btrfs_block_group_cache *block_group, |
188 | u64 offset, u64 bytes) | 209 | u64 offset, u64 bytes) |
189 | { | 210 | { |
190 | struct btrfs_free_space *right_info; | 211 | struct btrfs_free_space *right_info; |
191 | struct btrfs_free_space *left_info; | 212 | struct btrfs_free_space *left_info; |
192 | struct btrfs_free_space *info = NULL; | 213 | struct btrfs_free_space *info = NULL; |
193 | struct btrfs_free_space *alloc_info; | ||
194 | int ret = 0; | 214 | int ret = 0; |
195 | 215 | ||
196 | alloc_info = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS); | 216 | info = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS); |
197 | if (!alloc_info) | 217 | if (!info) |
198 | return -ENOMEM; | 218 | return -ENOMEM; |
199 | 219 | ||
220 | info->offset = offset; | ||
221 | info->bytes = bytes; | ||
222 | |||
223 | spin_lock(&block_group->tree_lock); | ||
224 | |||
200 | /* | 225 | /* |
201 | * first we want to see if there is free space adjacent to the range we | 226 | * first we want to see if there is free space adjacent to the range we |
202 | * are adding, if there is remove that struct and add a new one to | 227 | * are adding, if there is remove that struct and add a new one to |
203 | * cover the entire range | 228 | * cover the entire range |
204 | */ | 229 | */ |
205 | right_info = tree_search_offset(&block_group->free_space_offset, | 230 | right_info = tree_search_offset(&block_group->free_space_offset, |
206 | offset+bytes, 0, 1); | 231 | offset+bytes, 0, 0); |
207 | left_info = tree_search_offset(&block_group->free_space_offset, | 232 | left_info = tree_search_offset(&block_group->free_space_offset, |
208 | offset-1, 0, 1); | 233 | offset-1, 0, 1); |
209 | 234 | ||
210 | if (right_info && right_info->offset == offset+bytes) { | 235 | if (right_info) { |
211 | unlink_free_space(block_group, right_info); | 236 | unlink_free_space(block_group, right_info); |
212 | info = right_info; | 237 | info->bytes += right_info->bytes; |
213 | info->offset = offset; | 238 | kfree(right_info); |
214 | info->bytes += bytes; | ||
215 | } else if (right_info && right_info->offset != offset+bytes) { | ||
216 | printk(KERN_ERR "btrfs adding space in the middle of an " | ||
217 | "existing free space area. existing: " | ||
218 | "offset=%llu, bytes=%llu. new: offset=%llu, " | ||
219 | "bytes=%llu\n", (unsigned long long)right_info->offset, | ||
220 | (unsigned long long)right_info->bytes, | ||
221 | (unsigned long long)offset, | ||
222 | (unsigned long long)bytes); | ||
223 | BUG(); | ||
224 | } | 239 | } |
225 | 240 | ||
226 | if (left_info) { | 241 | if (left_info && left_info->offset + left_info->bytes == offset) { |
227 | unlink_free_space(block_group, left_info); | 242 | unlink_free_space(block_group, left_info); |
228 | 243 | info->offset = left_info->offset; | |
229 | if (unlikely((left_info->offset + left_info->bytes) != | 244 | info->bytes += left_info->bytes; |
230 | offset)) { | 245 | kfree(left_info); |
231 | printk(KERN_ERR "btrfs free space to the left " | ||
232 | "of new free space isn't " | ||
233 | "quite right. existing: offset=%llu, " | ||
234 | "bytes=%llu. new: offset=%llu, bytes=%llu\n", | ||
235 | (unsigned long long)left_info->offset, | ||
236 | (unsigned long long)left_info->bytes, | ||
237 | (unsigned long long)offset, | ||
238 | (unsigned long long)bytes); | ||
239 | BUG(); | ||
240 | } | ||
241 | |||
242 | if (info) { | ||
243 | info->offset = left_info->offset; | ||
244 | info->bytes += left_info->bytes; | ||
245 | kfree(left_info); | ||
246 | } else { | ||
247 | info = left_info; | ||
248 | info->bytes += bytes; | ||
249 | } | ||
250 | } | 246 | } |
251 | 247 | ||
252 | if (info) { | ||
253 | ret = link_free_space(block_group, info); | ||
254 | if (!ret) | ||
255 | info = NULL; | ||
256 | goto out; | ||
257 | } | ||
258 | |||
259 | info = alloc_info; | ||
260 | alloc_info = NULL; | ||
261 | info->offset = offset; | ||
262 | info->bytes = bytes; | ||
263 | |||
264 | ret = link_free_space(block_group, info); | 248 | ret = link_free_space(block_group, info); |
265 | if (ret) | 249 | if (ret) |
266 | kfree(info); | 250 | kfree(info); |
267 | out: | 251 | |
252 | spin_unlock(&block_group->tree_lock); | ||
253 | |||
268 | if (ret) { | 254 | if (ret) { |
269 | printk(KERN_ERR "btrfs: unable to add free space :%d\n", ret); | 255 | printk(KERN_ERR "btrfs: unable to add free space :%d\n", ret); |
270 | if (ret == -EEXIST) | 256 | BUG_ON(ret == -EEXIST); |
271 | BUG(); | ||
272 | } | 257 | } |
273 | 258 | ||
274 | kfree(alloc_info); | ||
275 | |||
276 | return ret; | 259 | return ret; |
277 | } | 260 | } |
278 | 261 | ||
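A concrete run of the merge logic above, with hypothetical numbers: suppose [100,150) and [200,250) are already free and btrfs_add_free_space(block_group, 150, 50) is called.

        /*
         * right = tree_search_offset(..., 150 + 50, 0, 0)  exact hit at 200
         *         -> unlink [200,250); info grows to offset=150, bytes=100
         * left  = tree_search_offset(..., 150 - 1, 0, 1)   fuzzy hit [100,150)
         *         -> 100 + 50 == 150, so it abuts: info becomes
         *            offset=100, bytes=150
         * link_free_space() then inserts the single merged entry [100,250)
         */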
279 | static int | 262 | int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, |
280 | __btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, | 263 | u64 offset, u64 bytes) |
281 | u64 offset, u64 bytes) | ||
282 | { | 264 | { |
283 | struct btrfs_free_space *info; | 265 | struct btrfs_free_space *info; |
284 | int ret = 0; | 266 | int ret = 0; |
285 | 267 | ||
268 | spin_lock(&block_group->tree_lock); | ||
269 | |||
286 | info = tree_search_offset(&block_group->free_space_offset, offset, 0, | 270 | info = tree_search_offset(&block_group->free_space_offset, offset, 0, |
287 | 1); | 271 | 1); |
288 | |||
289 | if (info && info->offset == offset) { | 272 | if (info && info->offset == offset) { |
290 | if (info->bytes < bytes) { | 273 | if (info->bytes < bytes) { |
291 | printk(KERN_ERR "Found free space at %llu, size %llu," | 274 | printk(KERN_ERR "Found free space at %llu, size %llu," |
@@ -295,12 +278,14 @@ __btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, | |||
295 | (unsigned long long)bytes); | 278 | (unsigned long long)bytes); |
296 | WARN_ON(1); | 279 | WARN_ON(1); |
297 | ret = -EINVAL; | 280 | ret = -EINVAL; |
281 | spin_unlock(&block_group->tree_lock); | ||
298 | goto out; | 282 | goto out; |
299 | } | 283 | } |
300 | unlink_free_space(block_group, info); | 284 | unlink_free_space(block_group, info); |
301 | 285 | ||
302 | if (info->bytes == bytes) { | 286 | if (info->bytes == bytes) { |
303 | kfree(info); | 287 | kfree(info); |
288 | spin_unlock(&block_group->tree_lock); | ||
304 | goto out; | 289 | goto out; |
305 | } | 290 | } |
306 | 291 | ||
@@ -308,6 +293,7 @@ __btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, | |||
308 | info->bytes -= bytes; | 293 | info->bytes -= bytes; |
309 | 294 | ||
310 | ret = link_free_space(block_group, info); | 295 | ret = link_free_space(block_group, info); |
296 | spin_unlock(&block_group->tree_lock); | ||
311 | BUG_ON(ret); | 297 | BUG_ON(ret); |
312 | } else if (info && info->offset < offset && | 298 | } else if (info && info->offset < offset && |
313 | info->offset + info->bytes >= offset + bytes) { | 299 | info->offset + info->bytes >= offset + bytes) { |
@@ -333,70 +319,33 @@ __btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, | |||
333 | */ | 319 | */ |
334 | kfree(info); | 320 | kfree(info); |
335 | } | 321 | } |
336 | 322 | spin_unlock(&block_group->tree_lock); | |
337 | /* step two, insert a new info struct to cover anything | 323 | /* step two, insert a new info struct to cover anything |
338 | * before the hole | 324 | * before the hole |
339 | */ | 325 | */ |
340 | ret = __btrfs_add_free_space(block_group, old_start, | 326 | ret = btrfs_add_free_space(block_group, old_start, |
341 | offset - old_start); | 327 | offset - old_start); |
342 | BUG_ON(ret); | 328 | BUG_ON(ret); |
343 | } else { | 329 | } else { |
330 | spin_unlock(&block_group->tree_lock); | ||
331 | if (!info) { | ||
332 | printk(KERN_ERR "couldn't find space %llu to free\n", | ||
333 | (unsigned long long)offset); | ||
334 | printk(KERN_ERR "cached is %d, offset %llu bytes %llu\n", | ||
335 | block_group->cached, block_group->key.objectid, | ||
336 | block_group->key.offset); | ||
337 | btrfs_dump_free_space(block_group, bytes); | ||
338 | } else if (info) { | ||
339 | printk(KERN_ERR "hmm, found offset=%llu bytes=%llu, " | ||
340 | "but wanted offset=%llu bytes=%llu\n", | ||
341 | info->offset, info->bytes, offset, bytes); | ||
342 | } | ||
344 | WARN_ON(1); | 343 | WARN_ON(1); |
345 | } | 344 | } |
346 | out: | 345 | out: |
347 | return ret; | 346 | return ret; |
348 | } | 347 | } |
349 | 348 | ||
350 | int btrfs_add_free_space(struct btrfs_block_group_cache *block_group, | ||
351 | u64 offset, u64 bytes) | ||
352 | { | ||
353 | int ret; | ||
354 | struct btrfs_free_space *sp; | ||
355 | |||
356 | mutex_lock(&block_group->alloc_mutex); | ||
357 | ret = __btrfs_add_free_space(block_group, offset, bytes); | ||
358 | sp = tree_search_offset(&block_group->free_space_offset, offset, 0, 1); | ||
359 | BUG_ON(!sp); | ||
360 | mutex_unlock(&block_group->alloc_mutex); | ||
361 | |||
362 | return ret; | ||
363 | } | ||
364 | |||
365 | int btrfs_add_free_space_lock(struct btrfs_block_group_cache *block_group, | ||
366 | u64 offset, u64 bytes) | ||
367 | { | ||
368 | int ret; | ||
369 | struct btrfs_free_space *sp; | ||
370 | |||
371 | ret = __btrfs_add_free_space(block_group, offset, bytes); | ||
372 | sp = tree_search_offset(&block_group->free_space_offset, offset, 0, 1); | ||
373 | BUG_ON(!sp); | ||
374 | |||
375 | return ret; | ||
376 | } | ||
377 | |||
378 | int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, | ||
379 | u64 offset, u64 bytes) | ||
380 | { | ||
381 | int ret = 0; | ||
382 | |||
383 | mutex_lock(&block_group->alloc_mutex); | ||
384 | ret = __btrfs_remove_free_space(block_group, offset, bytes); | ||
385 | mutex_unlock(&block_group->alloc_mutex); | ||
386 | |||
387 | return ret; | ||
388 | } | ||
389 | |||
390 | int btrfs_remove_free_space_lock(struct btrfs_block_group_cache *block_group, | ||
391 | u64 offset, u64 bytes) | ||
392 | { | ||
393 | int ret; | ||
394 | |||
395 | ret = __btrfs_remove_free_space(block_group, offset, bytes); | ||
396 | |||
397 | return ret; | ||
398 | } | ||
399 | |||
400 | void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, | 349 | void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, |
401 | u64 bytes) | 350 | u64 bytes) |
402 | { | 351 | { |
@@ -408,6 +357,8 @@ void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, | |||
408 | info = rb_entry(n, struct btrfs_free_space, offset_index); | 357 | info = rb_entry(n, struct btrfs_free_space, offset_index); |
409 | if (info->bytes >= bytes) | 358 | if (info->bytes >= bytes) |
410 | count++; | 359 | count++; |
360 | printk(KERN_ERR "entry offset %llu, bytes %llu\n", info->offset, | ||
361 | info->bytes); | ||
411 | } | 362 | } |
412 | printk(KERN_INFO "%d blocks of free space at or bigger than bytes is" | 363 | printk(KERN_INFO "%d blocks of free space at or bigger than bytes is" |
413 | "\n", count); | 364 | "\n", count); |
@@ -428,68 +379,337 @@ u64 btrfs_block_group_free_space(struct btrfs_block_group_cache *block_group) | |||
428 | return ret; | 379 | return ret; |
429 | } | 380 | } |
430 | 381 | ||
382 | /* | ||
383 | * for a given cluster, put all of its extents back into the free | ||
384 | * space cache. If the block group passed doesn't match the block group | ||
385 | * pointed to by the cluster, someone else raced in and freed the | ||
386 | * cluster already. In that case, we just return without changing anything | ||
387 | */ | ||
388 | static int | ||
389 | __btrfs_return_cluster_to_free_space( | ||
390 | struct btrfs_block_group_cache *block_group, | ||
391 | struct btrfs_free_cluster *cluster) | ||
392 | { | ||
393 | struct btrfs_free_space *entry; | ||
394 | struct rb_node *node; | ||
395 | |||
396 | spin_lock(&cluster->lock); | ||
397 | if (cluster->block_group != block_group) | ||
398 | goto out; | ||
399 | |||
400 | cluster->window_start = 0; | ||
401 | node = rb_first(&cluster->root); | ||
402 | while(node) { | ||
403 | entry = rb_entry(node, struct btrfs_free_space, offset_index); | ||
404 | node = rb_next(&entry->offset_index); | ||
405 | rb_erase(&entry->offset_index, &cluster->root); | ||
406 | link_free_space(block_group, entry); | ||
407 | } | ||
408 | list_del_init(&cluster->block_group_list); | ||
409 | |||
410 | btrfs_put_block_group(cluster->block_group); | ||
411 | cluster->block_group = NULL; | ||
412 | cluster->root.rb_node = NULL; | ||
413 | out: | ||
414 | spin_unlock(&cluster->lock); | ||
415 | return 0; | ||
416 | } | ||
417 | |||
431 | void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group) | 418 | void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group) |
432 | { | 419 | { |
433 | struct btrfs_free_space *info; | 420 | struct btrfs_free_space *info; |
434 | struct rb_node *node; | 421 | struct rb_node *node; |
422 | struct btrfs_free_cluster *cluster; | ||
423 | struct btrfs_free_cluster *safe; | ||
424 | |||
425 | spin_lock(&block_group->tree_lock); | ||
426 | |||
427 | list_for_each_entry_safe(cluster, safe, &block_group->cluster_list, | ||
428 | block_group_list) { | ||
429 | |||
430 | WARN_ON(cluster->block_group != block_group); | ||
431 | __btrfs_return_cluster_to_free_space(block_group, cluster); | ||
432 | } | ||
435 | 433 | ||
436 | mutex_lock(&block_group->alloc_mutex); | ||
437 | while ((node = rb_last(&block_group->free_space_bytes)) != NULL) { | 434 | while ((node = rb_last(&block_group->free_space_bytes)) != NULL) { |
438 | info = rb_entry(node, struct btrfs_free_space, bytes_index); | 435 | info = rb_entry(node, struct btrfs_free_space, bytes_index); |
439 | unlink_free_space(block_group, info); | 436 | unlink_free_space(block_group, info); |
440 | kfree(info); | 437 | kfree(info); |
441 | if (need_resched()) { | 438 | if (need_resched()) { |
442 | mutex_unlock(&block_group->alloc_mutex); | 439 | spin_unlock(&block_group->tree_lock); |
443 | cond_resched(); | 440 | cond_resched(); |
444 | mutex_lock(&block_group->alloc_mutex); | 441 | spin_lock(&block_group->tree_lock); |
445 | } | 442 | } |
446 | } | 443 | } |
447 | mutex_unlock(&block_group->alloc_mutex); | 444 | spin_unlock(&block_group->tree_lock); |
448 | } | 445 | } |
449 | 446 | ||
450 | #if 0 | 447 | u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group, |
451 | static struct btrfs_free_space *btrfs_find_free_space_offset(struct | 448 | u64 offset, u64 bytes, u64 empty_size) |
452 | btrfs_block_group_cache | ||
453 | *block_group, u64 offset, | ||
454 | u64 bytes) | ||
455 | { | 449 | { |
456 | struct btrfs_free_space *ret; | 450 | struct btrfs_free_space *entry = NULL; |
451 | u64 ret = 0; | ||
457 | 452 | ||
458 | mutex_lock(&block_group->alloc_mutex); | 453 | spin_lock(&block_group->tree_lock); |
459 | ret = tree_search_offset(&block_group->free_space_offset, offset, | 454 | entry = tree_search_offset(&block_group->free_space_offset, offset, |
460 | bytes, 0); | 455 | bytes + empty_size, 1); |
461 | mutex_unlock(&block_group->alloc_mutex); | 456 | if (!entry) |
457 | entry = tree_search_bytes(&block_group->free_space_bytes, | ||
458 | offset, bytes + empty_size); | ||
459 | if (entry) { | ||
460 | unlink_free_space(block_group, entry); | ||
461 | ret = entry->offset; | ||
462 | entry->offset += bytes; | ||
463 | entry->bytes -= bytes; | ||
464 | |||
465 | if (!entry->bytes) | ||
466 | kfree(entry); | ||
467 | else | ||
468 | link_free_space(block_group, entry); | ||
469 | } | ||
470 | spin_unlock(&block_group->tree_lock); | ||
462 | 471 | ||
463 | return ret; | 472 | return ret; |
464 | } | 473 | } |
465 | 474 | ||
466 | static struct btrfs_free_space *btrfs_find_free_space_bytes(struct | 475 | /* |
467 | btrfs_block_group_cache | 476 | * given a cluster, put all of its extents back into the free space |
468 | *block_group, u64 offset, | 477 | * cache. If a block group is passed, this function will only free |
469 | u64 bytes) | 478 | * a cluster that belongs to the passed block group. |
479 | * | ||
480 | * Otherwise, it'll get a reference on the block group pointed to by the | ||
481 | * cluster and remove the cluster from it. | ||
482 | */ | ||
483 | int btrfs_return_cluster_to_free_space( | ||
484 | struct btrfs_block_group_cache *block_group, | ||
485 | struct btrfs_free_cluster *cluster) | ||
470 | { | 486 | { |
471 | struct btrfs_free_space *ret; | 487 | int ret; |
472 | 488 | ||
473 | mutex_lock(&block_group->alloc_mutex); | 489 | /* first, get a safe pointer to the block group */ |
490 | spin_lock(&cluster->lock); | ||
491 | if (!block_group) { | ||
492 | block_group = cluster->block_group; | ||
493 | if (!block_group) { | ||
494 | spin_unlock(&cluster->lock); | ||
495 | return 0; | ||
496 | } | ||
497 | } else if (cluster->block_group != block_group) { | ||
498 | /* someone else has already freed it, don't redo their work */ | ||
499 | spin_unlock(&cluster->lock); | ||
500 | return 0; | ||
501 | } | ||
502 | atomic_inc(&block_group->count); | ||
503 | spin_unlock(&cluster->lock); | ||
474 | 504 | ||
475 | ret = tree_search_bytes(&block_group->free_space_bytes, offset, bytes); | 505 | /* now return any extents the cluster had on it */ |
476 | mutex_unlock(&block_group->alloc_mutex); | 506 | spin_lock(&block_group->tree_lock); |
507 | ret = __btrfs_return_cluster_to_free_space(block_group, cluster); | ||
508 | spin_unlock(&block_group->tree_lock); | ||
477 | 509 | ||
510 | /* finally drop our ref */ | ||
511 | btrfs_put_block_group(block_group); | ||
478 | return ret; | 512 | return ret; |
479 | } | 513 | } |
480 | #endif | ||
481 | 514 | ||
482 | struct btrfs_free_space *btrfs_find_free_space(struct btrfs_block_group_cache | 515 | /* |
483 | *block_group, u64 offset, | 516 | * given a cluster, try to allocate 'bytes' from it, returns 0 |
484 | u64 bytes) | 517 | * if it couldn't find anything suitably large, or a logical disk offset |
518 | * if things worked out | ||
519 | */ | ||
520 | u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group, | ||
521 | struct btrfs_free_cluster *cluster, u64 bytes, | ||
522 | u64 min_start) | ||
523 | { | ||
524 | struct btrfs_free_space *entry = NULL; | ||
525 | struct rb_node *node; | ||
526 | u64 ret = 0; | ||
527 | |||
528 | spin_lock(&cluster->lock); | ||
529 | if (bytes > cluster->max_size) | ||
530 | goto out; | ||
531 | |||
532 | if (cluster->block_group != block_group) | ||
533 | goto out; | ||
534 | |||
535 | node = rb_first(&cluster->root); | ||
536 | if (!node) | ||
537 | goto out; | ||
538 | |||
539 | entry = rb_entry(node, struct btrfs_free_space, offset_index); | ||
540 | |||
541 | while(1) { | ||
542 | if (entry->bytes < bytes || entry->offset < min_start) { | ||
543 | struct rb_node *node; | ||
544 | |||
545 | node = rb_next(&entry->offset_index); | ||
546 | if (!node) | ||
547 | break; | ||
548 | entry = rb_entry(node, struct btrfs_free_space, | ||
549 | offset_index); | ||
550 | continue; | ||
551 | } | ||
552 | ret = entry->offset; | ||
553 | |||
554 | entry->offset += bytes; | ||
555 | entry->bytes -= bytes; | ||
556 | |||
557 | if (entry->bytes == 0) { | ||
558 | rb_erase(&entry->offset_index, &cluster->root); | ||
559 | kfree(entry); | ||
560 | } | ||
561 | break; | ||
562 | } | ||
563 | out: | ||
564 | spin_unlock(&cluster->lock); | ||
565 | return ret; | ||
566 | } | ||
567 | |||
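Per the comment block above btrfs_alloc_from_cluster, a return of 0 means the cluster had nothing suitably large; callers treat that as a miss and go back to the regular per-block-group free space cache. A hypothetical caller showing that flow:

        static u64 alloc_cluster_then_tree(struct btrfs_block_group_cache *bg,
                                           struct btrfs_free_cluster *cluster,
                                           u64 bytes, u64 min_start)
        {
                u64 start;

                start = btrfs_alloc_from_cluster(bg, cluster, bytes,
                                                 min_start);
                if (start)
                        return start;   /* cluster satisfied it */

                /* miss: fall back to the free space cache proper */
                return btrfs_find_space_for_alloc(bg, min_start, bytes, 0);
        }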
568 | /* | ||
569 | * here we try to find a cluster of blocks in a block group. The goal | ||
570 | * is to find at least bytes free and up to empty_size + bytes free. | ||
571 | * We might not find them all in one contiguous area. | ||
572 | * | ||
573 | * returns zero and sets up cluster if things worked out, otherwise | ||
574 | * it returns -ENOSPC | ||
575 | */ | ||
576 | int btrfs_find_space_cluster(struct btrfs_trans_handle *trans, | ||
577 | struct btrfs_block_group_cache *block_group, | ||
578 | struct btrfs_free_cluster *cluster, | ||
579 | u64 offset, u64 bytes, u64 empty_size) | ||
485 | { | 580 | { |
486 | struct btrfs_free_space *ret = NULL; | 581 | struct btrfs_free_space *entry = NULL; |
582 | struct rb_node *node; | ||
583 | struct btrfs_free_space *next; | ||
584 | struct btrfs_free_space *last; | ||
585 | u64 min_bytes; | ||
586 | u64 window_start; | ||
587 | u64 window_free; | ||
588 | u64 max_extent = 0; | ||
589 | int total_retries = 0; | ||
590 | int ret; | ||
591 | |||
592 | /* for metadata, allow allocations with more holes */ | ||
593 | if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) { | ||
594 | /* | ||
595 | * we want to do larger allocations when we are | ||
596 | * flushing out the delayed refs, it helps prevent | ||
597 | * making more work as we go along. | ||
598 | */ | ||
599 | if (trans->transaction->delayed_refs.flushing) | ||
600 | min_bytes = max(bytes, (bytes + empty_size) >> 1); | ||
601 | else | ||
602 | min_bytes = max(bytes, (bytes + empty_size) >> 4); | ||
603 | } else | ||
604 | min_bytes = max(bytes, (bytes + empty_size) >> 2); | ||
605 | |||
606 | spin_lock(&block_group->tree_lock); | ||
607 | spin_lock(&cluster->lock); | ||
608 | |||
609 | /* someone already found a cluster, hooray */ | ||
610 | if (cluster->block_group) { | ||
611 | ret = 0; | ||
612 | goto out; | ||
613 | } | ||
614 | again: | ||
615 | min_bytes = min(min_bytes, bytes + empty_size); | ||
616 | entry = tree_search_bytes(&block_group->free_space_bytes, | ||
617 | offset, min_bytes); | ||
618 | if (!entry) { | ||
619 | ret = -ENOSPC; | ||
620 | goto out; | ||
621 | } | ||
622 | window_start = entry->offset; | ||
623 | window_free = entry->bytes; | ||
624 | last = entry; | ||
625 | max_extent = entry->bytes; | ||
626 | |||
627 | while(1) { | ||
628 | /* our window is just right, let's fill it */ | ||
629 | if (window_free >= bytes + empty_size) | ||
630 | break; | ||
487 | 631 | ||
488 | ret = tree_search_offset(&block_group->free_space_offset, offset, | 632 | node = rb_next(&last->offset_index); |
489 | bytes, 0); | 633 | if (!node) { |
490 | if (!ret) | 634 | ret = -ENOSPC; |
491 | ret = tree_search_bytes(&block_group->free_space_bytes, | 635 | goto out; |
492 | offset, bytes); | 636 | } |
637 | next = rb_entry(node, struct btrfs_free_space, offset_index); | ||
638 | |||
639 | /* | ||
640 | * we haven't filled the empty size and the window is | ||
641 | * very large. Reset and try again | ||
642 | */ | ||
643 | if (next->offset - window_start > (bytes + empty_size) * 2) { | ||
644 | entry = next; | ||
645 | window_start = entry->offset; | ||
646 | window_free = entry->bytes; | ||
647 | last = entry; | ||
648 | max_extent = 0; | ||
649 | total_retries++; | ||
650 | if (total_retries % 256 == 0) { | ||
651 | if (min_bytes >= (bytes + empty_size)) { | ||
652 | ret = -ENOSPC; | ||
653 | goto out; | ||
654 | } | ||
655 | /* | ||
656 | * grow our allocation a bit; we're not having | ||
657 | * much luck | ||
658 | */ | ||
659 | min_bytes *= 2; | ||
660 | goto again; | ||
661 | } | ||
662 | } else { | ||
663 | last = next; | ||
664 | window_free += next->bytes; | ||
665 | if (next->bytes > max_extent) | ||
666 | max_extent = next->bytes; | ||
667 | } | ||
668 | } | ||
669 | |||
670 | cluster->window_start = entry->offset; | ||
671 | |||
672 | /* | ||
673 | * now we've found our entries, pull them out of the free space | ||
674 | * cache and put them into the cluster rbtree | ||
675 | * | ||
676 | * The cluster includes an rbtree, but only uses the offset index | ||
677 | * of each free space cache entry. | ||
678 | */ | ||
679 | while(1) { | ||
680 | node = rb_next(&entry->offset_index); | ||
681 | unlink_free_space(block_group, entry); | ||
682 | ret = tree_insert_offset(&cluster->root, entry->offset, | ||
683 | &entry->offset_index); | ||
684 | BUG_ON(ret); | ||
685 | |||
686 | if (!node || entry == last) | ||
687 | break; | ||
688 | |||
689 | entry = rb_entry(node, struct btrfs_free_space, offset_index); | ||
690 | } | ||
691 | ret = 0; | ||
692 | cluster->max_size = max_extent; | ||
693 | atomic_inc(&block_group->count); | ||
694 | list_add_tail(&cluster->block_group_list, &block_group->cluster_list); | ||
695 | cluster->block_group = block_group; | ||
696 | out: | ||
697 | spin_unlock(&cluster->lock); | ||
698 | spin_unlock(&block_group->tree_lock); | ||
493 | 699 | ||
494 | return ret; | 700 | return ret; |
495 | } | 701 | } |
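
The window scan in btrfs_find_space_cluster() is easier to see without the locking and rbtree plumbing. Below is a lock-free restatement over a plain sorted array with a hypothetical struct; 'target' stands for bytes + empty_size, and the window restarts whenever its span exceeds twice the target, exactly as above.

	/* hypothetical flat-array restatement of the window scan */
	struct simple_extent {
		u64 offset;
		u64 bytes;
	};

	/*
	 * returns the index of the first entry of a run whose free bytes
	 * reach 'target' without the run spanning more than 2 * target
	 * of address space, or -1 if no such window exists
	 */
	static int find_window(const struct simple_extent *ext, int nr,
			       u64 target)
	{
		int first = 0;

		while (first < nr) {
			u64 window_start = ext[first].offset;
			u64 window_free = 0;
			int i;

			for (i = first; i < nr; i++) {
				/* window too sparse; restart at this entry */
				if (ext[i].offset - window_start > target * 2) {
					first = i;
					break;
				}
				window_free += ext[i].bytes;
				if (window_free >= target)
					return first;
			}
			if (i == nr)
				break;
		}
		return -1;
	}
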
702 | |||
703 | /* | ||
704 | * simple code to zero out a cluster | ||
705 | */ | ||
706 | void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster) | ||
707 | { | ||
708 | spin_lock_init(&cluster->lock); | ||
709 | spin_lock_init(&cluster->refill_lock); | ||
710 | cluster->root.rb_node = NULL; | ||
711 | cluster->max_size = 0; | ||
712 | INIT_LIST_HEAD(&cluster->block_group_list); | ||
713 | cluster->block_group = NULL; | ||
714 | } | ||
715 | |||
diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h new file mode 100644 index 000000000000..ab0bdc0a63ce --- /dev/null +++ b/fs/btrfs/free-space-cache.h | |||
@@ -0,0 +1,44 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2009 Oracle. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public | ||
6 | * License v2 as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
11 | * General Public License for more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public | ||
14 | * License along with this program; if not, write to the | ||
15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
16 | * Boston, MA 021110-1307, USA. | ||
17 | */ | ||
18 | |||
19 | #ifndef __BTRFS_FREE_SPACE_CACHE | ||
20 | #define __BTRFS_FREE_SPACE_CACHE | ||
21 | |||
22 | int btrfs_add_free_space(struct btrfs_block_group_cache *block_group, | ||
23 | u64 bytenr, u64 size); | ||
24 | int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, | ||
25 | u64 bytenr, u64 size); | ||
26 | void btrfs_remove_free_space_cache(struct btrfs_block_group_cache | ||
27 | *block_group); | ||
28 | u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group, | ||
29 | u64 offset, u64 bytes, u64 empty_size); | ||
30 | void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, | ||
31 | u64 bytes); | ||
32 | u64 btrfs_block_group_free_space(struct btrfs_block_group_cache *block_group); | ||
33 | int btrfs_find_space_cluster(struct btrfs_trans_handle *trans, | ||
34 | struct btrfs_block_group_cache *block_group, | ||
35 | struct btrfs_free_cluster *cluster, | ||
36 | u64 offset, u64 bytes, u64 empty_size); | ||
37 | void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster); | ||
38 | u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group, | ||
39 | struct btrfs_free_cluster *cluster, u64 bytes, | ||
40 | u64 min_start); | ||
41 | int btrfs_return_cluster_to_free_space( | ||
42 | struct btrfs_block_group_cache *block_group, | ||
43 | struct btrfs_free_cluster *cluster); | ||
44 | #endif | ||
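
Taken together, the declarations above form a small lifecycle: initialize a cluster, carve a window out of a block group, allocate from it, and hand leftovers back. A sketch under assumed context (a valid transaction handle and block group); the sizes here are arbitrary.

	static void cluster_lifecycle_demo(struct btrfs_trans_handle *trans,
					   struct btrfs_block_group_cache *bg)
	{
		struct btrfs_free_cluster cluster;
		u64 where;

		btrfs_init_free_cluster(&cluster);

		/* find at least 64k free, ideally 64k + 256k, near offset 0 */
		if (btrfs_find_space_cluster(trans, bg, &cluster, 0,
					     64 * 1024, 256 * 1024) == 0) {
			/* hand out 16k pieces until the cluster runs dry */
			while ((where = btrfs_alloc_from_cluster(bg, &cluster,
								 16 * 1024, 0)))
				;	/* 'where' is a logical disk offset */
		}

		/* give any unused extents back to the free space cache */
		btrfs_return_cluster_to_free_space(bg, &cluster);
	}
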
diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c index 3d46fa1f29a4..6b627c611808 100644 --- a/fs/btrfs/inode-item.c +++ b/fs/btrfs/inode-item.c | |||
@@ -73,6 +73,8 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans, | |||
73 | if (!path) | 73 | if (!path) |
74 | return -ENOMEM; | 74 | return -ENOMEM; |
75 | 75 | ||
76 | path->leave_spinning = 1; | ||
77 | |||
76 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); | 78 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); |
77 | if (ret > 0) { | 79 | if (ret > 0) { |
78 | ret = -ENOENT; | 80 | ret = -ENOENT; |
@@ -127,6 +129,7 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans, | |||
127 | if (!path) | 129 | if (!path) |
128 | return -ENOMEM; | 130 | return -ENOMEM; |
129 | 131 | ||
132 | path->leave_spinning = 1; | ||
130 | ret = btrfs_insert_empty_item(trans, root, path, &key, | 133 | ret = btrfs_insert_empty_item(trans, root, path, &key, |
131 | ins_len); | 134 | ins_len); |
132 | if (ret == -EEXIST) { | 135 | if (ret == -EEXIST) { |
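
This hunk is the first of many in this patch that set path->leave_spinning before a btree search. The pattern, as a hedged sketch (the wrapper function is hypothetical): keep the leaf lock in its cheap spinning state through the search, and only upgrade to the blocking state if the caller actually needs to sleep.

	static int lookup_item_spinning(struct btrfs_trans_handle *trans,
					struct btrfs_root *root,
					struct btrfs_key *key)
	{
		struct btrfs_path *path;
		int ret;

		path = btrfs_alloc_path();
		if (!path)
			return -ENOMEM;

		path->leave_spinning = 1;
		ret = btrfs_search_slot(trans, root, key, path, 0, 1);

		/* about to do something that may sleep? upgrade the locks */
		if (ret == 0)
			btrfs_set_path_blocking(path);

		btrfs_free_path(path);
		return ret;
	}
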
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index 2aa79873eb46..cc7334d833c9 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c | |||
@@ -84,7 +84,6 @@ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, | |||
84 | search_key.type = 0; | 84 | search_key.type = 0; |
85 | search_key.offset = 0; | 85 | search_key.offset = 0; |
86 | 86 | ||
87 | btrfs_init_path(path); | ||
88 | start_found = 0; | 87 | start_found = 0; |
89 | ret = btrfs_search_slot(trans, root, &search_key, path, 0, 0); | 88 | ret = btrfs_search_slot(trans, root, &search_key, path, 0, 0); |
90 | if (ret < 0) | 89 | if (ret < 0) |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 8f0706210a47..a0d1dd492a58 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -102,34 +102,6 @@ static int btrfs_init_inode_security(struct inode *inode, struct inode *dir) | |||
102 | } | 102 | } |
103 | 103 | ||
104 | /* | 104 | /* |
105 | * a very lame attempt at stopping writes when the FS is 85% full. There | ||
106 | * are countless ways this is incorrect, but it is better than nothing. | ||
107 | */ | ||
108 | int btrfs_check_free_space(struct btrfs_root *root, u64 num_required, | ||
109 | int for_del) | ||
110 | { | ||
111 | u64 total; | ||
112 | u64 used; | ||
113 | u64 thresh; | ||
114 | int ret = 0; | ||
115 | |||
116 | spin_lock(&root->fs_info->delalloc_lock); | ||
117 | total = btrfs_super_total_bytes(&root->fs_info->super_copy); | ||
118 | used = btrfs_super_bytes_used(&root->fs_info->super_copy); | ||
119 | if (for_del) | ||
120 | thresh = total * 90; | ||
121 | else | ||
122 | thresh = total * 85; | ||
123 | |||
124 | do_div(thresh, 100); | ||
125 | |||
126 | if (used + root->fs_info->delalloc_bytes + num_required > thresh) | ||
127 | ret = -ENOSPC; | ||
128 | spin_unlock(&root->fs_info->delalloc_lock); | ||
129 | return ret; | ||
130 | } | ||
131 | |||
132 | /* | ||
133 | * this does all the hard work for inserting an inline extent into | 105 | * this does all the hard work for inserting an inline extent into |
134 | * the btree. The caller should have done a btrfs_drop_extents so that | 106 | * the btree. The caller should have done a btrfs_drop_extents so that |
135 | * no overlapping inline items exist in the btree | 107 | * no overlapping inline items exist in the btree |
@@ -162,6 +134,7 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans, | |||
162 | if (!path) | 134 | if (!path) |
163 | return -ENOMEM; | 135 | return -ENOMEM; |
164 | 136 | ||
137 | path->leave_spinning = 1; | ||
165 | btrfs_set_trans_block_group(trans, inode); | 138 | btrfs_set_trans_block_group(trans, inode); |
166 | 139 | ||
167 | key.objectid = inode->i_ino; | 140 | key.objectid = inode->i_ino; |
@@ -195,9 +168,9 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans, | |||
195 | cur_size = min_t(unsigned long, compressed_size, | 168 | cur_size = min_t(unsigned long, compressed_size, |
196 | PAGE_CACHE_SIZE); | 169 | PAGE_CACHE_SIZE); |
197 | 170 | ||
198 | kaddr = kmap(cpage); | 171 | kaddr = kmap_atomic(cpage, KM_USER0); |
199 | write_extent_buffer(leaf, kaddr, ptr, cur_size); | 172 | write_extent_buffer(leaf, kaddr, ptr, cur_size); |
200 | kunmap(cpage); | 173 | kunmap_atomic(kaddr, KM_USER0); |
201 | 174 | ||
202 | i++; | 175 | i++; |
203 | ptr += cur_size; | 176 | ptr += cur_size; |
@@ -232,7 +205,7 @@ fail: | |||
232 | * does the checks required to make sure the data is small enough | 205 | * does the checks required to make sure the data is small enough |
233 | * to fit as an inline extent. | 206 | * to fit as an inline extent. |
234 | */ | 207 | */ |
235 | static int cow_file_range_inline(struct btrfs_trans_handle *trans, | 208 | static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans, |
236 | struct btrfs_root *root, | 209 | struct btrfs_root *root, |
237 | struct inode *inode, u64 start, u64 end, | 210 | struct inode *inode, u64 start, u64 end, |
238 | size_t compressed_size, | 211 | size_t compressed_size, |
@@ -882,11 +855,6 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page, | |||
882 | u64 cur_end; | 855 | u64 cur_end; |
883 | int limit = 10 * 1024 * 1042; | 856 | int limit = 10 * 1024 * 1042; |
884 | 857 | ||
885 | if (!btrfs_test_opt(root, COMPRESS)) { | ||
886 | return cow_file_range(inode, locked_page, start, end, | ||
887 | page_started, nr_written, 1); | ||
888 | } | ||
889 | |||
890 | clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED | | 858 | clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED | |
891 | EXTENT_DELALLOC, 1, 0, GFP_NOFS); | 859 | EXTENT_DELALLOC, 1, 0, GFP_NOFS); |
892 | while (start < end) { | 860 | while (start < end) { |
@@ -963,7 +931,8 @@ static noinline int csum_exist_in_range(struct btrfs_root *root, | |||
963 | * If no cow copies or snapshots exist, we write directly to the existing | 931 | * If no cow copies or snapshots exist, we write directly to the existing |
964 | * blocks on disk | 932 | * blocks on disk |
965 | */ | 933 | */ |
966 | static int run_delalloc_nocow(struct inode *inode, struct page *locked_page, | 934 | static noinline int run_delalloc_nocow(struct inode *inode, |
935 | struct page *locked_page, | ||
967 | u64 start, u64 end, int *page_started, int force, | 936 | u64 start, u64 end, int *page_started, int force, |
968 | unsigned long *nr_written) | 937 | unsigned long *nr_written) |
969 | { | 938 | { |
@@ -1161,6 +1130,7 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page, | |||
1161 | unsigned long *nr_written) | 1130 | unsigned long *nr_written) |
1162 | { | 1131 | { |
1163 | int ret; | 1132 | int ret; |
1133 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
1164 | 1134 | ||
1165 | if (btrfs_test_flag(inode, NODATACOW)) | 1135 | if (btrfs_test_flag(inode, NODATACOW)) |
1166 | ret = run_delalloc_nocow(inode, locked_page, start, end, | 1136 | ret = run_delalloc_nocow(inode, locked_page, start, end, |
@@ -1168,10 +1138,12 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page, | |||
1168 | else if (btrfs_test_flag(inode, PREALLOC)) | 1138 | else if (btrfs_test_flag(inode, PREALLOC)) |
1169 | ret = run_delalloc_nocow(inode, locked_page, start, end, | 1139 | ret = run_delalloc_nocow(inode, locked_page, start, end, |
1170 | page_started, 0, nr_written); | 1140 | page_started, 0, nr_written); |
1141 | else if (!btrfs_test_opt(root, COMPRESS)) | ||
1142 | ret = cow_file_range(inode, locked_page, start, end, | ||
1143 | page_started, nr_written, 1); | ||
1171 | else | 1144 | else |
1172 | ret = cow_file_range_async(inode, locked_page, start, end, | 1145 | ret = cow_file_range_async(inode, locked_page, start, end, |
1173 | page_started, nr_written); | 1146 | page_started, nr_written); |
1174 | |||
1175 | return ret; | 1147 | return ret; |
1176 | } | 1148 | } |
1177 | 1149 | ||
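
The reshuffled dispatch in run_delalloc_range() now has four outcomes; a compact restatement with a hypothetical enum (the flag and mount-option names are the real ones used above):

	enum delalloc_path {
		DELALLOC_NOCOW_FORCED,	/* run_delalloc_nocow, force = 1 */
		DELALLOC_NOCOW,		/* run_delalloc_nocow, force = 0 */
		DELALLOC_COW,		/* plain cow_file_range */
		DELALLOC_COW_ASYNC,	/* compression enabled, async cow */
	};

	static enum delalloc_path pick_delalloc_path(struct inode *inode,
						     struct btrfs_root *root)
	{
		if (btrfs_test_flag(inode, NODATACOW))
			return DELALLOC_NOCOW_FORCED;
		if (btrfs_test_flag(inode, PREALLOC))
			return DELALLOC_NOCOW;
		if (!btrfs_test_opt(root, COMPRESS))
			return DELALLOC_COW;
		return DELALLOC_COW_ASYNC;
	}
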
@@ -1190,6 +1162,7 @@ static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end, | |||
1190 | */ | 1162 | */ |
1191 | if (!(old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) { | 1163 | if (!(old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) { |
1192 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1164 | struct btrfs_root *root = BTRFS_I(inode)->root; |
1165 | btrfs_delalloc_reserve_space(root, inode, end - start + 1); | ||
1193 | spin_lock(&root->fs_info->delalloc_lock); | 1166 | spin_lock(&root->fs_info->delalloc_lock); |
1194 | BTRFS_I(inode)->delalloc_bytes += end - start + 1; | 1167 | BTRFS_I(inode)->delalloc_bytes += end - start + 1; |
1195 | root->fs_info->delalloc_bytes += end - start + 1; | 1168 | root->fs_info->delalloc_bytes += end - start + 1; |
@@ -1223,9 +1196,12 @@ static int btrfs_clear_bit_hook(struct inode *inode, u64 start, u64 end, | |||
1223 | (unsigned long long)end - start + 1, | 1196 | (unsigned long long)end - start + 1, |
1224 | (unsigned long long) | 1197 | (unsigned long long) |
1225 | root->fs_info->delalloc_bytes); | 1198 | root->fs_info->delalloc_bytes); |
1199 | btrfs_delalloc_free_space(root, inode, (u64)-1); | ||
1226 | root->fs_info->delalloc_bytes = 0; | 1200 | root->fs_info->delalloc_bytes = 0; |
1227 | BTRFS_I(inode)->delalloc_bytes = 0; | 1201 | BTRFS_I(inode)->delalloc_bytes = 0; |
1228 | } else { | 1202 | } else { |
1203 | btrfs_delalloc_free_space(root, inode, | ||
1204 | end - start + 1); | ||
1229 | root->fs_info->delalloc_bytes -= end - start + 1; | 1205 | root->fs_info->delalloc_bytes -= end - start + 1; |
1230 | BTRFS_I(inode)->delalloc_bytes -= end - start + 1; | 1206 | BTRFS_I(inode)->delalloc_bytes -= end - start + 1; |
1231 | } | 1207 | } |
@@ -1477,6 +1453,7 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, | |||
1477 | path = btrfs_alloc_path(); | 1453 | path = btrfs_alloc_path(); |
1478 | BUG_ON(!path); | 1454 | BUG_ON(!path); |
1479 | 1455 | ||
1456 | path->leave_spinning = 1; | ||
1480 | ret = btrfs_drop_extents(trans, root, inode, file_pos, | 1457 | ret = btrfs_drop_extents(trans, root, inode, file_pos, |
1481 | file_pos + num_bytes, file_pos, &hint); | 1458 | file_pos + num_bytes, file_pos, &hint); |
1482 | BUG_ON(ret); | 1459 | BUG_ON(ret); |
@@ -1499,6 +1476,10 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, | |||
1499 | btrfs_set_file_extent_compression(leaf, fi, compression); | 1476 | btrfs_set_file_extent_compression(leaf, fi, compression); |
1500 | btrfs_set_file_extent_encryption(leaf, fi, encryption); | 1477 | btrfs_set_file_extent_encryption(leaf, fi, encryption); |
1501 | btrfs_set_file_extent_other_encoding(leaf, fi, other_encoding); | 1478 | btrfs_set_file_extent_other_encoding(leaf, fi, other_encoding); |
1479 | |||
1480 | btrfs_unlock_up_safe(path, 1); | ||
1481 | btrfs_set_lock_blocking(leaf); | ||
1482 | |||
1502 | btrfs_mark_buffer_dirty(leaf); | 1483 | btrfs_mark_buffer_dirty(leaf); |
1503 | 1484 | ||
1504 | inode_add_bytes(inode, num_bytes); | 1485 | inode_add_bytes(inode, num_bytes); |
@@ -1511,11 +1492,35 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, | |||
1511 | root->root_key.objectid, | 1492 | root->root_key.objectid, |
1512 | trans->transid, inode->i_ino, &ins); | 1493 | trans->transid, inode->i_ino, &ins); |
1513 | BUG_ON(ret); | 1494 | BUG_ON(ret); |
1514 | |||
1515 | btrfs_free_path(path); | 1495 | btrfs_free_path(path); |
1496 | |||
1516 | return 0; | 1497 | return 0; |
1517 | } | 1498 | } |
1518 | 1499 | ||
1500 | /* | ||
1501 | * helper function for btrfs_finish_ordered_io, this | ||
1502 | * just reads in some of the csum leaves to prime them into RAM | ||
1503 | * before we start the transaction. It limits the amount of btree | ||
1504 | * reads required while inside the transaction. | ||
1505 | */ | ||
1506 | static noinline void reada_csum(struct btrfs_root *root, | ||
1507 | struct btrfs_path *path, | ||
1508 | struct btrfs_ordered_extent *ordered_extent) | ||
1509 | { | ||
1510 | struct btrfs_ordered_sum *sum; | ||
1511 | u64 bytenr; | ||
1512 | |||
1513 | sum = list_entry(ordered_extent->list.next, struct btrfs_ordered_sum, | ||
1514 | list); | ||
1515 | bytenr = sum->sums[0].bytenr; | ||
1516 | |||
1517 | /* | ||
1518 | * we don't care about the results; the point of this search is | ||
1519 | * just to get the btree leaves into RAM | ||
1520 | */ | ||
1521 | btrfs_lookup_csum(NULL, root->fs_info->csum_root, path, bytenr, 0); | ||
1522 | } | ||
1523 | |||
1519 | /* as ordered data IO finishes, this gets called so we can finish | 1524 | /* as ordered data IO finishes, this gets called so we can finish |
1520 | * an ordered extent if the range of bytes in the file it covers are | 1525 | * an ordered extent if the range of bytes in the file it covers are |
1521 | * fully written. | 1526 | * fully written. |
@@ -1524,8 +1529,9 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | |||
1524 | { | 1529 | { |
1525 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1530 | struct btrfs_root *root = BTRFS_I(inode)->root; |
1526 | struct btrfs_trans_handle *trans; | 1531 | struct btrfs_trans_handle *trans; |
1527 | struct btrfs_ordered_extent *ordered_extent; | 1532 | struct btrfs_ordered_extent *ordered_extent = NULL; |
1528 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | 1533 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; |
1534 | struct btrfs_path *path; | ||
1529 | int compressed = 0; | 1535 | int compressed = 0; |
1530 | int ret; | 1536 | int ret; |
1531 | 1537 | ||
@@ -1533,9 +1539,33 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | |||
1533 | if (!ret) | 1539 | if (!ret) |
1534 | return 0; | 1540 | return 0; |
1535 | 1541 | ||
1542 | /* | ||
1543 | * before we join the transaction, try to do some of our IO. | ||
1544 | * This will limit the amount of IO that we have to do with | ||
1545 | * the transaction running. We're unlikely to need to do any | ||
1546 | * IO if the file extents are new, the disk_i_size checks | ||
1547 | * covers the most common case. | ||
1548 | */ | ||
1549 | if (start < BTRFS_I(inode)->disk_i_size) { | ||
1550 | path = btrfs_alloc_path(); | ||
1551 | if (path) { | ||
1552 | ret = btrfs_lookup_file_extent(NULL, root, path, | ||
1553 | inode->i_ino, | ||
1554 | start, 0); | ||
1555 | ordered_extent = btrfs_lookup_ordered_extent(inode, | ||
1556 | start); | ||
1557 | if (!list_empty(&ordered_extent->list)) { | ||
1558 | btrfs_release_path(root, path); | ||
1559 | reada_csum(root, path, ordered_extent); | ||
1560 | } | ||
1561 | btrfs_free_path(path); | ||
1562 | } | ||
1563 | } | ||
1564 | |||
1536 | trans = btrfs_join_transaction(root, 1); | 1565 | trans = btrfs_join_transaction(root, 1); |
1537 | 1566 | ||
1538 | ordered_extent = btrfs_lookup_ordered_extent(inode, start); | 1567 | if (!ordered_extent) |
1568 | ordered_extent = btrfs_lookup_ordered_extent(inode, start); | ||
1539 | BUG_ON(!ordered_extent); | 1569 | BUG_ON(!ordered_extent); |
1540 | if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) | 1570 | if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) |
1541 | goto nocow; | 1571 | goto nocow; |
@@ -2125,6 +2155,7 @@ noinline int btrfs_update_inode(struct btrfs_trans_handle *trans, | |||
2125 | 2155 | ||
2126 | path = btrfs_alloc_path(); | 2156 | path = btrfs_alloc_path(); |
2127 | BUG_ON(!path); | 2157 | BUG_ON(!path); |
2158 | path->leave_spinning = 1; | ||
2128 | ret = btrfs_lookup_inode(trans, root, path, | 2159 | ret = btrfs_lookup_inode(trans, root, path, |
2129 | &BTRFS_I(inode)->location, 1); | 2160 | &BTRFS_I(inode)->location, 1); |
2130 | if (ret) { | 2161 | if (ret) { |
@@ -2171,6 +2202,7 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans, | |||
2171 | goto err; | 2202 | goto err; |
2172 | } | 2203 | } |
2173 | 2204 | ||
2205 | path->leave_spinning = 1; | ||
2174 | di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino, | 2206 | di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino, |
2175 | name, name_len, -1); | 2207 | name, name_len, -1); |
2176 | if (IS_ERR(di)) { | 2208 | if (IS_ERR(di)) { |
@@ -2214,8 +2246,6 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans, | |||
2214 | ret = btrfs_del_inode_ref_in_log(trans, root, name, name_len, | 2246 | ret = btrfs_del_inode_ref_in_log(trans, root, name, name_len, |
2215 | inode, dir->i_ino); | 2247 | inode, dir->i_ino); |
2216 | BUG_ON(ret != 0 && ret != -ENOENT); | 2248 | BUG_ON(ret != 0 && ret != -ENOENT); |
2217 | if (ret != -ENOENT) | ||
2218 | BTRFS_I(dir)->log_dirty_trans = trans->transid; | ||
2219 | 2249 | ||
2220 | ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len, | 2250 | ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len, |
2221 | dir, index); | 2251 | dir, index); |
@@ -2245,13 +2275,12 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) | |||
2245 | 2275 | ||
2246 | root = BTRFS_I(dir)->root; | 2276 | root = BTRFS_I(dir)->root; |
2247 | 2277 | ||
2248 | ret = btrfs_check_free_space(root, 1, 1); | ||
2249 | if (ret) | ||
2250 | goto fail; | ||
2251 | |||
2252 | trans = btrfs_start_transaction(root, 1); | 2278 | trans = btrfs_start_transaction(root, 1); |
2253 | 2279 | ||
2254 | btrfs_set_trans_block_group(trans, dir); | 2280 | btrfs_set_trans_block_group(trans, dir); |
2281 | |||
2282 | btrfs_record_unlink_dir(trans, dir, dentry->d_inode, 0); | ||
2283 | |||
2255 | ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode, | 2284 | ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode, |
2256 | dentry->d_name.name, dentry->d_name.len); | 2285 | dentry->d_name.name, dentry->d_name.len); |
2257 | 2286 | ||
@@ -2261,7 +2290,6 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) | |||
2261 | nr = trans->blocks_used; | 2290 | nr = trans->blocks_used; |
2262 | 2291 | ||
2263 | btrfs_end_transaction_throttle(trans, root); | 2292 | btrfs_end_transaction_throttle(trans, root); |
2264 | fail: | ||
2265 | btrfs_btree_balance_dirty(root, nr); | 2293 | btrfs_btree_balance_dirty(root, nr); |
2266 | return ret; | 2294 | return ret; |
2267 | } | 2295 | } |
@@ -2284,10 +2312,6 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
2284 | return -ENOTEMPTY; | 2312 | return -ENOTEMPTY; |
2285 | } | 2313 | } |
2286 | 2314 | ||
2287 | ret = btrfs_check_free_space(root, 1, 1); | ||
2288 | if (ret) | ||
2289 | goto fail; | ||
2290 | |||
2291 | trans = btrfs_start_transaction(root, 1); | 2315 | trans = btrfs_start_transaction(root, 1); |
2292 | btrfs_set_trans_block_group(trans, dir); | 2316 | btrfs_set_trans_block_group(trans, dir); |
2293 | 2317 | ||
@@ -2304,7 +2328,6 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
2304 | fail_trans: | 2328 | fail_trans: |
2305 | nr = trans->blocks_used; | 2329 | nr = trans->blocks_used; |
2306 | ret = btrfs_end_transaction_throttle(trans, root); | 2330 | ret = btrfs_end_transaction_throttle(trans, root); |
2307 | fail: | ||
2308 | btrfs_btree_balance_dirty(root, nr); | 2331 | btrfs_btree_balance_dirty(root, nr); |
2309 | 2332 | ||
2310 | if (ret && !err) | 2333 | if (ret && !err) |
@@ -2531,9 +2554,8 @@ noinline int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, | |||
2531 | key.offset = (u64)-1; | 2554 | key.offset = (u64)-1; |
2532 | key.type = (u8)-1; | 2555 | key.type = (u8)-1; |
2533 | 2556 | ||
2534 | btrfs_init_path(path); | ||
2535 | |||
2536 | search_again: | 2557 | search_again: |
2558 | path->leave_spinning = 1; | ||
2537 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); | 2559 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); |
2538 | if (ret < 0) | 2560 | if (ret < 0) |
2539 | goto error; | 2561 | goto error; |
@@ -2680,6 +2702,7 @@ delete: | |||
2680 | break; | 2702 | break; |
2681 | } | 2703 | } |
2682 | if (found_extent) { | 2704 | if (found_extent) { |
2705 | btrfs_set_path_blocking(path); | ||
2683 | ret = btrfs_free_extent(trans, root, extent_start, | 2706 | ret = btrfs_free_extent(trans, root, extent_start, |
2684 | extent_num_bytes, | 2707 | extent_num_bytes, |
2685 | leaf->start, root_owner, | 2708 | leaf->start, root_owner, |
@@ -2820,7 +2843,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t size) | |||
2820 | if (size <= hole_start) | 2843 | if (size <= hole_start) |
2821 | return 0; | 2844 | return 0; |
2822 | 2845 | ||
2823 | err = btrfs_check_free_space(root, 1, 0); | 2846 | err = btrfs_check_metadata_free_space(root); |
2824 | if (err) | 2847 | if (err) |
2825 | return err; | 2848 | return err; |
2826 | 2849 | ||
@@ -2884,11 +2907,21 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) | |||
2884 | if (err) | 2907 | if (err) |
2885 | return err; | 2908 | return err; |
2886 | 2909 | ||
2887 | if (S_ISREG(inode->i_mode) && | 2910 | if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) { |
2888 | attr->ia_valid & ATTR_SIZE && attr->ia_size > inode->i_size) { | 2911 | if (attr->ia_size > inode->i_size) { |
2889 | err = btrfs_cont_expand(inode, attr->ia_size); | 2912 | err = btrfs_cont_expand(inode, attr->ia_size); |
2890 | if (err) | 2913 | if (err) |
2891 | return err; | 2914 | return err; |
2915 | } else if (inode->i_size > 0 && | ||
2916 | attr->ia_size == 0) { | ||
2917 | |||
2918 | /* we're truncating a file that used to have good | ||
2919 | * data down to zero. Make sure it gets into | ||
2920 | * the ordered flush list so that any new writes | ||
2921 | * get down to disk quickly. | ||
2922 | */ | ||
2923 | BTRFS_I(inode)->ordered_data_close = 1; | ||
2924 | } | ||
2892 | } | 2925 | } |
2893 | 2926 | ||
2894 | err = inode_setattr(inode, attr); | 2927 | err = inode_setattr(inode, attr); |
@@ -3016,16 +3049,18 @@ static noinline void init_btrfs_i(struct inode *inode) | |||
3016 | bi->last_trans = 0; | 3049 | bi->last_trans = 0; |
3017 | bi->logged_trans = 0; | 3050 | bi->logged_trans = 0; |
3018 | bi->delalloc_bytes = 0; | 3051 | bi->delalloc_bytes = 0; |
3052 | bi->reserved_bytes = 0; | ||
3019 | bi->disk_i_size = 0; | 3053 | bi->disk_i_size = 0; |
3020 | bi->flags = 0; | 3054 | bi->flags = 0; |
3021 | bi->index_cnt = (u64)-1; | 3055 | bi->index_cnt = (u64)-1; |
3022 | bi->log_dirty_trans = 0; | 3056 | bi->last_unlink_trans = 0; |
3023 | extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS); | 3057 | extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS); |
3024 | extent_io_tree_init(&BTRFS_I(inode)->io_tree, | 3058 | extent_io_tree_init(&BTRFS_I(inode)->io_tree, |
3025 | inode->i_mapping, GFP_NOFS); | 3059 | inode->i_mapping, GFP_NOFS); |
3026 | extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree, | 3060 | extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree, |
3027 | inode->i_mapping, GFP_NOFS); | 3061 | inode->i_mapping, GFP_NOFS); |
3028 | INIT_LIST_HEAD(&BTRFS_I(inode)->delalloc_inodes); | 3062 | INIT_LIST_HEAD(&BTRFS_I(inode)->delalloc_inodes); |
3063 | INIT_LIST_HEAD(&BTRFS_I(inode)->ordered_operations); | ||
3029 | btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree); | 3064 | btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree); |
3030 | mutex_init(&BTRFS_I(inode)->extent_mutex); | 3065 | mutex_init(&BTRFS_I(inode)->extent_mutex); |
3031 | mutex_init(&BTRFS_I(inode)->log_mutex); | 3066 | mutex_init(&BTRFS_I(inode)->log_mutex); |
@@ -3037,6 +3072,7 @@ static int btrfs_init_locked_inode(struct inode *inode, void *p) | |||
3037 | inode->i_ino = args->ino; | 3072 | inode->i_ino = args->ino; |
3038 | init_btrfs_i(inode); | 3073 | init_btrfs_i(inode); |
3039 | BTRFS_I(inode)->root = args->root; | 3074 | BTRFS_I(inode)->root = args->root; |
3075 | btrfs_set_inode_space_info(args->root, inode); | ||
3040 | return 0; | 3076 | return 0; |
3041 | } | 3077 | } |
3042 | 3078 | ||
@@ -3445,8 +3481,10 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | |||
3445 | 3481 | ||
3446 | if (dir) { | 3482 | if (dir) { |
3447 | ret = btrfs_set_inode_index(dir, index); | 3483 | ret = btrfs_set_inode_index(dir, index); |
3448 | if (ret) | 3484 | if (ret) { |
3485 | iput(inode); | ||
3449 | return ERR_PTR(ret); | 3486 | return ERR_PTR(ret); |
3487 | } | ||
3450 | } | 3488 | } |
3451 | /* | 3489 | /* |
3452 | * index_cnt is ignored for everything but a dir, | 3490 | * index_cnt is ignored for everything but a dir, |
@@ -3457,6 +3495,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | |||
3457 | BTRFS_I(inode)->index_cnt = 2; | 3495 | BTRFS_I(inode)->index_cnt = 2; |
3458 | BTRFS_I(inode)->root = root; | 3496 | BTRFS_I(inode)->root = root; |
3459 | BTRFS_I(inode)->generation = trans->transid; | 3497 | BTRFS_I(inode)->generation = trans->transid; |
3498 | btrfs_set_inode_space_info(root, inode); | ||
3460 | 3499 | ||
3461 | if (mode & S_IFDIR) | 3500 | if (mode & S_IFDIR) |
3462 | owner = 0; | 3501 | owner = 0; |
@@ -3482,6 +3521,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | |||
3482 | sizes[0] = sizeof(struct btrfs_inode_item); | 3521 | sizes[0] = sizeof(struct btrfs_inode_item); |
3483 | sizes[1] = name_len + sizeof(*ref); | 3522 | sizes[1] = name_len + sizeof(*ref); |
3484 | 3523 | ||
3524 | path->leave_spinning = 1; | ||
3485 | ret = btrfs_insert_empty_items(trans, root, path, key, sizes, 2); | 3525 | ret = btrfs_insert_empty_items(trans, root, path, key, sizes, 2); |
3486 | if (ret != 0) | 3526 | if (ret != 0) |
3487 | goto fail; | 3527 | goto fail; |
@@ -3527,6 +3567,7 @@ fail: | |||
3527 | if (dir) | 3567 | if (dir) |
3528 | BTRFS_I(dir)->index_cnt--; | 3568 | BTRFS_I(dir)->index_cnt--; |
3529 | btrfs_free_path(path); | 3569 | btrfs_free_path(path); |
3570 | iput(inode); | ||
3530 | return ERR_PTR(ret); | 3571 | return ERR_PTR(ret); |
3531 | } | 3572 | } |
3532 | 3573 | ||
@@ -3604,7 +3645,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, | |||
3604 | if (!new_valid_dev(rdev)) | 3645 | if (!new_valid_dev(rdev)) |
3605 | return -EINVAL; | 3646 | return -EINVAL; |
3606 | 3647 | ||
3607 | err = btrfs_check_free_space(root, 1, 0); | 3648 | err = btrfs_check_metadata_free_space(root); |
3608 | if (err) | 3649 | if (err) |
3609 | goto fail; | 3650 | goto fail; |
3610 | 3651 | ||
@@ -3667,7 +3708,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, | |||
3667 | u64 objectid; | 3708 | u64 objectid; |
3668 | u64 index = 0; | 3709 | u64 index = 0; |
3669 | 3710 | ||
3670 | err = btrfs_check_free_space(root, 1, 0); | 3711 | err = btrfs_check_metadata_free_space(root); |
3671 | if (err) | 3712 | if (err) |
3672 | goto fail; | 3713 | goto fail; |
3673 | trans = btrfs_start_transaction(root, 1); | 3714 | trans = btrfs_start_transaction(root, 1); |
@@ -3735,7 +3776,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, | |||
3735 | return -ENOENT; | 3776 | return -ENOENT; |
3736 | 3777 | ||
3737 | btrfs_inc_nlink(inode); | 3778 | btrfs_inc_nlink(inode); |
3738 | err = btrfs_check_free_space(root, 1, 0); | 3779 | err = btrfs_check_metadata_free_space(root); |
3739 | if (err) | 3780 | if (err) |
3740 | goto fail; | 3781 | goto fail; |
3741 | err = btrfs_set_inode_index(dir, &index); | 3782 | err = btrfs_set_inode_index(dir, &index); |
@@ -3760,6 +3801,8 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, | |||
3760 | drop_inode = 1; | 3801 | drop_inode = 1; |
3761 | 3802 | ||
3762 | nr = trans->blocks_used; | 3803 | nr = trans->blocks_used; |
3804 | |||
3805 | btrfs_log_new_name(trans, inode, NULL, dentry->d_parent); | ||
3763 | btrfs_end_transaction_throttle(trans, root); | 3806 | btrfs_end_transaction_throttle(trans, root); |
3764 | fail: | 3807 | fail: |
3765 | if (drop_inode) { | 3808 | if (drop_inode) { |
@@ -3781,7 +3824,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
3781 | u64 index = 0; | 3824 | u64 index = 0; |
3782 | unsigned long nr = 1; | 3825 | unsigned long nr = 1; |
3783 | 3826 | ||
3784 | err = btrfs_check_free_space(root, 1, 0); | 3827 | err = btrfs_check_metadata_free_space(root); |
3785 | if (err) | 3828 | if (err) |
3786 | goto out_unlock; | 3829 | goto out_unlock; |
3787 | 3830 | ||
@@ -4263,7 +4306,7 @@ static int btrfs_releasepage(struct page *page, gfp_t gfp_flags) | |||
4263 | { | 4306 | { |
4264 | if (PageWriteback(page) || PageDirty(page)) | 4307 | if (PageWriteback(page) || PageDirty(page)) |
4265 | return 0; | 4308 | return 0; |
4266 | return __btrfs_releasepage(page, gfp_flags); | 4309 | return __btrfs_releasepage(page, gfp_flags & GFP_NOFS); |
4267 | } | 4310 | } |
4268 | 4311 | ||
4269 | static void btrfs_invalidatepage(struct page *page, unsigned long offset) | 4312 | static void btrfs_invalidatepage(struct page *page, unsigned long offset) |
@@ -4325,8 +4368,9 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset) | |||
4325 | * beyond EOF, then the page is guaranteed safe against truncation until we | 4368 | * beyond EOF, then the page is guaranteed safe against truncation until we |
4326 | * unlock the page. | 4369 | * unlock the page. |
4327 | */ | 4370 | */ |
4328 | int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page) | 4371 | int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) |
4329 | { | 4372 | { |
4373 | struct page *page = vmf->page; | ||
4330 | struct inode *inode = fdentry(vma->vm_file)->d_inode; | 4374 | struct inode *inode = fdentry(vma->vm_file)->d_inode; |
4331 | struct btrfs_root *root = BTRFS_I(inode)->root; | 4375 | struct btrfs_root *root = BTRFS_I(inode)->root; |
4332 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | 4376 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; |
@@ -4338,11 +4382,16 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page) | |||
4338 | u64 page_start; | 4382 | u64 page_start; |
4339 | u64 page_end; | 4383 | u64 page_end; |
4340 | 4384 | ||
4341 | ret = btrfs_check_free_space(root, PAGE_CACHE_SIZE, 0); | 4385 | ret = btrfs_check_data_free_space(root, inode, PAGE_CACHE_SIZE); |
4342 | if (ret) | 4386 | if (ret) { |
4387 | if (ret == -ENOMEM) | ||
4388 | ret = VM_FAULT_OOM; | ||
4389 | else /* -ENOSPC, -EIO, etc */ | ||
4390 | ret = VM_FAULT_SIGBUS; | ||
4343 | goto out; | 4391 | goto out; |
4392 | } | ||
4344 | 4393 | ||
4345 | ret = -EINVAL; | 4394 | ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */ |
4346 | again: | 4395 | again: |
4347 | lock_page(page); | 4396 | lock_page(page); |
4348 | size = i_size_read(inode); | 4397 | size = i_size_read(inode); |
@@ -4351,6 +4400,7 @@ again: | |||
4351 | 4400 | ||
4352 | if ((page->mapping != inode->i_mapping) || | 4401 | if ((page->mapping != inode->i_mapping) || |
4353 | (page_start >= size)) { | 4402 | (page_start >= size)) { |
4403 | btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE); | ||
4354 | /* page got truncated out from underneath us */ | 4404 | /* page got truncated out from underneath us */ |
4355 | goto out_unlock; | 4405 | goto out_unlock; |
4356 | } | 4406 | } |
@@ -4389,6 +4439,8 @@ again: | |||
4389 | } | 4439 | } |
4390 | ClearPageChecked(page); | 4440 | ClearPageChecked(page); |
4391 | set_page_dirty(page); | 4441 | set_page_dirty(page); |
4442 | |||
4443 | BTRFS_I(inode)->last_trans = root->fs_info->generation + 1; | ||
4392 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); | 4444 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); |
4393 | 4445 | ||
4394 | out_unlock: | 4446 | out_unlock: |
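
The btrfs_page_mkwrite() conversion above tracks the VFS change from returning errnos to returning VM_FAULT_* codes once the handler takes a struct vm_fault, with VM_FAULT_NOPAGE as the default so the VM retries the fault. The translation it performs, pulled out into a hypothetical helper for clarity:

	static int errno_to_fault(int err)
	{
		if (!err)
			return 0;		/* page made writable */
		if (err == -ENOMEM)
			return VM_FAULT_OOM;	/* let OOM handling run */
		return VM_FAULT_SIGBUS;		/* -ENOSPC, -EIO, etc */
	}
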
@@ -4414,6 +4466,27 @@ static void btrfs_truncate(struct inode *inode) | |||
4414 | btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); | 4466 | btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); |
4415 | 4467 | ||
4416 | trans = btrfs_start_transaction(root, 1); | 4468 | trans = btrfs_start_transaction(root, 1); |
4469 | |||
4470 | /* | ||
4471 | * setattr is responsible for setting the ordered_data_close flag, | ||
4472 | * but that is only tested during the last file release. That | ||
4473 | * could happen well after the next commit, leaving a great big | ||
4474 | * window where new writes may get lost if someone chooses to write | ||
4475 | * to this file after truncating to zero | ||
4476 | * | ||
4477 | * The inode doesn't have any dirty data here, so if we commit, | ||
4478 | * this is a noop. If someone immediately starts writing to the inode | ||
4479 | * it is very likely we'll catch some of their writes in this | ||
4480 | * transaction, and the commit will find this file on the ordered | ||
4481 | * data list with good things to send down. | ||
4482 | * | ||
4483 | * This is a best effort solution, there is still a window where | ||
4484 | * using truncate to replace the contents of the file will | ||
4485 | * end up with a zero length file after a crash. | ||
4486 | */ | ||
4487 | if (inode->i_size == 0 && BTRFS_I(inode)->ordered_data_close) | ||
4488 | btrfs_add_ordered_operation(trans, root, inode); | ||
4489 | |||
4417 | btrfs_set_trans_block_group(trans, inode); | 4490 | btrfs_set_trans_block_group(trans, inode); |
4418 | btrfs_i_size_write(inode, inode->i_size); | 4491 | btrfs_i_size_write(inode, inode->i_size); |
4419 | 4492 | ||
@@ -4490,12 +4563,15 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) | |||
4490 | ei->i_acl = BTRFS_ACL_NOT_CACHED; | 4563 | ei->i_acl = BTRFS_ACL_NOT_CACHED; |
4491 | ei->i_default_acl = BTRFS_ACL_NOT_CACHED; | 4564 | ei->i_default_acl = BTRFS_ACL_NOT_CACHED; |
4492 | INIT_LIST_HEAD(&ei->i_orphan); | 4565 | INIT_LIST_HEAD(&ei->i_orphan); |
4566 | INIT_LIST_HEAD(&ei->ordered_operations); | ||
4493 | return &ei->vfs_inode; | 4567 | return &ei->vfs_inode; |
4494 | } | 4568 | } |
4495 | 4569 | ||
4496 | void btrfs_destroy_inode(struct inode *inode) | 4570 | void btrfs_destroy_inode(struct inode *inode) |
4497 | { | 4571 | { |
4498 | struct btrfs_ordered_extent *ordered; | 4572 | struct btrfs_ordered_extent *ordered; |
4573 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
4574 | |||
4499 | WARN_ON(!list_empty(&inode->i_dentry)); | 4575 | WARN_ON(!list_empty(&inode->i_dentry)); |
4500 | WARN_ON(inode->i_data.nrpages); | 4576 | WARN_ON(inode->i_data.nrpages); |
4501 | 4577 | ||
@@ -4506,13 +4582,24 @@ void btrfs_destroy_inode(struct inode *inode) | |||
4506 | BTRFS_I(inode)->i_default_acl != BTRFS_ACL_NOT_CACHED) | 4582 | BTRFS_I(inode)->i_default_acl != BTRFS_ACL_NOT_CACHED) |
4507 | posix_acl_release(BTRFS_I(inode)->i_default_acl); | 4583 | posix_acl_release(BTRFS_I(inode)->i_default_acl); |
4508 | 4584 | ||
4509 | spin_lock(&BTRFS_I(inode)->root->list_lock); | 4585 | /* |
4586 | * Make sure we're properly removed from the ordered operation | ||
4587 | * lists. | ||
4588 | */ | ||
4589 | smp_mb(); | ||
4590 | if (!list_empty(&BTRFS_I(inode)->ordered_operations)) { | ||
4591 | spin_lock(&root->fs_info->ordered_extent_lock); | ||
4592 | list_del_init(&BTRFS_I(inode)->ordered_operations); | ||
4593 | spin_unlock(&root->fs_info->ordered_extent_lock); | ||
4594 | } | ||
4595 | |||
4596 | spin_lock(&root->list_lock); | ||
4510 | if (!list_empty(&BTRFS_I(inode)->i_orphan)) { | 4597 | if (!list_empty(&BTRFS_I(inode)->i_orphan)) { |
4511 | printk(KERN_ERR "BTRFS: inode %lu: inode still on the orphan" | 4598 | printk(KERN_ERR "BTRFS: inode %lu: inode still on the orphan" |
4512 | " list\n", inode->i_ino); | 4599 | " list\n", inode->i_ino); |
4513 | dump_stack(); | 4600 | dump_stack(); |
4514 | } | 4601 | } |
4515 | spin_unlock(&BTRFS_I(inode)->root->list_lock); | 4602 | spin_unlock(&root->list_lock); |
4516 | 4603 | ||
4517 | while (1) { | 4604 | while (1) { |
4518 | ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1); | 4605 | ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1); |
@@ -4633,12 +4720,40 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
4633 | if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) | 4720 | if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) |
4634 | return -EXDEV; | 4721 | return -EXDEV; |
4635 | 4722 | ||
4636 | ret = btrfs_check_free_space(root, 1, 0); | 4723 | ret = btrfs_check_metadata_free_space(root); |
4637 | if (ret) | 4724 | if (ret) |
4638 | goto out_unlock; | 4725 | goto out_unlock; |
4639 | 4726 | ||
4727 | /* | ||
4728 | * we're using rename to replace one file with another, | ||
4729 | * and the replacement file is large. Start IO on it now so | ||
4730 | * we don't add too much work to the end of the transaction | ||
4731 | */ | ||
4732 | if (new_inode && old_inode && S_ISREG(old_inode->i_mode) && | ||
4733 | new_inode->i_size && | ||
4734 | old_inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT) | ||
4735 | filemap_flush(old_inode->i_mapping); | ||
4736 | |||
4640 | trans = btrfs_start_transaction(root, 1); | 4737 | trans = btrfs_start_transaction(root, 1); |
4641 | 4738 | ||
4739 | /* | ||
4740 | * make sure the inode gets flushed if it is replacing | ||
4741 | * something. | ||
4742 | */ | ||
4743 | if (new_inode && new_inode->i_size && | ||
4744 | old_inode && S_ISREG(old_inode->i_mode)) { | ||
4745 | btrfs_add_ordered_operation(trans, root, old_inode); | ||
4746 | } | ||
4747 | |||
4748 | /* | ||
4749 | * this is an ugly little race, but the rename is required to make | ||
4750 | * sure that if we crash, the inode is either at the old name | ||
4751 | * or the new one. Pinning the log transaction lets us make sure | ||
4752 | * we don't allow a log commit to come in after we unlink the | ||
4753 | * name but before we add the new name back in. | ||
4754 | */ | ||
4755 | btrfs_pin_log_trans(root); | ||
4756 | |||
4642 | btrfs_set_trans_block_group(trans, new_dir); | 4757 | btrfs_set_trans_block_group(trans, new_dir); |
4643 | 4758 | ||
4644 | btrfs_inc_nlink(old_dentry->d_inode); | 4759 | btrfs_inc_nlink(old_dentry->d_inode); |
@@ -4646,6 +4761,9 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
4646 | new_dir->i_ctime = new_dir->i_mtime = ctime; | 4761 | new_dir->i_ctime = new_dir->i_mtime = ctime; |
4647 | old_inode->i_ctime = ctime; | 4762 | old_inode->i_ctime = ctime; |
4648 | 4763 | ||
4764 | if (old_dentry->d_parent != new_dentry->d_parent) | ||
4765 | btrfs_record_unlink_dir(trans, old_dir, old_inode, 1); | ||
4766 | |||
4649 | ret = btrfs_unlink_inode(trans, root, old_dir, old_dentry->d_inode, | 4767 | ret = btrfs_unlink_inode(trans, root, old_dir, old_dentry->d_inode, |
4650 | old_dentry->d_name.name, | 4768 | old_dentry->d_name.name, |
4651 | old_dentry->d_name.len); | 4769 | old_dentry->d_name.len); |
@@ -4677,7 +4795,14 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
4677 | if (ret) | 4795 | if (ret) |
4678 | goto out_fail; | 4796 | goto out_fail; |
4679 | 4797 | ||
4798 | btrfs_log_new_name(trans, old_inode, old_dir, | ||
4799 | new_dentry->d_parent); | ||
4680 | out_fail: | 4800 | out_fail: |
4801 | |||
4802 | /* this btrfs_end_log_trans just allows the current | ||
4803 | * log sub-transaction to complete | ||
4804 | */ | ||
4805 | btrfs_end_log_trans(root); | ||
4681 | btrfs_end_transaction_throttle(trans, root); | 4806 | btrfs_end_transaction_throttle(trans, root); |
4682 | out_unlock: | 4807 | out_unlock: |
4683 | return ret; | 4808 | return ret; |
@@ -4751,7 +4876,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, | |||
4751 | if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root)) | 4876 | if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root)) |
4752 | return -ENAMETOOLONG; | 4877 | return -ENAMETOOLONG; |
4753 | 4878 | ||
4754 | err = btrfs_check_free_space(root, 1, 0); | 4879 | err = btrfs_check_metadata_free_space(root); |
4755 | if (err) | 4880 | if (err) |
4756 | goto out_fail; | 4881 | goto out_fail; |
4757 | 4882 | ||
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 988fdc8b49eb..7594bec1be10 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
@@ -70,7 +70,7 @@ static noinline int create_subvol(struct btrfs_root *root, | |||
70 | u64 index = 0; | 70 | u64 index = 0; |
71 | unsigned long nr = 1; | 71 | unsigned long nr = 1; |
72 | 72 | ||
73 | ret = btrfs_check_free_space(root, 1, 0); | 73 | ret = btrfs_check_metadata_free_space(root); |
74 | if (ret) | 74 | if (ret) |
75 | goto fail_commit; | 75 | goto fail_commit; |
76 | 76 | ||
@@ -203,7 +203,7 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, | |||
203 | if (!root->ref_cows) | 203 | if (!root->ref_cows) |
204 | return -EINVAL; | 204 | return -EINVAL; |
205 | 205 | ||
206 | ret = btrfs_check_free_space(root, 1, 0); | 206 | ret = btrfs_check_metadata_free_space(root); |
207 | if (ret) | 207 | if (ret) |
208 | goto fail_unlock; | 208 | goto fail_unlock; |
209 | 209 | ||
@@ -267,7 +267,7 @@ static noinline int btrfs_mksubvol(struct path *parent, char *name, | |||
267 | goto out_dput; | 267 | goto out_dput; |
268 | 268 | ||
269 | if (!IS_POSIXACL(parent->dentry->d_inode)) | 269 | if (!IS_POSIXACL(parent->dentry->d_inode)) |
270 | mode &= ~current->fs->umask; | 270 | mode &= ~current_umask(); |
271 | 271 | ||
272 | error = mnt_want_write(parent->mnt); | 272 | error = mnt_want_write(parent->mnt); |
273 | if (error) | 273 | if (error) |
@@ -374,7 +374,7 @@ static int btrfs_defrag_file(struct file *file) | |||
374 | unsigned long i; | 374 | unsigned long i; |
375 | int ret; | 375 | int ret; |
376 | 376 | ||
377 | ret = btrfs_check_free_space(root, inode->i_size, 0); | 377 | ret = btrfs_check_data_free_space(root, inode, inode->i_size); |
378 | if (ret) | 378 | if (ret) |
379 | return -ENOSPC; | 379 | return -ENOSPC; |
380 | 380 | ||
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c index 9ebe9385129b..1c36e5cd8f55 100644 --- a/fs/btrfs/locking.c +++ b/fs/btrfs/locking.c | |||
@@ -25,21 +25,10 @@ | |||
25 | #include "extent_io.h" | 25 | #include "extent_io.h" |
26 | #include "locking.h" | 26 | #include "locking.h" |
27 | 27 | ||
28 | /* | ||
29 | * btrfs_header_level() isn't free, so don't call it when lockdep isn't | ||
30 | * on | ||
31 | */ | ||
32 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | ||
33 | static inline void spin_nested(struct extent_buffer *eb) | ||
34 | { | ||
35 | spin_lock_nested(&eb->lock, BTRFS_MAX_LEVEL - btrfs_header_level(eb)); | ||
36 | } | ||
37 | #else | ||
38 | static inline void spin_nested(struct extent_buffer *eb) | 28 | static inline void spin_nested(struct extent_buffer *eb) |
39 | { | 29 | { |
40 | spin_lock(&eb->lock); | 30 | spin_lock(&eb->lock); |
41 | } | 31 | } |
42 | #endif | ||
43 | 32 | ||
44 | /* | 33 | /* |
45 | * Setting a lock to blocking will drop the spinlock and set the | 34 | * Setting a lock to blocking will drop the spinlock and set the |
@@ -71,8 +60,8 @@ void btrfs_clear_lock_blocking(struct extent_buffer *eb) | |||
71 | 60 | ||
72 | /* | 61 | /* |
73 | * unfortunately, many of the places that currently set a lock to blocking | 62 | * unfortunately, many of the places that currently set a lock to blocking |
74 | * don't end up blocking for every long, and often they don't block | 63 | * don't end up blocking for very long, and often they don't block |
75 | * at all. For a dbench 50 run, if we don't spin one the blocking bit | 64 | * at all. For a dbench 50 run, if we don't spin on the blocking bit |
76 | * at all, the context switch rate can jump up to 400,000/sec or more. | 65 | * at all, the context switch rate can jump up to 400,000/sec or more. |
77 | * | 66 | * |
78 | * So, we're still stuck with this crummy spin on the blocking bit, | 67 | * So, we're still stuck with this crummy spin on the blocking bit, |
@@ -82,12 +71,13 @@ void btrfs_clear_lock_blocking(struct extent_buffer *eb) | |||
82 | static int btrfs_spin_on_block(struct extent_buffer *eb) | 71 | static int btrfs_spin_on_block(struct extent_buffer *eb) |
83 | { | 72 | { |
84 | int i; | 73 | int i; |
74 | |||
85 | for (i = 0; i < 512; i++) { | 75 | for (i = 0; i < 512; i++) { |
86 | cpu_relax(); | ||
87 | if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) | 76 | if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) |
88 | return 1; | 77 | return 1; |
89 | if (need_resched()) | 78 | if (need_resched()) |
90 | break; | 79 | break; |
80 | cpu_relax(); | ||
91 | } | 81 | } |
92 | return 0; | 82 | return 0; |
93 | } | 83 | } |
@@ -106,13 +96,15 @@ int btrfs_try_spin_lock(struct extent_buffer *eb) | |||
106 | { | 96 | { |
107 | int i; | 97 | int i; |
108 | 98 | ||
109 | spin_nested(eb); | 99 | if (btrfs_spin_on_block(eb)) { |
110 | if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) | 100 | spin_nested(eb); |
111 | return 1; | 101 | if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) |
112 | spin_unlock(&eb->lock); | 102 | return 1; |
113 | 103 | spin_unlock(&eb->lock); | |
104 | } | ||
114 | /* spin for a bit on the BLOCKING flag */ | 105 | /* spin for a bit on the BLOCKING flag */ |
115 | for (i = 0; i < 2; i++) { | 106 | for (i = 0; i < 2; i++) { |
107 | cpu_relax(); | ||
116 | if (!btrfs_spin_on_block(eb)) | 108 | if (!btrfs_spin_on_block(eb)) |
117 | break; | 109 | break; |
118 | 110 | ||
@@ -159,6 +151,9 @@ int btrfs_tree_lock(struct extent_buffer *eb) | |||
159 | DEFINE_WAIT(wait); | 151 | DEFINE_WAIT(wait); |
160 | wait.func = btrfs_wake_function; | 152 | wait.func = btrfs_wake_function; |
161 | 153 | ||
154 | if (!btrfs_spin_on_block(eb)) | ||
155 | goto sleep; | ||
156 | |||
162 | while(1) { | 157 | while(1) { |
163 | spin_nested(eb); | 158 | spin_nested(eb); |
164 | 159 | ||
@@ -176,9 +171,10 @@ int btrfs_tree_lock(struct extent_buffer *eb) | |||
176 | * spin for a bit, and if the blocking flag goes away, | 171 | * spin for a bit, and if the blocking flag goes away, |
177 | * loop around | 172 | * loop around |
178 | */ | 173 | */ |
174 | cpu_relax(); | ||
179 | if (btrfs_spin_on_block(eb)) | 175 | if (btrfs_spin_on_block(eb)) |
180 | continue; | 176 | continue; |
181 | 177 | sleep: | |
182 | prepare_to_wait_exclusive(&eb->lock_wq, &wait, | 178 | prepare_to_wait_exclusive(&eb->lock_wq, &wait, |
183 | TASK_UNINTERRUPTIBLE); | 179 | TASK_UNINTERRUPTIBLE); |
184 | 180 | ||
@@ -231,8 +227,8 @@ int btrfs_tree_unlock(struct extent_buffer *eb) | |||
231 | return 0; | 227 | return 0; |
232 | } | 228 | } |
233 | 229 | ||
234 | int btrfs_tree_locked(struct extent_buffer *eb) | 230 | void btrfs_assert_tree_locked(struct extent_buffer *eb) |
235 | { | 231 | { |
236 | return test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags) || | 232 | if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) |
237 | spin_is_locked(&eb->lock); | 233 | assert_spin_locked(&eb->lock); |
238 | } | 234 | } |
diff --git a/fs/btrfs/locking.h b/fs/btrfs/locking.h index 6bb0afbff928..6c4ce457168c 100644 --- a/fs/btrfs/locking.h +++ b/fs/btrfs/locking.h | |||
@@ -21,11 +21,11 @@ | |||
21 | 21 | ||
22 | int btrfs_tree_lock(struct extent_buffer *eb); | 22 | int btrfs_tree_lock(struct extent_buffer *eb); |
23 | int btrfs_tree_unlock(struct extent_buffer *eb); | 23 | int btrfs_tree_unlock(struct extent_buffer *eb); |
24 | int btrfs_tree_locked(struct extent_buffer *eb); | ||
25 | 24 | ||
26 | int btrfs_try_tree_lock(struct extent_buffer *eb); | 25 | int btrfs_try_tree_lock(struct extent_buffer *eb); |
27 | int btrfs_try_spin_lock(struct extent_buffer *eb); | 26 | int btrfs_try_spin_lock(struct extent_buffer *eb); |
28 | 27 | ||
29 | void btrfs_set_lock_blocking(struct extent_buffer *eb); | 28 | void btrfs_set_lock_blocking(struct extent_buffer *eb); |
30 | void btrfs_clear_lock_blocking(struct extent_buffer *eb); | 29 | void btrfs_clear_lock_blocking(struct extent_buffer *eb); |
30 | void btrfs_assert_tree_locked(struct extent_buffer *eb); | ||
31 | #endif | 31 | #endif |
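
Callers of the removed btrfs_tree_locked() predicate migrate to the new one-way assertion; a sketch with a hypothetical call site:

	static void touch_locked_buffer(struct extent_buffer *eb)
	{
		/* before this patch: BUG_ON(!btrfs_tree_locked(eb)); */
		btrfs_assert_tree_locked(eb);

		/* safe to modify eb; we hold its tree lock */
	}
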
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 77c2411a5f0f..53c87b197d70 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c | |||
@@ -310,6 +310,16 @@ int btrfs_remove_ordered_extent(struct inode *inode, | |||
310 | 310 | ||
311 | spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); | 311 | spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); |
312 | list_del_init(&entry->root_extent_list); | 312 | list_del_init(&entry->root_extent_list); |
313 | |||
314 | /* | ||
315 | * we have no more ordered extents for this inode and | ||
316 | * no dirty pages. We can safely remove it from the | ||
317 | * list of ordered extents | ||
318 | */ | ||
319 | if (RB_EMPTY_ROOT(&tree->tree) && | ||
320 | !mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY)) { | ||
321 | list_del_init(&BTRFS_I(inode)->ordered_operations); | ||
322 | } | ||
313 | spin_unlock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); | 323 | spin_unlock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); |
314 | 324 | ||
315 | mutex_unlock(&tree->mutex); | 325 | mutex_unlock(&tree->mutex); |
@@ -370,6 +380,68 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only) | |||
370 | } | 380 | } |
371 | 381 | ||
372 | /* | 382 | /* |
383 | * this is used during transaction commit to write all the inodes | ||
384 | * added to the ordered operation list. These files must be fully on | ||
385 | * disk before the transaction commits. | ||
386 | * | ||
387 | * we have two modes here, one is to just start the IO via filemap_flush | ||
388 | * and the other is to wait for all the IO. When we wait, we have an | ||
389 | * extra check to make sure the ordered operation list really is empty | ||
390 | * before we return | ||
391 | */ | ||
392 | int btrfs_run_ordered_operations(struct btrfs_root *root, int wait) | ||
393 | { | ||
394 | struct btrfs_inode *btrfs_inode; | ||
395 | struct inode *inode; | ||
396 | struct list_head splice; | ||
397 | |||
398 | INIT_LIST_HEAD(&splice); | ||
399 | |||
400 | mutex_lock(&root->fs_info->ordered_operations_mutex); | ||
401 | spin_lock(&root->fs_info->ordered_extent_lock); | ||
402 | again: | ||
403 | list_splice_init(&root->fs_info->ordered_operations, &splice); | ||
404 | |||
405 | while (!list_empty(&splice)) { | ||
406 | btrfs_inode = list_entry(splice.next, struct btrfs_inode, | ||
407 | ordered_operations); | ||
408 | |||
409 | inode = &btrfs_inode->vfs_inode; | ||
410 | |||
411 | list_del_init(&btrfs_inode->ordered_operations); | ||
412 | |||
413 | /* | ||
414 | * the inode may be getting freed (in sys_unlink path). | ||
415 | */ | ||
416 | inode = igrab(inode); | ||
417 | |||
418 | if (!wait && inode) { | ||
419 | list_add_tail(&BTRFS_I(inode)->ordered_operations, | ||
420 | &root->fs_info->ordered_operations); | ||
421 | } | ||
422 | spin_unlock(&root->fs_info->ordered_extent_lock); | ||
423 | |||
424 | if (inode) { | ||
425 | if (wait) | ||
426 | btrfs_wait_ordered_range(inode, 0, (u64)-1); | ||
427 | else | ||
428 | filemap_flush(inode->i_mapping); | ||
429 | iput(inode); | ||
430 | } | ||
431 | |||
432 | cond_resched(); | ||
433 | spin_lock(&root->fs_info->ordered_extent_lock); | ||
434 | } | ||
435 | if (wait && !list_empty(&root->fs_info->ordered_operations)) | ||
436 | goto again; | ||
437 | |||
438 | spin_unlock(&root->fs_info->ordered_extent_lock); | ||
439 | mutex_unlock(&root->fs_info->ordered_operations_mutex); | ||
440 | |||
441 | return 0; | ||
442 | } | ||
443 | |||
444 | /* | ||
373 | * Used to start IO or wait for a given ordered extent to finish. | 445 | * Used to start IO or wait for a given ordered extent to finish. |
374 | * | 446 | * |
375 | * If wait is one, this effectively waits on page writeback for all the pages | 447 | * If wait is one, this effectively waits on page writeback for all the pages |
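
The new btrfs_run_ordered_operations() above is the classic splice-then-process shape: take the whole shared list under the spinlock, drop the lock for the slow per-inode work (filemap_flush or a full ordered wait), and loop back in wait mode in case new inodes arrived meanwhile. A simplified userspace sketch of just that shape, with a pthread mutex standing in for the spinlock and the non-wait re-queue of inodes omitted:

#include <pthread.h>
#include <stdio.h>

struct node { struct node *next; int id; };

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
static struct node *pending;     /* fs_info->ordered_operations stand-in */

static void flush_one(struct node *n)
{
	printf("flushing inode %d\n", n->id);   /* filemap_flush stand-in */
}

static void run_ordered(int wait)
{
	struct node *splice;

	pthread_mutex_lock(&list_lock);
again:
	splice = pending;        /* take the whole list in one step */
	pending = NULL;
	while (splice) {
		struct node *n = splice;

		splice = n->next;
		pthread_mutex_unlock(&list_lock);
		flush_one(n);    /* the slow work runs unlocked */
		pthread_mutex_lock(&list_lock);
	}
	if (wait && pending)     /* waiters must drain late arrivals */
		goto again;
	pthread_mutex_unlock(&list_lock);
}
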
@@ -726,3 +798,49 @@ int btrfs_wait_on_page_writeback_range(struct address_space *mapping, | |||
726 | 798 | ||
727 | return ret; | 799 | return ret; |
728 | } | 800 | } |
801 | |||
802 | /* | ||
803 | * add a given inode to the list of inodes that must be fully on | ||
804 | * disk before a transaction commit finishes. | ||
805 | * | ||
806 | * This basically gives us the ext3 style data=ordered mode, and it is mostly | ||
807 | * used to make sure renamed files are fully on disk. | ||
808 | * | ||
809 | * It is a noop if the inode is already fully on disk. | ||
810 | * | ||
811 | * If trans is not null, we'll do a friendly check for a transaction that | ||
812 | * is already flushing things and force the IO down ourselves. | ||
813 | */ | ||
814 | int btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, | ||
815 | struct btrfs_root *root, | ||
816 | struct inode *inode) | ||
817 | { | ||
818 | u64 last_mod; | ||
819 | |||
820 | last_mod = max(BTRFS_I(inode)->generation, BTRFS_I(inode)->last_trans); | ||
821 | |||
822 | /* | ||
823 | * if this file hasn't been changed since the last transaction | ||
824 | * commit, we can safely return without doing anything | ||
825 | */ | ||
826 | if (last_mod < root->fs_info->last_trans_committed) | ||
827 | return 0; | ||
828 | |||
829 | /* | ||
830 | * the transaction is already committing. Just force the IO down and | ||
831 | * don't bother with all of this list nonsense | ||
832 | */ | ||
833 | if (trans && root->fs_info->running_transaction->blocked) { | ||
834 | btrfs_wait_ordered_range(inode, 0, (u64)-1); | ||
835 | return 0; | ||
836 | } | ||
837 | |||
838 | spin_lock(&root->fs_info->ordered_extent_lock); | ||
839 | if (list_empty(&BTRFS_I(inode)->ordered_operations)) { | ||
840 | list_add_tail(&BTRFS_I(inode)->ordered_operations, | ||
841 | &root->fs_info->ordered_operations); | ||
842 | } | ||
843 | spin_unlock(&root->fs_info->ordered_extent_lock); | ||
844 | |||
845 | return 0; | ||
846 | } | ||
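
The early return in btrfs_add_ordered_operation() reduces to a pure predicate on three generation numbers. A hypothetical restatement (the field names mirror the diff; the helper itself is not kernel code):

typedef unsigned long long u64;

/* last_mod is the newer of the inode's creation generation and the
 * transaction that last changed it; only inodes touched after the
 * last committed transaction need ordered treatment */
static int needs_ordered_op(u64 generation, u64 last_trans,
			    u64 last_committed)
{
	u64 last_mod = generation > last_trans ? generation : last_trans;

	return last_mod >= last_committed;
}
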
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index ab66d5e8d6d6..3d31c8827b01 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h | |||
@@ -155,4 +155,8 @@ int btrfs_wait_on_page_writeback_range(struct address_space *mapping, | |||
155 | int btrfs_fdatawrite_range(struct address_space *mapping, loff_t start, | 155 | int btrfs_fdatawrite_range(struct address_space *mapping, loff_t start, |
156 | loff_t end, int sync_mode); | 156 | loff_t end, int sync_mode); |
157 | int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only); | 157 | int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only); |
158 | int btrfs_run_ordered_operations(struct btrfs_root *root, int wait); | ||
159 | int btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, | ||
160 | struct btrfs_root *root, | ||
161 | struct inode *inode); | ||
158 | #endif | 162 | #endif |
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index f3fd7e2cbc38..9744af9d71e9 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
@@ -24,6 +24,7 @@ | |||
24 | #include <linux/highmem.h> | 24 | #include <linux/highmem.h> |
25 | #include <linux/time.h> | 25 | #include <linux/time.h> |
26 | #include <linux/init.h> | 26 | #include <linux/init.h> |
27 | #include <linux/seq_file.h> | ||
27 | #include <linux/string.h> | 28 | #include <linux/string.h> |
28 | #include <linux/smp_lock.h> | 29 | #include <linux/smp_lock.h> |
29 | #include <linux/backing-dev.h> | 30 | #include <linux/backing-dev.h> |
@@ -66,7 +67,8 @@ static void btrfs_put_super(struct super_block *sb) | |||
66 | enum { | 67 | enum { |
67 | Opt_degraded, Opt_subvol, Opt_device, Opt_nodatasum, Opt_nodatacow, | 68 | Opt_degraded, Opt_subvol, Opt_device, Opt_nodatasum, Opt_nodatacow, |
68 | Opt_max_extent, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, | 69 | Opt_max_extent, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, |
69 | Opt_ssd, Opt_thread_pool, Opt_noacl, Opt_compress, Opt_err, | 70 | Opt_ssd, Opt_thread_pool, Opt_noacl, Opt_compress, Opt_notreelog, |
71 | Opt_flushoncommit, Opt_err, | ||
70 | }; | 72 | }; |
71 | 73 | ||
72 | static match_table_t tokens = { | 74 | static match_table_t tokens = { |
@@ -83,6 +85,8 @@ static match_table_t tokens = { | |||
83 | {Opt_compress, "compress"}, | 85 | {Opt_compress, "compress"}, |
84 | {Opt_ssd, "ssd"}, | 86 | {Opt_ssd, "ssd"}, |
85 | {Opt_noacl, "noacl"}, | 87 | {Opt_noacl, "noacl"}, |
88 | {Opt_notreelog, "notreelog"}, | ||
89 | {Opt_flushoncommit, "flushoncommit"}, | ||
86 | {Opt_err, NULL}, | 90 | {Opt_err, NULL}, |
87 | }; | 91 | }; |
88 | 92 | ||
@@ -222,6 +226,14 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
222 | case Opt_noacl: | 226 | case Opt_noacl: |
223 | root->fs_info->sb->s_flags &= ~MS_POSIXACL; | 227 | root->fs_info->sb->s_flags &= ~MS_POSIXACL; |
224 | break; | 228 | break; |
229 | case Opt_notreelog: | ||
230 | printk(KERN_INFO "btrfs: disabling tree log\n"); | ||
231 | btrfs_set_opt(info->mount_opt, NOTREELOG); | ||
232 | break; | ||
233 | case Opt_flushoncommit: | ||
234 | printk(KERN_INFO "btrfs: turning on flush-on-commit\n"); | ||
235 | btrfs_set_opt(info->mount_opt, FLUSHONCOMMIT); | ||
236 | break; | ||
225 | default: | 237 | default: |
226 | break; | 238 | break; |
227 | } | 239 | } |
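
The two new cases slot into the existing match_token() dispatch in btrfs_parse_options(). For readers who don't have the match_table_t machinery in their head, the control flow is equivalent to this userspace sketch (strsep/strcmp replace match_token; only the new options are handled):

#include <stdio.h>
#include <string.h>

static void parse_options_sketch(char *options)
{
	char *p;

	while ((p = strsep(&options, ",")) != NULL) {
		if (!*p)
			continue;
		if (strcmp(p, "notreelog") == 0)
			printf("btrfs: disabling tree log\n");
		else if (strcmp(p, "flushoncommit") == 0)
			printf("btrfs: turning on flush-on-commit\n");
		/* unknown tokens fall through, like the default: case */
	}
}
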
@@ -363,9 +375,8 @@ fail_close: | |||
363 | int btrfs_sync_fs(struct super_block *sb, int wait) | 375 | int btrfs_sync_fs(struct super_block *sb, int wait) |
364 | { | 376 | { |
365 | struct btrfs_trans_handle *trans; | 377 | struct btrfs_trans_handle *trans; |
366 | struct btrfs_root *root; | 378 | struct btrfs_root *root = btrfs_sb(sb); |
367 | int ret; | 379 | int ret; |
368 | root = btrfs_sb(sb); | ||
369 | 380 | ||
370 | if (sb->s_flags & MS_RDONLY) | 381 | if (sb->s_flags & MS_RDONLY) |
371 | return 0; | 382 | return 0; |
@@ -379,13 +390,47 @@ int btrfs_sync_fs(struct super_block *sb, int wait) | |||
379 | btrfs_start_delalloc_inodes(root); | 390 | btrfs_start_delalloc_inodes(root); |
380 | btrfs_wait_ordered_extents(root, 0); | 391 | btrfs_wait_ordered_extents(root, 0); |
381 | 392 | ||
382 | btrfs_clean_old_snapshots(root); | ||
383 | trans = btrfs_start_transaction(root, 1); | 393 | trans = btrfs_start_transaction(root, 1); |
384 | ret = btrfs_commit_transaction(trans, root); | 394 | ret = btrfs_commit_transaction(trans, root); |
385 | sb->s_dirt = 0; | 395 | sb->s_dirt = 0; |
386 | return ret; | 396 | return ret; |
387 | } | 397 | } |
388 | 398 | ||
399 | static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs) | ||
400 | { | ||
401 | struct btrfs_root *root = btrfs_sb(vfs->mnt_sb); | ||
402 | struct btrfs_fs_info *info = root->fs_info; | ||
403 | |||
404 | if (btrfs_test_opt(root, DEGRADED)) | ||
405 | seq_puts(seq, ",degraded"); | ||
406 | if (btrfs_test_opt(root, NODATASUM)) | ||
407 | seq_puts(seq, ",nodatasum"); | ||
408 | if (btrfs_test_opt(root, NODATACOW)) | ||
409 | seq_puts(seq, ",nodatacow"); | ||
410 | if (btrfs_test_opt(root, NOBARRIER)) | ||
411 | seq_puts(seq, ",nobarrier"); | ||
412 | if (info->max_extent != (u64)-1) | ||
413 | seq_printf(seq, ",max_extent=%llu", info->max_extent); | ||
414 | if (info->max_inline != 8192 * 1024) | ||
415 | seq_printf(seq, ",max_inline=%llu", info->max_inline); | ||
416 | if (info->alloc_start != 0) | ||
417 | seq_printf(seq, ",alloc_start=%llu", info->alloc_start); | ||
418 | if (info->thread_pool_size != min_t(unsigned long, | ||
419 | num_online_cpus() + 2, 8)) | ||
420 | seq_printf(seq, ",thread_pool=%d", info->thread_pool_size); | ||
421 | if (btrfs_test_opt(root, COMPRESS)) | ||
422 | seq_puts(seq, ",compress"); | ||
423 | if (btrfs_test_opt(root, SSD)) | ||
424 | seq_puts(seq, ",ssd"); | ||
425 | if (btrfs_test_opt(root, NOTREELOG)) | ||
426 | seq_puts(seq, ",no-treelog"); | ||
427 | if (btrfs_test_opt(root, FLUSHONCOMMIT)) | ||
428 | seq_puts(seq, ",flush-on-commit"); | ||
429 | if (!(root->fs_info->sb->s_flags & MS_POSIXACL)) | ||
430 | seq_puts(seq, ",noacl"); | ||
431 | return 0; | ||
432 | } | ||
433 | |||
389 | static void btrfs_write_super(struct super_block *sb) | 434 | static void btrfs_write_super(struct super_block *sb) |
390 | { | 435 | { |
391 | sb->s_dirt = 0; | 436 | sb->s_dirt = 0; |
@@ -511,6 +556,10 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) | |||
511 | struct btrfs_root *root = btrfs_sb(sb); | 556 | struct btrfs_root *root = btrfs_sb(sb); |
512 | int ret; | 557 | int ret; |
513 | 558 | ||
559 | ret = btrfs_parse_options(root, data); | ||
560 | if (ret) | ||
561 | return -EINVAL; | ||
562 | |||
514 | if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) | 563 | if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) |
515 | return 0; | 564 | return 0; |
516 | 565 | ||
@@ -627,7 +676,7 @@ static struct super_operations btrfs_super_ops = { | |||
627 | .put_super = btrfs_put_super, | 676 | .put_super = btrfs_put_super, |
628 | .write_super = btrfs_write_super, | 677 | .write_super = btrfs_write_super, |
629 | .sync_fs = btrfs_sync_fs, | 678 | .sync_fs = btrfs_sync_fs, |
630 | .show_options = generic_show_options, | 679 | .show_options = btrfs_show_options, |
631 | .write_inode = btrfs_write_inode, | 680 | .write_inode = btrfs_write_inode, |
632 | .dirty_inode = btrfs_dirty_inode, | 681 | .dirty_inode = btrfs_dirty_inode, |
633 | .alloc_inode = btrfs_alloc_inode, | 682 | .alloc_inode = btrfs_alloc_inode, |
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 919172de5c9a..2869b3361eb6 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
@@ -53,8 +53,6 @@ static noinline int join_transaction(struct btrfs_root *root) | |||
53 | GFP_NOFS); | 53 | GFP_NOFS); |
54 | BUG_ON(!cur_trans); | 54 | BUG_ON(!cur_trans); |
55 | root->fs_info->generation++; | 55 | root->fs_info->generation++; |
56 | root->fs_info->last_alloc = 0; | ||
57 | root->fs_info->last_data_alloc = 0; | ||
58 | cur_trans->num_writers = 1; | 56 | cur_trans->num_writers = 1; |
59 | cur_trans->num_joined = 0; | 57 | cur_trans->num_joined = 0; |
60 | cur_trans->transid = root->fs_info->generation; | 58 | cur_trans->transid = root->fs_info->generation; |
@@ -65,6 +63,15 @@ static noinline int join_transaction(struct btrfs_root *root) | |||
65 | cur_trans->use_count = 1; | 63 | cur_trans->use_count = 1; |
66 | cur_trans->commit_done = 0; | 64 | cur_trans->commit_done = 0; |
67 | cur_trans->start_time = get_seconds(); | 65 | cur_trans->start_time = get_seconds(); |
66 | |||
67 | cur_trans->delayed_refs.root.rb_node = NULL; | ||
68 | cur_trans->delayed_refs.num_entries = 0; | ||
69 | cur_trans->delayed_refs.num_heads_ready = 0; | ||
70 | cur_trans->delayed_refs.num_heads = 0; | ||
71 | cur_trans->delayed_refs.flushing = 0; | ||
72 | cur_trans->delayed_refs.run_delayed_start = 0; | ||
73 | spin_lock_init(&cur_trans->delayed_refs.lock); | ||
74 | |||
68 | INIT_LIST_HEAD(&cur_trans->pending_snapshots); | 75 | INIT_LIST_HEAD(&cur_trans->pending_snapshots); |
69 | list_add_tail(&cur_trans->list, &root->fs_info->trans_list); | 76 | list_add_tail(&cur_trans->list, &root->fs_info->trans_list); |
70 | extent_io_tree_init(&cur_trans->dirty_pages, | 77 | extent_io_tree_init(&cur_trans->dirty_pages, |
@@ -182,6 +189,8 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, | |||
182 | h->block_group = 0; | 189 | h->block_group = 0; |
183 | h->alloc_exclude_nr = 0; | 190 | h->alloc_exclude_nr = 0; |
184 | h->alloc_exclude_start = 0; | 191 | h->alloc_exclude_start = 0; |
192 | h->delayed_ref_updates = 0; | ||
193 | |||
185 | root->fs_info->running_transaction->use_count++; | 194 | root->fs_info->running_transaction->use_count++; |
186 | mutex_unlock(&root->fs_info->trans_mutex); | 195 | mutex_unlock(&root->fs_info->trans_mutex); |
187 | return h; | 196 | return h; |
@@ -271,7 +280,6 @@ void btrfs_throttle(struct btrfs_root *root) | |||
271 | if (!root->fs_info->open_ioctl_trans) | 280 | if (!root->fs_info->open_ioctl_trans) |
272 | wait_current_trans(root); | 281 | wait_current_trans(root); |
273 | mutex_unlock(&root->fs_info->trans_mutex); | 282 | mutex_unlock(&root->fs_info->trans_mutex); |
274 | |||
275 | throttle_on_drops(root); | 283 | throttle_on_drops(root); |
276 | } | 284 | } |
277 | 285 | ||
@@ -280,6 +288,27 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
280 | { | 288 | { |
281 | struct btrfs_transaction *cur_trans; | 289 | struct btrfs_transaction *cur_trans; |
282 | struct btrfs_fs_info *info = root->fs_info; | 290 | struct btrfs_fs_info *info = root->fs_info; |
291 | int count = 0; | ||
292 | |||
293 | while (count < 4) { | ||
294 | unsigned long cur = trans->delayed_ref_updates; | ||
295 | trans->delayed_ref_updates = 0; | ||
296 | if (cur && | ||
297 | trans->transaction->delayed_refs.num_heads_ready > 64) { | ||
298 | trans->delayed_ref_updates = 0; | ||
299 | |||
300 | /* | ||
301 | * do a full flush if the transaction is trying | ||
302 | * to close | ||
303 | */ | ||
304 | if (trans->transaction->delayed_refs.flushing) | ||
305 | cur = 0; | ||
306 | btrfs_run_delayed_refs(trans, root, cur); | ||
307 | } else { | ||
308 | break; | ||
309 | } | ||
310 | count++; | ||
311 | } | ||
283 | 312 | ||
284 | mutex_lock(&info->trans_mutex); | 313 | mutex_lock(&info->trans_mutex); |
285 | cur_trans = info->running_transaction; | 314 | cur_trans = info->running_transaction; |
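
The new prologue in __btrfs_end_transaction() makes every writer that ends a transaction help drain the delayed-ref backlog, but only for a bounded number of rounds so nobody is trapped flushing forever. A self-contained restatement with stub types; passing 0 to mean "run everything" matches btrfs_run_delayed_refs() in this series:

struct txn_stub {
	unsigned long delayed_ref_updates;
	unsigned long num_heads_ready;
	int flushing;
};

static void run_delayed_refs_stub(struct txn_stub *t, unsigned long count)
{
	(void)t;
	(void)count;    /* placeholder for btrfs_run_delayed_refs() */
}

static void end_transaction_flush(struct txn_stub *t)
{
	int count = 0;

	while (count < 4) {
		unsigned long cur = t->delayed_ref_updates;

		t->delayed_ref_updates = 0;
		if (!cur || t->num_heads_ready <= 64)
			break;          /* backlog small enough, leave it */
		if (t->flushing)
			cur = 0;        /* closing: 0 means run them all */
		run_delayed_refs_stub(t, cur);
		count++;
	}
}
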
@@ -424,9 +453,10 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans, | |||
424 | u64 old_root_bytenr; | 453 | u64 old_root_bytenr; |
425 | struct btrfs_root *tree_root = root->fs_info->tree_root; | 454 | struct btrfs_root *tree_root = root->fs_info->tree_root; |
426 | 455 | ||
427 | btrfs_extent_post_op(trans, root); | ||
428 | btrfs_write_dirty_block_groups(trans, root); | 456 | btrfs_write_dirty_block_groups(trans, root); |
429 | btrfs_extent_post_op(trans, root); | 457 | |
458 | ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); | ||
459 | BUG_ON(ret); | ||
430 | 460 | ||
431 | while (1) { | 461 | while (1) { |
432 | old_root_bytenr = btrfs_root_bytenr(&root->root_item); | 462 | old_root_bytenr = btrfs_root_bytenr(&root->root_item); |
@@ -438,14 +468,14 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans, | |||
438 | btrfs_header_level(root->node)); | 468 | btrfs_header_level(root->node)); |
439 | btrfs_set_root_generation(&root->root_item, trans->transid); | 469 | btrfs_set_root_generation(&root->root_item, trans->transid); |
440 | 470 | ||
441 | btrfs_extent_post_op(trans, root); | ||
442 | |||
443 | ret = btrfs_update_root(trans, tree_root, | 471 | ret = btrfs_update_root(trans, tree_root, |
444 | &root->root_key, | 472 | &root->root_key, |
445 | &root->root_item); | 473 | &root->root_item); |
446 | BUG_ON(ret); | 474 | BUG_ON(ret); |
447 | btrfs_write_dirty_block_groups(trans, root); | 475 | btrfs_write_dirty_block_groups(trans, root); |
448 | btrfs_extent_post_op(trans, root); | 476 | |
477 | ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); | ||
478 | BUG_ON(ret); | ||
449 | } | 479 | } |
450 | return 0; | 480 | return 0; |
451 | } | 481 | } |
@@ -459,15 +489,18 @@ int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, | |||
459 | struct btrfs_fs_info *fs_info = root->fs_info; | 489 | struct btrfs_fs_info *fs_info = root->fs_info; |
460 | struct list_head *next; | 490 | struct list_head *next; |
461 | struct extent_buffer *eb; | 491 | struct extent_buffer *eb; |
492 | int ret; | ||
462 | 493 | ||
463 | btrfs_extent_post_op(trans, fs_info->tree_root); | 494 | ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); |
495 | BUG_ON(ret); | ||
464 | 496 | ||
465 | eb = btrfs_lock_root_node(fs_info->tree_root); | 497 | eb = btrfs_lock_root_node(fs_info->tree_root); |
466 | btrfs_cow_block(trans, fs_info->tree_root, eb, NULL, 0, &eb, 0); | 498 | btrfs_cow_block(trans, fs_info->tree_root, eb, NULL, 0, &eb); |
467 | btrfs_tree_unlock(eb); | 499 | btrfs_tree_unlock(eb); |
468 | free_extent_buffer(eb); | 500 | free_extent_buffer(eb); |
469 | 501 | ||
470 | btrfs_extent_post_op(trans, fs_info->tree_root); | 502 | ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); |
503 | BUG_ON(ret); | ||
471 | 504 | ||
472 | while (!list_empty(&fs_info->dirty_cowonly_roots)) { | 505 | while (!list_empty(&fs_info->dirty_cowonly_roots)) { |
473 | next = fs_info->dirty_cowonly_roots.next; | 506 | next = fs_info->dirty_cowonly_roots.next; |
@@ -475,6 +508,9 @@ int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, | |||
475 | root = list_entry(next, struct btrfs_root, dirty_list); | 508 | root = list_entry(next, struct btrfs_root, dirty_list); |
476 | 509 | ||
477 | update_cowonly_root(trans, root); | 510 | update_cowonly_root(trans, root); |
511 | |||
512 | ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); | ||
513 | BUG_ON(ret); | ||
478 | } | 514 | } |
479 | return 0; | 515 | return 0; |
480 | } | 516 | } |
@@ -635,6 +671,31 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly) | |||
635 | } | 671 | } |
636 | 672 | ||
637 | /* | 673 | /* |
674 | * when dropping snapshots, we generate a ton of delayed refs, and it makes | ||
675 | * sense not to join the transaction while it is trying to flush the current | ||
676 | * queue of delayed refs out. | ||
677 | * | ||
678 | * This is used by the drop snapshot code only | ||
679 | */ | ||
680 | static noinline int wait_transaction_pre_flush(struct btrfs_fs_info *info) | ||
681 | { | ||
682 | DEFINE_WAIT(wait); | ||
683 | |||
684 | mutex_lock(&info->trans_mutex); | ||
685 | while (info->running_transaction && | ||
686 | info->running_transaction->delayed_refs.flushing) { | ||
687 | prepare_to_wait(&info->transaction_wait, &wait, | ||
688 | TASK_UNINTERRUPTIBLE); | ||
689 | mutex_unlock(&info->trans_mutex); | ||
690 | schedule(); | ||
691 | mutex_lock(&info->trans_mutex); | ||
692 | finish_wait(&info->transaction_wait, &wait); | ||
693 | } | ||
694 | mutex_unlock(&info->trans_mutex); | ||
695 | return 0; | ||
696 | } | ||
697 | |||
698 | /* | ||
638 | * Given a list of roots that need to be deleted, call btrfs_drop_snapshot on | 699 | * Given a list of roots that need to be deleted, call btrfs_drop_snapshot on |
639 | * all of them | 700 | * all of them |
640 | */ | 701 | */ |
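
wait_transaction_pre_flush() is the open-coded kernel idiom for "sleep until a flag clears": prepare_to_wait(), drop the mutex, schedule(), retake and re-test. The same shape with a pthread condition variable, for reference; the waker here corresponds to the wake_up() of transaction_wait once the commit clears blocked:

#include <pthread.h>

static pthread_mutex_t trans_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t transaction_wait = PTHREAD_COND_INITIALIZER;
static int flushing;                    /* delayed_refs.flushing stand-in */

static void wait_pre_flush(void)
{
	pthread_mutex_lock(&trans_mutex);
	while (flushing)
		pthread_cond_wait(&transaction_wait, &trans_mutex);
	pthread_mutex_unlock(&trans_mutex);
}
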
@@ -661,7 +722,22 @@ static noinline int drop_dirty_roots(struct btrfs_root *tree_root, | |||
661 | atomic_inc(&root->fs_info->throttles); | 722 | atomic_inc(&root->fs_info->throttles); |
662 | 723 | ||
663 | while (1) { | 724 | while (1) { |
725 | /* | ||
726 | * we don't want to jump in and create a bunch of | ||
727 | * delayed refs if the transaction is starting to close | ||
728 | */ | ||
729 | wait_transaction_pre_flush(tree_root->fs_info); | ||
664 | trans = btrfs_start_transaction(tree_root, 1); | 730 | trans = btrfs_start_transaction(tree_root, 1); |
731 | |||
732 | /* | ||
733 | * we've joined a transaction, make sure it isn't | ||
734 | * closing right now | ||
735 | */ | ||
736 | if (trans->transaction->delayed_refs.flushing) { | ||
737 | btrfs_end_transaction(trans, tree_root); | ||
738 | continue; | ||
739 | } | ||
740 | |||
665 | mutex_lock(&root->fs_info->drop_mutex); | 741 | mutex_lock(&root->fs_info->drop_mutex); |
666 | ret = btrfs_drop_snapshot(trans, dirty->root); | 742 | ret = btrfs_drop_snapshot(trans, dirty->root); |
667 | if (ret != -EAGAIN) | 743 | if (ret != -EAGAIN) |
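
The wait/join/re-check sequence above is needed because the pre-flush wait and the join are not atomic: a new commit can set delayed_refs.flushing in the window between them. Distilled into a hypothetical retry loop (all names are stubs, not btrfs API):

struct trans_stub { int flushing; };

static void wait_pre_flush_stub(void) { /* see the sketch above */ }
static void end_transaction_stub(struct trans_stub *t) { (void)t; }
static struct trans_stub *start_transaction_stub(void)
{
	static struct trans_stub t;
	return &t;
}

static struct trans_stub *join_quiescent_transaction(void)
{
	struct trans_stub *trans;

	for (;;) {
		wait_pre_flush_stub();
		trans = start_transaction_stub();
		if (!trans->flushing)
			return trans;     /* safe to pile on delayed refs */
		end_transaction_stub(trans);  /* raced with a commit: retry */
	}
}
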
@@ -688,7 +764,9 @@ static noinline int drop_dirty_roots(struct btrfs_root *tree_root, | |||
688 | num_bytes -= btrfs_root_used(&dirty->root->root_item); | 764 | num_bytes -= btrfs_root_used(&dirty->root->root_item); |
689 | bytes_used = btrfs_root_used(&root->root_item); | 765 | bytes_used = btrfs_root_used(&root->root_item); |
690 | if (num_bytes) { | 766 | if (num_bytes) { |
767 | mutex_lock(&root->fs_info->trans_mutex); | ||
691 | btrfs_record_root_in_trans(root); | 768 | btrfs_record_root_in_trans(root); |
769 | mutex_unlock(&root->fs_info->trans_mutex); | ||
692 | btrfs_set_root_used(&root->root_item, | 770 | btrfs_set_root_used(&root->root_item, |
693 | bytes_used - num_bytes); | 771 | bytes_used - num_bytes); |
694 | } | 772 | } |
@@ -764,7 +842,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
764 | btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); | 842 | btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); |
765 | 843 | ||
766 | old = btrfs_lock_root_node(root); | 844 | old = btrfs_lock_root_node(root); |
767 | btrfs_cow_block(trans, root, old, NULL, 0, &old, 0); | 845 | btrfs_cow_block(trans, root, old, NULL, 0, &old); |
768 | 846 | ||
769 | btrfs_copy_root(trans, root, old, &tmp, objectid); | 847 | btrfs_copy_root(trans, root, old, &tmp, objectid); |
770 | btrfs_tree_unlock(old); | 848 | btrfs_tree_unlock(old); |
@@ -892,12 +970,32 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
892 | struct extent_io_tree *pinned_copy; | 970 | struct extent_io_tree *pinned_copy; |
893 | DEFINE_WAIT(wait); | 971 | DEFINE_WAIT(wait); |
894 | int ret; | 972 | int ret; |
973 | int should_grow = 0; | ||
974 | unsigned long now = get_seconds(); | ||
975 | int flush_on_commit = btrfs_test_opt(root, FLUSHONCOMMIT); | ||
976 | |||
977 | btrfs_run_ordered_operations(root, 0); | ||
978 | |||
979 | /* make a pass through all the delayed refs we have so far; | ||
980 | * any running procs may add more while we are here | ||
981 | */ | ||
982 | ret = btrfs_run_delayed_refs(trans, root, 0); | ||
983 | BUG_ON(ret); | ||
984 | |||
985 | cur_trans = trans->transaction; | ||
986 | /* | ||
987 | * set the flushing flag so procs in this transaction have to | ||
988 | * start sending their work down. | ||
989 | */ | ||
990 | cur_trans->delayed_refs.flushing = 1; | ||
991 | |||
992 | ret = btrfs_run_delayed_refs(trans, root, 0); | ||
993 | BUG_ON(ret); | ||
895 | 994 | ||
896 | INIT_LIST_HEAD(&dirty_fs_roots); | ||
897 | mutex_lock(&root->fs_info->trans_mutex); | 995 | mutex_lock(&root->fs_info->trans_mutex); |
898 | if (trans->transaction->in_commit) { | 996 | INIT_LIST_HEAD(&dirty_fs_roots); |
899 | cur_trans = trans->transaction; | 997 | if (cur_trans->in_commit) { |
900 | trans->transaction->use_count++; | 998 | cur_trans->use_count++; |
901 | mutex_unlock(&root->fs_info->trans_mutex); | 999 | mutex_unlock(&root->fs_info->trans_mutex); |
902 | btrfs_end_transaction(trans, root); | 1000 | btrfs_end_transaction(trans, root); |
903 | 1001 | ||
@@ -920,7 +1018,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
920 | 1018 | ||
921 | trans->transaction->in_commit = 1; | 1019 | trans->transaction->in_commit = 1; |
922 | trans->transaction->blocked = 1; | 1020 | trans->transaction->blocked = 1; |
923 | cur_trans = trans->transaction; | ||
924 | if (cur_trans->list.prev != &root->fs_info->trans_list) { | 1021 | if (cur_trans->list.prev != &root->fs_info->trans_list) { |
925 | prev_trans = list_entry(cur_trans->list.prev, | 1022 | prev_trans = list_entry(cur_trans->list.prev, |
926 | struct btrfs_transaction, list); | 1023 | struct btrfs_transaction, list); |
@@ -935,6 +1032,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
935 | } | 1032 | } |
936 | } | 1033 | } |
937 | 1034 | ||
1035 | if (now < cur_trans->start_time || now - cur_trans->start_time < 1) | ||
1036 | should_grow = 1; | ||
1037 | |||
938 | do { | 1038 | do { |
939 | int snap_pending = 0; | 1039 | int snap_pending = 0; |
940 | joined = cur_trans->num_joined; | 1040 | joined = cur_trans->num_joined; |
@@ -947,26 +1047,42 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
947 | 1047 | ||
948 | if (cur_trans->num_writers > 1) | 1048 | if (cur_trans->num_writers > 1) |
949 | timeout = MAX_SCHEDULE_TIMEOUT; | 1049 | timeout = MAX_SCHEDULE_TIMEOUT; |
950 | else | 1050 | else if (should_grow) |
951 | timeout = 1; | 1051 | timeout = 1; |
952 | 1052 | ||
953 | mutex_unlock(&root->fs_info->trans_mutex); | 1053 | mutex_unlock(&root->fs_info->trans_mutex); |
954 | 1054 | ||
955 | if (snap_pending) { | 1055 | if (flush_on_commit || snap_pending) { |
1056 | if (flush_on_commit) | ||
1057 | btrfs_start_delalloc_inodes(root); | ||
956 | ret = btrfs_wait_ordered_extents(root, 1); | 1058 | ret = btrfs_wait_ordered_extents(root, 1); |
957 | BUG_ON(ret); | 1059 | BUG_ON(ret); |
958 | } | 1060 | } |
959 | 1061 | ||
960 | schedule_timeout(timeout); | 1062 | /* |
1063 | * renames don't use btrfs_join_transaction, so, once we | ||
1064 | * set the transaction to blocked above, we aren't going | ||
1065 | * to get any new ordered operations. We can safely run | ||
1066 | * it here and know for sure that nothing new will be added | ||
1067 | * to the list | ||
1068 | */ | ||
1069 | btrfs_run_ordered_operations(root, 1); | ||
1070 | |||
1071 | smp_mb(); | ||
1072 | if (cur_trans->num_writers > 1 || should_grow) | ||
1073 | schedule_timeout(timeout); | ||
961 | 1074 | ||
962 | mutex_lock(&root->fs_info->trans_mutex); | 1075 | mutex_lock(&root->fs_info->trans_mutex); |
963 | finish_wait(&cur_trans->writer_wait, &wait); | 1076 | finish_wait(&cur_trans->writer_wait, &wait); |
964 | } while (cur_trans->num_writers > 1 || | 1077 | } while (cur_trans->num_writers > 1 || |
965 | (cur_trans->num_joined != joined)); | 1078 | (should_grow && cur_trans->num_joined != joined)); |
966 | 1079 | ||
967 | ret = create_pending_snapshots(trans, root->fs_info); | 1080 | ret = create_pending_snapshots(trans, root->fs_info); |
968 | BUG_ON(ret); | 1081 | BUG_ON(ret); |
969 | 1082 | ||
1083 | ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); | ||
1084 | BUG_ON(ret); | ||
1085 | |||
970 | WARN_ON(cur_trans != trans->transaction); | 1086 | WARN_ON(cur_trans != trans->transaction); |
971 | 1087 | ||
972 | /* btrfs_commit_tree_roots is responsible for getting the | 1088 | /* btrfs_commit_tree_roots is responsible for getting the |
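
The rewritten commit loop only keeps sleeping while the transaction is worth growing: should_grow is set when the transaction is under a second old, and flushoncommit (or a pending snapshot) forces the ordered IO down on every pass. Reduced to its skeleton, under stub names:

struct commit_stub {
	unsigned long num_writers;
	unsigned long num_joined;       /* bumped by each late joiner */
};

static void flush_ordered_io_stub(void) { }
static void brief_sleep_stub(void) { }

static void wait_for_writers(struct commit_stub *cur, int should_grow,
			     int flush_on_commit, int snap_pending)
{
	unsigned long joined;

	do {
		joined = cur->num_joined;
		if (flush_on_commit || snap_pending)
			flush_ordered_io_stub();
		if (cur->num_writers > 1 || should_grow)
			brief_sleep_stub();
	} while (cur->num_writers > 1 ||
		 (should_grow && cur->num_joined != joined));
}
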
@@ -1030,6 +1146,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1030 | btrfs_copy_pinned(root, pinned_copy); | 1146 | btrfs_copy_pinned(root, pinned_copy); |
1031 | 1147 | ||
1032 | trans->transaction->blocked = 0; | 1148 | trans->transaction->blocked = 0; |
1149 | |||
1033 | wake_up(&root->fs_info->transaction_throttle); | 1150 | wake_up(&root->fs_info->transaction_throttle); |
1034 | wake_up(&root->fs_info->transaction_wait); | 1151 | wake_up(&root->fs_info->transaction_wait); |
1035 | 1152 | ||
@@ -1056,6 +1173,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1056 | mutex_lock(&root->fs_info->trans_mutex); | 1173 | mutex_lock(&root->fs_info->trans_mutex); |
1057 | 1174 | ||
1058 | cur_trans->commit_done = 1; | 1175 | cur_trans->commit_done = 1; |
1176 | |||
1059 | root->fs_info->last_trans_committed = cur_trans->transid; | 1177 | root->fs_info->last_trans_committed = cur_trans->transid; |
1060 | wake_up(&cur_trans->commit_wait); | 1178 | wake_up(&cur_trans->commit_wait); |
1061 | 1179 | ||
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index ea292117f882..94f5bde2b58d 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h | |||
@@ -19,10 +19,16 @@ | |||
19 | #ifndef __BTRFS_TRANSACTION__ | 19 | #ifndef __BTRFS_TRANSACTION__ |
20 | #define __BTRFS_TRANSACTION__ | 20 | #define __BTRFS_TRANSACTION__ |
21 | #include "btrfs_inode.h" | 21 | #include "btrfs_inode.h" |
22 | #include "delayed-ref.h" | ||
22 | 23 | ||
23 | struct btrfs_transaction { | 24 | struct btrfs_transaction { |
24 | u64 transid; | 25 | u64 transid; |
26 | /* | ||
27 | * total writers in this transaction; it must be zero before the | ||
28 | * transaction can end | ||
29 | */ | ||
25 | unsigned long num_writers; | 30 | unsigned long num_writers; |
31 | |||
26 | unsigned long num_joined; | 32 | unsigned long num_joined; |
27 | int in_commit; | 33 | int in_commit; |
28 | int use_count; | 34 | int use_count; |
@@ -34,6 +40,7 @@ struct btrfs_transaction { | |||
34 | wait_queue_head_t writer_wait; | 40 | wait_queue_head_t writer_wait; |
35 | wait_queue_head_t commit_wait; | 41 | wait_queue_head_t commit_wait; |
36 | struct list_head pending_snapshots; | 42 | struct list_head pending_snapshots; |
43 | struct btrfs_delayed_ref_root delayed_refs; | ||
37 | }; | 44 | }; |
38 | 45 | ||
39 | struct btrfs_trans_handle { | 46 | struct btrfs_trans_handle { |
@@ -44,6 +51,7 @@ struct btrfs_trans_handle { | |||
44 | u64 block_group; | 51 | u64 block_group; |
45 | u64 alloc_exclude_start; | 52 | u64 alloc_exclude_start; |
46 | u64 alloc_exclude_nr; | 53 | u64 alloc_exclude_nr; |
54 | unsigned long delayed_ref_updates; | ||
47 | }; | 55 | }; |
48 | 56 | ||
49 | struct btrfs_pending_snapshot { | 57 | struct btrfs_pending_snapshot { |
diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index 98d25fa4570e..b10eacdb1620 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c | |||
@@ -124,8 +124,6 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, | |||
124 | } | 124 | } |
125 | 125 | ||
126 | btrfs_release_path(root, path); | 126 | btrfs_release_path(root, path); |
127 | if (is_extent) | ||
128 | btrfs_extent_post_op(trans, root); | ||
129 | out: | 127 | out: |
130 | if (path) | 128 | if (path) |
131 | btrfs_free_path(path); | 129 | btrfs_free_path(path); |
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 20794290256b..25f20ea11f27 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
@@ -35,6 +35,49 @@ | |||
35 | #define LOG_INODE_EXISTS 1 | 35 | #define LOG_INODE_EXISTS 1 |
36 | 36 | ||
37 | /* | 37 | /* |
38 | * directory trouble cases | ||
39 | * | ||
40 | * 1) on rename or unlink, if the inode being unlinked isn't in the fsync | ||
41 | * log, we must force a full commit before doing an fsync of the directory | ||
42 | * where the unlink was done. | ||
43 | * ---> record transid of last unlink/rename per directory | ||
44 | * | ||
45 | * mkdir foo/some_dir | ||
46 | * normal commit | ||
47 | * rename foo/some_dir foo2/some_dir | ||
48 | * mkdir foo/some_dir | ||
49 | * fsync foo/some_dir/some_file | ||
50 | * | ||
51 | * The fsync above will unlink the original some_dir without recording | ||
52 | * it in its new location (foo2). After a crash, some_dir will be gone | ||
53 | * unless the fsync of some_file forces a full commit | ||
54 | * | ||
55 | * 2) we must log any new names for any file or dir that is in the fsync | ||
56 | * log. ---> check inode while renaming/linking. | ||
57 | * | ||
58 | * 2a) we must log any new names for any file or dir during rename | ||
59 | * when the directory they are being removed from was logged. | ||
60 | * ---> check inode and old parent dir during rename | ||
61 | * | ||
62 | * 2a is actually the more important variant. With the extra logging | ||
63 | * a crash might unlink the old name without recreating the new one | ||
64 | * | ||
65 | * 3) after a crash, we must go through any directories with a link count | ||
66 | * of zero and redo the rm -rf | ||
67 | * | ||
68 | * mkdir f1/foo | ||
69 | * normal commit | ||
70 | * rm -rf f1/foo | ||
71 | * fsync(f1) | ||
72 | * | ||
73 | * The directory f1 was fully removed from the FS, but fsync was never | ||
74 | * called on f1, only its parent dir. After a crash the rm -rf must | ||
75 | * be replayed. This must be able to recurse down the entire | ||
76 | * directory tree. The inode link count fixup code takes care of the | ||
77 | * ugly details. | ||
78 | */ | ||
79 | |||
80 | /* | ||
38 | * stages for the tree walking. The first | 81 | * stages for the tree walking. The first |
39 | * stage (0) is to only pin down the blocks we find | 82 | * stage (0) is to only pin down the blocks we find |
40 | * the second stage (1) is to make sure that all the inodes | 83 | * the second stage (1) is to make sure that all the inodes |
@@ -47,12 +90,17 @@ | |||
47 | #define LOG_WALK_REPLAY_INODES 1 | 90 | #define LOG_WALK_REPLAY_INODES 1 |
48 | #define LOG_WALK_REPLAY_ALL 2 | 91 | #define LOG_WALK_REPLAY_ALL 2 |
49 | 92 | ||
50 | static int __btrfs_log_inode(struct btrfs_trans_handle *trans, | 93 | static int btrfs_log_inode(struct btrfs_trans_handle *trans, |
51 | struct btrfs_root *root, struct inode *inode, | 94 | struct btrfs_root *root, struct inode *inode, |
52 | int inode_only); | 95 | int inode_only); |
53 | static int link_to_fixup_dir(struct btrfs_trans_handle *trans, | 96 | static int link_to_fixup_dir(struct btrfs_trans_handle *trans, |
54 | struct btrfs_root *root, | 97 | struct btrfs_root *root, |
55 | struct btrfs_path *path, u64 objectid); | 98 | struct btrfs_path *path, u64 objectid); |
99 | static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans, | ||
100 | struct btrfs_root *root, | ||
101 | struct btrfs_root *log, | ||
102 | struct btrfs_path *path, | ||
103 | u64 dirid, int del_all); | ||
56 | 104 | ||
57 | /* | 105 | /* |
58 | * tree logging is a special write ahead log used to make sure that | 106 | * tree logging is a special write ahead log used to make sure that |
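
Case 1 in the comment block above boils down to one comparison. A hypothetical restatement (the real series records this per directory as last_unlink_trans; the helper name is illustrative):

typedef unsigned long long u64;

/* the directory remembers the transid of its most recent unlink or
 * rename; an fsync in it must fall back to a full transaction commit
 * when that transid is newer than the last commit on disk */
static int dir_fsync_needs_full_commit(u64 last_unlink_trans,
				       u64 last_committed)
{
	return last_unlink_trans > last_committed;
}
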
@@ -133,10 +181,25 @@ static int join_running_log_trans(struct btrfs_root *root) | |||
133 | } | 181 | } |
134 | 182 | ||
135 | /* | 183 | /* |
184 | * This either makes the current running log transaction wait | ||
185 | * until you call btrfs_end_log_trans() or it makes any future | ||
186 | * log transactions wait until you call btrfs_end_log_trans() | ||
187 | */ | ||
188 | int btrfs_pin_log_trans(struct btrfs_root *root) | ||
189 | { | ||
190 | int ret = -ENOENT; | ||
191 | |||
192 | mutex_lock(&root->log_mutex); | ||
193 | atomic_inc(&root->log_writers); | ||
194 | mutex_unlock(&root->log_mutex); | ||
195 | return ret; | ||
196 | } | ||
197 | |||
198 | /* | ||
136 | * indicate we're done making changes to the log tree | 199 | * indicate we're done making changes to the log tree |
137 | * and wake up anyone waiting to do a sync | 200 | * and wake up anyone waiting to do a sync |
138 | */ | 201 | */ |
139 | static int end_log_trans(struct btrfs_root *root) | 202 | int btrfs_end_log_trans(struct btrfs_root *root) |
140 | { | 203 | { |
141 | if (atomic_dec_and_test(&root->log_writers)) { | 204 | if (atomic_dec_and_test(&root->log_writers)) { |
142 | smp_mb(); | 205 | smp_mb(); |
@@ -199,12 +262,9 @@ static int process_one_buffer(struct btrfs_root *log, | |||
199 | struct extent_buffer *eb, | 262 | struct extent_buffer *eb, |
200 | struct walk_control *wc, u64 gen) | 263 | struct walk_control *wc, u64 gen) |
201 | { | 264 | { |
202 | if (wc->pin) { | 265 | if (wc->pin) |
203 | mutex_lock(&log->fs_info->pinned_mutex); | ||
204 | btrfs_update_pinned_extents(log->fs_info->extent_root, | 266 | btrfs_update_pinned_extents(log->fs_info->extent_root, |
205 | eb->start, eb->len, 1); | 267 | eb->start, eb->len, 1); |
206 | mutex_unlock(&log->fs_info->pinned_mutex); | ||
207 | } | ||
208 | 268 | ||
209 | if (btrfs_buffer_uptodate(eb, gen)) { | 269 | if (btrfs_buffer_uptodate(eb, gen)) { |
210 | if (wc->write) | 270 | if (wc->write) |
@@ -603,6 +663,7 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans, | |||
603 | 663 | ||
604 | ret = link_to_fixup_dir(trans, root, path, location.objectid); | 664 | ret = link_to_fixup_dir(trans, root, path, location.objectid); |
605 | BUG_ON(ret); | 665 | BUG_ON(ret); |
666 | |||
606 | ret = btrfs_unlink_inode(trans, root, dir, inode, name, name_len); | 667 | ret = btrfs_unlink_inode(trans, root, dir, inode, name, name_len); |
607 | BUG_ON(ret); | 668 | BUG_ON(ret); |
608 | kfree(name); | 669 | kfree(name); |
@@ -804,6 +865,7 @@ conflict_again: | |||
804 | victim_name_len)) { | 865 | victim_name_len)) { |
805 | btrfs_inc_nlink(inode); | 866 | btrfs_inc_nlink(inode); |
806 | btrfs_release_path(root, path); | 867 | btrfs_release_path(root, path); |
868 | |||
807 | ret = btrfs_unlink_inode(trans, root, dir, | 869 | ret = btrfs_unlink_inode(trans, root, dir, |
808 | inode, victim_name, | 870 | inode, victim_name, |
809 | victim_name_len); | 871 | victim_name_len); |
@@ -922,13 +984,20 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans, | |||
922 | key.offset--; | 984 | key.offset--; |
923 | btrfs_release_path(root, path); | 985 | btrfs_release_path(root, path); |
924 | } | 986 | } |
925 | btrfs_free_path(path); | 987 | btrfs_release_path(root, path); |
926 | if (nlink != inode->i_nlink) { | 988 | if (nlink != inode->i_nlink) { |
927 | inode->i_nlink = nlink; | 989 | inode->i_nlink = nlink; |
928 | btrfs_update_inode(trans, root, inode); | 990 | btrfs_update_inode(trans, root, inode); |
929 | } | 991 | } |
930 | BTRFS_I(inode)->index_cnt = (u64)-1; | 992 | BTRFS_I(inode)->index_cnt = (u64)-1; |
931 | 993 | ||
994 | if (inode->i_nlink == 0 && S_ISDIR(inode->i_mode)) { | ||
995 | ret = replay_dir_deletes(trans, root, NULL, path, | ||
996 | inode->i_ino, 1); | ||
997 | BUG_ON(ret); | ||
998 | } | ||
999 | btrfs_free_path(path); | ||
1000 | |||
932 | return 0; | 1001 | return 0; |
933 | } | 1002 | } |
934 | 1003 | ||
@@ -971,9 +1040,12 @@ static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans, | |||
971 | 1040 | ||
972 | iput(inode); | 1041 | iput(inode); |
973 | 1042 | ||
974 | if (key.offset == 0) | 1043 | /* |
975 | break; | 1044 | * fixup on a directory may create new entries, |
976 | key.offset--; | 1045 | * make sure we always look for the highest possible |
1046 | * offset | ||
1047 | */ | ||
1048 | key.offset = (u64)-1; | ||
977 | } | 1049 | } |
978 | btrfs_release_path(root, path); | 1050 | btrfs_release_path(root, path); |
979 | return 0; | 1051 | return 0; |
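
The switch from key.offset-- to key.offset = (u64)-1 fixes an iteration subtlety: processing one fixup entry can insert new entries, and walking strictly downward would skip them. The safe pattern, sketched with hypothetical helpers:

typedef unsigned long long u64;

struct key_stub { u64 offset; };

static int search_prev_stub(struct key_stub *k) { (void)k; return -1; }
static void process_and_remove_stub(struct key_stub *k) { (void)k; }

static void drain_fixups(void)
{
	struct key_stub key;

	for (;;) {
		key.offset = (u64)-1;            /* restart from the top */
		if (search_prev_stub(&key) < 0)
			break;                   /* nothing left to fix */
		process_and_remove_stub(&key);   /* may queue new entries */
	}
}
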
@@ -1150,8 +1222,7 @@ insert: | |||
1150 | ret = insert_one_name(trans, root, path, key->objectid, key->offset, | 1222 | ret = insert_one_name(trans, root, path, key->objectid, key->offset, |
1151 | name, name_len, log_type, &log_key); | 1223 | name, name_len, log_type, &log_key); |
1152 | 1224 | ||
1153 | if (ret && ret != -ENOENT) | 1225 | BUG_ON(ret && ret != -ENOENT); |
1154 | BUG(); | ||
1155 | goto out; | 1226 | goto out; |
1156 | } | 1227 | } |
1157 | 1228 | ||
@@ -1313,11 +1384,11 @@ again: | |||
1313 | read_extent_buffer(eb, name, (unsigned long)(di + 1), | 1384 | read_extent_buffer(eb, name, (unsigned long)(di + 1), |
1314 | name_len); | 1385 | name_len); |
1315 | log_di = NULL; | 1386 | log_di = NULL; |
1316 | if (dir_key->type == BTRFS_DIR_ITEM_KEY) { | 1387 | if (log && dir_key->type == BTRFS_DIR_ITEM_KEY) { |
1317 | log_di = btrfs_lookup_dir_item(trans, log, log_path, | 1388 | log_di = btrfs_lookup_dir_item(trans, log, log_path, |
1318 | dir_key->objectid, | 1389 | dir_key->objectid, |
1319 | name, name_len, 0); | 1390 | name, name_len, 0); |
1320 | } else if (dir_key->type == BTRFS_DIR_INDEX_KEY) { | 1391 | } else if (log && dir_key->type == BTRFS_DIR_INDEX_KEY) { |
1321 | log_di = btrfs_lookup_dir_index_item(trans, log, | 1392 | log_di = btrfs_lookup_dir_index_item(trans, log, |
1322 | log_path, | 1393 | log_path, |
1323 | dir_key->objectid, | 1394 | dir_key->objectid, |
@@ -1378,7 +1449,7 @@ static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans, | |||
1378 | struct btrfs_root *root, | 1449 | struct btrfs_root *root, |
1379 | struct btrfs_root *log, | 1450 | struct btrfs_root *log, |
1380 | struct btrfs_path *path, | 1451 | struct btrfs_path *path, |
1381 | u64 dirid) | 1452 | u64 dirid, int del_all) |
1382 | { | 1453 | { |
1383 | u64 range_start; | 1454 | u64 range_start; |
1384 | u64 range_end; | 1455 | u64 range_end; |
@@ -1408,10 +1479,14 @@ again: | |||
1408 | range_start = 0; | 1479 | range_start = 0; |
1409 | range_end = 0; | 1480 | range_end = 0; |
1410 | while (1) { | 1481 | while (1) { |
1411 | ret = find_dir_range(log, path, dirid, key_type, | 1482 | if (del_all) |
1412 | &range_start, &range_end); | 1483 | range_end = (u64)-1; |
1413 | if (ret != 0) | 1484 | else { |
1414 | break; | 1485 | ret = find_dir_range(log, path, dirid, key_type, |
1486 | &range_start, &range_end); | ||
1487 | if (ret != 0) | ||
1488 | break; | ||
1489 | } | ||
1415 | 1490 | ||
1416 | dir_key.offset = range_start; | 1491 | dir_key.offset = range_start; |
1417 | while (1) { | 1492 | while (1) { |
@@ -1437,7 +1512,8 @@ again: | |||
1437 | break; | 1512 | break; |
1438 | 1513 | ||
1439 | ret = check_item_in_log(trans, root, log, path, | 1514 | ret = check_item_in_log(trans, root, log, path, |
1440 | log_path, dir, &found_key); | 1515 | log_path, dir, |
1516 | &found_key); | ||
1441 | BUG_ON(ret); | 1517 | BUG_ON(ret); |
1442 | if (found_key.offset == (u64)-1) | 1518 | if (found_key.offset == (u64)-1) |
1443 | break; | 1519 | break; |
@@ -1514,7 +1590,7 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, | |||
1514 | mode = btrfs_inode_mode(eb, inode_item); | 1590 | mode = btrfs_inode_mode(eb, inode_item); |
1515 | if (S_ISDIR(mode)) { | 1591 | if (S_ISDIR(mode)) { |
1516 | ret = replay_dir_deletes(wc->trans, | 1592 | ret = replay_dir_deletes(wc->trans, |
1517 | root, log, path, key.objectid); | 1593 | root, log, path, key.objectid, 0); |
1518 | BUG_ON(ret); | 1594 | BUG_ON(ret); |
1519 | } | 1595 | } |
1520 | ret = overwrite_item(wc->trans, root, path, | 1596 | ret = overwrite_item(wc->trans, root, path, |
@@ -1533,6 +1609,17 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, | |||
1533 | root, inode, inode->i_size, | 1609 | root, inode, inode->i_size, |
1534 | BTRFS_EXTENT_DATA_KEY); | 1610 | BTRFS_EXTENT_DATA_KEY); |
1535 | BUG_ON(ret); | 1611 | BUG_ON(ret); |
1612 | |||
1613 | /* if the nlink count is zero here, the iput | ||
1614 | * will free the inode. We bump it to make | ||
1615 | * sure it doesn't get freed until the link | ||
1616 | * count fixup is done | ||
1617 | */ | ||
1618 | if (inode->i_nlink == 0) { | ||
1619 | btrfs_inc_nlink(inode); | ||
1620 | btrfs_update_inode(wc->trans, | ||
1621 | root, inode); | ||
1622 | } | ||
1536 | iput(inode); | 1623 | iput(inode); |
1537 | } | 1624 | } |
1538 | ret = link_to_fixup_dir(wc->trans, root, | 1625 | ret = link_to_fixup_dir(wc->trans, root, |
@@ -1840,7 +1927,8 @@ static int update_log_root(struct btrfs_trans_handle *trans, | |||
1840 | return ret; | 1927 | return ret; |
1841 | } | 1928 | } |
1842 | 1929 | ||
1843 | static int wait_log_commit(struct btrfs_root *root, unsigned long transid) | 1930 | static int wait_log_commit(struct btrfs_trans_handle *trans, |
1931 | struct btrfs_root *root, unsigned long transid) | ||
1844 | { | 1932 | { |
1845 | DEFINE_WAIT(wait); | 1933 | DEFINE_WAIT(wait); |
1846 | int index = transid % 2; | 1934 | int index = transid % 2; |
@@ -1854,9 +1942,12 @@ static int wait_log_commit(struct btrfs_root *root, unsigned long transid) | |||
1854 | prepare_to_wait(&root->log_commit_wait[index], | 1942 | prepare_to_wait(&root->log_commit_wait[index], |
1855 | &wait, TASK_UNINTERRUPTIBLE); | 1943 | &wait, TASK_UNINTERRUPTIBLE); |
1856 | mutex_unlock(&root->log_mutex); | 1944 | mutex_unlock(&root->log_mutex); |
1857 | if (root->log_transid < transid + 2 && | 1945 | |
1946 | if (root->fs_info->last_trans_log_full_commit != | ||
1947 | trans->transid && root->log_transid < transid + 2 && | ||
1858 | atomic_read(&root->log_commit[index])) | 1948 | atomic_read(&root->log_commit[index])) |
1859 | schedule(); | 1949 | schedule(); |
1950 | |||
1860 | finish_wait(&root->log_commit_wait[index], &wait); | 1951 | finish_wait(&root->log_commit_wait[index], &wait); |
1861 | mutex_lock(&root->log_mutex); | 1952 | mutex_lock(&root->log_mutex); |
1862 | } while (root->log_transid < transid + 2 && | 1953 | } while (root->log_transid < transid + 2 && |
@@ -1864,14 +1955,16 @@ static int wait_log_commit(struct btrfs_root *root, unsigned long transid) | |||
1864 | return 0; | 1955 | return 0; |
1865 | } | 1956 | } |
1866 | 1957 | ||
1867 | static int wait_for_writer(struct btrfs_root *root) | 1958 | static int wait_for_writer(struct btrfs_trans_handle *trans, |
1959 | struct btrfs_root *root) | ||
1868 | { | 1960 | { |
1869 | DEFINE_WAIT(wait); | 1961 | DEFINE_WAIT(wait); |
1870 | while (atomic_read(&root->log_writers)) { | 1962 | while (atomic_read(&root->log_writers)) { |
1871 | prepare_to_wait(&root->log_writer_wait, | 1963 | prepare_to_wait(&root->log_writer_wait, |
1872 | &wait, TASK_UNINTERRUPTIBLE); | 1964 | &wait, TASK_UNINTERRUPTIBLE); |
1873 | mutex_unlock(&root->log_mutex); | 1965 | mutex_unlock(&root->log_mutex); |
1874 | if (atomic_read(&root->log_writers)) | 1966 | if (root->fs_info->last_trans_log_full_commit != |
1967 | trans->transid && atomic_read(&root->log_writers)) | ||
1875 | schedule(); | 1968 | schedule(); |
1876 | mutex_lock(&root->log_mutex); | 1969 | mutex_lock(&root->log_mutex); |
1877 | finish_wait(&root->log_writer_wait, &wait); | 1970 | finish_wait(&root->log_writer_wait, &wait); |
@@ -1882,7 +1975,14 @@ static int wait_for_writer(struct btrfs_root *root) | |||
1882 | /* | 1975 | /* |
1883 | * btrfs_sync_log sends a given tree log down to the disk and | 1976 | * btrfs_sync_log sends a given tree log down to the disk and |
1884 | * updates the super blocks to record it. When this call is done, | 1977 | * updates the super blocks to record it. When this call is done, |
1885 | * you know that any inodes previously logged are safely on disk | 1978 | * you know that any inodes previously logged are safely on disk only |
1979 | * if it returns 0. | ||
1980 | * | ||
1981 | * Any other return value means you need to call btrfs_commit_transaction. | ||
1982 | * Some of the edge cases for fsyncing directories that have had unlinks | ||
1983 | * or renames done in the past mean that sometimes the only safe | ||
1984 | * fsync is to commit the whole FS. When btrfs_sync_log returns -EAGAIN, | ||
1985 | * that has happened. | ||
1886 | */ | 1986 | */ |
1887 | int btrfs_sync_log(struct btrfs_trans_handle *trans, | 1987 | int btrfs_sync_log(struct btrfs_trans_handle *trans, |
1888 | struct btrfs_root *root) | 1988 | struct btrfs_root *root) |
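
With this change btrfs_sync_log() is no longer fire-and-forget; the fsync path has to branch on its return value. A sketch of the expected caller shape, assuming the declarations from transaction.h and tree-log.h in this series are in scope (error handling elided, so this is illustrative rather than the verbatim fsync code):

static int sync_via_log(struct btrfs_trans_handle *trans,
			struct btrfs_root *root)
{
	int ret = btrfs_sync_log(trans, root);

	if (ret == 0)                   /* the log commit was enough */
		return btrfs_end_transaction(trans, root);
	/* e.g. -EAGAIN after an unlink/rename edge case: only a full
	 * transaction commit is safe */
	return btrfs_commit_transaction(trans, root);
}
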
@@ -1896,7 +1996,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
1896 | mutex_lock(&root->log_mutex); | 1996 | mutex_lock(&root->log_mutex); |
1897 | index1 = root->log_transid % 2; | 1997 | index1 = root->log_transid % 2; |
1898 | if (atomic_read(&root->log_commit[index1])) { | 1998 | if (atomic_read(&root->log_commit[index1])) { |
1899 | wait_log_commit(root, root->log_transid); | 1999 | wait_log_commit(trans, root, root->log_transid); |
1900 | mutex_unlock(&root->log_mutex); | 2000 | mutex_unlock(&root->log_mutex); |
1901 | return 0; | 2001 | return 0; |
1902 | } | 2002 | } |
@@ -1904,18 +2004,26 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
1904 | 2004 | ||
1905 | /* wait for previous tree log sync to complete */ | 2005 | /* wait for previous tree log sync to complete */ |
1906 | if (atomic_read(&root->log_commit[(index1 + 1) % 2])) | 2006 | if (atomic_read(&root->log_commit[(index1 + 1) % 2])) |
1907 | wait_log_commit(root, root->log_transid - 1); | 2007 | wait_log_commit(trans, root, root->log_transid - 1); |
1908 | 2008 | ||
1909 | while (1) { | 2009 | while (1) { |
1910 | unsigned long batch = root->log_batch; | 2010 | unsigned long batch = root->log_batch; |
1911 | mutex_unlock(&root->log_mutex); | 2011 | mutex_unlock(&root->log_mutex); |
1912 | schedule_timeout_uninterruptible(1); | 2012 | schedule_timeout_uninterruptible(1); |
1913 | mutex_lock(&root->log_mutex); | 2013 | mutex_lock(&root->log_mutex); |
1914 | wait_for_writer(root); | 2014 | |
2015 | wait_for_writer(trans, root); | ||
1915 | if (batch == root->log_batch) | 2016 | if (batch == root->log_batch) |
1916 | break; | 2017 | break; |
1917 | } | 2018 | } |
1918 | 2019 | ||
2020 | /* bail out if we need to do a full commit */ | ||
2021 | if (root->fs_info->last_trans_log_full_commit == trans->transid) { | ||
2022 | ret = -EAGAIN; | ||
2023 | mutex_unlock(&root->log_mutex); | ||
2024 | goto out; | ||
2025 | } | ||
2026 | |||
1919 | ret = btrfs_write_and_wait_marked_extents(log, &log->dirty_log_pages); | 2027 | ret = btrfs_write_and_wait_marked_extents(log, &log->dirty_log_pages); |
1920 | BUG_ON(ret); | 2028 | BUG_ON(ret); |
1921 | 2029 | ||
@@ -1951,16 +2059,29 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
1951 | 2059 | ||
1952 | index2 = log_root_tree->log_transid % 2; | 2060 | index2 = log_root_tree->log_transid % 2; |
1953 | if (atomic_read(&log_root_tree->log_commit[index2])) { | 2061 | if (atomic_read(&log_root_tree->log_commit[index2])) { |
1954 | wait_log_commit(log_root_tree, log_root_tree->log_transid); | 2062 | wait_log_commit(trans, log_root_tree, |
2063 | log_root_tree->log_transid); | ||
1955 | mutex_unlock(&log_root_tree->log_mutex); | 2064 | mutex_unlock(&log_root_tree->log_mutex); |
1956 | goto out; | 2065 | goto out; |
1957 | } | 2066 | } |
1958 | atomic_set(&log_root_tree->log_commit[index2], 1); | 2067 | atomic_set(&log_root_tree->log_commit[index2], 1); |
1959 | 2068 | ||
1960 | if (atomic_read(&log_root_tree->log_commit[(index2 + 1) % 2])) | 2069 | if (atomic_read(&log_root_tree->log_commit[(index2 + 1) % 2])) { |
1961 | wait_log_commit(log_root_tree, log_root_tree->log_transid - 1); | 2070 | wait_log_commit(trans, log_root_tree, |
2071 | log_root_tree->log_transid - 1); | ||
2072 | } | ||
2073 | |||
2074 | wait_for_writer(trans, log_root_tree); | ||
1962 | 2075 | ||
1963 | wait_for_writer(log_root_tree); | 2076 | /* |
2077 | * now that we've moved on to the tree of log tree roots, | ||
2078 | * check the full commit flag again | ||
2079 | */ | ||
2080 | if (root->fs_info->last_trans_log_full_commit == trans->transid) { | ||
2081 | mutex_unlock(&log_root_tree->log_mutex); | ||
2082 | ret = -EAGAIN; | ||
2083 | goto out_wake_log_root; | ||
2084 | } | ||
1964 | 2085 | ||
1965 | ret = btrfs_write_and_wait_marked_extents(log_root_tree, | 2086 | ret = btrfs_write_and_wait_marked_extents(log_root_tree, |
1966 | &log_root_tree->dirty_log_pages); | 2087 | &log_root_tree->dirty_log_pages); |
@@ -1985,7 +2106,9 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
1985 | * in and cause problems either. | 2106 | * in and cause problems either. |
1986 | */ | 2107 | */ |
1987 | write_ctree_super(trans, root->fs_info->tree_root, 2); | 2108 | write_ctree_super(trans, root->fs_info->tree_root, 2); |
2109 | ret = 0; | ||
1988 | 2110 | ||
2111 | out_wake_log_root: | ||
1989 | atomic_set(&log_root_tree->log_commit[index2], 0); | 2112 | atomic_set(&log_root_tree->log_commit[index2], 0); |
1990 | smp_mb(); | 2113 | smp_mb(); |
1991 | if (waitqueue_active(&log_root_tree->log_commit_wait[index2])) | 2114 | if (waitqueue_active(&log_root_tree->log_commit_wait[index2])) |
@@ -1998,7 +2121,8 @@ out: | |||
1998 | return 0; | 2121 | return 0; |
1999 | } | 2122 | } |
2000 | 2123 | ||
2001 | /* * free all the extents used by the tree log. This should be called | 2124 | /* |
2125 | * free all the extents used by the tree log. This should be called | ||
2002 | * at commit time of the full transaction | 2126 | * at commit time of the full transaction |
2003 | */ | 2127 | */ |
2004 | int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root) | 2128 | int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root) |
@@ -2132,7 +2256,7 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, | |||
2132 | 2256 | ||
2133 | btrfs_free_path(path); | 2257 | btrfs_free_path(path); |
2134 | mutex_unlock(&BTRFS_I(dir)->log_mutex); | 2258 | mutex_unlock(&BTRFS_I(dir)->log_mutex); |
2135 | end_log_trans(root); | 2259 | btrfs_end_log_trans(root); |
2136 | 2260 | ||
2137 | return 0; | 2261 | return 0; |
2138 | } | 2262 | } |
@@ -2159,7 +2283,7 @@ int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans, | |||
2159 | ret = btrfs_del_inode_ref(trans, log, name, name_len, inode->i_ino, | 2283 | ret = btrfs_del_inode_ref(trans, log, name, name_len, inode->i_ino, |
2160 | dirid, &index); | 2284 | dirid, &index); |
2161 | mutex_unlock(&BTRFS_I(inode)->log_mutex); | 2285 | mutex_unlock(&BTRFS_I(inode)->log_mutex); |
2162 | end_log_trans(root); | 2286 | btrfs_end_log_trans(root); |
2163 | 2287 | ||
2164 | return ret; | 2288 | return ret; |
2165 | } | 2289 | } |
@@ -2559,7 +2683,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, | |||
2559 | * | 2683 | * |
2560 | * This handles both files and directories. | 2684 | * This handles both files and directories. |
2561 | */ | 2685 | */ |
2562 | static int __btrfs_log_inode(struct btrfs_trans_handle *trans, | 2686 | static int btrfs_log_inode(struct btrfs_trans_handle *trans, |
2563 | struct btrfs_root *root, struct inode *inode, | 2687 | struct btrfs_root *root, struct inode *inode, |
2564 | int inode_only) | 2688 | int inode_only) |
2565 | { | 2689 | { |
@@ -2585,28 +2709,17 @@ static int __btrfs_log_inode(struct btrfs_trans_handle *trans, | |||
2585 | min_key.offset = 0; | 2709 | min_key.offset = 0; |
2586 | 2710 | ||
2587 | max_key.objectid = inode->i_ino; | 2711 | max_key.objectid = inode->i_ino; |
2712 | |||
2713 | /* today the code can only do partial logging of directories */ | ||
2714 | if (!S_ISDIR(inode->i_mode)) | ||
2715 | inode_only = LOG_INODE_ALL; | ||
2716 | |||
2588 | if (inode_only == LOG_INODE_EXISTS || S_ISDIR(inode->i_mode)) | 2717 | if (inode_only == LOG_INODE_EXISTS || S_ISDIR(inode->i_mode)) |
2589 | max_key.type = BTRFS_XATTR_ITEM_KEY; | 2718 | max_key.type = BTRFS_XATTR_ITEM_KEY; |
2590 | else | 2719 | else |
2591 | max_key.type = (u8)-1; | 2720 | max_key.type = (u8)-1; |
2592 | max_key.offset = (u64)-1; | 2721 | max_key.offset = (u64)-1; |
2593 | 2722 | ||
2594 | /* | ||
2595 | * if this inode has already been logged and we're in inode_only | ||
2596 | * mode, we don't want to delete the things that have already | ||
2597 | * been written to the log. | ||
2598 | * | ||
2599 | * But, if the inode has been through an inode_only log, | ||
2600 | * the logged_trans field is not set. This allows us to catch | ||
2601 | * any new names for this inode in the backrefs by logging it | ||
2602 | * again | ||
2603 | */ | ||
2604 | if (inode_only == LOG_INODE_EXISTS && | ||
2605 | BTRFS_I(inode)->logged_trans == trans->transid) { | ||
2606 | btrfs_free_path(path); | ||
2607 | btrfs_free_path(dst_path); | ||
2608 | goto out; | ||
2609 | } | ||
2610 | mutex_lock(&BTRFS_I(inode)->log_mutex); | 2723 | mutex_lock(&BTRFS_I(inode)->log_mutex); |
2611 | 2724 | ||
2612 | /* | 2725 | /* |
@@ -2693,7 +2806,6 @@ next_slot: | |||
2693 | if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) { | 2806 | if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) { |
2694 | btrfs_release_path(root, path); | 2807 | btrfs_release_path(root, path); |
2695 | btrfs_release_path(log, dst_path); | 2808 | btrfs_release_path(log, dst_path); |
2696 | BTRFS_I(inode)->log_dirty_trans = 0; | ||
2697 | ret = log_directory_changes(trans, root, inode, path, dst_path); | 2809 | ret = log_directory_changes(trans, root, inode, path, dst_path); |
2698 | BUG_ON(ret); | 2810 | BUG_ON(ret); |
2699 | } | 2811 | } |
@@ -2702,19 +2814,69 @@ next_slot: | |||
2702 | 2814 | ||
2703 | btrfs_free_path(path); | 2815 | btrfs_free_path(path); |
2704 | btrfs_free_path(dst_path); | 2816 | btrfs_free_path(dst_path); |
2705 | out: | ||
2706 | return 0; | 2817 | return 0; |
2707 | } | 2818 | } |
2708 | 2819 | ||
2709 | int btrfs_log_inode(struct btrfs_trans_handle *trans, | 2820 | /* |
2710 | struct btrfs_root *root, struct inode *inode, | 2821 | * follow the dentry parent pointers up the chain and see if any |
2711 | int inode_only) | 2822 | * of the directories in it require a full commit before they can |
2823 | * be logged. Returns zero if nothing special needs to be done or 1 if | ||
2824 | * a full commit is required. | ||
2825 | */ | ||
2826 | static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans, | ||
2827 | struct inode *inode, | ||
2828 | struct dentry *parent, | ||
2829 | struct super_block *sb, | ||
2830 | u64 last_committed) | ||
2712 | { | 2831 | { |
2713 | int ret; | 2832 | int ret = 0; |
2833 | struct btrfs_root *root; | ||
2714 | 2834 | ||
2715 | start_log_trans(trans, root); | 2835 | /* |
2716 | ret = __btrfs_log_inode(trans, root, inode, inode_only); | 2836 | * for a regular file, if its inode is already on disk, we don't |
2717 | end_log_trans(root); | 2837 | * have to worry about the parents at all. This is because |
2838 | * we can use the last_unlink_trans field to record renames | ||
2839 | * and other fun in this file. | ||
2840 | */ | ||
2841 | if (S_ISREG(inode->i_mode) && | ||
2842 | BTRFS_I(inode)->generation <= last_committed && | ||
2843 | BTRFS_I(inode)->last_unlink_trans <= last_committed) | ||
2844 | goto out; | ||
2845 | |||
2846 | if (!S_ISDIR(inode->i_mode)) { | ||
2847 | if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb) | ||
2848 | goto out; | ||
2849 | inode = parent->d_inode; | ||
2850 | } | ||
2851 | |||
2852 | while (1) { | ||
2853 | BTRFS_I(inode)->logged_trans = trans->transid; | ||
2854 | smp_mb(); | ||
2855 | |||
2856 | if (BTRFS_I(inode)->last_unlink_trans > last_committed) { | ||
2857 | root = BTRFS_I(inode)->root; | ||
2858 | |||
2859 | /* | ||
2860 | * make sure any commits to the log are forced | ||
2861 | * to be full commits | ||
2862 | */ | ||
2863 | root->fs_info->last_trans_log_full_commit = | ||
2864 | trans->transid; | ||
2865 | ret = 1; | ||
2866 | break; | ||
2867 | } | ||
2868 | |||
2869 | if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb) | ||
2870 | break; | ||
2871 | |||
2872 | if (parent == sb->s_root) | ||
2873 | break; | ||
2874 | |||
2875 | parent = parent->d_parent; | ||
2876 | inode = parent->d_inode; | ||
2877 | |||
2878 | } | ||
2879 | out: | ||
2718 | return ret; | 2880 | return ret; |
2719 | } | 2881 | } |
2720 | 2882 | ||
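The parent walk above is compact but easy to misread. Below is a minimal user-space sketch of the same decision, assuming simplified stand-in fields (is_dir, generation, last_unlink_trans, logged_trans, parent) for the kernel's BTRFS_I(inode) bookkeeping; it models the logic, it is not the real API.

#include <stdio.h>

struct toy_inode {
    int is_dir;                    /* S_ISDIR(inode->i_mode)           */
    unsigned long generation;      /* transid that created the inode   */
    unsigned long last_unlink_trans;
    unsigned long logged_trans;
    struct toy_inode *parent;      /* parent directory, NULL at root   */
};

/* 1: fsync must fall back to a full commit, 0: the tree log suffices */
static int check_parent_dirs_for_sync(struct toy_inode *inode,
                                      unsigned long transid,
                                      unsigned long last_committed)
{
    /* fully committed regular file: parents are irrelevant */
    if (!inode->is_dir &&
        inode->generation <= last_committed &&
        inode->last_unlink_trans <= last_committed)
        return 0;

    /* regular files start the walk at their parent directory */
    if (!inode->is_dir) {
        if (!inode->parent)
            return 0;
        inode = inode->parent;
    }

    while (inode) {
        inode->logged_trans = transid;
        /* an unlink newer than the last commit poisons the chain */
        if (inode->last_unlink_trans > last_committed)
            return 1;
        inode = inode->parent;
    }
    return 0;
}

int main(void)
{
    struct toy_inode dir  = { .is_dir = 1, .last_unlink_trans = 7 };
    struct toy_inode file = { .is_dir = 0, .generation = 8, .parent = &dir };

    /* dir unlinked something in trans 7; the last commit was trans 6 */
    printf("full commit needed: %d\n",
           check_parent_dirs_for_sync(&file, 8, 6));
    return 0;
}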
@@ -2724,31 +2886,70 @@ int btrfs_log_inode(struct btrfs_trans_handle *trans, | |||
2724 | * only logging is done of any parent directories that are older than | 2886 | * only logging is done of any parent directories that are older than |
2725 | * the last committed transaction | 2887 | * the last committed transaction |
2726 | */ | 2888 | */ |
2727 | int btrfs_log_dentry(struct btrfs_trans_handle *trans, | 2889 | int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, |
2728 | struct btrfs_root *root, struct dentry *dentry) | 2890 | struct btrfs_root *root, struct inode *inode, |
2891 | struct dentry *parent, int exists_only) | ||
2729 | { | 2892 | { |
2730 | int inode_only = LOG_INODE_ALL; | 2893 | int inode_only = exists_only ? LOG_INODE_EXISTS : LOG_INODE_ALL; |
2731 | struct super_block *sb; | 2894 | struct super_block *sb; |
2732 | int ret; | 2895 | int ret = 0; |
2896 | u64 last_committed = root->fs_info->last_trans_committed; | ||
2897 | |||
2898 | sb = inode->i_sb; | ||
2899 | |||
2900 | if (btrfs_test_opt(root, NOTREELOG)) { | ||
2901 | ret = 1; | ||
2902 | goto end_no_trans; | ||
2903 | } | ||
2904 | |||
2905 | if (root->fs_info->last_trans_log_full_commit > | ||
2906 | root->fs_info->last_trans_committed) { | ||
2907 | ret = 1; | ||
2908 | goto end_no_trans; | ||
2909 | } | ||
2910 | |||
2911 | ret = check_parent_dirs_for_sync(trans, inode, parent, | ||
2912 | sb, last_committed); | ||
2913 | if (ret) | ||
2914 | goto end_no_trans; | ||
2733 | 2915 | ||
2734 | start_log_trans(trans, root); | 2916 | start_log_trans(trans, root); |
2735 | sb = dentry->d_inode->i_sb; | ||
2736 | while (1) { | ||
2737 | ret = __btrfs_log_inode(trans, root, dentry->d_inode, | ||
2738 | inode_only); | ||
2739 | BUG_ON(ret); | ||
2740 | inode_only = LOG_INODE_EXISTS; | ||
2741 | 2917 | ||
2742 | dentry = dentry->d_parent; | 2918 | ret = btrfs_log_inode(trans, root, inode, inode_only); |
2743 | if (!dentry || !dentry->d_inode || sb != dentry->d_inode->i_sb) | 2919 | BUG_ON(ret); |
2920 | |||
2921 | /* | ||
2922 | * for a regular file, if its inode is already on disk, we don't | ||
2923 | * have to worry about the parents at all. This is because | ||
2924 | * we can use the last_unlink_trans field to record renames | ||
2925 | * and other fun in this file. | ||
2926 | */ | ||
2927 | if (S_ISREG(inode->i_mode) && | ||
2928 | BTRFS_I(inode)->generation <= last_committed && | ||
2929 | BTRFS_I(inode)->last_unlink_trans <= last_committed) | ||
2930 | goto no_parent; | ||
2931 | |||
2932 | inode_only = LOG_INODE_EXISTS; | ||
2933 | while (1) { | ||
2934 | if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb) | ||
2744 | break; | 2935 | break; |
2745 | 2936 | ||
2746 | if (BTRFS_I(dentry->d_inode)->generation <= | 2937 | inode = parent->d_inode; |
2747 | root->fs_info->last_trans_committed) | 2938 | if (BTRFS_I(inode)->generation > |
2939 | root->fs_info->last_trans_committed) { | ||
2940 | ret = btrfs_log_inode(trans, root, inode, inode_only); | ||
2941 | BUG_ON(ret); | ||
2942 | } | ||
2943 | if (parent == sb->s_root) | ||
2748 | break; | 2944 | break; |
2945 | |||
2946 | parent = parent->d_parent; | ||
2749 | } | 2947 | } |
2750 | end_log_trans(root); | 2948 | no_parent: |
2751 | return 0; | 2949 | ret = 0; |
2950 | btrfs_end_log_trans(root); | ||
2951 | end_no_trans: | ||
2952 | return ret; | ||
2752 | } | 2953 | } |
2753 | 2954 | ||
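btrfs_log_inode_parent() is mostly an ordered ladder of bail-outs before any log tree is touched. A compressed sketch of that control flow, with plain parameters standing in for the fs_info fields (the NOTREELOG mount option, last_trans_log_full_commit, last_trans_committed) and for the verdict of check_parent_dirs_for_sync():

#include <stdio.h>

/* 1: caller must do a full transaction commit, 0: the inode was logged */
static int log_inode_parent_flow(int notreelog,
                                 unsigned long last_full_commit,
                                 unsigned long last_committed,
                                 int parents_need_sync)
{
    if (notreelog)
        return 1;               /* tree log disabled by mount option */
    if (last_full_commit > last_committed)
        return 1;               /* a full commit was already demanded */
    if (parents_need_sync)
        return 1;               /* a parent dir saw a recent unlink */

    /*
     * only now: start_log_trans(), log the inode with LOG_INODE_ALL,
     * walk up the parents logging each uncommitted directory with
     * LOG_INODE_EXISTS, then btrfs_end_log_trans()
     */
    return 0;
}

int main(void)
{
    printf("%d\n", log_inode_parent_flow(0, 5, 6, 0));  /* 0: loggable */
    printf("%d\n", log_inode_parent_flow(0, 7, 6, 0));  /* 1: commit   */
    return 0;
}

The ordering matters: all three early exits are cheap field reads, so an fsync that has to fall back to a commit never pays for joining a log transaction.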
2754 | /* | 2955 | /* |
@@ -2760,12 +2961,8 @@ int btrfs_log_dentry(struct btrfs_trans_handle *trans, | |||
2760 | int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, | 2961 | int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, |
2761 | struct btrfs_root *root, struct dentry *dentry) | 2962 | struct btrfs_root *root, struct dentry *dentry) |
2762 | { | 2963 | { |
2763 | u64 gen; | 2964 | return btrfs_log_inode_parent(trans, root, dentry->d_inode, |
2764 | gen = root->fs_info->last_trans_new_blockgroup; | 2965 | dentry->d_parent, 0); |
2765 | if (gen > root->fs_info->last_trans_committed) | ||
2766 | return 1; | ||
2767 | else | ||
2768 | return btrfs_log_dentry(trans, root, dentry); | ||
2769 | } | 2966 | } |
2770 | 2967 | ||
2771 | /* | 2968 | /* |
@@ -2832,7 +3029,9 @@ again: | |||
2832 | BUG_ON(!wc.replay_dest); | 3029 | BUG_ON(!wc.replay_dest); |
2833 | 3030 | ||
2834 | wc.replay_dest->log_root = log; | 3031 | wc.replay_dest->log_root = log; |
3032 | mutex_lock(&fs_info->trans_mutex); | ||
2835 | btrfs_record_root_in_trans(wc.replay_dest); | 3033 | btrfs_record_root_in_trans(wc.replay_dest); |
3034 | mutex_unlock(&fs_info->trans_mutex); | ||
2836 | ret = walk_log_tree(trans, log, &wc); | 3035 | ret = walk_log_tree(trans, log, &wc); |
2837 | BUG_ON(ret); | 3036 | BUG_ON(ret); |
2838 | 3037 | ||
@@ -2882,3 +3081,94 @@ again: | |||
2882 | kfree(log_root_tree); | 3081 | kfree(log_root_tree); |
2883 | return 0; | 3082 | return 0; |
2884 | } | 3083 | } |
3084 | |||
3085 | /* | ||
3086 | * there are some corner cases where we want to force a full | ||
3087 | * commit instead of allowing a directory to be logged. | ||
3088 | * | ||
3089 | * They revolve around files that were unlinked from the directory, and | ||
3090 | * this function updates the parent directory so that a full commit is | ||
3091 | * properly done if it is fsync'd later after the unlinks are done. | ||
3092 | */ | ||
3093 | void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans, | ||
3094 | struct inode *dir, struct inode *inode, | ||
3095 | int for_rename) | ||
3096 | { | ||
3097 | /* | ||
3098 | * when we're logging a file, if it hasn't been renamed | ||
3099 | * or unlinked, and its inode is fully committed on disk, | ||
3100 | * we don't have to worry about walking up the directory chain | ||
3101 | * to log its parents. | ||
3102 | * | ||
3103 | * So, we use the last_unlink_trans field to put this transid | ||
3104 | * into the file. When the file is logged we check it and | ||
3105 | * don't log the parents if the file is fully on disk. | ||
3106 | */ | ||
3107 | if (S_ISREG(inode->i_mode)) | ||
3108 | BTRFS_I(inode)->last_unlink_trans = trans->transid; | ||
3109 | |||
3110 | /* | ||
3111 | * if this directory was already logged any new | ||
3112 | * names for this file/dir will get recorded | ||
3113 | */ | ||
3114 | smp_mb(); | ||
3115 | if (BTRFS_I(dir)->logged_trans == trans->transid) | ||
3116 | return; | ||
3117 | |||
3118 | /* | ||
3119 | * if the inode we're about to unlink was logged, | ||
3120 | * the log will be properly updated for any new names | ||
3121 | */ | ||
3122 | if (BTRFS_I(inode)->logged_trans == trans->transid) | ||
3123 | return; | ||
3124 | |||
3125 | /* | ||
3126 | * when renaming files across directories, if the directory | ||
3127 | * we're unlinking from gets fsync'd later on, there's | ||
3128 | * no way to find the destination directory later and fsync it | ||
3129 | * properly. So, we have to be conservative and force commits | ||
3130 | * so the new name gets discovered. | ||
3131 | */ | ||
3132 | if (for_rename) | ||
3133 | goto record; | ||
3134 | |||
3135 | /* we can safely do the unlink without any special recording */ | ||
3136 | return; | ||
3137 | |||
3138 | record: | ||
3139 | BTRFS_I(dir)->last_unlink_trans = trans->transid; | ||
3140 | } | ||
3141 | |||
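The early returns above reduce to a small decision function. A toy version with the per-inode fields passed as scalars (the kernel mutates BTRFS_I(dir)->last_unlink_trans in place; this sketch returns the new value instead):

#include <stdio.h>

/* returns the dir's new last_unlink_trans value */
static unsigned long record_unlink_dir(unsigned long dir_last_unlink,
                                       unsigned long dir_logged_trans,
                                       unsigned long inode_logged_trans,
                                       int for_rename,
                                       unsigned long transid)
{
    if (dir_logged_trans == transid)
        return dir_last_unlink;   /* dir already logged this trans */
    if (inode_logged_trans == transid)
        return dir_last_unlink;   /* victim inode already logged */
    if (for_rename)
        return transid;           /* fsync of dir now forces a commit */
    return dir_last_unlink;       /* plain unlink: nothing special */
}

int main(void)
{
    /* rename in trans 9; neither dir nor inode logged in trans 9 */
    printf("%lu\n", record_unlink_dir(0, 8, 8, 1, 9));   /* prints 9 */
    return 0;
}

Only the rename case stamps the directory; a plain unlink is already covered by the last_unlink_trans written to the file itself at the top of the function.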
3142 | /* | ||
3143 | * Call this after adding a new name for a file and it will properly | ||
3144 | * update the log to reflect the new name. | ||
3145 | * | ||
3146 | * It will return zero if all goes well, and it will return 1 if a | ||
3147 | * full transaction commit is required. | ||
3148 | */ | ||
3149 | int btrfs_log_new_name(struct btrfs_trans_handle *trans, | ||
3150 | struct inode *inode, struct inode *old_dir, | ||
3151 | struct dentry *parent) | ||
3152 | { | ||
3153 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
3154 | |||
3155 | /* | ||
3156 | * this will force the logging code to walk the dentry chain | ||
3157 | * up for the file | ||
3158 | */ | ||
3159 | if (S_ISREG(inode->i_mode)) | ||
3160 | BTRFS_I(inode)->last_unlink_trans = trans->transid; | ||
3161 | |||
3162 | /* | ||
3163 | * if this inode hasn't been logged and the directory we're renaming it | ||
3164 | * from hasn't been logged, we don't need to log it | ||
3165 | */ | ||
3166 | if (BTRFS_I(inode)->logged_trans <= | ||
3167 | root->fs_info->last_trans_committed && | ||
3168 | (!old_dir || BTRFS_I(old_dir)->logged_trans <= | ||
3169 | root->fs_info->last_trans_committed)) | ||
3170 | return 0; | ||
3171 | |||
3172 | return btrfs_log_inode_parent(trans, root, inode, parent, 1); | ||
3173 | } | ||
3174 | |||
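btrfs_log_new_name()'s contract for callers is: 0 means the tree log already reflects (or never needed) the new name, 1 means a full commit is required. A scalar sketch of the decision, leaving out the last_unlink_trans stamp on regular files that makes a later fsync walk the parent chain, and reducing the final re-log to a comment:

/* 0: log is fine as-is or was updated, 1: force a full commit */
static int log_new_name(unsigned long inode_logged_trans,
                        unsigned long old_dir_logged_trans, /* 0 if none */
                        unsigned long last_committed)
{
    /* neither the inode nor the source dir is in a live log */
    if (inode_logged_trans <= last_committed &&
        old_dir_logged_trans <= last_committed)
        return 0;

    /*
     * otherwise re-log the inode and its parents in
     * LOG_INODE_EXISTS mode (exists_only == 1 above)
     */
    return 0;  /* 1 if btrfs_log_inode_parent() had to punt */
}

int main(void)
{
    /* nothing logged since the last commit (trans 6): nothing to do */
    return log_new_name(5, 4, 6);   /* exits 0 */
}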
diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h index b9409b32ed02..d09c7609e16b 100644 --- a/fs/btrfs/tree-log.h +++ b/fs/btrfs/tree-log.h | |||
@@ -22,14 +22,9 @@ | |||
22 | int btrfs_sync_log(struct btrfs_trans_handle *trans, | 22 | int btrfs_sync_log(struct btrfs_trans_handle *trans, |
23 | struct btrfs_root *root); | 23 | struct btrfs_root *root); |
24 | int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root); | 24 | int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root); |
25 | int btrfs_log_dentry(struct btrfs_trans_handle *trans, | ||
26 | struct btrfs_root *root, struct dentry *dentry); | ||
27 | int btrfs_recover_log_trees(struct btrfs_root *tree_root); | 25 | int btrfs_recover_log_trees(struct btrfs_root *tree_root); |
28 | int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, | 26 | int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, |
29 | struct btrfs_root *root, struct dentry *dentry); | 27 | struct btrfs_root *root, struct dentry *dentry); |
30 | int btrfs_log_inode(struct btrfs_trans_handle *trans, | ||
31 | struct btrfs_root *root, struct inode *inode, | ||
32 | int inode_only); | ||
33 | int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, | 28 | int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, |
34 | struct btrfs_root *root, | 29 | struct btrfs_root *root, |
35 | const char *name, int name_len, | 30 | const char *name, int name_len, |
@@ -38,4 +33,16 @@ int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans, | |||
38 | struct btrfs_root *root, | 33 | struct btrfs_root *root, |
39 | const char *name, int name_len, | 34 | const char *name, int name_len, |
40 | struct inode *inode, u64 dirid); | 35 | struct inode *inode, u64 dirid); |
36 | int btrfs_join_running_log_trans(struct btrfs_root *root); | ||
37 | int btrfs_end_log_trans(struct btrfs_root *root); | ||
38 | int btrfs_pin_log_trans(struct btrfs_root *root); | ||
39 | int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, | ||
40 | struct btrfs_root *root, struct inode *inode, | ||
41 | struct dentry *parent, int exists_only); | ||
42 | void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans, | ||
43 | struct inode *dir, struct inode *inode, | ||
44 | int for_rename); | ||
45 | int btrfs_log_new_name(struct btrfs_trans_handle *trans, | ||
46 | struct inode *inode, struct inode *old_dir, | ||
47 | struct dentry *parent); | ||
41 | #endif | 48 | #endif |
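Read together, the header changes replace the old one-shot dentry walker with composable pieces. A rough call map, assuming (the call sites are not part of this excerpt) that fsync and the unlink/rename paths in file.c and inode.c are the consumers:

/*
 * fsync path:
 *   btrfs_log_dentry_safe(trans, root, dentry)
 *     -> btrfs_log_inode_parent(inode, dentry->d_parent, exists_only=0)
 *          -> check_parent_dirs_for_sync()      may demand a full commit
 *          -> btrfs_log_inode(LOG_INODE_ALL)    items plus backrefs
 *          -> btrfs_log_inode(LOG_INODE_EXISTS) per uncommitted parent
 *
 * unlink/rename path:
 *   btrfs_record_unlink_dir(dir, inode, for_rename)  stamps transids
 *   btrfs_log_new_name(inode, old_dir, new_parent)   0 = logged, 1 = commit
 *
 * btrfs_pin_log_trans()/btrfs_end_log_trans() bracket callers that
 * must hold the log transaction open across their own work.
 */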
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index bcd14ebccae1..e0913e469728 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <linux/buffer_head.h> | 20 | #include <linux/buffer_head.h> |
21 | #include <linux/blkdev.h> | 21 | #include <linux/blkdev.h> |
22 | #include <linux/random.h> | 22 | #include <linux/random.h> |
23 | #include <linux/iocontext.h> | ||
23 | #include <asm/div64.h> | 24 | #include <asm/div64.h> |
24 | #include "compat.h" | 25 | #include "compat.h" |
25 | #include "ctree.h" | 26 | #include "ctree.h" |
@@ -145,8 +146,9 @@ static noinline int run_scheduled_bios(struct btrfs_device *device) | |||
145 | int again = 0; | 146 | int again = 0; |
146 | unsigned long num_run = 0; | 147 | unsigned long num_run = 0; |
147 | unsigned long limit; | 148 | unsigned long limit; |
149 | unsigned long last_waited = 0; | ||
148 | 150 | ||
149 | bdi = device->bdev->bd_inode->i_mapping->backing_dev_info; | 151 | bdi = blk_get_backing_dev_info(device->bdev); |
150 | fs_info = device->dev_root->fs_info; | 152 | fs_info = device->dev_root->fs_info; |
151 | limit = btrfs_async_submit_limit(fs_info); | 153 | limit = btrfs_async_submit_limit(fs_info); |
152 | limit = limit * 2 / 3; | 154 | limit = limit * 2 / 3; |
@@ -207,7 +209,32 @@ loop_lock: | |||
207 | if (pending && bdi_write_congested(bdi) && num_run > 16 && | 209 | if (pending && bdi_write_congested(bdi) && num_run > 16 && |
208 | fs_info->fs_devices->open_devices > 1) { | 210 | fs_info->fs_devices->open_devices > 1) { |
209 | struct bio *old_head; | 211 | struct bio *old_head; |
212 | struct io_context *ioc; | ||
210 | 213 | ||
214 | ioc = current->io_context; | ||
215 | |||
216 | /* | ||
217 | * the main goal here is that we don't want to | ||
218 | * block if we're going to be able to submit | ||
219 | * more requests without blocking. | ||
220 | * | ||
221 | * This code does two great things: it pokes into | ||
222 | * the elevator code from a filesystem _and_ | ||
223 | * it makes assumptions about how batching works. | ||
224 | */ | ||
225 | if (ioc && ioc->nr_batch_requests > 0 && | ||
226 | time_before(jiffies, ioc->last_waited + HZ/50UL) && | ||
227 | (last_waited == 0 || | ||
228 | ioc->last_waited == last_waited)) { | ||
229 | /* | ||
230 | * we want to go through our batch of | ||
231 | * requests and stop. So, we copy out | ||
232 | * the ioc->last_waited time and test | ||
233 | * against it before looping | ||
234 | */ | ||
235 | last_waited = ioc->last_waited; | ||
236 | continue; | ||
237 | } | ||
211 | spin_lock(&device->io_lock); | 238 | spin_lock(&device->io_lock); |
212 | 239 | ||
213 | old_head = device->pending_bios; | 240 | old_head = device->pending_bios; |
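The io_context test is self-contained enough to model outside the kernel. A sketch with jiffies replaced by a plain tick counter, the wrap-safe time_before() reduced to a '<' compare, and 'window' playing the role of HZ/50 (one fiftieth of a second):

#include <stdbool.h>
#include <stdio.h>

/*
 * keep submitting while this process still has elevator batch quota
 * and the wait being ridden out is the same one we last saw
 */
static bool keep_submitting(int nr_batch_requests,
                            unsigned long now,
                            unsigned long ioc_last_waited,
                            unsigned long window,          /* HZ/50 */
                            unsigned long *seen_last_waited)
{
    if (nr_batch_requests > 0 &&
        now < ioc_last_waited + window &&
        (*seen_last_waited == 0 ||
         ioc_last_waited == *seen_last_waited)) {
        *seen_last_waited = ioc_last_waited;   /* remember this batch */
        return true;
    }
    return false;
}

int main(void)
{
    unsigned long seen = 0;
    printf("%d\n", keep_submitting(4, 105, 100, 20, &seen));  /* 1 */
    printf("%d\n", keep_submitting(4, 130, 100, 20, &seen));  /* 0 */
    return 0;
}

Once the scheduler makes the task wait again, ioc->last_waited moves, the equality test fails, and the loop falls through to requeue the pending bios and yield.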
@@ -231,6 +258,18 @@ loop_lock: | |||
231 | if (device->pending_bios) | 258 | if (device->pending_bios) |
232 | goto loop_lock; | 259 | goto loop_lock; |
233 | spin_unlock(&device->io_lock); | 260 | spin_unlock(&device->io_lock); |
261 | |||
262 | /* | ||
263 | * IO has already been through a long path to get here. Checksumming, | ||
264 | * async helper threads, perhaps compression. We've done a pretty | ||
265 | * good job of collecting a batch of IO and should just unplug | ||
266 | * the device right away. | ||
267 | * | ||
268 | * This will help anyone who is waiting on the IO; they might have | ||
269 | * already unplugged, but managed to do so before the bio they | ||
270 | * cared about found its way down here. | ||
271 | */ | ||
272 | blk_run_backing_dev(bdi, NULL); | ||
234 | done: | 273 | done: |
235 | return 0; | 274 | return 0; |
236 | } | 275 | } |
@@ -1374,6 +1413,12 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) | |||
1374 | ret = btrfs_add_device(trans, root, device); | 1413 | ret = btrfs_add_device(trans, root, device); |
1375 | } | 1414 | } |
1376 | 1415 | ||
1416 | /* | ||
1417 | * we've got more storage, clear any full flags on the space | ||
1418 | * infos | ||
1419 | */ | ||
1420 | btrfs_clear_space_info_full(root->fs_info); | ||
1421 | |||
1377 | unlock_chunks(root); | 1422 | unlock_chunks(root); |
1378 | btrfs_commit_transaction(trans, root); | 1423 | btrfs_commit_transaction(trans, root); |
1379 | 1424 | ||
@@ -1459,6 +1504,8 @@ static int __btrfs_grow_device(struct btrfs_trans_handle *trans, | |||
1459 | device->fs_devices->total_rw_bytes += diff; | 1504 | device->fs_devices->total_rw_bytes += diff; |
1460 | 1505 | ||
1461 | device->total_bytes = new_size; | 1506 | device->total_bytes = new_size; |
1507 | btrfs_clear_space_info_full(device->dev_root->fs_info); | ||
1508 | |||
1462 | return btrfs_update_device(trans, device); | 1509 | return btrfs_update_device(trans, device); |
1463 | } | 1510 | } |
1464 | 1511 | ||
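Both call sites enforce one invariant: whenever raw capacity grows, whether a device is added or an existing one is grown, any cached "this space class is full" verdict is stale and must be cleared so the allocator will attempt chunk allocation again. btrfs_clear_space_info_full() itself is defined outside this excerpt; a toy model of the invalidation:

struct toy_space_info {
    int full;                      /* allocator stopped trying here */
    struct toy_space_info *next;   /* fs_info keeps a list of these */
};

/* capacity grew: every cached "full" verdict is now stale */
static void clear_space_info_full(struct toy_space_info *head)
{
    for (; head; head = head->next)
        head->full = 0;
}

int main(void)
{
    struct toy_space_info meta = { .full = 1 };
    struct toy_space_info data = { .full = 1, .next = &meta };

    clear_space_info_full(&data);
    return data.full || meta.full;   /* exits 0: both flags cleared */
}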
@@ -2894,10 +2941,6 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, | |||
2894 | free_extent_map(em); | 2941 | free_extent_map(em); |
2895 | } | 2942 | } |
2896 | 2943 | ||
2897 | map = kzalloc(sizeof(*map), GFP_NOFS); | ||
2898 | if (!map) | ||
2899 | return -ENOMEM; | ||
2900 | |||
2901 | em = alloc_extent_map(GFP_NOFS); | 2944 | em = alloc_extent_map(GFP_NOFS); |
2902 | if (!em) | 2945 | if (!em) |
2903 | return -ENOMEM; | 2946 | return -ENOMEM; |
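The deleted kzalloc() fixes a leak-by-clobbering bug: 'map' was allocated here and then unconditionally overwritten by a later allocation further down the same function (not visible in this hunk), so the first buffer could never be freed. The generic shape of the bug, with toy types:

#include <stdlib.h>

struct map_lookup { int stripes; };

static struct map_lookup *buggy_alloc(void)
{
    struct map_lookup *map = calloc(1, sizeof(*map));   /* leaked */
    if (!map)
        return NULL;

    map = calloc(1, sizeof(*map));   /* clobbers the first pointer */
    return map;                      /* first allocation unreachable */
}

int main(void)
{
    struct map_lookup *m = buggy_alloc();
    free(m);   /* frees only the second allocation */
    return 0;
}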
@@ -3106,6 +3149,8 @@ int btrfs_read_sys_array(struct btrfs_root *root) | |||
3106 | if (!sb) | 3149 | if (!sb) |
3107 | return -ENOMEM; | 3150 | return -ENOMEM; |
3108 | btrfs_set_buffer_uptodate(sb); | 3151 | btrfs_set_buffer_uptodate(sb); |
3152 | btrfs_set_buffer_lockdep_class(sb, 0); | ||
3153 | |||
3109 | write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE); | 3154 | write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE); |
3110 | array_size = btrfs_super_sys_array_size(super_copy); | 3155 | array_size = btrfs_super_sys_array_size(super_copy); |
3111 | 3156 | ||
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 86c44e9ae110..2185de72ff7d 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h | |||
@@ -76,7 +76,7 @@ struct btrfs_device { | |||
76 | struct btrfs_fs_devices { | 76 | struct btrfs_fs_devices { |
77 | u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */ | 77 | u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */ |
78 | 78 | ||
79 | /* the device with this id has the most recent coyp of the super */ | 79 | /* the device with this id has the most recent copy of the super */ |
80 | u64 latest_devid; | 80 | u64 latest_devid; |
81 | u64 latest_trans; | 81 | u64 latest_trans; |
82 | u64 num_devices; | 82 | u64 num_devices; |