author		Ingo Molnar <mingo@elte.hu>	2009-04-05 19:41:22 -0400
committer	Ingo Molnar <mingo@elte.hu>	2009-04-05 19:41:22 -0400
commit		9efe21cb82b5dbe3b0b2ae4de4eccc64ecb94e95
tree		7ff8833745d2f268f897f6fa4a27263b4a572245 /fs/btrfs
parent		de18836e447c2dc30120c0919b8db8ddc0401cc4
parent		0221c81b1b8eb0cbb6b30a0ced52ead32d2b4e4c
Merge branch 'linus' into irq/threaded
Conflicts:
include/linux/irq.h
kernel/irq/handle.c
Diffstat (limited to 'fs/btrfs')
33 files changed, 3751 insertions, 2167 deletions
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index d2cf5a54a4b8..9adf5e4f7e96 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -8,7 +8,7 @@ btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
 	   extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \
 	   extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
 	   ref-cache.o export.o tree-log.o acl.o free-space-cache.o zlib.o \
-	   compression.o
+	   compression.o delayed-ref.o
 else
 
 # Normal Makefile
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 1d53b62dbba5..7fdd184a528d 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -256,7 +256,7 @@ int btrfs_init_acl(struct inode *inode, struct inode *dir)
 		}
 
 		if (!acl)
-			inode->i_mode &= ~current->fs->umask;
+			inode->i_mode &= ~current_umask();
 	}
 
 	if (IS_POSIXACL(dir) && acl) {
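
The acl.c change comes in from the mainline side of the merge: the open-coded umask dereference is replaced by the current_umask() helper added for 2.6.30. A minimal sketch of the idea follows; the exact kernel definition lives in the fs headers, so treat this as an assumption rather than a quote:

	/* sketch only -- current_umask() hides the fs_struct layout
	 * behind a single accessor instead of open-coding the chain
	 * of dereferences at every call site */
	#define current_umask() (current->fs->umask)

	/* caller side, as in btrfs_init_acl(): when no default ACL
	 * applies, strip the umask bits from the new inode's mode */
	inode->i_mode &= ~current_umask();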
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index c84ca1f5259a..51bfdfc8fcda 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -20,7 +20,6 @@
 #include <linux/list.h>
 #include <linux/spinlock.h>
 #include <linux/freezer.h>
-#include <linux/ftrace.h>
 #include "async-thread.h"
 
 #define WORK_QUEUED_BIT 0
@@ -195,6 +194,9 @@ again_locked:
 			if (!list_empty(&worker->pending))
 				continue;
 
+			if (kthread_should_stop())
+				break;
+
 			/* still no more work?, sleep for real */
 			spin_lock_irq(&worker->lock);
 			set_current_state(TASK_INTERRUPTIBLE);
@@ -208,7 +210,8 @@ again_locked:
 			worker->working = 0;
 			spin_unlock_irq(&worker->lock);
 
-			schedule();
+			if (!kthread_should_stop())
+				schedule();
 		}
 		__set_current_state(TASK_RUNNING);
 	}
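
The async-thread hunks make the worker exit path follow the usual kthread idle-loop discipline: test kthread_should_stop() again after setting TASK_INTERRUPTIBLE and skip the final schedule() once a stop is pending, so kthread_stop() cannot race with the sleep. A generic sketch of that pattern (hypothetical worker, not the btrfs code):

	#include <linux/kthread.h>
	#include <linux/sched.h>

	static int worker_loop(void *arg)
	{
		while (1) {
			/* ... drain the pending work list here ... */

			set_current_state(TASK_INTERRUPTIBLE);
			/* re-check *after* changing task state: if
			 * kthread_stop() already ran we see the flag and
			 * never go to sleep; if it runs after this check,
			 * its wake-up puts us back to TASK_RUNNING and
			 * schedule() returns promptly */
			if (kthread_should_stop())
				break;
			schedule();
			__set_current_state(TASK_RUNNING);
		}
		__set_current_state(TASK_RUNNING);
		return 0;
	}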
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 72677ce2b74f..b30986f00b9d 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -66,6 +66,12 @@ struct btrfs_inode {
 	 */
 	struct list_head delalloc_inodes;
 
+	/*
+	 * list for tracking inodes that must be sent to disk before a
+	 * rename or truncate commit
+	 */
+	struct list_head ordered_operations;
+
 	/* the space_info for where this inode's data allocations are done */
 	struct btrfs_space_info *space_info;
 
@@ -86,12 +92,6 @@ struct btrfs_inode {
 	 */
 	u64 logged_trans;
 
-	/*
-	 * trans that last made a change that should be fully fsync'd. This
-	 * gets reset to zero each time the inode is logged
-	 */
-	u64 log_dirty_trans;
-
 	/* total number of bytes pending delalloc, used by stat to calc the
 	 * real block usage of the file
 	 */
@@ -121,6 +121,25 @@ struct btrfs_inode {
 	/* the start of block group preferred for allocations. */
 	u64 block_group;
 
+	/* the fsync log has some corner cases that mean we have to check
+	 * directories to see if any unlinks have been done before
+	 * the directory was logged.  See tree-log.c for all the
+	 * details
+	 */
+	u64 last_unlink_trans;
+
+	/*
+	 * ordered_data_close is set by truncate when a file that used
+	 * to have good data has been truncated to zero.  When it is set
+	 * the btrfs file release call will add this inode to the
+	 * ordered operations list so that we make sure to flush out any
+	 * new data the application may have written before commit.
+	 *
+	 * yes, its silly to have a single bitflag, but we might grow more
+	 * of these.
+	 */
+	unsigned ordered_data_close:1;
+
 	struct inode vfs_inode;
 };
 
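
The new ordered_operations member is the usual intrusive-list hook: the inode embeds a list_head and a per-filesystem list threads the inodes together, so enqueueing costs no allocation. A generic sketch of the pattern (hypothetical names, locking reduced to one spinlock):

	#include <linux/list.h>
	#include <linux/spinlock.h>

	struct tracked_inode {
		struct list_head ordered_operations; /* links into fs-global list */
		/* ... rest of the inode ... */
	};

	static LIST_HEAD(ordered_list);
	static DEFINE_SPINLOCK(ordered_lock);

	static void track_for_commit(struct tracked_inode *ti)
	{
		spin_lock(&ordered_lock);
		/* adding the embedded node enqueues the containing inode;
		 * the emptiness check keeps double-adds harmless */
		if (list_empty(&ti->ordered_operations))
			list_add_tail(&ti->ordered_operations, &ordered_list);
		spin_unlock(&ordered_lock);
	}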
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 37f31b5529aa..e5b2533b691a 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -254,18 +254,13 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
  * empty_size -- a hint that you plan on doing more cow.  This is the size in
  * bytes the allocator should try to find free next to the block it returns.
  * This is just a hint and may be ignored by the allocator.
- *
- * prealloc_dest -- if you have already reserved a destination for the cow,
- * this uses that block instead of allocating a new one.
- * btrfs_alloc_reserved_extent is used to finish the allocation.
  */
 static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root,
			     struct extent_buffer *buf,
			     struct extent_buffer *parent, int parent_slot,
			     struct extent_buffer **cow_ret,
-			     u64 search_start, u64 empty_size,
-			     u64 prealloc_dest)
+			     u64 search_start, u64 empty_size)
 {
 	u64 parent_start;
 	struct extent_buffer *cow;
@@ -291,26 +286,10 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
 	level = btrfs_header_level(buf);
 	nritems = btrfs_header_nritems(buf);
 
-	if (prealloc_dest) {
-		struct btrfs_key ins;
-
-		ins.objectid = prealloc_dest;
-		ins.offset = buf->len;
-		ins.type = BTRFS_EXTENT_ITEM_KEY;
-
-		ret = btrfs_alloc_reserved_extent(trans, root, parent_start,
-						  root->root_key.objectid,
-						  trans->transid, level, &ins);
-		BUG_ON(ret);
-		cow = btrfs_init_new_buffer(trans, root, prealloc_dest,
-					    buf->len, level);
-	} else {
-		cow = btrfs_alloc_free_block(trans, root, buf->len,
-					     parent_start,
-					     root->root_key.objectid,
-					     trans->transid, level,
-					     search_start, empty_size);
-	}
+	cow = btrfs_alloc_free_block(trans, root, buf->len,
+				     parent_start, root->root_key.objectid,
+				     trans->transid, level,
+				     search_start, empty_size);
 	if (IS_ERR(cow))
 		return PTR_ERR(cow);
 
@@ -413,7 +392,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
 noinline int btrfs_cow_block(struct btrfs_trans_handle *trans,
		    struct btrfs_root *root, struct extent_buffer *buf,
		    struct extent_buffer *parent, int parent_slot,
-		    struct extent_buffer **cow_ret, u64 prealloc_dest)
+		    struct extent_buffer **cow_ret)
 {
 	u64 search_start;
 	int ret;
@@ -436,7 +415,6 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans,
 	    btrfs_header_owner(buf) == root->root_key.objectid &&
 	    !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
 		*cow_ret = buf;
-		WARN_ON(prealloc_dest);
 		return 0;
 	}
 
@@ -447,8 +425,7 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans,
 	btrfs_set_lock_blocking(buf);
 
 	ret = __btrfs_cow_block(trans, root, buf, parent,
-				 parent_slot, cow_ret, search_start, 0,
-				 prealloc_dest);
+				 parent_slot, cow_ret, search_start, 0);
 	return ret;
 }
 
@@ -617,7 +594,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
 		err = __btrfs_cow_block(trans, root, cur, parent, i,
					&cur, search_start,
					min(16 * blocksize,
-					    (end_slot - i) * blocksize), 0);
+					    (end_slot - i) * blocksize));
 		if (err) {
 			btrfs_tree_unlock(cur);
 			free_extent_buffer(cur);
@@ -937,7 +914,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
 		BUG_ON(!child);
 		btrfs_tree_lock(child);
 		btrfs_set_lock_blocking(child);
-		ret = btrfs_cow_block(trans, root, child, mid, 0, &child, 0);
+		ret = btrfs_cow_block(trans, root, child, mid, 0, &child);
 		BUG_ON(ret);
 
 		spin_lock(&root->node_lock);
@@ -945,6 +922,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
 		spin_unlock(&root->node_lock);
 
 		ret = btrfs_update_extent_ref(trans, root, child->start,
+					      child->len,
					      mid->start, child->start,
					      root->root_key.objectid,
					      trans->transid, level - 1);
@@ -971,6 +949,10 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
 	    BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
 		return 0;
 
+	if (trans->transaction->delayed_refs.flushing &&
+	    btrfs_header_nritems(mid) > 2)
+		return 0;
+
 	if (btrfs_header_nritems(mid) < 2)
 		err_on_enospc = 1;
 
@@ -979,7 +961,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
 		btrfs_tree_lock(left);
 		btrfs_set_lock_blocking(left);
 		wret = btrfs_cow_block(trans, root, left,
-				       parent, pslot - 1, &left, 0);
+				       parent, pslot - 1, &left);
 		if (wret) {
 			ret = wret;
 			goto enospc;
@@ -990,7 +972,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
 		btrfs_tree_lock(right);
 		btrfs_set_lock_blocking(right);
 		wret = btrfs_cow_block(trans, root, right,
-				       parent, pslot + 1, &right, 0);
+				       parent, pslot + 1, &right);
 		if (wret) {
 			ret = wret;
 			goto enospc;
@@ -1171,7 +1153,7 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
 		wret = 1;
 	} else {
 		ret = btrfs_cow_block(trans, root, left, parent,
-				      pslot - 1, &left, 0);
+				      pslot - 1, &left);
 		if (ret)
 			wret = 1;
 		else {
@@ -1222,7 +1204,7 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
 		} else {
 			ret = btrfs_cow_block(trans, root, right,
					      parent, pslot + 1,
-					      &right, 0);
+					      &right);
 			if (ret)
 				wret = 1;
 			else {
@@ -1262,9 +1244,9 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
  * readahead one full node of leaves, finding things that are close
  * to the block in 'slot', and triggering ra on them.
  */
-static noinline void reada_for_search(struct btrfs_root *root,
-				      struct btrfs_path *path,
-				      int level, int slot, u64 objectid)
+static void reada_for_search(struct btrfs_root *root,
+			     struct btrfs_path *path,
+			     int level, int slot, u64 objectid)
 {
 	struct extent_buffer *node;
 	struct btrfs_disk_key disk_key;
@@ -1465,6 +1447,117 @@ noinline void btrfs_unlock_up_safe(struct btrfs_path *path, int level)
 }
 
 /*
+ * helper function for btrfs_search_slot.  The goal is to find a block
+ * in cache without setting the path to blocking.  If we find the block
+ * we return zero and the path is unchanged.
+ *
+ * If we can't find the block, we set the path blocking and do some
+ * reada.  -EAGAIN is returned and the search must be repeated.
+ */
+static int
+read_block_for_search(struct btrfs_trans_handle *trans,
+		       struct btrfs_root *root, struct btrfs_path *p,
+		       struct extent_buffer **eb_ret, int level, int slot,
+		       struct btrfs_key *key)
+{
+	u64 blocknr;
+	u64 gen;
+	u32 blocksize;
+	struct extent_buffer *b = *eb_ret;
+	struct extent_buffer *tmp;
+
+	blocknr = btrfs_node_blockptr(b, slot);
+	gen = btrfs_node_ptr_generation(b, slot);
+	blocksize = btrfs_level_size(root, level - 1);
+
+	tmp = btrfs_find_tree_block(root, blocknr, blocksize);
+	if (tmp && btrfs_buffer_uptodate(tmp, gen)) {
+		*eb_ret = tmp;
+		return 0;
+	}
+
+	/*
+	 * reduce lock contention at high levels
+	 * of the btree by dropping locks before
+	 * we read.
+	 */
+	btrfs_release_path(NULL, p);
+	if (tmp)
+		free_extent_buffer(tmp);
+	if (p->reada)
+		reada_for_search(root, p, level, slot, key->objectid);
+
+	tmp = read_tree_block(root, blocknr, blocksize, gen);
+	if (tmp)
+		free_extent_buffer(tmp);
+	return -EAGAIN;
+}
+
+/*
+ * helper function for btrfs_search_slot.  This does all of the checks
+ * for node-level blocks and does any balancing required based on
+ * the ins_len.
+ *
+ * If no extra work was required, zero is returned.  If we had to
+ * drop the path, -EAGAIN is returned and btrfs_search_slot must
+ * start over
+ */
+static int
+setup_nodes_for_search(struct btrfs_trans_handle *trans,
+		       struct btrfs_root *root, struct btrfs_path *p,
+		       struct extent_buffer *b, int level, int ins_len)
+{
+	int ret;
+	if ((p->search_for_split || ins_len > 0) && btrfs_header_nritems(b) >=
+	    BTRFS_NODEPTRS_PER_BLOCK(root) - 3) {
+		int sret;
+
+		sret = reada_for_balance(root, p, level);
+		if (sret)
+			goto again;
+
+		btrfs_set_path_blocking(p);
+		sret = split_node(trans, root, p, level);
+		btrfs_clear_path_blocking(p, NULL);
+
+		BUG_ON(sret > 0);
+		if (sret) {
+			ret = sret;
+			goto done;
+		}
+		b = p->nodes[level];
+	} else if (ins_len < 0 && btrfs_header_nritems(b) <
+		   BTRFS_NODEPTRS_PER_BLOCK(root) / 4) {
+		int sret;
+
+		sret = reada_for_balance(root, p, level);
+		if (sret)
+			goto again;
+
+		btrfs_set_path_blocking(p);
+		sret = balance_level(trans, root, p, level);
+		btrfs_clear_path_blocking(p, NULL);
+
+		if (sret) {
+			ret = sret;
+			goto done;
+		}
+		b = p->nodes[level];
+		if (!b) {
+			btrfs_release_path(NULL, p);
+			goto again;
+		}
+		BUG_ON(btrfs_header_nritems(b) == 1);
+	}
+	return 0;
+
+again:
+	ret = -EAGAIN;
+done:
+	return ret;
+}
+
+/*
  * look for key in the tree.  path is filled in with nodes along the way
  * if key is found, we return zero and you can find the item in the leaf
  * level of the path (level 0)
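
Both new helpers report -EAGAIN after releasing the path, and btrfs_search_slot (below) turns that into a restart from the root. Stripped of the btree details, the contract reduces to a plain retry loop; a condensed sketch, with the descend step left hypothetical:

	/* condensed view of the restart contract, not the verbatim caller */
	again:
		b = lock_root_and_descend(root);	/* hypothetical */
		ret = setup_nodes_for_search(trans, root, p, b, level, ins_len);
		if (ret == -EAGAIN)
			goto again;	/* path was dropped; start over */
		ret = read_block_for_search(trans, root, p, &b, level, slot, key);
		if (ret == -EAGAIN)
			goto again;	/* block is now cached; retry cheaply */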
@@ -1482,17 +1575,11 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
		      ins_len, int cow)
 {
 	struct extent_buffer *b;
-	struct extent_buffer *tmp;
 	int slot;
 	int ret;
 	int level;
-	int should_reada = p->reada;
 	int lowest_unlock = 1;
-	int blocksize;
 	u8 lowest_level = 0;
-	u64 blocknr;
-	u64 gen;
-	struct btrfs_key prealloc_block;
 
 	lowest_level = p->lowest_level;
 	WARN_ON(lowest_level && ins_len > 0);
@@ -1501,8 +1588,6 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
 	if (ins_len < 0)
 		lowest_unlock = 2;
 
-	prealloc_block.objectid = 0;
-
 again:
 	if (p->skip_locking)
 		b = btrfs_root_node(root);
@@ -1523,50 +1608,21 @@ again:
 		if (cow) {
 			int wret;
 
-			/* is a cow on this block not required */
+			/*
+			 * if we don't really need to cow this block
+			 * then we don't want to set the path blocking,
+			 * so we test it here
+			 */
 			if (btrfs_header_generation(b) == trans->transid &&
			    btrfs_header_owner(b) == root->root_key.objectid &&
			    !btrfs_header_flag(b, BTRFS_HEADER_FLAG_WRITTEN)) {
				goto cow_done;
			}
-
-			/* ok, we have to cow, is our old prealloc the right
-			 * size?
-			 */
-			if (prealloc_block.objectid &&
-			    prealloc_block.offset != b->len) {
-				btrfs_release_path(root, p);
-				btrfs_free_reserved_extent(root,
-					   prealloc_block.objectid,
-					   prealloc_block.offset);
-				prealloc_block.objectid = 0;
-				goto again;
-			}
-
-			/*
-			 * for higher level blocks, try not to allocate blocks
-			 * with the block and the parent locks held.
-			 */
-			if (level > 0 && !prealloc_block.objectid) {
-				u32 size = b->len;
-				u64 hint = b->start;
-
-				btrfs_release_path(root, p);
-				ret = btrfs_reserve_extent(trans, root,
-							   size, size, 0,
-							   hint, (u64)-1,
-							   &prealloc_block, 0);
-				BUG_ON(ret);
-				goto again;
-			}
-
 			btrfs_set_path_blocking(p);
 
 			wret = btrfs_cow_block(trans, root, b,
					       p->nodes[level + 1],
-					       p->slots[level + 1],
-					       &b, prealloc_block.objectid);
-			prealloc_block.objectid = 0;
+					       p->slots[level + 1], &b);
 			if (wret) {
				free_extent_buffer(b);
				ret = wret;
@@ -1611,51 +1667,15 @@ cow_done:
 			if (ret && slot > 0)
				slot -= 1;
 			p->slots[level] = slot;
-			if ((p->search_for_split || ins_len > 0) &&
-			    btrfs_header_nritems(b) >=
-			    BTRFS_NODEPTRS_PER_BLOCK(root) - 3) {
-				int sret;
-
-				sret = reada_for_balance(root, p, level);
-				if (sret)
-					goto again;
-
-				btrfs_set_path_blocking(p);
-				sret = split_node(trans, root, p, level);
-				btrfs_clear_path_blocking(p, NULL);
-
-				BUG_ON(sret > 0);
-				if (sret) {
-					ret = sret;
-					goto done;
-				}
-				b = p->nodes[level];
-				slot = p->slots[level];
-			} else if (ins_len < 0 &&
-				   btrfs_header_nritems(b) <
-				   BTRFS_NODEPTRS_PER_BLOCK(root) / 4) {
-				int sret;
-
-				sret = reada_for_balance(root, p, level);
-				if (sret)
-					goto again;
-
-				btrfs_set_path_blocking(p);
-				sret = balance_level(trans, root, p, level);
-				btrfs_clear_path_blocking(p, NULL);
+			ret = setup_nodes_for_search(trans, root, p, b, level,
+						     ins_len);
+			if (ret == -EAGAIN)
+				goto again;
+			else if (ret)
+				goto done;
+			b = p->nodes[level];
+			slot = p->slots[level];
 
-				if (sret) {
-					ret = sret;
-					goto done;
-				}
-				b = p->nodes[level];
-				if (!b) {
-					btrfs_release_path(NULL, p);
-					goto again;
-				}
-				slot = p->slots[level];
-				BUG_ON(btrfs_header_nritems(b) == 1);
-			}
 			unlock_up(p, level, lowest_unlock);
 
 			/* this is only true while dropping a snapshot */
@@ -1664,44 +1684,11 @@ cow_done:
				goto done;
			}
 
-		blocknr = btrfs_node_blockptr(b, slot);
-		gen = btrfs_node_ptr_generation(b, slot);
-		blocksize = btrfs_level_size(root, level - 1);
+		ret = read_block_for_search(trans, root, p,
+					    &b, level, slot, key);
+		if (ret == -EAGAIN)
+			goto again;
 
-		tmp = btrfs_find_tree_block(root, blocknr, blocksize);
-		if (tmp && btrfs_buffer_uptodate(tmp, gen)) {
-			b = tmp;
-		} else {
-			/*
-			 * reduce lock contention at high levels
-			 * of the btree by dropping locks before
-			 * we read.
-			 */
-			if (level > 0) {
-				btrfs_release_path(NULL, p);
-				if (tmp)
-					free_extent_buffer(tmp);
-				if (should_reada)
-					reada_for_search(root, p,
-							 level, slot,
-							 key->objectid);
-
-				tmp = read_tree_block(root, blocknr,
-						      blocksize, gen);
-				if (tmp)
-					free_extent_buffer(tmp);
-				goto again;
-			} else {
-				btrfs_set_path_blocking(p);
-				if (tmp)
-					free_extent_buffer(tmp);
-				if (should_reada)
-					reada_for_search(root, p,
-							 level, slot,
-							 key->objectid);
-				b = read_node_slot(root, b, slot);
-			}
-		}
 		if (!p->skip_locking) {
 			int lret;
 
@@ -1742,12 +1729,8 @@ done:
	 * we don't really know what they plan on doing with the path
	 * from here on, so for now just mark it as blocking
	 */
-	btrfs_set_path_blocking(p);
-	if (prealloc_block.objectid) {
-		btrfs_free_reserved_extent(root,
-			   prealloc_block.objectid,
-			   prealloc_block.offset);
-	}
+	if (!p->leave_spinning)
+		btrfs_set_path_blocking(p);
	return ret;
 }
 
@@ -1768,7 +1751,7 @@ int btrfs_merge_path(struct btrfs_trans_handle *trans,
	int ret;
 
	eb = btrfs_lock_root_node(root);
-	ret = btrfs_cow_block(trans, root, eb, NULL, 0, &eb, 0);
+	ret = btrfs_cow_block(trans, root, eb, NULL, 0, &eb);
	BUG_ON(ret);
 
	btrfs_set_lock_blocking(eb);
@@ -1826,7 +1809,7 @@ int btrfs_merge_path(struct btrfs_trans_handle *trans,
		}
 
		ret = btrfs_cow_block(trans, root, eb, parent, slot,
-				      &eb, 0);
+				      &eb);
		BUG_ON(ret);
 
		if (root->root_key.objectid ==
@@ -2139,7 +2122,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
	spin_unlock(&root->node_lock);
 
	ret = btrfs_update_extent_ref(trans, root, lower->start,
-				      lower->start, c->start,
+				      lower->len, lower->start, c->start,
				      root->root_key.objectid,
				      trans->transid, level - 1);
	BUG_ON(ret);
@@ -2174,8 +2157,7 @@ static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root
	BUG_ON(!path->nodes[level]);
	lower = path->nodes[level];
	nritems = btrfs_header_nritems(lower);
-	if (slot > nritems)
-		BUG();
+	BUG_ON(slot > nritems);
	if (nritems == BTRFS_NODEPTRS_PER_BLOCK(root))
		BUG();
	if (slot != nritems) {
@@ -2221,7 +2203,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
		ret = insert_new_root(trans, root, path, level + 1);
		if (ret)
			return ret;
-	} else {
+	} else if (!trans->transaction->delayed_refs.flushing) {
		ret = push_nodes_for_insert(trans, root, path, level);
		c = path->nodes[level];
		if (!ret && btrfs_header_nritems(c) <
@@ -2329,66 +2311,27 @@ noinline int btrfs_leaf_free_space(struct btrfs_root *root,
	return ret;
 }
 
-/*
- * push some data in the path leaf to the right, trying to free up at
- * least data_size bytes.  returns zero if the push worked, nonzero otherwise
- *
- * returns 1 if the push failed because the other node didn't have enough
- * room, 0 if everything worked out and < 0 if there were major errors.
- */
-static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
-			   *root, struct btrfs_path *path, int data_size,
-			   int empty)
+static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
+				      struct btrfs_root *root,
+				      struct btrfs_path *path,
+				      int data_size, int empty,
+				      struct extent_buffer *right,
+				      int free_space, u32 left_nritems)
 {
	struct extent_buffer *left = path->nodes[0];
-	struct extent_buffer *right;
-	struct extent_buffer *upper;
+	struct extent_buffer *upper = path->nodes[1];
	struct btrfs_disk_key disk_key;
	int slot;
	u32 i;
-	int free_space;
	int push_space = 0;
	int push_items = 0;
	struct btrfs_item *item;
-	u32 left_nritems;
	u32 nr;
	u32 right_nritems;
	u32 data_end;
	u32 this_item_size;
	int ret;
 
-	slot = path->slots[1];
-	if (!path->nodes[1])
-		return 1;
-
-	upper = path->nodes[1];
-	if (slot >= btrfs_header_nritems(upper) - 1)
-		return 1;
-
-	btrfs_assert_tree_locked(path->nodes[1]);
-
-	right = read_node_slot(root, upper, slot + 1);
-	btrfs_tree_lock(right);
-	btrfs_set_lock_blocking(right);
-
-	free_space = btrfs_leaf_free_space(root, right);
-	if (free_space < data_size)
-		goto out_unlock;
-
-	/* cow and double check */
-	ret = btrfs_cow_block(trans, root, right, upper,
-			      slot + 1, &right, 0);
-	if (ret)
-		goto out_unlock;
-
-	free_space = btrfs_leaf_free_space(root, right);
-	if (free_space < data_size)
-		goto out_unlock;
-
-	left_nritems = btrfs_header_nritems(left);
-	if (left_nritems == 0)
-		goto out_unlock;
-
	if (empty)
		nr = 0;
	else
@@ -2397,6 +2340,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
	if (path->slots[0] >= left_nritems)
		push_space += data_size;
 
+	slot = path->slots[1];
	i = left_nritems - 1;
	while (i >= nr) {
		item = btrfs_item_nr(left, i);
@@ -2528,24 +2472,82 @@ out_unlock:
 }
 
 /*
+ * push some data in the path leaf to the right, trying to free up at
+ * least data_size bytes.  returns zero if the push worked, nonzero otherwise
+ *
+ * returns 1 if the push failed because the other node didn't have enough
+ * room, 0 if everything worked out and < 0 if there were major errors.
+ */
+static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
+			   *root, struct btrfs_path *path, int data_size,
+			   int empty)
+{
+	struct extent_buffer *left = path->nodes[0];
+	struct extent_buffer *right;
+	struct extent_buffer *upper;
+	int slot;
+	int free_space;
+	u32 left_nritems;
+	int ret;
+
+	if (!path->nodes[1])
+		return 1;
+
+	slot = path->slots[1];
+	upper = path->nodes[1];
+	if (slot >= btrfs_header_nritems(upper) - 1)
+		return 1;
+
+	btrfs_assert_tree_locked(path->nodes[1]);
+
+	right = read_node_slot(root, upper, slot + 1);
+	btrfs_tree_lock(right);
+	btrfs_set_lock_blocking(right);
+
+	free_space = btrfs_leaf_free_space(root, right);
+	if (free_space < data_size)
+		goto out_unlock;
+
+	/* cow and double check */
+	ret = btrfs_cow_block(trans, root, right, upper,
+			      slot + 1, &right);
+	if (ret)
+		goto out_unlock;
+
+	free_space = btrfs_leaf_free_space(root, right);
+	if (free_space < data_size)
+		goto out_unlock;
+
+	left_nritems = btrfs_header_nritems(left);
+	if (left_nritems == 0)
+		goto out_unlock;
+
+	return __push_leaf_right(trans, root, path, data_size, empty,
+				 right, free_space, left_nritems);
+out_unlock:
+	btrfs_tree_unlock(right);
+	free_extent_buffer(right);
+	return 1;
+}
+
+/*
  * push some data in the path leaf to the left, trying to free up at
  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
  */
-static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
-			  *root, struct btrfs_path *path, int data_size,
-			  int empty)
+static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
+				     struct btrfs_root *root,
+				     struct btrfs_path *path, int data_size,
+				     int empty, struct extent_buffer *left,
+				     int free_space, int right_nritems)
 {
	struct btrfs_disk_key disk_key;
	struct extent_buffer *right = path->nodes[0];
-	struct extent_buffer *left;
	int slot;
	int i;
-	int free_space;
	int push_space = 0;
	int push_items = 0;
	struct btrfs_item *item;
	u32 old_left_nritems;
-	u32 right_nritems;
	u32 nr;
	int ret = 0;
	int wret;
@@ -2553,41 +2555,6 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
	u32 old_left_item_size;
 
	slot = path->slots[1];
-	if (slot == 0)
-		return 1;
-	if (!path->nodes[1])
-		return 1;
-
-	right_nritems = btrfs_header_nritems(right);
-	if (right_nritems == 0)
-		return 1;
-
-	btrfs_assert_tree_locked(path->nodes[1]);
-
-	left = read_node_slot(root, path->nodes[1], slot - 1);
-	btrfs_tree_lock(left);
-	btrfs_set_lock_blocking(left);
-
-	free_space = btrfs_leaf_free_space(root, left);
-	if (free_space < data_size) {
-		ret = 1;
-		goto out;
-	}
-
-	/* cow and double check */
-	ret = btrfs_cow_block(trans, root, left,
-			      path->nodes[1], slot - 1, &left, 0);
-	if (ret) {
-		/* we hit -ENOSPC, but it isn't fatal here */
-		ret = 1;
-		goto out;
-	}
-
-	free_space = btrfs_leaf_free_space(root, left);
-	if (free_space < data_size) {
-		ret = 1;
-		goto out;
-	}
 
	if (empty)
		nr = right_nritems;
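
The push_leaf_right/__push_leaf_right split above (mirrored for the left side in the next hunk) is a stack-depth trick: a small wrapper does the sibling lookup, locking, and cheap early-outs, while the many locals of the actual item copying move into a noinline worker whose frame only exists when a push is really possible. Reduced to a sketch with hypothetical helpers:

	/* worker: noinline keeps its large frame off the stack of
	 * callers that bail out early in the wrapper */
	static noinline int __do_push(struct btrfs_path *path,
				      struct extent_buffer *sibling,
				      int free_space)
	{
		/* ... many locals and the actual item copying ... */
		return 0;
	}

	static int do_push(struct btrfs_path *path)
	{
		struct extent_buffer *sibling;
		int free_space;

		sibling = grab_and_lock_sibling(path);	/* hypothetical */
		free_space = space_in(sibling);		/* hypothetical */
		if (free_space < 1)
			return 1;	/* cheap failure: worker frame never built */
		return __do_push(path, sibling, free_space);
	}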
@@ -2755,6 +2722,154 @@ out:
 }
 
 /*
+ * push some data in the path leaf to the left, trying to free up at
+ * least data_size bytes.  returns zero if the push worked, nonzero otherwise
+ */
+static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
+			  *root, struct btrfs_path *path, int data_size,
+			  int empty)
+{
+	struct extent_buffer *right = path->nodes[0];
+	struct extent_buffer *left;
+	int slot;
+	int free_space;
+	u32 right_nritems;
+	int ret = 0;
+
+	slot = path->slots[1];
+	if (slot == 0)
+		return 1;
+	if (!path->nodes[1])
+		return 1;
+
+	right_nritems = btrfs_header_nritems(right);
+	if (right_nritems == 0)
+		return 1;
+
+	btrfs_assert_tree_locked(path->nodes[1]);
+
+	left = read_node_slot(root, path->nodes[1], slot - 1);
+	btrfs_tree_lock(left);
+	btrfs_set_lock_blocking(left);
+
+	free_space = btrfs_leaf_free_space(root, left);
+	if (free_space < data_size) {
+		ret = 1;
+		goto out;
+	}
+
+	/* cow and double check */
+	ret = btrfs_cow_block(trans, root, left,
+			      path->nodes[1], slot - 1, &left);
+	if (ret) {
+		/* we hit -ENOSPC, but it isn't fatal here */
+		ret = 1;
+		goto out;
+	}
+
+	free_space = btrfs_leaf_free_space(root, left);
+	if (free_space < data_size) {
+		ret = 1;
+		goto out;
+	}
+
+	return __push_leaf_left(trans, root, path, data_size,
+				empty, left, free_space, right_nritems);
+out:
+	btrfs_tree_unlock(left);
+	free_extent_buffer(left);
+	return ret;
+}
+
+/*
+ * split the path's leaf in two, making sure there is at least data_size
+ * available for the resulting leaf level of the path.
+ *
+ * returns 0 if all went well and < 0 on failure.
+ */
+static noinline int copy_for_split(struct btrfs_trans_handle *trans,
+			       struct btrfs_root *root,
+			       struct btrfs_path *path,
+			       struct extent_buffer *l,
+			       struct extent_buffer *right,
+			       int slot, int mid, int nritems)
+{
+	int data_copy_size;
+	int rt_data_off;
+	int i;
+	int ret = 0;
+	int wret;
+	struct btrfs_disk_key disk_key;
+
+	nritems = nritems - mid;
+	btrfs_set_header_nritems(right, nritems);
+	data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(root, l);
+
+	copy_extent_buffer(right, l, btrfs_item_nr_offset(0),
+			   btrfs_item_nr_offset(mid),
+			   nritems * sizeof(struct btrfs_item));
+
+	copy_extent_buffer(right, l,
+		     btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) -
+		     data_copy_size, btrfs_leaf_data(l) +
+		     leaf_data_end(root, l), data_copy_size);
+
+	rt_data_off = BTRFS_LEAF_DATA_SIZE(root) -
+		      btrfs_item_end_nr(l, mid);
+
+	for (i = 0; i < nritems; i++) {
+		struct btrfs_item *item = btrfs_item_nr(right, i);
+		u32 ioff;
+
+		if (!right->map_token) {
+			map_extent_buffer(right, (unsigned long)item,
+					sizeof(struct btrfs_item),
+					&right->map_token, &right->kaddr,
+					&right->map_start, &right->map_len,
+					KM_USER1);
+		}
+
+		ioff = btrfs_item_offset(right, item);
+		btrfs_set_item_offset(right, item, ioff + rt_data_off);
+	}
+
+	if (right->map_token) {
+		unmap_extent_buffer(right, right->map_token, KM_USER1);
+		right->map_token = NULL;
+	}
+
+	btrfs_set_header_nritems(l, mid);
+	ret = 0;
+	btrfs_item_key(right, &disk_key, 0);
+	wret = insert_ptr(trans, root, path, &disk_key, right->start,
+			  path->slots[1] + 1, 1);
+	if (wret)
+		ret = wret;
+
+	btrfs_mark_buffer_dirty(right);
+	btrfs_mark_buffer_dirty(l);
+	BUG_ON(path->slots[0] != slot);
+
+	ret = btrfs_update_ref(trans, root, l, right, 0, nritems);
+	BUG_ON(ret);
+
+	if (mid <= slot) {
+		btrfs_tree_unlock(path->nodes[0]);
+		free_extent_buffer(path->nodes[0]);
+		path->nodes[0] = right;
+		path->slots[0] -= mid;
+		path->slots[1] += 1;
+	} else {
+		btrfs_tree_unlock(right);
+		free_extent_buffer(right);
+	}
+
+	BUG_ON(path->slots[0] < 0);
+
+	return ret;
+}
+
+/*
  * split the path's leaf in two, making sure there is at least data_size
  * available for the resulting leaf level of the path.
  *
@@ -2771,17 +2886,14 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
	int mid;
	int slot;
	struct extent_buffer *right;
-	int data_copy_size;
-	int rt_data_off;
-	int i;
	int ret = 0;
	int wret;
	int double_split;
	int num_doubles = 0;
-	struct btrfs_disk_key disk_key;
 
	/* first try to make some room by pushing left and right */
-	if (data_size && ins_key->type != BTRFS_DIR_ITEM_KEY) {
+	if (data_size && ins_key->type != BTRFS_DIR_ITEM_KEY &&
+	    !trans->transaction->delayed_refs.flushing) {
		wret = push_leaf_right(trans, root, path, data_size, 0);
		if (wret < 0)
			return wret;
@@ -2830,11 +2942,14 @@ again:
	write_extent_buffer(right, root->fs_info->chunk_tree_uuid,
			    (unsigned long)btrfs_header_chunk_tree_uuid(right),
			    BTRFS_UUID_SIZE);
+
	if (mid <= slot) {
		if (nritems == 1 ||
		    leaf_space_used(l, mid, nritems - mid) + data_size >
		    BTRFS_LEAF_DATA_SIZE(root)) {
			if (slot >= nritems) {
+				struct btrfs_disk_key disk_key;
+
				btrfs_cpu_key_to_disk(&disk_key, ins_key);
				btrfs_set_header_nritems(right, 0);
				wret = insert_ptr(trans, root, path,
@@ -2862,6 +2977,8 @@ again:
		if (leaf_space_used(l, 0, mid) + data_size >
		    BTRFS_LEAF_DATA_SIZE(root)) {
			if (!extend && data_size && slot == 0) {
+				struct btrfs_disk_key disk_key;
+
				btrfs_cpu_key_to_disk(&disk_key, ins_key);
				btrfs_set_header_nritems(right, 0);
				wret = insert_ptr(trans, root, path,
@@ -2894,76 +3011,16 @@ again:
			}
		}
	}
-	nritems = nritems - mid;
-	btrfs_set_header_nritems(right, nritems);
-	data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(root, l);
-
-	copy_extent_buffer(right, l, btrfs_item_nr_offset(0),
-			   btrfs_item_nr_offset(mid),
-			   nritems * sizeof(struct btrfs_item));
-
-	copy_extent_buffer(right, l,
-		     btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) -
-		     data_copy_size, btrfs_leaf_data(l) +
-		     leaf_data_end(root, l), data_copy_size);
-
-	rt_data_off = BTRFS_LEAF_DATA_SIZE(root) -
-		      btrfs_item_end_nr(l, mid);
-
-	for (i = 0; i < nritems; i++) {
-		struct btrfs_item *item = btrfs_item_nr(right, i);
-		u32 ioff;
-
-		if (!right->map_token) {
-			map_extent_buffer(right, (unsigned long)item,
-					sizeof(struct btrfs_item),
-					&right->map_token, &right->kaddr,
-					&right->map_start, &right->map_len,
-					KM_USER1);
-		}
-
-		ioff = btrfs_item_offset(right, item);
-		btrfs_set_item_offset(right, item, ioff + rt_data_off);
-	}
 
-	if (right->map_token) {
-		unmap_extent_buffer(right, right->map_token, KM_USER1);
-		right->map_token = NULL;
-	}
-
-	btrfs_set_header_nritems(l, mid);
-	ret = 0;
-	btrfs_item_key(right, &disk_key, 0);
-	wret = insert_ptr(trans, root, path, &disk_key, right->start,
-			  path->slots[1] + 1, 1);
-	if (wret)
-		ret = wret;
-
-	btrfs_mark_buffer_dirty(right);
-	btrfs_mark_buffer_dirty(l);
-	BUG_ON(path->slots[0] != slot);
-
-	ret = btrfs_update_ref(trans, root, l, right, 0, nritems);
+	ret = copy_for_split(trans, root, path, l, right, slot, mid, nritems);
	BUG_ON(ret);
 
-	if (mid <= slot) {
-		btrfs_tree_unlock(path->nodes[0]);
-		free_extent_buffer(path->nodes[0]);
-		path->nodes[0] = right;
-		path->slots[0] -= mid;
-		path->slots[1] += 1;
-	} else {
-		btrfs_tree_unlock(right);
-		free_extent_buffer(right);
-	}
-
-	BUG_ON(path->slots[0] < 0);
-
	if (double_split) {
		BUG_ON(num_doubles != 0);
		num_doubles++;
		goto again;
	}
+
	return ret;
 }
 
@@ -3021,26 +3078,27 @@ int btrfs_split_item(struct btrfs_trans_handle *trans,
		return -EAGAIN;
	}
 
+	btrfs_set_path_blocking(path);
	ret = split_leaf(trans, root, &orig_key, path,
			 sizeof(struct btrfs_item), 1);
	path->keep_locks = 0;
	BUG_ON(ret);
 
+	btrfs_unlock_up_safe(path, 1);
+	leaf = path->nodes[0];
+	BUG_ON(btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item));
+
+split:
	/*
	 * make sure any changes to the path from split_leaf leave it
	 * in a blocking state
	 */
	btrfs_set_path_blocking(path);
 
-	leaf = path->nodes[0];
-	BUG_ON(btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item));
-
-split:
	item = btrfs_item_nr(leaf, path->slots[0]);
	orig_offset = btrfs_item_offset(leaf, item);
	item_size = btrfs_item_size(leaf, item);
 
-
	buf = kmalloc(item_size, GFP_NOFS);
	read_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf,
			   path->slots[0]), item_size);
@@ -3445,39 +3503,27 @@ out:
 }
 
 /*
- * Given a key and some data, insert items into the tree.
- * This does all the path init required, making room in the tree if needed.
+ * this is a helper for btrfs_insert_empty_items, the main goal here is
+ * to save stack depth by doing the bulk of the work in a function
+ * that doesn't call btrfs_search_slot
  */
-int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
-			     struct btrfs_root *root,
-			     struct btrfs_path *path,
+static noinline_for_stack int
+setup_items_for_insert(struct btrfs_trans_handle *trans,
		      struct btrfs_root *root, struct btrfs_path *path,
		      struct btrfs_key *cpu_key, u32 *data_size,
-			     int nr)
+		      u32 total_data, u32 total_size, int nr)
 {
-	struct extent_buffer *leaf;
	struct btrfs_item *item;
-	int ret = 0;
-	int slot;
-	int slot_orig;
	int i;
	u32 nritems;
-	u32 total_size = 0;
-	u32 total_data = 0;
	unsigned int data_end;
	struct btrfs_disk_key disk_key;
+	int ret;
+	struct extent_buffer *leaf;
+	int slot;
 
-	for (i = 0; i < nr; i++)
-		total_data += data_size[i];
-
-	total_size = total_data + (nr * sizeof(struct btrfs_item));
-	ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1);
-	if (ret == 0)
-		return -EEXIST;
-	if (ret < 0)
-		goto out;
-
-	slot_orig = path->slots[0];
	leaf = path->nodes[0];
+	slot = path->slots[0];
 
	nritems = btrfs_header_nritems(leaf);
	data_end = leaf_data_end(root, leaf);
@@ -3489,9 +3535,6 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
		BUG();
	}
 
-	slot = path->slots[0];
-	BUG_ON(slot < 0);
-
	if (slot != nritems) {
		unsigned int old_data = btrfs_item_end_nr(leaf, slot);
 
@@ -3547,21 +3590,60 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, | |||
3547 | data_end -= data_size[i]; | 3590 | data_end -= data_size[i]; |
3548 | btrfs_set_item_size(leaf, item, data_size[i]); | 3591 | btrfs_set_item_size(leaf, item, data_size[i]); |
3549 | } | 3592 | } |
3593 | |||
3550 | btrfs_set_header_nritems(leaf, nritems + nr); | 3594 | btrfs_set_header_nritems(leaf, nritems + nr); |
3551 | btrfs_mark_buffer_dirty(leaf); | ||
3552 | 3595 | ||
3553 | ret = 0; | 3596 | ret = 0; |
3554 | if (slot == 0) { | 3597 | if (slot == 0) { |
3598 | struct btrfs_disk_key disk_key; | ||
3555 | btrfs_cpu_key_to_disk(&disk_key, cpu_key); | 3599 | btrfs_cpu_key_to_disk(&disk_key, cpu_key); |
3556 | ret = fixup_low_keys(trans, root, path, &disk_key, 1); | 3600 | ret = fixup_low_keys(trans, root, path, &disk_key, 1); |
3557 | } | 3601 | } |
3602 | btrfs_unlock_up_safe(path, 1); | ||
3603 | btrfs_mark_buffer_dirty(leaf); | ||
3558 | 3604 | ||
3559 | if (btrfs_leaf_free_space(root, leaf) < 0) { | 3605 | if (btrfs_leaf_free_space(root, leaf) < 0) { |
3560 | btrfs_print_leaf(root, leaf); | 3606 | btrfs_print_leaf(root, leaf); |
3561 | BUG(); | 3607 | BUG(); |
3562 | } | 3608 | } |
3609 | return ret; | ||
3610 | } | ||
3611 | |||
3612 | /* | ||
3613 | * Given a key and some data, insert items into the tree. | ||
3614 | * This does all the path init required, making room in the tree if needed. | ||
3615 | */ | ||
3616 | int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, | ||
3617 | struct btrfs_root *root, | ||
3618 | struct btrfs_path *path, | ||
3619 | struct btrfs_key *cpu_key, u32 *data_size, | ||
3620 | int nr) | ||
3621 | { | ||
3622 | struct extent_buffer *leaf; | ||
3623 | int ret = 0; | ||
3624 | int slot; | ||
3625 | int i; | ||
3626 | u32 total_size = 0; | ||
3627 | u32 total_data = 0; | ||
3628 | |||
3629 | for (i = 0; i < nr; i++) | ||
3630 | total_data += data_size[i]; | ||
3631 | |||
3632 | total_size = total_data + (nr * sizeof(struct btrfs_item)); | ||
3633 | ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1); | ||
3634 | if (ret == 0) | ||
3635 | return -EEXIST; | ||
3636 | if (ret < 0) | ||
3637 | goto out; | ||
3638 | |||
3639 | leaf = path->nodes[0]; | ||
3640 | slot = path->slots[0]; | ||
3641 | BUG_ON(slot < 0); | ||
3642 | |||
3643 | ret = setup_items_for_insert(trans, root, path, cpu_key, data_size, | ||
3644 | total_data, total_size, nr); | ||
3645 | |||
3563 | out: | 3646 | out: |
3564 | btrfs_unlock_up_safe(path, 1); | ||
3565 | return ret; | 3647 | return ret; |
3566 | } | 3648 | } |
3567 | 3649 | ||
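The hunk above splits item insertion in two: btrfs_insert_empty_items() now only searches for room, while setup_items_for_insert() lays out the item headers, so a caller that already holds a leaf can reuse the setup step. As a rough caller-side sketch (assuming the existing single-item wrapper btrfs_insert_empty_item() and the usual extent-buffer helpers; error handling abbreviated):

	/*
	 * Hypothetical caller: insert one item, then copy its payload.
	 * The helpers named here exist in ctree.[ch]; the flow is a sketch.
	 */
	static int insert_one_item(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root, struct btrfs_key *key,
				   void *data, u32 data_size)
	{
		struct btrfs_path *path;
		unsigned long ptr;
		int ret;

		path = btrfs_alloc_path();
		if (!path)
			return -ENOMEM;
		/* search for room, then setup_items_for_insert() fills the slot */
		ret = btrfs_insert_empty_item(trans, root, path, key, data_size);
		if (ret == 0) {
			/* the slot now exists; copy the payload into the leaf */
			ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
			write_extent_buffer(path->nodes[0], data, ptr, data_size);
			btrfs_mark_buffer_dirty(path->nodes[0]);
		}
		btrfs_free_path(path);
		return ret;
	}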
@@ -3749,7 +3831,8 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
3749 | } | 3831 | } |
3750 | 3832 | ||
3751 | /* delete the leaf if it is mostly empty */ | 3833 | /* delete the leaf if it is mostly empty */ |
3752 | if (used < BTRFS_LEAF_DATA_SIZE(root) / 4) { | 3834 | if (used < BTRFS_LEAF_DATA_SIZE(root) / 4 && |
3835 | !trans->transaction->delayed_refs.flushing) { | ||
3753 | /* push_leaf_left fixes the path. | 3836 | /* push_leaf_left fixes the path. |
3754 | * make sure the path still points to our leaf | 3837 | * make sure the path still points to our leaf |
3755 | * for possible call to del_ptr below | 3838 | * for possible call to del_ptr below |
@@ -3757,6 +3840,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
3757 | slot = path->slots[1]; | 3840 | slot = path->slots[1]; |
3758 | extent_buffer_get(leaf); | 3841 | extent_buffer_get(leaf); |
3759 | 3842 | ||
3843 | btrfs_set_path_blocking(path); | ||
3760 | wret = push_leaf_left(trans, root, path, 1, 1); | 3844 | wret = push_leaf_left(trans, root, path, 1, 1); |
3761 | if (wret < 0 && wret != -ENOSPC) | 3845 | if (wret < 0 && wret != -ENOSPC) |
3762 | ret = wret; | 3846 | ret = wret; |
@@ -4042,28 +4126,44 @@ next: | |||
4042 | int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) | 4126 | int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) |
4043 | { | 4127 | { |
4044 | int slot; | 4128 | int slot; |
4045 | int level = 1; | 4129 | int level; |
4046 | struct extent_buffer *c; | 4130 | struct extent_buffer *c; |
4047 | struct extent_buffer *next = NULL; | 4131 | struct extent_buffer *next; |
4048 | struct btrfs_key key; | 4132 | struct btrfs_key key; |
4049 | u32 nritems; | 4133 | u32 nritems; |
4050 | int ret; | 4134 | int ret; |
4135 | int old_spinning = path->leave_spinning; | ||
4136 | int force_blocking = 0; | ||
4051 | 4137 | ||
4052 | nritems = btrfs_header_nritems(path->nodes[0]); | 4138 | nritems = btrfs_header_nritems(path->nodes[0]); |
4053 | if (nritems == 0) | 4139 | if (nritems == 0) |
4054 | return 1; | 4140 | return 1; |
4055 | 4141 | ||
4056 | btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1); | 4142 | /* |
4143 | * we take the blocks in an order that upsets lockdep. Using | ||
4144 | * blocking mode is the only way around it. | ||
4145 | */ | ||
4146 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | ||
4147 | force_blocking = 1; | ||
4148 | #endif | ||
4057 | 4149 | ||
4150 | btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1); | ||
4151 | again: | ||
4152 | level = 1; | ||
4153 | next = NULL; | ||
4058 | btrfs_release_path(root, path); | 4154 | btrfs_release_path(root, path); |
4155 | |||
4059 | path->keep_locks = 1; | 4156 | path->keep_locks = 1; |
4157 | |||
4158 | if (!force_blocking) | ||
4159 | path->leave_spinning = 1; | ||
4160 | |||
4060 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | 4161 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); |
4061 | path->keep_locks = 0; | 4162 | path->keep_locks = 0; |
4062 | 4163 | ||
4063 | if (ret < 0) | 4164 | if (ret < 0) |
4064 | return ret; | 4165 | return ret; |
4065 | 4166 | ||
4066 | btrfs_set_path_blocking(path); | ||
4067 | nritems = btrfs_header_nritems(path->nodes[0]); | 4167 | nritems = btrfs_header_nritems(path->nodes[0]); |
4068 | /* | 4168 | /* |
4069 | * by releasing the path above we dropped all our locks. A balance | 4169 | * by releasing the path above we dropped all our locks. A balance |
@@ -4073,19 +4173,24 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) | |||
4073 | */ | 4173 | */ |
4074 | if (nritems > 0 && path->slots[0] < nritems - 1) { | 4174 | if (nritems > 0 && path->slots[0] < nritems - 1) { |
4075 | path->slots[0]++; | 4175 | path->slots[0]++; |
4176 | ret = 0; | ||
4076 | goto done; | 4177 | goto done; |
4077 | } | 4178 | } |
4078 | 4179 | ||
4079 | while (level < BTRFS_MAX_LEVEL) { | 4180 | while (level < BTRFS_MAX_LEVEL) { |
4080 | if (!path->nodes[level]) | 4181 | if (!path->nodes[level]) { |
4081 | return 1; | 4182 | ret = 1; |
4183 | goto done; | ||
4184 | } | ||
4082 | 4185 | ||
4083 | slot = path->slots[level] + 1; | 4186 | slot = path->slots[level] + 1; |
4084 | c = path->nodes[level]; | 4187 | c = path->nodes[level]; |
4085 | if (slot >= btrfs_header_nritems(c)) { | 4188 | if (slot >= btrfs_header_nritems(c)) { |
4086 | level++; | 4189 | level++; |
4087 | if (level == BTRFS_MAX_LEVEL) | 4190 | if (level == BTRFS_MAX_LEVEL) { |
4088 | return 1; | 4191 | ret = 1; |
4192 | goto done; | ||
4193 | } | ||
4089 | continue; | 4194 | continue; |
4090 | } | 4195 | } |
4091 | 4196 | ||
@@ -4094,16 +4199,22 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) | |||
4094 | free_extent_buffer(next); | 4199 | free_extent_buffer(next); |
4095 | } | 4200 | } |
4096 | 4201 | ||
4097 | /* the path was set to blocking above */ | 4202 | next = c; |
4098 | if (level == 1 && (path->locks[1] || path->skip_locking) && | 4203 | ret = read_block_for_search(NULL, root, path, &next, level, |
4099 | path->reada) | 4204 | slot, &key); |
4100 | reada_for_search(root, path, level, slot, 0); | 4205 | if (ret == -EAGAIN) |
4206 | goto again; | ||
4101 | 4207 | ||
4102 | next = read_node_slot(root, c, slot); | ||
4103 | if (!path->skip_locking) { | 4208 | if (!path->skip_locking) { |
4104 | btrfs_assert_tree_locked(c); | 4209 | ret = btrfs_try_spin_lock(next); |
4105 | btrfs_tree_lock(next); | 4210 | if (!ret) { |
4106 | btrfs_set_lock_blocking(next); | 4211 | btrfs_set_path_blocking(path); |
4212 | btrfs_tree_lock(next); | ||
4213 | if (!force_blocking) | ||
4214 | btrfs_clear_path_blocking(path, next); | ||
4215 | } | ||
4216 | if (force_blocking) | ||
4217 | btrfs_set_lock_blocking(next); | ||
4107 | } | 4218 | } |
4108 | break; | 4219 | break; |
4109 | } | 4220 | } |
@@ -4113,27 +4224,42 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) | |||
4113 | c = path->nodes[level]; | 4224 | c = path->nodes[level]; |
4114 | if (path->locks[level]) | 4225 | if (path->locks[level]) |
4115 | btrfs_tree_unlock(c); | 4226 | btrfs_tree_unlock(c); |
4227 | |||
4116 | free_extent_buffer(c); | 4228 | free_extent_buffer(c); |
4117 | path->nodes[level] = next; | 4229 | path->nodes[level] = next; |
4118 | path->slots[level] = 0; | 4230 | path->slots[level] = 0; |
4119 | if (!path->skip_locking) | 4231 | if (!path->skip_locking) |
4120 | path->locks[level] = 1; | 4232 | path->locks[level] = 1; |
4233 | |||
4121 | if (!level) | 4234 | if (!level) |
4122 | break; | 4235 | break; |
4123 | 4236 | ||
4124 | btrfs_set_path_blocking(path); | 4237 | ret = read_block_for_search(NULL, root, path, &next, level, |
4125 | if (level == 1 && path->locks[1] && path->reada) | 4238 | 0, &key); |
4126 | reada_for_search(root, path, level, slot, 0); | 4239 | if (ret == -EAGAIN) |
4127 | next = read_node_slot(root, next, 0); | 4240 | goto again; |
4241 | |||
4128 | if (!path->skip_locking) { | 4242 | if (!path->skip_locking) { |
4129 | btrfs_assert_tree_locked(path->nodes[level]); | 4243 | btrfs_assert_tree_locked(path->nodes[level]); |
4130 | btrfs_tree_lock(next); | 4244 | ret = btrfs_try_spin_lock(next); |
4131 | btrfs_set_lock_blocking(next); | 4245 | if (!ret) { |
4246 | btrfs_set_path_blocking(path); | ||
4247 | btrfs_tree_lock(next); | ||
4248 | if (!force_blocking) | ||
4249 | btrfs_clear_path_blocking(path, next); | ||
4250 | } | ||
4251 | if (force_blocking) | ||
4252 | btrfs_set_lock_blocking(next); | ||
4132 | } | 4253 | } |
4133 | } | 4254 | } |
4255 | ret = 0; | ||
4134 | done: | 4256 | done: |
4135 | unlock_up(path, 0, 1); | 4257 | unlock_up(path, 0, 1); |
4136 | return 0; | 4258 | path->leave_spinning = old_spinning; |
4259 | if (!old_spinning) | ||
4260 | btrfs_set_path_blocking(path); | ||
4261 | |||
4262 | return ret; | ||
4137 | } | 4263 | } |
4138 | 4264 | ||
4139 | /* | 4265 | /* |
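The rewritten btrfs_next_leaf() above first tries btrfs_try_spin_lock() on the next node and only escalates to a blocking lock (marking the whole path blocking) when the spin attempt fails, except under lockdep where force_blocking keeps everything blocking. A standalone model of that try-spin-then-block shape; the pthread primitives here are only an analogy for the custom btrfs locks:

	#include <pthread.h>

	struct node_lock {
		pthread_mutex_t m;
	};

	static void lock_next_node(struct node_lock *next, int *path_blocking)
	{
		/* fast path: grab the lock without sleeping, like btrfs_try_spin_lock() */
		if (pthread_mutex_trylock(&next->m) == 0)
			return;
		/* slow path: mark the path blocking so holders of upper-level
		 * locks may sleep, then take the lock for real */
		*path_blocking = 1;
		pthread_mutex_lock(&next->m);
		/* btrfs_next_leaf() then clears the blocking state again,
		 * unless force_blocking was set for lockdep */
		*path_blocking = 0;
	}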
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 5e1d4e30e9d8..ad96495dedc5 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
@@ -45,6 +45,13 @@ struct btrfs_ordered_sum; | |||
45 | 45 | ||
46 | #define BTRFS_MAX_LEVEL 8 | 46 | #define BTRFS_MAX_LEVEL 8 |
47 | 47 | ||
48 | /* | ||
49 | * files bigger than this get some pre-flushing when they are added | ||
50 | * to the ordered operations list. That way we limit the total | ||
51 | * work done by the commit | ||
52 | */ | ||
53 | #define BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT (8 * 1024 * 1024) | ||
54 | |||
48 | /* holds pointers to all of the tree roots */ | 55 | /* holds pointers to all of the tree roots */ |
49 | #define BTRFS_ROOT_TREE_OBJECTID 1ULL | 56 | #define BTRFS_ROOT_TREE_OBJECTID 1ULL |
50 | 57 | ||
@@ -136,12 +143,15 @@ static int btrfs_csum_sizes[] = { 4, 0 }; | |||
136 | #define BTRFS_FT_MAX 9 | 143 | #define BTRFS_FT_MAX 9 |
137 | 144 | ||
138 | /* | 145 | /* |
139 | * the key defines the order in the tree, and so it also defines (optimal) | 146 | * The key defines the order in the tree, and so it also defines (optimal) |
140 | * block layout. objectid corresonds to the inode number. The flags | 147 | * block layout. |
141 | * tells us things about the object, and is a kind of stream selector. | 148 | * |
142 | * so for a given inode, keys with flags of 1 might refer to the inode | 149 | * objectid corresponds to the inode number. |
143 | * data, flags of 2 may point to file data in the btree and flags == 3 | 150 | * |
144 | * may point to extents. | 151 | * type tells us things about the object, and is a kind of stream selector. |
152 | * so for a given inode, keys with type of 1 might refer to the inode data, | ||
153 | * type of 2 may point to file data in the btree and type == 3 may point to | ||
154 | * extents. | ||
145 | * | 155 | * |
146 | * offset is the starting byte offset for this key in the stream. | 156 | * offset is the starting byte offset for this key in the stream. |
147 | * | 157 | * |
@@ -193,7 +203,7 @@ struct btrfs_dev_item { | |||
193 | 203 | ||
194 | /* | 204 | /* |
195 | * starting byte of this partition on the device, | 205 | * starting byte of this partition on the device, |
196 | * to allowr for stripe alignment in the future | 206 | * to allow for stripe alignment in the future |
197 | */ | 207 | */ |
198 | __le64 start_offset; | 208 | __le64 start_offset; |
199 | 209 | ||
@@ -401,15 +411,16 @@ struct btrfs_path { | |||
401 | int locks[BTRFS_MAX_LEVEL]; | 411 | int locks[BTRFS_MAX_LEVEL]; |
402 | int reada; | 412 | int reada; |
403 | /* keep some upper locks as we walk down */ | 413 | /* keep some upper locks as we walk down */ |
404 | int keep_locks; | ||
405 | int skip_locking; | ||
406 | int lowest_level; | 414 | int lowest_level; |
407 | 415 | ||
408 | /* | 416 | /* |
409 | * set by btrfs_split_item, tells search_slot to keep all locks | 417 | * set by btrfs_split_item, tells search_slot to keep all locks |
410 | * and to force calls to keep space in the nodes | 418 | * and to force calls to keep space in the nodes |
411 | */ | 419 | */ |
412 | int search_for_split; | 420 | unsigned int search_for_split:1; |
421 | unsigned int keep_locks:1; | ||
422 | unsigned int skip_locking:1; | ||
423 | unsigned int leave_spinning:1; | ||
413 | }; | 424 | }; |
414 | 425 | ||
415 | /* | 426 | /* |
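Converting the four btrfs_path flags above to 1-bit bitfields packs them into a single word instead of four ints. A throwaway user-space check of the size difference (exact numbers are ABI-dependent):

	#include <stdio.h>

	struct flags_as_ints {			/* old layout */
		int search_for_split;
		int keep_locks;
		int skip_locking;
		int leave_spinning;
	};

	struct flags_as_bitfields {		/* new layout */
		unsigned int search_for_split:1;
		unsigned int keep_locks:1;
		unsigned int skip_locking:1;
		unsigned int leave_spinning:1;
	};

	int main(void)
	{
		/* typically 16 vs 4 bytes on common ABIs */
		printf("ints: %zu, bitfields: %zu\n",
		       sizeof(struct flags_as_ints),
		       sizeof(struct flags_as_bitfields));
		return 0;
	}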
@@ -625,18 +636,35 @@ struct btrfs_space_info { | |||
625 | struct rw_semaphore groups_sem; | 636 | struct rw_semaphore groups_sem; |
626 | }; | 637 | }; |
627 | 638 | ||
628 | struct btrfs_free_space { | 639 | /* |
629 | struct rb_node bytes_index; | 640 | * free clusters are used to claim free space in relatively large chunks, |
630 | struct rb_node offset_index; | 641 | * allowing us to do less seeky writes. They are used for all metadata |
631 | u64 offset; | 642 | * allocations and data allocations in ssd mode. |
632 | u64 bytes; | 643 | */ |
644 | struct btrfs_free_cluster { | ||
645 | spinlock_t lock; | ||
646 | spinlock_t refill_lock; | ||
647 | struct rb_root root; | ||
648 | |||
649 | /* largest extent in this cluster */ | ||
650 | u64 max_size; | ||
651 | |||
652 | /* first extent starting offset */ | ||
653 | u64 window_start; | ||
654 | |||
655 | struct btrfs_block_group_cache *block_group; | ||
656 | /* | ||
657 | * when a cluster is allocated from a block group, we put the | ||
658 | * cluster onto a list in the block group so that it can | ||
659 | * be freed before the block group is freed. | ||
660 | */ | ||
661 | struct list_head block_group_list; | ||
633 | }; | 662 | }; |
634 | 663 | ||
635 | struct btrfs_block_group_cache { | 664 | struct btrfs_block_group_cache { |
636 | struct btrfs_key key; | 665 | struct btrfs_key key; |
637 | struct btrfs_block_group_item item; | 666 | struct btrfs_block_group_item item; |
638 | spinlock_t lock; | 667 | spinlock_t lock; |
639 | struct mutex alloc_mutex; | ||
640 | struct mutex cache_mutex; | 668 | struct mutex cache_mutex; |
641 | u64 pinned; | 669 | u64 pinned; |
642 | u64 reserved; | 670 | u64 reserved; |
@@ -648,6 +676,7 @@ struct btrfs_block_group_cache { | |||
648 | struct btrfs_space_info *space_info; | 676 | struct btrfs_space_info *space_info; |
649 | 677 | ||
650 | /* free space cache stuff */ | 678 | /* free space cache stuff */ |
679 | spinlock_t tree_lock; | ||
651 | struct rb_root free_space_bytes; | 680 | struct rb_root free_space_bytes; |
652 | struct rb_root free_space_offset; | 681 | struct rb_root free_space_offset; |
653 | 682 | ||
@@ -659,6 +688,11 @@ struct btrfs_block_group_cache { | |||
659 | 688 | ||
660 | /* usage count */ | 689 | /* usage count */ |
661 | atomic_t count; | 690 | atomic_t count; |
691 | |||
692 | /* List of struct btrfs_free_cluster entries for this block group. | ||
693 | * Today it will only have one thing on it, but that may change | ||
694 | */ | ||
695 | struct list_head cluster_list; | ||
662 | }; | 696 | }; |
663 | 697 | ||
664 | struct btrfs_leaf_ref_tree { | 698 | struct btrfs_leaf_ref_tree { |
@@ -688,15 +722,18 @@ struct btrfs_fs_info { | |||
688 | struct rb_root block_group_cache_tree; | 722 | struct rb_root block_group_cache_tree; |
689 | 723 | ||
690 | struct extent_io_tree pinned_extents; | 724 | struct extent_io_tree pinned_extents; |
691 | struct extent_io_tree pending_del; | ||
692 | struct extent_io_tree extent_ins; | ||
693 | 725 | ||
694 | /* logical->physical extent mapping */ | 726 | /* logical->physical extent mapping */ |
695 | struct btrfs_mapping_tree mapping_tree; | 727 | struct btrfs_mapping_tree mapping_tree; |
696 | 728 | ||
697 | u64 generation; | 729 | u64 generation; |
698 | u64 last_trans_committed; | 730 | u64 last_trans_committed; |
699 | u64 last_trans_new_blockgroup; | 731 | |
732 | /* | ||
733 | * this is updated to the current trans every time a full commit | ||
734 | * is required instead of the faster short fsync log commits | ||
735 | */ | ||
736 | u64 last_trans_log_full_commit; | ||
700 | u64 open_ioctl_trans; | 737 | u64 open_ioctl_trans; |
701 | unsigned long mount_opt; | 738 | unsigned long mount_opt; |
702 | u64 max_extent; | 739 | u64 max_extent; |
@@ -717,12 +754,20 @@ struct btrfs_fs_info { | |||
717 | struct mutex tree_log_mutex; | 754 | struct mutex tree_log_mutex; |
718 | struct mutex transaction_kthread_mutex; | 755 | struct mutex transaction_kthread_mutex; |
719 | struct mutex cleaner_mutex; | 756 | struct mutex cleaner_mutex; |
720 | struct mutex extent_ins_mutex; | ||
721 | struct mutex pinned_mutex; | ||
722 | struct mutex chunk_mutex; | 757 | struct mutex chunk_mutex; |
723 | struct mutex drop_mutex; | 758 | struct mutex drop_mutex; |
724 | struct mutex volume_mutex; | 759 | struct mutex volume_mutex; |
725 | struct mutex tree_reloc_mutex; | 760 | struct mutex tree_reloc_mutex; |
761 | |||
762 | /* | ||
763 | * this protects the ordered operations list only while we are | ||
764 | * processing all of the entries on it. This way we make | ||
765 | * sure the commit code doesn't find the list temporarily empty | ||
766 | * because another function happens to be doing a non-waiting preflush | ||
767 | * before jumping into the main commit. | ||
768 | */ | ||
769 | struct mutex ordered_operations_mutex; | ||
770 | |||
726 | struct list_head trans_list; | 771 | struct list_head trans_list; |
727 | struct list_head hashers; | 772 | struct list_head hashers; |
728 | struct list_head dead_roots; | 773 | struct list_head dead_roots; |
@@ -737,10 +782,29 @@ struct btrfs_fs_info { | |||
737 | * ordered extents | 782 | * ordered extents |
738 | */ | 783 | */ |
739 | spinlock_t ordered_extent_lock; | 784 | spinlock_t ordered_extent_lock; |
785 | |||
786 | /* | ||
787 | * all of the data=ordered extents pending writeback; | ||
788 | * these can span multiple transactions and basically include | ||
789 | * every dirty data page that isn't from nodatacow | ||
790 | */ | ||
740 | struct list_head ordered_extents; | 791 | struct list_head ordered_extents; |
792 | |||
793 | /* | ||
794 | * all of the inodes that have delalloc bytes. It is possible for | ||
795 | * this list to be empty even when there is still dirty data=ordered | ||
796 | * extents waiting to finish IO. | ||
797 | */ | ||
741 | struct list_head delalloc_inodes; | 798 | struct list_head delalloc_inodes; |
742 | 799 | ||
743 | /* | 800 | /* |
801 | * special rename and truncate targets that must be on disk before | ||
802 | * we're allowed to commit. This is basically the ext3 style | ||
803 | * data=ordered list. | ||
804 | */ | ||
805 | struct list_head ordered_operations; | ||
806 | |||
807 | /* | ||
744 | * there is a pool of worker threads for checksumming during writes | 808 | * there is a pool of worker threads for checksumming during writes |
745 | * and a pool for checksumming after reads. This is because readers | 809 | * and a pool for checksumming after reads. This is because readers |
746 | * can run with FS locks held, and the writers may be waiting for | 810 | * can run with FS locks held, and the writers may be waiting for |
@@ -781,6 +845,11 @@ struct btrfs_fs_info { | |||
781 | atomic_t throttle_gen; | 845 | atomic_t throttle_gen; |
782 | 846 | ||
783 | u64 total_pinned; | 847 | u64 total_pinned; |
848 | |||
849 | /* protected by the delalloc lock, used to keep from writing | ||
850 | * metadata until there is a nice batch | ||
851 | */ | ||
852 | u64 dirty_metadata_bytes; | ||
784 | struct list_head dirty_cowonly_roots; | 853 | struct list_head dirty_cowonly_roots; |
785 | 854 | ||
786 | struct btrfs_fs_devices *fs_devices; | 855 | struct btrfs_fs_devices *fs_devices; |
@@ -795,8 +864,12 @@ struct btrfs_fs_info { | |||
795 | spinlock_t delalloc_lock; | 864 | spinlock_t delalloc_lock; |
796 | spinlock_t new_trans_lock; | 865 | spinlock_t new_trans_lock; |
797 | u64 delalloc_bytes; | 866 | u64 delalloc_bytes; |
798 | u64 last_alloc; | 867 | |
799 | u64 last_data_alloc; | 868 | /* data_alloc_cluster is only used in ssd mode */ |
869 | struct btrfs_free_cluster data_alloc_cluster; | ||
870 | |||
871 | /* all metadata allocations go through this cluster */ | ||
872 | struct btrfs_free_cluster meta_alloc_cluster; | ||
800 | 873 | ||
801 | spinlock_t ref_cache_lock; | 874 | spinlock_t ref_cache_lock; |
802 | u64 total_ref_cache_size; | 875 | u64 total_ref_cache_size; |
@@ -888,7 +961,6 @@ struct btrfs_root { | |||
888 | }; | 961 | }; |
889 | 962 | ||
890 | /* | 963 | /* |
891 | |||
892 | * inode items have the data typically returned from stat and store other | 964 | * inode items have the data typically returned from stat and store other |
893 | * info about object characteristics. There is one for every file and dir in | 965 | * info about object characteristics. There is one for every file and dir in |
894 | * the FS | 966 | * the FS |
@@ -919,7 +991,7 @@ struct btrfs_root { | |||
919 | #define BTRFS_EXTENT_CSUM_KEY 128 | 991 | #define BTRFS_EXTENT_CSUM_KEY 128 |
920 | 992 | ||
921 | /* | 993 | /* |
922 | * root items point to tree roots. There are typically in the root | 994 | * root items point to tree roots. They are typically in the root |
923 | * tree used by the super block to find all the other trees | 995 | * tree used by the super block to find all the other trees |
924 | */ | 996 | */ |
925 | #define BTRFS_ROOT_ITEM_KEY 132 | 997 | #define BTRFS_ROOT_ITEM_KEY 132 |
@@ -966,6 +1038,8 @@ struct btrfs_root { | |||
966 | #define BTRFS_MOUNT_SSD (1 << 3) | 1038 | #define BTRFS_MOUNT_SSD (1 << 3) |
967 | #define BTRFS_MOUNT_DEGRADED (1 << 4) | 1039 | #define BTRFS_MOUNT_DEGRADED (1 << 4) |
968 | #define BTRFS_MOUNT_COMPRESS (1 << 5) | 1040 | #define BTRFS_MOUNT_COMPRESS (1 << 5) |
1041 | #define BTRFS_MOUNT_NOTREELOG (1 << 6) | ||
1042 | #define BTRFS_MOUNT_FLUSHONCOMMIT (1 << 7) | ||
969 | 1043 | ||
970 | #define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) | 1044 | #define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) |
971 | #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) | 1045 | #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) |
@@ -1704,18 +1778,16 @@ static inline struct dentry *fdentry(struct file *file) | |||
1704 | } | 1778 | } |
1705 | 1779 | ||
1706 | /* extent-tree.c */ | 1780 | /* extent-tree.c */ |
1781 | void btrfs_put_block_group(struct btrfs_block_group_cache *cache); | ||
1782 | int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, | ||
1783 | struct btrfs_root *root, unsigned long count); | ||
1707 | int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len); | 1784 | int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len); |
1708 | int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans, | ||
1709 | struct btrfs_root *root, u64 bytenr, | ||
1710 | u64 num_bytes, u32 *refs); | ||
1711 | int btrfs_update_pinned_extents(struct btrfs_root *root, | 1785 | int btrfs_update_pinned_extents(struct btrfs_root *root, |
1712 | u64 bytenr, u64 num, int pin); | 1786 | u64 bytenr, u64 num, int pin); |
1713 | int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, | 1787 | int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, |
1714 | struct btrfs_root *root, struct extent_buffer *leaf); | 1788 | struct btrfs_root *root, struct extent_buffer *leaf); |
1715 | int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, | 1789 | int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, |
1716 | struct btrfs_root *root, u64 objectid, u64 bytenr); | 1790 | struct btrfs_root *root, u64 objectid, u64 bytenr); |
1717 | int btrfs_extent_post_op(struct btrfs_trans_handle *trans, | ||
1718 | struct btrfs_root *root); | ||
1719 | int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy); | 1791 | int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy); |
1720 | struct btrfs_block_group_cache *btrfs_lookup_block_group( | 1792 | struct btrfs_block_group_cache *btrfs_lookup_block_group( |
1721 | struct btrfs_fs_info *info, | 1793 | struct btrfs_fs_info *info, |
@@ -1777,7 +1849,7 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, | |||
1777 | u64 root_objectid, u64 ref_generation, | 1849 | u64 root_objectid, u64 ref_generation, |
1778 | u64 owner_objectid); | 1850 | u64 owner_objectid); |
1779 | int btrfs_update_extent_ref(struct btrfs_trans_handle *trans, | 1851 | int btrfs_update_extent_ref(struct btrfs_trans_handle *trans, |
1780 | struct btrfs_root *root, u64 bytenr, | 1852 | struct btrfs_root *root, u64 bytenr, u64 num_bytes, |
1781 | u64 orig_parent, u64 parent, | 1853 | u64 orig_parent, u64 parent, |
1782 | u64 root_objectid, u64 ref_generation, | 1854 | u64 root_objectid, u64 ref_generation, |
1783 | u64 owner_objectid); | 1855 | u64 owner_objectid); |
@@ -1838,7 +1910,7 @@ int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key, | |||
1838 | int btrfs_cow_block(struct btrfs_trans_handle *trans, | 1910 | int btrfs_cow_block(struct btrfs_trans_handle *trans, |
1839 | struct btrfs_root *root, struct extent_buffer *buf, | 1911 | struct btrfs_root *root, struct extent_buffer *buf, |
1840 | struct extent_buffer *parent, int parent_slot, | 1912 | struct extent_buffer *parent, int parent_slot, |
1841 | struct extent_buffer **cow_ret, u64 prealloc_dest); | 1913 | struct extent_buffer **cow_ret); |
1842 | int btrfs_copy_root(struct btrfs_trans_handle *trans, | 1914 | int btrfs_copy_root(struct btrfs_trans_handle *trans, |
1843 | struct btrfs_root *root, | 1915 | struct btrfs_root *root, |
1844 | struct extent_buffer *buf, | 1916 | struct extent_buffer *buf, |
@@ -2060,7 +2132,7 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset, | |||
2060 | unsigned long btrfs_force_ra(struct address_space *mapping, | 2132 | unsigned long btrfs_force_ra(struct address_space *mapping, |
2061 | struct file_ra_state *ra, struct file *file, | 2133 | struct file_ra_state *ra, struct file *file, |
2062 | pgoff_t offset, pgoff_t last_index); | 2134 | pgoff_t offset, pgoff_t last_index); |
2063 | int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page); | 2135 | int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); |
2064 | int btrfs_readpage(struct file *file, struct page *page); | 2136 | int btrfs_readpage(struct file *file, struct page *page); |
2065 | void btrfs_delete_inode(struct inode *inode); | 2137 | void btrfs_delete_inode(struct inode *inode); |
2066 | void btrfs_put_inode(struct inode *inode); | 2138 | void btrfs_put_inode(struct inode *inode); |
@@ -2133,21 +2205,4 @@ int btrfs_check_acl(struct inode *inode, int mask); | |||
2133 | int btrfs_init_acl(struct inode *inode, struct inode *dir); | 2205 | int btrfs_init_acl(struct inode *inode, struct inode *dir); |
2134 | int btrfs_acl_chmod(struct inode *inode); | 2206 | int btrfs_acl_chmod(struct inode *inode); |
2135 | 2207 | ||
2136 | /* free-space-cache.c */ | ||
2137 | int btrfs_add_free_space(struct btrfs_block_group_cache *block_group, | ||
2138 | u64 bytenr, u64 size); | ||
2139 | int btrfs_add_free_space_lock(struct btrfs_block_group_cache *block_group, | ||
2140 | u64 offset, u64 bytes); | ||
2141 | int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, | ||
2142 | u64 bytenr, u64 size); | ||
2143 | int btrfs_remove_free_space_lock(struct btrfs_block_group_cache *block_group, | ||
2144 | u64 offset, u64 bytes); | ||
2145 | void btrfs_remove_free_space_cache(struct btrfs_block_group_cache | ||
2146 | *block_group); | ||
2147 | struct btrfs_free_space *btrfs_find_free_space(struct btrfs_block_group_cache | ||
2148 | *block_group, u64 offset, | ||
2149 | u64 bytes); | ||
2150 | void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, | ||
2151 | u64 bytes); | ||
2152 | u64 btrfs_block_group_free_space(struct btrfs_block_group_cache *block_group); | ||
2153 | #endif | 2208 | #endif |
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c new file mode 100644 index 000000000000..d6c01c096a40 --- /dev/null +++ b/fs/btrfs/delayed-ref.c | |||
@@ -0,0 +1,668 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2009 Oracle. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public | ||
6 | * License v2 as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
11 | * General Public License for more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public | ||
14 | * License along with this program; if not, write to the | ||
15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
16 | * Boston, MA 021110-1307, USA. | ||
17 | */ | ||
18 | |||
19 | #include <linux/sched.h> | ||
20 | #include <linux/sort.h> | ||
21 | #include "ctree.h" | ||
22 | #include "delayed-ref.h" | ||
23 | #include "transaction.h" | ||
24 | |||
25 | /* | ||
26 | * delayed back reference update tracking. For subvolume trees | ||
27 | * we queue up extent allocations and backref maintenance for | ||
28 | * delayed processing. This avoids deep call chains where we | ||
29 | * add extents in the middle of btrfs_search_slot, and it allows | ||
30 | * us to buffer up frequently modified backrefs in an rb tree instead | ||
31 | * of hammering updates on the extent allocation tree. | ||
32 | * | ||
33 | * Right now this code is only used for reference counted trees, but | ||
34 | * the long term goal is to get rid of the similar code for delayed | ||
35 | * extent tree modifications. | ||
36 | */ | ||
37 | |||
38 | /* | ||
39 | * entries in the rb tree are ordered by the byte number of the extent | ||
40 | * and by the byte number of the parent block. | ||
41 | */ | ||
42 | static int comp_entry(struct btrfs_delayed_ref_node *ref, | ||
43 | u64 bytenr, u64 parent) | ||
44 | { | ||
45 | if (bytenr < ref->bytenr) | ||
46 | return -1; | ||
47 | if (bytenr > ref->bytenr) | ||
48 | return 1; | ||
49 | if (parent < ref->parent) | ||
50 | return -1; | ||
51 | if (parent > ref->parent) | ||
52 | return 1; | ||
53 | return 0; | ||
54 | } | ||
55 | |||
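comp_entry() orders the rbtree by bytenr first and parent second; since head nodes carry parent == (u64)-1, a head sorts after every regular ref for its extent, which is what lets a search on (bytenr, (u64)-1) land on or past the last entry for an extent. A standalone mirror of the comparator with two illustrative checks:

	#include <assert.h>
	#include <stdint.h>

	/* compare a (bytenr, parent) probe against an existing entry;
	 * bytenr is the major key, parent the minor one */
	static int comp_probe(uint64_t entry_bytenr, uint64_t entry_parent,
			      uint64_t bytenr, uint64_t parent)
	{
		if (bytenr < entry_bytenr)
			return -1;
		if (bytenr > entry_bytenr)
			return 1;
		if (parent < entry_parent)
			return -1;
		if (parent > entry_parent)
			return 1;
		return 0;
	}

	int main(void)
	{
		/* bytenr dominates: a probe for a lower extent sorts first */
		assert(comp_probe(8192, 0, 4096, 4096) == -1);
		/* a head probe (parent == (u64)-1) sorts after every regular
		 * ref on the same bytenr */
		assert(comp_probe(4096, 8192, 4096, (uint64_t)-1) == 1);
		return 0;
	}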
56 | /* | ||
57 | * insert a new ref into the rbtree. This returns any existing refs | ||
58 | * for the same (bytenr,parent) tuple, or NULL if the new node was properly | ||
59 | * inserted. | ||
60 | */ | ||
61 | static struct btrfs_delayed_ref_node *tree_insert(struct rb_root *root, | ||
62 | u64 bytenr, u64 parent, | ||
63 | struct rb_node *node) | ||
64 | { | ||
65 | struct rb_node **p = &root->rb_node; | ||
66 | struct rb_node *parent_node = NULL; | ||
67 | struct btrfs_delayed_ref_node *entry; | ||
68 | int cmp; | ||
69 | |||
70 | while (*p) { | ||
71 | parent_node = *p; | ||
72 | entry = rb_entry(parent_node, struct btrfs_delayed_ref_node, | ||
73 | rb_node); | ||
74 | |||
75 | cmp = comp_entry(entry, bytenr, parent); | ||
76 | if (cmp < 0) | ||
77 | p = &(*p)->rb_left; | ||
78 | else if (cmp > 0) | ||
79 | p = &(*p)->rb_right; | ||
80 | else | ||
81 | return entry; | ||
82 | } | ||
83 | |||
84 | entry = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); | ||
85 | rb_link_node(node, parent_node, p); | ||
86 | rb_insert_color(node, root); | ||
87 | return NULL; | ||
88 | } | ||
89 | |||
90 | /* | ||
91 | * find an entry based on (bytenr,parent). This returns the delayed | ||
92 | * ref if it was able to find one, or NULL if nothing was in that spot | ||
93 | */ | ||
94 | static struct btrfs_delayed_ref_node *tree_search(struct rb_root *root, | ||
95 | u64 bytenr, u64 parent, | ||
96 | struct btrfs_delayed_ref_node **last) | ||
97 | { | ||
98 | struct rb_node *n = root->rb_node; | ||
99 | struct btrfs_delayed_ref_node *entry; | ||
100 | int cmp; | ||
101 | |||
102 | while (n) { | ||
103 | entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node); | ||
104 | WARN_ON(!entry->in_tree); | ||
105 | if (last) | ||
106 | *last = entry; | ||
107 | |||
108 | cmp = comp_entry(entry, bytenr, parent); | ||
109 | if (cmp < 0) | ||
110 | n = n->rb_left; | ||
111 | else if (cmp > 0) | ||
112 | n = n->rb_right; | ||
113 | else | ||
114 | return entry; | ||
115 | } | ||
116 | return NULL; | ||
117 | } | ||
118 | |||
119 | int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans, | ||
120 | struct btrfs_delayed_ref_head *head) | ||
121 | { | ||
122 | struct btrfs_delayed_ref_root *delayed_refs; | ||
123 | |||
124 | delayed_refs = &trans->transaction->delayed_refs; | ||
125 | assert_spin_locked(&delayed_refs->lock); | ||
126 | if (mutex_trylock(&head->mutex)) | ||
127 | return 0; | ||
128 | |||
129 | atomic_inc(&head->node.refs); | ||
130 | spin_unlock(&delayed_refs->lock); | ||
131 | |||
132 | mutex_lock(&head->mutex); | ||
133 | spin_lock(&delayed_refs->lock); | ||
134 | if (!head->node.in_tree) { | ||
135 | mutex_unlock(&head->mutex); | ||
136 | btrfs_put_delayed_ref(&head->node); | ||
137 | return -EAGAIN; | ||
138 | } | ||
139 | btrfs_put_delayed_ref(&head->node); | ||
140 | return 0; | ||
141 | } | ||
142 | |||
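btrfs_delayed_ref_lock() is the standard dance for taking a sleeping mutex while holding a spinlock: try without sleeping, otherwise pin the object, drop the spinlock, sleep on the mutex, retake the spinlock, and revalidate (here via node.in_tree), returning -EAGAIN if the object went away. A generic user-space model of the same shape, with pthreads standing in for the kernel locks and the refcount get/put reduced to comments:

	#include <errno.h>
	#include <pthread.h>

	struct obj {
		pthread_mutex_t mutex;	/* the sleeping lock we want */
		int in_tree;		/* validity flag, protected by *outer */
	};

	static int lock_obj(pthread_spinlock_t *outer, struct obj *o)
	{
		if (pthread_mutex_trylock(&o->mutex) == 0)
			return 0;	/* got it without sleeping */

		/* pin(o) would go here so o can't be freed under us */
		pthread_spin_unlock(outer);
		pthread_mutex_lock(&o->mutex);	/* may sleep */
		pthread_spin_lock(outer);

		if (!o->in_tree) {	/* object changed while we slept: retry */
			pthread_mutex_unlock(&o->mutex);
			/* unpin(o) */
			return -EAGAIN;
		}
		/* unpin(o) */
		return 0;
	}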
143 | int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans, | ||
144 | struct list_head *cluster, u64 start) | ||
145 | { | ||
146 | int count = 0; | ||
147 | struct btrfs_delayed_ref_root *delayed_refs; | ||
148 | struct rb_node *node; | ||
149 | struct btrfs_delayed_ref_node *ref; | ||
150 | struct btrfs_delayed_ref_head *head; | ||
151 | |||
152 | delayed_refs = &trans->transaction->delayed_refs; | ||
153 | if (start == 0) { | ||
154 | node = rb_first(&delayed_refs->root); | ||
155 | } else { | ||
156 | ref = NULL; | ||
157 | tree_search(&delayed_refs->root, start, (u64)-1, &ref); | ||
158 | if (ref) { | ||
159 | struct btrfs_delayed_ref_node *tmp; | ||
160 | |||
161 | node = rb_prev(&ref->rb_node); | ||
162 | while (node) { | ||
163 | tmp = rb_entry(node, | ||
164 | struct btrfs_delayed_ref_node, | ||
165 | rb_node); | ||
166 | if (tmp->bytenr < start) | ||
167 | break; | ||
168 | ref = tmp; | ||
169 | node = rb_prev(&ref->rb_node); | ||
170 | } | ||
171 | node = &ref->rb_node; | ||
172 | } else | ||
173 | node = rb_first(&delayed_refs->root); | ||
174 | } | ||
175 | again: | ||
176 | while (node && count < 32) { | ||
177 | ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); | ||
178 | if (btrfs_delayed_ref_is_head(ref)) { | ||
179 | head = btrfs_delayed_node_to_head(ref); | ||
180 | if (list_empty(&head->cluster)) { | ||
181 | list_add_tail(&head->cluster, cluster); | ||
182 | delayed_refs->run_delayed_start = | ||
183 | head->node.bytenr; | ||
184 | count++; | ||
185 | |||
186 | WARN_ON(delayed_refs->num_heads_ready == 0); | ||
187 | delayed_refs->num_heads_ready--; | ||
188 | } else if (count) { | ||
189 | /* the goal of the clustering is to find extents | ||
190 | * that are likely to end up in the same extent | ||
191 | * leaf on disk. So, we don't want them spread | ||
192 | * all over the tree. Stop now if we've hit | ||
193 | * a head that was already in use | ||
194 | */ | ||
195 | break; | ||
196 | } | ||
197 | } | ||
198 | node = rb_next(node); | ||
199 | } | ||
200 | if (count) { | ||
201 | return 0; | ||
202 | } else if (start) { | ||
203 | /* | ||
204 | * we've gone to the end of the rbtree without finding any | ||
205 | * clusters. start from the beginning and try again | ||
206 | */ | ||
207 | start = 0; | ||
208 | node = rb_first(&delayed_refs->root); | ||
209 | goto again; | ||
210 | } | ||
211 | return 1; | ||
212 | } | ||
213 | |||
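btrfs_find_ref_cluster() walks the rbtree from run_delayed_start, batching up to 32 heads that are not already on a cluster, stopping early at a busy head to keep the batch compact, and wrapping to the front of the tree once if the tail yields nothing. A standalone model with a sorted array standing in for the rbtree:

	#include <stddef.h>

	#define CLUSTER_MAX 32

	struct head_model { unsigned long long bytenr; int busy; };

	static size_t find_cluster(struct head_model *heads, size_t n,
				   unsigned long long start,
				   struct head_model **cluster)
	{
		size_t i = 0, count = 0;
		int wrapped = 0;

		while (i < n && heads[i].bytenr < start)	/* seek to start */
			i++;
	again:
		for (; i < n && count < CLUSTER_MAX; i++) {
			if (!heads[i].busy) {
				heads[i].busy = 1;
				cluster[count++] = &heads[i];
			} else if (count) {
				/* hit an in-use head: keep the cluster compact */
				return count;
			}
		}
		if (!count && start && !wrapped) {	/* wrap and retry once */
			wrapped = 1;
			start = 0;
			i = 0;
			goto again;
		}
		return count;
	}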
214 | /* | ||
215 | * This checks to see if there are any delayed refs in the | ||
216 | * rbtree for a given bytenr. It returns one if it finds any | ||
217 | * and zero otherwise. | ||
218 | * | ||
219 | * If it only finds a head node, it returns 0. | ||
220 | * | ||
221 | * The idea is to use this when deciding if you can safely delete an | ||
222 | * extent from the extent allocation tree. There may be a pending | ||
223 | * ref in the rbtree that adds or removes references, so as long as this | ||
224 | * returns one you need to leave the BTRFS_EXTENT_ITEM in the extent | ||
225 | * allocation tree. | ||
226 | */ | ||
227 | int btrfs_delayed_ref_pending(struct btrfs_trans_handle *trans, u64 bytenr) | ||
228 | { | ||
229 | struct btrfs_delayed_ref_node *ref; | ||
230 | struct btrfs_delayed_ref_root *delayed_refs; | ||
231 | struct rb_node *prev_node; | ||
232 | int ret = 0; | ||
233 | |||
234 | delayed_refs = &trans->transaction->delayed_refs; | ||
235 | spin_lock(&delayed_refs->lock); | ||
236 | |||
237 | ref = tree_search(&delayed_refs->root, bytenr, (u64)-1, NULL); | ||
238 | if (ref) { | ||
239 | prev_node = rb_prev(&ref->rb_node); | ||
240 | if (!prev_node) | ||
241 | goto out; | ||
242 | ref = rb_entry(prev_node, struct btrfs_delayed_ref_node, | ||
243 | rb_node); | ||
244 | if (ref->bytenr == bytenr) | ||
245 | ret = 1; | ||
246 | } | ||
247 | out: | ||
248 | spin_unlock(&delayed_refs->lock); | ||
249 | return ret; | ||
250 | } | ||
251 | |||
252 | /* | ||
253 | * helper function to lookup reference count | ||
254 | * | ||
255 | * the head node for delayed ref is used to store the sum of all the | ||
256 | * reference count modifications queued up in the rbtree. This way you | ||
257 | * can check to see what the reference count would be if all of the | ||
258 | * delayed refs are processed. | ||
259 | */ | ||
260 | int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans, | ||
261 | struct btrfs_root *root, u64 bytenr, | ||
262 | u64 num_bytes, u32 *refs) | ||
263 | { | ||
264 | struct btrfs_delayed_ref_node *ref; | ||
265 | struct btrfs_delayed_ref_head *head; | ||
266 | struct btrfs_delayed_ref_root *delayed_refs; | ||
267 | struct btrfs_path *path; | ||
268 | struct extent_buffer *leaf; | ||
269 | struct btrfs_extent_item *ei; | ||
270 | struct btrfs_key key; | ||
271 | u32 num_refs; | ||
272 | int ret; | ||
273 | |||
274 | path = btrfs_alloc_path(); | ||
275 | if (!path) | ||
276 | return -ENOMEM; | ||
277 | |||
278 | key.objectid = bytenr; | ||
279 | key.type = BTRFS_EXTENT_ITEM_KEY; | ||
280 | key.offset = num_bytes; | ||
281 | delayed_refs = &trans->transaction->delayed_refs; | ||
282 | again: | ||
283 | ret = btrfs_search_slot(trans, root->fs_info->extent_root, | ||
284 | &key, path, 0, 0); | ||
285 | if (ret < 0) | ||
286 | goto out; | ||
287 | |||
288 | if (ret == 0) { | ||
289 | leaf = path->nodes[0]; | ||
290 | ei = btrfs_item_ptr(leaf, path->slots[0], | ||
291 | struct btrfs_extent_item); | ||
292 | num_refs = btrfs_extent_refs(leaf, ei); | ||
293 | } else { | ||
294 | num_refs = 0; | ||
295 | ret = 0; | ||
296 | } | ||
297 | |||
298 | spin_lock(&delayed_refs->lock); | ||
299 | ref = tree_search(&delayed_refs->root, bytenr, (u64)-1, NULL); | ||
300 | if (ref) { | ||
301 | head = btrfs_delayed_node_to_head(ref); | ||
302 | if (mutex_trylock(&head->mutex)) { | ||
303 | num_refs += ref->ref_mod; | ||
304 | mutex_unlock(&head->mutex); | ||
305 | *refs = num_refs; | ||
306 | goto out; | ||
307 | } | ||
308 | |||
309 | atomic_inc(&ref->refs); | ||
310 | spin_unlock(&delayed_refs->lock); | ||
311 | |||
312 | btrfs_release_path(root->fs_info->extent_root, path); | ||
313 | |||
314 | mutex_lock(&head->mutex); | ||
315 | mutex_unlock(&head->mutex); | ||
316 | btrfs_put_delayed_ref(ref); | ||
317 | goto again; | ||
318 | } else { | ||
319 | *refs = num_refs; | ||
320 | } | ||
321 | out: | ||
322 | spin_unlock(&delayed_refs->lock); | ||
323 | btrfs_free_path(path); | ||
324 | return ret; | ||
325 | } | ||
326 | |||
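btrfs_lookup_extent_ref() computes the effective reference count as the on-disk count from the extent item plus the pending ref_mod stored in the head node. The arithmetic in isolation, as a tiny sketch:

	#include <assert.h>
	#include <stdint.h>

	/* effective refs = on-disk count + sum of pending modifications,
	 * which is exactly what the head node's ref_mod tracks */
	static uint32_t effective_refs(uint32_t disk_refs, int head_ref_mod)
	{
		return disk_refs + head_ref_mod;
	}

	int main(void)
	{
		/* e.g. 3 refs on disk, one queued drop and two queued adds */
		assert(effective_refs(3, -1 + 2) == 4);
		/* no extent item on disk yet, one queued full allocation */
		assert(effective_refs(0, 1) == 1);
		return 0;
	}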
327 | /* | ||
328 | * helper function to update an extent delayed ref in the | ||
329 | * rbtree. existing and update must both have the same | ||
330 | * bytenr and parent | ||
331 | * | ||
332 | * This may free existing if the update cancels out whatever | ||
333 | * operation it was doing. | ||
334 | */ | ||
335 | static noinline void | ||
336 | update_existing_ref(struct btrfs_trans_handle *trans, | ||
337 | struct btrfs_delayed_ref_root *delayed_refs, | ||
338 | struct btrfs_delayed_ref_node *existing, | ||
339 | struct btrfs_delayed_ref_node *update) | ||
340 | { | ||
341 | struct btrfs_delayed_ref *existing_ref; | ||
342 | struct btrfs_delayed_ref *ref; | ||
343 | |||
344 | existing_ref = btrfs_delayed_node_to_ref(existing); | ||
345 | ref = btrfs_delayed_node_to_ref(update); | ||
346 | |||
347 | if (ref->pin) | ||
348 | existing_ref->pin = 1; | ||
349 | |||
350 | if (ref->action != existing_ref->action) { | ||
351 | /* | ||
352 | * this is effectively undoing either an add or a | ||
353 | * drop. We decrement the ref_mod, and if it goes | ||
354 | * down to zero we just delete the entry without | ||
355 | * ever changing the extent allocation tree. | ||
356 | */ | ||
357 | existing->ref_mod--; | ||
358 | if (existing->ref_mod == 0) { | ||
359 | rb_erase(&existing->rb_node, | ||
360 | &delayed_refs->root); | ||
361 | existing->in_tree = 0; | ||
362 | btrfs_put_delayed_ref(existing); | ||
363 | delayed_refs->num_entries--; | ||
364 | if (trans->delayed_ref_updates) | ||
365 | trans->delayed_ref_updates--; | ||
366 | } | ||
367 | } else { | ||
368 | if (existing_ref->action == BTRFS_ADD_DELAYED_REF) { | ||
369 | /* if we're adding refs, make sure all the | ||
370 | * details match up. The extent could | ||
371 | * have been totally freed and reallocated | ||
372 | * by a different owner before the delayed | ||
373 | * ref entries were removed. | ||
374 | */ | ||
375 | existing_ref->owner_objectid = ref->owner_objectid; | ||
376 | existing_ref->generation = ref->generation; | ||
377 | existing_ref->root = ref->root; | ||
378 | existing->num_bytes = update->num_bytes; | ||
379 | } | ||
380 | /* | ||
381 | * the action on the existing ref matches | ||
382 | * the action on the ref we're trying to add. | ||
383 | * Bump the ref_mod by one so the backref that | ||
384 | * is eventually added/removed has the correct | ||
385 | * reference count | ||
386 | */ | ||
387 | existing->ref_mod += update->ref_mod; | ||
388 | } | ||
389 | } | ||
390 | |||
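update_existing_ref() merges a new modification into an existing rbtree entry: an opposite action cancels one unit of ref_mod (erasing the entry at zero, so a quick add/drop pair never touches the extent allocation tree), while the same action accumulates. A standalone model of just that merging rule:

	#include <assert.h>

	#define ADD_REF  1
	#define DROP_REF 2

	/* Returns the merged ref_mod; 0 means the entry would be erased
	 * from the rbtree without the extent tree ever being updated. */
	static int merge_ref_mod(int existing_action, int existing_mod,
				 int update_action, int update_mod)
	{
		if (update_action != existing_action)
			return existing_mod - update_mod;
		return existing_mod + update_mod;
	}

	int main(void)
	{
		/* add then drop of the same backref cancels out entirely */
		assert(merge_ref_mod(ADD_REF, 1, DROP_REF, 1) == 0);
		/* two adds from the same parent accumulate */
		assert(merge_ref_mod(ADD_REF, 1, ADD_REF, 1) == 2);
		return 0;
	}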
391 | /* | ||
392 | * helper function to update the accounting in the head ref | ||
393 | * existing and update must have the same bytenr | ||
394 | */ | ||
395 | static noinline void | ||
396 | update_existing_head_ref(struct btrfs_delayed_ref_node *existing, | ||
397 | struct btrfs_delayed_ref_node *update) | ||
398 | { | ||
399 | struct btrfs_delayed_ref_head *existing_ref; | ||
400 | struct btrfs_delayed_ref_head *ref; | ||
401 | |||
402 | existing_ref = btrfs_delayed_node_to_head(existing); | ||
403 | ref = btrfs_delayed_node_to_head(update); | ||
404 | |||
405 | if (ref->must_insert_reserved) { | ||
406 | /* if the extent was freed and then | ||
407 | * reallocated before the delayed ref | ||
408 | * entries were processed, we can end up | ||
409 | * with an existing head ref without | ||
410 | * the must_insert_reserved flag set. | ||
411 | * Set it again here | ||
412 | */ | ||
413 | existing_ref->must_insert_reserved = ref->must_insert_reserved; | ||
414 | |||
415 | /* | ||
416 | * update the num_bytes so we make sure the accounting | ||
417 | * is done correctly | ||
418 | */ | ||
419 | existing->num_bytes = update->num_bytes; | ||
420 | |||
421 | } | ||
422 | |||
423 | /* | ||
424 | * update the reference mod on the head to reflect this new operation | ||
425 | */ | ||
426 | existing->ref_mod += update->ref_mod; | ||
427 | } | ||
428 | |||
429 | /* | ||
430 | * helper function to actually insert a delayed ref into the rbtree. | ||
431 | * this does all the dirty work in terms of maintaining the correct | ||
432 | * overall modification count in the head node and properly dealing | ||
433 | * with updating existing nodes as new modifications are queued. | ||
434 | */ | ||
435 | static noinline int __btrfs_add_delayed_ref(struct btrfs_trans_handle *trans, | ||
436 | struct btrfs_delayed_ref_node *ref, | ||
437 | u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root, | ||
438 | u64 ref_generation, u64 owner_objectid, int action, | ||
439 | int pin) | ||
440 | { | ||
441 | struct btrfs_delayed_ref_node *existing; | ||
442 | struct btrfs_delayed_ref *full_ref; | ||
443 | struct btrfs_delayed_ref_head *head_ref = NULL; | ||
444 | struct btrfs_delayed_ref_root *delayed_refs; | ||
445 | int count_mod = 1; | ||
446 | int must_insert_reserved = 0; | ||
447 | |||
448 | /* | ||
449 | * the head node stores the sum of all the mods, so dropping a ref | ||
450 | * should drop the sum in the head node by one. | ||
451 | */ | ||
452 | if (parent == (u64)-1) { | ||
453 | if (action == BTRFS_DROP_DELAYED_REF) | ||
454 | count_mod = -1; | ||
455 | else if (action == BTRFS_UPDATE_DELAYED_HEAD) | ||
456 | count_mod = 0; | ||
457 | } | ||
458 | |||
459 | /* | ||
460 | * BTRFS_ADD_DELAYED_EXTENT means that we need to update | ||
461 | * the reserved accounting when the extent is finally added, or | ||
462 | * if a later modification deletes the delayed ref without ever | ||
463 | * inserting the extent into the extent allocation tree. | ||
464 | * ref->must_insert_reserved is the flag used to record | ||
465 | * that accounting mods are required. | ||
466 | * | ||
467 | * Once we record must_insert_reserved, switch the action to | ||
468 | * BTRFS_ADD_DELAYED_REF because other special casing is not required. | ||
469 | */ | ||
470 | if (action == BTRFS_ADD_DELAYED_EXTENT) { | ||
471 | must_insert_reserved = 1; | ||
472 | action = BTRFS_ADD_DELAYED_REF; | ||
473 | } else { | ||
474 | must_insert_reserved = 0; | ||
475 | } | ||
476 | |||
477 | |||
478 | delayed_refs = &trans->transaction->delayed_refs; | ||
479 | |||
480 | /* first set the basic ref node struct up */ | ||
481 | atomic_set(&ref->refs, 1); | ||
482 | ref->bytenr = bytenr; | ||
483 | ref->parent = parent; | ||
484 | ref->ref_mod = count_mod; | ||
485 | ref->in_tree = 1; | ||
486 | ref->num_bytes = num_bytes; | ||
487 | |||
488 | if (btrfs_delayed_ref_is_head(ref)) { | ||
489 | head_ref = btrfs_delayed_node_to_head(ref); | ||
490 | head_ref->must_insert_reserved = must_insert_reserved; | ||
491 | INIT_LIST_HEAD(&head_ref->cluster); | ||
492 | mutex_init(&head_ref->mutex); | ||
493 | } else { | ||
494 | full_ref = btrfs_delayed_node_to_ref(ref); | ||
495 | full_ref->root = ref_root; | ||
496 | full_ref->generation = ref_generation; | ||
497 | full_ref->owner_objectid = owner_objectid; | ||
498 | full_ref->pin = pin; | ||
499 | full_ref->action = action; | ||
500 | } | ||
501 | |||
502 | existing = tree_insert(&delayed_refs->root, bytenr, | ||
503 | parent, &ref->rb_node); | ||
504 | |||
505 | if (existing) { | ||
506 | if (btrfs_delayed_ref_is_head(ref)) | ||
507 | update_existing_head_ref(existing, ref); | ||
508 | else | ||
509 | update_existing_ref(trans, delayed_refs, existing, ref); | ||
510 | |||
511 | /* | ||
512 | * we've updated the existing ref, free the newly | ||
513 | * allocated ref | ||
514 | */ | ||
515 | kfree(ref); | ||
516 | } else { | ||
517 | if (btrfs_delayed_ref_is_head(ref)) { | ||
518 | delayed_refs->num_heads++; | ||
519 | delayed_refs->num_heads_ready++; | ||
520 | } | ||
521 | delayed_refs->num_entries++; | ||
522 | trans->delayed_ref_updates++; | ||
523 | } | ||
524 | return 0; | ||
525 | } | ||
526 | |||
527 | /* | ||
528 | * add a delayed ref to the tree. This does all of the accounting required | ||
529 | * to make sure the delayed ref is eventually processed before this | ||
530 | * transaction commits. | ||
531 | */ | ||
532 | int btrfs_add_delayed_ref(struct btrfs_trans_handle *trans, | ||
533 | u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root, | ||
534 | u64 ref_generation, u64 owner_objectid, int action, | ||
535 | int pin) | ||
536 | { | ||
537 | struct btrfs_delayed_ref *ref; | ||
538 | struct btrfs_delayed_ref_head *head_ref; | ||
539 | struct btrfs_delayed_ref_root *delayed_refs; | ||
540 | int ret; | ||
541 | |||
542 | ref = kmalloc(sizeof(*ref), GFP_NOFS); | ||
543 | if (!ref) | ||
544 | return -ENOMEM; | ||
545 | |||
546 | /* | ||
547 | * the parent = 0 case comes from cases where we don't actually | ||
548 | * know the parent yet. It will get updated later via an add/drop | ||
549 | * pair. | ||
550 | */ | ||
551 | if (parent == 0) | ||
552 | parent = bytenr; | ||
553 | |||
554 | head_ref = kmalloc(sizeof(*head_ref), GFP_NOFS); | ||
555 | if (!head_ref) { | ||
556 | kfree(ref); | ||
557 | return -ENOMEM; | ||
558 | } | ||
559 | delayed_refs = &trans->transaction->delayed_refs; | ||
560 | spin_lock(&delayed_refs->lock); | ||
561 | |||
562 | /* | ||
563 | * insert both the head node and the new ref without dropping | ||
564 | * the spin lock | ||
565 | */ | ||
566 | ret = __btrfs_add_delayed_ref(trans, &head_ref->node, bytenr, num_bytes, | ||
567 | (u64)-1, 0, 0, 0, action, pin); | ||
568 | BUG_ON(ret); | ||
569 | |||
570 | ret = __btrfs_add_delayed_ref(trans, &ref->node, bytenr, num_bytes, | ||
571 | parent, ref_root, ref_generation, | ||
572 | owner_objectid, action, pin); | ||
573 | BUG_ON(ret); | ||
574 | spin_unlock(&delayed_refs->lock); | ||
575 | return 0; | ||
576 | } | ||
577 | |||
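A hypothetical caller sketch against the prototype above, showing how a free path might queue a drop instead of editing the extent allocation tree inline; the wrapper name and argument values are illustrative, not taken from a real call site:

	static int queue_extent_drop(struct btrfs_trans_handle *trans,
				     u64 bytenr, u64 num_bytes, u64 parent,
				     u64 root_objectid, u64 generation, u64 owner)
	{
		/* pin == 1: keep the extent pinned until the transaction commits */
		return btrfs_add_delayed_ref(trans, bytenr, num_bytes, parent,
					     root_objectid, generation, owner,
					     BTRFS_DROP_DELAYED_REF, 1);
	}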
578 | /* | ||
579 | * this does a simple search for the head node for a given extent. | ||
580 | * It must be called with the delayed ref spinlock held, and it returns | ||
581 | * the head node if one was found, or NULL if not. | ||
582 | */ | ||
583 | struct btrfs_delayed_ref_head * | ||
584 | btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr) | ||
585 | { | ||
586 | struct btrfs_delayed_ref_node *ref; | ||
587 | struct btrfs_delayed_ref_root *delayed_refs; | ||
588 | |||
589 | delayed_refs = &trans->transaction->delayed_refs; | ||
590 | ref = tree_search(&delayed_refs->root, bytenr, (u64)-1, NULL); | ||
591 | if (ref) | ||
592 | return btrfs_delayed_node_to_head(ref); | ||
593 | return NULL; | ||
594 | } | ||
595 | |||
596 | /* | ||
597 | * add a delayed ref to the tree. This does all of the accounting required | ||
598 | * to make sure the delayed ref is eventually processed before this | ||
599 | * transaction commits. | ||
600 | * | ||
601 | * The main point of this call is to add and remove a backreference in a single | ||
602 | * shot, taking the lock only once, and only searching for the head node once. | ||
603 | * | ||
604 | * It is the same as doing a ref add and delete in two separate calls. | ||
605 | */ | ||
606 | int btrfs_update_delayed_ref(struct btrfs_trans_handle *trans, | ||
607 | u64 bytenr, u64 num_bytes, u64 orig_parent, | ||
608 | u64 parent, u64 orig_ref_root, u64 ref_root, | ||
609 | u64 orig_ref_generation, u64 ref_generation, | ||
610 | u64 owner_objectid, int pin) | ||
611 | { | ||
612 | struct btrfs_delayed_ref *ref; | ||
613 | struct btrfs_delayed_ref *old_ref; | ||
614 | struct btrfs_delayed_ref_head *head_ref; | ||
615 | struct btrfs_delayed_ref_root *delayed_refs; | ||
616 | int ret; | ||
617 | |||
618 | ref = kmalloc(sizeof(*ref), GFP_NOFS); | ||
619 | if (!ref) | ||
620 | return -ENOMEM; | ||
621 | |||
622 | old_ref = kmalloc(sizeof(*old_ref), GFP_NOFS); | ||
623 | if (!old_ref) { | ||
624 | kfree(ref); | ||
625 | return -ENOMEM; | ||
626 | } | ||
627 | |||
628 | /* | ||
629 | * the parent = 0 case comes from cases where we don't actually | ||
630 | * know the parent yet. It will get updated later via an add/drop | ||
631 | * pair. | ||
632 | */ | ||
633 | if (parent == 0) | ||
634 | parent = bytenr; | ||
635 | if (orig_parent == 0) | ||
636 | orig_parent = bytenr; | ||
637 | |||
638 | head_ref = kmalloc(sizeof(*head_ref), GFP_NOFS); | ||
639 | if (!head_ref) { | ||
640 | kfree(ref); | ||
641 | kfree(old_ref); | ||
642 | return -ENOMEM; | ||
643 | } | ||
644 | delayed_refs = &trans->transaction->delayed_refs; | ||
645 | spin_lock(&delayed_refs->lock); | ||
646 | |||
647 | /* | ||
648 | * insert both the head node and the new ref without dropping | ||
649 | * the spin lock | ||
650 | */ | ||
651 | ret = __btrfs_add_delayed_ref(trans, &head_ref->node, bytenr, num_bytes, | ||
652 | (u64)-1, 0, 0, 0, | ||
653 | BTRFS_UPDATE_DELAYED_HEAD, 0); | ||
654 | BUG_ON(ret); | ||
655 | |||
656 | ret = __btrfs_add_delayed_ref(trans, &ref->node, bytenr, num_bytes, | ||
657 | parent, ref_root, ref_generation, | ||
658 | owner_objectid, BTRFS_ADD_DELAYED_REF, 0); | ||
659 | BUG_ON(ret); | ||
660 | |||
661 | ret = __btrfs_add_delayed_ref(trans, &old_ref->node, bytenr, num_bytes, | ||
662 | orig_parent, orig_ref_root, | ||
663 | orig_ref_generation, owner_objectid, | ||
664 | BTRFS_DROP_DELAYED_REF, pin); | ||
665 | BUG_ON(ret); | ||
666 | spin_unlock(&delayed_refs->lock); | ||
667 | return 0; | ||
668 | } | ||
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h new file mode 100644 index 000000000000..3bec2ff0b15c --- /dev/null +++ b/fs/btrfs/delayed-ref.h | |||
@@ -0,0 +1,193 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2008 Oracle. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public | ||
6 | * License v2 as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
11 | * General Public License for more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public | ||
14 | * License along with this program; if not, write to the | ||
15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
16 | * Boston, MA 021110-1307, USA. | ||
17 | */ | ||
18 | #ifndef __DELAYED_REF__ | ||
19 | #define __DELAYED_REF__ | ||
20 | |||
21 | /* these are the possible values of struct btrfs_delayed_ref->action */ | ||
22 | #define BTRFS_ADD_DELAYED_REF 1 /* add one backref to the tree */ | ||
23 | #define BTRFS_DROP_DELAYED_REF 2 /* delete one backref from the tree */ | ||
24 | #define BTRFS_ADD_DELAYED_EXTENT 3 /* record a full extent allocation */ | ||
25 | #define BTRFS_UPDATE_DELAYED_HEAD 4 /* not changing ref count on head ref */ | ||
26 | |||
27 | struct btrfs_delayed_ref_node { | ||
28 | struct rb_node rb_node; | ||
29 | |||
30 | /* the starting bytenr of the extent */ | ||
31 | u64 bytenr; | ||
32 | |||
33 | /* the parent our backref will point to */ | ||
34 | u64 parent; | ||
35 | |||
36 | /* the size of the extent */ | ||
37 | u64 num_bytes; | ||
38 | |||
39 | /* ref count on this data structure */ | ||
40 | atomic_t refs; | ||
41 | |||
42 | /* | ||
43 | * how many refs is this entry adding or deleting. For | ||
44 | * head refs, this may be a negative number because it is keeping | ||
45 | * track of the total mods done to the reference count. | ||
46 | * For individual refs, this will always be a positive number | ||
47 | * | ||
48 | * It may be more than one, since it is possible for a single | ||
49 | * parent to have more than one ref on an extent | ||
50 | */ | ||
51 | int ref_mod; | ||
52 | |||
53 | /* is this node still in the rbtree? */ | ||
54 | unsigned int in_tree:1; | ||
55 | }; | ||
56 | |||
57 | /* | ||
58 | * the head refs are used to hold a lock on a given extent, which allows us | ||
59 | * to make sure that only one process is running the delayed refs | ||
60 | * at a time for a single extent. They also store the sum of all the | ||
61 | * reference count modifications we've queued up. | ||
62 | */ | ||
63 | struct btrfs_delayed_ref_head { | ||
64 | struct btrfs_delayed_ref_node node; | ||
65 | |||
66 | /* | ||
67 | * the mutex is held while running the refs, and it is also | ||
68 | * held when checking the sum of reference modifications. | ||
69 | */ | ||
70 | struct mutex mutex; | ||
71 | |||
72 | struct list_head cluster; | ||
73 | |||
74 | /* | ||
75 | * when a new extent is allocated, it is just reserved in memory | ||
76 | * The actual extent isn't inserted into the extent allocation tree | ||
77 | * until the delayed ref is processed. must_insert_reserved is | ||
78 | * used to flag a delayed ref so the accounting can be updated | ||
79 | * when a full insert is done. | ||
80 | * | ||
81 | * It is possible the extent will be freed before it is ever | ||
82 | * inserted into the extent allocation tree. In this case | ||
83 | * we need to update the in-ram accounting to properly reflect | ||
84 | * that the free has happened. | ||
85 | */ | ||
86 | unsigned int must_insert_reserved:1; | ||
87 | }; | ||
88 | |||
89 | struct btrfs_delayed_ref { | ||
90 | struct btrfs_delayed_ref_node node; | ||
91 | |||
92 | /* the root objectid our ref will point to */ | ||
93 | u64 root; | ||
94 | |||
95 | /* the generation for the backref */ | ||
96 | u64 generation; | ||
97 | |||
98 | /* owner_objectid of the backref */ | ||
99 | u64 owner_objectid; | ||
100 | |||
101 | /* operation done by this entry in the rbtree */ | ||
102 | u8 action; | ||
103 | |||
104 | /* if pin == 1, when the extent is freed it will be pinned until | ||
105 | * transaction commit | ||
106 | */ | ||
107 | unsigned int pin:1; | ||
108 | }; | ||
109 | |||
110 | struct btrfs_delayed_ref_root { | ||
111 | struct rb_root root; | ||
112 | |||
113 | /* this spin lock protects the rbtree and the entries inside */ | ||
114 | spinlock_t lock; | ||
115 | |||
116 | /* how many delayed ref updates we've queued, used by the | ||
117 | * throttling code | ||
118 | */ | ||
119 | unsigned long num_entries; | ||
120 | |||
121 | /* total number of head nodes in tree */ | ||
122 | unsigned long num_heads; | ||
123 | |||
124 | /* total number of head nodes ready for processing */ | ||
125 | unsigned long num_heads_ready; | ||
126 | |||
127 | /* | ||
128 | * set when the tree is flushing before a transaction commit, | ||
129 | * used by the throttling code to decide if new updates need | ||
130 | * to be run right away | ||
131 | */ | ||
132 | int flushing; | ||
133 | |||
134 | u64 run_delayed_start; | ||
135 | }; | ||
136 | |||
137 | static inline void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref) | ||
138 | { | ||
139 | WARN_ON(atomic_read(&ref->refs) == 0); | ||
140 | if (atomic_dec_and_test(&ref->refs)) { | ||
141 | WARN_ON(ref->in_tree); | ||
142 | kfree(ref); | ||
143 | } | ||
144 | } | ||
145 | |||
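/*
 * Typical get/put pairing for the helper above (a sketch; the inc
 * side is what the lookup paths do before handing out a node).
 */
static inline void sketch_use_ref(struct btrfs_delayed_ref_node *ref)
{
	atomic_inc(&ref->refs);		/* take a reference */
	/* ... read ref->bytenr / ref->num_bytes ... */
	btrfs_put_delayed_ref(ref);	/* frees the node on the last put */
}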
146 | int btrfs_add_delayed_ref(struct btrfs_trans_handle *trans, | ||
147 | u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root, | ||
148 | u64 ref_generation, u64 owner_objectid, int action, | ||
149 | int pin); | ||
150 | |||
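/*
 * Hedged usage example for the declaration above; every argument
 * value below is a placeholder, not something this patch prescribes:
 *
 *	ret = btrfs_add_delayed_ref(trans, bytenr, num_bytes, parent,
 *				    root_objectid, generation, owner,
 *				    BTRFS_DROP_DELAYED_REF, 1);
 *
 * pin == 1 asks for the extent to stay pinned until transaction
 * commit if the queued drop ends up freeing it.
 */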
151 | struct btrfs_delayed_ref_head * | ||
152 | btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr); | ||
153 | int btrfs_delayed_ref_pending(struct btrfs_trans_handle *trans, u64 bytenr); | ||
154 | int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans, | ||
155 | struct btrfs_root *root, u64 bytenr, | ||
156 | u64 num_bytes, u32 *refs); | ||
157 | int btrfs_update_delayed_ref(struct btrfs_trans_handle *trans, | ||
158 | u64 bytenr, u64 num_bytes, u64 orig_parent, | ||
159 | u64 parent, u64 orig_ref_root, u64 ref_root, | ||
160 | u64 orig_ref_generation, u64 ref_generation, | ||
161 | u64 owner_objectid, int pin); | ||
162 | int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans, | ||
163 | struct btrfs_delayed_ref_head *head); | ||
164 | int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans, | ||
165 | struct list_head *cluster, u64 search_start); | ||
166 | /* | ||
167 | * a node might live in a head or a regular ref; this lets you | ||
168 | * test for the proper type to use. | ||
169 | */ | ||
170 | static inline int btrfs_delayed_ref_is_head(struct btrfs_delayed_ref_node *node) | ||
171 | { | ||
172 | return node->parent == (u64)-1; | ||
173 | } | ||
174 | |||
175 | /* | ||
176 | * helper functions to cast a node into its container | ||
177 | */ | ||
178 | static inline struct btrfs_delayed_ref * | ||
179 | btrfs_delayed_node_to_ref(struct btrfs_delayed_ref_node *node) | ||
180 | { | ||
181 | WARN_ON(btrfs_delayed_ref_is_head(node)); | ||
182 | return container_of(node, struct btrfs_delayed_ref, node); | ||
183 | |||
184 | } | ||
185 | |||
186 | static inline struct btrfs_delayed_ref_head * | ||
187 | btrfs_delayed_node_to_head(struct btrfs_delayed_ref_node *node) | ||
188 | { | ||
189 | WARN_ON(!btrfs_delayed_ref_is_head(node)); | ||
190 | return container_of(node, struct btrfs_delayed_ref_head, node); | ||
191 | |||
192 | } | ||
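/*
 * Sketch of the dispatch pattern the processing code in extent-tree.c
 * builds from the helpers above (illustrative only):
 */
static inline void sketch_dispatch(struct btrfs_delayed_ref_node *node)
{
	if (btrfs_delayed_ref_is_head(node)) {
		struct btrfs_delayed_ref_head *head;

		head = btrfs_delayed_node_to_head(node);
		mutex_lock(&head->mutex);
		/* run or sum the queued mods for this extent */
		mutex_unlock(&head->mutex);
	} else {
		struct btrfs_delayed_ref *ref;

		ref = btrfs_delayed_node_to_ref(node);
		/* use ref->root, ref->generation, ref->action, ... */
	}
}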
193 | #endif | ||
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index 926a0b287a7d..1d70236ba00c 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c | |||
@@ -145,7 +145,10 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root | |||
145 | key.objectid = dir; | 145 | key.objectid = dir; |
146 | btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); | 146 | btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); |
147 | key.offset = btrfs_name_hash(name, name_len); | 147 | key.offset = btrfs_name_hash(name, name_len); |
148 | |||
148 | path = btrfs_alloc_path(); | 149 | path = btrfs_alloc_path(); |
150 | path->leave_spinning = 1; | ||
151 | |||
149 | data_size = sizeof(*dir_item) + name_len; | 152 | data_size = sizeof(*dir_item) + name_len; |
150 | dir_item = insert_with_overflow(trans, root, path, &key, data_size, | 153 | dir_item = insert_with_overflow(trans, root, path, &key, data_size, |
151 | name, name_len); | 154 | name, name_len); |
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 3e18175248e0..92caa8035f36 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -38,6 +38,7 @@ | |||
38 | #include "locking.h" | 38 | #include "locking.h" |
39 | #include "ref-cache.h" | 39 | #include "ref-cache.h" |
40 | #include "tree-log.h" | 40 | #include "tree-log.h" |
41 | #include "free-space-cache.h" | ||
41 | 42 | ||
42 | static struct extent_io_ops btree_extent_io_ops; | 43 | static struct extent_io_ops btree_extent_io_ops; |
43 | static void end_workqueue_fn(struct btrfs_work *work); | 44 | static void end_workqueue_fn(struct btrfs_work *work); |
@@ -668,14 +669,31 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | |||
668 | static int btree_writepage(struct page *page, struct writeback_control *wbc) | 669 | static int btree_writepage(struct page *page, struct writeback_control *wbc) |
669 | { | 670 | { |
670 | struct extent_io_tree *tree; | 671 | struct extent_io_tree *tree; |
672 | struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; | ||
673 | struct extent_buffer *eb; | ||
674 | int was_dirty; | ||
675 | |||
671 | tree = &BTRFS_I(page->mapping->host)->io_tree; | 676 | tree = &BTRFS_I(page->mapping->host)->io_tree; |
677 | if (!(current->flags & PF_MEMALLOC)) { | ||
678 | return extent_write_full_page(tree, page, | ||
679 | btree_get_extent, wbc); | ||
680 | } | ||
672 | 681 | ||
673 | if (current->flags & PF_MEMALLOC) { | 682 | redirty_page_for_writepage(wbc, page); |
674 | redirty_page_for_writepage(wbc, page); | 683 | eb = btrfs_find_tree_block(root, page_offset(page), |
675 | unlock_page(page); | 684 | PAGE_CACHE_SIZE); |
676 | return 0; | 685 | WARN_ON(!eb); |
686 | |||
687 | was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags); | ||
688 | if (!was_dirty) { | ||
689 | spin_lock(&root->fs_info->delalloc_lock); | ||
690 | root->fs_info->dirty_metadata_bytes += PAGE_CACHE_SIZE; | ||
691 | spin_unlock(&root->fs_info->delalloc_lock); | ||
677 | } | 692 | } |
678 | return extent_write_full_page(tree, page, btree_get_extent, wbc); | 693 | free_extent_buffer(eb); |
694 | |||
695 | unlock_page(page); | ||
696 | return 0; | ||
679 | } | 697 | } |
680 | 698 | ||
681 | static int btree_writepages(struct address_space *mapping, | 699 | static int btree_writepages(struct address_space *mapping, |
@@ -684,15 +702,15 @@ static int btree_writepages(struct address_space *mapping, | |||
684 | struct extent_io_tree *tree; | 702 | struct extent_io_tree *tree; |
685 | tree = &BTRFS_I(mapping->host)->io_tree; | 703 | tree = &BTRFS_I(mapping->host)->io_tree; |
686 | if (wbc->sync_mode == WB_SYNC_NONE) { | 704 | if (wbc->sync_mode == WB_SYNC_NONE) { |
705 | struct btrfs_root *root = BTRFS_I(mapping->host)->root; | ||
687 | u64 num_dirty; | 706 | u64 num_dirty; |
688 | u64 start = 0; | ||
689 | unsigned long thresh = 32 * 1024 * 1024; | 707 | unsigned long thresh = 32 * 1024 * 1024; |
690 | 708 | ||
691 | if (wbc->for_kupdate) | 709 | if (wbc->for_kupdate) |
692 | return 0; | 710 | return 0; |
693 | 711 | ||
694 | num_dirty = count_range_bits(tree, &start, (u64)-1, | 712 | /* this is a bit racy, but that's ok */ |
695 | thresh, EXTENT_DIRTY); | 713 | num_dirty = root->fs_info->dirty_metadata_bytes; |
696 | if (num_dirty < thresh) | 714 | if (num_dirty < thresh) |
697 | return 0; | 715 | return 0; |
698 | } | 716 | } |
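/*
 * The unlocked read above is racy by design: a stale
 * dirty_metadata_bytes value can only start a writeback pass one
 * round early or late, never corrupt state. A sketch of the check:
 *
 *	u64 num_dirty = root->fs_info->dirty_metadata_bytes;
 *	if (num_dirty < 32 * 1024 * 1024)
 *		return 0;
 */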
@@ -859,9 +877,17 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
859 | root->fs_info->running_transaction->transid) { | 877 | root->fs_info->running_transaction->transid) { |
860 | btrfs_assert_tree_locked(buf); | 878 | btrfs_assert_tree_locked(buf); |
861 | 879 | ||
862 | /* ugh, clear_extent_buffer_dirty can be expensive */ | 880 | if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)) { |
863 | btrfs_set_lock_blocking(buf); | 881 | spin_lock(&root->fs_info->delalloc_lock); |
882 | if (root->fs_info->dirty_metadata_bytes >= buf->len) | ||
883 | root->fs_info->dirty_metadata_bytes -= buf->len; | ||
884 | else | ||
885 | WARN_ON(1); | ||
886 | spin_unlock(&root->fs_info->delalloc_lock); | ||
887 | } | ||
864 | 888 | ||
889 | /* ugh, clear_extent_buffer_dirty needs to lock the page */ | ||
890 | btrfs_set_lock_blocking(buf); | ||
865 | clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, | 891 | clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, |
866 | buf); | 892 | buf); |
867 | } | 893 | } |
@@ -1387,8 +1413,6 @@ static int bio_ready_for_csum(struct bio *bio) | |||
1387 | 1413 | ||
1388 | ret = extent_range_uptodate(io_tree, start + length, | 1414 | ret = extent_range_uptodate(io_tree, start + length, |
1389 | start + buf_len - 1); | 1415 | start + buf_len - 1); |
1390 | if (ret == 1) | ||
1391 | return ret; | ||
1392 | return ret; | 1416 | return ret; |
1393 | } | 1417 | } |
1394 | 1418 | ||
@@ -1471,12 +1495,6 @@ static int transaction_kthread(void *arg) | |||
1471 | vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); | 1495 | vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); |
1472 | mutex_lock(&root->fs_info->transaction_kthread_mutex); | 1496 | mutex_lock(&root->fs_info->transaction_kthread_mutex); |
1473 | 1497 | ||
1474 | if (root->fs_info->total_ref_cache_size > 20 * 1024 * 1024) { | ||
1475 | printk(KERN_INFO "btrfs: total reference cache " | ||
1476 | "size %llu\n", | ||
1477 | root->fs_info->total_ref_cache_size); | ||
1478 | } | ||
1479 | |||
1480 | mutex_lock(&root->fs_info->trans_mutex); | 1498 | mutex_lock(&root->fs_info->trans_mutex); |
1481 | cur = root->fs_info->running_transaction; | 1499 | cur = root->fs_info->running_transaction; |
1482 | if (!cur) { | 1500 | if (!cur) { |
@@ -1493,6 +1511,7 @@ static int transaction_kthread(void *arg) | |||
1493 | mutex_unlock(&root->fs_info->trans_mutex); | 1511 | mutex_unlock(&root->fs_info->trans_mutex); |
1494 | trans = btrfs_start_transaction(root, 1); | 1512 | trans = btrfs_start_transaction(root, 1); |
1495 | ret = btrfs_commit_transaction(trans, root); | 1513 | ret = btrfs_commit_transaction(trans, root); |
1514 | |||
1496 | sleep: | 1515 | sleep: |
1497 | wake_up_process(root->fs_info->cleaner_kthread); | 1516 | wake_up_process(root->fs_info->cleaner_kthread); |
1498 | mutex_unlock(&root->fs_info->transaction_kthread_mutex); | 1517 | mutex_unlock(&root->fs_info->transaction_kthread_mutex); |
@@ -1552,6 +1571,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1552 | INIT_LIST_HEAD(&fs_info->dead_roots); | 1571 | INIT_LIST_HEAD(&fs_info->dead_roots); |
1553 | INIT_LIST_HEAD(&fs_info->hashers); | 1572 | INIT_LIST_HEAD(&fs_info->hashers); |
1554 | INIT_LIST_HEAD(&fs_info->delalloc_inodes); | 1573 | INIT_LIST_HEAD(&fs_info->delalloc_inodes); |
1574 | INIT_LIST_HEAD(&fs_info->ordered_operations); | ||
1555 | spin_lock_init(&fs_info->delalloc_lock); | 1575 | spin_lock_init(&fs_info->delalloc_lock); |
1556 | spin_lock_init(&fs_info->new_trans_lock); | 1576 | spin_lock_init(&fs_info->new_trans_lock); |
1557 | spin_lock_init(&fs_info->ref_cache_lock); | 1577 | spin_lock_init(&fs_info->ref_cache_lock); |
@@ -1611,10 +1631,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1611 | 1631 | ||
1612 | extent_io_tree_init(&fs_info->pinned_extents, | 1632 | extent_io_tree_init(&fs_info->pinned_extents, |
1613 | fs_info->btree_inode->i_mapping, GFP_NOFS); | 1633 | fs_info->btree_inode->i_mapping, GFP_NOFS); |
1614 | extent_io_tree_init(&fs_info->pending_del, | ||
1615 | fs_info->btree_inode->i_mapping, GFP_NOFS); | ||
1616 | extent_io_tree_init(&fs_info->extent_ins, | ||
1617 | fs_info->btree_inode->i_mapping, GFP_NOFS); | ||
1618 | fs_info->do_barriers = 1; | 1634 | fs_info->do_barriers = 1; |
1619 | 1635 | ||
1620 | INIT_LIST_HEAD(&fs_info->dead_reloc_roots); | 1636 | INIT_LIST_HEAD(&fs_info->dead_reloc_roots); |
@@ -1627,15 +1643,18 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1627 | insert_inode_hash(fs_info->btree_inode); | 1643 | insert_inode_hash(fs_info->btree_inode); |
1628 | 1644 | ||
1629 | mutex_init(&fs_info->trans_mutex); | 1645 | mutex_init(&fs_info->trans_mutex); |
1646 | mutex_init(&fs_info->ordered_operations_mutex); | ||
1630 | mutex_init(&fs_info->tree_log_mutex); | 1647 | mutex_init(&fs_info->tree_log_mutex); |
1631 | mutex_init(&fs_info->drop_mutex); | 1648 | mutex_init(&fs_info->drop_mutex); |
1632 | mutex_init(&fs_info->extent_ins_mutex); | ||
1633 | mutex_init(&fs_info->pinned_mutex); | ||
1634 | mutex_init(&fs_info->chunk_mutex); | 1649 | mutex_init(&fs_info->chunk_mutex); |
1635 | mutex_init(&fs_info->transaction_kthread_mutex); | 1650 | mutex_init(&fs_info->transaction_kthread_mutex); |
1636 | mutex_init(&fs_info->cleaner_mutex); | 1651 | mutex_init(&fs_info->cleaner_mutex); |
1637 | mutex_init(&fs_info->volume_mutex); | 1652 | mutex_init(&fs_info->volume_mutex); |
1638 | mutex_init(&fs_info->tree_reloc_mutex); | 1653 | mutex_init(&fs_info->tree_reloc_mutex); |
1654 | |||
1655 | btrfs_init_free_cluster(&fs_info->meta_alloc_cluster); | ||
1656 | btrfs_init_free_cluster(&fs_info->data_alloc_cluster); | ||
1657 | |||
1639 | init_waitqueue_head(&fs_info->transaction_throttle); | 1658 | init_waitqueue_head(&fs_info->transaction_throttle); |
1640 | init_waitqueue_head(&fs_info->transaction_wait); | 1659 | init_waitqueue_head(&fs_info->transaction_wait); |
1641 | init_waitqueue_head(&fs_info->async_submit_wait); | 1660 | init_waitqueue_head(&fs_info->async_submit_wait); |
@@ -2358,8 +2377,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) | |||
2358 | struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; | 2377 | struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; |
2359 | u64 transid = btrfs_header_generation(buf); | 2378 | u64 transid = btrfs_header_generation(buf); |
2360 | struct inode *btree_inode = root->fs_info->btree_inode; | 2379 | struct inode *btree_inode = root->fs_info->btree_inode; |
2361 | 2380 | int was_dirty; | |
2362 | btrfs_set_lock_blocking(buf); | ||
2363 | 2381 | ||
2364 | btrfs_assert_tree_locked(buf); | 2382 | btrfs_assert_tree_locked(buf); |
2365 | if (transid != root->fs_info->generation) { | 2383 | if (transid != root->fs_info->generation) { |
@@ -2370,7 +2388,13 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) | |||
2370 | (unsigned long long)root->fs_info->generation); | 2388 | (unsigned long long)root->fs_info->generation); |
2371 | WARN_ON(1); | 2389 | WARN_ON(1); |
2372 | } | 2390 | } |
2373 | set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, buf); | 2391 | was_dirty = set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, |
2392 | buf); | ||
2393 | if (!was_dirty) { | ||
2394 | spin_lock(&root->fs_info->delalloc_lock); | ||
2395 | root->fs_info->dirty_metadata_bytes += buf->len; | ||
2396 | spin_unlock(&root->fs_info->delalloc_lock); | ||
2397 | } | ||
2374 | } | 2398 | } |
2375 | 2399 | ||
2376 | void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) | 2400 | void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) |
@@ -2385,7 +2409,7 @@ void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) | |||
2385 | unsigned long thresh = 32 * 1024 * 1024; | 2409 | unsigned long thresh = 32 * 1024 * 1024; |
2386 | tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree; | 2410 | tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree; |
2387 | 2411 | ||
2388 | if (current_is_pdflush() || current->flags & PF_MEMALLOC) | 2412 | if (current->flags & PF_MEMALLOC) |
2389 | return; | 2413 | return; |
2390 | 2414 | ||
2391 | num_dirty = count_range_bits(tree, &start, (u64)-1, | 2415 | num_dirty = count_range_bits(tree, &start, (u64)-1, |
@@ -2410,6 +2434,7 @@ int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid) | |||
2410 | int btree_lock_page_hook(struct page *page) | 2434 | int btree_lock_page_hook(struct page *page) |
2411 | { | 2435 | { |
2412 | struct inode *inode = page->mapping->host; | 2436 | struct inode *inode = page->mapping->host; |
2437 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
2413 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | 2438 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; |
2414 | struct extent_buffer *eb; | 2439 | struct extent_buffer *eb; |
2415 | unsigned long len; | 2440 | unsigned long len; |
@@ -2425,6 +2450,16 @@ int btree_lock_page_hook(struct page *page) | |||
2425 | 2450 | ||
2426 | btrfs_tree_lock(eb); | 2451 | btrfs_tree_lock(eb); |
2427 | btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN); | 2452 | btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN); |
2453 | |||
2454 | if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) { | ||
2455 | spin_lock(&root->fs_info->delalloc_lock); | ||
2456 | if (root->fs_info->dirty_metadata_bytes >= eb->len) | ||
2457 | root->fs_info->dirty_metadata_bytes -= eb->len; | ||
2458 | else | ||
2459 | WARN_ON(1); | ||
2460 | spin_unlock(&root->fs_info->delalloc_lock); | ||
2461 | } | ||
2462 | |||
2428 | btrfs_tree_unlock(eb); | 2463 | btrfs_tree_unlock(eb); |
2429 | free_extent_buffer(eb); | 2464 | free_extent_buffer(eb); |
2430 | out: | 2465 | out: |
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 95029db227be..c958ecbc1916 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h | |||
@@ -72,6 +72,7 @@ int btrfs_insert_dev_radix(struct btrfs_root *root, | |||
72 | void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr); | 72 | void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr); |
73 | int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root); | 73 | int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root); |
74 | void btrfs_mark_buffer_dirty(struct extent_buffer *buf); | 74 | void btrfs_mark_buffer_dirty(struct extent_buffer *buf); |
75 | void btrfs_mark_buffer_dirty_nonblocking(struct extent_buffer *buf); | ||
75 | int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid); | 76 | int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid); |
76 | int btrfs_set_buffer_uptodate(struct extent_buffer *buf); | 77 | int btrfs_set_buffer_uptodate(struct extent_buffer *buf); |
77 | int wait_on_tree_block_writeback(struct btrfs_root *root, | 78 | int wait_on_tree_block_writeback(struct btrfs_root *root, |
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index fefe83ad2059..178df4c67de4 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -31,6 +31,7 @@ | |||
31 | #include "volumes.h" | 31 | #include "volumes.h" |
32 | #include "locking.h" | 32 | #include "locking.h" |
33 | #include "ref-cache.h" | 33 | #include "ref-cache.h" |
34 | #include "free-space-cache.h" | ||
34 | 35 | ||
35 | #define PENDING_EXTENT_INSERT 0 | 36 | #define PENDING_EXTENT_INSERT 0 |
36 | #define PENDING_EXTENT_DELETE 1 | 37 | #define PENDING_EXTENT_DELETE 1 |
@@ -49,17 +50,23 @@ struct pending_extent_op { | |||
49 | int del; | 50 | int del; |
50 | }; | 51 | }; |
51 | 52 | ||
52 | static int finish_current_insert(struct btrfs_trans_handle *trans, | 53 | static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, |
53 | struct btrfs_root *extent_root, int all); | 54 | struct btrfs_root *root, u64 parent, |
54 | static int del_pending_extents(struct btrfs_trans_handle *trans, | 55 | u64 root_objectid, u64 ref_generation, |
55 | struct btrfs_root *extent_root, int all); | 56 | u64 owner, struct btrfs_key *ins, |
56 | static int pin_down_bytes(struct btrfs_trans_handle *trans, | 57 | int ref_mod); |
57 | struct btrfs_root *root, | 58 | static int update_reserved_extents(struct btrfs_root *root, |
58 | u64 bytenr, u64 num_bytes, int is_data); | 59 | u64 bytenr, u64 num, int reserve); |
59 | static int update_block_group(struct btrfs_trans_handle *trans, | 60 | static int update_block_group(struct btrfs_trans_handle *trans, |
60 | struct btrfs_root *root, | 61 | struct btrfs_root *root, |
61 | u64 bytenr, u64 num_bytes, int alloc, | 62 | u64 bytenr, u64 num_bytes, int alloc, |
62 | int mark_free); | 63 | int mark_free); |
64 | static noinline int __btrfs_free_extent(struct btrfs_trans_handle *trans, | ||
65 | struct btrfs_root *root, | ||
66 | u64 bytenr, u64 num_bytes, u64 parent, | ||
67 | u64 root_objectid, u64 ref_generation, | ||
68 | u64 owner_objectid, int pin, | ||
69 | int ref_to_drop); | ||
63 | 70 | ||
64 | static int do_chunk_alloc(struct btrfs_trans_handle *trans, | 71 | static int do_chunk_alloc(struct btrfs_trans_handle *trans, |
65 | struct btrfs_root *extent_root, u64 alloc_bytes, | 72 | struct btrfs_root *extent_root, u64 alloc_bytes, |
@@ -160,7 +167,6 @@ static int add_new_free_space(struct btrfs_block_group_cache *block_group, | |||
160 | u64 extent_start, extent_end, size; | 167 | u64 extent_start, extent_end, size; |
161 | int ret; | 168 | int ret; |
162 | 169 | ||
163 | mutex_lock(&info->pinned_mutex); | ||
164 | while (start < end) { | 170 | while (start < end) { |
165 | ret = find_first_extent_bit(&info->pinned_extents, start, | 171 | ret = find_first_extent_bit(&info->pinned_extents, start, |
166 | &extent_start, &extent_end, | 172 | &extent_start, &extent_end, |
@@ -186,7 +192,6 @@ static int add_new_free_space(struct btrfs_block_group_cache *block_group, | |||
186 | ret = btrfs_add_free_space(block_group, start, size); | 192 | ret = btrfs_add_free_space(block_group, start, size); |
187 | BUG_ON(ret); | 193 | BUG_ON(ret); |
188 | } | 194 | } |
189 | mutex_unlock(&info->pinned_mutex); | ||
190 | 195 | ||
191 | return 0; | 196 | return 0; |
192 | } | 197 | } |
@@ -285,8 +290,8 @@ next: | |||
285 | block_group->key.objectid + | 290 | block_group->key.objectid + |
286 | block_group->key.offset); | 291 | block_group->key.offset); |
287 | 292 | ||
288 | remove_sb_from_cache(root, block_group); | ||
289 | block_group->cached = 1; | 293 | block_group->cached = 1; |
294 | remove_sb_from_cache(root, block_group); | ||
290 | ret = 0; | 295 | ret = 0; |
291 | err: | 296 | err: |
292 | btrfs_free_path(path); | 297 | btrfs_free_path(path); |
@@ -320,7 +325,7 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group( | |||
320 | return cache; | 325 | return cache; |
321 | } | 326 | } |
322 | 327 | ||
323 | static inline void put_block_group(struct btrfs_block_group_cache *cache) | 328 | void btrfs_put_block_group(struct btrfs_block_group_cache *cache) |
324 | { | 329 | { |
325 | if (atomic_dec_and_test(&cache->count)) | 330 | if (atomic_dec_and_test(&cache->count)) |
326 | kfree(cache); | 331 | kfree(cache); |
@@ -393,12 +398,12 @@ again: | |||
393 | div_factor(cache->key.offset, factor)) { | 398 | div_factor(cache->key.offset, factor)) { |
394 | group_start = cache->key.objectid; | 399 | group_start = cache->key.objectid; |
395 | spin_unlock(&cache->lock); | 400 | spin_unlock(&cache->lock); |
396 | put_block_group(cache); | 401 | btrfs_put_block_group(cache); |
397 | goto found; | 402 | goto found; |
398 | } | 403 | } |
399 | } | 404 | } |
400 | spin_unlock(&cache->lock); | 405 | spin_unlock(&cache->lock); |
401 | put_block_group(cache); | 406 | btrfs_put_block_group(cache); |
402 | cond_resched(); | 407 | cond_resched(); |
403 | } | 408 | } |
404 | if (!wrapped) { | 409 | if (!wrapped) { |
@@ -554,262 +559,13 @@ out: | |||
554 | return ret; | 559 | return ret; |
555 | } | 560 | } |
556 | 561 | ||
557 | /* | ||
558 | * updates all the backrefs that are pending on update_list for the | ||
559 | * extent_root | ||
560 | */ | ||
561 | static noinline int update_backrefs(struct btrfs_trans_handle *trans, | ||
562 | struct btrfs_root *extent_root, | ||
563 | struct btrfs_path *path, | ||
564 | struct list_head *update_list) | ||
565 | { | ||
566 | struct btrfs_key key; | ||
567 | struct btrfs_extent_ref *ref; | ||
568 | struct btrfs_fs_info *info = extent_root->fs_info; | ||
569 | struct pending_extent_op *op; | ||
570 | struct extent_buffer *leaf; | ||
571 | int ret = 0; | ||
572 | struct list_head *cur = update_list->next; | ||
573 | u64 ref_objectid; | ||
574 | u64 ref_root = extent_root->root_key.objectid; | ||
575 | |||
576 | op = list_entry(cur, struct pending_extent_op, list); | ||
577 | |||
578 | search: | ||
579 | key.objectid = op->bytenr; | ||
580 | key.type = BTRFS_EXTENT_REF_KEY; | ||
581 | key.offset = op->orig_parent; | ||
582 | |||
583 | ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 1); | ||
584 | BUG_ON(ret); | ||
585 | |||
586 | leaf = path->nodes[0]; | ||
587 | |||
588 | loop: | ||
589 | ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref); | ||
590 | |||
591 | ref_objectid = btrfs_ref_objectid(leaf, ref); | ||
592 | |||
593 | if (btrfs_ref_root(leaf, ref) != ref_root || | ||
594 | btrfs_ref_generation(leaf, ref) != op->orig_generation || | ||
595 | (ref_objectid != op->level && | ||
596 | ref_objectid != BTRFS_MULTIPLE_OBJECTIDS)) { | ||
597 | printk(KERN_ERR "btrfs couldn't find %llu, parent %llu, " | ||
598 | "root %llu, owner %u\n", | ||
599 | (unsigned long long)op->bytenr, | ||
600 | (unsigned long long)op->orig_parent, | ||
601 | (unsigned long long)ref_root, op->level); | ||
602 | btrfs_print_leaf(extent_root, leaf); | ||
603 | BUG(); | ||
604 | } | ||
605 | |||
606 | key.objectid = op->bytenr; | ||
607 | key.offset = op->parent; | ||
608 | key.type = BTRFS_EXTENT_REF_KEY; | ||
609 | ret = btrfs_set_item_key_safe(trans, extent_root, path, &key); | ||
610 | BUG_ON(ret); | ||
611 | ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref); | ||
612 | btrfs_set_ref_generation(leaf, ref, op->generation); | ||
613 | |||
614 | cur = cur->next; | ||
615 | |||
616 | list_del_init(&op->list); | ||
617 | unlock_extent(&info->extent_ins, op->bytenr, | ||
618 | op->bytenr + op->num_bytes - 1, GFP_NOFS); | ||
619 | kfree(op); | ||
620 | |||
621 | if (cur == update_list) { | ||
622 | btrfs_mark_buffer_dirty(path->nodes[0]); | ||
623 | btrfs_release_path(extent_root, path); | ||
624 | goto out; | ||
625 | } | ||
626 | |||
627 | op = list_entry(cur, struct pending_extent_op, list); | ||
628 | |||
629 | path->slots[0]++; | ||
630 | while (path->slots[0] < btrfs_header_nritems(leaf)) { | ||
631 | btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); | ||
632 | if (key.objectid == op->bytenr && | ||
633 | key.type == BTRFS_EXTENT_REF_KEY) | ||
634 | goto loop; | ||
635 | path->slots[0]++; | ||
636 | } | ||
637 | |||
638 | btrfs_mark_buffer_dirty(path->nodes[0]); | ||
639 | btrfs_release_path(extent_root, path); | ||
640 | goto search; | ||
641 | |||
642 | out: | ||
643 | return 0; | ||
644 | } | ||
645 | |||
646 | static noinline int insert_extents(struct btrfs_trans_handle *trans, | ||
647 | struct btrfs_root *extent_root, | ||
648 | struct btrfs_path *path, | ||
649 | struct list_head *insert_list, int nr) | ||
650 | { | ||
651 | struct btrfs_key *keys; | ||
652 | u32 *data_size; | ||
653 | struct pending_extent_op *op; | ||
654 | struct extent_buffer *leaf; | ||
655 | struct list_head *cur = insert_list->next; | ||
656 | struct btrfs_fs_info *info = extent_root->fs_info; | ||
657 | u64 ref_root = extent_root->root_key.objectid; | ||
658 | int i = 0, last = 0, ret; | ||
659 | int total = nr * 2; | ||
660 | |||
661 | if (!nr) | ||
662 | return 0; | ||
663 | |||
664 | keys = kzalloc(total * sizeof(struct btrfs_key), GFP_NOFS); | ||
665 | if (!keys) | ||
666 | return -ENOMEM; | ||
667 | |||
668 | data_size = kzalloc(total * sizeof(u32), GFP_NOFS); | ||
669 | if (!data_size) { | ||
670 | kfree(keys); | ||
671 | return -ENOMEM; | ||
672 | } | ||
673 | |||
674 | list_for_each_entry(op, insert_list, list) { | ||
675 | keys[i].objectid = op->bytenr; | ||
676 | keys[i].offset = op->num_bytes; | ||
677 | keys[i].type = BTRFS_EXTENT_ITEM_KEY; | ||
678 | data_size[i] = sizeof(struct btrfs_extent_item); | ||
679 | i++; | ||
680 | |||
681 | keys[i].objectid = op->bytenr; | ||
682 | keys[i].offset = op->parent; | ||
683 | keys[i].type = BTRFS_EXTENT_REF_KEY; | ||
684 | data_size[i] = sizeof(struct btrfs_extent_ref); | ||
685 | i++; | ||
686 | } | ||
687 | |||
688 | op = list_entry(cur, struct pending_extent_op, list); | ||
689 | i = 0; | ||
690 | while (i < total) { | ||
691 | int c; | ||
692 | ret = btrfs_insert_some_items(trans, extent_root, path, | ||
693 | keys+i, data_size+i, total-i); | ||
694 | BUG_ON(ret < 0); | ||
695 | |||
696 | if (last && ret > 1) | ||
697 | BUG(); | ||
698 | |||
699 | leaf = path->nodes[0]; | ||
700 | for (c = 0; c < ret; c++) { | ||
701 | int ref_first = keys[i].type == BTRFS_EXTENT_REF_KEY; | ||
702 | |||
703 | /* | ||
704 | * if the first item we inserted was a backref, then | ||
705 | * the EXTENT_ITEM will be the odd c's, else it will | ||
706 | * be the even c's | ||
707 | */ | ||
708 | if ((ref_first && (c % 2)) || | ||
709 | (!ref_first && !(c % 2))) { | ||
710 | struct btrfs_extent_item *itm; | ||
711 | |||
712 | itm = btrfs_item_ptr(leaf, path->slots[0] + c, | ||
713 | struct btrfs_extent_item); | ||
714 | btrfs_set_extent_refs(path->nodes[0], itm, 1); | ||
715 | op->del++; | ||
716 | } else { | ||
717 | struct btrfs_extent_ref *ref; | ||
718 | |||
719 | ref = btrfs_item_ptr(leaf, path->slots[0] + c, | ||
720 | struct btrfs_extent_ref); | ||
721 | btrfs_set_ref_root(leaf, ref, ref_root); | ||
722 | btrfs_set_ref_generation(leaf, ref, | ||
723 | op->generation); | ||
724 | btrfs_set_ref_objectid(leaf, ref, op->level); | ||
725 | btrfs_set_ref_num_refs(leaf, ref, 1); | ||
726 | op->del++; | ||
727 | } | ||
728 | |||
729 | /* | ||
730 | * using del to see when it's ok to free up the | ||
731 | * pending_extent_op. In the case where we insert the | ||
732 | * last item on the list in order to help do batching, | ||
733 | * we need to not free the extent op until we actually | ||
734 | * insert the extent_item | ||
735 | */ | ||
736 | if (op->del == 2) { | ||
737 | unlock_extent(&info->extent_ins, op->bytenr, | ||
738 | op->bytenr + op->num_bytes - 1, | ||
739 | GFP_NOFS); | ||
740 | cur = cur->next; | ||
741 | list_del_init(&op->list); | ||
742 | kfree(op); | ||
743 | if (cur != insert_list) | ||
744 | op = list_entry(cur, | ||
745 | struct pending_extent_op, | ||
746 | list); | ||
747 | } | ||
748 | } | ||
749 | btrfs_mark_buffer_dirty(leaf); | ||
750 | btrfs_release_path(extent_root, path); | ||
751 | |||
752 | /* | ||
753 | * Ok, backrefs and items usually go right next to each other, | ||
754 | * but if we could only insert 1 item that means that we | ||
755 | * inserted on the end of a leaf, and we have no idea what may | ||
756 | * be on the next leaf so we just play it safe. In order to | ||
757 | * try and help this case we insert the last thing on our | ||
758 | * insert list so hopefully it will end up being the last | ||
759 | * thing on the leaf and everything else will be before it, | ||
760 | * which will let us insert a whole bunch of items at the same | ||
761 | * time. | ||
762 | */ | ||
763 | if (ret == 1 && !last && (i + ret < total)) { | ||
764 | /* | ||
765 | * last: where we will pick up the next time around | ||
766 | * i: our current key to insert, will be total - 1 | ||
767 | * cur: the current op we are screwing with | ||
768 | * op: duh | ||
769 | */ | ||
770 | last = i + ret; | ||
771 | i = total - 1; | ||
772 | cur = insert_list->prev; | ||
773 | op = list_entry(cur, struct pending_extent_op, list); | ||
774 | } else if (last) { | ||
775 | /* | ||
776 | * ok we successfully inserted the last item on the | ||
777 | * list, lets reset everything | ||
778 | * | ||
779 | * i: our current key to insert, so where we left off | ||
780 | * last time | ||
781 | * last: done with this | ||
782 | * cur: the op we are messing with | ||
783 | * op: duh | ||
784 | * total: since we inserted the last key, we need to | ||
786 | * decrement total so we don't overflow | ||
786 | */ | ||
787 | i = last; | ||
788 | last = 0; | ||
789 | total--; | ||
790 | if (i < total) { | ||
791 | cur = insert_list->next; | ||
792 | op = list_entry(cur, struct pending_extent_op, | ||
793 | list); | ||
794 | } | ||
795 | } else { | ||
796 | i += ret; | ||
797 | } | ||
798 | |||
799 | cond_resched(); | ||
800 | } | ||
801 | ret = 0; | ||
802 | kfree(keys); | ||
803 | kfree(data_size); | ||
804 | return ret; | ||
805 | } | ||
806 | |||
807 | static noinline int insert_extent_backref(struct btrfs_trans_handle *trans, | 562 | static noinline int insert_extent_backref(struct btrfs_trans_handle *trans, |
808 | struct btrfs_root *root, | 563 | struct btrfs_root *root, |
809 | struct btrfs_path *path, | 564 | struct btrfs_path *path, |
810 | u64 bytenr, u64 parent, | 565 | u64 bytenr, u64 parent, |
811 | u64 ref_root, u64 ref_generation, | 566 | u64 ref_root, u64 ref_generation, |
812 | u64 owner_objectid) | 567 | u64 owner_objectid, |
568 | int refs_to_add) | ||
813 | { | 569 | { |
814 | struct btrfs_key key; | 570 | struct btrfs_key key; |
815 | struct extent_buffer *leaf; | 571 | struct extent_buffer *leaf; |
@@ -829,9 +585,10 @@ static noinline int insert_extent_backref(struct btrfs_trans_handle *trans, | |||
829 | btrfs_set_ref_root(leaf, ref, ref_root); | 585 | btrfs_set_ref_root(leaf, ref, ref_root); |
830 | btrfs_set_ref_generation(leaf, ref, ref_generation); | 586 | btrfs_set_ref_generation(leaf, ref, ref_generation); |
831 | btrfs_set_ref_objectid(leaf, ref, owner_objectid); | 587 | btrfs_set_ref_objectid(leaf, ref, owner_objectid); |
832 | btrfs_set_ref_num_refs(leaf, ref, 1); | 588 | btrfs_set_ref_num_refs(leaf, ref, refs_to_add); |
833 | } else if (ret == -EEXIST) { | 589 | } else if (ret == -EEXIST) { |
834 | u64 existing_owner; | 590 | u64 existing_owner; |
591 | |||
835 | BUG_ON(owner_objectid < BTRFS_FIRST_FREE_OBJECTID); | 592 | BUG_ON(owner_objectid < BTRFS_FIRST_FREE_OBJECTID); |
836 | leaf = path->nodes[0]; | 593 | leaf = path->nodes[0]; |
837 | ref = btrfs_item_ptr(leaf, path->slots[0], | 594 | ref = btrfs_item_ptr(leaf, path->slots[0], |
@@ -845,7 +602,7 @@ static noinline int insert_extent_backref(struct btrfs_trans_handle *trans, | |||
845 | 602 | ||
846 | num_refs = btrfs_ref_num_refs(leaf, ref); | 603 | num_refs = btrfs_ref_num_refs(leaf, ref); |
847 | BUG_ON(num_refs == 0); | 604 | BUG_ON(num_refs == 0); |
848 | btrfs_set_ref_num_refs(leaf, ref, num_refs + 1); | 605 | btrfs_set_ref_num_refs(leaf, ref, num_refs + refs_to_add); |
849 | 606 | ||
850 | existing_owner = btrfs_ref_objectid(leaf, ref); | 607 | existing_owner = btrfs_ref_objectid(leaf, ref); |
851 | if (existing_owner != owner_objectid && | 608 | if (existing_owner != owner_objectid && |
@@ -857,6 +614,7 @@ static noinline int insert_extent_backref(struct btrfs_trans_handle *trans, | |||
857 | } else { | 614 | } else { |
858 | goto out; | 615 | goto out; |
859 | } | 616 | } |
617 | btrfs_unlock_up_safe(path, 1); | ||
860 | btrfs_mark_buffer_dirty(path->nodes[0]); | 618 | btrfs_mark_buffer_dirty(path->nodes[0]); |
861 | out: | 619 | out: |
862 | btrfs_release_path(root, path); | 620 | btrfs_release_path(root, path); |
@@ -865,7 +623,8 @@ out: | |||
865 | 623 | ||
866 | static noinline int remove_extent_backref(struct btrfs_trans_handle *trans, | 624 | static noinline int remove_extent_backref(struct btrfs_trans_handle *trans, |
867 | struct btrfs_root *root, | 625 | struct btrfs_root *root, |
868 | struct btrfs_path *path) | 626 | struct btrfs_path *path, |
627 | int refs_to_drop) | ||
869 | { | 628 | { |
870 | struct extent_buffer *leaf; | 629 | struct extent_buffer *leaf; |
871 | struct btrfs_extent_ref *ref; | 630 | struct btrfs_extent_ref *ref; |
@@ -875,8 +634,8 @@ static noinline int remove_extent_backref(struct btrfs_trans_handle *trans, | |||
875 | leaf = path->nodes[0]; | 634 | leaf = path->nodes[0]; |
876 | ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref); | 635 | ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref); |
877 | num_refs = btrfs_ref_num_refs(leaf, ref); | 636 | num_refs = btrfs_ref_num_refs(leaf, ref); |
878 | BUG_ON(num_refs == 0); | 637 | BUG_ON(num_refs < refs_to_drop); |
879 | num_refs -= 1; | 638 | num_refs -= refs_to_drop; |
880 | if (num_refs == 0) { | 639 | if (num_refs == 0) { |
881 | ret = btrfs_del_item(trans, root, path); | 640 | ret = btrfs_del_item(trans, root, path); |
882 | } else { | 641 | } else { |
@@ -927,332 +686,28 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, | |||
927 | #endif | 686 | #endif |
928 | } | 687 | } |
929 | 688 | ||
930 | static noinline int free_extents(struct btrfs_trans_handle *trans, | ||
931 | struct btrfs_root *extent_root, | ||
932 | struct list_head *del_list) | ||
933 | { | ||
934 | struct btrfs_fs_info *info = extent_root->fs_info; | ||
935 | struct btrfs_path *path; | ||
936 | struct btrfs_key key, found_key; | ||
937 | struct extent_buffer *leaf; | ||
938 | struct list_head *cur; | ||
939 | struct pending_extent_op *op; | ||
940 | struct btrfs_extent_item *ei; | ||
941 | int ret, num_to_del, extent_slot = 0, found_extent = 0; | ||
942 | u32 refs; | ||
943 | u64 bytes_freed = 0; | ||
944 | |||
945 | path = btrfs_alloc_path(); | ||
946 | if (!path) | ||
947 | return -ENOMEM; | ||
948 | path->reada = 1; | ||
949 | |||
950 | search: | ||
951 | /* search for the backref for the current ref we want to delete */ | ||
952 | cur = del_list->next; | ||
953 | op = list_entry(cur, struct pending_extent_op, list); | ||
954 | ret = lookup_extent_backref(trans, extent_root, path, op->bytenr, | ||
955 | op->orig_parent, | ||
956 | extent_root->root_key.objectid, | ||
957 | op->orig_generation, op->level, 1); | ||
958 | if (ret) { | ||
959 | printk(KERN_ERR "btrfs unable to find backref byte nr %llu " | ||
960 | "root %llu gen %llu owner %u\n", | ||
961 | (unsigned long long)op->bytenr, | ||
962 | (unsigned long long)extent_root->root_key.objectid, | ||
963 | (unsigned long long)op->orig_generation, op->level); | ||
964 | btrfs_print_leaf(extent_root, path->nodes[0]); | ||
965 | WARN_ON(1); | ||
966 | goto out; | ||
967 | } | ||
968 | |||
969 | extent_slot = path->slots[0]; | ||
970 | num_to_del = 1; | ||
971 | found_extent = 0; | ||
972 | |||
973 | /* | ||
974 | * if we aren't the first item on the leaf we can move back one and see | ||
975 | * if our ref is right next to our extent item | ||
976 | */ | ||
977 | if (likely(extent_slot)) { | ||
978 | extent_slot--; | ||
979 | btrfs_item_key_to_cpu(path->nodes[0], &found_key, | ||
980 | extent_slot); | ||
981 | if (found_key.objectid == op->bytenr && | ||
982 | found_key.type == BTRFS_EXTENT_ITEM_KEY && | ||
983 | found_key.offset == op->num_bytes) { | ||
984 | num_to_del++; | ||
985 | found_extent = 1; | ||
986 | } | ||
987 | } | ||
988 | |||
989 | /* | ||
990 | * if we didn't find the extent we need to delete the backref and then | ||
991 | * search for the extent item key so we can update its ref count | ||
992 | */ | ||
993 | if (!found_extent) { | ||
994 | key.objectid = op->bytenr; | ||
995 | key.type = BTRFS_EXTENT_ITEM_KEY; | ||
996 | key.offset = op->num_bytes; | ||
997 | |||
998 | ret = remove_extent_backref(trans, extent_root, path); | ||
999 | BUG_ON(ret); | ||
1000 | btrfs_release_path(extent_root, path); | ||
1001 | ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1); | ||
1002 | BUG_ON(ret); | ||
1003 | extent_slot = path->slots[0]; | ||
1004 | } | ||
1005 | |||
1006 | /* this is where we update the ref count for the extent */ | ||
1007 | leaf = path->nodes[0]; | ||
1008 | ei = btrfs_item_ptr(leaf, extent_slot, struct btrfs_extent_item); | ||
1009 | refs = btrfs_extent_refs(leaf, ei); | ||
1010 | BUG_ON(refs == 0); | ||
1011 | refs--; | ||
1012 | btrfs_set_extent_refs(leaf, ei, refs); | ||
1013 | |||
1014 | btrfs_mark_buffer_dirty(leaf); | ||
1015 | |||
1016 | /* | ||
1017 | * This extent needs deleting. The reason cur_slot is extent_slot + | ||
1018 | * num_to_del is because extent_slot points to the slot where the extent | ||
1019 | * is, and if the backref was not right next to the extent we will be | ||
1020 | * deleting at least 1 item, and will want to start searching at the | ||
1021 | * slot directly next to extent_slot. However if we did find the | ||
1022 | * backref next to the extent item then we will be deleting at least 2 | ||
1023 | * items and will want to start searching directly after the ref slot | ||
1024 | */ | ||
1025 | if (!refs) { | ||
1026 | struct list_head *pos, *n, *end; | ||
1027 | int cur_slot = extent_slot+num_to_del; | ||
1028 | u64 super_used; | ||
1029 | u64 root_used; | ||
1030 | |||
1031 | path->slots[0] = extent_slot; | ||
1032 | bytes_freed = op->num_bytes; | ||
1033 | |||
1034 | mutex_lock(&info->pinned_mutex); | ||
1035 | ret = pin_down_bytes(trans, extent_root, op->bytenr, | ||
1036 | op->num_bytes, op->level >= | ||
1037 | BTRFS_FIRST_FREE_OBJECTID); | ||
1038 | mutex_unlock(&info->pinned_mutex); | ||
1039 | BUG_ON(ret < 0); | ||
1040 | op->del = ret; | ||
1041 | |||
1042 | /* | ||
1043 | * we need to see if we can delete multiple things at once, so | ||
1044 | * start looping through the list of extents we are wanting to | ||
1045 | * delete and see if their extent/backrefs are right next to | ||
1046 | * each other and the extents only have 1 ref | ||
1047 | */ | ||
1048 | for (pos = cur->next; pos != del_list; pos = pos->next) { | ||
1049 | struct pending_extent_op *tmp; | ||
1050 | |||
1051 | tmp = list_entry(pos, struct pending_extent_op, list); | ||
1052 | |||
1053 | /* we only want to delete extent+ref at this stage */ | ||
1054 | if (cur_slot >= btrfs_header_nritems(leaf) - 1) | ||
1055 | break; | ||
1056 | |||
1057 | btrfs_item_key_to_cpu(leaf, &found_key, cur_slot); | ||
1058 | if (found_key.objectid != tmp->bytenr || | ||
1059 | found_key.type != BTRFS_EXTENT_ITEM_KEY || | ||
1060 | found_key.offset != tmp->num_bytes) | ||
1061 | break; | ||
1062 | |||
1063 | /* check to make sure this extent only has one ref */ | ||
1064 | ei = btrfs_item_ptr(leaf, cur_slot, | ||
1065 | struct btrfs_extent_item); | ||
1066 | if (btrfs_extent_refs(leaf, ei) != 1) | ||
1067 | break; | ||
1068 | |||
1069 | btrfs_item_key_to_cpu(leaf, &found_key, cur_slot+1); | ||
1070 | if (found_key.objectid != tmp->bytenr || | ||
1071 | found_key.type != BTRFS_EXTENT_REF_KEY || | ||
1072 | found_key.offset != tmp->orig_parent) | ||
1073 | break; | ||
1074 | |||
1075 | /* | ||
1076 | * the ref is right next to the extent, we can set the | ||
1077 | * ref count to 0 since we will delete them both now | ||
1078 | */ | ||
1079 | btrfs_set_extent_refs(leaf, ei, 0); | ||
1080 | |||
1081 | /* pin down the bytes for this extent */ | ||
1082 | mutex_lock(&info->pinned_mutex); | ||
1083 | ret = pin_down_bytes(trans, extent_root, tmp->bytenr, | ||
1084 | tmp->num_bytes, tmp->level >= | ||
1085 | BTRFS_FIRST_FREE_OBJECTID); | ||
1086 | mutex_unlock(&info->pinned_mutex); | ||
1087 | BUG_ON(ret < 0); | ||
1088 | |||
1089 | /* | ||
1090 | * use the del field to tell if we need to go ahead and | ||
1091 | * free up the extent when we delete the item or not. | ||
1092 | */ | ||
1093 | tmp->del = ret; | ||
1094 | bytes_freed += tmp->num_bytes; | ||
1095 | |||
1096 | num_to_del += 2; | ||
1097 | cur_slot += 2; | ||
1098 | } | ||
1099 | end = pos; | ||
1100 | |||
1101 | /* update the free space counters */ | ||
1102 | spin_lock(&info->delalloc_lock); | ||
1103 | super_used = btrfs_super_bytes_used(&info->super_copy); | ||
1104 | btrfs_set_super_bytes_used(&info->super_copy, | ||
1105 | super_used - bytes_freed); | ||
1106 | |||
1107 | root_used = btrfs_root_used(&extent_root->root_item); | ||
1108 | btrfs_set_root_used(&extent_root->root_item, | ||
1109 | root_used - bytes_freed); | ||
1110 | spin_unlock(&info->delalloc_lock); | ||
1111 | |||
1112 | /* delete the items */ | ||
1113 | ret = btrfs_del_items(trans, extent_root, path, | ||
1114 | path->slots[0], num_to_del); | ||
1115 | BUG_ON(ret); | ||
1116 | |||
1117 | /* | ||
1118 | * loop through the extents we deleted and do the cleanup work | ||
1119 | * on them | ||
1120 | */ | ||
1121 | for (pos = cur, n = pos->next; pos != end; | ||
1122 | pos = n, n = pos->next) { | ||
1123 | struct pending_extent_op *tmp; | ||
1124 | tmp = list_entry(pos, struct pending_extent_op, list); | ||
1125 | |||
1126 | /* | ||
1127 | * remember tmp->del tells us whether or not we pinned | ||
1128 | * down the extent | ||
1129 | */ | ||
1130 | ret = update_block_group(trans, extent_root, | ||
1131 | tmp->bytenr, tmp->num_bytes, 0, | ||
1132 | tmp->del); | ||
1133 | BUG_ON(ret); | ||
1134 | |||
1135 | list_del_init(&tmp->list); | ||
1136 | unlock_extent(&info->extent_ins, tmp->bytenr, | ||
1137 | tmp->bytenr + tmp->num_bytes - 1, | ||
1138 | GFP_NOFS); | ||
1139 | kfree(tmp); | ||
1140 | } | ||
1141 | } else if (refs && found_extent) { | ||
1142 | /* | ||
1143 | * the ref and extent were right next to each other, but the | ||
1144 | * extent still has a ref, so just free the backref and keep | ||
1145 | * going | ||
1146 | */ | ||
1147 | ret = remove_extent_backref(trans, extent_root, path); | ||
1148 | BUG_ON(ret); | ||
1149 | |||
1150 | list_del_init(&op->list); | ||
1151 | unlock_extent(&info->extent_ins, op->bytenr, | ||
1152 | op->bytenr + op->num_bytes - 1, GFP_NOFS); | ||
1153 | kfree(op); | ||
1154 | } else { | ||
1155 | /* | ||
1156 | * the extent has multiple refs and the backref we were looking | ||
1157 | * for was not right next to it, so just unlock and go next, | ||
1158 | * we're good to go | ||
1159 | */ | ||
1160 | list_del_init(&op->list); | ||
1161 | unlock_extent(&info->extent_ins, op->bytenr, | ||
1162 | op->bytenr + op->num_bytes - 1, GFP_NOFS); | ||
1163 | kfree(op); | ||
1164 | } | ||
1165 | |||
1166 | btrfs_release_path(extent_root, path); | ||
1167 | if (!list_empty(del_list)) | ||
1168 | goto search; | ||
1169 | |||
1170 | out: | ||
1171 | btrfs_free_path(path); | ||
1172 | return ret; | ||
1173 | } | ||
1174 | |||
1175 | static int __btrfs_update_extent_ref(struct btrfs_trans_handle *trans, | 689 | static int __btrfs_update_extent_ref(struct btrfs_trans_handle *trans, |
1176 | struct btrfs_root *root, u64 bytenr, | 690 | struct btrfs_root *root, u64 bytenr, |
691 | u64 num_bytes, | ||
1177 | u64 orig_parent, u64 parent, | 692 | u64 orig_parent, u64 parent, |
1178 | u64 orig_root, u64 ref_root, | 693 | u64 orig_root, u64 ref_root, |
1179 | u64 orig_generation, u64 ref_generation, | 694 | u64 orig_generation, u64 ref_generation, |
1180 | u64 owner_objectid) | 695 | u64 owner_objectid) |
1181 | { | 696 | { |
1182 | int ret; | 697 | int ret; |
1183 | struct btrfs_root *extent_root = root->fs_info->extent_root; | 698 | int pin = owner_objectid < BTRFS_FIRST_FREE_OBJECTID; |
1184 | struct btrfs_path *path; | ||
1185 | 699 | ||
1186 | if (root == root->fs_info->extent_root) { | 700 | ret = btrfs_update_delayed_ref(trans, bytenr, num_bytes, |
1187 | struct pending_extent_op *extent_op; | 701 | orig_parent, parent, orig_root, |
1188 | u64 num_bytes; | 702 | ref_root, orig_generation, |
1189 | 703 | ref_generation, owner_objectid, pin); | |
1190 | BUG_ON(owner_objectid >= BTRFS_MAX_LEVEL); | ||
1191 | num_bytes = btrfs_level_size(root, (int)owner_objectid); | ||
1192 | mutex_lock(&root->fs_info->extent_ins_mutex); | ||
1193 | if (test_range_bit(&root->fs_info->extent_ins, bytenr, | ||
1194 | bytenr + num_bytes - 1, EXTENT_WRITEBACK, 0)) { | ||
1195 | u64 priv; | ||
1196 | ret = get_state_private(&root->fs_info->extent_ins, | ||
1197 | bytenr, &priv); | ||
1198 | BUG_ON(ret); | ||
1199 | extent_op = (struct pending_extent_op *) | ||
1200 | (unsigned long)priv; | ||
1201 | BUG_ON(extent_op->parent != orig_parent); | ||
1202 | BUG_ON(extent_op->generation != orig_generation); | ||
1203 | |||
1204 | extent_op->parent = parent; | ||
1205 | extent_op->generation = ref_generation; | ||
1206 | } else { | ||
1207 | extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); | ||
1208 | BUG_ON(!extent_op); | ||
1209 | |||
1210 | extent_op->type = PENDING_BACKREF_UPDATE; | ||
1211 | extent_op->bytenr = bytenr; | ||
1212 | extent_op->num_bytes = num_bytes; | ||
1213 | extent_op->parent = parent; | ||
1214 | extent_op->orig_parent = orig_parent; | ||
1215 | extent_op->generation = ref_generation; | ||
1216 | extent_op->orig_generation = orig_generation; | ||
1217 | extent_op->level = (int)owner_objectid; | ||
1218 | INIT_LIST_HEAD(&extent_op->list); | ||
1219 | extent_op->del = 0; | ||
1220 | |||
1221 | set_extent_bits(&root->fs_info->extent_ins, | ||
1222 | bytenr, bytenr + num_bytes - 1, | ||
1223 | EXTENT_WRITEBACK, GFP_NOFS); | ||
1224 | set_state_private(&root->fs_info->extent_ins, | ||
1225 | bytenr, (unsigned long)extent_op); | ||
1226 | } | ||
1227 | mutex_unlock(&root->fs_info->extent_ins_mutex); | ||
1228 | return 0; | ||
1229 | } | ||
1230 | |||
1231 | path = btrfs_alloc_path(); | ||
1232 | if (!path) | ||
1233 | return -ENOMEM; | ||
1234 | ret = lookup_extent_backref(trans, extent_root, path, | ||
1235 | bytenr, orig_parent, orig_root, | ||
1236 | orig_generation, owner_objectid, 1); | ||
1237 | if (ret) | ||
1238 | goto out; | ||
1239 | ret = remove_extent_backref(trans, extent_root, path); | ||
1240 | if (ret) | ||
1241 | goto out; | ||
1242 | ret = insert_extent_backref(trans, extent_root, path, bytenr, | ||
1243 | parent, ref_root, ref_generation, | ||
1244 | owner_objectid); | ||
1245 | BUG_ON(ret); | 704 | BUG_ON(ret); |
1246 | finish_current_insert(trans, extent_root, 0); | ||
1247 | del_pending_extents(trans, extent_root, 0); | ||
1248 | out: | ||
1249 | btrfs_free_path(path); | ||
1250 | return ret; | 705 | return ret; |
1251 | } | 706 | } |
1252 | 707 | ||
1253 | int btrfs_update_extent_ref(struct btrfs_trans_handle *trans, | 708 | int btrfs_update_extent_ref(struct btrfs_trans_handle *trans, |
1254 | struct btrfs_root *root, u64 bytenr, | 709 | struct btrfs_root *root, u64 bytenr, |
1255 | u64 orig_parent, u64 parent, | 710 | u64 num_bytes, u64 orig_parent, u64 parent, |
1256 | u64 ref_root, u64 ref_generation, | 711 | u64 ref_root, u64 ref_generation, |
1257 | u64 owner_objectid) | 712 | u64 owner_objectid) |
1258 | { | 713 | { |
@@ -1260,20 +715,36 @@ int btrfs_update_extent_ref(struct btrfs_trans_handle *trans, | |||
1260 | if (ref_root == BTRFS_TREE_LOG_OBJECTID && | 715 | if (ref_root == BTRFS_TREE_LOG_OBJECTID && |
1261 | owner_objectid < BTRFS_FIRST_FREE_OBJECTID) | 716 | owner_objectid < BTRFS_FIRST_FREE_OBJECTID) |
1262 | return 0; | 717 | return 0; |
1263 | ret = __btrfs_update_extent_ref(trans, root, bytenr, orig_parent, | 718 | |
1264 | parent, ref_root, ref_root, | 719 | ret = __btrfs_update_extent_ref(trans, root, bytenr, num_bytes, |
1265 | ref_generation, ref_generation, | 720 | orig_parent, parent, ref_root, |
1266 | owner_objectid); | 721 | ref_root, ref_generation, |
722 | ref_generation, owner_objectid); | ||
1267 | return ret; | 723 | return ret; |
1268 | } | 724 | } |
1269 | |||
1270 | static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, | 725 | static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, |
1271 | struct btrfs_root *root, u64 bytenr, | 726 | struct btrfs_root *root, u64 bytenr, |
727 | u64 num_bytes, | ||
1272 | u64 orig_parent, u64 parent, | 728 | u64 orig_parent, u64 parent, |
1273 | u64 orig_root, u64 ref_root, | 729 | u64 orig_root, u64 ref_root, |
1274 | u64 orig_generation, u64 ref_generation, | 730 | u64 orig_generation, u64 ref_generation, |
1275 | u64 owner_objectid) | 731 | u64 owner_objectid) |
1276 | { | 732 | { |
733 | int ret; | ||
734 | |||
735 | ret = btrfs_add_delayed_ref(trans, bytenr, num_bytes, parent, ref_root, | ||
736 | ref_generation, owner_objectid, | ||
737 | BTRFS_ADD_DELAYED_REF, 0); | ||
738 | BUG_ON(ret); | ||
739 | return ret; | ||
740 | } | ||
741 | |||
742 | static noinline_for_stack int add_extent_ref(struct btrfs_trans_handle *trans, | ||
743 | struct btrfs_root *root, u64 bytenr, | ||
744 | u64 num_bytes, u64 parent, u64 ref_root, | ||
745 | u64 ref_generation, u64 owner_objectid, | ||
746 | int refs_to_add) | ||
747 | { | ||
1277 | struct btrfs_path *path; | 748 | struct btrfs_path *path; |
1278 | int ret; | 749 | int ret; |
1279 | struct btrfs_key key; | 750 | struct btrfs_key key; |
@@ -1286,17 +757,24 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, | |||
1286 | return -ENOMEM; | 757 | return -ENOMEM; |
1287 | 758 | ||
1288 | path->reada = 1; | 759 | path->reada = 1; |
760 | path->leave_spinning = 1; | ||
1289 | key.objectid = bytenr; | 761 | key.objectid = bytenr; |
1290 | key.type = BTRFS_EXTENT_ITEM_KEY; | 762 | key.type = BTRFS_EXTENT_ITEM_KEY; |
1291 | key.offset = (u64)-1; | 763 | key.offset = num_bytes; |
1292 | 764 | ||
1293 | ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path, | 765 | /* first find the extent item and update its reference count */ |
1294 | 0, 1); | 766 | ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, |
1295 | if (ret < 0) | 767 | path, 0, 1); |
768 | if (ret < 0) { | ||
769 | btrfs_set_path_blocking(path); | ||
1296 | return ret; | 770 | return ret; |
1297 | BUG_ON(ret == 0 || path->slots[0] == 0); | 771 | } |
1298 | 772 | ||
1299 | path->slots[0]--; | 773 | if (ret > 0) { |
774 | WARN_ON(1); | ||
775 | btrfs_free_path(path); | ||
776 | return -EIO; | ||
777 | } | ||
1300 | l = path->nodes[0]; | 778 | l = path->nodes[0]; |
1301 | 779 | ||
1302 | btrfs_item_key_to_cpu(l, &key, path->slots[0]); | 780 | btrfs_item_key_to_cpu(l, &key, path->slots[0]); |
@@ -1310,21 +788,24 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, | |||
1310 | BUG_ON(key.type != BTRFS_EXTENT_ITEM_KEY); | 788 | BUG_ON(key.type != BTRFS_EXTENT_ITEM_KEY); |
1311 | 789 | ||
1312 | item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item); | 790 | item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item); |
791 | |||
1313 | refs = btrfs_extent_refs(l, item); | 792 | refs = btrfs_extent_refs(l, item); |
1314 | btrfs_set_extent_refs(l, item, refs + 1); | 793 | btrfs_set_extent_refs(l, item, refs + refs_to_add); |
794 | btrfs_unlock_up_safe(path, 1); | ||
795 | |||
1315 | btrfs_mark_buffer_dirty(path->nodes[0]); | 796 | btrfs_mark_buffer_dirty(path->nodes[0]); |
1316 | 797 | ||
1317 | btrfs_release_path(root->fs_info->extent_root, path); | 798 | btrfs_release_path(root->fs_info->extent_root, path); |
1318 | 799 | ||
1319 | path->reada = 1; | 800 | path->reada = 1; |
801 | path->leave_spinning = 1; | ||
802 | |||
803 | /* now insert the actual backref */ | ||
1320 | ret = insert_extent_backref(trans, root->fs_info->extent_root, | 804 | ret = insert_extent_backref(trans, root->fs_info->extent_root, |
1321 | path, bytenr, parent, | 805 | path, bytenr, parent, |
1322 | ref_root, ref_generation, | 806 | ref_root, ref_generation, |
1323 | owner_objectid); | 807 | owner_objectid, refs_to_add); |
1324 | BUG_ON(ret); | 808 | BUG_ON(ret); |
1325 | finish_current_insert(trans, root->fs_info->extent_root, 0); | ||
1326 | del_pending_extents(trans, root->fs_info->extent_root, 0); | ||
1327 | |||
1328 | btrfs_free_path(path); | 809 | btrfs_free_path(path); |
1329 | return 0; | 810 | return 0; |
1330 | } | 811 | } |
@@ -1339,68 +820,278 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, | |||
1339 | if (ref_root == BTRFS_TREE_LOG_OBJECTID && | 820 | if (ref_root == BTRFS_TREE_LOG_OBJECTID && |
1340 | owner_objectid < BTRFS_FIRST_FREE_OBJECTID) | 821 | owner_objectid < BTRFS_FIRST_FREE_OBJECTID) |
1341 | return 0; | 822 | return 0; |
1342 | ret = __btrfs_inc_extent_ref(trans, root, bytenr, 0, parent, | 823 | |
824 | ret = __btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0, parent, | ||
1343 | 0, ref_root, 0, ref_generation, | 825 | 0, ref_root, 0, ref_generation, |
1344 | owner_objectid); | 826 | owner_objectid); |
1345 | return ret; | 827 | return ret; |
1346 | } | 828 | } |
1347 | 829 | ||
1348 | int btrfs_extent_post_op(struct btrfs_trans_handle *trans, | 830 | static int drop_delayed_ref(struct btrfs_trans_handle *trans, |
1349 | struct btrfs_root *root) | 831 | struct btrfs_root *root, |
832 | struct btrfs_delayed_ref_node *node) | ||
833 | { | ||
834 | int ret = 0; | ||
835 | struct btrfs_delayed_ref *ref = btrfs_delayed_node_to_ref(node); | ||
836 | |||
837 | BUG_ON(node->ref_mod == 0); | ||
838 | ret = __btrfs_free_extent(trans, root, node->bytenr, node->num_bytes, | ||
839 | node->parent, ref->root, ref->generation, | ||
840 | ref->owner_objectid, ref->pin, node->ref_mod); | ||
841 | |||
842 | return ret; | ||
843 | } | ||
844 | |||
845 | /* helper function to actually process a single delayed ref entry */ | ||
846 | static noinline int run_one_delayed_ref(struct btrfs_trans_handle *trans, | ||
847 | struct btrfs_root *root, | ||
848 | struct btrfs_delayed_ref_node *node, | ||
849 | int insert_reserved) | ||
1350 | { | 850 | { |
1351 | u64 start; | ||
1352 | u64 end; | ||
1353 | int ret; | 851 | int ret; |
852 | struct btrfs_delayed_ref *ref; | ||
1354 | 853 | ||
1355 | while(1) { | 854 | if (node->parent == (u64)-1) { |
1356 | finish_current_insert(trans, root->fs_info->extent_root, 1); | 855 | struct btrfs_delayed_ref_head *head; |
1357 | del_pending_extents(trans, root->fs_info->extent_root, 1); | 856 | /* |
857 | * we've hit the end of the chain and we were supposed | ||
858 | * to insert this extent into the tree. But, it got | ||
859 | * deleted before we ever needed to insert it, so all | ||
860 | * we have to do is clean up the accounting | ||
861 | */ | ||
862 | if (insert_reserved) { | ||
863 | update_reserved_extents(root, node->bytenr, | ||
864 | node->num_bytes, 0); | ||
865 | } | ||
866 | head = btrfs_delayed_node_to_head(node); | ||
867 | mutex_unlock(&head->mutex); | ||
868 | return 0; | ||
869 | } | ||
1358 | 870 | ||
1359 | /* is there more work to do? */ | 871 | ref = btrfs_delayed_node_to_ref(node); |
1360 | ret = find_first_extent_bit(&root->fs_info->pending_del, | 872 | if (ref->action == BTRFS_ADD_DELAYED_REF) { |
1361 | 0, &start, &end, EXTENT_WRITEBACK); | 873 | if (insert_reserved) { |
1362 | if (!ret) | 874 | struct btrfs_key ins; |
1363 | continue; | 875 | |
1364 | ret = find_first_extent_bit(&root->fs_info->extent_ins, | 876 | ins.objectid = node->bytenr; |
1365 | 0, &start, &end, EXTENT_WRITEBACK); | 877 | ins.offset = node->num_bytes; |
1366 | if (!ret) | 878 | ins.type = BTRFS_EXTENT_ITEM_KEY; |
1367 | continue; | 879 | |
1368 | break; | 880 | /* record the full extent allocation */ |
881 | ret = __btrfs_alloc_reserved_extent(trans, root, | ||
882 | node->parent, ref->root, | ||
883 | ref->generation, ref->owner_objectid, | ||
884 | &ins, node->ref_mod); | ||
885 | update_reserved_extents(root, node->bytenr, | ||
886 | node->num_bytes, 0); | ||
887 | } else { | ||
888 | /* just add one backref */ | ||
889 | ret = add_extent_ref(trans, root, node->bytenr, | ||
890 | node->num_bytes, | ||
891 | node->parent, ref->root, ref->generation, | ||
892 | ref->owner_objectid, node->ref_mod); | ||
893 | } | ||
894 | BUG_ON(ret); | ||
895 | } else if (ref->action == BTRFS_DROP_DELAYED_REF) { | ||
896 | WARN_ON(insert_reserved); | ||
897 | ret = drop_delayed_ref(trans, root, node); | ||
1369 | } | 898 | } |
1370 | return 0; | 899 | return 0; |
1371 | } | 900 | } |
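
Condensed, run_one_delayed_ref is a three-way dispatch: head nodes only settle reservation accounting, add refs either insert the full extent or bump a backref, and drop refs free. The sketch below is illustrative only; the enum and strings are made up, not kernel definitions:

    #include <stdio.h>

    enum action { HEAD_NODE, ADD_REF, DROP_REF };

    static const char *run_one(enum action a, int insert_reserved)
    {
            switch (a) {
            case HEAD_NODE:         /* extent deleted before insert */
                    return insert_reserved ? "release reservation" : "nothing";
            case ADD_REF:
                    return insert_reserved ? "insert extent item"
                                           : "add one backref";
            case DROP_REF:          /* insert_reserved never set here */
                    return "free the extent";
            }
            return "?";
    }

    int main(void)
    {
            printf("%s\n", run_one(ADD_REF, 1));
            printf("%s\n", run_one(DROP_REF, 0));
            return 0;
    }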
1372 | 901 | ||
1373 | int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans, | 902 | static noinline struct btrfs_delayed_ref_node * |
1374 | struct btrfs_root *root, u64 bytenr, | 903 | select_delayed_ref(struct btrfs_delayed_ref_head *head) |
1375 | u64 num_bytes, u32 *refs) | ||
1376 | { | 904 | { |
1377 | struct btrfs_path *path; | 905 | struct rb_node *node; |
906 | struct btrfs_delayed_ref_node *ref; | ||
907 | int action = BTRFS_ADD_DELAYED_REF; | ||
908 | again: | ||
909 | /* | ||
910 | * select delayed ref of type BTRFS_ADD_DELAYED_REF first. | ||
911 | * this prevents the ref count from going down to zero when | ||
912 | * there are still pending delayed refs. | ||
913 | */ | ||
914 | node = rb_prev(&head->node.rb_node); | ||
915 | while (1) { | ||
916 | if (!node) | ||
917 | break; | ||
918 | ref = rb_entry(node, struct btrfs_delayed_ref_node, | ||
919 | rb_node); | ||
920 | if (ref->bytenr != head->node.bytenr) | ||
921 | break; | ||
922 | if (btrfs_delayed_node_to_ref(ref)->action == action) | ||
923 | return ref; | ||
924 | node = rb_prev(node); | ||
925 | } | ||
926 | if (action == BTRFS_ADD_DELAYED_REF) { | ||
927 | action = BTRFS_DROP_DELAYED_REF; | ||
928 | goto again; | ||
929 | } | ||
930 | return NULL; | ||
931 | } | ||
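
The ordering enforced by select_delayed_ref matters: with refs == 1 on disk and a pending {+1, -1} pair, running the drop first would take the count to zero and free an extent that still has an addition queued. A tiny demonstration of the two orderings (illustrative, not kernel code):

    #include <stdio.h>

    /* returns 1 if the refcount hits zero while ops are still pending */
    static int freed_early(const int *ops, int n)
    {
            int refs = 1;                   /* current on-disk count */
            for (int i = 0; i < n; i++) {
                    refs += ops[i];
                    if (refs == 0 && i < n - 1)
                            return 1;       /* freed with work queued */
            }
            return 0;
    }

    int main(void)
    {
            int add_first[]  = { +1, -1 };  /* what select_delayed_ref picks */
            int drop_first[] = { -1, +1 };
            printf("adds first : freed early? %d\n", freed_early(add_first, 2));
            printf("drops first: freed early? %d\n", freed_early(drop_first, 2));
            return 0;
    }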
932 | |||
933 | static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, | ||
934 | struct btrfs_root *root, | ||
935 | struct list_head *cluster) | ||
936 | { | ||
937 | struct btrfs_delayed_ref_root *delayed_refs; | ||
938 | struct btrfs_delayed_ref_node *ref; | ||
939 | struct btrfs_delayed_ref_head *locked_ref = NULL; | ||
1378 | int ret; | 940 | int ret; |
1379 | struct btrfs_key key; | 941 | int count = 0; |
1380 | struct extent_buffer *l; | 942 | int must_insert_reserved = 0; |
1381 | struct btrfs_extent_item *item; | ||
1382 | 943 | ||
1383 | WARN_ON(num_bytes < root->sectorsize); | 944 | delayed_refs = &trans->transaction->delayed_refs; |
1384 | path = btrfs_alloc_path(); | 945 | while (1) { |
1385 | path->reada = 1; | 946 | if (!locked_ref) { |
1386 | key.objectid = bytenr; | 947 | /* pick a new head ref from the cluster list */ |
1387 | key.offset = num_bytes; | 948 | if (list_empty(cluster)) |
1388 | btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); | 949 | break; |
1389 | ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path, | 950 | |
1390 | 0, 0); | 951 | locked_ref = list_entry(cluster->next, |
1391 | if (ret < 0) | 952 | struct btrfs_delayed_ref_head, cluster); |
1392 | goto out; | 953 | |
1393 | if (ret != 0) { | 954 | /* grab the lock that says we are going to process |
1394 | btrfs_print_leaf(root, path->nodes[0]); | 955 | * all the refs for this head */ |
1395 | printk(KERN_INFO "btrfs failed to find block number %llu\n", | 956 | ret = btrfs_delayed_ref_lock(trans, locked_ref); |
1396 | (unsigned long long)bytenr); | 957 | |
1397 | BUG(); | 958 | /* |
959 | * we may have dropped the spin lock to get the head | ||
960 | * mutex lock, and that might have given someone else | ||
961 | * time to free the head. If that's true, it has been | ||
962 | * removed from our list and we can move on. | ||
963 | */ | ||
964 | if (ret == -EAGAIN) { | ||
965 | locked_ref = NULL; | ||
966 | count++; | ||
967 | continue; | ||
968 | } | ||
969 | } | ||
970 | |||
971 | /* | ||
972 | * record the must_insert_reserved flag before we | ||
973 | * drop the spin lock. | ||
974 | */ | ||
975 | must_insert_reserved = locked_ref->must_insert_reserved; | ||
976 | locked_ref->must_insert_reserved = 0; | ||
977 | |||
978 | /* | ||
979 | * locked_ref is the head node, so we have to go one | ||
980 | * node back for any delayed ref updates | ||
981 | */ | ||
982 | ref = select_delayed_ref(locked_ref); | ||
983 | if (!ref) { | ||
984 | /* All delayed refs have been processed. Go ahead | ||
985 | * and send the head node to run_one_delayed_ref, | ||
986 | * so that any accounting fixes can happen | ||
987 | */ | ||
988 | ref = &locked_ref->node; | ||
989 | list_del_init(&locked_ref->cluster); | ||
990 | locked_ref = NULL; | ||
991 | } | ||
992 | |||
993 | ref->in_tree = 0; | ||
994 | rb_erase(&ref->rb_node, &delayed_refs->root); | ||
995 | delayed_refs->num_entries--; | ||
996 | spin_unlock(&delayed_refs->lock); | ||
997 | |||
998 | ret = run_one_delayed_ref(trans, root, ref, | ||
999 | must_insert_reserved); | ||
1000 | BUG_ON(ret); | ||
1001 | btrfs_put_delayed_ref(ref); | ||
1002 | |||
1003 | count++; | ||
1004 | cond_resched(); | ||
1005 | spin_lock(&delayed_refs->lock); | ||
1006 | } | ||
1007 | return count; | ||
1008 | } | ||
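
The -EAGAIN branch exists because delayed_refs->lock is a spinlock while the per-head mutex can sleep, so the code must drop the former before taking the latter and then re-validate what it found. A userspace analogue of that drop/take/re-check dance, using pthreads purely for illustration:

    #include <pthread.h>
    #include <stdio.h>

    struct head {
            pthread_mutex_t mutex;
            int freed;      /* set if someone removed the head meanwhile */
    };

    /* 0 when we own the head, -1 (the -EAGAIN case) when it vanished */
    static int lock_head(pthread_mutex_t *list_lock, struct head *h)
    {
            pthread_mutex_unlock(list_lock); /* never sleep holding this */
            pthread_mutex_lock(&h->mutex);   /* may sleep */
            pthread_mutex_lock(list_lock);
            if (h->freed) {                  /* re-validate after the gap */
                    pthread_mutex_unlock(&h->mutex);
                    return -1;
            }
            return 0;
    }

    int main(void)
    {
            pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
            struct head h = { PTHREAD_MUTEX_INITIALIZER, 0 };

            pthread_mutex_lock(&list_lock);
            printf("locked: %d\n", lock_head(&list_lock, &h));
            pthread_mutex_unlock(&h.mutex);
            pthread_mutex_unlock(&list_lock);
            return 0;
    }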
1009 | |||
1010 | /* | ||
1011 | * this starts processing the delayed reference count updates and | ||
1012 | * extent insertions we have queued up so far. count can be | ||
1013 | * 0, which means to process everything in the tree at the start | ||
1014 | * of the run (but not newly added entries), or it can be some target | ||
1015 | * number you'd like to process. | ||
1016 | */ | ||
1017 | int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, | ||
1018 | struct btrfs_root *root, unsigned long count) | ||
1019 | { | ||
1020 | struct rb_node *node; | ||
1021 | struct btrfs_delayed_ref_root *delayed_refs; | ||
1022 | struct btrfs_delayed_ref_node *ref; | ||
1023 | struct list_head cluster; | ||
1024 | int ret; | ||
1025 | int run_all = count == (unsigned long)-1; | ||
1026 | int run_most = 0; | ||
1027 | |||
1028 | if (root == root->fs_info->extent_root) | ||
1029 | root = root->fs_info->tree_root; | ||
1030 | |||
1031 | delayed_refs = &trans->transaction->delayed_refs; | ||
1032 | INIT_LIST_HEAD(&cluster); | ||
1033 | again: | ||
1034 | spin_lock(&delayed_refs->lock); | ||
1035 | if (count == 0) { | ||
1036 | count = delayed_refs->num_entries * 2; | ||
1037 | run_most = 1; | ||
1038 | } | ||
1039 | while (1) { | ||
1040 | if (!(run_all || run_most) && | ||
1041 | delayed_refs->num_heads_ready < 64) | ||
1042 | break; | ||
1043 | |||
1044 | /* | ||
1045 | * go find something we can process in the rbtree. We start at | ||
1046 | * the beginning of the tree, and then build a cluster | ||
1047 | * of refs to process starting at the first one we are able to | ||
1048 | * lock | ||
1049 | */ | ||
1050 | ret = btrfs_find_ref_cluster(trans, &cluster, | ||
1051 | delayed_refs->run_delayed_start); | ||
1052 | if (ret) | ||
1053 | break; | ||
1054 | |||
1055 | ret = run_clustered_refs(trans, root, &cluster); | ||
1056 | BUG_ON(ret < 0); | ||
1057 | |||
1058 | count -= min_t(unsigned long, ret, count); | ||
1059 | |||
1060 | if (count == 0) | ||
1061 | break; | ||
1062 | } | ||
1063 | |||
1064 | if (run_all) { | ||
1065 | node = rb_first(&delayed_refs->root); | ||
1066 | if (!node) | ||
1067 | goto out; | ||
1068 | count = (unsigned long)-1; | ||
1069 | |||
1070 | while (node) { | ||
1071 | ref = rb_entry(node, struct btrfs_delayed_ref_node, | ||
1072 | rb_node); | ||
1073 | if (btrfs_delayed_ref_is_head(ref)) { | ||
1074 | struct btrfs_delayed_ref_head *head; | ||
1075 | |||
1076 | head = btrfs_delayed_node_to_head(ref); | ||
1077 | atomic_inc(&ref->refs); | ||
1078 | |||
1079 | spin_unlock(&delayed_refs->lock); | ||
1080 | mutex_lock(&head->mutex); | ||
1081 | mutex_unlock(&head->mutex); | ||
1082 | |||
1083 | btrfs_put_delayed_ref(ref); | ||
1084 | cond_resched(); | ||
1085 | goto again; | ||
1086 | } | ||
1087 | node = rb_next(node); | ||
1088 | } | ||
1089 | spin_unlock(&delayed_refs->lock); | ||
1090 | schedule_timeout(1); | ||
1091 | goto again; | ||
1398 | } | 1092 | } |
1399 | l = path->nodes[0]; | ||
1400 | item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item); | ||
1401 | *refs = btrfs_extent_refs(l, item); | ||
1402 | out: | 1093 | out: |
1403 | btrfs_free_path(path); | 1094 | spin_unlock(&delayed_refs->lock); |
1404 | return 0; | 1095 | return 0; |
1405 | } | 1096 | } |
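
The count convention described in the comment (0 means the backlog present at entry, (unsigned long)-1 means run to empty, anything else is a target) can be modelled in a few lines. The queue below is a hypothetical stand-in and drops details such as the num_entries * 2 fudge and the cluster locking:

    #include <stddef.h>
    #include <stdio.h>

    struct ref_queue { size_t num_entries; };

    static size_t run_some(struct ref_queue *q, size_t want)
    {
            size_t done = want < q->num_entries ? want : q->num_entries;
            q->num_entries -= done;         /* pretend we processed them */
            return done;
    }

    static void run_delayed(struct ref_queue *q, unsigned long count)
    {
            int run_all = count == (unsigned long)-1;

            if (count == 0)
                    count = q->num_entries; /* snapshot the current backlog */
            while (count > 0 && q->num_entries > 0) {
                    size_t done = run_some(q, 64);  /* one "cluster" */
                    count -= done < count ? done : count;
            }
            if (run_all)                    /* drain late arrivals too */
                    while (q->num_entries > 0)
                            run_some(q, 64);
    }

    int main(void)
    {
            struct ref_queue q = { .num_entries = 200 };
            run_delayed(&q, 130);           /* explicit target */
            printf("left: %zu\n", q.num_entries);
            return 0;
    }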
1406 | 1097 | ||
@@ -1624,7 +1315,7 @@ noinline int btrfs_inc_ref(struct btrfs_trans_handle *trans, | |||
1624 | int refi = 0; | 1315 | int refi = 0; |
1625 | int slot; | 1316 | int slot; |
1626 | int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *, | 1317 | int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *, |
1627 | u64, u64, u64, u64, u64, u64, u64, u64); | 1318 | u64, u64, u64, u64, u64, u64, u64, u64, u64); |
1628 | 1319 | ||
1629 | ref_root = btrfs_header_owner(buf); | 1320 | ref_root = btrfs_header_owner(buf); |
1630 | ref_generation = btrfs_header_generation(buf); | 1321 | ref_generation = btrfs_header_generation(buf); |
@@ -1696,12 +1387,19 @@ noinline int btrfs_inc_ref(struct btrfs_trans_handle *trans, | |||
1696 | 1387 | ||
1697 | if (level == 0) { | 1388 | if (level == 0) { |
1698 | btrfs_item_key_to_cpu(buf, &key, slot); | 1389 | btrfs_item_key_to_cpu(buf, &key, slot); |
1390 | fi = btrfs_item_ptr(buf, slot, | ||
1391 | struct btrfs_file_extent_item); | ||
1392 | |||
1393 | bytenr = btrfs_file_extent_disk_bytenr(buf, fi); | ||
1394 | if (bytenr == 0) | ||
1395 | continue; | ||
1699 | 1396 | ||
1700 | ret = process_func(trans, root, bytenr, | 1397 | ret = process_func(trans, root, bytenr, |
1701 | orig_buf->start, buf->start, | 1398 | btrfs_file_extent_disk_num_bytes(buf, fi), |
1702 | orig_root, ref_root, | 1399 | orig_buf->start, buf->start, |
1703 | orig_generation, ref_generation, | 1400 | orig_root, ref_root, |
1704 | key.objectid); | 1401 | orig_generation, ref_generation, |
1402 | key.objectid); | ||
1705 | 1403 | ||
1706 | if (ret) { | 1404 | if (ret) { |
1707 | faili = slot; | 1405 | faili = slot; |
@@ -1709,7 +1407,7 @@ noinline int btrfs_inc_ref(struct btrfs_trans_handle *trans, | |||
1709 | goto fail; | 1407 | goto fail; |
1710 | } | 1408 | } |
1711 | } else { | 1409 | } else { |
1712 | ret = process_func(trans, root, bytenr, | 1410 | ret = process_func(trans, root, bytenr, buf->len, |
1713 | orig_buf->start, buf->start, | 1411 | orig_buf->start, buf->start, |
1714 | orig_root, ref_root, | 1412 | orig_root, ref_root, |
1715 | orig_generation, ref_generation, | 1413 | orig_generation, ref_generation, |
@@ -1786,17 +1484,17 @@ int btrfs_update_ref(struct btrfs_trans_handle *trans, | |||
1786 | if (bytenr == 0) | 1484 | if (bytenr == 0) |
1787 | continue; | 1485 | continue; |
1788 | ret = __btrfs_update_extent_ref(trans, root, bytenr, | 1486 | ret = __btrfs_update_extent_ref(trans, root, bytenr, |
1789 | orig_buf->start, buf->start, | 1487 | btrfs_file_extent_disk_num_bytes(buf, fi), |
1790 | orig_root, ref_root, | 1488 | orig_buf->start, buf->start, |
1791 | orig_generation, ref_generation, | 1489 | orig_root, ref_root, orig_generation, |
1792 | key.objectid); | 1490 | ref_generation, key.objectid); |
1793 | if (ret) | 1491 | if (ret) |
1794 | goto fail; | 1492 | goto fail; |
1795 | } else { | 1493 | } else { |
1796 | bytenr = btrfs_node_blockptr(buf, slot); | 1494 | bytenr = btrfs_node_blockptr(buf, slot); |
1797 | ret = __btrfs_update_extent_ref(trans, root, bytenr, | 1495 | ret = __btrfs_update_extent_ref(trans, root, bytenr, |
1798 | orig_buf->start, buf->start, | 1496 | buf->len, orig_buf->start, |
1799 | orig_root, ref_root, | 1497 | buf->start, orig_root, ref_root, |
1800 | orig_generation, ref_generation, | 1498 | orig_generation, ref_generation, |
1801 | level - 1); | 1499 | level - 1); |
1802 | if (ret) | 1500 | if (ret) |
@@ -1815,7 +1513,6 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans, | |||
1815 | struct btrfs_block_group_cache *cache) | 1513 | struct btrfs_block_group_cache *cache) |
1816 | { | 1514 | { |
1817 | int ret; | 1515 | int ret; |
1818 | int pending_ret; | ||
1819 | struct btrfs_root *extent_root = root->fs_info->extent_root; | 1516 | struct btrfs_root *extent_root = root->fs_info->extent_root; |
1820 | unsigned long bi; | 1517 | unsigned long bi; |
1821 | struct extent_buffer *leaf; | 1518 | struct extent_buffer *leaf; |
@@ -1831,12 +1528,8 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans, | |||
1831 | btrfs_mark_buffer_dirty(leaf); | 1528 | btrfs_mark_buffer_dirty(leaf); |
1832 | btrfs_release_path(extent_root, path); | 1529 | btrfs_release_path(extent_root, path); |
1833 | fail: | 1530 | fail: |
1834 | finish_current_insert(trans, extent_root, 0); | ||
1835 | pending_ret = del_pending_extents(trans, extent_root, 0); | ||
1836 | if (ret) | 1531 | if (ret) |
1837 | return ret; | 1532 | return ret; |
1838 | if (pending_ret) | ||
1839 | return pending_ret; | ||
1840 | return 0; | 1533 | return 0; |
1841 | 1534 | ||
1842 | } | 1535 | } |
@@ -1900,7 +1593,7 @@ int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr) | |||
1900 | if (!block_group || block_group->ro) | 1593 | if (!block_group || block_group->ro) |
1901 | readonly = 1; | 1594 | readonly = 1; |
1902 | if (block_group) | 1595 | if (block_group) |
1903 | put_block_group(block_group); | 1596 | btrfs_put_block_group(block_group); |
1904 | return readonly; | 1597 | return readonly; |
1905 | } | 1598 | } |
1906 | 1599 | ||
@@ -2324,7 +2017,7 @@ static int update_block_group(struct btrfs_trans_handle *trans, | |||
2324 | WARN_ON(ret); | 2017 | WARN_ON(ret); |
2325 | } | 2018 | } |
2326 | } | 2019 | } |
2327 | put_block_group(cache); | 2020 | btrfs_put_block_group(cache); |
2328 | total -= num_bytes; | 2021 | total -= num_bytes; |
2329 | bytenr += num_bytes; | 2022 | bytenr += num_bytes; |
2330 | } | 2023 | } |
@@ -2341,7 +2034,7 @@ static u64 first_logical_byte(struct btrfs_root *root, u64 search_start) | |||
2341 | return 0; | 2034 | return 0; |
2342 | 2035 | ||
2343 | bytenr = cache->key.objectid; | 2036 | bytenr = cache->key.objectid; |
2344 | put_block_group(cache); | 2037 | btrfs_put_block_group(cache); |
2345 | 2038 | ||
2346 | return bytenr; | 2039 | return bytenr; |
2347 | } | 2040 | } |
@@ -2353,7 +2046,6 @@ int btrfs_update_pinned_extents(struct btrfs_root *root, | |||
2353 | struct btrfs_block_group_cache *cache; | 2046 | struct btrfs_block_group_cache *cache; |
2354 | struct btrfs_fs_info *fs_info = root->fs_info; | 2047 | struct btrfs_fs_info *fs_info = root->fs_info; |
2355 | 2048 | ||
2356 | WARN_ON(!mutex_is_locked(&root->fs_info->pinned_mutex)); | ||
2357 | if (pin) { | 2049 | if (pin) { |
2358 | set_extent_dirty(&fs_info->pinned_extents, | 2050 | set_extent_dirty(&fs_info->pinned_extents, |
2359 | bytenr, bytenr + num - 1, GFP_NOFS); | 2051 | bytenr, bytenr + num - 1, GFP_NOFS); |
@@ -2361,6 +2053,7 @@ int btrfs_update_pinned_extents(struct btrfs_root *root, | |||
2361 | clear_extent_dirty(&fs_info->pinned_extents, | 2053 | clear_extent_dirty(&fs_info->pinned_extents, |
2362 | bytenr, bytenr + num - 1, GFP_NOFS); | 2054 | bytenr, bytenr + num - 1, GFP_NOFS); |
2363 | } | 2055 | } |
2056 | |||
2364 | while (num > 0) { | 2057 | while (num > 0) { |
2365 | cache = btrfs_lookup_block_group(fs_info, bytenr); | 2058 | cache = btrfs_lookup_block_group(fs_info, bytenr); |
2366 | BUG_ON(!cache); | 2059 | BUG_ON(!cache); |
@@ -2385,7 +2078,7 @@ int btrfs_update_pinned_extents(struct btrfs_root *root, | |||
2385 | if (cache->cached) | 2078 | if (cache->cached) |
2386 | btrfs_add_free_space(cache, bytenr, len); | 2079 | btrfs_add_free_space(cache, bytenr, len); |
2387 | } | 2080 | } |
2388 | put_block_group(cache); | 2081 | btrfs_put_block_group(cache); |
2389 | bytenr += len; | 2082 | bytenr += len; |
2390 | num -= len; | 2083 | num -= len; |
2391 | } | 2084 | } |
@@ -2416,7 +2109,7 @@ static int update_reserved_extents(struct btrfs_root *root, | |||
2416 | } | 2109 | } |
2417 | spin_unlock(&cache->lock); | 2110 | spin_unlock(&cache->lock); |
2418 | spin_unlock(&cache->space_info->lock); | 2111 | spin_unlock(&cache->space_info->lock); |
2419 | put_block_group(cache); | 2112 | btrfs_put_block_group(cache); |
2420 | bytenr += len; | 2113 | bytenr += len; |
2421 | num -= len; | 2114 | num -= len; |
2422 | } | 2115 | } |
@@ -2431,7 +2124,6 @@ int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy) | |||
2431 | struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents; | 2124 | struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents; |
2432 | int ret; | 2125 | int ret; |
2433 | 2126 | ||
2434 | mutex_lock(&root->fs_info->pinned_mutex); | ||
2435 | while (1) { | 2127 | while (1) { |
2436 | ret = find_first_extent_bit(pinned_extents, last, | 2128 | ret = find_first_extent_bit(pinned_extents, last, |
2437 | &start, &end, EXTENT_DIRTY); | 2129 | &start, &end, EXTENT_DIRTY); |
@@ -2440,7 +2132,6 @@ int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy) | |||
2440 | set_extent_dirty(copy, start, end, GFP_NOFS); | 2132 | set_extent_dirty(copy, start, end, GFP_NOFS); |
2441 | last = end + 1; | 2133 | last = end + 1; |
2442 | } | 2134 | } |
2443 | mutex_unlock(&root->fs_info->pinned_mutex); | ||
2444 | return 0; | 2135 | return 0; |
2445 | } | 2136 | } |
2446 | 2137 | ||
@@ -2452,7 +2143,6 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, | |||
2452 | u64 end; | 2143 | u64 end; |
2453 | int ret; | 2144 | int ret; |
2454 | 2145 | ||
2455 | mutex_lock(&root->fs_info->pinned_mutex); | ||
2456 | while (1) { | 2146 | while (1) { |
2457 | ret = find_first_extent_bit(unpin, 0, &start, &end, | 2147 | ret = find_first_extent_bit(unpin, 0, &start, &end, |
2458 | EXTENT_DIRTY); | 2148 | EXTENT_DIRTY); |
@@ -2461,209 +2151,20 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, | |||
2461 | 2151 | ||
2462 | ret = btrfs_discard_extent(root, start, end + 1 - start); | 2152 | ret = btrfs_discard_extent(root, start, end + 1 - start); |
2463 | 2153 | ||
2154 | /* unlocks the pinned mutex */ | ||
2464 | btrfs_update_pinned_extents(root, start, end + 1 - start, 0); | 2155 | btrfs_update_pinned_extents(root, start, end + 1 - start, 0); |
2465 | clear_extent_dirty(unpin, start, end, GFP_NOFS); | 2156 | clear_extent_dirty(unpin, start, end, GFP_NOFS); |
2466 | 2157 | ||
2467 | if (need_resched()) { | 2158 | cond_resched(); |
2468 | mutex_unlock(&root->fs_info->pinned_mutex); | ||
2469 | cond_resched(); | ||
2470 | mutex_lock(&root->fs_info->pinned_mutex); | ||
2471 | } | ||
2472 | } | 2159 | } |
2473 | mutex_unlock(&root->fs_info->pinned_mutex); | ||
2474 | return ret; | 2160 | return ret; |
2475 | } | 2161 | } |
2476 | 2162 | ||
2477 | static int finish_current_insert(struct btrfs_trans_handle *trans, | ||
2478 | struct btrfs_root *extent_root, int all) | ||
2479 | { | ||
2480 | u64 start; | ||
2481 | u64 end; | ||
2482 | u64 priv; | ||
2483 | u64 search = 0; | ||
2484 | struct btrfs_fs_info *info = extent_root->fs_info; | ||
2485 | struct btrfs_path *path; | ||
2486 | struct pending_extent_op *extent_op, *tmp; | ||
2487 | struct list_head insert_list, update_list; | ||
2488 | int ret; | ||
2489 | int num_inserts = 0, max_inserts, restart = 0; | ||
2490 | |||
2491 | path = btrfs_alloc_path(); | ||
2492 | INIT_LIST_HEAD(&insert_list); | ||
2493 | INIT_LIST_HEAD(&update_list); | ||
2494 | |||
2495 | max_inserts = extent_root->leafsize / | ||
2496 | (2 * sizeof(struct btrfs_key) + 2 * sizeof(struct btrfs_item) + | ||
2497 | sizeof(struct btrfs_extent_ref) + | ||
2498 | sizeof(struct btrfs_extent_item)); | ||
2499 | again: | ||
2500 | mutex_lock(&info->extent_ins_mutex); | ||
2501 | while (1) { | ||
2502 | ret = find_first_extent_bit(&info->extent_ins, search, &start, | ||
2503 | &end, EXTENT_WRITEBACK); | ||
2504 | if (ret) { | ||
2505 | if (restart && !num_inserts && | ||
2506 | list_empty(&update_list)) { | ||
2507 | restart = 0; | ||
2508 | search = 0; | ||
2509 | continue; | ||
2510 | } | ||
2511 | break; | ||
2512 | } | ||
2513 | |||
2514 | ret = try_lock_extent(&info->extent_ins, start, end, GFP_NOFS); | ||
2515 | if (!ret) { | ||
2516 | if (all) | ||
2517 | restart = 1; | ||
2518 | search = end + 1; | ||
2519 | if (need_resched()) { | ||
2520 | mutex_unlock(&info->extent_ins_mutex); | ||
2521 | cond_resched(); | ||
2522 | mutex_lock(&info->extent_ins_mutex); | ||
2523 | } | ||
2524 | continue; | ||
2525 | } | ||
2526 | |||
2527 | ret = get_state_private(&info->extent_ins, start, &priv); | ||
2528 | BUG_ON(ret); | ||
2529 | extent_op = (struct pending_extent_op *)(unsigned long) priv; | ||
2530 | |||
2531 | if (extent_op->type == PENDING_EXTENT_INSERT) { | ||
2532 | num_inserts++; | ||
2533 | list_add_tail(&extent_op->list, &insert_list); | ||
2534 | search = end + 1; | ||
2535 | if (num_inserts == max_inserts) { | ||
2536 | restart = 1; | ||
2537 | break; | ||
2538 | } | ||
2539 | } else if (extent_op->type == PENDING_BACKREF_UPDATE) { | ||
2540 | list_add_tail(&extent_op->list, &update_list); | ||
2541 | search = end + 1; | ||
2542 | } else { | ||
2543 | BUG(); | ||
2544 | } | ||
2545 | } | ||
2546 | |||
2547 | /* | ||
2548 | * process the update list, clear the writeback bit for it, and if | ||
2549 | * somebody marked this thing for deletion then just unlock it and be | ||
2550 | * done; the free_extents will handle it | ||
2551 | */ | ||
2552 | list_for_each_entry_safe(extent_op, tmp, &update_list, list) { | ||
2553 | clear_extent_bits(&info->extent_ins, extent_op->bytenr, | ||
2554 | extent_op->bytenr + extent_op->num_bytes - 1, | ||
2555 | EXTENT_WRITEBACK, GFP_NOFS); | ||
2556 | if (extent_op->del) { | ||
2557 | list_del_init(&extent_op->list); | ||
2558 | unlock_extent(&info->extent_ins, extent_op->bytenr, | ||
2559 | extent_op->bytenr + extent_op->num_bytes | ||
2560 | - 1, GFP_NOFS); | ||
2561 | kfree(extent_op); | ||
2562 | } | ||
2563 | } | ||
2564 | mutex_unlock(&info->extent_ins_mutex); | ||
2565 | |||
2566 | /* | ||
2567 | * still have things left on the update list, go ahead and update | ||
2568 | * everything | ||
2569 | */ | ||
2570 | if (!list_empty(&update_list)) { | ||
2571 | ret = update_backrefs(trans, extent_root, path, &update_list); | ||
2572 | BUG_ON(ret); | ||
2573 | |||
2574 | /* we may have COW'ed new blocks, so let's start over */ | ||
2575 | if (all) | ||
2576 | restart = 1; | ||
2577 | } | ||
2578 | |||
2579 | /* | ||
2580 | * if no inserts need to be done, but we skipped some extents and we | ||
2581 | * need to make sure everything is cleaned, then reset everything and | ||
2582 | * go back to the beginning | ||
2583 | */ | ||
2584 | if (!num_inserts && restart) { | ||
2585 | search = 0; | ||
2586 | restart = 0; | ||
2587 | INIT_LIST_HEAD(&update_list); | ||
2588 | INIT_LIST_HEAD(&insert_list); | ||
2589 | goto again; | ||
2590 | } else if (!num_inserts) { | ||
2591 | goto out; | ||
2592 | } | ||
2593 | |||
2594 | /* | ||
2595 | * process the insert extents list. Again, if we are deleting this | ||
2596 | * extent, then just unlock it, pin down the bytes if need be, and be | ||
2597 | * done with it. Saves us from having to actually insert the extent | ||
2598 | * into the tree and then subsequently come along and delete it | ||
2599 | */ | ||
2600 | mutex_lock(&info->extent_ins_mutex); | ||
2601 | list_for_each_entry_safe(extent_op, tmp, &insert_list, list) { | ||
2602 | clear_extent_bits(&info->extent_ins, extent_op->bytenr, | ||
2603 | extent_op->bytenr + extent_op->num_bytes - 1, | ||
2604 | EXTENT_WRITEBACK, GFP_NOFS); | ||
2605 | if (extent_op->del) { | ||
2606 | u64 used; | ||
2607 | list_del_init(&extent_op->list); | ||
2608 | unlock_extent(&info->extent_ins, extent_op->bytenr, | ||
2609 | extent_op->bytenr + extent_op->num_bytes | ||
2610 | - 1, GFP_NOFS); | ||
2611 | |||
2612 | mutex_lock(&extent_root->fs_info->pinned_mutex); | ||
2613 | ret = pin_down_bytes(trans, extent_root, | ||
2614 | extent_op->bytenr, | ||
2615 | extent_op->num_bytes, 0); | ||
2616 | mutex_unlock(&extent_root->fs_info->pinned_mutex); | ||
2617 | |||
2618 | spin_lock(&info->delalloc_lock); | ||
2619 | used = btrfs_super_bytes_used(&info->super_copy); | ||
2620 | btrfs_set_super_bytes_used(&info->super_copy, | ||
2621 | used - extent_op->num_bytes); | ||
2622 | used = btrfs_root_used(&extent_root->root_item); | ||
2623 | btrfs_set_root_used(&extent_root->root_item, | ||
2624 | used - extent_op->num_bytes); | ||
2625 | spin_unlock(&info->delalloc_lock); | ||
2626 | |||
2627 | ret = update_block_group(trans, extent_root, | ||
2628 | extent_op->bytenr, | ||
2629 | extent_op->num_bytes, | ||
2630 | 0, ret > 0); | ||
2631 | BUG_ON(ret); | ||
2632 | kfree(extent_op); | ||
2633 | num_inserts--; | ||
2634 | } | ||
2635 | } | ||
2636 | mutex_unlock(&info->extent_ins_mutex); | ||
2637 | |||
2638 | ret = insert_extents(trans, extent_root, path, &insert_list, | ||
2639 | num_inserts); | ||
2640 | BUG_ON(ret); | ||
2641 | |||
2642 | /* | ||
2643 | * if restart is set for whatever reason we need to go back and start | ||
2644 | * searching through the pending list again. | ||
2645 | * | ||
2646 | * We just inserted some extents, which could have resulted in new | ||
2647 | * blocks being allocated, which would result in new blocks needing | ||
2648 | * updates, so if all is set we _must_ restart to get the updated | ||
2649 | * blocks. | ||
2650 | */ | ||
2651 | if (restart || all) { | ||
2652 | INIT_LIST_HEAD(&insert_list); | ||
2653 | INIT_LIST_HEAD(&update_list); | ||
2654 | search = 0; | ||
2655 | restart = 0; | ||
2656 | num_inserts = 0; | ||
2657 | goto again; | ||
2658 | } | ||
2659 | out: | ||
2660 | btrfs_free_path(path); | ||
2661 | return 0; | ||
2662 | } | ||
2663 | |||
2664 | static int pin_down_bytes(struct btrfs_trans_handle *trans, | 2163 | static int pin_down_bytes(struct btrfs_trans_handle *trans, |
2665 | struct btrfs_root *root, | 2164 | struct btrfs_root *root, |
2666 | u64 bytenr, u64 num_bytes, int is_data) | 2165 | struct btrfs_path *path, |
2166 | u64 bytenr, u64 num_bytes, int is_data, | ||
2167 | struct extent_buffer **must_clean) | ||
2667 | { | 2168 | { |
2668 | int err = 0; | 2169 | int err = 0; |
2669 | struct extent_buffer *buf; | 2170 | struct extent_buffer *buf; |
@@ -2686,17 +2187,18 @@ static int pin_down_bytes(struct btrfs_trans_handle *trans, | |||
2686 | u64 header_transid = btrfs_header_generation(buf); | 2187 | u64 header_transid = btrfs_header_generation(buf); |
2687 | if (header_owner != BTRFS_TREE_LOG_OBJECTID && | 2188 | if (header_owner != BTRFS_TREE_LOG_OBJECTID && |
2688 | header_owner != BTRFS_TREE_RELOC_OBJECTID && | 2189 | header_owner != BTRFS_TREE_RELOC_OBJECTID && |
2190 | header_owner != BTRFS_DATA_RELOC_TREE_OBJECTID && | ||
2689 | header_transid == trans->transid && | 2191 | header_transid == trans->transid && |
2690 | !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { | 2192 | !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { |
2691 | clean_tree_block(NULL, root, buf); | 2193 | *must_clean = buf; |
2692 | btrfs_tree_unlock(buf); | ||
2693 | free_extent_buffer(buf); | ||
2694 | return 1; | 2194 | return 1; |
2695 | } | 2195 | } |
2696 | btrfs_tree_unlock(buf); | 2196 | btrfs_tree_unlock(buf); |
2697 | } | 2197 | } |
2698 | free_extent_buffer(buf); | 2198 | free_extent_buffer(buf); |
2699 | pinit: | 2199 | pinit: |
2200 | btrfs_set_path_blocking(path); | ||
2201 | /* unlocks the pinned mutex */ | ||
2700 | btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); | 2202 | btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); |
2701 | 2203 | ||
2702 | BUG_ON(err < 0); | 2204 | BUG_ON(err < 0); |
@@ -2710,7 +2212,8 @@ static int __free_extent(struct btrfs_trans_handle *trans, | |||
2710 | struct btrfs_root *root, | 2212 | struct btrfs_root *root, |
2711 | u64 bytenr, u64 num_bytes, u64 parent, | 2213 | u64 bytenr, u64 num_bytes, u64 parent, |
2712 | u64 root_objectid, u64 ref_generation, | 2214 | u64 root_objectid, u64 ref_generation, |
2713 | u64 owner_objectid, int pin, int mark_free) | 2215 | u64 owner_objectid, int pin, int mark_free, |
2216 | int refs_to_drop) | ||
2714 | { | 2217 | { |
2715 | struct btrfs_path *path; | 2218 | struct btrfs_path *path; |
2716 | struct btrfs_key key; | 2219 | struct btrfs_key key; |
@@ -2732,6 +2235,7 @@ static int __free_extent(struct btrfs_trans_handle *trans, | |||
2732 | return -ENOMEM; | 2235 | return -ENOMEM; |
2733 | 2236 | ||
2734 | path->reada = 1; | 2237 | path->reada = 1; |
2238 | path->leave_spinning = 1; | ||
2735 | ret = lookup_extent_backref(trans, extent_root, path, | 2239 | ret = lookup_extent_backref(trans, extent_root, path, |
2736 | bytenr, parent, root_objectid, | 2240 | bytenr, parent, root_objectid, |
2737 | ref_generation, owner_objectid, 1); | 2241 | ref_generation, owner_objectid, 1); |
@@ -2753,9 +2257,11 @@ static int __free_extent(struct btrfs_trans_handle *trans, | |||
2753 | break; | 2257 | break; |
2754 | } | 2258 | } |
2755 | if (!found_extent) { | 2259 | if (!found_extent) { |
2756 | ret = remove_extent_backref(trans, extent_root, path); | 2260 | ret = remove_extent_backref(trans, extent_root, path, |
2261 | refs_to_drop); | ||
2757 | BUG_ON(ret); | 2262 | BUG_ON(ret); |
2758 | btrfs_release_path(extent_root, path); | 2263 | btrfs_release_path(extent_root, path); |
2264 | path->leave_spinning = 1; | ||
2759 | ret = btrfs_search_slot(trans, extent_root, | 2265 | ret = btrfs_search_slot(trans, extent_root, |
2760 | &key, path, -1, 1); | 2266 | &key, path, -1, 1); |
2761 | if (ret) { | 2267 | if (ret) { |
@@ -2771,8 +2277,9 @@ static int __free_extent(struct btrfs_trans_handle *trans, | |||
2771 | btrfs_print_leaf(extent_root, path->nodes[0]); | 2277 | btrfs_print_leaf(extent_root, path->nodes[0]); |
2772 | WARN_ON(1); | 2278 | WARN_ON(1); |
2773 | printk(KERN_ERR "btrfs unable to find ref byte nr %llu " | 2279 | printk(KERN_ERR "btrfs unable to find ref byte nr %llu " |
2774 | "root %llu gen %llu owner %llu\n", | 2280 | "parent %llu root %llu gen %llu owner %llu\n", |
2775 | (unsigned long long)bytenr, | 2281 | (unsigned long long)bytenr, |
2282 | (unsigned long long)parent, | ||
2776 | (unsigned long long)root_objectid, | 2283 | (unsigned long long)root_objectid, |
2777 | (unsigned long long)ref_generation, | 2284 | (unsigned long long)ref_generation, |
2778 | (unsigned long long)owner_objectid); | 2285 | (unsigned long long)owner_objectid); |
@@ -2782,17 +2289,23 @@ static int __free_extent(struct btrfs_trans_handle *trans, | |||
2782 | ei = btrfs_item_ptr(leaf, extent_slot, | 2289 | ei = btrfs_item_ptr(leaf, extent_slot, |
2783 | struct btrfs_extent_item); | 2290 | struct btrfs_extent_item); |
2784 | refs = btrfs_extent_refs(leaf, ei); | 2291 | refs = btrfs_extent_refs(leaf, ei); |
2785 | BUG_ON(refs == 0); | ||
2786 | refs -= 1; | ||
2787 | btrfs_set_extent_refs(leaf, ei, refs); | ||
2788 | 2292 | ||
2293 | /* | ||
2294 | * we're not allowed to delete the extent item if there | ||
2295 | * are other delayed ref updates pending | ||
2296 | */ | ||
2297 | |||
2298 | BUG_ON(refs < refs_to_drop); | ||
2299 | refs -= refs_to_drop; | ||
2300 | btrfs_set_extent_refs(leaf, ei, refs); | ||
2789 | btrfs_mark_buffer_dirty(leaf); | 2301 | btrfs_mark_buffer_dirty(leaf); |
2790 | 2302 | ||
2791 | if (refs == 0 && found_extent && path->slots[0] == extent_slot + 1) { | 2303 | if (refs == 0 && found_extent && |
2304 | path->slots[0] == extent_slot + 1) { | ||
2792 | struct btrfs_extent_ref *ref; | 2305 | struct btrfs_extent_ref *ref; |
2793 | ref = btrfs_item_ptr(leaf, path->slots[0], | 2306 | ref = btrfs_item_ptr(leaf, path->slots[0], |
2794 | struct btrfs_extent_ref); | 2307 | struct btrfs_extent_ref); |
2795 | BUG_ON(btrfs_ref_num_refs(leaf, ref) != 1); | 2308 | BUG_ON(btrfs_ref_num_refs(leaf, ref) != refs_to_drop); |
2796 | /* if the back ref and the extent are next to each other | 2309 | /* if the back ref and the extent are next to each other |
2797 | * they get deleted below in one shot | 2310 | * they get deleted below in one shot |
2798 | */ | 2311 | */ |
@@ -2800,11 +2313,13 @@ static int __free_extent(struct btrfs_trans_handle *trans, | |||
2800 | num_to_del = 2; | 2313 | num_to_del = 2; |
2801 | } else if (found_extent) { | 2314 | } else if (found_extent) { |
2802 | /* otherwise delete the extent back ref */ | 2315 | /* otherwise delete the extent back ref */ |
2803 | ret = remove_extent_backref(trans, extent_root, path); | 2316 | ret = remove_extent_backref(trans, extent_root, path, |
2317 | refs_to_drop); | ||
2804 | BUG_ON(ret); | 2318 | BUG_ON(ret); |
2805 | /* if refs are 0, we need to setup the path for deletion */ | 2319 | /* if refs are 0, we need to setup the path for deletion */ |
2806 | if (refs == 0) { | 2320 | if (refs == 0) { |
2807 | btrfs_release_path(extent_root, path); | 2321 | btrfs_release_path(extent_root, path); |
2322 | path->leave_spinning = 1; | ||
2808 | ret = btrfs_search_slot(trans, extent_root, &key, path, | 2323 | ret = btrfs_search_slot(trans, extent_root, &key, path, |
2809 | -1, 1); | 2324 | -1, 1); |
2810 | BUG_ON(ret); | 2325 | BUG_ON(ret); |
@@ -2814,16 +2329,18 @@ static int __free_extent(struct btrfs_trans_handle *trans, | |||
2814 | if (refs == 0) { | 2329 | if (refs == 0) { |
2815 | u64 super_used; | 2330 | u64 super_used; |
2816 | u64 root_used; | 2331 | u64 root_used; |
2332 | struct extent_buffer *must_clean = NULL; | ||
2817 | 2333 | ||
2818 | if (pin) { | 2334 | if (pin) { |
2819 | mutex_lock(&root->fs_info->pinned_mutex); | 2335 | ret = pin_down_bytes(trans, root, path, |
2820 | ret = pin_down_bytes(trans, root, bytenr, num_bytes, | 2336 | bytenr, num_bytes, |
2821 | owner_objectid >= BTRFS_FIRST_FREE_OBJECTID); | 2337 | owner_objectid >= BTRFS_FIRST_FREE_OBJECTID, |
2822 | mutex_unlock(&root->fs_info->pinned_mutex); | 2338 | &must_clean); |
2823 | if (ret > 0) | 2339 | if (ret > 0) |
2824 | mark_free = 1; | 2340 | mark_free = 1; |
2825 | BUG_ON(ret < 0); | 2341 | BUG_ON(ret < 0); |
2826 | } | 2342 | } |
2343 | |||
2827 | /* block accounting for super block */ | 2344 | /* block accounting for super block */ |
2828 | spin_lock(&info->delalloc_lock); | 2345 | spin_lock(&info->delalloc_lock); |
2829 | super_used = btrfs_super_bytes_used(&info->super_copy); | 2346 | super_used = btrfs_super_bytes_used(&info->super_copy); |
@@ -2835,14 +2352,34 @@ static int __free_extent(struct btrfs_trans_handle *trans, | |||
2835 | btrfs_set_root_used(&root->root_item, | 2352 | btrfs_set_root_used(&root->root_item, |
2836 | root_used - num_bytes); | 2353 | root_used - num_bytes); |
2837 | spin_unlock(&info->delalloc_lock); | 2354 | spin_unlock(&info->delalloc_lock); |
2355 | |||
2356 | /* | ||
2357 | * it is going to be very rare for someone to be waiting | ||
2358 | * on the block we're freeing. del_items might need to | ||
2359 | * schedule, so rather than get fancy, just force it | ||
2360 | * to blocking here | ||
2361 | */ | ||
2362 | if (must_clean) | ||
2363 | btrfs_set_lock_blocking(must_clean); | ||
2364 | |||
2838 | ret = btrfs_del_items(trans, extent_root, path, path->slots[0], | 2365 | ret = btrfs_del_items(trans, extent_root, path, path->slots[0], |
2839 | num_to_del); | 2366 | num_to_del); |
2840 | BUG_ON(ret); | 2367 | BUG_ON(ret); |
2841 | btrfs_release_path(extent_root, path); | 2368 | btrfs_release_path(extent_root, path); |
2842 | 2369 | ||
2370 | if (must_clean) { | ||
2371 | clean_tree_block(NULL, root, must_clean); | ||
2372 | btrfs_tree_unlock(must_clean); | ||
2373 | free_extent_buffer(must_clean); | ||
2374 | } | ||
2375 | |||
2843 | if (owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { | 2376 | if (owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { |
2844 | ret = btrfs_del_csums(trans, root, bytenr, num_bytes); | 2377 | ret = btrfs_del_csums(trans, root, bytenr, num_bytes); |
2845 | BUG_ON(ret); | 2378 | BUG_ON(ret); |
2379 | } else { | ||
2380 | invalidate_mapping_pages(info->btree_inode->i_mapping, | ||
2381 | bytenr >> PAGE_CACHE_SHIFT, | ||
2382 | (bytenr + num_bytes - 1) >> PAGE_CACHE_SHIFT); | ||
2846 | } | 2383 | } |
2847 | 2384 | ||
2848 | ret = update_block_group(trans, root, bytenr, num_bytes, 0, | 2385 | ret = update_block_group(trans, root, bytenr, num_bytes, 0, |
@@ -2850,218 +2387,103 @@ static int __free_extent(struct btrfs_trans_handle *trans, | |||
2850 | BUG_ON(ret); | 2387 | BUG_ON(ret); |
2851 | } | 2388 | } |
2852 | btrfs_free_path(path); | 2389 | btrfs_free_path(path); |
2853 | finish_current_insert(trans, extent_root, 0); | ||
2854 | return ret; | 2390 | return ret; |
2855 | } | 2391 | } |
2856 | 2392 | ||
2857 | /* | 2393 | /* |
2858 | * find all the blocks marked as pending in the radix tree and remove | 2394 | * remove an extent from the root, returns 0 on success |
2859 | * them from the extent map | ||
2860 | */ | 2395 | */ |
2861 | static int del_pending_extents(struct btrfs_trans_handle *trans, | 2396 | static int __btrfs_free_extent(struct btrfs_trans_handle *trans, |
2862 | struct btrfs_root *extent_root, int all) | 2397 | struct btrfs_root *root, |
2398 | u64 bytenr, u64 num_bytes, u64 parent, | ||
2399 | u64 root_objectid, u64 ref_generation, | ||
2400 | u64 owner_objectid, int pin, | ||
2401 | int refs_to_drop) | ||
2863 | { | 2402 | { |
2864 | int ret; | 2403 | WARN_ON(num_bytes < root->sectorsize); |
2865 | int err = 0; | ||
2866 | u64 start; | ||
2867 | u64 end; | ||
2868 | u64 priv; | ||
2869 | u64 search = 0; | ||
2870 | int nr = 0, skipped = 0; | ||
2871 | struct extent_io_tree *pending_del; | ||
2872 | struct extent_io_tree *extent_ins; | ||
2873 | struct pending_extent_op *extent_op; | ||
2874 | struct btrfs_fs_info *info = extent_root->fs_info; | ||
2875 | struct list_head delete_list; | ||
2876 | |||
2877 | INIT_LIST_HEAD(&delete_list); | ||
2878 | extent_ins = &extent_root->fs_info->extent_ins; | ||
2879 | pending_del = &extent_root->fs_info->pending_del; | ||
2880 | |||
2881 | again: | ||
2882 | mutex_lock(&info->extent_ins_mutex); | ||
2883 | while (1) { | ||
2884 | ret = find_first_extent_bit(pending_del, search, &start, &end, | ||
2885 | EXTENT_WRITEBACK); | ||
2886 | if (ret) { | ||
2887 | if (all && skipped && !nr) { | ||
2888 | search = 0; | ||
2889 | skipped = 0; | ||
2890 | continue; | ||
2891 | } | ||
2892 | mutex_unlock(&info->extent_ins_mutex); | ||
2893 | break; | ||
2894 | } | ||
2895 | |||
2896 | ret = try_lock_extent(extent_ins, start, end, GFP_NOFS); | ||
2897 | if (!ret) { | ||
2898 | search = end+1; | ||
2899 | skipped = 1; | ||
2900 | |||
2901 | if (need_resched()) { | ||
2902 | mutex_unlock(&info->extent_ins_mutex); | ||
2903 | cond_resched(); | ||
2904 | mutex_lock(&info->extent_ins_mutex); | ||
2905 | } | ||
2906 | |||
2907 | continue; | ||
2908 | } | ||
2909 | BUG_ON(ret < 0); | ||
2910 | |||
2911 | ret = get_state_private(pending_del, start, &priv); | ||
2912 | BUG_ON(ret); | ||
2913 | extent_op = (struct pending_extent_op *)(unsigned long)priv; | ||
2914 | |||
2915 | clear_extent_bits(pending_del, start, end, EXTENT_WRITEBACK, | ||
2916 | GFP_NOFS); | ||
2917 | if (!test_range_bit(extent_ins, start, end, | ||
2918 | EXTENT_WRITEBACK, 0)) { | ||
2919 | list_add_tail(&extent_op->list, &delete_list); | ||
2920 | nr++; | ||
2921 | } else { | ||
2922 | kfree(extent_op); | ||
2923 | |||
2924 | ret = get_state_private(&info->extent_ins, start, | ||
2925 | &priv); | ||
2926 | BUG_ON(ret); | ||
2927 | extent_op = (struct pending_extent_op *) | ||
2928 | (unsigned long)priv; | ||
2929 | |||
2930 | clear_extent_bits(&info->extent_ins, start, end, | ||
2931 | EXTENT_WRITEBACK, GFP_NOFS); | ||
2932 | |||
2933 | if (extent_op->type == PENDING_BACKREF_UPDATE) { | ||
2934 | list_add_tail(&extent_op->list, &delete_list); | ||
2935 | search = end + 1; | ||
2936 | nr++; | ||
2937 | continue; | ||
2938 | } | ||
2939 | |||
2940 | mutex_lock(&extent_root->fs_info->pinned_mutex); | ||
2941 | ret = pin_down_bytes(trans, extent_root, start, | ||
2942 | end + 1 - start, 0); | ||
2943 | mutex_unlock(&extent_root->fs_info->pinned_mutex); | ||
2944 | |||
2945 | ret = update_block_group(trans, extent_root, start, | ||
2946 | end + 1 - start, 0, ret > 0); | ||
2947 | |||
2948 | unlock_extent(extent_ins, start, end, GFP_NOFS); | ||
2949 | BUG_ON(ret); | ||
2950 | kfree(extent_op); | ||
2951 | } | ||
2952 | if (ret) | ||
2953 | err = ret; | ||
2954 | |||
2955 | search = end + 1; | ||
2956 | |||
2957 | if (need_resched()) { | ||
2958 | mutex_unlock(&info->extent_ins_mutex); | ||
2959 | cond_resched(); | ||
2960 | mutex_lock(&info->extent_ins_mutex); | ||
2961 | } | ||
2962 | } | ||
2963 | 2404 | ||
2964 | if (nr) { | 2405 | /* |
2965 | ret = free_extents(trans, extent_root, &delete_list); | 2406 | * if metadata always pin |
2966 | BUG_ON(ret); | 2407 | * if data pin when any transaction has committed this |
2967 | } | 2408 | */ |
2409 | if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID || | ||
2410 | ref_generation != trans->transid) | ||
2411 | pin = 1; | ||
2968 | 2412 | ||
2969 | if (all && skipped) { | 2413 | if (ref_generation != trans->transid) |
2970 | INIT_LIST_HEAD(&delete_list); | 2414 | pin = 1; |
2971 | search = 0; | ||
2972 | nr = 0; | ||
2973 | goto again; | ||
2974 | } | ||
2975 | 2415 | ||
2976 | if (!err) | 2416 | return __free_extent(trans, root, bytenr, num_bytes, parent, |
2977 | finish_current_insert(trans, extent_root, 0); | 2417 | root_objectid, ref_generation, |
2978 | return err; | 2418 | owner_objectid, pin, pin == 0, refs_to_drop); |
2979 | } | 2419 | } |
2980 | 2420 | ||
2981 | /* | 2421 | /* |
2982 | * remove an extent from the root, returns 0 on success | 2422 | * when we free an extent, it is possible (and likely) that we free the last |
2423 | * delayed ref for that extent as well. This searches the delayed ref tree for | ||
2424 | * a given extent, and if there are no other delayed refs to be processed, it | ||
2425 | * removes it from the tree. | ||
2983 | */ | 2426 | */ |
2984 | static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | 2427 | static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans, |
2985 | struct btrfs_root *root, | 2428 | struct btrfs_root *root, u64 bytenr) |
2986 | u64 bytenr, u64 num_bytes, u64 parent, | ||
2987 | u64 root_objectid, u64 ref_generation, | ||
2988 | u64 owner_objectid, int pin) | ||
2989 | { | 2429 | { |
2990 | struct btrfs_root *extent_root = root->fs_info->extent_root; | 2430 | struct btrfs_delayed_ref_head *head; |
2991 | int pending_ret; | 2431 | struct btrfs_delayed_ref_root *delayed_refs; |
2432 | struct btrfs_delayed_ref_node *ref; | ||
2433 | struct rb_node *node; | ||
2992 | int ret; | 2434 | int ret; |
2993 | 2435 | ||
2994 | WARN_ON(num_bytes < root->sectorsize); | 2436 | delayed_refs = &trans->transaction->delayed_refs; |
2995 | if (root == extent_root) { | 2437 | spin_lock(&delayed_refs->lock); |
2996 | struct pending_extent_op *extent_op = NULL; | 2438 | head = btrfs_find_delayed_ref_head(trans, bytenr); |
2997 | 2439 | if (!head) | |
2998 | mutex_lock(&root->fs_info->extent_ins_mutex); | 2440 | goto out; |
2999 | if (test_range_bit(&root->fs_info->extent_ins, bytenr, | ||
3000 | bytenr + num_bytes - 1, EXTENT_WRITEBACK, 0)) { | ||
3001 | u64 priv; | ||
3002 | ret = get_state_private(&root->fs_info->extent_ins, | ||
3003 | bytenr, &priv); | ||
3004 | BUG_ON(ret); | ||
3005 | extent_op = (struct pending_extent_op *) | ||
3006 | (unsigned long)priv; | ||
3007 | 2441 | ||
3008 | extent_op->del = 1; | 2442 | node = rb_prev(&head->node.rb_node); |
3009 | if (extent_op->type == PENDING_EXTENT_INSERT) { | 2443 | if (!node) |
3010 | mutex_unlock(&root->fs_info->extent_ins_mutex); | 2444 | goto out; |
3011 | return 0; | ||
3012 | } | ||
3013 | } | ||
3014 | 2445 | ||
3015 | if (extent_op) { | 2446 | ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); |
3016 | ref_generation = extent_op->orig_generation; | ||
3017 | parent = extent_op->orig_parent; | ||
3018 | } | ||
3019 | 2447 | ||
3020 | extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); | 2448 | /* there are still entries for this ref, we can't drop it */ |
3021 | BUG_ON(!extent_op); | 2449 | if (ref->bytenr == bytenr) |
3022 | 2450 | goto out; | |
3023 | extent_op->type = PENDING_EXTENT_DELETE; | ||
3024 | extent_op->bytenr = bytenr; | ||
3025 | extent_op->num_bytes = num_bytes; | ||
3026 | extent_op->parent = parent; | ||
3027 | extent_op->orig_parent = parent; | ||
3028 | extent_op->generation = ref_generation; | ||
3029 | extent_op->orig_generation = ref_generation; | ||
3030 | extent_op->level = (int)owner_objectid; | ||
3031 | INIT_LIST_HEAD(&extent_op->list); | ||
3032 | extent_op->del = 0; | ||
3033 | |||
3034 | set_extent_bits(&root->fs_info->pending_del, | ||
3035 | bytenr, bytenr + num_bytes - 1, | ||
3036 | EXTENT_WRITEBACK, GFP_NOFS); | ||
3037 | set_state_private(&root->fs_info->pending_del, | ||
3038 | bytenr, (unsigned long)extent_op); | ||
3039 | mutex_unlock(&root->fs_info->extent_ins_mutex); | ||
3040 | return 0; | ||
3041 | } | ||
3042 | /* if metadata always pin */ | ||
3043 | if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID) { | ||
3044 | if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { | ||
3045 | mutex_lock(&root->fs_info->pinned_mutex); | ||
3046 | btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); | ||
3047 | mutex_unlock(&root->fs_info->pinned_mutex); | ||
3048 | update_reserved_extents(root, bytenr, num_bytes, 0); | ||
3049 | return 0; | ||
3050 | } | ||
3051 | pin = 1; | ||
3052 | } | ||
3053 | 2451 | ||
3054 | /* if data pin when any transaction has committed this */ | 2452 | /* |
3055 | if (ref_generation != trans->transid) | 2453 | * waiting for the lock here would deadlock. If someone else has it |
3056 | pin = 1; | 2454 | * locked they are already in the process of dropping it anyway |
2455 | */ | ||
2456 | if (!mutex_trylock(&head->mutex)) | ||
2457 | goto out; | ||
3057 | 2458 | ||
3058 | ret = __free_extent(trans, root, bytenr, num_bytes, parent, | 2459 | /* |
3059 | root_objectid, ref_generation, | 2460 | * at this point we have a head with no other entries. Go |
3060 | owner_objectid, pin, pin == 0); | 2461 | * ahead and process it. |
2462 | */ | ||
2463 | head->node.in_tree = 0; | ||
2464 | rb_erase(&head->node.rb_node, &delayed_refs->root); | ||
2465 | |||
2466 | delayed_refs->num_entries--; | ||
2467 | |||
2468 | /* | ||
2469 | * we don't take a ref on the node because we're removing it from the | ||
2470 | * tree, so we just steal the ref the tree was holding. | ||
2471 | */ | ||
2472 | delayed_refs->num_heads--; | ||
2473 | if (list_empty(&head->cluster)) | ||
2474 | delayed_refs->num_heads_ready--; | ||
3061 | 2475 | ||
3062 | finish_current_insert(trans, root->fs_info->extent_root, 0); | 2476 | list_del_init(&head->cluster); |
3063 | pending_ret = del_pending_extents(trans, root->fs_info->extent_root, 0); | 2477 | spin_unlock(&delayed_refs->lock); |
3064 | return ret ? ret : pending_ret; | 2478 | |
2479 | ret = run_one_delayed_ref(trans, root->fs_info->tree_root, | ||
2480 | &head->node, head->must_insert_reserved); | ||
2481 | BUG_ON(ret); | ||
2482 | btrfs_put_delayed_ref(&head->node); | ||
2483 | return 0; | ||
2484 | out: | ||
2485 | spin_unlock(&delayed_refs->lock); | ||
2486 | return 0; | ||
3065 | } | 2487 | } |
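
The "steal the ref the tree was holding" comment is a common refcounting idiom: unlinking an object from a container transfers the container's reference to the caller, saving an inc/dec pair. Schematically, with hypothetical types:

    #include <stdio.h>

    struct node { int refs; };

    /* the container holds one reference; unlinking hands it to the
     * caller instead of doing a get here and a put in the container */
    static struct node *unlink_steal_ref(struct node **slot)
    {
            struct node *n = *slot;
            *slot = NULL;           /* removed from the container */
            return n;               /* caller now owns the container's ref */
    }

    static void put(struct node *n)
    {
            if (--n->refs == 0)
                    printf("freed\n");
    }

    int main(void)
    {
            struct node n = { .refs = 1 };  /* the container's reference */
            struct node *slot = &n;
            put(unlink_steal_ref(&slot));   /* drop the stolen reference */
            return 0;
    }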
3066 | 2488 | ||
3067 | int btrfs_free_extent(struct btrfs_trans_handle *trans, | 2489 | int btrfs_free_extent(struct btrfs_trans_handle *trans, |
@@ -3072,9 +2494,28 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
3072 | { | 2494 | { |
3073 | int ret; | 2495 | int ret; |
3074 | 2496 | ||
3075 | ret = __btrfs_free_extent(trans, root, bytenr, num_bytes, parent, | 2497 | /* |
3076 | root_objectid, ref_generation, | 2498 | * tree log blocks never actually go into the extent allocation |
3077 | owner_objectid, pin); | 2499 | * tree, just update pinning info and exit early. |
2500 | * | ||
2501 | * data extents referenced by the tree log do need to have | ||
2502 | * their reference counts bumped. | ||
2503 | */ | ||
2504 | if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID && | ||
2505 | owner_objectid < BTRFS_FIRST_FREE_OBJECTID) { | ||
2506 | /* unlocks the pinned mutex */ | ||
2507 | btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); | ||
2508 | update_reserved_extents(root, bytenr, num_bytes, 0); | ||
2509 | ret = 0; | ||
2510 | } else { | ||
2511 | ret = btrfs_add_delayed_ref(trans, bytenr, num_bytes, parent, | ||
2512 | root_objectid, ref_generation, | ||
2513 | owner_objectid, | ||
2514 | BTRFS_DROP_DELAYED_REF, 1); | ||
2515 | BUG_ON(ret); | ||
2516 | ret = check_ref_cleanup(trans, root, bytenr); | ||
2517 | BUG_ON(ret); | ||
2518 | } | ||
3078 | return ret; | 2519 | return ret; |
3079 | } | 2520 | } |
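
The comment documents a routing decision rather than new mechanics: tree-log metadata is pinned on the spot, while everything else becomes a delayed BTRFS_DROP_DELAYED_REF entry followed by the check_ref_cleanup fast path. A predicate capturing that split, with placeholder objectid values rather than the real BTRFS_* constants:

    #include <stdio.h>

    enum route { PIN_NOW, QUEUE_DELAYED_DROP };

    static enum route route_free(unsigned long long root_objectid,
                                 unsigned long long owner_objectid,
                                 unsigned long long tree_log_id,
                                 unsigned long long first_free_id)
    {
            /* metadata owned by the log tree never hits the extent tree */
            if (root_objectid == tree_log_id && owner_objectid < first_free_id)
                    return PIN_NOW;
            return QUEUE_DELAYED_DROP;
    }

    int main(void)
    {
            /* ids here are arbitrary stand-ins */
            printf("log tree block -> %d\n", route_free(7, 1, 7, 256));
            printf("file extent    -> %d\n", route_free(5, 300, 7, 256));
            return 0;
    }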
3080 | 2521 | ||
@@ -3103,228 +2544,237 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, | |||
3103 | { | 2544 | { |
3104 | int ret = 0; | 2545 | int ret = 0; |
3105 | struct btrfs_root *root = orig_root->fs_info->extent_root; | 2546 | struct btrfs_root *root = orig_root->fs_info->extent_root; |
3106 | u64 total_needed = num_bytes; | 2547 | struct btrfs_free_cluster *last_ptr = NULL; |
3107 | u64 *last_ptr = NULL; | ||
3108 | u64 last_wanted = 0; | ||
3109 | struct btrfs_block_group_cache *block_group = NULL; | 2548 | struct btrfs_block_group_cache *block_group = NULL; |
3110 | int chunk_alloc_done = 0; | ||
3111 | int empty_cluster = 2 * 1024 * 1024; | 2549 | int empty_cluster = 2 * 1024 * 1024; |
3112 | int allowed_chunk_alloc = 0; | 2550 | int allowed_chunk_alloc = 0; |
3113 | struct list_head *head = NULL, *cur = NULL; | ||
3114 | int loop = 0; | ||
3115 | int extra_loop = 0; | ||
3116 | struct btrfs_space_info *space_info; | 2551 | struct btrfs_space_info *space_info; |
2552 | int last_ptr_loop = 0; | ||
2553 | int loop = 0; | ||
3117 | 2554 | ||
3118 | WARN_ON(num_bytes < root->sectorsize); | 2555 | WARN_ON(num_bytes < root->sectorsize); |
3119 | btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); | 2556 | btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); |
3120 | ins->objectid = 0; | 2557 | ins->objectid = 0; |
3121 | ins->offset = 0; | 2558 | ins->offset = 0; |
3122 | 2559 | ||
2560 | space_info = __find_space_info(root->fs_info, data); | ||
2561 | |||
3123 | if (orig_root->ref_cows || empty_size) | 2562 | if (orig_root->ref_cows || empty_size) |
3124 | allowed_chunk_alloc = 1; | 2563 | allowed_chunk_alloc = 1; |
3125 | 2564 | ||
3126 | if (data & BTRFS_BLOCK_GROUP_METADATA) { | 2565 | if (data & BTRFS_BLOCK_GROUP_METADATA) { |
3127 | last_ptr = &root->fs_info->last_alloc; | 2566 | last_ptr = &root->fs_info->meta_alloc_cluster; |
3128 | if (!btrfs_test_opt(root, SSD)) | 2567 | if (!btrfs_test_opt(root, SSD)) |
3129 | empty_cluster = 64 * 1024; | 2568 | empty_cluster = 64 * 1024; |
3130 | } | 2569 | } |
3131 | 2570 | ||
3132 | if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) | 2571 | if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) { |
3133 | last_ptr = &root->fs_info->last_data_alloc; | 2572 | last_ptr = &root->fs_info->data_alloc_cluster; |
2573 | } | ||
3134 | 2574 | ||
3135 | if (last_ptr) { | 2575 | if (last_ptr) { |
3136 | if (*last_ptr) { | 2576 | spin_lock(&last_ptr->lock); |
3137 | hint_byte = *last_ptr; | 2577 | if (last_ptr->block_group) |
3138 | last_wanted = *last_ptr; | 2578 | hint_byte = last_ptr->window_start; |
3139 | } else | 2579 | spin_unlock(&last_ptr->lock); |
3140 | empty_size += empty_cluster; | ||
3141 | } else { | ||
3142 | empty_cluster = 0; | ||
3143 | } | 2580 | } |
2581 | |||
3144 | search_start = max(search_start, first_logical_byte(root, 0)); | 2582 | search_start = max(search_start, first_logical_byte(root, 0)); |
3145 | search_start = max(search_start, hint_byte); | 2583 | search_start = max(search_start, hint_byte); |
3146 | 2584 | ||
3147 | if (last_wanted && search_start != last_wanted) { | 2585 | if (!last_ptr) { |
3148 | last_wanted = 0; | 2586 | empty_cluster = 0; |
3149 | empty_size += empty_cluster; | 2587 | loop = 1; |
3150 | } | 2588 | } |
3151 | 2589 | ||
3152 | total_needed += empty_size; | 2590 | if (search_start == hint_byte) { |
3153 | block_group = btrfs_lookup_block_group(root->fs_info, search_start); | 2591 | block_group = btrfs_lookup_block_group(root->fs_info, |
3154 | if (!block_group) | 2592 | search_start); |
3155 | block_group = btrfs_lookup_first_block_group(root->fs_info, | 2593 | if (block_group && block_group_bits(block_group, data)) { |
3156 | search_start); | 2594 | down_read(&space_info->groups_sem); |
3157 | space_info = __find_space_info(root->fs_info, data); | 2595 | goto have_block_group; |
2596 | } else if (block_group) { | ||
2597 | btrfs_put_block_group(block_group); | ||
2598 | } | ||
2599 | } | ||
3158 | 2600 | ||
2601 | search: | ||
3159 | down_read(&space_info->groups_sem); | 2602 | down_read(&space_info->groups_sem); |
3160 | while (1) { | 2603 | list_for_each_entry(block_group, &space_info->block_groups, list) { |
3161 | struct btrfs_free_space *free_space; | 2604 | u64 offset; |
3162 | /* | ||
3163 | * the only way this happens is if our hint points to a block | ||
3164 | * group that's not of the proper type; while looping this | ||
3165 | * should never happen | ||
3166 | */ | ||
3167 | if (empty_size) | ||
3168 | extra_loop = 1; | ||
3169 | 2605 | ||
3170 | if (!block_group) | 2606 | atomic_inc(&block_group->count); |
3171 | goto new_group_no_lock; | 2607 | search_start = block_group->key.objectid; |
3172 | 2608 | ||
2609 | have_block_group: | ||
3173 | if (unlikely(!block_group->cached)) { | 2610 | if (unlikely(!block_group->cached)) { |
3174 | mutex_lock(&block_group->cache_mutex); | 2611 | mutex_lock(&block_group->cache_mutex); |
3175 | ret = cache_block_group(root, block_group); | 2612 | ret = cache_block_group(root, block_group); |
3176 | mutex_unlock(&block_group->cache_mutex); | 2613 | mutex_unlock(&block_group->cache_mutex); |
3177 | if (ret) | 2614 | if (ret) { |
2615 | btrfs_put_block_group(block_group); | ||
3178 | break; | 2616 | break; |
2617 | } | ||
3179 | } | 2618 | } |
3180 | 2619 | ||
3181 | mutex_lock(&block_group->alloc_mutex); | ||
3182 | if (unlikely(!block_group_bits(block_group, data))) | ||
3183 | goto new_group; | ||
3184 | |||
3185 | if (unlikely(block_group->ro)) | 2620 | if (unlikely(block_group->ro)) |
3186 | goto new_group; | 2621 | goto loop; |
3187 | 2622 | ||
3188 | free_space = btrfs_find_free_space(block_group, search_start, | 2623 | if (last_ptr) { |
3189 | total_needed); | 2624 | /* |
3190 | if (free_space) { | 2625 | * the refill lock keeps out other |
3191 | u64 start = block_group->key.objectid; | 2626 | * people trying to start a new cluster |
3192 | u64 end = block_group->key.objectid + | 2627 | */ |
3193 | block_group->key.offset; | 2628 | spin_lock(&last_ptr->refill_lock); |
2629 | offset = btrfs_alloc_from_cluster(block_group, last_ptr, | ||
2630 | num_bytes, search_start); | ||
2631 | if (offset) { | ||
2632 | /* we have a block, we're done */ | ||
2633 | spin_unlock(&last_ptr->refill_lock); | ||
2634 | goto checks; | ||
2635 | } | ||
3194 | 2636 | ||
3195 | search_start = stripe_align(root, free_space->offset); | 2637 | spin_lock(&last_ptr->lock); |
2638 | /* | ||
2639 | * whoops, this cluster doesn't actually point to | ||
2640 | * this block group. Get a ref on the block | ||
2641 | * group it does point to and try again | ||
2642 | */ | ||
2643 | if (!last_ptr_loop && last_ptr->block_group && | ||
2644 | last_ptr->block_group != block_group) { | ||
2645 | |||
2646 | btrfs_put_block_group(block_group); | ||
2647 | block_group = last_ptr->block_group; | ||
2648 | atomic_inc(&block_group->count); | ||
2649 | spin_unlock(&last_ptr->lock); | ||
2650 | spin_unlock(&last_ptr->refill_lock); | ||
2651 | |||
2652 | last_ptr_loop = 1; | ||
2653 | search_start = block_group->key.objectid; | ||
2654 | goto have_block_group; | ||
2655 | } | ||
2656 | spin_unlock(&last_ptr->lock); | ||
3196 | 2657 | ||
3197 | /* move on to the next group */ | 2658 | /* |
3198 | if (search_start + num_bytes >= search_end) | 2659 | * this cluster didn't work out, free it and |
3199 | goto new_group; | 2660 | * start over |
2661 | */ | ||
2662 | btrfs_return_cluster_to_free_space(NULL, last_ptr); | ||
3200 | 2663 | ||
3201 | /* move on to the next group */ | 2664 | last_ptr_loop = 0; |
3202 | if (search_start + num_bytes > end) | ||
3203 | goto new_group; | ||
3204 | 2665 | ||
3205 | if (last_wanted && search_start != last_wanted) { | 2666 | /* allocate a cluster in this block group */ |
3206 | total_needed += empty_cluster; | 2667 | ret = btrfs_find_space_cluster(trans, |
3207 | empty_size += empty_cluster; | 2668 | block_group, last_ptr, |
3208 | last_wanted = 0; | 2669 | offset, num_bytes, |
2670 | empty_cluster + empty_size); | ||
2671 | if (ret == 0) { | ||
3209 | /* | 2672 | /* |
3210 | * if search_start is still in this block group | 2673 | * now pull our allocation out of this |
3211 | * then we just re-search this block group | 2674 | * cluster |
3212 | */ | 2675 | */ |
3213 | if (search_start >= start && | 2676 | offset = btrfs_alloc_from_cluster(block_group, |
3214 | search_start < end) { | 2677 | last_ptr, num_bytes, |
3215 | mutex_unlock(&block_group->alloc_mutex); | 2678 | search_start); |
3216 | continue; | 2679 | if (offset) { |
2680 | /* we found one, proceed */ | ||
2681 | spin_unlock(&last_ptr->refill_lock); | ||
2682 | goto checks; | ||
3217 | } | 2683 | } |
3218 | |||
3219 | /* else we go to the next block group */ | ||
3220 | goto new_group; | ||
3221 | } | 2684 | } |
3222 | 2685 | /* | |
3223 | if (exclude_nr > 0 && | 2686 | * at this point we either didn't find a cluster |
3224 | (search_start + num_bytes > exclude_start && | 2687 | * or we weren't able to allocate a block from our |
3225 | search_start < exclude_start + exclude_nr)) { | 2688 | * cluster. Free the cluster we've been trying |
3226 | search_start = exclude_start + exclude_nr; | 2689 | * to use, and go to the next block group |
3227 | /* | 2690 | */ |
3228 | * if search_start is still in this block group | 2691 | if (loop < 2) { |
3229 | * then we just re-search this block group | 2692 | btrfs_return_cluster_to_free_space(NULL, |
3230 | */ | 2693 | last_ptr); |
3231 | if (search_start >= start && | 2694 | spin_unlock(&last_ptr->refill_lock); |
3232 | search_start < end) { | 2695 | goto loop; |
3233 | mutex_unlock(&block_group->alloc_mutex); | ||
3234 | last_wanted = 0; | ||
3235 | continue; | ||
3236 | } | ||
3237 | |||
3238 | /* else we go to the next block group */ | ||
3239 | goto new_group; | ||
3240 | } | 2696 | } |
2697 | spin_unlock(&last_ptr->refill_lock); | ||
2698 | } | ||
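
The refill_lock choreography above reduces to: try the current cluster under the lock, retarget once if the cluster actually belongs to another block group, otherwise dump the cluster, refill it from this group, and retry before falling back to the plain free-space search. A minimal userspace sketch of that order follows; every name in it (struct cluster, refill_cluster, and so on) is an illustrative stand-in, not the btrfs API.

    /* Sketch of "try the cluster first, refill it on miss". */
    #include <pthread.h>
    #include <stdint.h>

    struct cluster {
        pthread_mutex_t refill_lock; /* keeps out concurrent refillers */
        uint64_t start;              /* first free byte in the cluster */
        uint64_t bytes_left;         /* contiguous bytes still unclaimed */
    };

    /* Carve num_bytes out of the cluster, or return 0 on a miss. */
    static uint64_t alloc_from_cluster(struct cluster *c, uint64_t num_bytes)
    {
        uint64_t offset = 0;

        if (c->bytes_left >= num_bytes) {
            offset = c->start;
            c->start += num_bytes;
            c->bytes_left -= num_bytes;
        }
        return offset;
    }

    /* Assumed helper: rebuild the cluster from a group's free-space index. */
    int refill_cluster(struct cluster *c, uint64_t want);

    uint64_t cluster_alloc(struct cluster *c, uint64_t num_bytes)
    {
        uint64_t offset;

        pthread_mutex_lock(&c->refill_lock);
        offset = alloc_from_cluster(c, num_bytes);      /* fast path */
        if (!offset && refill_cluster(c, num_bytes) == 0)
            offset = alloc_from_cluster(c, num_bytes);  /* retry after refill */
        pthread_mutex_unlock(&c->refill_lock);
        return offset;  /* 0 means: fall back to the plain free-space search */
    }
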
3241 | 2699 | ||
3242 | ins->objectid = search_start; | 2700 | offset = btrfs_find_space_for_alloc(block_group, search_start, |
3243 | ins->offset = num_bytes; | 2701 | num_bytes, empty_size); |
2702 | if (!offset) | ||
2703 | goto loop; | ||
2704 | checks: | ||
2705 | search_start = stripe_align(root, offset); | ||
3244 | 2706 | ||
3245 | btrfs_remove_free_space_lock(block_group, search_start, | 2707 | /* move on to the next group */ |
3246 | num_bytes); | 2708 | if (search_start + num_bytes >= search_end) { |
3247 | /* we are all good, let's return */ | 2709 | btrfs_add_free_space(block_group, offset, num_bytes); |
3248 | mutex_unlock(&block_group->alloc_mutex); | 2710 | goto loop; |
3249 | break; | ||
3250 | } | 2711 | } |
3251 | new_group: | ||
3252 | mutex_unlock(&block_group->alloc_mutex); | ||
3253 | put_block_group(block_group); | ||
3254 | block_group = NULL; | ||
3255 | new_group_no_lock: | ||
3256 | /* don't try to compare new allocations against the | ||
3257 | * last allocation any more | ||
3258 | */ | ||
3259 | last_wanted = 0; | ||
3260 | 2712 | ||
3261 | /* | 2713 | /* move on to the next group */ |
3262 | * Here's how this works. | 2714 | if (search_start + num_bytes > |
3263 | * loop == 0: we were searching a block group via a hint | 2715 | block_group->key.objectid + block_group->key.offset) { |
3264 | * and didn't find anything, so we start at | 2716 | btrfs_add_free_space(block_group, offset, num_bytes); |
3265 | * the head of the block groups and keep searching | 2717 | goto loop; |
3266 | * loop == 1: we're searching through all of the block groups | 2718 | } |
3267 | * if we hit the head again we have searched | 2719 | |
3268 | * all of the block groups for this space and we | 2720 | if (exclude_nr > 0 && |
3269 | * need to try to allocate; if we can't, error out. | 2721 | (search_start + num_bytes > exclude_start && |
3270 | * loop == 2: we allocated more space and are looping through | 2722 | search_start < exclude_start + exclude_nr)) { |
3271 | * all of the block groups again. | 2723 | search_start = exclude_start + exclude_nr; |
3272 | */ | 2724 | |
3273 | if (loop == 0) { | 2725 | btrfs_add_free_space(block_group, offset, num_bytes); |
3274 | head = &space_info->block_groups; | 2726 | /* |
3275 | cur = head->next; | 2727 | * if search_start is still in this block group |
3276 | loop++; | 2728 | * then we just re-search this block group |
3277 | } else if (loop == 1 && cur == head) { | ||
3278 | int keep_going; | ||
3279 | |||
3280 | /* at this point we give up on the empty_size | ||
3281 | * allocations and just try to allocate the min | ||
3282 | * space. | ||
3283 | * | ||
3284 | * The extra_loop field was set if an empty_size | ||
3285 | * allocation was attempted above, and if it | ||
3286 | * is set we need to try the loop again without | ||
3287 | * the additional empty_size. | ||
3288 | */ | 2729 | */ |
3289 | total_needed -= empty_size; | 2730 | if (search_start >= block_group->key.objectid && |
3290 | empty_size = 0; | 2731 | search_start < (block_group->key.objectid + |
3291 | keep_going = extra_loop; | 2732 | block_group->key.offset)) |
3292 | loop++; | 2733 | goto have_block_group; |
2734 | goto loop; | ||
2735 | } | ||
3293 | 2736 | ||
3294 | if (allowed_chunk_alloc && !chunk_alloc_done) { | 2737 | ins->objectid = search_start; |
3295 | up_read(&space_info->groups_sem); | 2738 | ins->offset = num_bytes; |
3296 | ret = do_chunk_alloc(trans, root, num_bytes + | 2739 | |
3297 | 2 * 1024 * 1024, data, 1); | 2740 | if (offset < search_start) |
3298 | down_read(&space_info->groups_sem); | 2741 | btrfs_add_free_space(block_group, offset, |
3299 | if (ret < 0) | 2742 | search_start - offset); |
3300 | goto loop_check; | 2743 | BUG_ON(offset > search_start); |
3301 | head = &space_info->block_groups; | 2744 | |
3302 | /* | 2745 | /* we are all good, let's return */ |
3303 | * we've allocated a new chunk, keep | 2746 | break; |
3304 | * trying | 2747 | loop: |
3305 | */ | 2748 | btrfs_put_block_group(block_group); |
3306 | keep_going = 1; | 2749 | } |
3307 | chunk_alloc_done = 1; | 2750 | up_read(&space_info->groups_sem); |
3308 | } else if (!allowed_chunk_alloc) { | 2751 | |
3309 | space_info->force_alloc = 1; | 2752 | /* loop == 0, try to find a clustered alloc in every block group |
3310 | } | 2753 | * loop == 1, try again after forcing a chunk allocation |
3311 | loop_check: | 2754 | * loop == 2, set empty_size and empty_cluster to 0 and try again |
3312 | if (keep_going) { | 2755 | */ |
3313 | cur = head->next; | 2756 | if (!ins->objectid && loop < 3 && |
3314 | extra_loop = 0; | 2757 | (empty_size || empty_cluster || allowed_chunk_alloc)) { |
3315 | } else { | 2758 | if (loop >= 2) { |
3316 | break; | 2759 | empty_size = 0; |
3317 | } | 2760 | empty_cluster = 0; |
3318 | } else if (cur == head) { | ||
3319 | break; | ||
3320 | } | 2761 | } |
3321 | 2762 | ||
3322 | block_group = list_entry(cur, struct btrfs_block_group_cache, | 2763 | if (allowed_chunk_alloc) { |
3323 | list); | 2764 | ret = do_chunk_alloc(trans, root, num_bytes + |
3324 | atomic_inc(&block_group->count); | 2765 | 2 * 1024 * 1024, data, 1); |
2766 | allowed_chunk_alloc = 0; | ||
2767 | } else { | ||
2768 | space_info->force_alloc = 1; | ||
2769 | } | ||
3325 | 2770 | ||
3326 | search_start = block_group->key.objectid; | 2771 | if (loop < 3) { |
3327 | cur = cur->next; | 2772 | loop++; |
2773 | goto search; | ||
2774 | } | ||
2775 | ret = -ENOSPC; | ||
2776 | } else if (!ins->objectid) { | ||
2777 | ret = -ENOSPC; | ||
3328 | } | 2778 | } |
3329 | 2779 | ||
3330 | /* we found what we needed */ | 2780 | /* we found what we needed */ |
@@ -3332,21 +2782,10 @@ loop_check: | |||
3332 | if (!(data & BTRFS_BLOCK_GROUP_DATA)) | 2782 | if (!(data & BTRFS_BLOCK_GROUP_DATA)) |
3333 | trans->block_group = block_group->key.objectid; | 2783 | trans->block_group = block_group->key.objectid; |
3334 | 2784 | ||
3335 | if (last_ptr) | 2785 | btrfs_put_block_group(block_group); |
3336 | *last_ptr = ins->objectid + ins->offset; | ||
3337 | ret = 0; | 2786 | ret = 0; |
3338 | } else if (!ret) { | ||
3339 | printk(KERN_ERR "btrfs searching for %llu bytes, " | ||
3340 | "num_bytes %llu, loop %d, allowed_alloc %d\n", | ||
3341 | (unsigned long long)total_needed, | ||
3342 | (unsigned long long)num_bytes, | ||
3343 | loop, allowed_chunk_alloc); | ||
3344 | ret = -ENOSPC; | ||
3345 | } | 2787 | } |
3346 | if (block_group) | ||
3347 | put_block_group(block_group); | ||
3348 | 2788 | ||
3349 | up_read(&space_info->groups_sem); | ||
3350 | return ret; | 2789 | return ret; |
3351 | } | 2790 | } |
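
The tail of the rewritten allocator encodes a widening retry ladder: pass 0 hopes for a clustered allocation, pass 1 retries after forcing a chunk allocation, and pass 2 drops the empty_size/empty_cluster padding before giving up with -ENOSPC. A compilable sketch of that ladder, with hypothetical helper names standing in for the real search and chunk-allocation paths:

    /* Illustrative-only model of the loop == 0/1/2 escalation above. */
    #include <stdint.h>

    struct alloc_ctl {
        uint64_t empty_size;     /* extra slack wanted around the hit */
        uint64_t empty_cluster;  /* preferred cluster size */
        int allowed_chunk_alloc; /* may we grow the space with a new chunk? */
    };

    /* Assumed helpers for the real block-group scan and chunk allocator. */
    uint64_t scan_block_groups(struct alloc_ctl *ctl, uint64_t num_bytes);
    int grow_space(uint64_t num_bytes);

    int64_t reserve_extent_sketch(struct alloc_ctl *ctl, uint64_t num_bytes)
    {
        uint64_t found;
        int loop;

        for (loop = 0; loop < 3; loop++) {
            found = scan_block_groups(ctl, num_bytes);
            if (found)
                return (int64_t)found;

            if (ctl->allowed_chunk_alloc) {
                grow_space(num_bytes);        /* pass 1: add a chunk */
                ctl->allowed_chunk_alloc = 0; /* only force it once */
            }
            if (loop >= 1) {                  /* pass 2: shed the padding */
                ctl->empty_size = 0;
                ctl->empty_cluster = 0;
            }
        }
        return -1; /* -ENOSPC in the kernel */
    }
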
3352 | 2791 | ||
@@ -3451,7 +2890,7 @@ int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len) | |||
3451 | ret = btrfs_discard_extent(root, start, len); | 2890 | ret = btrfs_discard_extent(root, start, len); |
3452 | 2891 | ||
3453 | btrfs_add_free_space(cache, start, len); | 2892 | btrfs_add_free_space(cache, start, len); |
3454 | put_block_group(cache); | 2893 | btrfs_put_block_group(cache); |
3455 | update_reserved_extents(root, start, len, 0); | 2894 | update_reserved_extents(root, start, len, 0); |
3456 | 2895 | ||
3457 | return ret; | 2896 | return ret; |
@@ -3475,10 +2914,10 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans, | |||
3475 | static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, | 2914 | static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, |
3476 | struct btrfs_root *root, u64 parent, | 2915 | struct btrfs_root *root, u64 parent, |
3477 | u64 root_objectid, u64 ref_generation, | 2916 | u64 root_objectid, u64 ref_generation, |
3478 | u64 owner, struct btrfs_key *ins) | 2917 | u64 owner, struct btrfs_key *ins, |
2918 | int ref_mod) | ||
3479 | { | 2919 | { |
3480 | int ret; | 2920 | int ret; |
3481 | int pending_ret; | ||
3482 | u64 super_used; | 2921 | u64 super_used; |
3483 | u64 root_used; | 2922 | u64 root_used; |
3484 | u64 num_bytes = ins->offset; | 2923 | u64 num_bytes = ins->offset; |
@@ -3503,33 +2942,6 @@ static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, | |||
3503 | btrfs_set_root_used(&root->root_item, root_used + num_bytes); | 2942 | btrfs_set_root_used(&root->root_item, root_used + num_bytes); |
3504 | spin_unlock(&info->delalloc_lock); | 2943 | spin_unlock(&info->delalloc_lock); |
3505 | 2944 | ||
3506 | if (root == extent_root) { | ||
3507 | struct pending_extent_op *extent_op; | ||
3508 | |||
3509 | extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); | ||
3510 | BUG_ON(!extent_op); | ||
3511 | |||
3512 | extent_op->type = PENDING_EXTENT_INSERT; | ||
3513 | extent_op->bytenr = ins->objectid; | ||
3514 | extent_op->num_bytes = ins->offset; | ||
3515 | extent_op->parent = parent; | ||
3516 | extent_op->orig_parent = 0; | ||
3517 | extent_op->generation = ref_generation; | ||
3518 | extent_op->orig_generation = 0; | ||
3519 | extent_op->level = (int)owner; | ||
3520 | INIT_LIST_HEAD(&extent_op->list); | ||
3521 | extent_op->del = 0; | ||
3522 | |||
3523 | mutex_lock(&root->fs_info->extent_ins_mutex); | ||
3524 | set_extent_bits(&root->fs_info->extent_ins, ins->objectid, | ||
3525 | ins->objectid + ins->offset - 1, | ||
3526 | EXTENT_WRITEBACK, GFP_NOFS); | ||
3527 | set_state_private(&root->fs_info->extent_ins, | ||
3528 | ins->objectid, (unsigned long)extent_op); | ||
3529 | mutex_unlock(&root->fs_info->extent_ins_mutex); | ||
3530 | goto update_block; | ||
3531 | } | ||
3532 | |||
3533 | memcpy(&keys[0], ins, sizeof(*ins)); | 2945 | memcpy(&keys[0], ins, sizeof(*ins)); |
3534 | keys[1].objectid = ins->objectid; | 2946 | keys[1].objectid = ins->objectid; |
3535 | keys[1].type = BTRFS_EXTENT_REF_KEY; | 2947 | keys[1].type = BTRFS_EXTENT_REF_KEY; |
@@ -3540,37 +2952,31 @@ static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, | |||
3540 | path = btrfs_alloc_path(); | 2952 | path = btrfs_alloc_path(); |
3541 | BUG_ON(!path); | 2953 | BUG_ON(!path); |
3542 | 2954 | ||
2955 | path->leave_spinning = 1; | ||
3543 | ret = btrfs_insert_empty_items(trans, extent_root, path, keys, | 2956 | ret = btrfs_insert_empty_items(trans, extent_root, path, keys, |
3544 | sizes, 2); | 2957 | sizes, 2); |
3545 | BUG_ON(ret); | 2958 | BUG_ON(ret); |
3546 | 2959 | ||
3547 | extent_item = btrfs_item_ptr(path->nodes[0], path->slots[0], | 2960 | extent_item = btrfs_item_ptr(path->nodes[0], path->slots[0], |
3548 | struct btrfs_extent_item); | 2961 | struct btrfs_extent_item); |
3549 | btrfs_set_extent_refs(path->nodes[0], extent_item, 1); | 2962 | btrfs_set_extent_refs(path->nodes[0], extent_item, ref_mod); |
3550 | ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1, | 2963 | ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1, |
3551 | struct btrfs_extent_ref); | 2964 | struct btrfs_extent_ref); |
3552 | 2965 | ||
3553 | btrfs_set_ref_root(path->nodes[0], ref, root_objectid); | 2966 | btrfs_set_ref_root(path->nodes[0], ref, root_objectid); |
3554 | btrfs_set_ref_generation(path->nodes[0], ref, ref_generation); | 2967 | btrfs_set_ref_generation(path->nodes[0], ref, ref_generation); |
3555 | btrfs_set_ref_objectid(path->nodes[0], ref, owner); | 2968 | btrfs_set_ref_objectid(path->nodes[0], ref, owner); |
3556 | btrfs_set_ref_num_refs(path->nodes[0], ref, 1); | 2969 | btrfs_set_ref_num_refs(path->nodes[0], ref, ref_mod); |
3557 | 2970 | ||
3558 | btrfs_mark_buffer_dirty(path->nodes[0]); | 2971 | btrfs_mark_buffer_dirty(path->nodes[0]); |
3559 | 2972 | ||
3560 | trans->alloc_exclude_start = 0; | 2973 | trans->alloc_exclude_start = 0; |
3561 | trans->alloc_exclude_nr = 0; | 2974 | trans->alloc_exclude_nr = 0; |
3562 | btrfs_free_path(path); | 2975 | btrfs_free_path(path); |
3563 | finish_current_insert(trans, extent_root, 0); | ||
3564 | pending_ret = del_pending_extents(trans, extent_root, 0); | ||
3565 | 2976 | ||
3566 | if (ret) | 2977 | if (ret) |
3567 | goto out; | 2978 | goto out; |
3568 | if (pending_ret) { | ||
3569 | ret = pending_ret; | ||
3570 | goto out; | ||
3571 | } | ||
3572 | 2979 | ||
3573 | update_block: | ||
3574 | ret = update_block_group(trans, root, ins->objectid, | 2980 | ret = update_block_group(trans, root, ins->objectid, |
3575 | ins->offset, 1, 0); | 2981 | ins->offset, 1, 0); |
3576 | if (ret) { | 2982 | if (ret) { |
@@ -3592,9 +2998,12 @@ int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, | |||
3592 | 2998 | ||
3593 | if (root_objectid == BTRFS_TREE_LOG_OBJECTID) | 2999 | if (root_objectid == BTRFS_TREE_LOG_OBJECTID) |
3594 | return 0; | 3000 | return 0; |
3595 | ret = __btrfs_alloc_reserved_extent(trans, root, parent, root_objectid, | 3001 | |
3596 | ref_generation, owner, ins); | 3002 | ret = btrfs_add_delayed_ref(trans, ins->objectid, |
3597 | update_reserved_extents(root, ins->objectid, ins->offset, 0); | 3003 | ins->offset, parent, root_objectid, |
3004 | ref_generation, owner, | ||
3005 | BTRFS_ADD_DELAYED_EXTENT, 0); | ||
3006 | BUG_ON(ret); | ||
3598 | return ret; | 3007 | return ret; |
3599 | } | 3008 | } |
3600 | 3009 | ||
@@ -3619,9 +3028,9 @@ int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans, | |||
3619 | ret = btrfs_remove_free_space(block_group, ins->objectid, | 3028 | ret = btrfs_remove_free_space(block_group, ins->objectid, |
3620 | ins->offset); | 3029 | ins->offset); |
3621 | BUG_ON(ret); | 3030 | BUG_ON(ret); |
3622 | put_block_group(block_group); | 3031 | btrfs_put_block_group(block_group); |
3623 | ret = __btrfs_alloc_reserved_extent(trans, root, parent, root_objectid, | 3032 | ret = __btrfs_alloc_reserved_extent(trans, root, parent, root_objectid, |
3624 | ref_generation, owner, ins); | 3033 | ref_generation, owner, ins, 1); |
3625 | return ret; | 3034 | return ret; |
3626 | } | 3035 | } |
3627 | 3036 | ||
@@ -3640,20 +3049,18 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, | |||
3640 | u64 search_end, struct btrfs_key *ins, u64 data) | 3049 | u64 search_end, struct btrfs_key *ins, u64 data) |
3641 | { | 3050 | { |
3642 | int ret; | 3051 | int ret; |
3643 | |||
3644 | ret = __btrfs_reserve_extent(trans, root, num_bytes, | 3052 | ret = __btrfs_reserve_extent(trans, root, num_bytes, |
3645 | min_alloc_size, empty_size, hint_byte, | 3053 | min_alloc_size, empty_size, hint_byte, |
3646 | search_end, ins, data); | 3054 | search_end, ins, data); |
3647 | BUG_ON(ret); | 3055 | BUG_ON(ret); |
3648 | if (root_objectid != BTRFS_TREE_LOG_OBJECTID) { | 3056 | if (root_objectid != BTRFS_TREE_LOG_OBJECTID) { |
3649 | ret = __btrfs_alloc_reserved_extent(trans, root, parent, | 3057 | ret = btrfs_add_delayed_ref(trans, ins->objectid, |
3650 | root_objectid, ref_generation, | 3058 | ins->offset, parent, root_objectid, |
3651 | owner_objectid, ins); | 3059 | ref_generation, owner_objectid, |
3060 | BTRFS_ADD_DELAYED_EXTENT, 0); | ||
3652 | BUG_ON(ret); | 3061 | BUG_ON(ret); |
3653 | |||
3654 | } else { | ||
3655 | update_reserved_extents(root, ins->objectid, ins->offset, 1); | ||
3656 | } | 3062 | } |
3063 | update_reserved_extents(root, ins->objectid, ins->offset, 1); | ||
3657 | return ret; | 3064 | return ret; |
3658 | } | 3065 | } |
3659 | 3066 | ||
@@ -3789,7 +3196,7 @@ int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, | |||
3789 | 3196 | ||
3790 | fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); | 3197 | fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); |
3791 | 3198 | ||
3792 | ret = __btrfs_free_extent(trans, root, disk_bytenr, | 3199 | ret = btrfs_free_extent(trans, root, disk_bytenr, |
3793 | btrfs_file_extent_disk_num_bytes(leaf, fi), | 3200 | btrfs_file_extent_disk_num_bytes(leaf, fi), |
3794 | leaf->start, leaf_owner, leaf_generation, | 3201 | leaf->start, leaf_owner, leaf_generation, |
3795 | key.objectid, 0); | 3202 | key.objectid, 0); |
@@ -3829,7 +3236,7 @@ static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans, | |||
3829 | */ | 3236 | */ |
3830 | for (i = 0; i < ref->nritems; i++) { | 3237 | for (i = 0; i < ref->nritems; i++) { |
3831 | info = ref->extents + sorted[i].slot; | 3238 | info = ref->extents + sorted[i].slot; |
3832 | ret = __btrfs_free_extent(trans, root, info->bytenr, | 3239 | ret = btrfs_free_extent(trans, root, info->bytenr, |
3833 | info->num_bytes, ref->bytenr, | 3240 | info->num_bytes, ref->bytenr, |
3834 | ref->owner, ref->generation, | 3241 | ref->owner, ref->generation, |
3835 | info->objectid, 0); | 3242 | info->objectid, 0); |
@@ -3846,12 +3253,13 @@ static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans, | |||
3846 | return 0; | 3253 | return 0; |
3847 | } | 3254 | } |
3848 | 3255 | ||
3849 | static int drop_snap_lookup_refcount(struct btrfs_root *root, u64 start, | 3256 | static int drop_snap_lookup_refcount(struct btrfs_trans_handle *trans, |
3257 | struct btrfs_root *root, u64 start, | ||
3850 | u64 len, u32 *refs) | 3258 | u64 len, u32 *refs) |
3851 | { | 3259 | { |
3852 | int ret; | 3260 | int ret; |
3853 | 3261 | ||
3854 | ret = btrfs_lookup_extent_ref(NULL, root, start, len, refs); | 3262 | ret = btrfs_lookup_extent_ref(trans, root, start, len, refs); |
3855 | BUG_ON(ret); | 3263 | BUG_ON(ret); |
3856 | 3264 | ||
3857 | #if 0 /* some debugging code in case we see problems here */ | 3265 | #if 0 /* some debugging code in case we see problems here */ |
@@ -3959,7 +3367,8 @@ static noinline int drop_level_one_refs(struct btrfs_trans_handle *trans, | |||
3959 | * we just decrement it below and don't update any | 3367 | * we just decrement it below and don't update any |
3960 | * of the refs the leaf points to. | 3368 | * of the refs the leaf points to. |
3961 | */ | 3369 | */ |
3962 | ret = drop_snap_lookup_refcount(root, bytenr, blocksize, &refs); | 3370 | ret = drop_snap_lookup_refcount(trans, root, bytenr, |
3371 | blocksize, &refs); | ||
3963 | BUG_ON(ret); | 3372 | BUG_ON(ret); |
3964 | if (refs != 1) | 3373 | if (refs != 1) |
3965 | continue; | 3374 | continue; |
@@ -4010,7 +3419,7 @@ static noinline int drop_level_one_refs(struct btrfs_trans_handle *trans, | |||
4010 | */ | 3419 | */ |
4011 | for (i = 0; i < refi; i++) { | 3420 | for (i = 0; i < refi; i++) { |
4012 | bytenr = sorted[i].bytenr; | 3421 | bytenr = sorted[i].bytenr; |
4013 | ret = __btrfs_free_extent(trans, root, bytenr, | 3422 | ret = btrfs_free_extent(trans, root, bytenr, |
4014 | blocksize, eb->start, | 3423 | blocksize, eb->start, |
4015 | root_owner, root_gen, 0, 1); | 3424 | root_owner, root_gen, 0, 1); |
4016 | BUG_ON(ret); | 3425 | BUG_ON(ret); |
@@ -4053,7 +3462,7 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans, | |||
4053 | 3462 | ||
4054 | WARN_ON(*level < 0); | 3463 | WARN_ON(*level < 0); |
4055 | WARN_ON(*level >= BTRFS_MAX_LEVEL); | 3464 | WARN_ON(*level >= BTRFS_MAX_LEVEL); |
4056 | ret = drop_snap_lookup_refcount(root, path->nodes[*level]->start, | 3465 | ret = drop_snap_lookup_refcount(trans, root, path->nodes[*level]->start, |
4057 | path->nodes[*level]->len, &refs); | 3466 | path->nodes[*level]->len, &refs); |
4058 | BUG_ON(ret); | 3467 | BUG_ON(ret); |
4059 | if (refs > 1) | 3468 | if (refs > 1) |
@@ -4104,7 +3513,8 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans, | |||
4104 | ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); | 3513 | ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); |
4105 | blocksize = btrfs_level_size(root, *level - 1); | 3514 | blocksize = btrfs_level_size(root, *level - 1); |
4106 | 3515 | ||
4107 | ret = drop_snap_lookup_refcount(root, bytenr, blocksize, &refs); | 3516 | ret = drop_snap_lookup_refcount(trans, root, bytenr, |
3517 | blocksize, &refs); | ||
4108 | BUG_ON(ret); | 3518 | BUG_ON(ret); |
4109 | 3519 | ||
4110 | /* | 3520 | /* |
@@ -4119,7 +3529,7 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans, | |||
4119 | root_gen = btrfs_header_generation(parent); | 3529 | root_gen = btrfs_header_generation(parent); |
4120 | path->slots[*level]++; | 3530 | path->slots[*level]++; |
4121 | 3531 | ||
4122 | ret = __btrfs_free_extent(trans, root, bytenr, | 3532 | ret = btrfs_free_extent(trans, root, bytenr, |
4123 | blocksize, parent->start, | 3533 | blocksize, parent->start, |
4124 | root_owner, root_gen, | 3534 | root_owner, root_gen, |
4125 | *level - 1, 1); | 3535 | *level - 1, 1); |
@@ -4165,7 +3575,7 @@ out: | |||
4165 | * cleanup and free the reference on the last node | 3575 | * cleanup and free the reference on the last node |
4166 | * we processed | 3576 | * we processed |
4167 | */ | 3577 | */ |
4168 | ret = __btrfs_free_extent(trans, root, bytenr, blocksize, | 3578 | ret = btrfs_free_extent(trans, root, bytenr, blocksize, |
4169 | parent->start, root_owner, root_gen, | 3579 | parent->start, root_owner, root_gen, |
4170 | *level, 1); | 3580 | *level, 1); |
4171 | free_extent_buffer(path->nodes[*level]); | 3581 | free_extent_buffer(path->nodes[*level]); |
@@ -4354,6 +3764,7 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root | |||
4354 | struct btrfs_path *path; | 3764 | struct btrfs_path *path; |
4355 | int i; | 3765 | int i; |
4356 | int orig_level; | 3766 | int orig_level; |
3767 | int update_count; | ||
4357 | struct btrfs_root_item *root_item = &root->root_item; | 3768 | struct btrfs_root_item *root_item = &root->root_item; |
4358 | 3769 | ||
4359 | WARN_ON(!mutex_is_locked(&root->fs_info->drop_mutex)); | 3770 | WARN_ON(!mutex_is_locked(&root->fs_info->drop_mutex)); |
@@ -4395,6 +3806,7 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root | |||
4395 | } | 3806 | } |
4396 | } | 3807 | } |
4397 | while (1) { | 3808 | while (1) { |
3809 | unsigned long update; | ||
4398 | wret = walk_down_tree(trans, root, path, &level); | 3810 | wret = walk_down_tree(trans, root, path, &level); |
4399 | if (wret > 0) | 3811 | if (wret > 0) |
4400 | break; | 3812 | break; |
@@ -4407,12 +3819,21 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root | |||
4407 | break; | 3819 | break; |
4408 | if (wret < 0) | 3820 | if (wret < 0) |
4409 | ret = wret; | 3821 | ret = wret; |
4410 | if (trans->transaction->in_commit) { | 3822 | if (trans->transaction->in_commit || |
3823 | trans->transaction->delayed_refs.flushing) { | ||
4411 | ret = -EAGAIN; | 3824 | ret = -EAGAIN; |
4412 | break; | 3825 | break; |
4413 | } | 3826 | } |
4414 | atomic_inc(&root->fs_info->throttle_gen); | 3827 | atomic_inc(&root->fs_info->throttle_gen); |
4415 | wake_up(&root->fs_info->transaction_throttle); | 3828 | wake_up(&root->fs_info->transaction_throttle); |
3829 | for (update_count = 0; update_count < 16; update_count++) { | ||
3830 | update = trans->delayed_ref_updates; | ||
3831 | trans->delayed_ref_updates = 0; | ||
3832 | if (update) | ||
3833 | btrfs_run_delayed_refs(trans, root, update); | ||
3834 | else | ||
3835 | break; | ||
3836 | } | ||
4416 | } | 3837 | } |
4417 | for (i = 0; i <= orig_level; i++) { | 3838 | for (i = 0; i <= orig_level; i++) { |
4418 | if (path->nodes[i]) { | 3839 | if (path->nodes[i]) { |
@@ -5457,6 +4878,7 @@ static noinline int replace_extents_in_leaf(struct btrfs_trans_handle *trans, | |||
5457 | root->root_key.objectid, | 4878 | root->root_key.objectid, |
5458 | trans->transid, key.objectid); | 4879 | trans->transid, key.objectid); |
5459 | BUG_ON(ret); | 4880 | BUG_ON(ret); |
4881 | |||
5460 | ret = btrfs_free_extent(trans, root, | 4882 | ret = btrfs_free_extent(trans, root, |
5461 | bytenr, num_bytes, leaf->start, | 4883 | bytenr, num_bytes, leaf->start, |
5462 | btrfs_header_owner(leaf), | 4884 | btrfs_header_owner(leaf), |
@@ -5768,9 +5190,6 @@ static noinline int relocate_tree_block(struct btrfs_trans_handle *trans, | |||
5768 | ref_path, NULL, NULL); | 5190 | ref_path, NULL, NULL); |
5769 | BUG_ON(ret); | 5191 | BUG_ON(ret); |
5770 | 5192 | ||
5771 | if (root == root->fs_info->extent_root) | ||
5772 | btrfs_extent_post_op(trans, root); | ||
5773 | |||
5774 | return 0; | 5193 | return 0; |
5775 | } | 5194 | } |
5776 | 5195 | ||
@@ -6038,6 +5457,7 @@ static int __insert_orphan_inode(struct btrfs_trans_handle *trans, | |||
6038 | if (!path) | 5457 | if (!path) |
6039 | return -ENOMEM; | 5458 | return -ENOMEM; |
6040 | 5459 | ||
5460 | path->leave_spinning = 1; | ||
6041 | ret = btrfs_insert_empty_inode(trans, root, path, objectid); | 5461 | ret = btrfs_insert_empty_inode(trans, root, path, objectid); |
6042 | if (ret) | 5462 | if (ret) |
6043 | goto out; | 5463 | goto out; |
@@ -6208,6 +5628,9 @@ again: | |||
6208 | btrfs_remove_leaf_refs(info->tree_root, (u64)-1, 1); | 5628 | btrfs_remove_leaf_refs(info->tree_root, (u64)-1, 1); |
6209 | mutex_unlock(&root->fs_info->cleaner_mutex); | 5629 | mutex_unlock(&root->fs_info->cleaner_mutex); |
6210 | 5630 | ||
5631 | trans = btrfs_start_transaction(info->tree_root, 1); | ||
5632 | btrfs_commit_transaction(trans, info->tree_root); | ||
5633 | |||
6211 | while (1) { | 5634 | while (1) { |
6212 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | 5635 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); |
6213 | if (ret < 0) | 5636 | if (ret < 0) |
@@ -6294,7 +5717,7 @@ next: | |||
6294 | WARN_ON(block_group->reserved > 0); | 5717 | WARN_ON(block_group->reserved > 0); |
6295 | WARN_ON(btrfs_block_group_used(&block_group->item) > 0); | 5718 | WARN_ON(btrfs_block_group_used(&block_group->item) > 0); |
6296 | spin_unlock(&block_group->lock); | 5719 | spin_unlock(&block_group->lock); |
6297 | put_block_group(block_group); | 5720 | btrfs_put_block_group(block_group); |
6298 | ret = 0; | 5721 | ret = 0; |
6299 | out: | 5722 | out: |
6300 | btrfs_free_path(path); | 5723 | btrfs_free_path(path); |
@@ -6421,9 +5844,10 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
6421 | 5844 | ||
6422 | atomic_set(&cache->count, 1); | 5845 | atomic_set(&cache->count, 1); |
6423 | spin_lock_init(&cache->lock); | 5846 | spin_lock_init(&cache->lock); |
6424 | mutex_init(&cache->alloc_mutex); | 5847 | spin_lock_init(&cache->tree_lock); |
6425 | mutex_init(&cache->cache_mutex); | 5848 | mutex_init(&cache->cache_mutex); |
6426 | INIT_LIST_HEAD(&cache->list); | 5849 | INIT_LIST_HEAD(&cache->list); |
5850 | INIT_LIST_HEAD(&cache->cluster_list); | ||
6427 | read_extent_buffer(leaf, &cache->item, | 5851 | read_extent_buffer(leaf, &cache->item, |
6428 | btrfs_item_ptr_offset(leaf, path->slots[0]), | 5852 | btrfs_item_ptr_offset(leaf, path->slots[0]), |
6429 | sizeof(cache->item)); | 5853 | sizeof(cache->item)); |
@@ -6466,7 +5890,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, | |||
6466 | 5890 | ||
6467 | extent_root = root->fs_info->extent_root; | 5891 | extent_root = root->fs_info->extent_root; |
6468 | 5892 | ||
6469 | root->fs_info->last_trans_new_blockgroup = trans->transid; | 5893 | root->fs_info->last_trans_log_full_commit = trans->transid; |
6470 | 5894 | ||
6471 | cache = kzalloc(sizeof(*cache), GFP_NOFS); | 5895 | cache = kzalloc(sizeof(*cache), GFP_NOFS); |
6472 | if (!cache) | 5896 | if (!cache) |
@@ -6477,9 +5901,10 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, | |||
6477 | cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; | 5901 | cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; |
6478 | atomic_set(&cache->count, 1); | 5902 | atomic_set(&cache->count, 1); |
6479 | spin_lock_init(&cache->lock); | 5903 | spin_lock_init(&cache->lock); |
6480 | mutex_init(&cache->alloc_mutex); | 5904 | spin_lock_init(&cache->tree_lock); |
6481 | mutex_init(&cache->cache_mutex); | 5905 | mutex_init(&cache->cache_mutex); |
6482 | INIT_LIST_HEAD(&cache->list); | 5906 | INIT_LIST_HEAD(&cache->list); |
5907 | INIT_LIST_HEAD(&cache->cluster_list); | ||
6483 | 5908 | ||
6484 | btrfs_set_block_group_used(&cache->item, bytes_used); | 5909 | btrfs_set_block_group_used(&cache->item, bytes_used); |
6485 | btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid); | 5910 | btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid); |
@@ -6500,9 +5925,6 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, | |||
6500 | sizeof(cache->item)); | 5925 | sizeof(cache->item)); |
6501 | BUG_ON(ret); | 5926 | BUG_ON(ret); |
6502 | 5927 | ||
6503 | finish_current_insert(trans, extent_root, 0); | ||
6504 | ret = del_pending_extents(trans, extent_root, 0); | ||
6505 | BUG_ON(ret); | ||
6506 | set_avail_alloc_bits(extent_root->fs_info, type); | 5928 | set_avail_alloc_bits(extent_root->fs_info, type); |
6507 | 5929 | ||
6508 | return 0; | 5930 | return 0; |
@@ -6542,8 +5964,8 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | |||
6542 | spin_unlock(&block_group->space_info->lock); | 5964 | spin_unlock(&block_group->space_info->lock); |
6543 | block_group->space_info->full = 0; | 5965 | block_group->space_info->full = 0; |
6544 | 5966 | ||
6545 | put_block_group(block_group); | 5967 | btrfs_put_block_group(block_group); |
6546 | put_block_group(block_group); | 5968 | btrfs_put_block_group(block_group); |
6547 | 5969 | ||
6548 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); | 5970 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); |
6549 | if (ret > 0) | 5971 | if (ret > 0) |
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index ebe6b29e6069..eb2bee8b7fbf 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
@@ -2884,25 +2884,19 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | |||
2884 | disko = 0; | 2884 | disko = 0; |
2885 | flags = 0; | 2885 | flags = 0; |
2886 | 2886 | ||
2887 | switch (em->block_start) { | 2887 | if (em->block_start == EXTENT_MAP_LAST_BYTE) { |
2888 | case EXTENT_MAP_LAST_BYTE: | ||
2889 | end = 1; | 2888 | end = 1; |
2890 | flags |= FIEMAP_EXTENT_LAST; | 2889 | flags |= FIEMAP_EXTENT_LAST; |
2891 | break; | 2890 | } else if (em->block_start == EXTENT_MAP_HOLE) { |
2892 | case EXTENT_MAP_HOLE: | ||
2893 | flags |= FIEMAP_EXTENT_UNWRITTEN; | 2891 | flags |= FIEMAP_EXTENT_UNWRITTEN; |
2894 | break; | 2892 | } else if (em->block_start == EXTENT_MAP_INLINE) { |
2895 | case EXTENT_MAP_INLINE: | ||
2896 | flags |= (FIEMAP_EXTENT_DATA_INLINE | | 2893 | flags |= (FIEMAP_EXTENT_DATA_INLINE | |
2897 | FIEMAP_EXTENT_NOT_ALIGNED); | 2894 | FIEMAP_EXTENT_NOT_ALIGNED); |
2898 | break; | 2895 | } else if (em->block_start == EXTENT_MAP_DELALLOC) { |
2899 | case EXTENT_MAP_DELALLOC: | ||
2900 | flags |= (FIEMAP_EXTENT_DELALLOC | | 2896 | flags |= (FIEMAP_EXTENT_DELALLOC | |
2901 | FIEMAP_EXTENT_UNKNOWN); | 2897 | FIEMAP_EXTENT_UNKNOWN); |
2902 | break; | 2898 | } else { |
2903 | default: | ||
2904 | disko = em->block_start; | 2899 | disko = em->block_start; |
2905 | break; | ||
2906 | } | 2900 | } |
2907 | if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) | 2901 | if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) |
2908 | flags |= FIEMAP_EXTENT_ENCODED; | 2902 | flags |= FIEMAP_EXTENT_ENCODED; |
@@ -3124,20 +3118,15 @@ void free_extent_buffer(struct extent_buffer *eb) | |||
3124 | int clear_extent_buffer_dirty(struct extent_io_tree *tree, | 3118 | int clear_extent_buffer_dirty(struct extent_io_tree *tree, |
3125 | struct extent_buffer *eb) | 3119 | struct extent_buffer *eb) |
3126 | { | 3120 | { |
3127 | int set; | ||
3128 | unsigned long i; | 3121 | unsigned long i; |
3129 | unsigned long num_pages; | 3122 | unsigned long num_pages; |
3130 | struct page *page; | 3123 | struct page *page; |
3131 | 3124 | ||
3132 | u64 start = eb->start; | ||
3133 | u64 end = start + eb->len - 1; | ||
3134 | |||
3135 | set = clear_extent_dirty(tree, start, end, GFP_NOFS); | ||
3136 | num_pages = num_extent_pages(eb->start, eb->len); | 3125 | num_pages = num_extent_pages(eb->start, eb->len); |
3137 | 3126 | ||
3138 | for (i = 0; i < num_pages; i++) { | 3127 | for (i = 0; i < num_pages; i++) { |
3139 | page = extent_buffer_page(eb, i); | 3128 | page = extent_buffer_page(eb, i); |
3140 | if (!set && !PageDirty(page)) | 3129 | if (!PageDirty(page)) |
3141 | continue; | 3130 | continue; |
3142 | 3131 | ||
3143 | lock_page(page); | 3132 | lock_page(page); |
@@ -3146,22 +3135,6 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree, | |||
3146 | else | 3135 | else |
3147 | set_page_private(page, EXTENT_PAGE_PRIVATE); | 3136 | set_page_private(page, EXTENT_PAGE_PRIVATE); |
3148 | 3137 | ||
3149 | /* | ||
3150 | * if we're on the last page or the first page and the | ||
3151 | * block isn't aligned on a page boundary, do extra checks | ||
3152 | * to make sure we don't clean a page that is partially dirty | ||
3153 | */ | ||
3154 | if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) || | ||
3155 | ((i == num_pages - 1) && | ||
3156 | ((eb->start + eb->len) & (PAGE_CACHE_SIZE - 1)))) { | ||
3157 | start = (u64)page->index << PAGE_CACHE_SHIFT; | ||
3158 | end = start + PAGE_CACHE_SIZE - 1; | ||
3159 | if (test_range_bit(tree, start, end, | ||
3160 | EXTENT_DIRTY, 0)) { | ||
3161 | unlock_page(page); | ||
3162 | continue; | ||
3163 | } | ||
3164 | } | ||
3165 | clear_page_dirty_for_io(page); | 3138 | clear_page_dirty_for_io(page); |
3166 | spin_lock_irq(&page->mapping->tree_lock); | 3139 | spin_lock_irq(&page->mapping->tree_lock); |
3167 | if (!PageDirty(page)) { | 3140 | if (!PageDirty(page)) { |
@@ -3187,29 +3160,13 @@ int set_extent_buffer_dirty(struct extent_io_tree *tree, | |||
3187 | { | 3160 | { |
3188 | unsigned long i; | 3161 | unsigned long i; |
3189 | unsigned long num_pages; | 3162 | unsigned long num_pages; |
3163 | int was_dirty = 0; | ||
3190 | 3164 | ||
3165 | was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags); | ||
3191 | num_pages = num_extent_pages(eb->start, eb->len); | 3166 | num_pages = num_extent_pages(eb->start, eb->len); |
3192 | for (i = 0; i < num_pages; i++) { | 3167 | for (i = 0; i < num_pages; i++) |
3193 | struct page *page = extent_buffer_page(eb, i); | ||
3194 | /* writepage may need to do something special for the | ||
3195 | * first page, we have to make sure page->private is | ||
3196 | * properly set. releasepage may drop page->private | ||
3197 | * on us if the page isn't already dirty. | ||
3198 | */ | ||
3199 | lock_page(page); | ||
3200 | if (i == 0) { | ||
3201 | set_page_extent_head(page, eb->len); | ||
3202 | } else if (PagePrivate(page) && | ||
3203 | page->private != EXTENT_PAGE_PRIVATE) { | ||
3204 | set_page_extent_mapped(page); | ||
3205 | } | ||
3206 | __set_page_dirty_nobuffers(extent_buffer_page(eb, i)); | 3168 | __set_page_dirty_nobuffers(extent_buffer_page(eb, i)); |
3207 | set_extent_dirty(tree, page_offset(page), | 3169 | return was_dirty; |
3208 | page_offset(page) + PAGE_CACHE_SIZE - 1, | ||
3209 | GFP_NOFS); | ||
3210 | unlock_page(page); | ||
3211 | } | ||
3212 | return 0; | ||
3213 | } | 3170 | } |
3214 | 3171 | ||
3215 | int clear_extent_buffer_uptodate(struct extent_io_tree *tree, | 3172 | int clear_extent_buffer_uptodate(struct extent_io_tree *tree, |
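
set_extent_buffer_dirty now answers "was this buffer already dirty?" with a single test_and_set_bit on EXTENT_BUFFER_DIRTY instead of consulting the shared extent-state tree, and clear/try_release check the same bit. A sketch of the pattern with C11 atomics standing in for the kernel's bitops (names are illustrative):

    /* One atomic test-and-set replaces a tree lookup per dirtying. */
    #include <stdatomic.h>
    #include <stdbool.h>

    #define BUF_DIRTY (1u << 2) /* mirrors EXTENT_BUFFER_DIRTY = bit 2 */

    struct buffer {
        _Atomic unsigned int flags;
    };

    /* Returns true if the buffer was already dirty, letting the caller
     * skip redundant page-dirtying work. */
    bool mark_buffer_dirty(struct buffer *b)
    {
        unsigned int old = atomic_fetch_or(&b->flags, BUF_DIRTY);
        return (old & BUF_DIRTY) != 0;
    }
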
@@ -3789,6 +3746,10 @@ int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page) | |||
3789 | ret = 0; | 3746 | ret = 0; |
3790 | goto out; | 3747 | goto out; |
3791 | } | 3748 | } |
3749 | if (test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) { | ||
3750 | ret = 0; | ||
3751 | goto out; | ||
3752 | } | ||
3792 | /* at this point we can safely release the extent buffer */ | 3753 | /* at this point we can safely release the extent buffer */ |
3793 | num_pages = num_extent_pages(eb->start, eb->len); | 3754 | num_pages = num_extent_pages(eb->start, eb->len); |
3794 | for (i = 0; i < num_pages; i++) | 3755 | for (i = 0; i < num_pages; i++) |
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 1f9df88afbf6..5bc20abf3f3d 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h | |||
@@ -25,6 +25,7 @@ | |||
25 | /* these are bit numbers for test/set bit */ | 25 | /* these are bit numbers for test/set bit */ |
26 | #define EXTENT_BUFFER_UPTODATE 0 | 26 | #define EXTENT_BUFFER_UPTODATE 0 |
27 | #define EXTENT_BUFFER_BLOCKING 1 | 27 | #define EXTENT_BUFFER_BLOCKING 1 |
28 | #define EXTENT_BUFFER_DIRTY 2 | ||
28 | 29 | ||
29 | /* | 30 | /* |
30 | * page->private values. Every page that is controlled by the extent | 31 | * page->private values. Every page that is controlled by the extent |
@@ -254,6 +255,8 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree, | |||
254 | struct extent_buffer *eb); | 255 | struct extent_buffer *eb); |
255 | int set_extent_buffer_dirty(struct extent_io_tree *tree, | 256 | int set_extent_buffer_dirty(struct extent_io_tree *tree, |
256 | struct extent_buffer *eb); | 257 | struct extent_buffer *eb); |
258 | int test_extent_buffer_dirty(struct extent_io_tree *tree, | ||
259 | struct extent_buffer *eb); | ||
257 | int set_extent_buffer_uptodate(struct extent_io_tree *tree, | 260 | int set_extent_buffer_uptodate(struct extent_io_tree *tree, |
258 | struct extent_buffer *eb); | 261 | struct extent_buffer *eb); |
259 | int clear_extent_buffer_uptodate(struct extent_io_tree *tree, | 262 | int clear_extent_buffer_uptodate(struct extent_io_tree *tree, |
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 50da69da20ce..b187917b36fa 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c | |||
@@ -234,7 +234,6 @@ int add_extent_mapping(struct extent_map_tree *tree, | |||
234 | rb = tree_insert(&tree->map, em->start, &em->rb_node); | 234 | rb = tree_insert(&tree->map, em->start, &em->rb_node); |
235 | if (rb) { | 235 | if (rb) { |
236 | ret = -EEXIST; | 236 | ret = -EEXIST; |
237 | free_extent_map(merge); | ||
238 | goto out; | 237 | goto out; |
239 | } | 238 | } |
240 | atomic_inc(&em->refs); | 239 | atomic_inc(&em->refs); |
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 964652435fd1..9b99886562d0 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c | |||
@@ -52,6 +52,7 @@ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, | |||
52 | file_key.offset = pos; | 52 | file_key.offset = pos; |
53 | btrfs_set_key_type(&file_key, BTRFS_EXTENT_DATA_KEY); | 53 | btrfs_set_key_type(&file_key, BTRFS_EXTENT_DATA_KEY); |
54 | 54 | ||
55 | path->leave_spinning = 1; | ||
55 | ret = btrfs_insert_empty_item(trans, root, path, &file_key, | 56 | ret = btrfs_insert_empty_item(trans, root, path, &file_key, |
56 | sizeof(*item)); | 57 | sizeof(*item)); |
57 | if (ret < 0) | 58 | if (ret < 0) |
@@ -523,6 +524,7 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans, | |||
523 | key.offset = end_byte - 1; | 524 | key.offset = end_byte - 1; |
524 | key.type = BTRFS_EXTENT_CSUM_KEY; | 525 | key.type = BTRFS_EXTENT_CSUM_KEY; |
525 | 526 | ||
527 | path->leave_spinning = 1; | ||
526 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); | 528 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); |
527 | if (ret > 0) { | 529 | if (ret > 0) { |
528 | if (path->slots[0] == 0) | 530 | if (path->slots[0] == 0) |
@@ -757,8 +759,10 @@ insert: | |||
757 | } else { | 759 | } else { |
758 | ins_size = csum_size; | 760 | ins_size = csum_size; |
759 | } | 761 | } |
762 | path->leave_spinning = 1; | ||
760 | ret = btrfs_insert_empty_item(trans, root, path, &file_key, | 763 | ret = btrfs_insert_empty_item(trans, root, path, &file_key, |
761 | ins_size); | 764 | ins_size); |
765 | path->leave_spinning = 0; | ||
762 | if (ret < 0) | 766 | if (ret < 0) |
763 | goto fail_unlock; | 767 | goto fail_unlock; |
764 | if (ret != 0) { | 768 | if (ret != 0) { |
@@ -776,7 +780,6 @@ found: | |||
776 | item_end = (struct btrfs_csum_item *)((unsigned char *)item_end + | 780 | item_end = (struct btrfs_csum_item *)((unsigned char *)item_end + |
777 | btrfs_item_size_nr(leaf, path->slots[0])); | 781 | btrfs_item_size_nr(leaf, path->slots[0])); |
778 | eb_token = NULL; | 782 | eb_token = NULL; |
779 | cond_resched(); | ||
780 | next_sector: | 783 | next_sector: |
781 | 784 | ||
782 | if (!eb_token || | 785 | if (!eb_token || |
@@ -817,9 +820,9 @@ next_sector: | |||
817 | eb_token = NULL; | 820 | eb_token = NULL; |
818 | } | 821 | } |
819 | btrfs_mark_buffer_dirty(path->nodes[0]); | 822 | btrfs_mark_buffer_dirty(path->nodes[0]); |
820 | cond_resched(); | ||
821 | if (total_bytes < sums->len) { | 823 | if (total_bytes < sums->len) { |
822 | btrfs_release_path(root, path); | 824 | btrfs_release_path(root, path); |
825 | cond_resched(); | ||
823 | goto again; | 826 | goto again; |
824 | } | 827 | } |
825 | out: | 828 | out: |
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index dc78954861b3..9c9fb46ccd08 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
@@ -606,6 +606,7 @@ next_slot: | |||
606 | btrfs_set_key_type(&ins, BTRFS_EXTENT_DATA_KEY); | 606 | btrfs_set_key_type(&ins, BTRFS_EXTENT_DATA_KEY); |
607 | 607 | ||
608 | btrfs_release_path(root, path); | 608 | btrfs_release_path(root, path); |
609 | path->leave_spinning = 1; | ||
609 | ret = btrfs_insert_empty_item(trans, root, path, &ins, | 610 | ret = btrfs_insert_empty_item(trans, root, path, &ins, |
610 | sizeof(*extent)); | 611 | sizeof(*extent)); |
611 | BUG_ON(ret); | 612 | BUG_ON(ret); |
@@ -639,17 +640,22 @@ next_slot: | |||
639 | ram_bytes); | 640 | ram_bytes); |
640 | btrfs_set_file_extent_type(leaf, extent, found_type); | 641 | btrfs_set_file_extent_type(leaf, extent, found_type); |
641 | 642 | ||
643 | btrfs_unlock_up_safe(path, 1); | ||
642 | btrfs_mark_buffer_dirty(path->nodes[0]); | 644 | btrfs_mark_buffer_dirty(path->nodes[0]); |
645 | btrfs_set_lock_blocking(path->nodes[0]); | ||
643 | 646 | ||
644 | if (disk_bytenr != 0) { | 647 | if (disk_bytenr != 0) { |
645 | ret = btrfs_update_extent_ref(trans, root, | 648 | ret = btrfs_update_extent_ref(trans, root, |
646 | disk_bytenr, orig_parent, | 649 | disk_bytenr, |
650 | le64_to_cpu(old.disk_num_bytes), | ||
651 | orig_parent, | ||
647 | leaf->start, | 652 | leaf->start, |
648 | root->root_key.objectid, | 653 | root->root_key.objectid, |
649 | trans->transid, ins.objectid); | 654 | trans->transid, ins.objectid); |
650 | 655 | ||
651 | BUG_ON(ret); | 656 | BUG_ON(ret); |
652 | } | 657 | } |
658 | path->leave_spinning = 0; | ||
653 | btrfs_release_path(root, path); | 659 | btrfs_release_path(root, path); |
654 | if (disk_bytenr != 0) | 660 | if (disk_bytenr != 0) |
655 | inode_add_bytes(inode, extent_end - end); | 661 | inode_add_bytes(inode, extent_end - end); |
@@ -912,7 +918,7 @@ again: | |||
912 | btrfs_set_file_extent_other_encoding(leaf, fi, 0); | 918 | btrfs_set_file_extent_other_encoding(leaf, fi, 0); |
913 | 919 | ||
914 | if (orig_parent != leaf->start) { | 920 | if (orig_parent != leaf->start) { |
915 | ret = btrfs_update_extent_ref(trans, root, bytenr, | 921 | ret = btrfs_update_extent_ref(trans, root, bytenr, num_bytes, |
916 | orig_parent, leaf->start, | 922 | orig_parent, leaf->start, |
917 | root->root_key.objectid, | 923 | root->root_key.objectid, |
918 | trans->transid, inode->i_ino); | 924 | trans->transid, inode->i_ino); |
@@ -1155,6 +1161,20 @@ out_nolock: | |||
1155 | page_cache_release(pinned[1]); | 1161 | page_cache_release(pinned[1]); |
1156 | *ppos = pos; | 1162 | *ppos = pos; |
1157 | 1163 | ||
1164 | /* | ||
1165 | * we want to make sure fsync finds this change | ||
1166 | * but we haven't joined a transaction running right now. | ||
1167 | * | ||
1168 | * Later on, someone is sure to update the inode and get the | ||
1169 | * real transid recorded. | ||
1170 | * | ||
1171 | * We set last_trans now to the fs_info generation + 1; | ||
1172 | * this will be either one more than the running transaction | ||
1173 | * or the generation used for the next transaction if there isn't | ||
1174 | * one running right now. | ||
1175 | */ | ||
1176 | BTRFS_I(inode)->last_trans = root->fs_info->generation + 1; | ||
1177 | |||
1158 | if (num_written > 0 && will_write) { | 1178 | if (num_written > 0 && will_write) { |
1159 | struct btrfs_trans_handle *trans; | 1179 | struct btrfs_trans_handle *trans; |
1160 | 1180 | ||
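
The last_trans comment above is the whole contract: fsync may skip logging only when the inode's last_trans is already covered by a committed transaction, so stamping generation + 1 without joining a transaction is always safe. A minimal model of that check, with hypothetical field names:

    /* Sketch of the generation comparison behind last_trans. */
    #include <stdint.h>
    #include <stdbool.h>

    struct fs_state {
        uint64_t generation;     /* generation of the running transaction */
        uint64_t last_committed; /* newest transid known to be on disk */
    };

    struct inode_state {
        uint64_t last_trans;     /* transid that last touched this inode */
    };

    /* Record a write without joining a transaction: generation + 1 always
     * compares newer than anything already committed. */
    void note_write(struct inode_state *ino, const struct fs_state *fs)
    {
        ino->last_trans = fs->generation + 1;
    }

    bool fsync_can_skip(const struct inode_state *ino,
                        const struct fs_state *fs)
    {
        return ino->last_trans <= fs->last_committed;
    }
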
@@ -1167,8 +1187,11 @@ out_nolock: | |||
1167 | ret = btrfs_log_dentry_safe(trans, root, | 1187 | ret = btrfs_log_dentry_safe(trans, root, |
1168 | file->f_dentry); | 1188 | file->f_dentry); |
1169 | if (ret == 0) { | 1189 | if (ret == 0) { |
1170 | btrfs_sync_log(trans, root); | 1190 | ret = btrfs_sync_log(trans, root); |
1171 | btrfs_end_transaction(trans, root); | 1191 | if (ret == 0) |
1192 | btrfs_end_transaction(trans, root); | ||
1193 | else | ||
1194 | btrfs_commit_transaction(trans, root); | ||
1172 | } else { | 1195 | } else { |
1173 | btrfs_commit_transaction(trans, root); | 1196 | btrfs_commit_transaction(trans, root); |
1174 | } | 1197 | } |
@@ -1185,6 +1208,18 @@ out_nolock: | |||
1185 | 1208 | ||
1186 | int btrfs_release_file(struct inode *inode, struct file *filp) | 1209 | int btrfs_release_file(struct inode *inode, struct file *filp) |
1187 | { | 1210 | { |
1211 | /* | ||
1212 | * ordered_data_close is set by setattr when we are about to truncate | ||
1213 | * a file from a non-zero size to a zero size. This tries to | ||
1214 | * flush down new bytes that may have been written if the | ||
1215 | * application were using truncate to replace a file in place. | ||
1216 | */ | ||
1217 | if (BTRFS_I(inode)->ordered_data_close) { | ||
1218 | BTRFS_I(inode)->ordered_data_close = 0; | ||
1219 | btrfs_add_ordered_operation(NULL, BTRFS_I(inode)->root, inode); | ||
1220 | if (inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT) | ||
1221 | filemap_flush(inode->i_mapping); | ||
1222 | } | ||
1188 | if (filp->private_data) | 1223 | if (filp->private_data) |
1189 | btrfs_ioctl_trans_end(filp); | 1224 | btrfs_ioctl_trans_end(filp); |
1190 | return 0; | 1225 | return 0; |
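
The ordered_data_close hook targets the truncate-then-rewrite idiom sketched below: between the O_TRUNC and writeback of the new bytes, a crash would leave an empty file, so release now kicks off a flush for files past BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT. An illustrative userspace view of the pattern being defended:

    /* The application-side idiom; the fsync() is what careful code does,
     * the kernel change just narrows the window for code that doesn't. */
    #include <fcntl.h>
    #include <unistd.h>

    int replace_file_in_place(const char *path, const void *buf, size_t len)
    {
        int fd = open(path, O_WRONLY | O_TRUNC); /* size drops to zero here */
        if (fd < 0)
            return -1;
        if (write(fd, buf, len) != (ssize_t)len) { /* new bytes, not yet on disk */
            close(fd);
            return -1;
        }
        fsync(fd); /* without this, only the release-time flush helps */
        return close(fd);
    }
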
@@ -1260,8 +1295,11 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync) | |||
1260 | if (ret > 0) { | 1295 | if (ret > 0) { |
1261 | ret = btrfs_commit_transaction(trans, root); | 1296 | ret = btrfs_commit_transaction(trans, root); |
1262 | } else { | 1297 | } else { |
1263 | btrfs_sync_log(trans, root); | 1298 | ret = btrfs_sync_log(trans, root); |
1264 | ret = btrfs_end_transaction(trans, root); | 1299 | if (ret == 0) |
1300 | ret = btrfs_end_transaction(trans, root); | ||
1301 | else | ||
1302 | ret = btrfs_commit_transaction(trans, root); | ||
1265 | } | 1303 | } |
1266 | mutex_lock(&dentry->d_inode->i_mutex); | 1304 | mutex_lock(&dentry->d_inode->i_mutex); |
1267 | out: | 1305 | out: |
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index d1e5f0e84c58..768b9523662d 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c | |||
@@ -18,6 +18,15 @@ | |||
18 | 18 | ||
19 | #include <linux/sched.h> | 19 | #include <linux/sched.h> |
20 | #include "ctree.h" | 20 | #include "ctree.h" |
21 | #include "free-space-cache.h" | ||
22 | #include "transaction.h" | ||
23 | |||
24 | struct btrfs_free_space { | ||
25 | struct rb_node bytes_index; | ||
26 | struct rb_node offset_index; | ||
27 | u64 offset; | ||
28 | u64 bytes; | ||
29 | }; | ||
21 | 30 | ||
22 | static int tree_insert_offset(struct rb_root *root, u64 offset, | 31 | static int tree_insert_offset(struct rb_root *root, u64 offset, |
23 | struct rb_node *node) | 32 | struct rb_node *node) |
@@ -68,14 +77,24 @@ static int tree_insert_bytes(struct rb_root *root, u64 bytes, | |||
68 | } | 77 | } |
69 | 78 | ||
70 | /* | 79 | /* |
71 | * searches the tree for the given offset. If contains is set we will return | 80 | * searches the tree for the given offset. |
72 | * the free space that contains the given offset. If contains is not set we | 81 | * |
73 | * will return the free space that starts at or after the given offset and is | 82 | * fuzzy == 1: this is used for allocations where we are given a hint of where |
74 | * at least bytes long. | 83 | * to look for free space. Because the hint may not be completely on an offset |
84 | * mark, or the hint may no longer point to free space, we need to fudge our | ||
85 | * results a bit. So we look for free space starting at or after offset with at | ||
86 | * least bytes size. We prefer to find as close to the given offset as we can. | ||
87 | * Also, if the offset is within a free space range, then we will return the free | ||
88 | * space that contains the given offset, which means we can return a free space | ||
89 | * chunk with an offset before the provided offset. | ||
90 | * | ||
91 | * fuzzy == 0: this is just a normal tree search. Give us the free space that | ||
92 | * starts at the given offset and is at least bytes in size; if it's not there | ||
93 | * return NULL. | ||
75 | */ | 94 | */ |
76 | static struct btrfs_free_space *tree_search_offset(struct rb_root *root, | 95 | static struct btrfs_free_space *tree_search_offset(struct rb_root *root, |
77 | u64 offset, u64 bytes, | 96 | u64 offset, u64 bytes, |
78 | int contains) | 97 | int fuzzy) |
79 | { | 98 | { |
80 | struct rb_node *n = root->rb_node; | 99 | struct rb_node *n = root->rb_node; |
81 | struct btrfs_free_space *entry, *ret = NULL; | 100 | struct btrfs_free_space *entry, *ret = NULL; |
@@ -84,13 +103,14 @@ static struct btrfs_free_space *tree_search_offset(struct rb_root *root, | |||
84 | entry = rb_entry(n, struct btrfs_free_space, offset_index); | 103 | entry = rb_entry(n, struct btrfs_free_space, offset_index); |
85 | 104 | ||
86 | if (offset < entry->offset) { | 105 | if (offset < entry->offset) { |
87 | if (!contains && | 106 | if (fuzzy && |
88 | (!ret || entry->offset < ret->offset) && | 107 | (!ret || entry->offset < ret->offset) && |
89 | (bytes <= entry->bytes)) | 108 | (bytes <= entry->bytes)) |
90 | ret = entry; | 109 | ret = entry; |
91 | n = n->rb_left; | 110 | n = n->rb_left; |
92 | } else if (offset > entry->offset) { | 111 | } else if (offset > entry->offset) { |
93 | if ((entry->offset + entry->bytes - 1) >= offset && | 112 | if (fuzzy && |
113 | (entry->offset + entry->bytes - 1) >= offset && | ||
94 | bytes <= entry->bytes) { | 114 | bytes <= entry->bytes) { |
95 | ret = entry; | 115 | ret = entry; |
96 | break; | 116 | break; |
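
The fuzzy == 0 / fuzzy == 1 modes documented above are easiest to see over a flat sorted array instead of the rbtree; this sketch mirrors the same acceptance rules (exact start, containing range, or closest start after the hint) with illustrative names:

    /* Linear-scan model of tree_search_offset()'s two modes. */
    #include <stdint.h>
    #include <stddef.h>

    struct free_space {
        uint64_t offset;
        uint64_t bytes;
    };

    struct free_space *search_offset(struct free_space *e, size_t n,
                                     uint64_t offset, uint64_t bytes,
                                     int fuzzy)
    {
        struct free_space *best = NULL;
        size_t i;

        for (i = 0; i < n; i++) { /* entries sorted by offset */
            if (e[i].offset == offset && e[i].bytes >= bytes)
                return &e[i];     /* starts right at the hint */
            if (!fuzzy)
                continue;
            if (e[i].offset < offset) {
                if (e[i].offset + e[i].bytes - 1 >= offset &&
                    e[i].bytes >= bytes)
                    return &e[i]; /* range contains the hint */
            } else if (e[i].bytes >= bytes &&
                       (!best || e[i].offset < best->offset)) {
                best = &e[i];     /* closest start after the hint */
            }
        }
        return best; /* NULL in exact mode unless hit above */
    }
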
@@ -171,6 +191,7 @@ static int link_free_space(struct btrfs_block_group_cache *block_group, | |||
171 | int ret = 0; | 191 | int ret = 0; |
172 | 192 | ||
173 | 193 | ||
194 | BUG_ON(!info->bytes); | ||
174 | ret = tree_insert_offset(&block_group->free_space_offset, info->offset, | 195 | ret = tree_insert_offset(&block_group->free_space_offset, info->offset, |
175 | &info->offset_index); | 196 | &info->offset_index); |
176 | if (ret) | 197 | if (ret) |
@@ -184,108 +205,70 @@ static int link_free_space(struct btrfs_block_group_cache *block_group, | |||
184 | return ret; | 205 | return ret; |
185 | } | 206 | } |
186 | 207 | ||
187 | static int __btrfs_add_free_space(struct btrfs_block_group_cache *block_group, | 208 | int btrfs_add_free_space(struct btrfs_block_group_cache *block_group, |
188 | u64 offset, u64 bytes) | 209 | u64 offset, u64 bytes) |
189 | { | 210 | { |
190 | struct btrfs_free_space *right_info; | 211 | struct btrfs_free_space *right_info; |
191 | struct btrfs_free_space *left_info; | 212 | struct btrfs_free_space *left_info; |
192 | struct btrfs_free_space *info = NULL; | 213 | struct btrfs_free_space *info = NULL; |
193 | struct btrfs_free_space *alloc_info; | ||
194 | int ret = 0; | 214 | int ret = 0; |
195 | 215 | ||
196 | alloc_info = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS); | 216 | info = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS); |
197 | if (!alloc_info) | 217 | if (!info) |
198 | return -ENOMEM; | 218 | return -ENOMEM; |
199 | 219 | ||
220 | info->offset = offset; | ||
221 | info->bytes = bytes; | ||
222 | |||
223 | spin_lock(&block_group->tree_lock); | ||
224 | |||
200 | /* | 225 | /* |
201 | * first we want to see if there is free space adjacent to the range we | 226 | * first we want to see if there is free space adjacent to the range we |
202 | * are adding, if there is remove that struct and add a new one to | 227 | * are adding, if there is remove that struct and add a new one to |
203 | * cover the entire range | 228 | * cover the entire range |
204 | */ | 229 | */ |
205 | right_info = tree_search_offset(&block_group->free_space_offset, | 230 | right_info = tree_search_offset(&block_group->free_space_offset, |
206 | offset+bytes, 0, 1); | 231 | offset+bytes, 0, 0); |
207 | left_info = tree_search_offset(&block_group->free_space_offset, | 232 | left_info = tree_search_offset(&block_group->free_space_offset, |
208 | offset-1, 0, 1); | 233 | offset-1, 0, 1); |
209 | 234 | ||
210 | if (right_info && right_info->offset == offset+bytes) { | 235 | if (right_info) { |
211 | unlink_free_space(block_group, right_info); | 236 | unlink_free_space(block_group, right_info); |
212 | info = right_info; | 237 | info->bytes += right_info->bytes; |
213 | info->offset = offset; | 238 | kfree(right_info); |
214 | info->bytes += bytes; | ||
215 | } else if (right_info && right_info->offset != offset+bytes) { | ||
216 | printk(KERN_ERR "btrfs adding space in the middle of an " | ||
217 | "existing free space area. existing: " | ||
218 | "offset=%llu, bytes=%llu. new: offset=%llu, " | ||
219 | "bytes=%llu\n", (unsigned long long)right_info->offset, | ||
220 | (unsigned long long)right_info->bytes, | ||
221 | (unsigned long long)offset, | ||
222 | (unsigned long long)bytes); | ||
223 | BUG(); | ||
224 | } | 239 | } |
225 | 240 | ||
226 | if (left_info) { | 241 | if (left_info && left_info->offset + left_info->bytes == offset) { |
227 | unlink_free_space(block_group, left_info); | 242 | unlink_free_space(block_group, left_info); |
228 | 243 | info->offset = left_info->offset; | |
229 | if (unlikely((left_info->offset + left_info->bytes) != | 244 | info->bytes += left_info->bytes; |
230 | offset)) { | 245 | kfree(left_info); |
231 | printk(KERN_ERR "btrfs free space to the left " | ||
232 | "of new free space isn't " | ||
233 | "quite right. existing: offset=%llu, " | ||
234 | "bytes=%llu. new: offset=%llu, bytes=%llu\n", | ||
235 | (unsigned long long)left_info->offset, | ||
236 | (unsigned long long)left_info->bytes, | ||
237 | (unsigned long long)offset, | ||
238 | (unsigned long long)bytes); | ||
239 | BUG(); | ||
240 | } | ||
241 | |||
242 | if (info) { | ||
243 | info->offset = left_info->offset; | ||
244 | info->bytes += left_info->bytes; | ||
245 | kfree(left_info); | ||
246 | } else { | ||
247 | info = left_info; | ||
248 | info->bytes += bytes; | ||
249 | } | ||
250 | } | 246 | } |
251 | 247 | ||
252 | if (info) { | ||
253 | ret = link_free_space(block_group, info); | ||
254 | if (!ret) | ||
255 | info = NULL; | ||
256 | goto out; | ||
257 | } | ||
258 | |||
259 | info = alloc_info; | ||
260 | alloc_info = NULL; | ||
261 | info->offset = offset; | ||
262 | info->bytes = bytes; | ||
263 | |||
264 | ret = link_free_space(block_group, info); | 248 | ret = link_free_space(block_group, info); |
265 | if (ret) | 249 | if (ret) |
266 | kfree(info); | 250 | kfree(info); |
267 | out: | 251 | |
252 | spin_unlock(&block_group->tree_lock); | ||
253 | |||
268 | if (ret) { | 254 | if (ret) { |
269 | printk(KERN_ERR "btrfs: unable to add free space :%d\n", ret); | 255 | printk(KERN_ERR "btrfs: unable to add free space :%d\n", ret); |
270 | if (ret == -EEXIST) | 256 | BUG_ON(ret == -EEXIST); |
271 | BUG(); | ||
272 | } | 257 | } |
273 | 258 | ||
274 | kfree(alloc_info); | ||
275 | |||
276 | return ret; | 259 | return ret; |
277 | } | 260 | } |
278 | 261 | ||
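The rewritten btrfs_add_free_space above does both neighbor merges in a single pass under tree_lock: the right neighbor is absorbed when it starts exactly at offset+bytes, the left when it ends exactly at offset. A rough userspace model of that coalescing rule, with a sorted singly linked list standing in for the offset-indexed rbtree (every identifier below is illustrative, not kernel API):

#include <stdio.h>
#include <stdlib.h>

struct fs_entry {
        unsigned long long offset;
        unsigned long long bytes;
        struct fs_entry *next;
};

static struct fs_entry *add_free_space(struct fs_entry *head,
                                       unsigned long long offset,
                                       unsigned long long bytes)
{
        struct fs_entry **link = &head, **left_link = NULL;
        struct fs_entry *left, *info = malloc(sizeof(*info));

        info->offset = offset;
        info->bytes = bytes;

        /* walk to the first entry at or beyond the new range */
        while (*link && (*link)->offset < offset) {
                left_link = link;
                link = &(*link)->next;
        }
        left = left_link ? *left_link : NULL;

        /* merge right: neighbor starts exactly where the new range ends */
        if (*link && (*link)->offset == offset + bytes) {
                struct fs_entry *right = *link;
                info->bytes += right->bytes;
                *link = right->next;
                free(right);
        }

        /* merge left: neighbor ends exactly where the new range starts */
        if (left && left->offset + left->bytes == offset) {
                info->offset = left->offset;
                info->bytes += left->bytes;
                *left_link = left->next;        /* splice the old left out */
                link = left_link;               /* insert in its place */
                free(left);
        }

        info->next = *link;
        *link = info;
        return head;
}

int main(void)
{
        struct fs_entry *head = NULL, *e;

        head = add_free_space(head, 0, 4096);
        head = add_free_space(head, 8192, 4096);
        head = add_free_space(head, 4096, 4096);   /* bridges both */
        for (e = head; e; e = e->next)
                printf("free: offset=%llu bytes=%llu\n", e->offset, e->bytes);
        return 0;   /* prints one merged 12K extent at offset 0 */
}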
279 | static int | 262 | int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, |
280 | __btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, | 263 | u64 offset, u64 bytes) |
281 | u64 offset, u64 bytes) | ||
282 | { | 264 | { |
283 | struct btrfs_free_space *info; | 265 | struct btrfs_free_space *info; |
284 | int ret = 0; | 266 | int ret = 0; |
285 | 267 | ||
268 | spin_lock(&block_group->tree_lock); | ||
269 | |||
286 | info = tree_search_offset(&block_group->free_space_offset, offset, 0, | 270 | info = tree_search_offset(&block_group->free_space_offset, offset, 0, |
287 | 1); | 271 | 1); |
288 | |||
289 | if (info && info->offset == offset) { | 272 | if (info && info->offset == offset) { |
290 | if (info->bytes < bytes) { | 273 | if (info->bytes < bytes) { |
291 | printk(KERN_ERR "Found free space at %llu, size %llu," | 274 | printk(KERN_ERR "Found free space at %llu, size %llu," |
@@ -295,12 +278,14 @@ __btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, | |||
295 | (unsigned long long)bytes); | 278 | (unsigned long long)bytes); |
296 | WARN_ON(1); | 279 | WARN_ON(1); |
297 | ret = -EINVAL; | 280 | ret = -EINVAL; |
281 | spin_unlock(&block_group->tree_lock); | ||
298 | goto out; | 282 | goto out; |
299 | } | 283 | } |
300 | unlink_free_space(block_group, info); | 284 | unlink_free_space(block_group, info); |
301 | 285 | ||
302 | if (info->bytes == bytes) { | 286 | if (info->bytes == bytes) { |
303 | kfree(info); | 287 | kfree(info); |
288 | spin_unlock(&block_group->tree_lock); | ||
304 | goto out; | 289 | goto out; |
305 | } | 290 | } |
306 | 291 | ||
@@ -308,6 +293,7 @@ __btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, | |||
308 | info->bytes -= bytes; | 293 | info->bytes -= bytes; |
309 | 294 | ||
310 | ret = link_free_space(block_group, info); | 295 | ret = link_free_space(block_group, info); |
296 | spin_unlock(&block_group->tree_lock); | ||
311 | BUG_ON(ret); | 297 | BUG_ON(ret); |
312 | } else if (info && info->offset < offset && | 298 | } else if (info && info->offset < offset && |
313 | info->offset + info->bytes >= offset + bytes) { | 299 | info->offset + info->bytes >= offset + bytes) { |
@@ -333,70 +319,33 @@ __btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, | |||
333 | */ | 319 | */ |
334 | kfree(info); | 320 | kfree(info); |
335 | } | 321 | } |
336 | 322 | spin_unlock(&block_group->tree_lock); | |
337 | /* step two, insert a new info struct to cover anything | 323 | /* step two, insert a new info struct to cover anything |
338 | * before the hole | 324 | * before the hole |
339 | */ | 325 | */ |
340 | ret = __btrfs_add_free_space(block_group, old_start, | 326 | ret = btrfs_add_free_space(block_group, old_start, |
341 | offset - old_start); | 327 | offset - old_start); |
342 | BUG_ON(ret); | 328 | BUG_ON(ret); |
343 | } else { | 329 | } else { |
330 | spin_unlock(&block_group->tree_lock); | ||
331 | if (!info) { | ||
332 | printk(KERN_ERR "couldn't find space %llu to free\n", | ||
333 | (unsigned long long)offset); | ||
334 | printk(KERN_ERR "cached is %d, offset %llu bytes %llu\n", | ||
335 | block_group->cached, block_group->key.objectid, | ||
336 | block_group->key.offset); | ||
337 | btrfs_dump_free_space(block_group, bytes); | ||
338 | } else if (info) { | ||
339 | printk(KERN_ERR "hmm, found offset=%llu bytes=%llu, " | ||
340 | "but wanted offset=%llu bytes=%llu\n", | ||
341 | info->offset, info->bytes, offset, bytes); | ||
342 | } | ||
344 | WARN_ON(1); | 343 | WARN_ON(1); |
345 | } | 344 | } |
346 | out: | 345 | out: |
347 | return ret; | 346 | return ret; |
348 | } | 347 | } |
349 | 348 | ||
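The partial-overlap branch of btrfs_remove_free_space keeps the tail of the old extent by shrinking it in place and re-adds the piece before the hole through btrfs_add_free_space, which is why tree_lock is dropped first: the add path now takes that lock itself. The arithmetic of the split, modeled standalone (hypothetical names, not kernel code):

struct piece {
        unsigned long long off;
        unsigned long long len;
};

/* removing [offset, offset + bytes) from an extent that fully
 * contains it leaves at most two pieces: one before the hole,
 * one after it; returns the piece count, or -1 on bad input */
static int split_extent(unsigned long long start, unsigned long long size,
                        unsigned long long offset, unsigned long long bytes,
                        struct piece out[2])
{
        int n = 0;

        if (offset < start || offset + bytes > start + size)
                return -1;      /* caller must guarantee containment */

        if (offset > start) {
                out[n].off = start;
                out[n].len = offset - start;
                n++;
        }
        if (offset + bytes < start + size) {
                out[n].off = offset + bytes;
                out[n].len = (start + size) - (offset + bytes);
                n++;
        }
        return n;
}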
350 | int btrfs_add_free_space(struct btrfs_block_group_cache *block_group, | ||
351 | u64 offset, u64 bytes) | ||
352 | { | ||
353 | int ret; | ||
354 | struct btrfs_free_space *sp; | ||
355 | |||
356 | mutex_lock(&block_group->alloc_mutex); | ||
357 | ret = __btrfs_add_free_space(block_group, offset, bytes); | ||
358 | sp = tree_search_offset(&block_group->free_space_offset, offset, 0, 1); | ||
359 | BUG_ON(!sp); | ||
360 | mutex_unlock(&block_group->alloc_mutex); | ||
361 | |||
362 | return ret; | ||
363 | } | ||
364 | |||
365 | int btrfs_add_free_space_lock(struct btrfs_block_group_cache *block_group, | ||
366 | u64 offset, u64 bytes) | ||
367 | { | ||
368 | int ret; | ||
369 | struct btrfs_free_space *sp; | ||
370 | |||
371 | ret = __btrfs_add_free_space(block_group, offset, bytes); | ||
372 | sp = tree_search_offset(&block_group->free_space_offset, offset, 0, 1); | ||
373 | BUG_ON(!sp); | ||
374 | |||
375 | return ret; | ||
376 | } | ||
377 | |||
378 | int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, | ||
379 | u64 offset, u64 bytes) | ||
380 | { | ||
381 | int ret = 0; | ||
382 | |||
383 | mutex_lock(&block_group->alloc_mutex); | ||
384 | ret = __btrfs_remove_free_space(block_group, offset, bytes); | ||
385 | mutex_unlock(&block_group->alloc_mutex); | ||
386 | |||
387 | return ret; | ||
388 | } | ||
389 | |||
390 | int btrfs_remove_free_space_lock(struct btrfs_block_group_cache *block_group, | ||
391 | u64 offset, u64 bytes) | ||
392 | { | ||
393 | int ret; | ||
394 | |||
395 | ret = __btrfs_remove_free_space(block_group, offset, bytes); | ||
396 | |||
397 | return ret; | ||
398 | } | ||
399 | |||
400 | void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, | 349 | void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, |
401 | u64 bytes) | 350 | u64 bytes) |
402 | { | 351 | { |
@@ -408,6 +357,8 @@ void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, | |||
408 | info = rb_entry(n, struct btrfs_free_space, offset_index); | 357 | info = rb_entry(n, struct btrfs_free_space, offset_index); |
409 | if (info->bytes >= bytes) | 358 | if (info->bytes >= bytes) |
410 | count++; | 359 | count++; |
360 | printk(KERN_ERR "entry offset %llu, bytes %llu\n", info->offset, | ||
361 | info->bytes); | ||
411 | } | 362 | } |
412 | printk(KERN_INFO "%d blocks of free space at or bigger than bytes is" | 363 | printk(KERN_INFO "%d blocks of free space at or bigger than bytes is" |
413 | "\n", count); | 364 | "\n", count); |
@@ -428,68 +379,337 @@ u64 btrfs_block_group_free_space(struct btrfs_block_group_cache *block_group) | |||
428 | return ret; | 379 | return ret; |
429 | } | 380 | } |
430 | 381 | ||
382 | /* | ||
383 | * for a given cluster, put all of its extents back into the free | ||
384 | * space cache. If the block group passed doesn't match the block group | ||
385 | * pointed to by the cluster, someone else raced in and freed the | ||
386 | * cluster already. In that case, we just return without changing anything | ||
387 | */ | ||
388 | static int | ||
389 | __btrfs_return_cluster_to_free_space( | ||
390 | struct btrfs_block_group_cache *block_group, | ||
391 | struct btrfs_free_cluster *cluster) | ||
392 | { | ||
393 | struct btrfs_free_space *entry; | ||
394 | struct rb_node *node; | ||
395 | |||
396 | spin_lock(&cluster->lock); | ||
397 | if (cluster->block_group != block_group) | ||
398 | goto out; | ||
399 | |||
400 | cluster->window_start = 0; | ||
401 | node = rb_first(&cluster->root); | ||
402 | while(node) { | ||
403 | entry = rb_entry(node, struct btrfs_free_space, offset_index); | ||
404 | node = rb_next(&entry->offset_index); | ||
405 | rb_erase(&entry->offset_index, &cluster->root); | ||
406 | link_free_space(block_group, entry); | ||
407 | } | ||
408 | list_del_init(&cluster->block_group_list); | ||
409 | |||
410 | btrfs_put_block_group(cluster->block_group); | ||
411 | cluster->block_group = NULL; | ||
412 | cluster->root.rb_node = NULL; | ||
413 | out: | ||
414 | spin_unlock(&cluster->lock); | ||
415 | return 0; | ||
416 | } | ||
417 | |||
431 | void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group) | 418 | void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group) |
432 | { | 419 | { |
433 | struct btrfs_free_space *info; | 420 | struct btrfs_free_space *info; |
434 | struct rb_node *node; | 421 | struct rb_node *node; |
422 | struct btrfs_free_cluster *cluster; | ||
423 | struct btrfs_free_cluster *safe; | ||
424 | |||
425 | spin_lock(&block_group->tree_lock); | ||
426 | |||
427 | list_for_each_entry_safe(cluster, safe, &block_group->cluster_list, | ||
428 | block_group_list) { | ||
429 | |||
430 | WARN_ON(cluster->block_group != block_group); | ||
431 | __btrfs_return_cluster_to_free_space(block_group, cluster); | ||
432 | } | ||
435 | 433 | ||
436 | mutex_lock(&block_group->alloc_mutex); | ||
437 | while ((node = rb_last(&block_group->free_space_bytes)) != NULL) { | 434 | while ((node = rb_last(&block_group->free_space_bytes)) != NULL) { |
438 | info = rb_entry(node, struct btrfs_free_space, bytes_index); | 435 | info = rb_entry(node, struct btrfs_free_space, bytes_index); |
439 | unlink_free_space(block_group, info); | 436 | unlink_free_space(block_group, info); |
440 | kfree(info); | 437 | kfree(info); |
441 | if (need_resched()) { | 438 | if (need_resched()) { |
442 | mutex_unlock(&block_group->alloc_mutex); | 439 | spin_unlock(&block_group->tree_lock); |
443 | cond_resched(); | 440 | cond_resched(); |
444 | mutex_lock(&block_group->alloc_mutex); | 441 | spin_lock(&block_group->tree_lock); |
445 | } | 442 | } |
446 | } | 443 | } |
447 | mutex_unlock(&block_group->alloc_mutex); | 444 | spin_unlock(&block_group->tree_lock); |
448 | } | 445 | } |
449 | 446 | ||
450 | #if 0 | 447 | u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group, |
451 | static struct btrfs_free_space *btrfs_find_free_space_offset(struct | 448 | u64 offset, u64 bytes, u64 empty_size) |
452 | btrfs_block_group_cache | ||
453 | *block_group, u64 offset, | ||
454 | u64 bytes) | ||
455 | { | 449 | { |
456 | struct btrfs_free_space *ret; | 450 | struct btrfs_free_space *entry = NULL; |
451 | u64 ret = 0; | ||
457 | 452 | ||
458 | mutex_lock(&block_group->alloc_mutex); | 453 | spin_lock(&block_group->tree_lock); |
459 | ret = tree_search_offset(&block_group->free_space_offset, offset, | 454 | entry = tree_search_offset(&block_group->free_space_offset, offset, |
460 | bytes, 0); | 455 | bytes + empty_size, 1); |
461 | mutex_unlock(&block_group->alloc_mutex); | 456 | if (!entry) |
457 | entry = tree_search_bytes(&block_group->free_space_bytes, | ||
458 | offset, bytes + empty_size); | ||
459 | if (entry) { | ||
460 | unlink_free_space(block_group, entry); | ||
461 | ret = entry->offset; | ||
462 | entry->offset += bytes; | ||
463 | entry->bytes -= bytes; | ||
464 | |||
465 | if (!entry->bytes) | ||
466 | kfree(entry); | ||
467 | else | ||
468 | link_free_space(block_group, entry); | ||
469 | } | ||
470 | spin_unlock(&block_group->tree_lock); | ||
462 | 471 | ||
463 | return ret; | 472 | return ret; |
464 | } | 473 | } |
465 | 474 | ||
466 | static struct btrfs_free_space *btrfs_find_free_space_bytes(struct | 475 | /* |
467 | btrfs_block_group_cache | 476 | * given a cluster, put all of its extents back into the free space |
468 | *block_group, u64 offset, | 477 | * cache. If a block group is passed, this function will only free |
469 | u64 bytes) | 478 | * a cluster that belongs to the passed block group. |
479 | * | ||
480 | * Otherwise, it'll get a reference on the block group pointed to by the | ||
481 | * cluster and remove the cluster from it. | ||
482 | */ | ||
483 | int btrfs_return_cluster_to_free_space( | ||
484 | struct btrfs_block_group_cache *block_group, | ||
485 | struct btrfs_free_cluster *cluster) | ||
470 | { | 486 | { |
471 | struct btrfs_free_space *ret; | 487 | int ret; |
472 | 488 | ||
473 | mutex_lock(&block_group->alloc_mutex); | 489 | /* first, get a safe pointer to the block group */ |
490 | spin_lock(&cluster->lock); | ||
491 | if (!block_group) { | ||
492 | block_group = cluster->block_group; | ||
493 | if (!block_group) { | ||
494 | spin_unlock(&cluster->lock); | ||
495 | return 0; | ||
496 | } | ||
497 | } else if (cluster->block_group != block_group) { | ||
498 | /* someone else has already freed it don't redo their work */ | ||
499 | spin_unlock(&cluster->lock); | ||
500 | return 0; | ||
501 | } | ||
502 | atomic_inc(&block_group->count); | ||
503 | spin_unlock(&cluster->lock); | ||
474 | 504 | ||
475 | ret = tree_search_bytes(&block_group->free_space_bytes, offset, bytes); | 505 | /* now return any extents the cluster had on it */ |
476 | mutex_unlock(&block_group->alloc_mutex); | 506 | spin_lock(&block_group->tree_lock); |
507 | ret = __btrfs_return_cluster_to_free_space(block_group, cluster); | ||
508 | spin_unlock(&block_group->tree_lock); | ||
477 | 509 | ||
510 | /* finally drop our ref */ | ||
511 | btrfs_put_block_group(block_group); | ||
478 | return ret; | 512 | return ret; |
479 | } | 513 | } |
480 | #endif | ||
481 | 514 | ||
482 | struct btrfs_free_space *btrfs_find_free_space(struct btrfs_block_group_cache | 515 | /* |
483 | *block_group, u64 offset, | 516 | * given a cluster, try to allocate 'bytes' from it; returns 0 |
484 | u64 bytes) | 517 | * if it couldn't find anything suitably large, or a logical disk offset |
518 | * if things worked out | ||
519 | */ | ||
520 | u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group, | ||
521 | struct btrfs_free_cluster *cluster, u64 bytes, | ||
522 | u64 min_start) | ||
523 | { | ||
524 | struct btrfs_free_space *entry = NULL; | ||
525 | struct rb_node *node; | ||
526 | u64 ret = 0; | ||
527 | |||
528 | spin_lock(&cluster->lock); | ||
529 | if (bytes > cluster->max_size) | ||
530 | goto out; | ||
531 | |||
532 | if (cluster->block_group != block_group) | ||
533 | goto out; | ||
534 | |||
535 | node = rb_first(&cluster->root); | ||
536 | if (!node) | ||
537 | goto out; | ||
538 | |||
539 | entry = rb_entry(node, struct btrfs_free_space, offset_index); | ||
540 | |||
541 | while(1) { | ||
542 | if (entry->bytes < bytes || entry->offset < min_start) { | ||
543 | struct rb_node *node; | ||
544 | |||
545 | node = rb_next(&entry->offset_index); | ||
546 | if (!node) | ||
547 | break; | ||
548 | entry = rb_entry(node, struct btrfs_free_space, | ||
549 | offset_index); | ||
550 | continue; | ||
551 | } | ||
552 | ret = entry->offset; | ||
553 | |||
554 | entry->offset += bytes; | ||
555 | entry->bytes -= bytes; | ||
556 | |||
557 | if (entry->bytes == 0) { | ||
558 | rb_erase(&entry->offset_index, &cluster->root); | ||
559 | kfree(entry); | ||
560 | } | ||
561 | break; | ||
562 | } | ||
563 | out: | ||
564 | spin_unlock(&cluster->lock); | ||
565 | return ret; | ||
566 | } | ||
567 | |||
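btrfs_alloc_from_cluster is a first-fit scan in offset order: entries that are too small or that start below min_start are skipped, and the allocation is carved off the front of the first entry that fits. The same scan over a plain sorted array, as an illustrative sketch (the kernel additionally erases and frees an entry once it drains to zero bytes):

struct extent {
        unsigned long long offset;
        unsigned long long bytes;
};

static unsigned long long alloc_from_cluster(struct extent *ents, int n,
                                             unsigned long long bytes,
                                             unsigned long long min_start)
{
        for (int i = 0; i < n; i++) {
                unsigned long long ret;

                /* skip entries that are too small or start too low */
                if (ents[i].bytes < bytes || ents[i].offset < min_start)
                        continue;

                ret = ents[i].offset;
                ents[i].offset += bytes;        /* carve off the front */
                ents[i].bytes -= bytes;
                return ret;
        }
        return 0;       /* 0 means "no fit", matching the kernel convention */
}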
568 | /* | ||
569 | * here we try to find a cluster of blocks in a block group. The goal | ||
570 | * is to find at least bytes free and up to empty_size + bytes free. | ||
571 | * We might not find them all in one contiguous area. | ||
572 | * | ||
573 | * returns zero and sets up cluster if things worked out, otherwise | ||
574 | * it returns -ENOSPC ||
575 | */ | ||
576 | int btrfs_find_space_cluster(struct btrfs_trans_handle *trans, | ||
577 | struct btrfs_block_group_cache *block_group, | ||
578 | struct btrfs_free_cluster *cluster, | ||
579 | u64 offset, u64 bytes, u64 empty_size) | ||
485 | { | 580 | { |
486 | struct btrfs_free_space *ret = NULL; | 581 | struct btrfs_free_space *entry = NULL; |
582 | struct rb_node *node; | ||
583 | struct btrfs_free_space *next; | ||
584 | struct btrfs_free_space *last; | ||
585 | u64 min_bytes; | ||
586 | u64 window_start; | ||
587 | u64 window_free; | ||
588 | u64 max_extent = 0; | ||
589 | int total_retries = 0; | ||
590 | int ret; | ||
591 | |||
592 | /* for metadata, allow allocations with more holes */ ||
593 | if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) { | ||
594 | /* | ||
595 | * we want to do larger allocations when we are | ||
596 | * flushing out the delayed refs; it helps prevent ||
597 | * making more work as we go along. | ||
598 | */ | ||
599 | if (trans->transaction->delayed_refs.flushing) | ||
600 | min_bytes = max(bytes, (bytes + empty_size) >> 1); | ||
601 | else | ||
602 | min_bytes = max(bytes, (bytes + empty_size) >> 4); | ||
603 | } else | ||
604 | min_bytes = max(bytes, (bytes + empty_size) >> 2); | ||
605 | |||
606 | spin_lock(&block_group->tree_lock); | ||
607 | spin_lock(&cluster->lock); | ||
608 | |||
609 | /* someone already found a cluster, hooray */ | ||
610 | if (cluster->block_group) { | ||
611 | ret = 0; | ||
612 | goto out; | ||
613 | } | ||
614 | again: | ||
615 | min_bytes = min(min_bytes, bytes + empty_size); | ||
616 | entry = tree_search_bytes(&block_group->free_space_bytes, | ||
617 | offset, min_bytes); | ||
618 | if (!entry) { | ||
619 | ret = -ENOSPC; | ||
620 | goto out; | ||
621 | } | ||
622 | window_start = entry->offset; | ||
623 | window_free = entry->bytes; | ||
624 | last = entry; | ||
625 | max_extent = entry->bytes; | ||
626 | |||
627 | while(1) { | ||
628 | /* our window is just right, let's fill it */ ||
629 | if (window_free >= bytes + empty_size) | ||
630 | break; | ||
487 | 631 | ||
488 | ret = tree_search_offset(&block_group->free_space_offset, offset, | 632 | node = rb_next(&last->offset_index); |
489 | bytes, 0); | 633 | if (!node) { |
490 | if (!ret) | 634 | ret = -ENOSPC; |
491 | ret = tree_search_bytes(&block_group->free_space_bytes, | 635 | goto out; |
492 | offset, bytes); | 636 | } |
637 | next = rb_entry(node, struct btrfs_free_space, offset_index); | ||
638 | |||
639 | /* | ||
640 | * we haven't filled the empty size and the window is | ||
641 | * very large. Reset and try again ||
642 | */ | ||
643 | if (next->offset - window_start > (bytes + empty_size) * 2) { | ||
644 | entry = next; | ||
645 | window_start = entry->offset; | ||
646 | window_free = entry->bytes; | ||
647 | last = entry; | ||
648 | max_extent = 0; | ||
649 | total_retries++; | ||
650 | if (total_retries % 256 == 0) { | ||
651 | if (min_bytes >= (bytes + empty_size)) { | ||
652 | ret = -ENOSPC; | ||
653 | goto out; | ||
654 | } | ||
655 | /* | ||
656 | * grow our allocation a bit, we're not having | ||
657 | * much luck | ||
658 | */ | ||
659 | min_bytes *= 2; | ||
660 | goto again; | ||
661 | } | ||
662 | } else { | ||
663 | last = next; | ||
664 | window_free += next->bytes; | ||
665 | if (entry->bytes > max_extent) | ||
666 | max_extent = entry->bytes; | ||
667 | } | ||
668 | } | ||
669 | |||
670 | cluster->window_start = entry->offset; | ||
671 | |||
672 | /* | ||
673 | * now we've found our entries, pull them out of the free space | ||
674 | * cache and put them into the cluster rbtree | ||
675 | * | ||
676 | * The cluster includes an rbtree, but only uses the offset index | ||
677 | * of each free space cache entry. | ||
678 | */ | ||
679 | while(1) { | ||
680 | node = rb_next(&entry->offset_index); | ||
681 | unlink_free_space(block_group, entry); | ||
682 | ret = tree_insert_offset(&cluster->root, entry->offset, | ||
683 | &entry->offset_index); | ||
684 | BUG_ON(ret); | ||
685 | |||
686 | if (!node || entry == last) | ||
687 | break; | ||
688 | |||
689 | entry = rb_entry(node, struct btrfs_free_space, offset_index); | ||
690 | } | ||
691 | ret = 0; | ||
692 | cluster->max_size = max_extent; | ||
693 | atomic_inc(&block_group->count); | ||
694 | list_add_tail(&cluster->block_group_list, &block_group->cluster_list); | ||
695 | cluster->block_group = block_group; | ||
696 | out: | ||
697 | spin_unlock(&cluster->lock); | ||
698 | spin_unlock(&block_group->tree_lock); | ||
493 | 699 | ||
494 | return ret; | 700 | return ret; |
495 | } | 701 | } |
702 | |||
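The window loop in btrfs_find_space_cluster accumulates consecutive free extents until bytes + empty_size is in view, restarting from the next extent whenever the gap stretches the window past twice the target. A simplified model of that control flow, omitting the kernel's retry counter and min_bytes doubling (names illustrative; 'ents' is sorted by offset as the rbtree walk guarantees):

/* returns the index of the first extent of a window whose summed
 * free space reaches 'target', or -1 if no such window exists */
static int find_window(const struct extent *ents, int n,
                       unsigned long long target)
{
        int first = 0;
        unsigned long long window_start, window_free;

        if (n == 0)
                return -1;
        window_start = ents[0].offset;
        window_free = ents[0].bytes;

        for (int i = 1; window_free < target; i++) {
                if (i == n)
                        return -1;
                /* window grew too sparse: restart from this extent */
                if (ents[i].offset - window_start > target * 2) {
                        first = i;
                        window_start = ents[i].offset;
                        window_free = ents[i].bytes;
                } else {
                        window_free += ents[i].bytes;
                }
        }
        return first;
}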
703 | /* | ||
704 | * simple code to zero out a cluster | ||
705 | */ | ||
706 | void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster) | ||
707 | { | ||
708 | spin_lock_init(&cluster->lock); | ||
709 | spin_lock_init(&cluster->refill_lock); | ||
710 | cluster->root.rb_node = NULL; | ||
711 | cluster->max_size = 0; | ||
712 | INIT_LIST_HEAD(&cluster->block_group_list); | ||
713 | cluster->block_group = NULL; | ||
714 | } | ||
715 | |||
diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h new file mode 100644 index 000000000000..ab0bdc0a63ce --- /dev/null +++ b/fs/btrfs/free-space-cache.h | |||
@@ -0,0 +1,44 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2009 Oracle. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public | ||
6 | * License v2 as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
11 | * General Public License for more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public | ||
14 | * License along with this program; if not, write to the | ||
15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
16 | * Boston, MA 021110-1307, USA. | ||
17 | */ | ||
18 | |||
19 | #ifndef __BTRFS_FREE_SPACE_CACHE | ||
20 | #define __BTRFS_FREE_SPACE_CACHE | ||
21 | |||
22 | int btrfs_add_free_space(struct btrfs_block_group_cache *block_group, | ||
23 | u64 bytenr, u64 size); | ||
24 | int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, | ||
25 | u64 bytenr, u64 size); | ||
26 | void btrfs_remove_free_space_cache(struct btrfs_block_group_cache | ||
27 | *block_group); | ||
28 | u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group, | ||
29 | u64 offset, u64 bytes, u64 empty_size); | ||
30 | void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, | ||
31 | u64 bytes); | ||
32 | u64 btrfs_block_group_free_space(struct btrfs_block_group_cache *block_group); | ||
33 | int btrfs_find_space_cluster(struct btrfs_trans_handle *trans, | ||
34 | struct btrfs_block_group_cache *block_group, | ||
35 | struct btrfs_free_cluster *cluster, | ||
36 | u64 offset, u64 bytes, u64 empty_size); | ||
37 | void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster); | ||
38 | u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group, | ||
39 | struct btrfs_free_cluster *cluster, u64 bytes, | ||
40 | u64 min_start); | ||
41 | int btrfs_return_cluster_to_free_space( | ||
42 | struct btrfs_block_group_cache *block_group, | ||
43 | struct btrfs_free_cluster *cluster); | ||
44 | #endif | ||
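Read together, these declarations suggest a caller-side flow: try the cluster, rebuild it on a miss, and fall back to the plain free-space cache. A hypothetical caller along those lines (not code from this patch; it assumes kernel context and the btrfs headers, and elides error handling; the real allocator also serializes refills via cluster->refill_lock):

static u64 alloc_with_cluster(struct btrfs_trans_handle *trans,
                              struct btrfs_block_group_cache *bg,
                              struct btrfs_free_cluster *cluster,
                              u64 search_start, u64 bytes, u64 empty_size)
{
        u64 start;

        /* fast path: carve from the cluster we already built */
        start = btrfs_alloc_from_cluster(bg, cluster, bytes, search_start);
        if (start)
                return start;

        /* miss: try to (re)build a cluster inside this block group */
        if (btrfs_find_space_cluster(trans, bg, cluster, search_start,
                                     bytes, empty_size) == 0) {
                start = btrfs_alloc_from_cluster(bg, cluster, bytes,
                                                 search_start);
                if (start)
                        return start;
        }

        /* last resort: plain first-fit from the free space cache */
        return btrfs_find_space_for_alloc(bg, search_start, bytes,
                                          empty_size);
}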
diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c index 3d46fa1f29a4..6b627c611808 100644 --- a/fs/btrfs/inode-item.c +++ b/fs/btrfs/inode-item.c | |||
@@ -73,6 +73,8 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans, | |||
73 | if (!path) | 73 | if (!path) |
74 | return -ENOMEM; | 74 | return -ENOMEM; |
75 | 75 | ||
76 | path->leave_spinning = 1; | ||
77 | |||
76 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); | 78 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); |
77 | if (ret > 0) { | 79 | if (ret > 0) { |
78 | ret = -ENOENT; | 80 | ret = -ENOENT; |
@@ -127,6 +129,7 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans, | |||
127 | if (!path) | 129 | if (!path) |
128 | return -ENOMEM; | 130 | return -ENOMEM; |
129 | 131 | ||
132 | path->leave_spinning = 1; | ||
130 | ret = btrfs_insert_empty_item(trans, root, path, &key, | 133 | ret = btrfs_insert_empty_item(trans, root, path, &key, |
131 | ins_len); | 134 | ins_len); |
132 | if (ret == -EEXIST) { | 135 | if (ret == -EEXIST) { |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 7d4f948bc22a..a0d1dd492a58 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -134,6 +134,7 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans, | |||
134 | if (!path) | 134 | if (!path) |
135 | return -ENOMEM; | 135 | return -ENOMEM; |
136 | 136 | ||
137 | path->leave_spinning = 1; | ||
137 | btrfs_set_trans_block_group(trans, inode); | 138 | btrfs_set_trans_block_group(trans, inode); |
138 | 139 | ||
139 | key.objectid = inode->i_ino; | 140 | key.objectid = inode->i_ino; |
@@ -167,9 +168,9 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans, | |||
167 | cur_size = min_t(unsigned long, compressed_size, | 168 | cur_size = min_t(unsigned long, compressed_size, |
168 | PAGE_CACHE_SIZE); | 169 | PAGE_CACHE_SIZE); |
169 | 170 | ||
170 | kaddr = kmap(cpage); | 171 | kaddr = kmap_atomic(cpage, KM_USER0); |
171 | write_extent_buffer(leaf, kaddr, ptr, cur_size); | 172 | write_extent_buffer(leaf, kaddr, ptr, cur_size); |
172 | kunmap(cpage); | 173 | kunmap_atomic(kaddr, KM_USER0); |
173 | 174 | ||
174 | i++; | 175 | i++; |
175 | ptr += cur_size; | 176 | ptr += cur_size; |
@@ -204,7 +205,7 @@ fail: | |||
204 | * does the checks required to make sure the data is small enough | 205 | * does the checks required to make sure the data is small enough |
205 | * to fit as an inline extent. | 206 | * to fit as an inline extent. |
206 | */ | 207 | */ |
207 | static int cow_file_range_inline(struct btrfs_trans_handle *trans, | 208 | static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans, |
208 | struct btrfs_root *root, | 209 | struct btrfs_root *root, |
209 | struct inode *inode, u64 start, u64 end, | 210 | struct inode *inode, u64 start, u64 end, |
210 | size_t compressed_size, | 211 | size_t compressed_size, |
@@ -854,11 +855,6 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page, | |||
854 | u64 cur_end; | 855 | u64 cur_end; |
855 | int limit = 10 * 1024 * 1042; | 856 | int limit = 10 * 1024 * 1042; |
856 | 857 | ||
857 | if (!btrfs_test_opt(root, COMPRESS)) { | ||
858 | return cow_file_range(inode, locked_page, start, end, | ||
859 | page_started, nr_written, 1); | ||
860 | } | ||
861 | |||
862 | clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED | | 858 | clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED | |
863 | EXTENT_DELALLOC, 1, 0, GFP_NOFS); | 859 | EXTENT_DELALLOC, 1, 0, GFP_NOFS); |
864 | while (start < end) { | 860 | while (start < end) { |
@@ -935,7 +931,8 @@ static noinline int csum_exist_in_range(struct btrfs_root *root, | |||
935 | * If no cow copies or snapshots exist, we write directly to the existing | 931 | * If no cow copies or snapshots exist, we write directly to the existing |
936 | * blocks on disk | 932 | * blocks on disk |
937 | */ | 933 | */ |
938 | static int run_delalloc_nocow(struct inode *inode, struct page *locked_page, | 934 | static noinline int run_delalloc_nocow(struct inode *inode, |
935 | struct page *locked_page, | ||
939 | u64 start, u64 end, int *page_started, int force, | 936 | u64 start, u64 end, int *page_started, int force, |
940 | unsigned long *nr_written) | 937 | unsigned long *nr_written) |
941 | { | 938 | { |
@@ -1133,6 +1130,7 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page, | |||
1133 | unsigned long *nr_written) | 1130 | unsigned long *nr_written) |
1134 | { | 1131 | { |
1135 | int ret; | 1132 | int ret; |
1133 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
1136 | 1134 | ||
1137 | if (btrfs_test_flag(inode, NODATACOW)) | 1135 | if (btrfs_test_flag(inode, NODATACOW)) |
1138 | ret = run_delalloc_nocow(inode, locked_page, start, end, | 1136 | ret = run_delalloc_nocow(inode, locked_page, start, end, |
@@ -1140,10 +1138,12 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page, | |||
1140 | else if (btrfs_test_flag(inode, PREALLOC)) | 1138 | else if (btrfs_test_flag(inode, PREALLOC)) |
1141 | ret = run_delalloc_nocow(inode, locked_page, start, end, | 1139 | ret = run_delalloc_nocow(inode, locked_page, start, end, |
1142 | page_started, 0, nr_written); | 1140 | page_started, 0, nr_written); |
1141 | else if (!btrfs_test_opt(root, COMPRESS)) | ||
1142 | ret = cow_file_range(inode, locked_page, start, end, | ||
1143 | page_started, nr_written, 1); | ||
1143 | else | 1144 | else |
1144 | ret = cow_file_range_async(inode, locked_page, start, end, | 1145 | ret = cow_file_range_async(inode, locked_page, start, end, |
1145 | page_started, nr_written); | 1146 | page_started, nr_written); |
1146 | |||
1147 | return ret; | 1147 | return ret; |
1148 | } | 1148 | } |
1149 | 1149 | ||
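With the !COMPRESS shortcut moved out of cow_file_range_async, run_delalloc_range now owns the whole dispatch. The resulting priority order, restated as a small standalone sketch (stub enum and flags, not kernel types):

enum delalloc_mode {
        NOCOW_FORCED,   /* NODATACOW: write in place, force no-cow checks */
        NOCOW_PREALLOC, /* PREALLOC: preallocated extents may be reused */
        COW_PLAIN,      /* no compression: synchronous cow_file_range */
        COW_ASYNC,      /* compress + cow in the async helper threads */
};

static enum delalloc_mode pick_mode(int nodatacow, int prealloc,
                                    int compress_enabled)
{
        if (nodatacow)
                return NOCOW_FORCED;
        if (prealloc)
                return NOCOW_PREALLOC;
        if (!compress_enabled)
                return COW_PLAIN;
        return COW_ASYNC;
}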
@@ -1453,6 +1453,7 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, | |||
1453 | path = btrfs_alloc_path(); | 1453 | path = btrfs_alloc_path(); |
1454 | BUG_ON(!path); | 1454 | BUG_ON(!path); |
1455 | 1455 | ||
1456 | path->leave_spinning = 1; | ||
1456 | ret = btrfs_drop_extents(trans, root, inode, file_pos, | 1457 | ret = btrfs_drop_extents(trans, root, inode, file_pos, |
1457 | file_pos + num_bytes, file_pos, &hint); | 1458 | file_pos + num_bytes, file_pos, &hint); |
1458 | BUG_ON(ret); | 1459 | BUG_ON(ret); |
@@ -1475,6 +1476,10 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, | |||
1475 | btrfs_set_file_extent_compression(leaf, fi, compression); | 1476 | btrfs_set_file_extent_compression(leaf, fi, compression); |
1476 | btrfs_set_file_extent_encryption(leaf, fi, encryption); | 1477 | btrfs_set_file_extent_encryption(leaf, fi, encryption); |
1477 | btrfs_set_file_extent_other_encoding(leaf, fi, other_encoding); | 1478 | btrfs_set_file_extent_other_encoding(leaf, fi, other_encoding); |
1479 | |||
1480 | btrfs_unlock_up_safe(path, 1); | ||
1481 | btrfs_set_lock_blocking(leaf); | ||
1482 | |||
1478 | btrfs_mark_buffer_dirty(leaf); | 1483 | btrfs_mark_buffer_dirty(leaf); |
1479 | 1484 | ||
1480 | inode_add_bytes(inode, num_bytes); | 1485 | inode_add_bytes(inode, num_bytes); |
@@ -1487,11 +1492,35 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, | |||
1487 | root->root_key.objectid, | 1492 | root->root_key.objectid, |
1488 | trans->transid, inode->i_ino, &ins); | 1493 | trans->transid, inode->i_ino, &ins); |
1489 | BUG_ON(ret); | 1494 | BUG_ON(ret); |
1490 | |||
1491 | btrfs_free_path(path); | 1495 | btrfs_free_path(path); |
1496 | |||
1492 | return 0; | 1497 | return 0; |
1493 | } | 1498 | } |
1494 | 1499 | ||
1500 | /* | ||
1501 | * helper function for btrfs_finish_ordered_io; this ||
1502 | * just reads in some of the csum leaves to prime them into RAM ||
1503 | * before we start the transaction. It limits the amount of btree | ||
1504 | * reads required while inside the transaction. | ||
1505 | */ | ||
1506 | static noinline void reada_csum(struct btrfs_root *root, | ||
1507 | struct btrfs_path *path, | ||
1508 | struct btrfs_ordered_extent *ordered_extent) | ||
1509 | { | ||
1510 | struct btrfs_ordered_sum *sum; | ||
1511 | u64 bytenr; | ||
1512 | |||
1513 | sum = list_entry(ordered_extent->list.next, struct btrfs_ordered_sum, | ||
1514 | list); | ||
1515 | bytenr = sum->sums[0].bytenr; | ||
1516 | |||
1517 | /* | ||
1518 | * we don't care about the results; the point of this search is ||
1519 | * just to get the btree leaves into RAM ||
1520 | */ | ||
1521 | btrfs_lookup_csum(NULL, root->fs_info->csum_root, path, bytenr, 0); | ||
1522 | } | ||
1523 | |||
1495 | /* as ordered data IO finishes, this gets called so we can finish | 1524 | /* as ordered data IO finishes, this gets called so we can finish |
1496 | * an ordered extent if the range of bytes in the file it covers are | 1525 | * an ordered extent if the range of bytes in the file it covers are |
1497 | * fully written. | 1526 | * fully written. |
@@ -1500,8 +1529,9 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | |||
1500 | { | 1529 | { |
1501 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1530 | struct btrfs_root *root = BTRFS_I(inode)->root; |
1502 | struct btrfs_trans_handle *trans; | 1531 | struct btrfs_trans_handle *trans; |
1503 | struct btrfs_ordered_extent *ordered_extent; | 1532 | struct btrfs_ordered_extent *ordered_extent = NULL; |
1504 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | 1533 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; |
1534 | struct btrfs_path *path; | ||
1505 | int compressed = 0; | 1535 | int compressed = 0; |
1506 | int ret; | 1536 | int ret; |
1507 | 1537 | ||
@@ -1509,9 +1539,33 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | |||
1509 | if (!ret) | 1539 | if (!ret) |
1510 | return 0; | 1540 | return 0; |
1511 | 1541 | ||
1542 | /* | ||
1543 | * before we join the transaction, try to do some of our IO. | ||
1544 | * This will limit the amount of IO that we have to do with | ||
1545 | * the transaction running. We're unlikely to need to do any | ||
1546 | * IO if the file extents are new, the disk_i_size checks | ||
1547 | * covers the most common case. | ||
1548 | */ | ||
1549 | if (start < BTRFS_I(inode)->disk_i_size) { | ||
1550 | path = btrfs_alloc_path(); | ||
1551 | if (path) { | ||
1552 | ret = btrfs_lookup_file_extent(NULL, root, path, | ||
1553 | inode->i_ino, | ||
1554 | start, 0); | ||
1555 | ordered_extent = btrfs_lookup_ordered_extent(inode, | ||
1556 | start); | ||
1557 | if (!list_empty(&ordered_extent->list)) { | ||
1558 | btrfs_release_path(root, path); | ||
1559 | reada_csum(root, path, ordered_extent); | ||
1560 | } | ||
1561 | btrfs_free_path(path); | ||
1562 | } | ||
1563 | } | ||
1564 | |||
1512 | trans = btrfs_join_transaction(root, 1); | 1565 | trans = btrfs_join_transaction(root, 1); |
1513 | 1566 | ||
1514 | ordered_extent = btrfs_lookup_ordered_extent(inode, start); | 1567 | if (!ordered_extent) |
1568 | ordered_extent = btrfs_lookup_ordered_extent(inode, start); | ||
1515 | BUG_ON(!ordered_extent); | 1569 | BUG_ON(!ordered_extent); |
1516 | if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) | 1570 | if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) |
1517 | goto nocow; | 1571 | goto nocow; |
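The hunk above primes the file extent and csum btree leaves before btrfs_join_transaction, so the transaction itself does less blocking IO. The general shape of that pattern, a slow read-only warm-up with no lock held so the critical section stays short, in a self-contained userspace analogue (everything here is illustrative):

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t big_lock = PTHREAD_MUTEX_INITIALIZER;
static int cache[256];
static int cached[256];

/* slow path: simulate an IO read to prime the cache, no lock held */
static void prime(int key)
{
        if (!cached[key]) {
                usleep(1000);           /* stand-in for a disk read */
                cache[key] = key * 2;
                cached[key] = 1;
        }
}

/* the "transaction": kept short by priming first */
static int lookup_in_transaction(int key)
{
        int val;

        prime(key);                     /* IO happens before the lock */

        pthread_mutex_lock(&big_lock);
        val = cache[key];               /* almost certainly warm now */
        pthread_mutex_unlock(&big_lock);
        return val;
}

int main(void)
{
        printf("%d\n", lookup_in_transaction(21));
        return 0;
}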
@@ -2101,6 +2155,7 @@ noinline int btrfs_update_inode(struct btrfs_trans_handle *trans, | |||
2101 | 2155 | ||
2102 | path = btrfs_alloc_path(); | 2156 | path = btrfs_alloc_path(); |
2103 | BUG_ON(!path); | 2157 | BUG_ON(!path); |
2158 | path->leave_spinning = 1; | ||
2104 | ret = btrfs_lookup_inode(trans, root, path, | 2159 | ret = btrfs_lookup_inode(trans, root, path, |
2105 | &BTRFS_I(inode)->location, 1); | 2160 | &BTRFS_I(inode)->location, 1); |
2106 | if (ret) { | 2161 | if (ret) { |
@@ -2147,6 +2202,7 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans, | |||
2147 | goto err; | 2202 | goto err; |
2148 | } | 2203 | } |
2149 | 2204 | ||
2205 | path->leave_spinning = 1; | ||
2150 | di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino, | 2206 | di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino, |
2151 | name, name_len, -1); | 2207 | name, name_len, -1); |
2152 | if (IS_ERR(di)) { | 2208 | if (IS_ERR(di)) { |
@@ -2190,8 +2246,6 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans, | |||
2190 | ret = btrfs_del_inode_ref_in_log(trans, root, name, name_len, | 2246 | ret = btrfs_del_inode_ref_in_log(trans, root, name, name_len, |
2191 | inode, dir->i_ino); | 2247 | inode, dir->i_ino); |
2192 | BUG_ON(ret != 0 && ret != -ENOENT); | 2248 | BUG_ON(ret != 0 && ret != -ENOENT); |
2193 | if (ret != -ENOENT) | ||
2194 | BTRFS_I(dir)->log_dirty_trans = trans->transid; | ||
2195 | 2249 | ||
2196 | ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len, | 2250 | ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len, |
2197 | dir, index); | 2251 | dir, index); |
@@ -2224,6 +2278,9 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) | |||
2224 | trans = btrfs_start_transaction(root, 1); | 2278 | trans = btrfs_start_transaction(root, 1); |
2225 | 2279 | ||
2226 | btrfs_set_trans_block_group(trans, dir); | 2280 | btrfs_set_trans_block_group(trans, dir); |
2281 | |||
2282 | btrfs_record_unlink_dir(trans, dir, dentry->d_inode, 0); | ||
2283 | |||
2227 | ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode, | 2284 | ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode, |
2228 | dentry->d_name.name, dentry->d_name.len); | 2285 | dentry->d_name.name, dentry->d_name.len); |
2229 | 2286 | ||
@@ -2498,6 +2555,7 @@ noinline int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, | |||
2498 | key.type = (u8)-1; | 2555 | key.type = (u8)-1; |
2499 | 2556 | ||
2500 | search_again: | 2557 | search_again: |
2558 | path->leave_spinning = 1; | ||
2501 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); | 2559 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); |
2502 | if (ret < 0) | 2560 | if (ret < 0) |
2503 | goto error; | 2561 | goto error; |
@@ -2644,6 +2702,7 @@ delete: | |||
2644 | break; | 2702 | break; |
2645 | } | 2703 | } |
2646 | if (found_extent) { | 2704 | if (found_extent) { |
2705 | btrfs_set_path_blocking(path); | ||
2647 | ret = btrfs_free_extent(trans, root, extent_start, | 2706 | ret = btrfs_free_extent(trans, root, extent_start, |
2648 | extent_num_bytes, | 2707 | extent_num_bytes, |
2649 | leaf->start, root_owner, | 2708 | leaf->start, root_owner, |
@@ -2848,11 +2907,21 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) | |||
2848 | if (err) | 2907 | if (err) |
2849 | return err; | 2908 | return err; |
2850 | 2909 | ||
2851 | if (S_ISREG(inode->i_mode) && | 2910 | if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) { |
2852 | attr->ia_valid & ATTR_SIZE && attr->ia_size > inode->i_size) { | 2911 | if (attr->ia_size > inode->i_size) { |
2853 | err = btrfs_cont_expand(inode, attr->ia_size); | 2912 | err = btrfs_cont_expand(inode, attr->ia_size); |
2854 | if (err) | 2913 | if (err) |
2855 | return err; | 2914 | return err; |
2915 | } else if (inode->i_size > 0 && | ||
2916 | attr->ia_size == 0) { | ||
2917 | |||
2918 | /* we're truncating a file that used to have good | ||
2919 | * data down to zero. Make sure it gets into | ||
2920 | * the ordered flush list so that any new writes | ||
2921 | * get down to disk quickly. | ||
2922 | */ | ||
2923 | BTRFS_I(inode)->ordered_data_close = 1; | ||
2924 | } | ||
2856 | } | 2925 | } |
2857 | 2926 | ||
2858 | err = inode_setattr(inode, attr); | 2927 | err = inode_setattr(inode, attr); |
@@ -2984,13 +3053,14 @@ static noinline void init_btrfs_i(struct inode *inode) | |||
2984 | bi->disk_i_size = 0; | 3053 | bi->disk_i_size = 0; |
2985 | bi->flags = 0; | 3054 | bi->flags = 0; |
2986 | bi->index_cnt = (u64)-1; | 3055 | bi->index_cnt = (u64)-1; |
2987 | bi->log_dirty_trans = 0; | 3056 | bi->last_unlink_trans = 0; |
2988 | extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS); | 3057 | extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS); |
2989 | extent_io_tree_init(&BTRFS_I(inode)->io_tree, | 3058 | extent_io_tree_init(&BTRFS_I(inode)->io_tree, |
2990 | inode->i_mapping, GFP_NOFS); | 3059 | inode->i_mapping, GFP_NOFS); |
2991 | extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree, | 3060 | extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree, |
2992 | inode->i_mapping, GFP_NOFS); | 3061 | inode->i_mapping, GFP_NOFS); |
2993 | INIT_LIST_HEAD(&BTRFS_I(inode)->delalloc_inodes); | 3062 | INIT_LIST_HEAD(&BTRFS_I(inode)->delalloc_inodes); |
3063 | INIT_LIST_HEAD(&BTRFS_I(inode)->ordered_operations); | ||
2994 | btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree); | 3064 | btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree); |
2995 | mutex_init(&BTRFS_I(inode)->extent_mutex); | 3065 | mutex_init(&BTRFS_I(inode)->extent_mutex); |
2996 | mutex_init(&BTRFS_I(inode)->log_mutex); | 3066 | mutex_init(&BTRFS_I(inode)->log_mutex); |
@@ -3411,8 +3481,10 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | |||
3411 | 3481 | ||
3412 | if (dir) { | 3482 | if (dir) { |
3413 | ret = btrfs_set_inode_index(dir, index); | 3483 | ret = btrfs_set_inode_index(dir, index); |
3414 | if (ret) | 3484 | if (ret) { |
3485 | iput(inode); | ||
3415 | return ERR_PTR(ret); | 3486 | return ERR_PTR(ret); |
3487 | } | ||
3416 | } | 3488 | } |
3417 | /* | 3489 | /* |
3418 | * index_cnt is ignored for everything but a dir, | 3490 | * index_cnt is ignored for everything but a dir, |
@@ -3449,6 +3521,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | |||
3449 | sizes[0] = sizeof(struct btrfs_inode_item); | 3521 | sizes[0] = sizeof(struct btrfs_inode_item); |
3450 | sizes[1] = name_len + sizeof(*ref); | 3522 | sizes[1] = name_len + sizeof(*ref); |
3451 | 3523 | ||
3524 | path->leave_spinning = 1; | ||
3452 | ret = btrfs_insert_empty_items(trans, root, path, key, sizes, 2); | 3525 | ret = btrfs_insert_empty_items(trans, root, path, key, sizes, 2); |
3453 | if (ret != 0) | 3526 | if (ret != 0) |
3454 | goto fail; | 3527 | goto fail; |
@@ -3494,6 +3567,7 @@ fail: | |||
3494 | if (dir) | 3567 | if (dir) |
3495 | BTRFS_I(dir)->index_cnt--; | 3568 | BTRFS_I(dir)->index_cnt--; |
3496 | btrfs_free_path(path); | 3569 | btrfs_free_path(path); |
3570 | iput(inode); | ||
3497 | return ERR_PTR(ret); | 3571 | return ERR_PTR(ret); |
3498 | } | 3572 | } |
3499 | 3573 | ||
@@ -3727,6 +3801,8 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, | |||
3727 | drop_inode = 1; | 3801 | drop_inode = 1; |
3728 | 3802 | ||
3729 | nr = trans->blocks_used; | 3803 | nr = trans->blocks_used; |
3804 | |||
3805 | btrfs_log_new_name(trans, inode, NULL, dentry->d_parent); | ||
3730 | btrfs_end_transaction_throttle(trans, root); | 3806 | btrfs_end_transaction_throttle(trans, root); |
3731 | fail: | 3807 | fail: |
3732 | if (drop_inode) { | 3808 | if (drop_inode) { |
@@ -4292,8 +4368,9 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset) | |||
4292 | * beyond EOF, then the page is guaranteed safe against truncation until we | 4368 | * beyond EOF, then the page is guaranteed safe against truncation until we |
4293 | * unlock the page. | 4369 | * unlock the page. |
4294 | */ | 4370 | */ |
4295 | int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page) | 4371 | int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) |
4296 | { | 4372 | { |
4373 | struct page *page = vmf->page; | ||
4297 | struct inode *inode = fdentry(vma->vm_file)->d_inode; | 4374 | struct inode *inode = fdentry(vma->vm_file)->d_inode; |
4298 | struct btrfs_root *root = BTRFS_I(inode)->root; | 4375 | struct btrfs_root *root = BTRFS_I(inode)->root; |
4299 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | 4376 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; |
@@ -4306,10 +4383,15 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page) | |||
4306 | u64 page_end; | 4383 | u64 page_end; |
4307 | 4384 | ||
4308 | ret = btrfs_check_data_free_space(root, inode, PAGE_CACHE_SIZE); | 4385 | ret = btrfs_check_data_free_space(root, inode, PAGE_CACHE_SIZE); |
4309 | if (ret) | 4386 | if (ret) { |
4387 | if (ret == -ENOMEM) | ||
4388 | ret = VM_FAULT_OOM; | ||
4389 | else /* -ENOSPC, -EIO, etc */ | ||
4390 | ret = VM_FAULT_SIGBUS; | ||
4310 | goto out; | 4391 | goto out; |
4392 | } | ||
4311 | 4393 | ||
4312 | ret = -EINVAL; | 4394 | ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */ |
4313 | again: | 4395 | again: |
4314 | lock_page(page); | 4396 | lock_page(page); |
4315 | size = i_size_read(inode); | 4397 | size = i_size_read(inode); |
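Since page_mkwrite handlers now return VM_FAULT_* codes rather than raw errnos, the hunk above translates at the boundary. The translation in isolation, with the constants stubbed for illustration (the real values come from the kernel's mm headers):

#include <errno.h>

#define VM_FAULT_OOM    0x0001  /* stub; real values live in <linux/mm.h> */
#define VM_FAULT_SIGBUS 0x0002

/* map the errno from the free-space reservation to a fault code */
static int errno_to_mkwrite_fault(int err)
{
        if (err == -ENOMEM)
                return VM_FAULT_OOM;
        return VM_FAULT_SIGBUS;         /* -ENOSPC, -EIO, ... */
}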
@@ -4357,6 +4439,8 @@ again: | |||
4357 | } | 4439 | } |
4358 | ClearPageChecked(page); | 4440 | ClearPageChecked(page); |
4359 | set_page_dirty(page); | 4441 | set_page_dirty(page); |
4442 | |||
4443 | BTRFS_I(inode)->last_trans = root->fs_info->generation + 1; | ||
4360 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); | 4444 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); |
4361 | 4445 | ||
4362 | out_unlock: | 4446 | out_unlock: |
@@ -4382,6 +4466,27 @@ static void btrfs_truncate(struct inode *inode) | |||
4382 | btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); | 4466 | btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); |
4383 | 4467 | ||
4384 | trans = btrfs_start_transaction(root, 1); | 4468 | trans = btrfs_start_transaction(root, 1); |
4469 | |||
4470 | /* | ||
4471 | * setattr is responsible for setting the ordered_data_close flag, | ||
4472 | * but that is only tested during the last file release. That | ||
4473 | * could happen well after the next commit, leaving a great big | ||
4474 | * window where new writes may get lost if someone chooses to write | ||
4475 | * to this file after truncating to zero | ||
4476 | * | ||
4477 | * The inode doesn't have any dirty data here, and so if we commit | ||
4478 | * this is a noop. If someone immediately starts writing to the inode | ||
4479 | * it is very likely we'll catch some of their writes in this | ||
4480 | * transaction, and the commit will find this file on the ordered | ||
4481 | * data list with good things to send down. | ||
4482 | * | ||
4483 | * This is a best effort solution, there is still a window where | ||
4484 | * using truncate to replace the contents of the file will | ||
4485 | * end up with a zero length file after a crash. | ||
4486 | */ | ||
4487 | if (inode->i_size == 0 && BTRFS_I(inode)->ordered_data_close) | ||
4488 | btrfs_add_ordered_operation(trans, root, inode); | ||
4489 | |||
4385 | btrfs_set_trans_block_group(trans, inode); | 4490 | btrfs_set_trans_block_group(trans, inode); |
4386 | btrfs_i_size_write(inode, inode->i_size); | 4491 | btrfs_i_size_write(inode, inode->i_size); |
4387 | 4492 | ||
@@ -4458,12 +4563,15 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) | |||
4458 | ei->i_acl = BTRFS_ACL_NOT_CACHED; | 4563 | ei->i_acl = BTRFS_ACL_NOT_CACHED; |
4459 | ei->i_default_acl = BTRFS_ACL_NOT_CACHED; | 4564 | ei->i_default_acl = BTRFS_ACL_NOT_CACHED; |
4460 | INIT_LIST_HEAD(&ei->i_orphan); | 4565 | INIT_LIST_HEAD(&ei->i_orphan); |
4566 | INIT_LIST_HEAD(&ei->ordered_operations); | ||
4461 | return &ei->vfs_inode; | 4567 | return &ei->vfs_inode; |
4462 | } | 4568 | } |
4463 | 4569 | ||
4464 | void btrfs_destroy_inode(struct inode *inode) | 4570 | void btrfs_destroy_inode(struct inode *inode) |
4465 | { | 4571 | { |
4466 | struct btrfs_ordered_extent *ordered; | 4572 | struct btrfs_ordered_extent *ordered; |
4573 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
4574 | |||
4467 | WARN_ON(!list_empty(&inode->i_dentry)); | 4575 | WARN_ON(!list_empty(&inode->i_dentry)); |
4468 | WARN_ON(inode->i_data.nrpages); | 4576 | WARN_ON(inode->i_data.nrpages); |
4469 | 4577 | ||
@@ -4474,13 +4582,24 @@ void btrfs_destroy_inode(struct inode *inode) | |||
4474 | BTRFS_I(inode)->i_default_acl != BTRFS_ACL_NOT_CACHED) | 4582 | BTRFS_I(inode)->i_default_acl != BTRFS_ACL_NOT_CACHED) |
4475 | posix_acl_release(BTRFS_I(inode)->i_default_acl); | 4583 | posix_acl_release(BTRFS_I(inode)->i_default_acl); |
4476 | 4584 | ||
4477 | spin_lock(&BTRFS_I(inode)->root->list_lock); | 4585 | /* |
4586 | * Make sure we're properly removed from the ordered operation | ||
4587 | * lists. | ||
4588 | */ | ||
4589 | smp_mb(); | ||
4590 | if (!list_empty(&BTRFS_I(inode)->ordered_operations)) { | ||
4591 | spin_lock(&root->fs_info->ordered_extent_lock); | ||
4592 | list_del_init(&BTRFS_I(inode)->ordered_operations); | ||
4593 | spin_unlock(&root->fs_info->ordered_extent_lock); | ||
4594 | } | ||
4595 | |||
4596 | spin_lock(&root->list_lock); | ||
4478 | if (!list_empty(&BTRFS_I(inode)->i_orphan)) { | 4597 | if (!list_empty(&BTRFS_I(inode)->i_orphan)) { |
4479 | printk(KERN_ERR "BTRFS: inode %lu: inode still on the orphan" | 4598 | printk(KERN_ERR "BTRFS: inode %lu: inode still on the orphan" |
4480 | " list\n", inode->i_ino); | 4599 | " list\n", inode->i_ino); |
4481 | dump_stack(); | 4600 | dump_stack(); |
4482 | } | 4601 | } |
4483 | spin_unlock(&BTRFS_I(inode)->root->list_lock); | 4602 | spin_unlock(&root->list_lock); |
4484 | 4603 | ||
4485 | while (1) { | 4604 | while (1) { |
4486 | ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1); | 4605 | ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1); |
@@ -4605,8 +4724,36 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
4605 | if (ret) | 4724 | if (ret) |
4606 | goto out_unlock; | 4725 | goto out_unlock; |
4607 | 4726 | ||
4727 | /* | ||
4728 | * we're using rename to replace one file with another, ||
4729 | * and the replacement file is large. Start IO on it now so ||
4730 | * we don't add too much work to the end of the transaction | ||
4731 | */ | ||
4732 | if (new_inode && old_inode && S_ISREG(old_inode->i_mode) && | ||
4733 | new_inode->i_size && | ||
4734 | old_inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT) | ||
4735 | filemap_flush(old_inode->i_mapping); | ||
4736 | |||
4608 | trans = btrfs_start_transaction(root, 1); | 4737 | trans = btrfs_start_transaction(root, 1); |
4609 | 4738 | ||
4739 | /* | ||
4740 | * make sure the inode gets flushed if it is replacing | ||
4741 | * something. | ||
4742 | */ | ||
4743 | if (new_inode && new_inode->i_size && | ||
4744 | old_inode && S_ISREG(old_inode->i_mode)) { | ||
4745 | btrfs_add_ordered_operation(trans, root, old_inode); | ||
4746 | } | ||
4747 | |||
4748 | /* | ||
4749 | * this is an ugly little race, but the rename is required to make | ||
4750 | * sure that if we crash, the inode is either at the old name | ||
4751 | * or the new one. Pinning the log transaction lets us make sure ||
4752 | * we don't allow a log commit to come in after we unlink the | ||
4753 | * name but before we add the new name back in. | ||
4754 | */ | ||
4755 | btrfs_pin_log_trans(root); | ||
4756 | |||
4610 | btrfs_set_trans_block_group(trans, new_dir); | 4757 | btrfs_set_trans_block_group(trans, new_dir); |
4611 | 4758 | ||
4612 | btrfs_inc_nlink(old_dentry->d_inode); | 4759 | btrfs_inc_nlink(old_dentry->d_inode); |
@@ -4614,6 +4761,9 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
4614 | new_dir->i_ctime = new_dir->i_mtime = ctime; | 4761 | new_dir->i_ctime = new_dir->i_mtime = ctime; |
4615 | old_inode->i_ctime = ctime; | 4762 | old_inode->i_ctime = ctime; |
4616 | 4763 | ||
4764 | if (old_dentry->d_parent != new_dentry->d_parent) | ||
4765 | btrfs_record_unlink_dir(trans, old_dir, old_inode, 1); | ||
4766 | |||
4617 | ret = btrfs_unlink_inode(trans, root, old_dir, old_dentry->d_inode, | 4767 | ret = btrfs_unlink_inode(trans, root, old_dir, old_dentry->d_inode, |
4618 | old_dentry->d_name.name, | 4768 | old_dentry->d_name.name, |
4619 | old_dentry->d_name.len); | 4769 | old_dentry->d_name.len); |
@@ -4645,7 +4795,14 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
4645 | if (ret) | 4795 | if (ret) |
4646 | goto out_fail; | 4796 | goto out_fail; |
4647 | 4797 | ||
4798 | btrfs_log_new_name(trans, old_inode, old_dir, | ||
4799 | new_dentry->d_parent); | ||
4648 | out_fail: | 4800 | out_fail: |
4801 | |||
4802 | /* this btrfs_end_log_trans just allows the current | ||
4803 | * log-sub transaction to complete | ||
4804 | */ | ||
4805 | btrfs_end_log_trans(root); | ||
4649 | btrfs_end_transaction_throttle(trans, root); | 4806 | btrfs_end_transaction_throttle(trans, root); |
4650 | out_unlock: | 4807 | out_unlock: |
4651 | return ret; | 4808 | return ret; |
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index bca729fc80c8..7594bec1be10 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
@@ -267,7 +267,7 @@ static noinline int btrfs_mksubvol(struct path *parent, char *name, | |||
267 | goto out_dput; | 267 | goto out_dput; |
268 | 268 | ||
269 | if (!IS_POSIXACL(parent->dentry->d_inode)) | 269 | if (!IS_POSIXACL(parent->dentry->d_inode)) |
270 | mode &= ~current->fs->umask; | 270 | mode &= ~current_umask(); |
271 | 271 | ||
272 | error = mnt_want_write(parent->mnt); | 272 | error = mnt_want_write(parent->mnt); |
273 | if (error) | 273 | if (error) |
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c index 47b0a88c12a2..1c36e5cd8f55 100644 --- a/fs/btrfs/locking.c +++ b/fs/btrfs/locking.c | |||
@@ -60,8 +60,8 @@ void btrfs_clear_lock_blocking(struct extent_buffer *eb) | |||
60 | 60 | ||
61 | /* | 61 | /* |
62 | * unfortunately, many of the places that currently set a lock to blocking | 62 | * unfortunately, many of the places that currently set a lock to blocking |
63 | * don't end up blocking for every long, and often they don't block | 63 | * don't end up blocking for very long, and often they don't block |
64 | * at all. For a dbench 50 run, if we don't spin one the blocking bit | 64 | * at all. For a dbench 50 run, if we don't spin on the blocking bit |
65 | * at all, the context switch rate can jump up to 400,000/sec or more. | 65 | * at all, the context switch rate can jump up to 400,000/sec or more. |
66 | * | 66 | * |
67 | * So, we're still stuck with this crummy spin on the blocking bit, | 67 | * So, we're still stuck with this crummy spin on the blocking bit, |
@@ -71,12 +71,13 @@ void btrfs_clear_lock_blocking(struct extent_buffer *eb) | |||
71 | static int btrfs_spin_on_block(struct extent_buffer *eb) | 71 | static int btrfs_spin_on_block(struct extent_buffer *eb) |
72 | { | 72 | { |
73 | int i; | 73 | int i; |
74 | |||
74 | for (i = 0; i < 512; i++) { | 75 | for (i = 0; i < 512; i++) { |
75 | cpu_relax(); | ||
76 | if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) | 76 | if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) |
77 | return 1; | 77 | return 1; |
78 | if (need_resched()) | 78 | if (need_resched()) |
79 | break; | 79 | break; |
80 | cpu_relax(); | ||
80 | } | 81 | } |
81 | return 0; | 82 | return 0; |
82 | } | 83 | } |
@@ -95,13 +96,15 @@ int btrfs_try_spin_lock(struct extent_buffer *eb) | |||
95 | { | 96 | { |
96 | int i; | 97 | int i; |
97 | 98 | ||
98 | spin_nested(eb); | 99 | if (btrfs_spin_on_block(eb)) { |
99 | if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) | 100 | spin_nested(eb); |
100 | return 1; | 101 | if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) |
101 | spin_unlock(&eb->lock); | 102 | return 1; |
102 | 103 | spin_unlock(&eb->lock); | |
104 | } | ||
103 | /* spin for a bit on the BLOCKING flag */ | 105 | /* spin for a bit on the BLOCKING flag */ |
104 | for (i = 0; i < 2; i++) { | 106 | for (i = 0; i < 2; i++) { |
107 | cpu_relax(); | ||
105 | if (!btrfs_spin_on_block(eb)) | 108 | if (!btrfs_spin_on_block(eb)) |
106 | break; | 109 | break; |
107 | 110 | ||
@@ -148,6 +151,9 @@ int btrfs_tree_lock(struct extent_buffer *eb) | |||
148 | DEFINE_WAIT(wait); | 151 | DEFINE_WAIT(wait); |
149 | wait.func = btrfs_wake_function; | 152 | wait.func = btrfs_wake_function; |
150 | 153 | ||
154 | if (!btrfs_spin_on_block(eb)) | ||
155 | goto sleep; | ||
156 | |||
151 | while(1) { | 157 | while(1) { |
152 | spin_nested(eb); | 158 | spin_nested(eb); |
153 | 159 | ||
@@ -165,9 +171,10 @@ int btrfs_tree_lock(struct extent_buffer *eb) | |||
165 | * spin for a bit, and if the blocking flag goes away, | 171 | * spin for a bit, and if the blocking flag goes away, |
166 | * loop around | 172 | * loop around |
167 | */ | 173 | */ |
174 | cpu_relax(); | ||
168 | if (btrfs_spin_on_block(eb)) | 175 | if (btrfs_spin_on_block(eb)) |
169 | continue; | 176 | continue; |
170 | 177 | sleep: | |
171 | prepare_to_wait_exclusive(&eb->lock_wq, &wait, | 178 | prepare_to_wait_exclusive(&eb->lock_wq, &wait, |
172 | TASK_UNINTERRUPTIBLE); | 179 | TASK_UNINTERRUPTIBLE); |
173 | 180 | ||
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 77c2411a5f0f..53c87b197d70 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c | |||
@@ -310,6 +310,16 @@ int btrfs_remove_ordered_extent(struct inode *inode, | |||
310 | 310 | ||
311 | spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); | 311 | spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); |
312 | list_del_init(&entry->root_extent_list); | 312 | list_del_init(&entry->root_extent_list); |
313 | |||
314 | /* | ||
315 | * we have no more ordered extents for this inode and | ||
316 | * no dirty pages. We can safely remove it from the | ||
317 | * list of ordered extents | ||
318 | */ | ||
319 | if (RB_EMPTY_ROOT(&tree->tree) && | ||
320 | !mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY)) { | ||
321 | list_del_init(&BTRFS_I(inode)->ordered_operations); | ||
322 | } | ||
313 | spin_unlock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); | 323 | spin_unlock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); |
314 | 324 | ||
315 | mutex_unlock(&tree->mutex); | 325 | mutex_unlock(&tree->mutex); |
@@ -370,6 +380,68 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only) | |||
370 | } | 380 | } |
371 | 381 | ||
372 | /* | 382 | /* |
383 | * this is used during transaction commit to write all the inodes | ||
384 | * added to the ordered operation list. These files must be fully on | ||
385 | * disk before the transaction commits. | ||
386 | * | ||
387 | * we have two modes here, one is to just start the IO via filemap_flush | ||
388 | * and the other is to wait for all the IO. When we wait, we have an | ||
389 | * extra check to make sure the ordered operation list really is empty | ||
390 | * before we return | ||
391 | */ | ||
392 | int btrfs_run_ordered_operations(struct btrfs_root *root, int wait) | ||
393 | { | ||
394 | struct btrfs_inode *btrfs_inode; | ||
395 | struct inode *inode; | ||
396 | struct list_head splice; | ||
397 | |||
398 | INIT_LIST_HEAD(&splice); | ||
399 | |||
400 | mutex_lock(&root->fs_info->ordered_operations_mutex); | ||
401 | spin_lock(&root->fs_info->ordered_extent_lock); | ||
402 | again: | ||
403 | list_splice_init(&root->fs_info->ordered_operations, &splice); | ||
404 | |||
405 | while (!list_empty(&splice)) { | ||
406 | btrfs_inode = list_entry(splice.next, struct btrfs_inode, | ||
407 | ordered_operations); | ||
408 | |||
409 | inode = &btrfs_inode->vfs_inode; | ||
410 | |||
411 | list_del_init(&btrfs_inode->ordered_operations); | ||
412 | |||
413 | /* | ||
414 | * the inode may be getting freed (in sys_unlink path). | ||
415 | */ | ||
416 | inode = igrab(inode); | ||
417 | |||
418 | if (!wait && inode) { | ||
419 | list_add_tail(&BTRFS_I(inode)->ordered_operations, | ||
420 | &root->fs_info->ordered_operations); | ||
421 | } | ||
422 | spin_unlock(&root->fs_info->ordered_extent_lock); | ||
423 | |||
424 | if (inode) { | ||
425 | if (wait) | ||
426 | btrfs_wait_ordered_range(inode, 0, (u64)-1); | ||
427 | else | ||
428 | filemap_flush(inode->i_mapping); | ||
429 | iput(inode); | ||
430 | } | ||
431 | |||
432 | cond_resched(); | ||
433 | spin_lock(&root->fs_info->ordered_extent_lock); | ||
434 | } | ||
435 | if (wait && !list_empty(&root->fs_info->ordered_operations)) | ||
436 | goto again; | ||
437 | |||
438 | spin_unlock(&root->fs_info->ordered_extent_lock); | ||
439 | mutex_unlock(&root->fs_info->ordered_operations_mutex); | ||
440 | |||
441 | return 0; | ||
442 | } | ||
443 | |||
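btrfs_run_ordered_operations() above is built on a common kernel idiom: splice the whole list off under the spinlock, then drop the lock around the slow per-inode work (flush or wait) and re-take it to advance. A reduced sketch of just that idiom, with a hypothetical item type standing in for btrfs_inode (illustrative only):

	/* hypothetical entry type and worker, for illustration */
	struct item { struct list_head list; };
	static void process(struct item *it);

	/* sketch: drain a spinlock-protected list without holding it during IO */
	static void drain_list(spinlock_t *lock, struct list_head *work)
	{
		LIST_HEAD(splice);

		spin_lock(lock);
		list_splice_init(work, &splice);	/* steal every entry at once */
		while (!list_empty(&splice)) {
			struct item *it;

			it = list_entry(splice.next, struct item, list);
			list_del_init(&it->list);
			spin_unlock(lock);		/* drop the lock for the slow part */
			process(it);			/* e.g. filemap_flush() or a wait */
			cond_resched();
			spin_lock(lock);
		}
		spin_unlock(lock);
	}

The igrab()/iput() pair in the real function plays the role a reference count plays here: the inode must not be freed while the lock is dropped.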
444 | /* | ||
373 | * Used to start IO or wait for a given ordered extent to finish. | 445 | * Used to start IO or wait for a given ordered extent to finish. |
374 | * | 446 | * |
375 | * If wait is one, this effectively waits on page writeback for all the pages | 447 | * If wait is one, this effectively waits on page writeback for all the pages |
@@ -726,3 +798,49 @@ int btrfs_wait_on_page_writeback_range(struct address_space *mapping, | |||
726 | 798 | ||
727 | return ret; | 799 | return ret; |
728 | } | 800 | } |
801 | |||
802 | /* | ||
803 | * add a given inode to the list of inodes that must be fully on | ||
804 | * disk before a transaction commit finishes. | ||
805 | * | ||
806 | * This basically gives us the ext3 style data=ordered mode, and it is mostly | ||
807 | * used to make sure renamed files are fully on disk. | ||
808 | * | ||
809 | * It is a noop if the inode is already fully on disk. | ||
810 | * | ||
811 | * If trans is not null, we'll do a friendly check for a transaction that | ||
812 | * is already flushing things and force the IO down ourselves. | ||
813 | */ | ||
814 | int btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, | ||
815 | struct btrfs_root *root, | ||
816 | struct inode *inode) | ||
817 | { | ||
818 | u64 last_mod; | ||
819 | |||
820 | last_mod = max(BTRFS_I(inode)->generation, BTRFS_I(inode)->last_trans); | ||
821 | |||
822 | /* | ||
823 | * if this file hasn't been changed since the last transaction | ||
824 | * commit, we can safely return without doing anything | ||
825 | */ | ||
826 | if (last_mod < root->fs_info->last_trans_committed) | ||
827 | return 0; | ||
828 | |||
829 | /* | ||
830 | * the transaction is already committing. Just start the IO and | ||
831 | * don't bother with all of this list nonsense | ||
832 | */ | ||
833 | if (trans && root->fs_info->running_transaction->blocked) { | ||
834 | btrfs_wait_ordered_range(inode, 0, (u64)-1); | ||
835 | return 0; | ||
836 | } | ||
837 | |||
838 | spin_lock(&root->fs_info->ordered_extent_lock); | ||
839 | if (list_empty(&BTRFS_I(inode)->ordered_operations)) { | ||
840 | list_add_tail(&BTRFS_I(inode)->ordered_operations, | ||
841 | &root->fs_info->ordered_operations); | ||
842 | } | ||
843 | spin_unlock(&root->fs_info->ordered_extent_lock); | ||
844 | |||
845 | return 0; | ||
846 | } | ||
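Per the comment above, the main customer of this helper is the rename path; a call site of roughly this shape (sketched here for orientation, the actual hunk lives elsewhere in the series) registers the source inode once a transaction is in hand:

	/* sketch: inside btrfs_rename(), after btrfs_start_transaction() */
	btrfs_add_ordered_operation(trans, root, old_inode);

Because the helper returns early when last_mod predates the last committed transaction, calling it on an unmodified inode stays cheap.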
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index ab66d5e8d6d6..3d31c8827b01 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h | |||
@@ -155,4 +155,8 @@ int btrfs_wait_on_page_writeback_range(struct address_space *mapping, | |||
155 | int btrfs_fdatawrite_range(struct address_space *mapping, loff_t start, | 155 | int btrfs_fdatawrite_range(struct address_space *mapping, loff_t start, |
156 | loff_t end, int sync_mode); | 156 | loff_t end, int sync_mode); |
157 | int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only); | 157 | int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only); |
158 | int btrfs_run_ordered_operations(struct btrfs_root *root, int wait); | ||
159 | int btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, | ||
160 | struct btrfs_root *root, | ||
161 | struct inode *inode); | ||
158 | #endif | 162 | #endif |
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 19a4daf03ccb..9744af9d71e9 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
@@ -24,6 +24,7 @@ | |||
24 | #include <linux/highmem.h> | 24 | #include <linux/highmem.h> |
25 | #include <linux/time.h> | 25 | #include <linux/time.h> |
26 | #include <linux/init.h> | 26 | #include <linux/init.h> |
27 | #include <linux/seq_file.h> | ||
27 | #include <linux/string.h> | 28 | #include <linux/string.h> |
28 | #include <linux/smp_lock.h> | 29 | #include <linux/smp_lock.h> |
29 | #include <linux/backing-dev.h> | 30 | #include <linux/backing-dev.h> |
@@ -66,7 +67,8 @@ static void btrfs_put_super(struct super_block *sb) | |||
66 | enum { | 67 | enum { |
67 | Opt_degraded, Opt_subvol, Opt_device, Opt_nodatasum, Opt_nodatacow, | 68 | Opt_degraded, Opt_subvol, Opt_device, Opt_nodatasum, Opt_nodatacow, |
68 | Opt_max_extent, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, | 69 | Opt_max_extent, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, |
69 | Opt_ssd, Opt_thread_pool, Opt_noacl, Opt_compress, Opt_err, | 70 | Opt_ssd, Opt_thread_pool, Opt_noacl, Opt_compress, Opt_notreelog, |
71 | Opt_flushoncommit, Opt_err, | ||
70 | }; | 72 | }; |
71 | 73 | ||
72 | static match_table_t tokens = { | 74 | static match_table_t tokens = { |
@@ -83,6 +85,8 @@ static match_table_t tokens = { | |||
83 | {Opt_compress, "compress"}, | 85 | {Opt_compress, "compress"}, |
84 | {Opt_ssd, "ssd"}, | 86 | {Opt_ssd, "ssd"}, |
85 | {Opt_noacl, "noacl"}, | 87 | {Opt_noacl, "noacl"}, |
88 | {Opt_notreelog, "notreelog"}, | ||
89 | {Opt_flushoncommit, "flushoncommit"}, | ||
86 | {Opt_err, NULL}, | 90 | {Opt_err, NULL}, |
87 | }; | 91 | }; |
88 | 92 | ||
@@ -222,6 +226,14 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
222 | case Opt_noacl: | 226 | case Opt_noacl: |
223 | root->fs_info->sb->s_flags &= ~MS_POSIXACL; | 227 | root->fs_info->sb->s_flags &= ~MS_POSIXACL; |
224 | break; | 228 | break; |
229 | case Opt_notreelog: | ||
230 | printk(KERN_INFO "btrfs: disabling tree log\n"); | ||
231 | btrfs_set_opt(info->mount_opt, NOTREELOG); | ||
232 | break; | ||
233 | case Opt_flushoncommit: | ||
234 | printk(KERN_INFO "btrfs: turning on flush-on-commit\n"); | ||
235 | btrfs_set_opt(info->mount_opt, FLUSHONCOMMIT); | ||
236 | break; | ||
225 | default: | 237 | default: |
226 | break; | 238 | break; |
227 | } | 239 | } |
@@ -363,9 +375,8 @@ fail_close: | |||
363 | int btrfs_sync_fs(struct super_block *sb, int wait) | 375 | int btrfs_sync_fs(struct super_block *sb, int wait) |
364 | { | 376 | { |
365 | struct btrfs_trans_handle *trans; | 377 | struct btrfs_trans_handle *trans; |
366 | struct btrfs_root *root; | 378 | struct btrfs_root *root = btrfs_sb(sb); |
367 | int ret; | 379 | int ret; |
368 | root = btrfs_sb(sb); | ||
369 | 380 | ||
370 | if (sb->s_flags & MS_RDONLY) | 381 | if (sb->s_flags & MS_RDONLY) |
371 | return 0; | 382 | return 0; |
@@ -385,6 +396,41 @@ int btrfs_sync_fs(struct super_block *sb, int wait) | |||
385 | return ret; | 396 | return ret; |
386 | } | 397 | } |
387 | 398 | ||
399 | static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs) | ||
400 | { | ||
401 | struct btrfs_root *root = btrfs_sb(vfs->mnt_sb); | ||
402 | struct btrfs_fs_info *info = root->fs_info; | ||
403 | |||
404 | if (btrfs_test_opt(root, DEGRADED)) | ||
405 | seq_puts(seq, ",degraded"); | ||
406 | if (btrfs_test_opt(root, NODATASUM)) | ||
407 | seq_puts(seq, ",nodatasum"); | ||
408 | if (btrfs_test_opt(root, NODATACOW)) | ||
409 | seq_puts(seq, ",nodatacow"); | ||
410 | if (btrfs_test_opt(root, NOBARRIER)) | ||
411 | seq_puts(seq, ",nobarrier"); | ||
412 | if (info->max_extent != (u64)-1) | ||
413 | seq_printf(seq, ",max_extent=%llu", info->max_extent); | ||
414 | if (info->max_inline != 8192 * 1024) | ||
415 | seq_printf(seq, ",max_inline=%llu", info->max_inline); | ||
416 | if (info->alloc_start != 0) | ||
417 | seq_printf(seq, ",alloc_start=%llu", info->alloc_start); | ||
418 | if (info->thread_pool_size != min_t(unsigned long, | ||
419 | num_online_cpus() + 2, 8)) | ||
420 | seq_printf(seq, ",thread_pool=%d", info->thread_pool_size); | ||
421 | if (btrfs_test_opt(root, COMPRESS)) | ||
422 | seq_puts(seq, ",compress"); | ||
423 | if (btrfs_test_opt(root, SSD)) | ||
424 | seq_puts(seq, ",ssd"); | ||
425 | if (btrfs_test_opt(root, NOTREELOG)) | ||
426 | seq_puts(seq, ",no-treelog"); | ||
427 | if (btrfs_test_opt(root, FLUSHONCOMMIT)) | ||
428 | seq_puts(seq, ",flush-on-commit"); | ||
429 | if (!(root->fs_info->sb->s_flags & MS_POSIXACL)) | ||
430 | seq_puts(seq, ",noacl"); | ||
431 | return 0; | ||
432 | } | ||
433 | |||
388 | static void btrfs_write_super(struct super_block *sb) | 434 | static void btrfs_write_super(struct super_block *sb) |
389 | { | 435 | { |
390 | sb->s_dirt = 0; | 436 | sb->s_dirt = 0; |
@@ -630,7 +676,7 @@ static struct super_operations btrfs_super_ops = { | |||
630 | .put_super = btrfs_put_super, | 676 | .put_super = btrfs_put_super, |
631 | .write_super = btrfs_write_super, | 677 | .write_super = btrfs_write_super, |
632 | .sync_fs = btrfs_sync_fs, | 678 | .sync_fs = btrfs_sync_fs, |
633 | .show_options = generic_show_options, | 679 | .show_options = btrfs_show_options, |
634 | .write_inode = btrfs_write_inode, | 680 | .write_inode = btrfs_write_inode, |
635 | .dirty_inode = btrfs_dirty_inode, | 681 | .dirty_inode = btrfs_dirty_inode, |
636 | .alloc_inode = btrfs_alloc_inode, | 682 | .alloc_inode = btrfs_alloc_inode, |
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 4112d53d4f4d..2869b3361eb6 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
@@ -53,8 +53,6 @@ static noinline int join_transaction(struct btrfs_root *root) | |||
53 | GFP_NOFS); | 53 | GFP_NOFS); |
54 | BUG_ON(!cur_trans); | 54 | BUG_ON(!cur_trans); |
55 | root->fs_info->generation++; | 55 | root->fs_info->generation++; |
56 | root->fs_info->last_alloc = 0; | ||
57 | root->fs_info->last_data_alloc = 0; | ||
58 | cur_trans->num_writers = 1; | 56 | cur_trans->num_writers = 1; |
59 | cur_trans->num_joined = 0; | 57 | cur_trans->num_joined = 0; |
60 | cur_trans->transid = root->fs_info->generation; | 58 | cur_trans->transid = root->fs_info->generation; |
@@ -65,6 +63,15 @@ static noinline int join_transaction(struct btrfs_root *root) | |||
65 | cur_trans->use_count = 1; | 63 | cur_trans->use_count = 1; |
66 | cur_trans->commit_done = 0; | 64 | cur_trans->commit_done = 0; |
67 | cur_trans->start_time = get_seconds(); | 65 | cur_trans->start_time = get_seconds(); |
66 | |||
67 | cur_trans->delayed_refs.root.rb_node = NULL; | ||
68 | cur_trans->delayed_refs.num_entries = 0; | ||
69 | cur_trans->delayed_refs.num_heads_ready = 0; | ||
70 | cur_trans->delayed_refs.num_heads = 0; | ||
71 | cur_trans->delayed_refs.flushing = 0; | ||
72 | cur_trans->delayed_refs.run_delayed_start = 0; | ||
73 | spin_lock_init(&cur_trans->delayed_refs.lock); | ||
74 | |||
68 | INIT_LIST_HEAD(&cur_trans->pending_snapshots); | 75 | INIT_LIST_HEAD(&cur_trans->pending_snapshots); |
69 | list_add_tail(&cur_trans->list, &root->fs_info->trans_list); | 76 | list_add_tail(&cur_trans->list, &root->fs_info->trans_list); |
70 | extent_io_tree_init(&cur_trans->dirty_pages, | 77 | extent_io_tree_init(&cur_trans->dirty_pages, |
@@ -182,6 +189,8 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, | |||
182 | h->block_group = 0; | 189 | h->block_group = 0; |
183 | h->alloc_exclude_nr = 0; | 190 | h->alloc_exclude_nr = 0; |
184 | h->alloc_exclude_start = 0; | 191 | h->alloc_exclude_start = 0; |
192 | h->delayed_ref_updates = 0; | ||
193 | |||
185 | root->fs_info->running_transaction->use_count++; | 194 | root->fs_info->running_transaction->use_count++; |
186 | mutex_unlock(&root->fs_info->trans_mutex); | 195 | mutex_unlock(&root->fs_info->trans_mutex); |
187 | return h; | 196 | return h; |
@@ -271,7 +280,6 @@ void btrfs_throttle(struct btrfs_root *root) | |||
271 | if (!root->fs_info->open_ioctl_trans) | 280 | if (!root->fs_info->open_ioctl_trans) |
272 | wait_current_trans(root); | 281 | wait_current_trans(root); |
273 | mutex_unlock(&root->fs_info->trans_mutex); | 282 | mutex_unlock(&root->fs_info->trans_mutex); |
274 | |||
275 | throttle_on_drops(root); | 283 | throttle_on_drops(root); |
276 | } | 284 | } |
277 | 285 | ||
@@ -280,6 +288,27 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
280 | { | 288 | { |
281 | struct btrfs_transaction *cur_trans; | 289 | struct btrfs_transaction *cur_trans; |
282 | struct btrfs_fs_info *info = root->fs_info; | 290 | struct btrfs_fs_info *info = root->fs_info; |
291 | int count = 0; | ||
292 | |||
293 | while (count < 4) { | ||
294 | unsigned long cur = trans->delayed_ref_updates; | ||
295 | trans->delayed_ref_updates = 0; | ||
296 | if (cur && | ||
297 | trans->transaction->delayed_refs.num_heads_ready > 64) { | ||
298 | trans->delayed_ref_updates = 0; | ||
299 | |||
300 | /* | ||
301 | * do a full flush if the transaction is trying | ||
302 | * to close | ||
303 | */ | ||
304 | if (trans->transaction->delayed_refs.flushing) | ||
305 | cur = 0; | ||
306 | btrfs_run_delayed_refs(trans, root, cur); | ||
307 | } else { | ||
308 | break; | ||
309 | } | ||
310 | count++; | ||
311 | } | ||
283 | 312 | ||
284 | mutex_lock(&info->trans_mutex); | 313 | mutex_lock(&info->trans_mutex); |
285 | cur_trans = info->running_transaction; | 314 | cur_trans = info->running_transaction; |
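The new loop at the top of __btrfs_end_transaction() is a throttle: whoever ends a transaction helps flush roughly as many delayed ref updates as they queued, for at most four passes, and once the transaction is closing it switches to a full flush. Its shape, with hypothetical predicates in place of the raw field tests (illustrative):

	/* sketch of the throttle: enders help flush their own backlog */
	int count;

	for (count = 0; count < 4; count++) {
		unsigned long cur = trans->delayed_ref_updates;

		trans->delayed_ref_updates = 0;
		if (!cur || !backlog_large(trans))	/* hypothetical: num_heads_ready > 64 */
			break;
		if (transaction_closing(trans))		/* hypothetical: delayed_refs.flushing */
			cur = 0;			/* 0 means run the whole backlog */
		btrfs_run_delayed_refs(trans, root, cur);
	}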
@@ -424,9 +453,10 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans, | |||
424 | u64 old_root_bytenr; | 453 | u64 old_root_bytenr; |
425 | struct btrfs_root *tree_root = root->fs_info->tree_root; | 454 | struct btrfs_root *tree_root = root->fs_info->tree_root; |
426 | 455 | ||
427 | btrfs_extent_post_op(trans, root); | ||
428 | btrfs_write_dirty_block_groups(trans, root); | 456 | btrfs_write_dirty_block_groups(trans, root); |
429 | btrfs_extent_post_op(trans, root); | 457 | |
458 | ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); | ||
459 | BUG_ON(ret); | ||
430 | 460 | ||
431 | while (1) { | 461 | while (1) { |
432 | old_root_bytenr = btrfs_root_bytenr(&root->root_item); | 462 | old_root_bytenr = btrfs_root_bytenr(&root->root_item); |
@@ -438,14 +468,14 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans, | |||
438 | btrfs_header_level(root->node)); | 468 | btrfs_header_level(root->node)); |
439 | btrfs_set_root_generation(&root->root_item, trans->transid); | 469 | btrfs_set_root_generation(&root->root_item, trans->transid); |
440 | 470 | ||
441 | btrfs_extent_post_op(trans, root); | ||
442 | |||
443 | ret = btrfs_update_root(trans, tree_root, | 471 | ret = btrfs_update_root(trans, tree_root, |
444 | &root->root_key, | 472 | &root->root_key, |
445 | &root->root_item); | 473 | &root->root_item); |
446 | BUG_ON(ret); | 474 | BUG_ON(ret); |
447 | btrfs_write_dirty_block_groups(trans, root); | 475 | btrfs_write_dirty_block_groups(trans, root); |
448 | btrfs_extent_post_op(trans, root); | 476 | |
477 | ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); | ||
478 | BUG_ON(ret); | ||
449 | } | 479 | } |
450 | return 0; | 480 | return 0; |
451 | } | 481 | } |
@@ -459,15 +489,18 @@ int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, | |||
459 | struct btrfs_fs_info *fs_info = root->fs_info; | 489 | struct btrfs_fs_info *fs_info = root->fs_info; |
460 | struct list_head *next; | 490 | struct list_head *next; |
461 | struct extent_buffer *eb; | 491 | struct extent_buffer *eb; |
492 | int ret; | ||
462 | 493 | ||
463 | btrfs_extent_post_op(trans, fs_info->tree_root); | 494 | ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); |
495 | BUG_ON(ret); | ||
464 | 496 | ||
465 | eb = btrfs_lock_root_node(fs_info->tree_root); | 497 | eb = btrfs_lock_root_node(fs_info->tree_root); |
466 | btrfs_cow_block(trans, fs_info->tree_root, eb, NULL, 0, &eb, 0); | 498 | btrfs_cow_block(trans, fs_info->tree_root, eb, NULL, 0, &eb); |
467 | btrfs_tree_unlock(eb); | 499 | btrfs_tree_unlock(eb); |
468 | free_extent_buffer(eb); | 500 | free_extent_buffer(eb); |
469 | 501 | ||
470 | btrfs_extent_post_op(trans, fs_info->tree_root); | 502 | ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); |
503 | BUG_ON(ret); | ||
471 | 504 | ||
472 | while (!list_empty(&fs_info->dirty_cowonly_roots)) { | 505 | while (!list_empty(&fs_info->dirty_cowonly_roots)) { |
473 | next = fs_info->dirty_cowonly_roots.next; | 506 | next = fs_info->dirty_cowonly_roots.next; |
@@ -475,6 +508,9 @@ int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, | |||
475 | root = list_entry(next, struct btrfs_root, dirty_list); | 508 | root = list_entry(next, struct btrfs_root, dirty_list); |
476 | 509 | ||
477 | update_cowonly_root(trans, root); | 510 | update_cowonly_root(trans, root); |
511 | |||
512 | ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); | ||
513 | BUG_ON(ret); | ||
478 | } | 514 | } |
479 | return 0; | 515 | return 0; |
480 | } | 516 | } |
@@ -635,6 +671,31 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly) | |||
635 | } | 671 | } |
636 | 672 | ||
637 | /* | 673 | /* |
674 | * when dropping snapshots, we generate a ton of delayed refs, and it makes | ||
675 | * sense not to join the transaction while it is trying to flush the current | ||
676 | * queue of delayed refs out. | ||
677 | * | ||
678 | * This is used by the drop snapshot code only | ||
679 | */ | ||
680 | static noinline int wait_transaction_pre_flush(struct btrfs_fs_info *info) | ||
681 | { | ||
682 | DEFINE_WAIT(wait); | ||
683 | |||
684 | mutex_lock(&info->trans_mutex); | ||
685 | while (info->running_transaction && | ||
686 | info->running_transaction->delayed_refs.flushing) { | ||
687 | prepare_to_wait(&info->transaction_wait, &wait, | ||
688 | TASK_UNINTERRUPTIBLE); | ||
689 | mutex_unlock(&info->trans_mutex); | ||
690 | schedule(); | ||
691 | mutex_lock(&info->trans_mutex); | ||
692 | finish_wait(&info->transaction_wait, &wait); | ||
693 | } | ||
694 | mutex_unlock(&info->trans_mutex); | ||
695 | return 0; | ||
696 | } | ||
697 | |||
698 | /* | ||
638 | * Given a list of roots that need to be deleted, call btrfs_drop_snapshot on | 699 | * Given a list of roots that need to be deleted, call btrfs_drop_snapshot on |
639 | * all of them | 700 | * all of them |
640 | */ | 701 | */ |
@@ -661,7 +722,22 @@ static noinline int drop_dirty_roots(struct btrfs_root *tree_root, | |||
661 | atomic_inc(&root->fs_info->throttles); | 722 | atomic_inc(&root->fs_info->throttles); |
662 | 723 | ||
663 | while (1) { | 724 | while (1) { |
725 | /* | ||
726 | * we don't want to jump in and create a bunch of | ||
727 | * delayed refs if the transaction is starting to close | ||
728 | */ | ||
729 | wait_transaction_pre_flush(tree_root->fs_info); | ||
664 | trans = btrfs_start_transaction(tree_root, 1); | 730 | trans = btrfs_start_transaction(tree_root, 1); |
731 | |||
732 | /* | ||
733 | * we've joined a transaction, make sure it isn't | ||
734 | * closing right now | ||
735 | */ | ||
736 | if (trans->transaction->delayed_refs.flushing) { | ||
737 | btrfs_end_transaction(trans, tree_root); | ||
738 | continue; | ||
739 | } | ||
740 | |||
665 | mutex_lock(&root->fs_info->drop_mutex); | 741 | mutex_lock(&root->fs_info->drop_mutex); |
666 | ret = btrfs_drop_snapshot(trans, dirty->root); | 742 | ret = btrfs_drop_snapshot(trans, dirty->root); |
667 | if (ret != -EAGAIN) | 743 | if (ret != -EAGAIN) |
@@ -766,7 +842,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
766 | btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); | 842 | btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); |
767 | 843 | ||
768 | old = btrfs_lock_root_node(root); | 844 | old = btrfs_lock_root_node(root); |
769 | btrfs_cow_block(trans, root, old, NULL, 0, &old, 0); | 845 | btrfs_cow_block(trans, root, old, NULL, 0, &old); |
770 | 846 | ||
771 | btrfs_copy_root(trans, root, old, &tmp, objectid); | 847 | btrfs_copy_root(trans, root, old, &tmp, objectid); |
772 | btrfs_tree_unlock(old); | 848 | btrfs_tree_unlock(old); |
@@ -894,12 +970,32 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
894 | struct extent_io_tree *pinned_copy; | 970 | struct extent_io_tree *pinned_copy; |
895 | DEFINE_WAIT(wait); | 971 | DEFINE_WAIT(wait); |
896 | int ret; | 972 | int ret; |
973 | int should_grow = 0; | ||
974 | unsigned long now = get_seconds(); | ||
975 | int flush_on_commit = btrfs_test_opt(root, FLUSHONCOMMIT); | ||
976 | |||
977 | btrfs_run_ordered_operations(root, 0); | ||
978 | |||
979 | /* make a pass through all the delayed refs we have so far | ||
980 | * any running procs may add more while we are here | ||
981 | */ | ||
982 | ret = btrfs_run_delayed_refs(trans, root, 0); | ||
983 | BUG_ON(ret); | ||
984 | |||
985 | cur_trans = trans->transaction; | ||
986 | /* | ||
987 | * set the flushing flag so procs in this transaction have to | ||
988 | * start sending their work down. | ||
989 | */ | ||
990 | cur_trans->delayed_refs.flushing = 1; | ||
991 | |||
992 | ret = btrfs_run_delayed_refs(trans, root, 0); | ||
993 | BUG_ON(ret); | ||
897 | 994 | ||
898 | INIT_LIST_HEAD(&dirty_fs_roots); | ||
899 | mutex_lock(&root->fs_info->trans_mutex); | 995 | mutex_lock(&root->fs_info->trans_mutex); |
900 | if (trans->transaction->in_commit) { | 996 | INIT_LIST_HEAD(&dirty_fs_roots); |
901 | cur_trans = trans->transaction; | 997 | if (cur_trans->in_commit) { |
902 | trans->transaction->use_count++; | 998 | cur_trans->use_count++; |
903 | mutex_unlock(&root->fs_info->trans_mutex); | 999 | mutex_unlock(&root->fs_info->trans_mutex); |
904 | btrfs_end_transaction(trans, root); | 1000 | btrfs_end_transaction(trans, root); |
905 | 1001 | ||
@@ -922,7 +1018,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
922 | 1018 | ||
923 | trans->transaction->in_commit = 1; | 1019 | trans->transaction->in_commit = 1; |
924 | trans->transaction->blocked = 1; | 1020 | trans->transaction->blocked = 1; |
925 | cur_trans = trans->transaction; | ||
926 | if (cur_trans->list.prev != &root->fs_info->trans_list) { | 1021 | if (cur_trans->list.prev != &root->fs_info->trans_list) { |
927 | prev_trans = list_entry(cur_trans->list.prev, | 1022 | prev_trans = list_entry(cur_trans->list.prev, |
928 | struct btrfs_transaction, list); | 1023 | struct btrfs_transaction, list); |
@@ -937,6 +1032,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
937 | } | 1032 | } |
938 | } | 1033 | } |
939 | 1034 | ||
1035 | if (now < cur_trans->start_time || now - cur_trans->start_time < 1) | ||
1036 | should_grow = 1; | ||
1037 | |||
940 | do { | 1038 | do { |
941 | int snap_pending = 0; | 1039 | int snap_pending = 0; |
942 | joined = cur_trans->num_joined; | 1040 | joined = cur_trans->num_joined; |
@@ -949,26 +1047,42 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
949 | 1047 | ||
950 | if (cur_trans->num_writers > 1) | 1048 | if (cur_trans->num_writers > 1) |
951 | timeout = MAX_SCHEDULE_TIMEOUT; | 1049 | timeout = MAX_SCHEDULE_TIMEOUT; |
952 | else | 1050 | else if (should_grow) |
953 | timeout = 1; | 1051 | timeout = 1; |
954 | 1052 | ||
955 | mutex_unlock(&root->fs_info->trans_mutex); | 1053 | mutex_unlock(&root->fs_info->trans_mutex); |
956 | 1054 | ||
957 | if (snap_pending) { | 1055 | if (flush_on_commit || snap_pending) { |
1056 | if (flush_on_commit) | ||
1057 | btrfs_start_delalloc_inodes(root); | ||
958 | ret = btrfs_wait_ordered_extents(root, 1); | 1058 | ret = btrfs_wait_ordered_extents(root, 1); |
959 | BUG_ON(ret); | 1059 | BUG_ON(ret); |
960 | } | 1060 | } |
961 | 1061 | ||
962 | schedule_timeout(timeout); | 1062 | /* |
1063 | * rename doesn't use btrfs_join_transaction, so once we | ||
1064 | * set the transaction to blocked above, we aren't going | ||
1065 | * to get any new ordered operations. We can safely run | ||
1066 | * it here and know for sure that nothing new will be added | ||
1067 | * to the list | ||
1068 | */ | ||
1069 | btrfs_run_ordered_operations(root, 1); | ||
1070 | |||
1071 | smp_mb(); | ||
1072 | if (cur_trans->num_writers > 1 || should_grow) | ||
1073 | schedule_timeout(timeout); | ||
963 | 1074 | ||
964 | mutex_lock(&root->fs_info->trans_mutex); | 1075 | mutex_lock(&root->fs_info->trans_mutex); |
965 | finish_wait(&cur_trans->writer_wait, &wait); | 1076 | finish_wait(&cur_trans->writer_wait, &wait); |
966 | } while (cur_trans->num_writers > 1 || | 1077 | } while (cur_trans->num_writers > 1 || |
967 | (cur_trans->num_joined != joined)); | 1078 | (should_grow && cur_trans->num_joined != joined)); |
968 | 1079 | ||
969 | ret = create_pending_snapshots(trans, root->fs_info); | 1080 | ret = create_pending_snapshots(trans, root->fs_info); |
970 | BUG_ON(ret); | 1081 | BUG_ON(ret); |
971 | 1082 | ||
1083 | ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); | ||
1084 | BUG_ON(ret); | ||
1085 | |||
972 | WARN_ON(cur_trans != trans->transaction); | 1086 | WARN_ON(cur_trans != trans->transaction); |
973 | 1087 | ||
974 | /* btrfs_commit_tree_roots is responsible for getting the | 1088 | /* btrfs_commit_tree_roots is responsible for getting the |
@@ -1032,6 +1146,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1032 | btrfs_copy_pinned(root, pinned_copy); | 1146 | btrfs_copy_pinned(root, pinned_copy); |
1033 | 1147 | ||
1034 | trans->transaction->blocked = 0; | 1148 | trans->transaction->blocked = 0; |
1149 | |||
1035 | wake_up(&root->fs_info->transaction_throttle); | 1150 | wake_up(&root->fs_info->transaction_throttle); |
1036 | wake_up(&root->fs_info->transaction_wait); | 1151 | wake_up(&root->fs_info->transaction_wait); |
1037 | 1152 | ||
@@ -1058,6 +1173,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1058 | mutex_lock(&root->fs_info->trans_mutex); | 1173 | mutex_lock(&root->fs_info->trans_mutex); |
1059 | 1174 | ||
1060 | cur_trans->commit_done = 1; | 1175 | cur_trans->commit_done = 1; |
1176 | |||
1061 | root->fs_info->last_trans_committed = cur_trans->transid; | 1177 | root->fs_info->last_trans_committed = cur_trans->transid; |
1062 | wake_up(&cur_trans->commit_wait); | 1178 | wake_up(&cur_trans->commit_wait); |
1063 | 1179 | ||
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index ea292117f882..94f5bde2b58d 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h | |||
@@ -19,10 +19,16 @@ | |||
19 | #ifndef __BTRFS_TRANSACTION__ | 19 | #ifndef __BTRFS_TRANSACTION__ |
20 | #define __BTRFS_TRANSACTION__ | 20 | #define __BTRFS_TRANSACTION__ |
21 | #include "btrfs_inode.h" | 21 | #include "btrfs_inode.h" |
22 | #include "delayed-ref.h" | ||
22 | 23 | ||
23 | struct btrfs_transaction { | 24 | struct btrfs_transaction { |
24 | u64 transid; | 25 | u64 transid; |
26 | /* | ||
27 | * total writers in this transaction, it must be zero before the | ||
28 | * transaction can end | ||
29 | */ | ||
25 | unsigned long num_writers; | 30 | unsigned long num_writers; |
31 | |||
26 | unsigned long num_joined; | 32 | unsigned long num_joined; |
27 | int in_commit; | 33 | int in_commit; |
28 | int use_count; | 34 | int use_count; |
@@ -34,6 +40,7 @@ struct btrfs_transaction { | |||
34 | wait_queue_head_t writer_wait; | 40 | wait_queue_head_t writer_wait; |
35 | wait_queue_head_t commit_wait; | 41 | wait_queue_head_t commit_wait; |
36 | struct list_head pending_snapshots; | 42 | struct list_head pending_snapshots; |
43 | struct btrfs_delayed_ref_root delayed_refs; | ||
37 | }; | 44 | }; |
38 | 45 | ||
39 | struct btrfs_trans_handle { | 46 | struct btrfs_trans_handle { |
@@ -44,6 +51,7 @@ struct btrfs_trans_handle { | |||
44 | u64 block_group; | 51 | u64 block_group; |
45 | u64 alloc_exclude_start; | 52 | u64 alloc_exclude_start; |
46 | u64 alloc_exclude_nr; | 53 | u64 alloc_exclude_nr; |
54 | unsigned long delayed_ref_updates; | ||
47 | }; | 55 | }; |
48 | 56 | ||
49 | struct btrfs_pending_snapshot { | 57 | struct btrfs_pending_snapshot { |
diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index 98d25fa4570e..b10eacdb1620 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c | |||
@@ -124,8 +124,6 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, | |||
124 | } | 124 | } |
125 | 125 | ||
126 | btrfs_release_path(root, path); | 126 | btrfs_release_path(root, path); |
127 | if (is_extent) | ||
128 | btrfs_extent_post_op(trans, root); | ||
129 | out: | 127 | out: |
130 | if (path) | 128 | if (path) |
131 | btrfs_free_path(path); | 129 | btrfs_free_path(path); |
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 9c462fbd60fa..25f20ea11f27 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
@@ -35,6 +35,49 @@ | |||
35 | #define LOG_INODE_EXISTS 1 | 35 | #define LOG_INODE_EXISTS 1 |
36 | 36 | ||
37 | /* | 37 | /* |
38 | * directory trouble cases | ||
39 | * | ||
40 | * 1) on rename or unlink, if the inode being unlinked isn't in the fsync | ||
41 | * log, we must force a full commit before doing an fsync of the directory | ||
42 | * where the unlink was done. | ||
43 | * ---> record transid of last unlink/rename per directory | ||
44 | * | ||
45 | * mkdir foo/some_dir | ||
46 | * normal commit | ||
47 | * rename foo/some_dir foo2/some_dir | ||
48 | * mkdir foo/some_dir | ||
49 | * fsync foo/some_dir/some_file | ||
50 | * | ||
51 | * The fsync above will unlink the original some_dir without recording | ||
52 | * it in its new location (foo2). After a crash, some_dir will be gone | ||
53 | * unless the fsync of some_file forces a full commit | ||
54 | * | ||
55 | * 2) we must log any new names for any file or dir that is in the fsync | ||
56 | * log. ---> check inode while renaming/linking. | ||
57 | * | ||
58 | * 2a) we must log any new names for any file or dir during rename | ||
59 | * when the directory they are being removed from was logged. | ||
60 | * ---> check inode and old parent dir during rename | ||
61 | * | ||
62 | * 2a is actually the more important variant. Without the extra logging | ||
63 | * a crash might unlink the old name without recreating the new one | ||
64 | * | ||
65 | * 3) after a crash, we must go through any directories with a link count | ||
66 | * of zero and redo the rm -rf | ||
67 | * | ||
68 | * mkdir f1/foo | ||
69 | * normal commit | ||
70 | * rm -rf f1/foo | ||
71 | * fsync(f1) | ||
72 | * | ||
73 | * The directory f1 was fully removed from the FS, but fsync was never | ||
74 | * called on f1, only its parent dir. After a crash the rm -rf must | ||
75 | * be replayed. This must be able to recurse down the entire | ||
76 | * directory tree. The inode link count fixup code takes care of the | ||
77 | * ugly details. | ||
78 | */ | ||
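Case 1 above ("record transid of last unlink/rename per directory") implies a small helper that stamps the parent directory whenever a name disappears from it. A minimal sketch under that assumption, reusing the per-inode log_mutex and the last_unlink_trans field this series introduces (the field names match code later in this patch, but the helper itself is illustrative):

	/* sketch: stamp the dir so a later fsync knows a full commit is needed */
	static void record_unlink_dir(struct btrfs_trans_handle *trans,
				      struct inode *dir)
	{
		mutex_lock(&BTRFS_I(dir)->log_mutex);
		BTRFS_I(dir)->last_unlink_trans = trans->transid;
		mutex_unlock(&BTRFS_I(dir)->log_mutex);
	}

check_parent_dirs_for_sync() further down compares exactly this field against the last committed transid.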
79 | |||
80 | /* | ||
38 | * stages for the tree walking. The first | 81 | * stages for the tree walking. The first |
39 | * stage (0) is to only pin down the blocks we find | 82 | * stage (0) is to only pin down the blocks we find |
40 | * the second stage (1) is to make sure that all the inodes | 83 | * the second stage (1) is to make sure that all the inodes |
@@ -47,12 +90,17 @@ | |||
47 | #define LOG_WALK_REPLAY_INODES 1 | 90 | #define LOG_WALK_REPLAY_INODES 1 |
48 | #define LOG_WALK_REPLAY_ALL 2 | 91 | #define LOG_WALK_REPLAY_ALL 2 |
49 | 92 | ||
50 | static int __btrfs_log_inode(struct btrfs_trans_handle *trans, | 93 | static int btrfs_log_inode(struct btrfs_trans_handle *trans, |
51 | struct btrfs_root *root, struct inode *inode, | 94 | struct btrfs_root *root, struct inode *inode, |
52 | int inode_only); | 95 | int inode_only); |
53 | static int link_to_fixup_dir(struct btrfs_trans_handle *trans, | 96 | static int link_to_fixup_dir(struct btrfs_trans_handle *trans, |
54 | struct btrfs_root *root, | 97 | struct btrfs_root *root, |
55 | struct btrfs_path *path, u64 objectid); | 98 | struct btrfs_path *path, u64 objectid); |
99 | static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans, | ||
100 | struct btrfs_root *root, | ||
101 | struct btrfs_root *log, | ||
102 | struct btrfs_path *path, | ||
103 | u64 dirid, int del_all); | ||
56 | 104 | ||
57 | /* | 105 | /* |
58 | * tree logging is a special write ahead log used to make sure that | 106 | * tree logging is a special write ahead log used to make sure that |
@@ -133,10 +181,25 @@ static int join_running_log_trans(struct btrfs_root *root) | |||
133 | } | 181 | } |
134 | 182 | ||
135 | /* | 183 | /* |
184 | * This either makes the current running log transaction wait | ||
185 | * until you call btrfs_end_log_trans() or it makes any future | ||
186 | * log transactions wait until you call btrfs_end_log_trans() | ||
187 | */ | ||
188 | int btrfs_pin_log_trans(struct btrfs_root *root) | ||
189 | { | ||
190 | int ret = -ENOENT; | ||
191 | |||
192 | mutex_lock(&root->log_mutex); | ||
193 | atomic_inc(&root->log_writers); | ||
194 | mutex_unlock(&root->log_mutex); | ||
195 | return ret; | ||
196 | } | ||
197 | |||
198 | /* | ||
136 | * indicate we're done making changes to the log tree | 199 | * indicate we're done making changes to the log tree |
137 | * and wake up anyone waiting to do a sync | 200 | * and wake up anyone waiting to do a sync |
138 | */ | 201 | */ |
139 | static int end_log_trans(struct btrfs_root *root) | 202 | int btrfs_end_log_trans(struct btrfs_root *root) |
140 | { | 203 | { |
141 | if (atomic_dec_and_test(&root->log_writers)) { | 204 | if (atomic_dec_and_test(&root->log_writers)) { |
142 | smp_mb(); | 205 | smp_mb(); |
@@ -199,12 +262,9 @@ static int process_one_buffer(struct btrfs_root *log, | |||
199 | struct extent_buffer *eb, | 262 | struct extent_buffer *eb, |
200 | struct walk_control *wc, u64 gen) | 263 | struct walk_control *wc, u64 gen) |
201 | { | 264 | { |
202 | if (wc->pin) { | 265 | if (wc->pin) |
203 | mutex_lock(&log->fs_info->pinned_mutex); | ||
204 | btrfs_update_pinned_extents(log->fs_info->extent_root, | 266 | btrfs_update_pinned_extents(log->fs_info->extent_root, |
205 | eb->start, eb->len, 1); | 267 | eb->start, eb->len, 1); |
206 | mutex_unlock(&log->fs_info->pinned_mutex); | ||
207 | } | ||
208 | 268 | ||
209 | if (btrfs_buffer_uptodate(eb, gen)) { | 269 | if (btrfs_buffer_uptodate(eb, gen)) { |
210 | if (wc->write) | 270 | if (wc->write) |
@@ -603,6 +663,7 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans, | |||
603 | 663 | ||
604 | ret = link_to_fixup_dir(trans, root, path, location.objectid); | 664 | ret = link_to_fixup_dir(trans, root, path, location.objectid); |
605 | BUG_ON(ret); | 665 | BUG_ON(ret); |
666 | |||
606 | ret = btrfs_unlink_inode(trans, root, dir, inode, name, name_len); | 667 | ret = btrfs_unlink_inode(trans, root, dir, inode, name, name_len); |
607 | BUG_ON(ret); | 668 | BUG_ON(ret); |
608 | kfree(name); | 669 | kfree(name); |
@@ -804,6 +865,7 @@ conflict_again: | |||
804 | victim_name_len)) { | 865 | victim_name_len)) { |
805 | btrfs_inc_nlink(inode); | 866 | btrfs_inc_nlink(inode); |
806 | btrfs_release_path(root, path); | 867 | btrfs_release_path(root, path); |
868 | |||
807 | ret = btrfs_unlink_inode(trans, root, dir, | 869 | ret = btrfs_unlink_inode(trans, root, dir, |
808 | inode, victim_name, | 870 | inode, victim_name, |
809 | victim_name_len); | 871 | victim_name_len); |
@@ -922,13 +984,20 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans, | |||
922 | key.offset--; | 984 | key.offset--; |
923 | btrfs_release_path(root, path); | 985 | btrfs_release_path(root, path); |
924 | } | 986 | } |
925 | btrfs_free_path(path); | 987 | btrfs_release_path(root, path); |
926 | if (nlink != inode->i_nlink) { | 988 | if (nlink != inode->i_nlink) { |
927 | inode->i_nlink = nlink; | 989 | inode->i_nlink = nlink; |
928 | btrfs_update_inode(trans, root, inode); | 990 | btrfs_update_inode(trans, root, inode); |
929 | } | 991 | } |
930 | BTRFS_I(inode)->index_cnt = (u64)-1; | 992 | BTRFS_I(inode)->index_cnt = (u64)-1; |
931 | 993 | ||
994 | if (inode->i_nlink == 0 && S_ISDIR(inode->i_mode)) { | ||
995 | ret = replay_dir_deletes(trans, root, NULL, path, | ||
996 | inode->i_ino, 1); | ||
997 | BUG_ON(ret); | ||
998 | } | ||
999 | btrfs_free_path(path); | ||
1000 | |||
932 | return 0; | 1001 | return 0; |
933 | } | 1002 | } |
934 | 1003 | ||
@@ -971,9 +1040,12 @@ static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans, | |||
971 | 1040 | ||
972 | iput(inode); | 1041 | iput(inode); |
973 | 1042 | ||
974 | if (key.offset == 0) | 1043 | /* |
975 | break; | 1044 | * fixup on a directory may create new entries, |
976 | key.offset--; | 1045 | * make sure we always look for the highest possible |
1046 | * offset | ||
1047 | */ | ||
1048 | key.offset = (u64)-1; | ||
977 | } | 1049 | } |
978 | btrfs_release_path(root, path); | 1050 | btrfs_release_path(root, path); |
979 | return 0; | 1051 | return 0; |
@@ -1150,8 +1222,7 @@ insert: | |||
1150 | ret = insert_one_name(trans, root, path, key->objectid, key->offset, | 1222 | ret = insert_one_name(trans, root, path, key->objectid, key->offset, |
1151 | name, name_len, log_type, &log_key); | 1223 | name, name_len, log_type, &log_key); |
1152 | 1224 | ||
1153 | if (ret && ret != -ENOENT) | 1225 | BUG_ON(ret && ret != -ENOENT); |
1154 | BUG(); | ||
1155 | goto out; | 1226 | goto out; |
1156 | } | 1227 | } |
1157 | 1228 | ||
@@ -1313,11 +1384,11 @@ again: | |||
1313 | read_extent_buffer(eb, name, (unsigned long)(di + 1), | 1384 | read_extent_buffer(eb, name, (unsigned long)(di + 1), |
1314 | name_len); | 1385 | name_len); |
1315 | log_di = NULL; | 1386 | log_di = NULL; |
1316 | if (dir_key->type == BTRFS_DIR_ITEM_KEY) { | 1387 | if (log && dir_key->type == BTRFS_DIR_ITEM_KEY) { |
1317 | log_di = btrfs_lookup_dir_item(trans, log, log_path, | 1388 | log_di = btrfs_lookup_dir_item(trans, log, log_path, |
1318 | dir_key->objectid, | 1389 | dir_key->objectid, |
1319 | name, name_len, 0); | 1390 | name, name_len, 0); |
1320 | } else if (dir_key->type == BTRFS_DIR_INDEX_KEY) { | 1391 | } else if (log && dir_key->type == BTRFS_DIR_INDEX_KEY) { |
1321 | log_di = btrfs_lookup_dir_index_item(trans, log, | 1392 | log_di = btrfs_lookup_dir_index_item(trans, log, |
1322 | log_path, | 1393 | log_path, |
1323 | dir_key->objectid, | 1394 | dir_key->objectid, |
@@ -1378,7 +1449,7 @@ static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans, | |||
1378 | struct btrfs_root *root, | 1449 | struct btrfs_root *root, |
1379 | struct btrfs_root *log, | 1450 | struct btrfs_root *log, |
1380 | struct btrfs_path *path, | 1451 | struct btrfs_path *path, |
1381 | u64 dirid) | 1452 | u64 dirid, int del_all) |
1382 | { | 1453 | { |
1383 | u64 range_start; | 1454 | u64 range_start; |
1384 | u64 range_end; | 1455 | u64 range_end; |
@@ -1408,10 +1479,14 @@ again: | |||
1408 | range_start = 0; | 1479 | range_start = 0; |
1409 | range_end = 0; | 1480 | range_end = 0; |
1410 | while (1) { | 1481 | while (1) { |
1411 | ret = find_dir_range(log, path, dirid, key_type, | 1482 | if (del_all) |
1412 | &range_start, &range_end); | 1483 | range_end = (u64)-1; |
1413 | if (ret != 0) | 1484 | else { |
1414 | break; | 1485 | ret = find_dir_range(log, path, dirid, key_type, |
1486 | &range_start, &range_end); | ||
1487 | if (ret != 0) | ||
1488 | break; | ||
1489 | } | ||
1415 | 1490 | ||
1416 | dir_key.offset = range_start; | 1491 | dir_key.offset = range_start; |
1417 | while (1) { | 1492 | while (1) { |
@@ -1437,7 +1512,8 @@ again: | |||
1437 | break; | 1512 | break; |
1438 | 1513 | ||
1439 | ret = check_item_in_log(trans, root, log, path, | 1514 | ret = check_item_in_log(trans, root, log, path, |
1440 | log_path, dir, &found_key); | 1515 | log_path, dir, |
1516 | &found_key); | ||
1441 | BUG_ON(ret); | 1517 | BUG_ON(ret); |
1442 | if (found_key.offset == (u64)-1) | 1518 | if (found_key.offset == (u64)-1) |
1443 | break; | 1519 | break; |
@@ -1514,7 +1590,7 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, | |||
1514 | mode = btrfs_inode_mode(eb, inode_item); | 1590 | mode = btrfs_inode_mode(eb, inode_item); |
1515 | if (S_ISDIR(mode)) { | 1591 | if (S_ISDIR(mode)) { |
1516 | ret = replay_dir_deletes(wc->trans, | 1592 | ret = replay_dir_deletes(wc->trans, |
1517 | root, log, path, key.objectid); | 1593 | root, log, path, key.objectid, 0); |
1518 | BUG_ON(ret); | 1594 | BUG_ON(ret); |
1519 | } | 1595 | } |
1520 | ret = overwrite_item(wc->trans, root, path, | 1596 | ret = overwrite_item(wc->trans, root, path, |
@@ -1533,6 +1609,17 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, | |||
1533 | root, inode, inode->i_size, | 1609 | root, inode, inode->i_size, |
1534 | BTRFS_EXTENT_DATA_KEY); | 1610 | BTRFS_EXTENT_DATA_KEY); |
1535 | BUG_ON(ret); | 1611 | BUG_ON(ret); |
1612 | |||
1613 | /* if the nlink count is zero here, the iput | ||
1614 | * will free the inode. We bump it to make | ||
1615 | * sure it doesn't get freed until the link | ||
1616 | * count fixup is done | ||
1617 | */ | ||
1618 | if (inode->i_nlink == 0) { | ||
1619 | btrfs_inc_nlink(inode); | ||
1620 | btrfs_update_inode(wc->trans, | ||
1621 | root, inode); | ||
1622 | } | ||
1536 | iput(inode); | 1623 | iput(inode); |
1537 | } | 1624 | } |
1538 | ret = link_to_fixup_dir(wc->trans, root, | 1625 | ret = link_to_fixup_dir(wc->trans, root, |
@@ -1840,7 +1927,8 @@ static int update_log_root(struct btrfs_trans_handle *trans, | |||
1840 | return ret; | 1927 | return ret; |
1841 | } | 1928 | } |
1842 | 1929 | ||
1843 | static int wait_log_commit(struct btrfs_root *root, unsigned long transid) | 1930 | static int wait_log_commit(struct btrfs_trans_handle *trans, |
1931 | struct btrfs_root *root, unsigned long transid) | ||
1844 | { | 1932 | { |
1845 | DEFINE_WAIT(wait); | 1933 | DEFINE_WAIT(wait); |
1846 | int index = transid % 2; | 1934 | int index = transid % 2; |
@@ -1854,9 +1942,12 @@ static int wait_log_commit(struct btrfs_root *root, unsigned long transid) | |||
1854 | prepare_to_wait(&root->log_commit_wait[index], | 1942 | prepare_to_wait(&root->log_commit_wait[index], |
1855 | &wait, TASK_UNINTERRUPTIBLE); | 1943 | &wait, TASK_UNINTERRUPTIBLE); |
1856 | mutex_unlock(&root->log_mutex); | 1944 | mutex_unlock(&root->log_mutex); |
1857 | if (root->log_transid < transid + 2 && | 1945 | |
1946 | if (root->fs_info->last_trans_log_full_commit != | ||
1947 | trans->transid && root->log_transid < transid + 2 && | ||
1858 | atomic_read(&root->log_commit[index])) | 1948 | atomic_read(&root->log_commit[index])) |
1859 | schedule(); | 1949 | schedule(); |
1950 | |||
1860 | finish_wait(&root->log_commit_wait[index], &wait); | 1951 | finish_wait(&root->log_commit_wait[index], &wait); |
1861 | mutex_lock(&root->log_mutex); | 1952 | mutex_lock(&root->log_mutex); |
1862 | } while (root->log_transid < transid + 2 && | 1953 | } while (root->log_transid < transid + 2 && |
@@ -1864,14 +1955,16 @@ static int wait_log_commit(struct btrfs_root *root, unsigned long transid) | |||
1864 | return 0; | 1955 | return 0; |
1865 | } | 1956 | } |
1866 | 1957 | ||
1867 | static int wait_for_writer(struct btrfs_root *root) | 1958 | static int wait_for_writer(struct btrfs_trans_handle *trans, |
1959 | struct btrfs_root *root) | ||
1868 | { | 1960 | { |
1869 | DEFINE_WAIT(wait); | 1961 | DEFINE_WAIT(wait); |
1870 | while (atomic_read(&root->log_writers)) { | 1962 | while (atomic_read(&root->log_writers)) { |
1871 | prepare_to_wait(&root->log_writer_wait, | 1963 | prepare_to_wait(&root->log_writer_wait, |
1872 | &wait, TASK_UNINTERRUPTIBLE); | 1964 | &wait, TASK_UNINTERRUPTIBLE); |
1873 | mutex_unlock(&root->log_mutex); | 1965 | mutex_unlock(&root->log_mutex); |
1874 | if (atomic_read(&root->log_writers)) | 1966 | if (root->fs_info->last_trans_log_full_commit != |
1967 | trans->transid && atomic_read(&root->log_writers)) | ||
1875 | schedule(); | 1968 | schedule(); |
1876 | mutex_lock(&root->log_mutex); | 1969 | mutex_lock(&root->log_mutex); |
1877 | finish_wait(&root->log_writer_wait, &wait); | 1970 | finish_wait(&root->log_writer_wait, &wait); |
@@ -1882,7 +1975,14 @@ static int wait_for_writer(struct btrfs_root *root) | |||
1882 | /* | 1975 | /* |
1883 | * btrfs_sync_log sends a given tree log down to the disk and | 1976 | * btrfs_sync_log sends a given tree log down to the disk and |
1884 | * updates the super blocks to record it. When this call is done, | 1977 | * updates the super blocks to record it. When this call is done, |
1885 | * you know that any inodes previously logged are safely on disk | 1978 | * you know that any inodes previously logged are safely on disk only |
1979 | * if it returns 0. | ||
1980 | * | ||
1981 | * Any other return value means you need to call btrfs_commit_transaction. | ||
1982 | * Some of the edge cases for fsyncing directories that have had unlinks | ||
1983 | * or renames done in the past mean that sometimes the only safe | ||
1984 | * fsync is to commit the whole FS. When btrfs_sync_log returns -EAGAIN, | ||
1985 | * that has happened. | ||
1886 | */ | 1986 | */ |
1887 | int btrfs_sync_log(struct btrfs_trans_handle *trans, | 1987 | int btrfs_sync_log(struct btrfs_trans_handle *trans, |
1888 | struct btrfs_root *root) | 1988 | struct btrfs_root *root) |
@@ -1896,7 +1996,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
1896 | mutex_lock(&root->log_mutex); | 1996 | mutex_lock(&root->log_mutex); |
1897 | index1 = root->log_transid % 2; | 1997 | index1 = root->log_transid % 2; |
1898 | if (atomic_read(&root->log_commit[index1])) { | 1998 | if (atomic_read(&root->log_commit[index1])) { |
1899 | wait_log_commit(root, root->log_transid); | 1999 | wait_log_commit(trans, root, root->log_transid); |
1900 | mutex_unlock(&root->log_mutex); | 2000 | mutex_unlock(&root->log_mutex); |
1901 | return 0; | 2001 | return 0; |
1902 | } | 2002 | } |
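Given the contract spelled out above, a caller must treat any nonzero return from btrfs_sync_log() as "the log was not enough" and fall back to a full commit. The fsync path would take roughly this shape (a sketch with error handling trimmed; the wrapper name is invented):

	/* sketch: fsync-side handling of the new return contract */
	static int sync_or_commit(struct btrfs_trans_handle *trans,
				  struct btrfs_root *root)
	{
		int ret = btrfs_sync_log(trans, root);

		if (ret == 0)
			return btrfs_end_transaction(trans, root);	/* log sync sufficed */
		return btrfs_commit_transaction(trans, root);		/* e.g. -EAGAIN */
	}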
@@ -1904,18 +2004,26 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
1904 | 2004 | ||
1905 | /* wait for previous tree log sync to complete */ | 2005 | /* wait for previous tree log sync to complete */ |
1906 | if (atomic_read(&root->log_commit[(index1 + 1) % 2])) | 2006 | if (atomic_read(&root->log_commit[(index1 + 1) % 2])) |
1907 | wait_log_commit(root, root->log_transid - 1); | 2007 | wait_log_commit(trans, root, root->log_transid - 1); |
1908 | 2008 | ||
1909 | while (1) { | 2009 | while (1) { |
1910 | unsigned long batch = root->log_batch; | 2010 | unsigned long batch = root->log_batch; |
1911 | mutex_unlock(&root->log_mutex); | 2011 | mutex_unlock(&root->log_mutex); |
1912 | schedule_timeout_uninterruptible(1); | 2012 | schedule_timeout_uninterruptible(1); |
1913 | mutex_lock(&root->log_mutex); | 2013 | mutex_lock(&root->log_mutex); |
1914 | wait_for_writer(root); | 2014 | |
2015 | wait_for_writer(trans, root); | ||
1915 | if (batch == root->log_batch) | 2016 | if (batch == root->log_batch) |
1916 | break; | 2017 | break; |
1917 | } | 2018 | } |
1918 | 2019 | ||
2020 | /* bail out if we need to do a full commit */ | ||
2021 | if (root->fs_info->last_trans_log_full_commit == trans->transid) { | ||
2022 | ret = -EAGAIN; | ||
2023 | mutex_unlock(&root->log_mutex); | ||
2024 | goto out; | ||
2025 | } | ||
2026 | |||
1919 | ret = btrfs_write_and_wait_marked_extents(log, &log->dirty_log_pages); | 2027 | ret = btrfs_write_and_wait_marked_extents(log, &log->dirty_log_pages); |
1920 | BUG_ON(ret); | 2028 | BUG_ON(ret); |
1921 | 2029 | ||
@@ -1951,16 +2059,29 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
1951 | 2059 | ||
1952 | index2 = log_root_tree->log_transid % 2; | 2060 | index2 = log_root_tree->log_transid % 2; |
1953 | if (atomic_read(&log_root_tree->log_commit[index2])) { | 2061 | if (atomic_read(&log_root_tree->log_commit[index2])) { |
1954 | wait_log_commit(log_root_tree, log_root_tree->log_transid); | 2062 | wait_log_commit(trans, log_root_tree, |
2063 | log_root_tree->log_transid); | ||
1955 | mutex_unlock(&log_root_tree->log_mutex); | 2064 | mutex_unlock(&log_root_tree->log_mutex); |
1956 | goto out; | 2065 | goto out; |
1957 | } | 2066 | } |
1958 | atomic_set(&log_root_tree->log_commit[index2], 1); | 2067 | atomic_set(&log_root_tree->log_commit[index2], 1); |
1959 | 2068 | ||
1960 | if (atomic_read(&log_root_tree->log_commit[(index2 + 1) % 2])) | 2069 | if (atomic_read(&log_root_tree->log_commit[(index2 + 1) % 2])) { |
1961 | wait_log_commit(log_root_tree, log_root_tree->log_transid - 1); | 2070 | wait_log_commit(trans, log_root_tree, |
2071 | log_root_tree->log_transid - 1); | ||
2072 | } | ||
2073 | |||
2074 | wait_for_writer(trans, log_root_tree); | ||
1962 | 2075 | ||
1963 | wait_for_writer(log_root_tree); | 2076 | /* |
2077 | * now that we've moved on to the tree of log tree roots, | ||
2078 | * check the full commit flag again | ||
2079 | */ | ||
2080 | if (root->fs_info->last_trans_log_full_commit == trans->transid) { | ||
2081 | mutex_unlock(&log_root_tree->log_mutex); | ||
2082 | ret = -EAGAIN; | ||
2083 | goto out_wake_log_root; | ||
2084 | } | ||
1964 | 2085 | ||
1965 | ret = btrfs_write_and_wait_marked_extents(log_root_tree, | 2086 | ret = btrfs_write_and_wait_marked_extents(log_root_tree, |
1966 | &log_root_tree->dirty_log_pages); | 2087 | &log_root_tree->dirty_log_pages); |
@@ -1985,7 +2106,9 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
1985 | * in and cause problems either. | 2106 | * in and cause problems either. |
1986 | */ | 2107 | */ |
1987 | write_ctree_super(trans, root->fs_info->tree_root, 2); | 2108 | write_ctree_super(trans, root->fs_info->tree_root, 2); |
2109 | ret = 0; | ||
1988 | 2110 | ||
2111 | out_wake_log_root: | ||
1989 | atomic_set(&log_root_tree->log_commit[index2], 0); | 2112 | atomic_set(&log_root_tree->log_commit[index2], 0); |
1990 | smp_mb(); | 2113 | smp_mb(); |
1991 | if (waitqueue_active(&log_root_tree->log_commit_wait[index2])) | 2114 | if (waitqueue_active(&log_root_tree->log_commit_wait[index2])) |
@@ -1998,7 +2121,8 @@ out: | |||
1998 | return 0; | 2121 | return 0; |
1999 | } | 2122 | } |
2000 | 2123 | ||
2001 | /* * free all the extents used by the tree log. This should be called | 2124 | /* |
2125 | * free all the extents used by the tree log. This should be called | ||
2002 | * at commit time of the full transaction | 2126 | * at commit time of the full transaction |
2003 | */ | 2127 | */ |
2004 | int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root) | 2128 | int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root) |
@@ -2132,7 +2256,7 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, | |||
2132 | 2256 | ||
2133 | btrfs_free_path(path); | 2257 | btrfs_free_path(path); |
2134 | mutex_unlock(&BTRFS_I(dir)->log_mutex); | 2258 | mutex_unlock(&BTRFS_I(dir)->log_mutex); |
2135 | end_log_trans(root); | 2259 | btrfs_end_log_trans(root); |
2136 | 2260 | ||
2137 | return 0; | 2261 | return 0; |
2138 | } | 2262 | } |
@@ -2159,7 +2283,7 @@ int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans, | |||
2159 | ret = btrfs_del_inode_ref(trans, log, name, name_len, inode->i_ino, | 2283 | ret = btrfs_del_inode_ref(trans, log, name, name_len, inode->i_ino, |
2160 | dirid, &index); | 2284 | dirid, &index); |
2161 | mutex_unlock(&BTRFS_I(inode)->log_mutex); | 2285 | mutex_unlock(&BTRFS_I(inode)->log_mutex); |
2162 | end_log_trans(root); | 2286 | btrfs_end_log_trans(root); |
2163 | 2287 | ||
2164 | return ret; | 2288 | return ret; |
2165 | } | 2289 | } |
@@ -2559,7 +2683,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, | |||
2559 | * | 2683 | * |
2560 | * This handles both files and directories. | 2684 | * This handles both files and directories. |
2561 | */ | 2685 | */ |
2562 | static int __btrfs_log_inode(struct btrfs_trans_handle *trans, | 2686 | static int btrfs_log_inode(struct btrfs_trans_handle *trans, |
2563 | struct btrfs_root *root, struct inode *inode, | 2687 | struct btrfs_root *root, struct inode *inode, |
2564 | int inode_only) | 2688 | int inode_only) |
2565 | { | 2689 | { |
@@ -2585,28 +2709,17 @@ static int __btrfs_log_inode(struct btrfs_trans_handle *trans, | |||
2585 | min_key.offset = 0; | 2709 | min_key.offset = 0; |
2586 | 2710 | ||
2587 | max_key.objectid = inode->i_ino; | 2711 | max_key.objectid = inode->i_ino; |
2712 | |||
2713 | /* today the code can only do partial logging of directories */ | ||
2714 | if (!S_ISDIR(inode->i_mode)) | ||
2715 | inode_only = LOG_INODE_ALL; | ||
2716 | |||
2588 | if (inode_only == LOG_INODE_EXISTS || S_ISDIR(inode->i_mode)) | 2717 | if (inode_only == LOG_INODE_EXISTS || S_ISDIR(inode->i_mode)) |
2589 | max_key.type = BTRFS_XATTR_ITEM_KEY; | 2718 | max_key.type = BTRFS_XATTR_ITEM_KEY; |
2590 | else | 2719 | else |
2591 | max_key.type = (u8)-1; | 2720 | max_key.type = (u8)-1; |
2592 | max_key.offset = (u64)-1; | 2721 | max_key.offset = (u64)-1; |
2593 | 2722 | ||
2594 | /* | ||
2595 | * if this inode has already been logged and we're in inode_only | ||
2596 | * mode, we don't want to delete the things that have already | ||
2597 | * been written to the log. | ||
2598 | * | ||
2599 | * But, if the inode has been through an inode_only log, | ||
2600 | * the logged_trans field is not set. This allows us to catch | ||
2601 | * any new names for this inode in the backrefs by logging it | ||
2602 | * again | ||
2603 | */ | ||
2604 | if (inode_only == LOG_INODE_EXISTS && | ||
2605 | BTRFS_I(inode)->logged_trans == trans->transid) { | ||
2606 | btrfs_free_path(path); | ||
2607 | btrfs_free_path(dst_path); | ||
2608 | goto out; | ||
2609 | } | ||
2610 | mutex_lock(&BTRFS_I(inode)->log_mutex); | 2723 | mutex_lock(&BTRFS_I(inode)->log_mutex); |
2611 | 2724 | ||
2612 | /* | 2725 | /* |
@@ -2693,7 +2806,6 @@ next_slot: | |||
2693 | if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) { | 2806 | if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) { |
2694 | btrfs_release_path(root, path); | 2807 | btrfs_release_path(root, path); |
2695 | btrfs_release_path(log, dst_path); | 2808 | btrfs_release_path(log, dst_path); |
2696 | BTRFS_I(inode)->log_dirty_trans = 0; | ||
2697 | ret = log_directory_changes(trans, root, inode, path, dst_path); | 2809 | ret = log_directory_changes(trans, root, inode, path, dst_path); |
2698 | BUG_ON(ret); | 2810 | BUG_ON(ret); |
2699 | } | 2811 | } |
@@ -2702,19 +2814,69 @@ next_slot: | |||
2702 | 2814 | ||
2703 | btrfs_free_path(path); | 2815 | btrfs_free_path(path); |
2704 | btrfs_free_path(dst_path); | 2816 | btrfs_free_path(dst_path); |
2705 | out: | ||
2706 | return 0; | 2817 | return 0; |
2707 | } | 2818 | } |
2708 | 2819 | ||
2709 | int btrfs_log_inode(struct btrfs_trans_handle *trans, | 2820 | /* |
2710 | struct btrfs_root *root, struct inode *inode, | 2821 | * follow the dentry parent pointers up the chain and see if any |
2711 | int inode_only) | 2822 | * of the directories in it require a full commit before they can |
2823 | * be logged. Returns zero if nothing special needs to be done or 1 if | ||
2824 | * a full commit is required. | ||
2825 | */ | ||
2826 | static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans, | ||
2827 | struct inode *inode, | ||
2828 | struct dentry *parent, | ||
2829 | struct super_block *sb, | ||
2830 | u64 last_committed) | ||
2712 | { | 2831 | { |
2713 | int ret; | 2832 | int ret = 0; |
2833 | struct btrfs_root *root; | ||
2714 | 2834 | ||
2715 | start_log_trans(trans, root); | 2835 | /* |
2716 | ret = __btrfs_log_inode(trans, root, inode, inode_only); | 2836 | * for a regular file, if its inode is already on disk, we don't
2717 | end_log_trans(root); | 2837 | * have to worry about the parents at all. This is because |
2838 | * we can use the last_unlink_trans field to record renames | ||
2839 | * and other fun in this file. | ||
2840 | */ | ||
2841 | if (S_ISREG(inode->i_mode) && | ||
2842 | BTRFS_I(inode)->generation <= last_committed && | ||
2843 | BTRFS_I(inode)->last_unlink_trans <= last_committed) | ||
2844 | goto out; | ||
2845 | |||
2846 | if (!S_ISDIR(inode->i_mode)) { | ||
2847 | if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb) | ||
2848 | goto out; | ||
2849 | inode = parent->d_inode; | ||
2850 | } | ||
2851 | |||
2852 | while (1) { | ||
2853 | BTRFS_I(inode)->logged_trans = trans->transid; | ||
2854 | smp_mb(); | ||
2855 | |||
2856 | if (BTRFS_I(inode)->last_unlink_trans > last_committed) { | ||
2857 | root = BTRFS_I(inode)->root; | ||
2858 | |||
2859 | /* | ||
2860 | * make sure any commits to the log are forced | ||
2861 | * to be full commits | ||
2862 | */ | ||
2863 | root->fs_info->last_trans_log_full_commit = | ||
2864 | trans->transid; | ||
2865 | ret = 1; | ||
2866 | break; | ||
2867 | } | ||
2868 | |||
2869 | if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb) | ||
2870 | break; | ||
2871 | |||
2872 | if (parent == sb->s_root) | ||
2873 | break; | ||
2874 | |||
2875 | parent = parent->d_parent; | ||
2876 | inode = parent->d_inode; | ||
2877 | |||
2878 | } | ||
2879 | out: | ||
2718 | return ret; | 2880 | return ret; |
2719 | } | 2881 | } |
2720 | 2882 | ||
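The parent walk in check_parent_dirs_for_sync() is easiest to see from user space. A minimal scenario that trips the last_unlink_trans check (illustrative only; the paths are made up and both directories are assumed to already exist on a btrfs mount):

	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		int fd = open("A/foo", O_CREAT | O_WRONLY, 0644);

		write(fd, "data", 4);
		rename("A/foo", "B/foo"); /* records last_unlink_trans, see btrfs_record_unlink_dir below */
		fsync(fd);                /* the walk sees the rename and forces a full commit */
		close(fd);
		return 0;
	}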
@@ -2724,31 +2886,70 @@ int btrfs_log_inode(struct btrfs_trans_handle *trans, | |||
2724 | * only logging is done of any parent directories that are older than | 2886 | * only logging is done of any parent directories that are older than |
2725 | * the last committed transaction | 2887 | * the last committed transaction |
2726 | */ | 2888 | */ |
2727 | int btrfs_log_dentry(struct btrfs_trans_handle *trans, | 2889 | int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, |
2728 | struct btrfs_root *root, struct dentry *dentry) | 2890 | struct btrfs_root *root, struct inode *inode, |
2891 | struct dentry *parent, int exists_only) | ||
2729 | { | 2892 | { |
2730 | int inode_only = LOG_INODE_ALL; | 2893 | int inode_only = exists_only ? LOG_INODE_EXISTS : LOG_INODE_ALL; |
2731 | struct super_block *sb; | 2894 | struct super_block *sb; |
2732 | int ret; | 2895 | int ret = 0; |
2896 | u64 last_committed = root->fs_info->last_trans_committed; | ||
2897 | |||
2898 | sb = inode->i_sb; | ||
2899 | |||
2900 | if (btrfs_test_opt(root, NOTREELOG)) { | ||
2901 | ret = 1; | ||
2902 | goto end_no_trans; | ||
2903 | } | ||
2904 | |||
2905 | if (root->fs_info->last_trans_log_full_commit > | ||
2906 | root->fs_info->last_trans_committed) { | ||
2907 | ret = 1; | ||
2908 | goto end_no_trans; | ||
2909 | } | ||
2910 | |||
2911 | ret = check_parent_dirs_for_sync(trans, inode, parent, | ||
2912 | sb, last_committed); | ||
2913 | if (ret) | ||
2914 | goto end_no_trans; | ||
2733 | 2915 | ||
2734 | start_log_trans(trans, root); | 2916 | start_log_trans(trans, root); |
2735 | sb = dentry->d_inode->i_sb; | ||
2736 | while (1) { | ||
2737 | ret = __btrfs_log_inode(trans, root, dentry->d_inode, | ||
2738 | inode_only); | ||
2739 | BUG_ON(ret); | ||
2740 | inode_only = LOG_INODE_EXISTS; | ||
2741 | 2917 | ||
2742 | dentry = dentry->d_parent; | 2918 | ret = btrfs_log_inode(trans, root, inode, inode_only); |
2743 | if (!dentry || !dentry->d_inode || sb != dentry->d_inode->i_sb) | 2919 | BUG_ON(ret); |
2920 | |||
2921 | /* | ||
2922 | * for regular files, if its inode is already on disk, we don't | ||
2923 | * have to worry about the parents at all. This is because | ||
2924 | * we can use the last_unlink_trans field to record renames | ||
2925 | * and other fun in this file. | ||
2926 | */ | ||
2927 | if (S_ISREG(inode->i_mode) && | ||
2928 | BTRFS_I(inode)->generation <= last_committed && | ||
2929 | BTRFS_I(inode)->last_unlink_trans <= last_committed) | ||
2930 | goto no_parent; | ||
2931 | |||
2932 | inode_only = LOG_INODE_EXISTS; | ||
2933 | while (1) { | ||
2934 | if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb) | ||
2744 | break; | 2935 | break; |
2745 | 2936 | ||
2746 | if (BTRFS_I(dentry->d_inode)->generation <= | 2937 | inode = parent->d_inode; |
2747 | root->fs_info->last_trans_committed) | 2938 | if (BTRFS_I(inode)->generation > |
2939 | root->fs_info->last_trans_committed) { | ||
2940 | ret = btrfs_log_inode(trans, root, inode, inode_only); | ||
2941 | BUG_ON(ret); | ||
2942 | } | ||
2943 | if (parent == sb->s_root) | ||
2748 | break; | 2944 | break; |
2945 | |||
2946 | parent = parent->d_parent; | ||
2749 | } | 2947 | } |
2750 | end_log_trans(root); | 2948 | no_parent: |
2751 | return 0; | 2949 | ret = 0; |
2950 | btrfs_end_log_trans(root); | ||
2951 | end_no_trans: | ||
2952 | return ret; | ||
2752 | } | 2953 | } |
2753 | 2954 | ||
2754 | /* | 2955 | /* |
@@ -2760,12 +2961,8 @@ int btrfs_log_dentry(struct btrfs_trans_handle *trans, | |||
2760 | int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, | 2961 | int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, |
2761 | struct btrfs_root *root, struct dentry *dentry) | 2962 | struct btrfs_root *root, struct dentry *dentry) |
2762 | { | 2963 | { |
2763 | u64 gen; | 2964 | return btrfs_log_inode_parent(trans, root, dentry->d_inode, |
2764 | gen = root->fs_info->last_trans_new_blockgroup; | 2965 | dentry->d_parent, 0); |
2765 | if (gen > root->fs_info->last_trans_committed) | ||
2766 | return 1; | ||
2767 | else | ||
2768 | return btrfs_log_dentry(trans, root, dentry); | ||
2769 | } | 2966 | } |
2770 | 2967 | ||
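btrfs_log_dentry_safe() now just forwards to btrfs_log_inode_parent() and returns its result, so the fsync path must act on a non-zero return. A sketch of the expected caller pattern; the fs/btrfs/file.c side is not part of this hunk, so the exact sequence here is an assumption:

	ret = btrfs_log_dentry_safe(trans, root, dentry);
	if (ret > 0) {
		/* the tree log can't cover this case: full commit */
		ret = btrfs_commit_transaction(trans, root);
	} else {
		/* everything made it into the log tree; sync only that */
		btrfs_sync_log(trans, root);
		ret = btrfs_end_transaction(trans, root);
	}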
2771 | /* | 2968 | /* |
@@ -2884,3 +3081,94 @@ again: | |||
2884 | kfree(log_root_tree); | 3081 | kfree(log_root_tree); |
2885 | return 0; | 3082 | return 0; |
2886 | } | 3083 | } |
3084 | |||
3085 | /* | ||
3086 | * there are some corner cases where we want to force a full | ||
3087 | * commit instead of allowing a directory to be logged. | ||
3088 | * | ||
3089 | * They revolve around files that were unlinked from the directory, and | ||
3090 | * this function updates the parent directory so that a full commit is | ||
3091 | * properly done if it is fsync'd later after the unlinks are done. | ||
3092 | */ | ||
3093 | void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans, | ||
3094 | struct inode *dir, struct inode *inode, | ||
3095 | int for_rename) | ||
3096 | { | ||
3097 | /* | ||
3098 | * when we're logging a file, if it hasn't been renamed | ||
3099 | * or unlinked, and its inode is fully committed on disk, | ||
3100 | * we don't have to worry about walking up the directory chain | ||
3101 | * to log its parents. | ||
3102 | * | ||
3103 | * So, we use the last_unlink_trans field to put this transid | ||
3104 | * into the file. When the file is logged we check it and | ||
3105 | * don't log the parents if the file is fully on disk. | ||
3106 | */ | ||
3107 | if (S_ISREG(inode->i_mode)) | ||
3108 | BTRFS_I(inode)->last_unlink_trans = trans->transid; | ||
3109 | |||
3110 | /* | ||
3111 | * if this directory was already logged, any new | ||
3112 | * names for this file/dir will get recorded | ||
3113 | */ | ||
3114 | smp_mb(); | ||
3115 | if (BTRFS_I(dir)->logged_trans == trans->transid) | ||
3116 | return; | ||
3117 | |||
3118 | /* | ||
3119 | * if the inode we're about to unlink was logged, | ||
3120 | * the log will be properly updated for any new names | ||
3121 | */ | ||
3122 | if (BTRFS_I(inode)->logged_trans == trans->transid) | ||
3123 | return; | ||
3124 | |||
3125 | /* | ||
3126 | * when renaming files across directories, if the directory | ||
3127 | * we're unlinking from gets fsync'd later on, there's | ||
3128 | * no way to find the destination directory later and fsync it | ||
3129 | * properly. So, we have to be conservative and force commits | ||
3130 | * so the new name gets discovered. | ||
3131 | */ | ||
3132 | if (for_rename) | ||
3133 | goto record; | ||
3134 | |||
3135 | /* we can safely do the unlink without any special recording */ | ||
3136 | return; | ||
3137 | |||
3138 | record: | ||
3139 | BTRFS_I(dir)->last_unlink_trans = trans->transid; | ||
3140 | } | ||
3141 | |||
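For context, a hedged sketch of the unlink side: the hook runs inside the transaction, before the directory entry goes away. Only btrfs_record_unlink_dir() is from this patch; the surrounding calls are assumptions about the inode.c caller:

	trans = btrfs_start_transaction(root, 1);
	btrfs_record_unlink_dir(trans, dir, dentry->d_inode,
				0 /* not a rename */);
	ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode,
				 dentry->d_name.name, dentry->d_name.len);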
3142 | /* | ||
3143 | * Call this after adding a new name for a file and it will properly | ||
3144 | * update the log to reflect the new name. | ||
3145 | * | ||
3146 | * It will return zero if all goes well, and it will return 1 if a | ||
3147 | * full transaction commit is required. | ||
3148 | */ | ||
3149 | int btrfs_log_new_name(struct btrfs_trans_handle *trans, | ||
3150 | struct inode *inode, struct inode *old_dir, | ||
3151 | struct dentry *parent) | ||
3152 | { | ||
3153 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
3154 | |||
3155 | /* | ||
3156 | * this will force the logging code to walk the dentry chain | ||
3157 | * up for the file | ||
3158 | */ | ||
3159 | if (S_ISREG(inode->i_mode)) | ||
3160 | BTRFS_I(inode)->last_unlink_trans = trans->transid; | ||
3161 | |||
3162 | /* | ||
3163 | * if this inode hasn't been logged and the directory we're renaming it | ||
3164 | * from hasn't been logged, we don't need to log it | ||
3165 | */ | ||
3166 | if (BTRFS_I(inode)->logged_trans <= | ||
3167 | root->fs_info->last_trans_committed && | ||
3168 | (!old_dir || BTRFS_I(old_dir)->logged_trans <= | ||
3169 | root->fs_info->last_trans_committed)) | ||
3170 | return 0; | ||
3171 | |||
3172 | return btrfs_log_inode_parent(trans, root, inode, parent, 1); | ||
3173 | } | ||
3174 | |||
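The rename side pairs the two hooks: the old directory is recorded with for_rename set before the entries move, and btrfs_log_new_name() runs once the new name exists. A sketch only; the placement and error handling are assumptions, and per the comment above a return of 1 means the caller must fall back to a full commit:

	btrfs_record_unlink_dir(trans, old_dir, old_dentry->d_inode,
				1 /* for_rename */);
	/* ... directory entries are moved here ... */
	ret = btrfs_log_new_name(trans, old_dentry->d_inode, old_dir,
				 new_dentry->d_parent);
	if (ret)
		ret = btrfs_commit_transaction(trans, root); /* full commit */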
diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h index b9409b32ed02..d09c7609e16b 100644 --- a/fs/btrfs/tree-log.h +++ b/fs/btrfs/tree-log.h | |||
@@ -22,14 +22,9 @@ | |||
22 | int btrfs_sync_log(struct btrfs_trans_handle *trans, | 22 | int btrfs_sync_log(struct btrfs_trans_handle *trans, |
23 | struct btrfs_root *root); | 23 | struct btrfs_root *root); |
24 | int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root); | 24 | int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root); |
25 | int btrfs_log_dentry(struct btrfs_trans_handle *trans, | ||
26 | struct btrfs_root *root, struct dentry *dentry); | ||
27 | int btrfs_recover_log_trees(struct btrfs_root *tree_root); | 25 | int btrfs_recover_log_trees(struct btrfs_root *tree_root); |
28 | int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, | 26 | int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, |
29 | struct btrfs_root *root, struct dentry *dentry); | 27 | struct btrfs_root *root, struct dentry *dentry); |
30 | int btrfs_log_inode(struct btrfs_trans_handle *trans, | ||
31 | struct btrfs_root *root, struct inode *inode, | ||
32 | int inode_only); | ||
33 | int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, | 28 | int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, |
34 | struct btrfs_root *root, | 29 | struct btrfs_root *root, |
35 | const char *name, int name_len, | 30 | const char *name, int name_len, |
@@ -38,4 +33,16 @@ int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans, | |||
38 | struct btrfs_root *root, | 33 | struct btrfs_root *root, |
39 | const char *name, int name_len, | 34 | const char *name, int name_len, |
40 | struct inode *inode, u64 dirid); | 35 | struct inode *inode, u64 dirid); |
36 | int btrfs_join_running_log_trans(struct btrfs_root *root); | ||
37 | int btrfs_end_log_trans(struct btrfs_root *root); | ||
38 | int btrfs_pin_log_trans(struct btrfs_root *root); | ||
39 | int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, | ||
40 | struct btrfs_root *root, struct inode *inode, | ||
41 | struct dentry *parent, int exists_only); | ||
42 | void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans, | ||
43 | struct inode *dir, struct inode *inode, | ||
44 | int for_rename); | ||
45 | int btrfs_log_new_name(struct btrfs_trans_handle *trans, | ||
46 | struct inode *inode, struct inode *old_dir, | ||
47 | struct dentry *parent); | ||
41 | #endif | 48 | #endif |
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index dd06e18e5aac..e0913e469728 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <linux/buffer_head.h> | 20 | #include <linux/buffer_head.h> |
21 | #include <linux/blkdev.h> | 21 | #include <linux/blkdev.h> |
22 | #include <linux/random.h> | 22 | #include <linux/random.h> |
23 | #include <linux/iocontext.h> | ||
23 | #include <asm/div64.h> | 24 | #include <asm/div64.h> |
24 | #include "compat.h" | 25 | #include "compat.h" |
25 | #include "ctree.h" | 26 | #include "ctree.h" |
@@ -145,8 +146,9 @@ static noinline int run_scheduled_bios(struct btrfs_device *device) | |||
145 | int again = 0; | 146 | int again = 0; |
146 | unsigned long num_run = 0; | 147 | unsigned long num_run = 0; |
147 | unsigned long limit; | 148 | unsigned long limit; |
149 | unsigned long last_waited = 0; | ||
148 | 150 | ||
149 | bdi = device->bdev->bd_inode->i_mapping->backing_dev_info; | 151 | bdi = blk_get_backing_dev_info(device->bdev); |
150 | fs_info = device->dev_root->fs_info; | 152 | fs_info = device->dev_root->fs_info; |
151 | limit = btrfs_async_submit_limit(fs_info); | 153 | limit = btrfs_async_submit_limit(fs_info); |
152 | limit = limit * 2 / 3; | 154 | limit = limit * 2 / 3; |
@@ -207,7 +209,32 @@ loop_lock: | |||
207 | if (pending && bdi_write_congested(bdi) && num_run > 16 && | 209 | if (pending && bdi_write_congested(bdi) && num_run > 16 && |
208 | fs_info->fs_devices->open_devices > 1) { | 210 | fs_info->fs_devices->open_devices > 1) { |
209 | struct bio *old_head; | 211 | struct bio *old_head; |
212 | struct io_context *ioc; | ||
210 | 213 | ||
214 | ioc = current->io_context; | ||
215 | |||
216 | /* | ||
217 | * the main goal here is that we don't want to | ||
218 | * block if we're going to be able to submit | ||
219 | * more requests without blocking. | ||
220 | * | ||
221 | * This code does two great things: it pokes into | ||
222 | * the elevator code from a filesystem _and_ | ||
223 | * it makes assumptions about how batching works. | ||
224 | */ | ||
225 | if (ioc && ioc->nr_batch_requests > 0 && | ||
226 | time_before(jiffies, ioc->last_waited + HZ/50UL) && | ||
227 | (last_waited == 0 || | ||
228 | ioc->last_waited == last_waited)) { | ||
229 | /* | ||
230 | * we want to go through our batch of | ||
231 | * requests and stop. So, we copy out | ||
232 | * the ioc->last_waited time and test | ||
233 | * against it before looping | ||
234 | */ | ||
235 | last_waited = ioc->last_waited; | ||
236 | continue; | ||
237 | } | ||
211 | spin_lock(&device->io_lock); | 238 | spin_lock(&device->io_lock); |
212 | 239 | ||
213 | old_head = device->pending_bios; | 240 | old_head = device->pending_bios; |
@@ -231,6 +258,18 @@ loop_lock: | |||
231 | if (device->pending_bios) | 258 | if (device->pending_bios) |
232 | goto loop_lock; | 259 | goto loop_lock; |
233 | spin_unlock(&device->io_lock); | 260 | spin_unlock(&device->io_lock); |
261 | |||
262 | /* | ||
263 | * IO has already been through a long path to get here. Checksumming, | ||
264 | * async helper threads, perhaps compression. We've done a pretty | ||
265 | * good job of collecting a batch of IO and should just unplug | ||
266 | * the device right away. | ||
267 | * | ||
268 | * This will help anyone who is waiting on the IO; they might have | ||
269 | * already unplugged, but managed to do so before the bio they | ||
270 | * cared about found its way down here. | ||
271 | */ | ||
272 | blk_run_backing_dev(bdi, NULL); | ||
234 | done: | 273 | done: |
235 | return 0; | 274 | return 0; |
236 | } | 275 | } |
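The batching test in run_scheduled_bios() packs three conditions into one if; restated as a stand-alone predicate for readability (a sketch for explanation only, not code from this patch):

	/*
	 * true while the current task is inside an elevator batch it
	 * entered less than HZ/50 (~20ms) ago, meaning it can keep
	 * submitting bios without blocking; *last_waited remembers which
	 * batch we already decided to ride, so we make at most one pass
	 * through it before backing off.
	 */
	static int inside_io_batch(struct io_context *ioc,
				   unsigned long *last_waited)
	{
		if (!ioc || ioc->nr_batch_requests <= 0)
			return 0;
		if (!time_before(jiffies, ioc->last_waited + HZ / 50UL))
			return 0;
		if (*last_waited && ioc->last_waited != *last_waited)
			return 0;
		*last_waited = ioc->last_waited;
		return 1;
	}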
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 86c44e9ae110..2185de72ff7d 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h | |||
@@ -76,7 +76,7 @@ struct btrfs_device { | |||
76 | struct btrfs_fs_devices { | 76 | struct btrfs_fs_devices { |
77 | u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */ | 77 | u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */ |
78 | 78 | ||
79 | /* the device with this id has the most recent coyp of the super */ | 79 | /* the device with this id has the most recent copy of the super */ |
80 | u64 latest_devid; | 80 | u64 latest_devid; |
81 | u64 latest_trans; | 81 | u64 latest_trans; |
82 | u64 num_devices; | 82 | u64 num_devices; |